diff -urP linux-2.5.67/arch/i386/Kconfig linux-2.5.67_patched/arch/i386/Kconfig
--- linux-2.5.67/arch/i386/Kconfig	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/arch/i386/Kconfig	Thu Apr 10 17:47:36 2003
@@ -373,6 +373,13 @@
 	depends on MK8 || MPENTIUM4
 	default y
 
+config SWAP_PREFETCH
+	tristate "Prefetch swapped memory"
+	depends on SWAP
+	help
+	  This option enables the kernel to prefetch swapped memory pages
+	  when idle.
+
 config HUGETLB_PAGE
 	bool "Huge TLB Page Support"
 	help
diff -urP linux-2.5.67/fs/inode.c linux-2.5.67_patched/fs/inode.c
--- linux-2.5.67/fs/inode.c	Mon Apr 7 19:32:58 2003
+++ linux-2.5.67_patched/fs/inode.c	Sat Apr 12 03:30:39 2003
@@ -180,6 +180,7 @@
 	INIT_LIST_HEAD(&inode->i_dentry);
 	INIT_LIST_HEAD(&inode->i_devices);
 	sema_init(&inode->i_sem, 1);
+	INIT_RADIX_TREE(&inode->i_data.swap_tree, GFP_ATOMIC);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	rwlock_init(&inode->i_data.page_lock);
 	init_MUTEX(&inode->i_data.i_shared_sem);
diff -urP linux-2.5.67/include/linux/fs.h linux-2.5.67_patched/include/linux/fs.h
--- linux-2.5.67/include/linux/fs.h	Mon Apr 7 19:30:58 2003
+++ linux-2.5.67_patched/include/linux/fs.h	Sat Apr 12 03:31:24 2003
@@ -312,6 +312,7 @@
 struct backing_dev_info;
 struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
+	struct radix_tree_root	swap_tree;	/* radix tree of swapped pages */
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
 	rwlock_t		page_lock;	/* and rwlock protecting it */
 	struct list_head	clean_pages;	/* list of clean pages */
diff -urP linux-2.5.67/include/linux/swap.h linux-2.5.67_patched/include/linux/swap.h
--- linux-2.5.67/include/linux/swap.h	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/include/linux/swap.h	Thu Apr 10 18:36:33 2003
@@ -155,6 +155,8 @@
 extern unsigned int nr_free_pages_pgdat(pg_data_t *pgdat);
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
+extern unsigned int nr_avail_buffer_pages(void);
+extern unsigned int nr_avail_pagecache_pages(void);
 
 /* linux/mm/swap.c */
 extern void FASTCALL(lru_cache_add(struct page *));
diff -urP linux-2.5.67/include/linux/swap_prefetch.h linux-2.5.67_patched/include/linux/swap_prefetch.h
--- linux-2.5.67/include/linux/swap_prefetch.h	Thu Jan 1 01:00:00 1970
+++ linux-2.5.67_patched/include/linux/swap_prefetch.h	Wed Apr 16 16:00:09 2003
@@ -0,0 +1,57 @@
+#ifndef _LINUX_SWAP_PREFETCH_H
+#define _LINUX_SWAP_PREFETCH_H
+
+#include
+#include
+#include
+
+struct swapped_entry_t {
+	struct list_head head;
+	swp_entry_t swp_entry;
+	struct address_space *mapping;
+};
+
+struct swapped_list_t {
+	spinlock_t lock;
+	struct list_head head;
+	kmem_cache_t *cache;
+};
+
+extern struct swapped_list_t swapped_list;
+
+static inline void add_to_swapped_list(struct address_space *mapping,
+				       unsigned long index)
+{
+	struct swapped_entry_t *entry;
+	int error;
+
+	entry = kmem_cache_alloc(swapped_list.cache, GFP_ATOMIC);
+	if(entry) {
+		entry->swp_entry.val = index;
+		entry->mapping = mapping;
+
+		spin_lock(&swapped_list.lock);
+		error = radix_tree_insert(&mapping->swap_tree, index, entry);
+		if(!error)
+			list_add(&entry->head, &swapped_list.head);
+		else
+			kmem_cache_free(swapped_list.cache, entry);
+		spin_unlock(&swapped_list.lock);
+	}
+}
+
+static inline void remove_from_swapped_list(struct address_space *mapping,
+					    unsigned long index)
+{
+	struct swapped_entry_t *entry;
+
+	spin_lock(&swapped_list.lock);
+	entry = radix_tree_delete(&mapping->swap_tree, index);
+	if(entry) {
+		list_del(&entry->head);
+		kmem_cache_free(swapped_list.cache, entry);
+	}
+	spin_unlock(&swapped_list.lock);
+}
+
+#endif /* _LINUX_SWAP_PREFETCH_H */
diff -urP linux-2.5.67/kernel/ksyms.c linux-2.5.67_patched/kernel/ksyms.c
--- linux-2.5.67/kernel/ksyms.c	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/kernel/ksyms.c	Mon Apr 14 01:51:51 2003
@@ -58,6 +58,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #if defined(CONFIG_PROC_FS)
@@ -70,6 +71,13 @@
 extern struct timezone sys_tz;
 extern int panic_timeout;
 
+
+/* needed for swap prefetch support */
+EXPORT_SYMBOL(swapped_list);
+EXPORT_SYMBOL(swapper_space);
+EXPORT_SYMBOL(swapin_readahead);
+EXPORT_SYMBOL(do_page_cache_readahead);
+EXPORT_SYMBOL(nr_avail_pagecache_pages);
 
 /* process memory management */
 EXPORT_SYMBOL(do_mmap_pgoff);
diff -urP linux-2.5.67/mm/Makefile linux-2.5.67_patched/mm/Makefile
--- linux-2.5.67/mm/Makefile	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/mm/Makefile	Thu Apr 10 17:47:36 2003
@@ -12,3 +12,5 @@
 	   slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
+
+obj-$(CONFIG_SWAP_PREFETCH) += swap_prefetch.o
diff -urP linux-2.5.67/mm/filemap.c linux-2.5.67_patched/mm/filemap.c
--- linux-2.5.67/mm/filemap.c	Mon Apr 7 19:31:02 2003
+++ linux-2.5.67_patched/mm/filemap.c	Wed Apr 16 16:04:40 2003
@@ -16,8 +16,7 @@
 #include
 #include
 #include
-#include
-#include
+#include
 #include
 #include
 #include
@@ -84,6 +83,7 @@
 
 	BUG_ON(PageDirty(page) && !PageSwapCache(page));
 
+	remove_from_swapped_list(mapping, page->index);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	list_del(&page->list);
 	page->mapping = NULL;
@@ -223,8 +223,11 @@
 int add_to_page_cache(struct page *page, struct address_space *mapping,
 		pgoff_t offset, int gfp_mask)
 {
-	int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+	int error;
 
+	remove_from_swapped_list(mapping, offset);
+
+	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (error == 0) {
 		page_cache_get(page);
 		write_lock(&mapping->page_lock);
diff -urP linux-2.5.67/mm/page_alloc.c linux-2.5.67_patched/mm/page_alloc.c
--- linux-2.5.67/mm/page_alloc.c	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/mm/page_alloc.c	Thu Apr 10 17:47:36 2003
@@ -787,6 +787,48 @@
 }
 #endif
 
+static unsigned int nr_avail_zone_pages(int offset)
+{
+	pg_data_t *pgdat;
+	unsigned long avail = 0;
+
+	for_each_pgdat(pgdat) {
+		struct zonelist *zonelist = pgdat->node_zonelists + offset;
+		struct zone **zonep = zonelist->zones;
+		struct zone *zone;
+		unsigned long low = 0;
+
+		for (zone = *zonep++; zone; zone = *zonep++) {
+			unsigned long local_free = zone->free_pages;
+			unsigned long local_low = zone->pages_low;
+
+			low += local_low;
+			if (local_free > low) {
+				avail = max(avail, local_free - low);
+			}
+			low += local_low * sysctl_lower_zone_protection;
+		}
+	}
+
+	return avail;
+}
+
+/*
+ * Amount of available RAM allocatable within ZONE_DMA and ZONE_NORMAL
+ */
+unsigned int nr_avail_buffer_pages(void)
+{
+	return nr_avail_zone_pages(GFP_USER & GFP_ZONEMASK);
+}
+
+/*
+ * Amount of available RAM allocatable within all zones
+ */
+unsigned int nr_avail_pagecache_pages(void)
+{
+	return nr_avail_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
+}
+
 #ifdef CONFIG_NUMA
 static void show_node(struct zone *zone)
 {
diff -urP linux-2.5.67/mm/swap.c linux-2.5.67_patched/mm/swap.c
--- linux-2.5.67/mm/swap.c	Mon Apr 7 19:31:05 2003
+++ linux-2.5.67_patched/mm/swap.c	Sat Apr 12 03:19:53 2003
@@ -13,9 +13,8 @@
  * Buffermem limits added 12.3.98, Rik van Riel.
  */
 
-#include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -24,6 +23,11 @@
 #include
 #include
 
+struct swapped_list_t swapped_list = {
+	.lock = SPIN_LOCK_UNLOCKED,
+	.head = LIST_HEAD_INIT(swapped_list.head),
+};
+
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
@@ -390,4 +394,12 @@
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
+
+	/*
+	 * Create kmem cache for swapped entries
+	 */
+	swapped_list.cache = kmem_cache_create("swapped_entry",
+		sizeof(struct swapped_entry_t), 0, 0, NULL, NULL);
+	if(!swapped_list.cache)
+		panic("swap_setup(): cannot create swapped_entry SLAB cache");
 }
diff -urP linux-2.5.67/mm/swap_prefetch.c linux-2.5.67_patched/mm/swap_prefetch.c
--- linux-2.5.67/mm/swap_prefetch.c	Thu Jan 1 01:00:00 1970
+++ linux-2.5.67_patched/mm/swap_prefetch.c	Thu Apr 17 00:29:40 2003
@@ -0,0 +1,88 @@
+#include
+#include
+#include
+
+#define RESERVED_PAGES	50	/* let 200 kByte of pagecache free */
+#define INTERVAL	60	/* (secs) Default is 1 minute */
+
+static int reserved_pages = RESERVED_PAGES;
+static int interval = INTERVAL;
+
+MODULE_PARM(reserved_pages,"i");
+MODULE_PARM_DESC(reserved_pages,
+	"count of pagecache pages to let free (default 50)");
+
+MODULE_PARM(interval,"i");
+MODULE_PARM_DESC(interval,
+	"delay in seconds to wait between memory checks (default 60)");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Schlichter ");
+MODULE_DESCRIPTION("prefetches swap pages when there is free memory");
+
+/*
+ * Our timer
+ */
+static void prefetch_timer_handler(unsigned long data);
+static struct timer_list prefetch_timer =
+	TIMER_INITIALIZER(prefetch_timer_handler, 0, 0);
+
+/*
+ * Our work
+ */
+static void prefetch_work_handler(void *data);
+static DECLARE_WORK(prefetch_work, prefetch_work_handler, 0);
+
+/*
+ * If the timer expires..
+ */
+static void prefetch_timer_handler(unsigned long data)
+{
+	schedule_work(&prefetch_work);
+	prefetch_timer.expires = jiffies + interval * HZ;
+	add_timer(&prefetch_timer);
+}
+
+/*
+ * ..do the work
+ */
+static void prefetch_work_handler(void *data)
+{
+	printk(KERN_INFO "Available pages before: %d\n", nr_avail_pagecache_pages());
+
+	while(nr_avail_pagecache_pages() > reserved_pages) {
+		struct swapped_entry_t *entry;
+
+		spin_lock(&swapped_list.lock);
+		if(list_empty(&swapped_list.head)) {
+			spin_unlock(&swapped_list.lock);
+			break;
+		}
+		entry = list_entry(swapped_list.head.next, struct swapped_entry_t, head);
+		radix_tree_delete(&entry->mapping->swap_tree, entry->swp_entry.val);
+		list_del(&entry->head);
+		spin_unlock(&swapped_list.lock);
+
+		if(entry->mapping == &swapper_space)
+			swapin_readahead(entry->swp_entry);
+		else
+			do_page_cache_readahead(entry->mapping, NULL, entry->swp_entry.val, 1);
+		kmem_cache_free(swapped_list.cache, entry);
+	}
+
+	printk(KERN_INFO "Available pages after: %d\n", nr_avail_pagecache_pages());
+}
+
+static int __init prefetch_init(void)
+{
+	prefetch_timer_handler(0);
+	return 0;
+}
+
+static void __exit prefetch_exit(void)
+{
+	del_timer(&prefetch_timer);
+}
+
+module_init(prefetch_init);
+module_exit(prefetch_exit);
diff -urP linux-2.5.67/mm/swap_state.c linux-2.5.67_patched/mm/swap_state.c
--- linux-2.5.67/mm/swap_state.c	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/mm/swap_state.c	Sat Apr 12 03:29:59 2003
@@ -33,6 +33,7 @@
 extern struct address_space_operations swap_aops;
 
 struct address_space swapper_space = {
+	.swap_tree	= RADIX_TREE_INIT(GFP_ATOMIC),
 	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC),
 	.page_lock	= RW_LOCK_UNLOCKED,
 	.clean_pages	= LIST_HEAD_INIT(swapper_space.clean_pages),
diff -urP linux-2.5.67/mm/vmscan.c linux-2.5.67_patched/mm/vmscan.c
--- linux-2.5.67/mm/vmscan.c	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/mm/vmscan.c	Thu Apr 17 14:41:17 2003
@@ -11,10 +11,9 @@
  * Multiqueue VM started 5.8.00, Rik van Riel.
  */
 
-#include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -417,6 +416,9 @@
 		ret++;
 		if (!pagevec_add(&freed_pvec, page))
 			__pagevec_release_nonlru(&freed_pvec);
+		if (mapping)
+//		if (mapping == &swapper_space)
+			add_to_swapped_list(mapping, page->index);
 		continue;
 
 activate_locked:
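---

Usage note (trailing text, ignored by patch): a minimal sketch of trying the prefetcher, assuming CONFIG_SWAP_PREFETCH=m is selected so that the mm/Makefile entry above builds a loadable module named swap_prefetch; the two parameters correspond to the MODULE_PARM declarations in mm/swap_prefetch.c, and the exact object file name on a 2.5 tree is an assumption here.

	# load the module, keeping 100 pagecache pages free and checking every 30 seconds
	modprobe swap_prefetch reserved_pages=100 interval=30
	# the work handler logs "Available pages before/after" via printk
	dmesg | tail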