oom-6 [was: Re: [patch] oom-5]

Andrea Arcangeli (andrea@e-mind.com)
Wed, 7 Oct 1998 16:21:31 +0200 (CEST)


On Wed, 7 Oct 1998, Andrea Arcangeli wrote:

>And BTW, my patch still needs some other _minor_ improvements, like waking
>up kswapd after a process exit() if we are low on memory (I got this idea
>this morning while studying Analysis ;-).

I have now implemented this last improvement. The patch is completely
untested (and I have no way to try it right now because I can't reboot my
box and my other machine at home is in use).

I'd like to know whether this patch makes any difference compared to
version -5 (I am still running -5 right now, btw).
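
For reference, the new exit-path wakeup boils down to a single check at the
end of the mm teardown in kernel/fork.c. Here is a minimal sketch (the
function name is illustrative only; the identifiers and the two includes are
the ones the fork.c hunk below actually uses):

#include <linux/swap.h>		/* the two includes the patch adds to fork.c */
#include <linux/swapctl.h>
#include <linux/mm.h>		/* kswapd_wakeup() prototype added by this patch */

/*
 * Sketch only: in the real patch this check sits at the end of the
 * mm-freeing path in kernel/fork.c, right after kmem_cache_free(mm).
 * Once the exiting task has given back its pages, wake the pageout
 * daemon immediately if we are under the "low" watermark, instead of
 * waiting for the next swap-timer tick.
 */
static inline void exit_lowmem_kick(void)
{
	if (nr_free_pages < freepages.low)
		kswapd_wakeup();
}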

oom-6 against 2.1.124 follows:

Index: linux/mm/filemap.c
diff -u linux/mm/filemap.c:1.3 linux/mm/filemap.c:1.3.2.4
--- linux/mm/filemap.c:1.3 Sun Oct 4 16:27:52 1998
+++ linux/mm/filemap.c Wed Oct 7 02:06:51 1998
@@ -153,7 +153,7 @@
} while (tmp != bh);

/* Refuse to swap out all buffer pages */
- if ((buffermem >> PAGE_SHIFT) * 100 < (buffer_mem.min_percent * num_physpages))
+ if (buffer_under_min())
goto next;
}

@@ -174,7 +174,7 @@
age_page(page);
if (page->age)
break;
- if (page_cache_size * 100 < (page_cache.min_percent * num_physpages))
+ if (pgcache_under_min())
break;
if (PageSwapCache(page)) {
delete_from_swap_cache(page);
Index: linux/mm/page_alloc.c
diff -u linux/mm/page_alloc.c:1.1.1.1 linux/mm/page_alloc.c:1.1.1.1.10.2
--- linux/mm/page_alloc.c:1.1.1.1 Fri Oct 2 19:22:39 1998
+++ linux/mm/page_alloc.c Tue Oct 6 21:14:37 1998
@@ -237,11 +237,12 @@
unsigned long __get_free_pages(int gfp_mask, unsigned long order)
{
unsigned long flags;
+ int again = 0;

if (order >= NR_MEM_LISTS)
goto nopage;

- if (gfp_mask & __GFP_WAIT) {
+ if (gfp_mask & __GFP_WAIT)
if (in_interrupt()) {
static int count = 0;
if (++count < 5) {
@@ -249,33 +250,19 @@
__builtin_return_address(0));
}
goto nopage;
- }
-
- if (freepages.min > nr_free_pages) {
- int freed;
- freed = try_to_free_pages(gfp_mask, SWAP_CLUSTER_MAX);
- /*
- * Low priority (user) allocations must not
- * succeed if we didn't have enough memory
- * and we couldn't get more..
- */
- if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
- goto nopage;
}
- }
+ again:
spin_lock_irqsave(&page_alloc_lock, flags);
RMQUEUE(order, (gfp_mask & GFP_DMA));
spin_unlock_irqrestore(&page_alloc_lock, flags);

- /*
- * If we failed to find anything, we'll return NULL, but we'll
- * wake up kswapd _now_ ad even wait for it synchronously if
- * we can.. This way we'll at least make some forward progress
- * over time.
- */
- wake_up(&kswapd_wait);
- if (gfp_mask & __GFP_WAIT)
- schedule();
+ if (!again && nr_free_pages < freepages.min)
+ {
+ again = 1;
+ if (try_to_free_pages(gfp_mask, SWAP_CLUSTER_MAX))
+ goto again;
+ }
+
nopage:
return 0;
}
Index: linux/mm/vmscan.c
diff -u linux/mm/vmscan.c:1.4 linux/mm/vmscan.c:1.4.2.7
--- linux/mm/vmscan.c:1.4 Sun Oct 4 16:27:52 1998
+++ linux/mm/vmscan.c Wed Oct 7 16:10:33 1998
@@ -42,7 +42,7 @@
/*
* The wait queue for waking up the pageout daemon:
*/
-struct wait_queue * kswapd_wait = NULL;
+static struct wait_queue * kswapd_wait = NULL;

static void init_swap_timer(void);

@@ -447,40 +447,42 @@
static int do_try_to_free_page(int gfp_mask)
{
static int state = 0;
- int i=6;
- int stop;
+ int from_prio, to_prio;

- /* Always trim SLAB caches when memory gets low. */
- kmem_cache_reap(gfp_mask);
-
/* We try harder if we are waiting .. */
- stop = 3;
if (gfp_mask & __GFP_WAIT)
- stop = 0;
+ {
+ /* Trim SLAB caches when memory gets low and we can wait. */
+ kmem_cache_reap(gfp_mask);
+ from_prio = 3;
+ to_prio = 0;
+ } else {
+ from_prio = 6;
+ to_prio = 3;
+ }

- if (((buffermem >> PAGE_SHIFT) * 100 > buffer_mem.borrow_percent * num_physpages)
- || (page_cache_size * 100 > page_cache.borrow_percent * num_physpages))
- shrink_mmap(i, gfp_mask);
+ if (buffer_over_borrow() || pgcache_over_borrow())
+ state = 0;

switch (state) {
do {
case 0:
- if (shrink_mmap(i, gfp_mask))
+ if (shrink_mmap(from_prio, gfp_mask))
return 1;
state = 1;
case 1:
- if (shm_swap(i, gfp_mask))
+ if (shm_swap(from_prio, gfp_mask))
return 1;
state = 2;
case 2:
- if (swap_out(i, gfp_mask))
+ if (swap_out(from_prio, gfp_mask))
return 1;
state = 3;
case 3:
- shrink_dcache_memory(i, gfp_mask);
+ shrink_dcache_memory(from_prio, gfp_mask);
state = 0;
- i--;
- } while ((i - stop) >= 0);
+ from_prio--;
+ } while (from_prio >= to_prio);
}
return 0;
}
@@ -546,12 +548,14 @@
init_swap_timer();
add_wait_queue(&kswapd_wait, &wait);
while (1) {
- int tries;
+ int tries, free_memory, count;

current->state = TASK_INTERRUPTIBLE;
flush_signals(current);
run_task_queue(&tq_disk);
+ timer_active |= 1<<SWAP_TIMER;
schedule();
+ timer_active &= ~(1<<SWAP_TIMER);
swapstats.wakeups++;

/*
@@ -570,11 +574,20 @@
* woken up more often and the rate will be even
* higher).
*/
- tries = pager_daemon.tries_base;
- tries >>= 4*free_memory_available();
+ free_memory = free_memory_available();

- do {
- do_try_to_free_page(0);
+ if (free_memory == 2)
+ continue;
+ tries = pager_daemon.tries_base >> (free_memory + 2);
+
+ for (count = 0; count < tries; count++)
+ {
+ /*
+ * If we can't free one page we won't be able to
+ * free `tries' pages. -arca
+ */
+ if (!do_try_to_free_page(0))
+ break;
/*
* Syncing large chunks is faster than swapping
* synchronously (less head movement). -- Rik.
@@ -583,7 +596,7 @@
run_task_queue(&tq_disk);
if (free_memory_available() > 1)
break;
- } while (--tries > 0);
+ }
}
/* As if we could ever get here - maybe we want to make this killable */
remove_wait_queue(&kswapd_wait, &wait);
@@ -598,22 +611,22 @@
*
* The "PF_MEMALLOC" flag protects us against recursion:
* if we need more memory as part of a swap-out effort we
- * will just silently return "success" to tell the page
- * allocator to accept the allocation.
+ * will just silently return "fail" to tell the page
+ * allocator that we are OOM.
*/
int try_to_free_pages(unsigned int gfp_mask, int count)
{
- int retval = 1;
+ int retval = 0;

lock_kernel();
if (!(current->flags & PF_MEMALLOC)) {
current->flags |= PF_MEMALLOC;
- do {
+ while (count--)
+ {
retval = do_try_to_free_page(gfp_mask);
if (!retval)
break;
- count--;
- } while (count > 0);
+ }
current->flags &= ~PF_MEMALLOC;
}
unlock_kernel();
@@ -649,14 +662,19 @@
}

if ((long) (now - want) >= 0) {
- if (want_wakeup || (num_physpages * buffer_mem.max_percent) < (buffermem >> PAGE_SHIFT) * 100
- || (num_physpages * page_cache.max_percent < page_cache_size * 100)) {
+ if (want_wakeup || buffer_over_max() || pgcache_over_max())
+ {
/* Set the next wake-up time */
next_swap_jiffies = now + swapout_interval;
- wake_up(&kswapd_wait);
+ kswapd_wakeup();
}
}
timer_active |= (1<<SWAP_TIMER);
+}
+
+void kswapd_wakeup(void)
+{
+ wake_up(&kswapd_wait);
}

/*
Index: linux/kernel/fork.c
diff -u linux/kernel/fork.c:1.2 linux/kernel/fork.c:1.2.2.1
--- linux/kernel/fork.c:1.2 Mon Oct 5 01:08:12 1998
+++ linux/kernel/fork.c Wed Oct 7 16:10:16 1998
@@ -23,6 +23,8 @@
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/swapctl.h>

#include <asm/system.h>
#include <asm/pgtable.h>
@@ -296,6 +298,8 @@
exit_mmap(mm);
free_page_tables(mm);
kmem_cache_free(mm_cachep, mm);
+ if (nr_free_pages < freepages.low)
+ kswapd_wakeup();
}
}

Index: linux/include/linux/mm.h
diff -u linux/include/linux/mm.h:1.3 linux/include/linux/mm.h:1.3.2.2
--- linux/include/linux/mm.h:1.3 Sun Oct 4 16:27:49 1998
+++ linux/include/linux/mm.h Wed Oct 7 16:10:52 1998
@@ -258,7 +258,6 @@
* Decide if we should try to do some swapout..
*/
extern int free_memory_available(void);
-extern struct wait_queue * kswapd_wait;

#define free_page(addr) free_pages((addr),0)
extern void FASTCALL(free_pages(unsigned long addr, unsigned long order));
@@ -309,6 +308,9 @@
extern unsigned long get_cached_page(struct inode *, unsigned long, int);
extern void put_cached_page(unsigned long);

+/* vmscan.c */
+extern FASTCALL(void kswapd_wakeup(void));
+
/*
* GFP bitmasks..
*/
@@ -379,6 +381,31 @@
vma = NULL;
return vma;
}
+
+#define buffer_under_min() ((buffermem >> PAGE_SHIFT) * 100 < \
+ buffer_mem.min_percent * num_physpages)
+#define buffer_under_borrow() ((buffermem >> PAGE_SHIFT) * 100 < \
+ buffer_mem.borrow_percent * num_physpages)
+#define buffer_under_max() ((buffermem >> PAGE_SHIFT) * 100 < \
+ buffer_mem.max_percent * num_physpages)
+#define buffer_over_min() ((buffermem >> PAGE_SHIFT) * 100 > \
+ buffer_mem.min_percent * num_physpages)
+#define buffer_over_borrow() ((buffermem >> PAGE_SHIFT) * 100 > \
+ buffer_mem.borrow_percent * num_physpages)
+#define buffer_over_max() ((buffermem >> PAGE_SHIFT) * 100 > \
+ buffer_mem.max_percent * num_physpages)
+#define pgcache_under_min() (page_cache_size * 100 < \
+ page_cache.min_percent * num_physpages)
+#define pgcache_under_borrow() (page_cache_size * 100 < \
+ page_cache.borrow_percent * num_physpages)
+#define pgcache_under_max() (page_cache_size * 100 < \
+ page_cache.max_percent * num_physpages)
+#define pgcache_over_min() (page_cache_size * 100 > \
+ page_cache.min_percent * num_physpages)
+#define pgcache_over_borrow() (page_cache_size * 100 > \
+ page_cache.borrow_percent * num_physpages)
+#define pgcache_over_max() (page_cache_size * 100 > \
+ page_cache.max_percent * num_physpages)

#endif /* __KERNEL__ */

Andrea[s] Arcangeli
