[PATCH 10/12] mm: page_alloc_wait

From: root
Date: Thu Apr 05 2007 - 14:04:08 EST


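Introduce a mechanism to wait for free memory to become available.

page_alloc_wait() puts the caller to sleep (uninterruptibly) on a new
wait queue for at most the given timeout, and page_alloc_ok() wakes any
waiters; kswapd calls page_alloc_ok() after each balance_pgdat() pass.

Currently congestion_wait() is abused for this purpose, so convert the
congestion_wait(WRITE, HZ/50) call sites touched here to
page_alloc_wait(HZ/50).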

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---
arch/i386/lib/usercopy.c | 2 +-
fs/xfs/linux-2.6/kmem.c | 4 ++--
include/linux/mm.h | 3 +++
mm/page_alloc.c | 25 +++++++++++++++++++++++--
mm/shmem.c | 2 +-
mm/vmscan.c | 1 +
6 files changed, 31 insertions(+), 6 deletions(-)

Index: linux-2.6-mm/arch/i386/lib/usercopy.c
===================================================================
--- linux-2.6-mm.orig/arch/i386/lib/usercopy.c 2007-04-05 16:24:15.000000000 +0200
+++ linux-2.6-mm/arch/i386/lib/usercopy.c 2007-04-05 16:29:49.000000000 +0200
@@ -751,7 +751,7 @@ survive:

if (retval == -ENOMEM && is_init(current)) {
up_read(&current->mm->mmap_sem);
- congestion_wait(WRITE, HZ/50);
+ page_alloc_wait(HZ/50);
goto survive;
}

Index: linux-2.6-mm/fs/xfs/linux-2.6/kmem.c
===================================================================
--- linux-2.6-mm.orig/fs/xfs/linux-2.6/kmem.c 2007-04-05 16:24:15.000000000 +0200
+++ linux-2.6-mm/fs/xfs/linux-2.6/kmem.c 2007-04-05 16:29:49.000000000 +0200
@@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __n
printk(KERN_ERR "XFS: possible memory allocation "
"deadlock in %s (mode:0x%x)\n",
__FUNCTION__, lflags);
- congestion_wait(WRITE, HZ/50);
+ page_alloc_wait(HZ/50);
} while (1);
}

@@ -131,7 +131,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsig
printk(KERN_ERR "XFS: possible memory allocation "
"deadlock in %s (mode:0x%x)\n",
__FUNCTION__, lflags);
- congestion_wait(WRITE, HZ/50);
+ page_alloc_wait(HZ/50);
} while (1);
}

Index: linux-2.6-mm/include/linux/mm.h
===================================================================
--- linux-2.6-mm.orig/include/linux/mm.h 2007-04-05 16:24:15.000000000 +0200
+++ linux-2.6-mm/include/linux/mm.h 2007-04-05 16:29:49.000000000 +0200
@@ -1028,6 +1028,9 @@ extern void setup_per_cpu_pageset(void);
static inline void setup_per_cpu_pageset(void) {}
#endif

+void page_alloc_ok(void);
+long page_alloc_wait(long timeout);
+
/* prio_tree.c */
void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
Index: linux-2.6-mm/mm/page_alloc.c
===================================================================
--- linux-2.6-mm.orig/mm/page_alloc.c 2007-04-05 16:24:15.000000000 +0200
+++ linux-2.6-mm/mm/page_alloc.c 2007-04-05 16:35:04.000000000 +0200
@@ -107,6 +107,9 @@ unsigned long __meminitdata nr_kernel_pa
unsigned long __meminitdata nr_all_pages;
static unsigned long __initdata dma_reserve;

+static wait_queue_head_t page_alloc_wqh =
+ __WAIT_QUEUE_HEAD_INITIALIZER(page_alloc_wqh);
+
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
/*
* MAX_ACTIVE_REGIONS determines the maxmimum number of distinct
@@ -1698,7 +1701,7 @@ nofail_alloc:
if (page)
goto got_pg;
if (gfp_mask & __GFP_NOFAIL) {
- congestion_wait(WRITE, HZ/50);
+ page_alloc_wait(HZ/50);
goto nofail_alloc;
}
}
@@ -1763,7 +1766,7 @@ nofail_alloc:
do_retry = 1;
}
if (do_retry) {
- congestion_wait(WRITE, HZ/50);
+ page_alloc_wait(HZ/50);
goto rebalance;
}

@@ -4217,3 +4220,21 @@ void set_pageblock_flags_group(struct pa
else
__clear_bit(bitidx + start_bitidx, bitmap);
}
+
+void page_alloc_ok(void)
+{
+ if (waitqueue_active(&page_alloc_wqh))
+ wake_up(&page_alloc_wqh);
+}
+
+long page_alloc_wait(long timeout)
+{
+ long ret;
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&page_alloc_wqh, &wait, TASK_UNINTERRUPTIBLE);
+ ret = schedule_timeout(timeout);
+ finish_wait(&page_alloc_wqh, &wait);
+ return ret;
+}
+EXPORT_SYMBOL(page_alloc_wait);
Index: linux-2.6-mm/mm/shmem.c
===================================================================
--- linux-2.6-mm.orig/mm/shmem.c 2007-04-05 16:24:15.000000000 +0200
+++ linux-2.6-mm/mm/shmem.c 2007-04-05 16:30:31.000000000 +0200
@@ -1216,7 +1216,7 @@ repeat:
page_cache_release(swappage);
if (error == -ENOMEM) {
/* let kswapd refresh zone for GFP_ATOMICs */
- congestion_wait(WRITE, HZ/50);
+ page_alloc_wait(HZ/50);
}
goto repeat;
}
Index: linux-2.6-mm/mm/vmscan.c
===================================================================
--- linux-2.6-mm.orig/mm/vmscan.c 2007-04-05 16:29:46.000000000 +0200
+++ linux-2.6-mm/mm/vmscan.c 2007-04-05 16:29:49.000000000 +0200
@@ -1436,6 +1436,7 @@ static int kswapd(void *p)
finish_wait(&pgdat->kswapd_wait, &wait);

balance_pgdat(pgdat, order);
+ page_alloc_ok();
}
return 0;
}

--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/