Re: [PATCH 3/5] mm: Do not stall in synchronous compaction for THPallocations

From: Andrea Arcangeli
Date: Fri Nov 18 2011 - 12:34:33 EST


On Fri, Nov 18, 2011 at 04:58:42PM +0000, Mel Gorman wrote:
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 3a76faf..ef1b1af 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -83,7 +83,18 @@ struct vm_area_struct;
> #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
> #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */
>
> +/*
> + * __GFP_NO_KSWAPD indicates that the VM should favour failing the allocation
> + * over excessive disruption of the system. Currently this means
> + * 1. Do not wake kswapd (hence the flag name)
> + * 2. Do not use stall in synchronous compaction for high-order allocations
> + * as this may cause the caller to stall writing out pages
> + *
> + * This flag it primarily intended for use with transparent hugepage support.
> + * If the flag is used outside the VM, linux-mm should be cc'd for review.
> + */
> #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
> +
> #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
>
> /*
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 9dd443d..7a5c5b1 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2168,7 +2168,14 @@ rebalance:
> sync_migration);
> if (page)
> goto got_pg;
> - sync_migration = true;
> +
> + /*
> + * Do not use sync migration if __GFP_NO_KSWAPD is used to indicate
> + * the system should not be heavily disrupted. In practice, this is
> + * to avoid THP callers being stalled in writeback during migration
> + * as it's preferable for the the allocations to fail than to stall
> + */
> + sync_migration = !(gfp_mask & __GFP_NO_KSWAPD);
>
> /* Try direct reclaim and then allocating */
> page = __alloc_pages_direct_reclaim(gfp_mask, order,

I don't like this anymore. Also with sync migration compaction will
only scan only the migrate movable pageblocks.

What I'm running right now is this:

From: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Subject: [PATCH] compaction: avoid overwork in migrate sync mode

Add a lightweight sync migration (sync == 2) mode that avoids overwork
so more suitable to be used by compaction to provide lower latency but
still write throttling.

Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
---
include/linux/migrate.h | 8 ++--
mm/compaction.c | 2 +-
mm/memory-failure.c | 2 +-
mm/memory_hotplug.c | 2 +-
mm/mempolicy.c | 4 +-
mm/migrate.c | 95 +++++++++++++++++++++++++++++++----------------
6 files changed, 72 insertions(+), 41 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index e39aeec..f26fc0e 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -14,10 +14,10 @@ extern int migrate_page(struct address_space *,
struct page *, struct page *);
extern int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, bool offlining,
- bool sync);
+ int sync);
extern int migrate_huge_pages(struct list_head *l, new_page_t x,
unsigned long private, bool offlining,
- bool sync);
+ int sync);

extern int fail_migrate_page(struct address_space *,
struct page *, struct page *);
@@ -36,10 +36,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
static inline void putback_lru_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, bool offlining,
- bool sync) { return -ENOSYS; }
+ int sync) { return -ENOSYS; }
static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
unsigned long private, bool offlining,
- bool sync) { return -ENOSYS; }
+ int sync) { return -ENOSYS; }

static inline int migrate_prep(void) { return -ENOSYS; }
static inline int migrate_prep_local(void) { return -ENOSYS; }
diff --git a/mm/compaction.c b/mm/compaction.c
index 899d956..48a3579 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -555,7 +555,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
nr_migrate = cc->nr_migratepages;
err = migrate_pages(&cc->migratepages, compaction_alloc,
(unsigned long)cc, false,
- cc->sync);
+ cc->sync ? 2 : 0);
update_nr_listpages(cc);
nr_remaining = cc->nr_migratepages;

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 06d3479..d8a41d3 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1557,7 +1557,7 @@ int soft_offline_page(struct page *page, int flags)
page_is_file_cache(page));
list_add(&page->lru, &pagelist);
ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
- 0, true);
+ false, 1);
if (ret) {
putback_lru_pages(&pagelist);
pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2168489..e1d6176 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -809,7 +809,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
}
/* this function returns # of failed pages */
ret = migrate_pages(&source, hotremove_migrate_alloc, 0,
- true, true);
+ true, 1);
if (ret)
putback_lru_pages(&source);
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index adc3954..0bf88ed 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -933,7 +933,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,

if (!list_empty(&pagelist)) {
err = migrate_pages(&pagelist, new_node_page, dest,
- false, true);
+ false, 1);
if (err)
putback_lru_pages(&pagelist);
}
@@ -1154,7 +1154,7 @@ static long do_mbind(unsigned long start, unsigned long len,
if (!list_empty(&pagelist)) {
nr_failed = migrate_pages(&pagelist, new_vma_page,
(unsigned long)vma,
- false, true);
+ false, 1);
if (nr_failed)
putback_lru_pages(&pagelist);
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 578e291..612c3ba 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -564,7 +564,7 @@ static int fallback_migrate_page(struct address_space *mapping,
* == 0 - success
*/
static int move_to_new_page(struct page *newpage, struct page *page,
- int remap_swapcache, bool sync)
+ int remap_swapcache, bool force)
{
struct address_space *mapping;
int rc;
@@ -588,11 +588,11 @@ static int move_to_new_page(struct page *newpage, struct page *page,
rc = migrate_page(mapping, newpage, page);
else {
/*
- * Do not writeback pages if !sync and migratepage is
+ * Do not writeback pages if !force and migratepage is
* not pointing to migrate_page() which is nonblocking
* (swapcache/tmpfs uses migratepage = migrate_page).
*/
- if (PageDirty(page) && !sync &&
+ if (PageDirty(page) && !force &&
mapping->a_ops->migratepage != migrate_page)
rc = -EBUSY;
else if (mapping->a_ops->migratepage)
@@ -622,7 +622,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
}

static int __unmap_and_move(struct page *page, struct page *newpage,
- int force, bool offlining, bool sync)
+ bool force, bool offlining)
{
int rc = -EAGAIN;
int remap_swapcache = 1;
@@ -631,7 +631,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
struct anon_vma *anon_vma = NULL;

if (!trylock_page(page)) {
- if (!force || !sync)
+ if (!force)
goto out;

/*
@@ -676,14 +676,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
BUG_ON(charge);

if (PageWriteback(page)) {
- /*
- * For !sync, there is no point retrying as the retry loop
- * is expected to be too short for PageWriteback to be cleared
- */
- if (!sync) {
- rc = -EBUSY;
- goto uncharge;
- }
if (!force)
goto uncharge;
wait_on_page_writeback(page);
@@ -751,7 +743,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,

skip_unmap:
if (!page_mapped(page))
- rc = move_to_new_page(newpage, page, remap_swapcache, sync);
+ rc = move_to_new_page(newpage, page, remap_swapcache, force);

if (rc && remap_swapcache)
remove_migration_ptes(page, page);
@@ -774,7 +766,7 @@ out:
* to the newly allocated page in newpage.
*/
static int unmap_and_move(new_page_t get_new_page, unsigned long private,
- struct page *page, int force, bool offlining, bool sync)
+ struct page *page, bool force, bool offlining)
{
int rc = 0;
int *result = NULL;
@@ -792,7 +784,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
if (unlikely(split_huge_page(page)))
goto out;

- rc = __unmap_and_move(page, newpage, force, offlining, sync);
+ rc = __unmap_and_move(page, newpage, force, offlining);
out:
if (rc != -EAGAIN) {
/*
@@ -840,7 +832,7 @@ out:
*/
static int unmap_and_move_huge_page(new_page_t get_new_page,
unsigned long private, struct page *hpage,
- int force, bool offlining, bool sync)
+ bool force, bool offlining)
{
int rc = 0;
int *result = NULL;
@@ -853,7 +845,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
rc = -EAGAIN;

if (!trylock_page(hpage)) {
- if (!force || !sync)
+ if (!force)
goto out;
lock_page(hpage);
}
@@ -864,7 +856,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

if (!page_mapped(hpage))
- rc = move_to_new_page(new_hpage, hpage, 1, sync);
+ rc = move_to_new_page(new_hpage, hpage, 1, force);

if (rc)
remove_migration_ptes(hpage, hpage);
@@ -907,11 +899,11 @@ out:
*/
int migrate_pages(struct list_head *from,
new_page_t get_new_page, unsigned long private, bool offlining,
- bool sync)
+ int sync)
{
int retry = 1;
int nr_failed = 0;
- int pass = 0;
+ int pass, passes;
struct page *page;
struct page *page2;
int swapwrite = current->flags & PF_SWAPWRITE;
@@ -920,15 +912,34 @@ int migrate_pages(struct list_head *from,
if (!swapwrite)
current->flags |= PF_SWAPWRITE;

- for(pass = 0; pass < 10 && retry; pass++) {
+ switch (sync) {
+ case 0:
+ /* 3 async 0 sync */
+ passes = 3;
+ break;
+ case 1:
+ /* 3 async, 7 sync */
+ passes = 10;
+ break;
+ case 2:
+ /*
+ * sync = 2 asks for a lightweight synchronous mode:
+ * 3 async, 2 sync.
+ */
+ passes = 5;
+ break;
+ default:
+ BUG();
+ }
+
+ for(pass = 0; pass < passes && retry; pass++) {
retry = 0;

list_for_each_entry_safe(page, page2, from, lru) {
cond_resched();

- rc = unmap_and_move(get_new_page, private,
- page, pass > 2, offlining,
- sync);
+ rc = unmap_and_move(get_new_page, private, page,
+ pass > 2, offlining);

switch(rc) {
case -ENOMEM:
@@ -958,24 +969,44 @@ out:

int migrate_huge_pages(struct list_head *from,
new_page_t get_new_page, unsigned long private, bool offlining,
- bool sync)
+ int sync)
{
int retry = 1;
int nr_failed = 0;
- int pass = 0;
+ int pass, passes;
struct page *page;
struct page *page2;
int rc;

- for (pass = 0; pass < 10 && retry; pass++) {
+ switch (sync) {
+ case 0:
+ /* 3 async 0 sync */
+ passes = 3;
+ break;
+ case 1:
+ /* 3 async, 7 sync */
+ passes = 10;
+ break;
+ case 2:
+ /*
+ * sync = 2 asks for a lightweight synchronous mode:
+ * 3 async, 2 sync.
+ */
+ passes = 5;
+ break;
+ default:
+ BUG();
+ }
+
+ for (pass = 0; pass < passes && retry; pass++) {
retry = 0;

list_for_each_entry_safe(page, page2, from, lru) {
cond_resched();

- rc = unmap_and_move_huge_page(get_new_page,
- private, page, pass > 2, offlining,
- sync);
+ rc = unmap_and_move_huge_page(get_new_page, private,
+ page, pass > 2,
+ offlining);

switch(rc) {
case -ENOMEM:
@@ -1104,7 +1135,7 @@ set_status:
err = 0;
if (!list_empty(&pagelist)) {
err = migrate_pages(&pagelist, new_page_node,
- (unsigned long)pm, 0, true);
+ (unsigned long)pm, false, 1);
if (err)
putback_lru_pages(&pagelist);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/