[PATCH] mm, oom: protect !costly allocations some more

From: Michal Hocko
Date: Wed Mar 09 2016 - 04:57:42 EST


should_reclaim_retry will give up retries for higher order allocations
if none of the eligible zones has any requested or higher order pages
available even if we pass the watermark check for order-0. This is done
because there is no guarantee that the reclaimable and currently free
pages will form the required order.
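
To illustrate the point (a standalone sketch only, not the kernel's
should_reclaim_retry; the function and parameter names are made up for
this example): a zone can pass the order-0 watermark purely on order-0
pages while having no free block at the requested order or above,
which is roughly what the per-order check guards against:

static int zone_has_page_of_order(const unsigned long nr_free_per_order[],
				  int nr_orders, int requested_order)
{
	int o;

	/* Look for any free block at the requested order or above. */
	for (o = requested_order; o < nr_orders; o++)
		if (nr_free_per_order[o])
			return 1;

	/*
	 * Plenty of order-0 pages may still be free (so the order-0
	 * watermark passes) while none of them form a contiguous
	 * higher-order block.
	 */
	return 0;
}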

This can, however, lead to situations where the high-order request
(e.g. order-2 required for the stack allocation during fork) will
trigger OOM too early - e.g. after the first reclaim/compaction round.
Such a system would have to be highly fragmented and there is no
guarantee that further reclaim/compaction attempts would help, but we
should at least make sure that compaction was active before we go OOM.
So keep retrying, even if should_reclaim_retry tells us to go OOM, if
- the last compaction round was inactive (deferred, skipped or
  bailed out early due to contention), or
- we haven't completed at least MAX_COMPACT_RETRIES successful
  (either COMPACT_PARTIAL or COMPACT_COMPLETE) compaction
  rounds.

The first rule ensures that the very last attempt at compaction was
not ignored while the second guarantees that compaction has done some
work. Multiple retries might be needed to prevent occasional
piggybacking of other contexts which steal the compacted pages before
the current context manages to retry the allocation.
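
For illustration only, the combined retry decision roughly follows the
standalone sketch below (not the actual kernel code; oom_retry_decision
and its parameters are made up for this example):

/*
 * Illustrative sketch of the decision described above, for !costly
 * (order <= costly_order) allocations.  Not the kernel implementation.
 */
static int oom_retry_decision(int order, int costly_order,
			      int reclaim_wants_retry,
			      int last_round_backed_off, int contended,
			      int successful_rounds, int max_rounds)
{
	/* should_reclaim_retry still wins when it says "keep going". */
	if (reclaim_wants_retry)
		return 1;

	/* Order-0 and costly requests get no extra protection here. */
	if (!order || order > costly_order)
		return 0;

	/* Rule 1: the last compaction round never really ran. */
	if (last_round_backed_off || contended)
		return 1;

	/* Rule 2: compaction has not done enough successful rounds yet. */
	if (successful_rounds <= max_rounds)
		return 1;

	/* Both rules exhausted - let the OOM killer run. */
	return 0;
}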

If the given number of successful retries is not sufficient for
reasonable workloads we should focus on the collected compaction
tracepoint data and try to address the issue in the compaction code.
If this is not feasible we can increase the retries limit.

Signed-off-by: Michal Hocko <mhocko@xxxxxxxx>
---
include/linux/compaction.h | 10 +++++++
mm/page_alloc.c | 68 +++++++++++++++++++++++++++++++++++-----------
2 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index b167801187e7..7d028ccf440a 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -61,6 +61,12 @@ extern void compaction_defer_reset(struct zone *zone, int order,
bool alloc_success);
extern bool compaction_restarting(struct zone *zone, int order);

+static inline bool compaction_made_progress(enum compact_result result)
+{
+ return (result > COMPACT_SKIPPED &&
+ result < COMPACT_NO_SUITABLE_PAGE);
+}
+
#else
static inline enum compact_result try_to_compact_pages(gfp_t gfp_mask,
unsigned int order, int alloc_flags,
@@ -93,6 +99,10 @@ static inline bool compaction_deferred(struct zone *zone, int order)
return true;
}

+static inline bool compaction_made_progress(enum compact_result result)
+{
+ return false;
+}
#endif /* CONFIG_COMPACTION */

#if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4acc0aa1aee0..5f1fc3793836 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2813,34 +2813,33 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
return page;
}

+
+/*
+ * Maximum number of compaction retries with progress before the OOM
+ * killer is considered as the only way to move forward.
+ */
+#define MAX_COMPACT_RETRIES 16
+
#ifdef CONFIG_COMPACTION
/* Try memory compaction for high-order allocations before reclaim */
static struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
int alloc_flags, const struct alloc_context *ac,
enum migrate_mode mode, int *contended_compaction,
- bool *deferred_compaction)
+ enum compact_result *compact_result)
{
- enum compact_result compact_result;
struct page *page;

if (!order)
return NULL;

current->flags |= PF_MEMALLOC;
- compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
+ *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
mode, contended_compaction);
current->flags &= ~PF_MEMALLOC;

- switch (compact_result) {
- case COMPACT_DEFERRED:
- *deferred_compaction = true;
- /* fall-through */
- case COMPACT_SKIPPED:
+ if (*compact_result <= COMPACT_SKIPPED)
return NULL;
- default:
- break;
- }

/*
* At least in one zone compaction wasn't deferred or skipped, so let's
@@ -2870,15 +2869,44 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,

return NULL;
}
+
+static inline bool
+should_compact_retry(unsigned int order, enum compact_result compact_result,
+ int contended_compaction, int compaction_retries)
+{
+ /*
+ * !costly allocations are really important and we have to make sure
+ * the compaction wasn't deferred or didn't bail out early due to locks
+ * contention before we go OOM. Still cap the reclaim retry loops with
+ * progress to prevent from looping forever and potential trashing.
+ */
+ if (order && order <= PAGE_ALLOC_COSTLY_ORDER) {
+ if (compact_result <= COMPACT_SKIPPED)
+ return true;
+ if (contended_compaction > COMPACT_CONTENDED_NONE)
+ return true;
+ if (compaction_retries <= MAX_COMPACT_RETRIES)
+ return true;
+ }
+
+ return false;
+}
#else
static inline struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
int alloc_flags, const struct alloc_context *ac,
enum migrate_mode mode, int *contended_compaction,
- bool *deferred_compaction)
+ enum compact_result *compact_result)
{
return NULL;
}
+
+static inline bool
+should_compact_retry(unsigned int order, enum compact_result compact_result,
+ int contended_compaction, int compaction_retries)
+{
+ return false;
+}
#endif /* CONFIG_COMPACTION */

/* Perform direct synchronous page reclaim */
@@ -3118,7 +3146,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
int alloc_flags;
unsigned long did_some_progress;
enum migrate_mode migration_mode = MIGRATE_ASYNC;
- bool deferred_compaction = false;
+ enum compact_result compact_result;
+ int compaction_retries = 0;
int contended_compaction = COMPACT_CONTENDED_NONE;
int no_progress_loops = 0;

@@ -3227,10 +3256,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
migration_mode,
&contended_compaction,
- &deferred_compaction);
+ &compact_result);
if (page)
goto got_pg;

+ if (order && compaction_made_progress(compact_result))
+ compaction_retries++;
+
/* Checks for THP-specific high-order allocations */
if (is_thp_gfp_mask(gfp_mask)) {
/*
@@ -3240,7 +3272,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* to heavily disrupt the system, so we fail the allocation
* instead of entering direct reclaim.
*/
- if (deferred_compaction)
+ if (compact_result == COMPACT_DEFERRED)
goto nopage;

/*
@@ -3294,6 +3326,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
did_some_progress > 0, no_progress_loops))
goto retry;

+ if (should_compact_retry(order, compact_result, contended_compaction,
+ compaction_retries))
+ goto retry;
+
/* Reclaim has failed us, start killing things */
page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
if (page)
@@ -3314,7 +3350,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags,
ac, migration_mode,
&contended_compaction,
- &deferred_compaction);
+ &compact_result);
if (page)
goto got_pg;
nopage:
--
2.7.0

--
Michal Hocko
SUSE Labs