Re: [patch] mm, oom: remove gfp helper function

From: Michal Hocko
Date: Fri Dec 05 2014 - 09:05:47 EST


On Thu 04-12-14 15:19:05, Johannes Weiner wrote:
[...]
> How about the following? It changes the code flow to clarify what's
> actually going on there and gets rid of oom_gfp_allowed() altogether,
> instead of awkwardly trying to explain something that has no meaning.

Yes this makes a lot of sense.

> Btw, it looks like there is a bug with oom_killer_disabled, because it
> will return NULL for __GFP_NOFAIL.

Right! __GFP_NOFAIL allocation after oom is disabled cannot be
guaranteed.

> ---
> From: Johannes Weiner <hannes@xxxxxxxxxxx>
> Subject: [patch] mm: page_alloc: embed OOM killing naturally into allocation
> slowpath
>
> The OOM killing invocation does a lot of duplicative checks against
> the task's allocation context. Rework it to take advantage of the
> existing checks in the allocator slowpath.

Nice! Just document the __GFP_NOFAIL fix here as well, please.

> Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>

Acked-by: Michal Hocko <mhocko@xxxxxxx>

I will rebase my oom vs pm-freezer patch
(http://marc.info/?l=linux-mm&m=141634503316543&w=2) which touches this
area on top of your patch.

Thanks!

> ---
> include/linux/oom.h | 5 ----
> mm/page_alloc.c | 80 +++++++++++++++++++++++------------------------------
> 2 files changed, 35 insertions(+), 50 deletions(-)
>
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> index e8d6e1058723..4971874f54db 100644
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -85,11 +85,6 @@ static inline void oom_killer_enable(void)
> oom_killer_disabled = false;
> }
>
> -static inline bool oom_gfp_allowed(gfp_t gfp_mask)
> -{
> - return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY);
> -}
> -
> extern struct task_struct *find_lock_task_mm(struct task_struct *p);
>
> /* sysctls */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 616a2c956b4b..2df99ca56e28 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2232,12 +2232,21 @@ static inline struct page *
> __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
> struct zonelist *zonelist, enum zone_type high_zoneidx,
> nodemask_t *nodemask, struct zone *preferred_zone,
> - int classzone_idx, int migratetype)
> + int classzone_idx, int migratetype, unsigned long *did_some_progress)
> {
> struct page *page;
>
> - /* Acquire the per-zone oom lock for each zone */
> + *did_some_progress = 0;
> +
> + if (oom_killer_disabled)
> + return NULL;
> +
> + /*
> + * Acquire the per-zone oom lock for each zone. If that
> + * fails, somebody else is making progress for us.
> + */
> if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
> + *did_some_progress = 1;
> schedule_timeout_uninterruptible(1);
> return NULL;
> }
> @@ -2263,12 +2272,18 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
> goto out;
>
> if (!(gfp_mask & __GFP_NOFAIL)) {
> + /* Coredumps can quickly deplete all memory reserves */
> + if (current->flags & PF_DUMPCORE)
> + goto out;
> /* The OOM killer will not help higher order allocs */
> if (order > PAGE_ALLOC_COSTLY_ORDER)
> goto out;
> /* The OOM killer does not needlessly kill tasks for lowmem */
> if (high_zoneidx < ZONE_NORMAL)
> goto out;
> + /* The OOM killer does not compensate for light reclaim */
> + if (!(gfp_mask & __GFP_FS))
> + goto out;
> /*
> * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
> * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
> @@ -2281,7 +2296,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
> }
> /* Exhausted what can be done so it's blamo time */
> out_of_memory(zonelist, gfp_mask, order, nodemask, false);
> -
> + *did_some_progress = 1;
> out:
> oom_zonelist_unlock(zonelist, gfp_mask);
> return page;
> @@ -2571,7 +2586,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
> goto nopage;
>
> -restart:
> if (!(gfp_mask & __GFP_NO_KSWAPD))
> wake_all_kswapds(order, zonelist, high_zoneidx,
> preferred_zone, nodemask);
> @@ -2701,51 +2715,27 @@ rebalance:
> if (page)
> goto got_pg;
>
> - /*
> - * If we failed to make any progress reclaiming, then we are
> - * running out of options and have to consider going OOM
> - */
> - if (!did_some_progress) {
> - if (oom_gfp_allowed(gfp_mask)) {
> - if (oom_killer_disabled)
> - goto nopage;
> - /* Coredumps can quickly deplete all memory reserves */
> - if ((current->flags & PF_DUMPCORE) &&
> - !(gfp_mask & __GFP_NOFAIL))
> - goto nopage;
> - page = __alloc_pages_may_oom(gfp_mask, order,
> - zonelist, high_zoneidx,
> - nodemask, preferred_zone,
> - classzone_idx, migratetype);
> - if (page)
> - goto got_pg;
> -
> - if (!(gfp_mask & __GFP_NOFAIL)) {
> - /*
> - * The oom killer is not called for high-order
> - * allocations that may fail, so if no progress
> - * is being made, there are no other options and
> - * retrying is unlikely to help.
> - */
> - if (order > PAGE_ALLOC_COSTLY_ORDER)
> - goto nopage;
> - /*
> - * The oom killer is not called for lowmem
> - * allocations to prevent needlessly killing
> - * innocent tasks.
> - */
> - if (high_zoneidx < ZONE_NORMAL)
> - goto nopage;
> - }
> -
> - goto restart;
> - }
> - }
> -
> /* Check if we should retry the allocation */
> pages_reclaimed += did_some_progress;
> if (should_alloc_retry(gfp_mask, order, did_some_progress,
> pages_reclaimed)) {
> + /*
> + * If we fail to make progress by freeing individual
> + * pages, but the allocation wants us to keep going,
> + * start OOM killing tasks.
> + */
> + if (!did_some_progress) {
> + page = __alloc_pages_may_oom(gfp_mask, order, zonelist,
> + high_zoneidx, nodemask,
> + preferred_zone, classzone_idx,
> + migratetype,&did_some_progress);
> + if (page)
> + goto got_pg;
> + if (!did_some_progress) {
> + BUG_ON(gfp_mask & __GFP_NOFAIL);
> + goto nopage;
> + }
> + }
> /* Wait for some write requests to complete then retry */
> wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
> goto rebalance;
> --
> 2.1.3
>

--
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/