[PATCH] Add slowpath enter/exit trace events

From: peter.enderborg
Date: Thu Nov 23 2017 - 05:44:39 EST


From: Peter Enderborg <peter.enderborg@xxxxxxxx>

The warning of slow allocation has been removed, this is
a other way to fetch that information. But you need
to enable the trace. The exit function also returns
information about the number of retries, how long
it was stalled and failure reason if that happened.

Signed-off-by: Peter Enderborg <peter.enderborg@xxxxxxxx>
---
include/trace/events/kmem.h | 68 +++++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 62 +++++++++++++++++++++++++++++++----------
2 files changed, 116 insertions(+), 14 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index eb57e30..bb882ca 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -315,6 +315,74 @@ TRACE_EVENT(mm_page_alloc_extfrag,
__entry->change_ownership)
);

+TRACE_EVENT(mm_page_alloc_slowpath_enter,
+
+ TP_PROTO(int alloc_order,
+ nodemask_t *nodemask,
+ gfp_t gfp_flags),
+
+ TP_ARGS(alloc_order, nodemask, gfp_flags),
+
+ TP_STRUCT__entry(
+ __field(int, alloc_order)
+ __field(nodemask_t *, nodemask)
+ __field(gfp_t, gfp_flags)
+ ),
+
+ TP_fast_assign(
+ __entry->alloc_order = alloc_order;
+ __entry->nodemask = nodemask;
+ __entry->gfp_flags = gfp_flags;
+ ),
+
+ TP_printk("alloc_order=%d nodemask=%*pbl gfp_flags=%s",
+ __entry->alloc_order,
+ nodemask_pr_args(__entry->nodemask),
+ show_gfp_flags(__entry->gfp_flags))
+);
+
+TRACE_EVENT(mm_page_alloc_slowpath_exit,
+
+ TP_PROTO(struct page *page,
+ int alloc_order,
+ nodemask_t *nodemask,
+ u64 alloc_start,
+ gfp_t gfp_flags,
+ int retrys,
+ int exit),
+
+ TP_ARGS(page, alloc_order, nodemask, alloc_start, gfp_flags,
+ retrys, exit),
+
+ TP_STRUCT__entry(__field(struct page *, page)
+ __field(int, alloc_order)
+ __field(nodemask_t *, nodemask)
+ __field(u64, msdelay)
+ __field(gfp_t, gfp_flags)
+ __field(int, retrys)
+ __field(int, exit)
+ ),
+
+ TP_fast_assign(
+ __entry->page = page;
+ __entry->alloc_order = alloc_order;
+ __entry->nodemask = nodemask;
+ __entry->msdelay = jiffies_to_msecs(jiffies-alloc_start);
+ __entry->gfp_flags = gfp_flags;
+ __entry->retrys = retrys;
+ __entry->exit = exit;
+ ),
+
+ TP_printk("page=%p alloc_order=%d nodemask=%*pbl msdelay=%llu gfp_flags=%s retrys=%d exit=%d",
+ __entry->page,
+ __entry->alloc_order,
+ nodemask_pr_args(__entry->nodemask),
+ __entry->msdelay,
+ show_gfp_flags(__entry->gfp_flags),
+ __entry->retrys,
+ __entry->exit)
+);
+
#endif /* _TRACE_KMEM_H */

/* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 48b5b01..bae9cb9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -104,6 +104,17 @@ DEFINE_PER_CPU(struct work_struct, pcpu_drain);
volatile unsigned long latent_entropy __latent_entropy;
EXPORT_SYMBOL(latent_entropy);
#endif
+enum slowpath_exit {
+ SLOWPATH_NOZONE = -16,
+ SLOWPATH_COMPACT_DEFERRED,
+ SLOWPATH_CAN_NOT_DIRECT_RECLAIM,
+ SLOWPATH_RECURSION,
+ SLOWPATH_NO_RETRY,
+ SLOWPATH_COSTLY_ORDER,
+ SLOWPATH_OOM_VICTIM,
+ SLOWPATH_NO_DIRECT_RECLAIM,
+ SLOWPATH_ORDER
+};

/*
* Array of node states.
@@ -3908,8 +3919,15 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
enum compact_result compact_result;
int compaction_retries;
int no_progress_loops;
+ unsigned long alloc_start = jiffies;
unsigned int cpuset_mems_cookie;
int reserve_flags;
+ enum slowpath_exit slowpath_exit;
+ int retry_count = 0;
+
+ trace_mm_page_alloc_slowpath_enter(order,
+ ac->nodemask,
+ gfp_mask);

/*
* In the slowpath, we sanity check order to avoid ever trying to
@@ -3919,7 +3937,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
*/
if (order >= MAX_ORDER) {
WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
- return NULL;
+ slowpath_exit = SLOWPATH_ORDER;
+ goto fail;
}

/*
@@ -3951,8 +3970,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
*/
ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
ac->high_zoneidx, ac->nodemask);
- if (!ac->preferred_zoneref->zone)
+ if (!ac->preferred_zoneref->zone) {
+ slowpath_exit = SLOWPATH_NOZONE;
goto nopage;
+ }

if (gfp_mask & __GFP_KSWAPD_RECLAIM)
wake_all_kswapds(order, ac);
@@ -3998,8 +4019,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* system, so we fail the allocation instead of entering
* direct reclaim.
*/
- if (compact_result == COMPACT_DEFERRED)
+ if (compact_result == COMPACT_DEFERRED) {
+ slowpath_exit = SLOWPATH_COMPACT_DEFERRED;
goto nopage;
+ }

/*
* Looks like reclaim/compaction is worth trying, but
@@ -4011,6 +4034,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
}

retry:
+ retry_count++;
/* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
if (gfp_mask & __GFP_KSWAPD_RECLAIM)
wake_all_kswapds(order, ac);
@@ -4036,13 +4060,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto got_pg;

/* Caller is not willing to reclaim, we can't balance anything */
- if (!can_direct_reclaim)
+ if (!can_direct_reclaim) {
+ slowpath_exit = SLOWPATH_CAN_NOT_DIRECT_RECLAIM;
goto nopage;
+ }

/* Avoid recursion of direct reclaim */
- if (current->flags & PF_MEMALLOC)
+ if (current->flags & PF_MEMALLOC) {
+ slowpath_exit = SLOWPATH_RECURSION;
goto nopage;
-
+ }
/* Try direct reclaim and then allocating */
page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
&did_some_progress);
@@ -4056,16 +4083,18 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto got_pg;

/* Do not loop if specifically requested */
- if (gfp_mask & __GFP_NORETRY)
+ if (gfp_mask & __GFP_NORETRY) {
+ slowpath_exit = SLOWPATH_NO_RETRY;
goto nopage;
-
+ }
/*
* Do not retry costly high order allocations unless they are
* __GFP_RETRY_MAYFAIL
*/
- if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
+ if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
+ slowpath_exit = SLOWPATH_COSTLY_ORDER;
goto nopage;
-
+ }
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
did_some_progress > 0, &no_progress_loops))
goto retry;
@@ -4095,9 +4124,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
/* Avoid allocations with no watermarks from looping endlessly */
if (tsk_is_oom_victim(current) &&
(alloc_flags == ALLOC_OOM ||
- (gfp_mask & __GFP_NOMEMALLOC)))
+ (gfp_mask & __GFP_NOMEMALLOC))) {
+ slowpath_exit = SLOWPATH_OOM_VICTIM;
goto nopage;
-
+ }
/* Retry as long as the OOM killer is making progress */
if (did_some_progress) {
no_progress_loops = 0;
@@ -4118,9 +4148,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* All existing users of the __GFP_NOFAIL are blockable, so warn
* of any new users that actually require GFP_NOWAIT
*/
- if (WARN_ON_ONCE(!can_direct_reclaim))
+ if (WARN_ON_ONCE(!can_direct_reclaim)) {
+ slowpath_exit = SLOWPATH_NO_DIRECT_RECLAIM;
goto fail;
-
+ }
/*
* PF_MEMALLOC request from this context is rather bizarre
* because we cannot reclaim anything and only can loop waiting
@@ -4153,6 +4184,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
warn_alloc(gfp_mask, ac->nodemask,
"page allocation failure: order:%u", order);
got_pg:
+ trace_mm_page_alloc_slowpath_exit(page, order, ac->nodemask,
+ alloc_start, gfp_mask, retry_count, slowpath_exit);
+
return page;
}

--
2.7.4