Re: [PATCH v3 2/2] mm: memcg: introduce new event to trace shrink_memcg

From: Shakeel Butt
Date: Sat Nov 25 2023 - 01:36:41 EST


On Thu, Nov 23, 2023 at 10:39:37PM +0300, Dmitry Rokosov wrote:
> The shrink_memcg flow plays a crucial role in memcg reclamation.
> Currently, it is not possible to trace this point from non-direct
> reclaim paths. However, direct reclaim has its own tracepoint, so there
> is no issue there. In certain cases, when debugging memcg pressure,
> developers may need to identify all potential requests for memcg
> reclamation including kswapd(). The patchset introduces the tracepoints
> mm_vmscan_memcg_shrink_{begin|end}() to address this problem.
>
> Example of output in the kswapd context (non-direct reclaim):
> kswapd0-39 [001] ..... 240.356378: mm_vmscan_memcg_shrink_begin: order=0 gfp_flags=GFP_KERNEL memcg=16
> kswapd0-39 [001] ..... 240.356396: mm_vmscan_memcg_shrink_end: nr_reclaimed=0 memcg=16
> kswapd0-39 [001] ..... 240.356420: mm_vmscan_memcg_shrink_begin: order=0 gfp_flags=GFP_KERNEL memcg=16
> kswapd0-39 [001] ..... 240.356454: mm_vmscan_memcg_shrink_end: nr_reclaimed=1 memcg=16
> kswapd0-39 [001] ..... 240.356479: mm_vmscan_memcg_shrink_begin: order=0 gfp_flags=GFP_KERNEL memcg=16
> kswapd0-39 [001] ..... 240.356506: mm_vmscan_memcg_shrink_end: nr_reclaimed=4 memcg=16
> kswapd0-39 [001] ..... 240.356525: mm_vmscan_memcg_shrink_begin: order=0 gfp_flags=GFP_KERNEL memcg=16
> kswapd0-39 [001] ..... 240.356593: mm_vmscan_memcg_shrink_end: nr_reclaimed=11 memcg=16
> kswapd0-39 [001] ..... 240.356614: mm_vmscan_memcg_shrink_begin: order=0 gfp_flags=GFP_KERNEL memcg=16
> kswapd0-39 [001] ..... 240.356738: mm_vmscan_memcg_shrink_end: nr_reclaimed=25 memcg=16
> kswapd0-39 [001] ..... 240.356790: mm_vmscan_memcg_shrink_begin: order=0 gfp_flags=GFP_KERNEL memcg=16
> kswapd0-39 [001] ..... 240.357125: mm_vmscan_memcg_shrink_end: nr_reclaimed=53 memcg=16
>
> Signed-off-by: Dmitry Rokosov <ddrokosov@xxxxxxxxxxxxxxxxx>
> ---
> include/trace/events/vmscan.h | 22 ++++++++++++++++++++++
> mm/vmscan.c | 7 +++++++
> 2 files changed, 29 insertions(+)
>
> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> index e9093fa1c924..a4686afe571d 100644
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -180,6 +180,17 @@ DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template, mm_vmscan_memcg_softlimit_r
> TP_ARGS(order, gfp_flags, memcg)
> );
>
> +DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template, mm_vmscan_memcg_shrink_begin,
> +
> + TP_PROTO(int order, gfp_t gfp_flags, const struct mem_cgroup *memcg),
> +
> + TP_ARGS(order, gfp_flags, memcg)
> +);
> +
> +#else
> +
> +#define trace_mm_vmscan_memcg_shrink_begin(...)
> +
> #endif /* CONFIG_MEMCG */
>
> DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template,
> @@ -243,6 +254,17 @@ DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template, mm_vmscan_memcg_softlimit_rec
> TP_ARGS(nr_reclaimed, memcg)
> );
>
> +DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template, mm_vmscan_memcg_shrink_end,
> +
> + TP_PROTO(unsigned long nr_reclaimed, const struct mem_cgroup *memcg),
> +
> + TP_ARGS(nr_reclaimed, memcg)
> +);
> +
> +#else
> +
> +#define trace_mm_vmscan_memcg_shrink_end(...)
> +
> #endif /* CONFIG_MEMCG */
>
> TRACE_EVENT(mm_shrink_slab_start,
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 45780952f4b5..f7e3ddc5a7ad 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -6461,6 +6461,10 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
> */
> cond_resched();
>
> + trace_mm_vmscan_memcg_shrink_begin(sc->order,
> + sc->gfp_mask,
> + memcg);
> +

If you place the begin tracepoint here, you may get only the begin
event, with no matching end event, for memcgs whose usage is below their
min or low limits. Is that fine? Otherwise, you can put it just before
the shrink_lruvec() call.

> mem_cgroup_calculate_protection(target_memcg, memcg);
>
> if (mem_cgroup_below_min(target_memcg, memcg)) {
> @@ -6491,6 +6495,9 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
> shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
> sc->priority);
>
> + trace_mm_vmscan_memcg_shrink_end(sc->nr_reclaimed - reclaimed,
> + memcg);
> +
> /* Record the group's reclaim efficiency */
> if (!sc->proactive)
> vmpressure(sc->gfp_mask, memcg, false,
> --
> 2.36.0
>