Re: [PATCH v2] vmscan: add trace events for lru_gen

From: T.J. Mercier
Date: Thu Sep 21 2023 - 16:21:35 EST


On Wed, Sep 20, 2023 at 11:19 PM Jaewon Kim <jaewon31.kim@xxxxxxxxxxx> wrote:
>
> Like the legacy lru, lru_gen needs trace events for debugging.
>
Hi Jaewon, thanks for adding this.

> This patch introduces two trace events:
>   trace_mm_vmscan_lru_gen_scan
>   trace_mm_vmscan_lru_gen_evict
>
> They are similar to the following legacy events, respectively:
>   trace_mm_vmscan_lru_isolate
>   trace_mm_vmscan_lru_shrink_[in]active
>
> Here's an example:
> mm_vmscan_lru_gen_scan: isolate_mode=0 classzone=1 order=9 nr_requested=4096 nr_scanned=431 nr_skipped=0 nr_taken=55 lru=anon
> mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=42 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=13 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_ANON|RECLAIM_WB_ASYNC
> mm_vmscan_lru_gen_scan: isolate_mode=0 classzone=1 order=9 nr_requested=4096 nr_scanned=66 nr_skipped=0 nr_taken=64 lru=file
> mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=62 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=2 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC
>
> Signed-off-by: Jaewon Kim <jaewon31.kim@xxxxxxxxxxx>
> ---
> v2: use condition and make it aligned
> v1: introduce trace events
> ---
> include/trace/events/mmflags.h | 5 ++
> include/trace/events/vmscan.h | 98 ++++++++++++++++++++++++++++++++++
> mm/vmscan.c | 17 ++++--
> 3 files changed, 115 insertions(+), 5 deletions(-)
>
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index 1478b9dd05fa..44e9b38f83e7 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -274,6 +274,10 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
> EM (LRU_ACTIVE_FILE, "active_file") \
> EMe(LRU_UNEVICTABLE, "unevictable")
>
> +#define LRU_GEN_NAMES \
> + EM (LRU_GEN_ANON, "anon") \
> + EMe(LRU_GEN_FILE, "file")
> +
> /*
> * First define the enums in the above macros to be exported to userspace
> * via TRACE_DEFINE_ENUM().
> @@ -288,6 +292,7 @@ COMPACTION_PRIORITY
> /* COMPACTION_FEEDBACK are defines not enums. Not needed here. */
> ZONE_TYPE
> LRU_NAMES
> +LRU_GEN_NAMES
>
> /*
> * Now redefine the EM() and EMe() macros to map the enums to the strings
> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> index d2123dd960d5..f0c3a4bd72db 100644
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -327,6 +327,57 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
> __print_symbolic(__entry->lru, LRU_NAMES))
> );
>
> +TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
> + TP_PROTO(int highest_zoneidx,
> + int order,
> + unsigned long nr_requested,
> + unsigned long nr_scanned,
> + unsigned long nr_skipped,
> + unsigned long nr_taken,
> + isolate_mode_t isolate_mode,
> + int lru),
> +
> + TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
> +
> + TP_CONDITION(nr_scanned),
> +
> + TP_STRUCT__entry(
> + __field(int, highest_zoneidx)
> + __field(int, order)
> + __field(unsigned long, nr_requested)
> + __field(unsigned long, nr_scanned)
> + __field(unsigned long, nr_skipped)
> + __field(unsigned long, nr_taken)
> + __field(unsigned int, isolate_mode)
> + __field(int, lru)
> + ),
> +
> + TP_fast_assign(
> + __entry->highest_zoneidx = highest_zoneidx;
> + __entry->order = order;
> + __entry->nr_requested = nr_requested;
> + __entry->nr_scanned = nr_scanned;
> + __entry->nr_skipped = nr_skipped;
> + __entry->nr_taken = nr_taken;
> + __entry->isolate_mode = (__force unsigned int)isolate_mode;
> + __entry->lru = lru;
> + ),
> +
> + /*
> + * "classzone" is the previous name of highest_zoneidx. It is kept
> + * here to preserve the tracepoint ABI.
> + */
> + TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
> + __entry->isolate_mode,
> + __entry->highest_zoneidx,
> + __entry->order,
> + __entry->nr_requested,
> + __entry->nr_scanned,
> + __entry->nr_skipped,
> + __entry->nr_taken,
> + __print_symbolic(__entry->lru, LRU_GEN_NAMES))
> +);
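
nit: isolate_mode is declared as an unsigned int in TP_STRUCT__entry
above, so %u would match the field type better than %d here. A lightly
edited version of that line (untested sketch, only the first specifier
changed):

  TP_printk("isolate_mode=%u classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
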
> +
> TRACE_EVENT(mm_vmscan_write_folio,
>
> TP_PROTO(struct folio *folio),
> @@ -437,6 +488,53 @@ TRACE_EVENT(mm_vmscan_lru_shrink_active,
> show_reclaim_flags(__entry->reclaim_flags))
> );
>
> +TRACE_EVENT(mm_vmscan_lru_gen_evict,
> +
> + TP_PROTO(int nid, unsigned long nr_reclaimed,
> + struct reclaim_stat *stat, int priority, int file),
> +
> + TP_ARGS(nid, nr_reclaimed, stat, priority, file),
> +
> + TP_STRUCT__entry(
> + __field(unsigned long, nr_reclaimed)
> + __field(unsigned long, nr_dirty)
> + __field(unsigned long, nr_writeback)
> + __field(unsigned long, nr_congested)
> + __field(unsigned long, nr_immediate)
> + __field(unsigned int, nr_activate0)
> + __field(unsigned int, nr_activate1)
> + __field(unsigned long, nr_ref_keep)
> + __field(unsigned long, nr_unmap_fail)
> + __field(int, nid)
> + __field(int, priority)
> + __field(int, reclaim_flags)
> + ),
> +
> + TP_fast_assign(
> + __entry->nid = nid;
> + __entry->nr_reclaimed = nr_reclaimed;
> + __entry->nr_dirty = stat->nr_dirty;
> + __entry->nr_writeback = stat->nr_writeback;
> + __entry->nr_congested = stat->nr_congested;
> + __entry->nr_immediate = stat->nr_immediate;
> + __entry->nr_activate0 = stat->nr_activate[0];
> + __entry->nr_activate1 = stat->nr_activate[1];
> + __entry->nr_ref_keep = stat->nr_ref_keep;
> + __entry->nr_unmap_fail = stat->nr_unmap_fail;
> + __entry->priority = priority;
> + __entry->reclaim_flags = trace_reclaim_flags(file);
> + ),
> +
> + TP_printk("nid=%d nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",

Many of these values are unsigned, so I think the format specifiers
for them should be %lu instead of %ld.
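
Something like this is what I have in mind (untested; %lu for the
unsigned long fields and %u for the two unsigned int nr_activate
fields):

  TP_printk("nid=%d nr_reclaimed=%lu nr_dirty=%lu nr_writeback=%lu nr_congested=%lu nr_immediate=%lu nr_activate_anon=%u nr_activate_file=%u nr_ref_keep=%lu nr_unmap_fail=%lu priority=%d flags=%s",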

> + __entry->nid, __entry->nr_reclaimed,
> + __entry->nr_dirty, __entry->nr_writeback,
> + __entry->nr_congested, __entry->nr_immediate,
> + __entry->nr_activate0, __entry->nr_activate1,
> + __entry->nr_ref_keep, __entry->nr_unmap_fail,
> + __entry->priority,
> + show_reclaim_flags(__entry->reclaim_flags))
> +);
> +
> TRACE_EVENT(mm_vmscan_node_reclaim_begin,
>
> TP_PROTO(int nid, int order, gfp_t gfp_flags),
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 6f13394b112e..f453a0f8ceef 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -5005,6 +5005,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> int sorted = 0;
> int scanned = 0;
> int isolated = 0;
> + int skipped = 0;
> int remaining = MAX_LRU_BATCH;
> struct lru_gen_folio *lrugen = &lruvec->lrugen;
> struct mem_cgroup *memcg = lruvec_memcg(lruvec);
> @@ -5018,7 +5019,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>
> for (i = MAX_NR_ZONES; i > 0; i--) {
> LIST_HEAD(moved);
> - int skipped = 0;
> + int skipped_zone = 0;
> int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
> struct list_head *head = &lrugen->folios[gen][type][zone];
>
> @@ -5040,16 +5041,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> isolated += delta;
> } else {
> list_move(&folio->lru, &moved);
> - skipped += delta;
> + skipped_zone += delta;
> }
>
> - if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
> + if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
> break;
> }
>
> - if (skipped) {
> + if (skipped_zone) {
> list_splice(&moved, head);
> - __count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
> + __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
> + skipped += skipped_zone;
> }
>
> if (!remaining || isolated >= MIN_LRU_BATCH)
> @@ -5065,6 +5067,9 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> __count_memcg_events(memcg, PGREFILL, sorted);
> __count_vm_events(PGSCAN_ANON + type, isolated);
>
> + trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
> + scanned, skipped, isolated,
> + sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);
> /*
> * There might not be eligible folios due to reclaim_idx. Check the
> * remaining to prevent livelock if it's not making progress.
> @@ -5194,6 +5199,8 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
> retry:
> reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
> sc->nr_reclaimed += reclaimed;
> + trace_mm_vmscan_lru_gen_evict(pgdat->node_id, reclaimed, &stat,
> + sc->priority, type);
>
> list_for_each_entry_safe_reverse(folio, next, &list, lru) {
> if (!folio_evictable(folio)) {
> --
> 2.17.1
>