Re: [PATCH RFC 3/4] mm/vmstat: rename pgdemote_* to pgdemote_dst_* and add pgdemote_src_*

From: Huang, Ying
Date: Thu Nov 02 2023 - 02:59:07 EST


"Zhijian Li (Fujitsu)" <lizhijian@xxxxxxxxxxx> writes:

> On 02/11/2023 13:45, Huang, Ying wrote:
>> Li Zhijian <lizhijian@xxxxxxxxxxx> writes:
>>
>>> pgdemote_src_*: pages demoted from this node.
>>> pgdemote_dst_*: pages demoted to this node.
>>>
>>> So that we are able to know their demotion per-node stats by checking this.
>>>
>>> In the environment, node0 and node1 are DRAM, node3 is PMEM.
>>>
>>> Global stats:
>>> $ grep -E 'demote' /proc/vmstat
>>> pgdemote_src_kswapd 130155
>>> pgdemote_src_direct 113497
>>> pgdemote_src_khugepaged 0
>>> pgdemote_dst_kswapd 130155
>>> pgdemote_dst_direct 113497
>>> pgdemote_dst_khugepaged 0
>>>
>>> Per-node stats:
>>> $ grep demote /sys/devices/system/node/node0/vmstat
>>> pgdemote_src_kswapd 68454
>>> pgdemote_src_direct 83431
>>> pgdemote_src_khugepaged 0
>>> pgdemote_dst_kswapd 0
>>> pgdemote_dst_direct 0
>>> pgdemote_dst_khugepaged 0
>>>
>>> $ grep demote /sys/devices/system/node/node1/vmstat
>>> pgdemote_src_kswapd 185834
>>> pgdemote_src_direct 30066
>>> pgdemote_src_khugepaged 0
>>> pgdemote_dst_kswapd 0
>>> pgdemote_dst_direct 0
>>> pgdemote_dst_khugepaged 0
>>>
>>> $ grep demote /sys/devices/system/node/node3/vmstat
>>> pgdemote_src_kswapd 0
>>> pgdemote_src_direct 0
>>> pgdemote_src_khugepaged 0
>>> pgdemote_dst_kswapd 254288
>>> pgdemote_dst_direct 113497
>>> pgdemote_dst_khugepaged 0
>>>
>>> From above stats, we know node3 is the demotion destination which one
>>> the node0 and node1 will demote to.
>>
>> Why do we need these information? Do you have some use case?
>
> I recall our customers have mentioned that they want to know how much the memory is demoted
> to the CXL memory device in a specific period.

This doesn't sound like a use case. Can you elaborate it? What can
only be tuned with the help of the added stats?

--
Best Regards,
Huang, Ying

>
>>>> mod_node_page_state(NODE_DATA(target_nid),
>>>> - PGDEMOTE_KSWAPD + reclaimer_offset(), nr_succeeded);
>>>> + PGDEMOTE_DST_KSWAPD + reclaimer_offset(), nr_succeeded);
>
> But if the *target_nid* is only indicate the preferred node, this accounting maybe not accurate.
>
>
> Thanks
> Zhijian
>
>>
>> --
>> Best Regards,
>> Huang, Ying
>>
>>> Signed-off-by: Li Zhijian <lizhijian@xxxxxxxxxxx>
>>> ---
>>> RFC: their names are open to discussion, maybe pgdemote_from/to_*
>>> Another defect of this patch is that, SUM(pgdemote_src_*) is always same
>>> as SUM(pgdemote_dst_*) in the global stats, shall we hide one of them.
>>> ---
>>> include/linux/mmzone.h | 9 ++++++---
>>> mm/vmscan.c | 13 ++++++++++---
>>> mm/vmstat.c | 9 ++++++---
>>> 3 files changed, 22 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>>> index ad0309eea850..a6140d894bec 100644
>>> --- a/include/linux/mmzone.h
>>> +++ b/include/linux/mmzone.h
>>> @@ -207,9 +207,12 @@ enum node_stat_item {
>>> PGPROMOTE_SUCCESS, /* promote successfully */
>>> PGPROMOTE_CANDIDATE, /* candidate pages to promote */
>>> /* PGDEMOTE_*: pages demoted */
>>> - PGDEMOTE_KSWAPD,
>>> - PGDEMOTE_DIRECT,
>>> - PGDEMOTE_KHUGEPAGED,
>>> + PGDEMOTE_SRC_KSWAPD,
>>> + PGDEMOTE_SRC_DIRECT,
>>> + PGDEMOTE_SRC_KHUGEPAGED,
>>> + PGDEMOTE_DST_KSWAPD,
>>> + PGDEMOTE_DST_DIRECT,
>>> + PGDEMOTE_DST_KHUGEPAGED,
>>> #endif
>>> NR_VM_NODE_STAT_ITEMS
>>> };
>>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>>> index 2f1fb4ec3235..55d2287d7150 100644
>>> --- a/mm/vmscan.c
>>> +++ b/mm/vmscan.c
>>> @@ -1111,13 +1111,18 @@ void drop_slab(void)
>>> static int reclaimer_offset(void)
>>> {
>>> BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
>>> - PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD);
>>> + PGDEMOTE_SRC_DIRECT - PGDEMOTE_SRC_KSWAPD);
>>> BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
>>> PGSCAN_DIRECT - PGSCAN_KSWAPD);
>>> BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
>>> - PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD);
>>> + PGDEMOTE_SRC_KHUGEPAGED - PGDEMOTE_SRC_KSWAPD);
>>> BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
>>> PGSCAN_KHUGEPAGED - PGSCAN_KSWAPD);
>>> + BUILD_BUG_ON(PGDEMOTE_SRC_DIRECT - PGDEMOTE_SRC_KSWAPD !=
>>> + PGDEMOTE_DST_DIRECT - PGDEMOTE_DST_KSWAPD);
>>> + BUILD_BUG_ON(PGDEMOTE_SRC_KHUGEPAGED - PGDEMOTE_SRC_KSWAPD !=
>>> + PGDEMOTE_DST_KHUGEPAGED - PGDEMOTE_DST_KSWAPD);
>>> +
>>>
>>> if (current_is_kswapd())
>>> return 0;
>>> @@ -1678,8 +1683,10 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
>>> (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
>>> &nr_succeeded);
>>>
>>> + mod_node_page_state(pgdat,
>>> + PGDEMOTE_SRC_KSWAPD + reclaimer_offset(), nr_succeeded);
>>> mod_node_page_state(NODE_DATA(target_nid),
>>> - PGDEMOTE_KSWAPD + reclaimer_offset(), nr_succeeded);
>>> + PGDEMOTE_DST_KSWAPD + reclaimer_offset(), nr_succeeded);
>>>
>>> return nr_succeeded;
>>> }
>>> diff --git a/mm/vmstat.c b/mm/vmstat.c
>>> index f141c48c39e4..63f106a5e008 100644
>>> --- a/mm/vmstat.c
>>> +++ b/mm/vmstat.c
>>> @@ -1244,9 +1244,12 @@ const char * const vmstat_text[] = {
>>> #ifdef CONFIG_NUMA_BALANCING
>>> "pgpromote_success",
>>> "pgpromote_candidate",
>>> - "pgdemote_kswapd",
>>> - "pgdemote_direct",
>>> - "pgdemote_khugepaged",
>>> + "pgdemote_src_kswapd",
>>> + "pgdemote_src_direct",
>>> + "pgdemote_src_khugepaged",
>>> + "pgdemote_dst_kswapd",
>>> + "pgdemote_dst_direct",
>>> + "pgdemote_dst_khugepaged",
>>> #endif
>>>
>>> /* enum writeback_stat_item counters */