Re: [RESEND PATCH V3] NUMA:Improve the efficiency of calculating pages loss

From: Mike Rapoport
Date: Mon Aug 14 2023 - 12:01:12 EST


On Fri, Aug 04, 2023 at 11:32:51PM +0800, Liam Ni wrote:
> Optimize the way of calculating missing pages.
>
> In the previous implementation, we calculate missing pages as follows:
> 1. Calculate numaram by traversing all the numa_meminfo blocks and, for
> each of them, traversing all the regions in memblock.memory to prepare
> for counting missing pages.
>
> 2. Traverse all the regions in memblock.memory again to get e820ram.
>
> 3. the missing page is (e820ram - numaram )
>
> But it's enough to count the memory in 'memblock.memory' that doesn't have
> a node assigned.
>
> V2:https://lore.kernel.org/all/20230619075315.49114-1-zhiguangni01@xxxxxxxxx/
> V1:https://lore.kernel.org/all/20230615142016.419570-1-zhiguangni01@xxxxxxxxx/
>
> Signed-off-by: Liam Ni <zhiguangni01@xxxxxxxxx>
> ---
> arch/loongarch/kernel/numa.c | 23 ++++++++---------------
> arch/x86/mm/numa.c | 26 +++++++-------------------
> include/linux/mm.h | 1 +
> mm/mm_init.c | 20 ++++++++++++++++++++
> 4 files changed, 36 insertions(+), 34 deletions(-)
>
> diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
> index 708665895b47..0239891e4d19 100644
> --- a/arch/loongarch/kernel/numa.c
> +++ b/arch/loongarch/kernel/numa.c
> @@ -262,25 +262,18 @@ static void __init node_mem_init(unsigned int node)
> * Sanity check to catch more bad NUMA configurations (they are amazingly
> * common). Make sure the nodes cover all memory.
> */
> -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> +static bool __init memblock_validate_numa_coverage(const u64 limit)

There is no need to have an arch-specific memblock_validate_numa_coverage().
You can add this function to memblock and call it from the NUMA
initialization code instead of numa_meminfo_cover_memory().

memblock_validate_numa_coverage() will count all the pages without a node
ID set and compare that count to the threshold provided by the architecture.

> {
> - int i;
> - u64 numaram, biosram;
> + u64 lo_pg;
>
> - numaram = 0;
> - for (i = 0; i < mi->nr_blks; i++) {
> - u64 s = mi->blk[i].start >> PAGE_SHIFT;
> - u64 e = mi->blk[i].end >> PAGE_SHIFT;
> + lo_pg = max_pfn - calculate_without_node_pages_in_range();
>
> - numaram += e - s;
> - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> - if ((s64)numaram < 0)
> - numaram = 0;
> + /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> + if (lo_pg >= limit) {
> + pr_err("NUMA: We lost 1m size page.\n");
> + return false;
> }
> - max_pfn = max_low_pfn;
> - biosram = max_pfn - absent_pages_in_range(0, max_pfn);
>
> - BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT)));
> return true;
> }
>
> @@ -428,7 +421,7 @@ int __init init_numa_memory(void)
> return -EINVAL;
>
> init_node_memblock();
> - if (numa_meminfo_cover_memory(&numa_meminfo) == false)
> + if (memblock_validate_numa_coverage(SZ_1M) == false)
> return -EINVAL;
>
> for_each_node_mask(node, node_possible_map) {
> diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> index 2aadb2019b4f..14feec144675 100644
> --- a/arch/x86/mm/numa.c
> +++ b/arch/x86/mm/numa.c
> @@ -451,30 +451,18 @@ EXPORT_SYMBOL(__node_distance);
> * Sanity check to catch more bad NUMA configurations (they are amazingly
> * common). Make sure the nodes cover all memory.
> */
> -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> +static bool __init memblock_validate_numa_coverage(const u64 limit)
> {
> - u64 numaram, e820ram;
> - int i;
> + u64 lo_pg;
>
> - numaram = 0;
> - for (i = 0; i < mi->nr_blks; i++) {
> - u64 s = mi->blk[i].start >> PAGE_SHIFT;
> - u64 e = mi->blk[i].end >> PAGE_SHIFT;
> - numaram += e - s;
> - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> - if ((s64)numaram < 0)
> - numaram = 0;
> - }
> -
> - e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
> + lo_pg = max_pfn - calculate_without_node_pages_in_range();
>
> /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> - if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
> - printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
> - (numaram << PAGE_SHIFT) >> 20,
> - (e820ram << PAGE_SHIFT) >> 20);
> + if (lo_pg >= limit) {
> + pr_err("NUMA: We lost 1m size page.\n");
> return false;
> }
> +
> return true;
> }
>
> @@ -583,7 +571,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
> return -EINVAL;
> }
> }
> - if (!numa_meminfo_cover_memory(mi))
> + if (!memblock_validate_numa_coverage(SZ_1M))
> return -EINVAL;
>
> /* Finally register nodes. */
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0daef3f2f029..b32457ad1ae3 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3043,6 +3043,7 @@ unsigned long __absent_pages_in_range(int nid,
> unsigned long start_pfn,
> unsigned long end_pfn);
> extern unsigned long absent_pages_in_range(unsigned long start_pfn,
> unsigned long end_pfn);
> +extern unsigned long calculate_without_node_pages_in_range(void);
> extern void get_pfn_range_for_nid(unsigned int nid,
> unsigned long *start_pfn, unsigned long *end_pfn);
>
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 3ddd18a89b66..13a4883787e3 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -1132,6 +1132,26 @@ static void __init adjust_zone_range_for_zone_movable(int nid,
> }
> }
>
> +/**
> + * @start_pfn: The start PFN to start searching for holes
> + * @end_pfn: The end PFN to stop searching for holes
> + *
> + * Return: Return the number of page frames without node assigned within a range.
> + */
> +unsigned long __init calculate_without_node_pages_in_range(void)
> +{
> + unsigned long num_pages;
> + unsigned long start_pfn, end_pfn;
> + int nid, i;
> +
> + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
> + if (nid == NUMA_NO_NODE)
> + num_pages += end_pfn - start_pfn;
> + }
> +
> + return num_pages;
> +}
> +
> /*
> * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
> * then all holes in the requested range will be accounted for.
> --
> 2.25.1

--
Sincerely yours,
Mike.