Re: [tip:x86/mm] x86: fix bootmem cross node for 32bit numa

From: Yinghai Lu
Date: Sun Mar 08 2009 - 19:16:26 EST


We may need this one (and the two following ones) to be applied to
stable for 2.6.27, 2.6.28, and 2.6.29.

YH

On Wed, Mar 4, 2009 at 1:00 PM, Yinghai Lu <yinghai@xxxxxxxxxx> wrote:
> Commit-ID:  a71edd1f46c8a599509bda478fb4eea27fb0da63
> Gitweb:     http://git.kernel.org/tip/a71edd1f46c8a599509bda478fb4eea27fb0da63
> Author:     "Yinghai Lu" <yinghai@xxxxxxxxxx>
> AuthorDate: Wed, 4 Mar 2009 01:22:35 -0800
> Commit:     Ingo Molnar <mingo@xxxxxxx>
> CommitDate: Wed, 4 Mar 2009 20:55:03 +0100
>
> x86: fix bootmem cross node for 32bit numa
>
> Impact: fix panic on system 2g x4 sockets
>
> Found one system with 4 sockets, where every socket has 2g, that can not
> boot with numa32 because bootmem is crossing nodes.
>
> So try to have a numa version of setup_bootmem_allocator().
>
> Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> LKML-Reference: <49AE485B.8000902@xxxxxxxxxx>
> Signed-off-by: Ingo Molnar <mingo@xxxxxxx>
>
>
> ---
>  arch/x86/mm/init_32.c |   46 ++++++++++++++++++++++++++++++++++++++++------
>  arch/x86/mm/numa_32.c |    5 +++--
>  2 files changed, 43 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
> index 917c4e6..67bdb59 100644
> --- a/arch/x86/mm/init_32.c
> +++ b/arch/x86/mm/init_32.c
> @@ -776,9 +776,37 @@ static void __init zone_sizes_init(void)
>        free_area_init_nodes(max_zone_pfns);
>  }
>
> +#ifdef CONFIG_NEED_MULTIPLE_NODES
> +static unsigned long __init setup_node_bootmem(int nodeid,
> +                                unsigned long start_pfn,
> +                                unsigned long end_pfn,
> +                                unsigned long bootmap)
> +{
> +       unsigned long bootmap_size;
> +
> +       if (start_pfn > max_low_pfn)
> +               return bootmap;
> +       if (end_pfn > max_low_pfn)
> +               end_pfn = max_low_pfn;
> +
> +       /* don't touch min_low_pfn */
> +       bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
> +                                        bootmap >> PAGE_SHIFT,
> +                                        start_pfn, end_pfn);
> +       printk(KERN_INFO "  node %d low ram: %08lx - %08lx\n",
> +               nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
> +       printk(KERN_INFO "  node %d bootmap %08lx - %08lx\n",
> +                nodeid, bootmap, bootmap + bootmap_size);
> +       free_bootmem_with_active_regions(nodeid, end_pfn);
> +       early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
> +
> +       return bootmap + bootmap_size;
> +}
> +#endif
> +
>  void __init setup_bootmem_allocator(void)
>  {
> -       int i;
> +       int nodeid;
>        unsigned long bootmap_size, bootmap;
>        /*
>         * Initialize the boot-time allocator (with low memory only):
> @@ -791,18 +819,24 @@ void __init setup_bootmem_allocator(void)
>                panic("Cannot find bootmem map of size %ld\n", bootmap_size);
>        reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
>
> -       /* don't touch min_low_pfn */
> -       bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
> -                                        min_low_pfn, max_low_pfn);
>        printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
>                 max_pfn_mapped<<PAGE_SHIFT);
>        printk(KERN_INFO "  low ram: %08lx - %08lx\n",
>                 min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
> +
> +#ifdef CONFIG_NEED_MULTIPLE_NODES
> +       for_each_online_node(nodeid)
> +               bootmap = setup_node_bootmem(nodeid, node_start_pfn[nodeid],
> +                                       node_end_pfn[nodeid], bootmap);
> +#else
> +       /* don't touch min_low_pfn */
> +       bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
> +                                        min_low_pfn, max_low_pfn);
>        printk(KERN_INFO "  bootmap %08lx - %08lx\n",
>                 bootmap, bootmap + bootmap_size);
> -       for_each_online_node(i)
> -               free_bootmem_with_active_regions(i, max_low_pfn);
> +       free_bootmem_with_active_regions(0, max_low_pfn);
>        early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
> +#endif
>
>        after_init_bootmem = 1;
>  }
> diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
> index 451fe95..3daefa0 100644
> --- a/arch/x86/mm/numa_32.c
> +++ b/arch/x86/mm/numa_32.c
> @@ -416,10 +416,11 @@ void __init initmem_init(unsigned long start_pfn,
>        for_each_online_node(nid)
>                propagate_e820_map_node(nid);
>
> -       for_each_online_node(nid)
> +       for_each_online_node(nid) {
>                memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
> +               NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
> +       }
>
> -       NODE_DATA(0)->bdata = &bootmem_node_data[0];
>        setup_bootmem_allocator();
>  }
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/