Re: [PATCH 4.14 023/159] mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y

From: Dan Rue
Date: Fri Dec 22 2017 - 09:18:20 EST


On Fri, Dec 22, 2017 at 09:45:08AM +0100, Greg Kroah-Hartman wrote:
> 4.14-stable review patch. If anyone has any objections, please let me know.
>
> ------------------
>
> From: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
>
> commit 83e3c48729d9ebb7af5a31a504f3fd6aff0348c4 upstream.
>
> Size of the mem_section[] array depends on the size of the physical address space.
>
> In preparation for boot-time switching between paging modes on x86-64
> we need to make the allocation of mem_section[] dynamic, because otherwise
> we waste a lot of RAM: with CONFIG_NODE_SHIFT=10, mem_section[] size is 32kB
> for 4-level paging and 2MB for 5-level paging mode.
>
> The patch allocates the array on the first call to sparse_memory_present_with_active_regions().
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
> Cc: Borislav Petkov <bp@xxxxxxx>
> Cc: Cyrill Gorcunov <gorcunov@xxxxxxxxxx>
> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: linux-mm@xxxxxxxxx
> Link: http://lkml.kernel.org/r/20170929140821.37654-2-kirill.shutemov@xxxxxxxxxxxxxxx
> Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

This patch causes a boot failure on arm64.

Please drop this patch, or pick up the fix in:

commit 629a359bdb0e0652a8227b4ff3125431995fec6e
Author: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Date:   Tue Nov 7 11:33:37 2017 +0300

    mm/sparsemem: Fix ARM64 boot crash when CONFIG_SPARSEMEM_EXTREME=y

See https://www.mail-archive.com/linux-kernel@xxxxxxxxxxxxxxx/msg1527427.html

>
> ---
>  include/linux/mmzone.h |  6 +++++-
>  mm/page_alloc.c        | 10 ++++++++++
>  mm/sparse.c            | 17 +++++++++++------
>  3 files changed, 26 insertions(+), 7 deletions(-)
>
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1152,13 +1152,17 @@ struct mem_section {
> #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)
>
> #ifdef CONFIG_SPARSEMEM_EXTREME
> -extern struct mem_section *mem_section[NR_SECTION_ROOTS];
> +extern struct mem_section **mem_section;
> #else
> extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
> #endif
>
> static inline struct mem_section *__nr_to_section(unsigned long nr)
> {
> +#ifdef CONFIG_SPARSEMEM_EXTREME
> + if (!mem_section)
> + return NULL;
> +#endif
> if (!mem_section[SECTION_NR_TO_ROOT(nr)])
> return NULL;
> return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5651,6 +5651,16 @@ void __init sparse_memory_present_with_a
> unsigned long start_pfn, end_pfn;
> int i, this_nid;
>
> +#ifdef CONFIG_SPARSEMEM_EXTREME
> + if (!mem_section) {
> + unsigned long size, align;
> +
> + size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
> + align = 1 << (INTERNODE_CACHE_SHIFT);
> + mem_section = memblock_virt_alloc(size, align);
> + }
> +#endif
> +
> for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
> memory_present(this_nid, start_pfn, end_pfn);
> }
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -23,8 +23,7 @@
> * 1) mem_section - memory sections, mem_map's for valid memory
> */
> #ifdef CONFIG_SPARSEMEM_EXTREME
> -struct mem_section *mem_section[NR_SECTION_ROOTS]
> - ____cacheline_internodealigned_in_smp;
> +struct mem_section **mem_section;
> #else
> struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
> ____cacheline_internodealigned_in_smp;
> @@ -101,7 +100,7 @@ static inline int sparse_index_init(unsi
> int __section_nr(struct mem_section* ms)
> {
> unsigned long root_nr;
> - struct mem_section* root;
> + struct mem_section *root = NULL;
>
> for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
> root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
> @@ -112,7 +111,7 @@ int __section_nr(struct mem_section* ms)
> break;
> }
>
> - VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
> + VM_BUG_ON(!root);
>
> return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
> }
> @@ -330,11 +329,17 @@ again:
> static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
> {
> unsigned long usemap_snr, pgdat_snr;
> - static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
> - static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
> + static unsigned long old_usemap_snr;
> + static unsigned long old_pgdat_snr;
> struct pglist_data *pgdat = NODE_DATA(nid);
> int usemap_nid;
>
> + /* First call */
> + if (!old_usemap_snr) {
> + old_usemap_snr = NR_MEM_SECTIONS;
> + old_pgdat_snr = NR_MEM_SECTIONS;
> + }
> +
> usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
> pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
> if (usemap_snr == pgdat_snr)
>
>