Subject: [PATCH] x86, 64bit: Map first 1M ram early before memblock_x86_fill()

This patch fixes the following bug: memblock's initial memory array only
has room for 128 entries, and some EFI systems have more memory regions
than that, so memblock_x86_fill() needs to double the array.
efi_reserve_boot_services() can make things worse, i.e. require even more
entries in memblock.memory.regions.

On 64-bit we have two kernel mappings: the high kernel mapping, which is
set up early in head_64.S, and the low (direct) mapping, which is set up
later in init_memory_mapping().

Today max_pfn_mapped actually describes the high mapping. So when memblock
later needs a buffer to double its array, we can find room for the new
array, but we cannot use it: __va() in memblock_double_array() returns a
virtual address in the low kernel mapping, which does not exist yet at
that point (a short illustration follows the patch).

This patch maps the first 1M of RAM early, taking the early page-table
space from the BRK area.

It also adds max_pfn_high_mapped to track the high-mapped range, so the
initial max_pfn_mapped can be kept at 0 for 64-bit.

-v2: Updated changelog and comments according to Pekka Enberg.

Signed-off-by: Yinghai Lu

---
 arch/x86/include/asm/page_types.h |    1 
 arch/x86/include/asm/pgtable.h    |    1 
 arch/x86/kernel/head64.c          |    2 -
 arch/x86/kernel/setup.c           |    2 +
 arch/x86/mm/init.c                |   41 ++++++++++++++++++++++++++++++++++++--
 arch/x86/mm/init_64.c             |    9 +++++++-
 6 files changed, 52 insertions(+), 4 deletions(-)

Index: linux-2.6/arch/x86/include/asm/page_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page_types.h
+++ linux-2.6/arch/x86/include/asm/page_types.h
@@ -45,6 +45,7 @@ extern int devmem_is_allowed(unsigned lo
 
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
+extern unsigned long max_pfn_high_mapped;
 
 static inline phys_addr_t get_max_mapped(void)
 {
Index: linux-2.6/arch/x86/include/asm/pgtable.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/pgtable.h
+++ linux-2.6/arch/x86/include/asm/pgtable.h
@@ -598,6 +598,7 @@ static inline int pgd_none(pgd_t pgd)
 
 extern int direct_gbpages;
 void init_mem_mapping(void);
+void early_init_mem_mapping(void);
 
 /* local pte updates need not use xchg for locking */
 static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
Index: linux-2.6/arch/x86/kernel/head64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head64.c
+++ linux-2.6/arch/x86/kernel/head64.c
@@ -76,7 +76,7 @@ void __init x86_64_start_kernel(char * r
 	/* Make NULL pointers segfault */
 	zap_identity_mappings();
 
-	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
+	max_pfn_high_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
 
 	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
 #ifdef CONFIG_EARLY_PRINTK
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -896,6 +896,8 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_ibft_region();
 
+	early_init_mem_mapping();
+
 	/*
 	 * Need to conclude brk, before memblock_x86_fill()
 	 * it could use memblock_find_in_range, could overlap with
Index: linux-2.6/arch/x86/mm/init.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init.c
+++ linux-2.6/arch/x86/mm/init.c
@@ -377,8 +377,6 @@ void __init init_mem_mapping(void)
 {
 	unsigned long tables = 0, good_end, end;
 
-	probe_page_size_mask();
-
 	/*
 	 * Find space for the kernel direct mapping tables.
 	 *
@@ -437,6 +435,45 @@ void __init init_mem_mapping(void)
 	early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
 }
 
+RESERVE_BRK(early_pgt_alloc, 65536);
+
+void __init early_init_mem_mapping(void)
+{
+	unsigned long tables;
+	phys_addr_t base;
+	unsigned long start = 0, end = ISA_END_ADDRESS;
+
+	probe_page_size_mask();
+
+	/*
+	 * On 64-bit, max_pfn_mapped should be 0 at this point;
+	 * 32-bit should have it set correctly in head_32.S, i.e. non-zero.
+	 */
+	if (max_pfn_mapped)
+		return;
+
+	tables = calculate_table_space_size(start, end);
+	base = __pa(extend_brk(tables, PAGE_SIZE));
+
+	pgt_buf_start = base >> PAGE_SHIFT;
+	pgt_buf_end = pgt_buf_start;
+	pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
+
+	printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx] prealloc\n",
+	       end - 1, pgt_buf_start << PAGE_SHIFT,
+	       (pgt_buf_top << PAGE_SHIFT) - 1);
+
+	init_memory_mapping(start, end);
+
+	printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx] final\n",
+	       end - 1, pgt_buf_start << PAGE_SHIFT,
+	       (pgt_buf_end << PAGE_SHIFT) - 1);
+	/* return unused brk space */
+	_brk_end -= (pgt_buf_top - pgt_buf_end) << PAGE_SHIFT;
+
+	pgt_buf_top = 0;
+}
+
 /*
  * devmem_is_allowed() checks to see if /dev/mem access to a certain address
  * is valid. The argument is a physical page number.
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -286,6 +286,9 @@ void __init init_extra_mapping_uc(unsign
 	__init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE);
 }
 
+/* max_pfn_high_mapped: highest pfn covered by the high kernel mapping */
+unsigned long max_pfn_high_mapped;
+
 /*
  * The head.S code sets up the kernel high mapping:
  *
@@ -302,7 +305,8 @@ void __init init_extra_mapping_uc(unsign
 void __init cleanup_highmap(void)
 {
 	unsigned long vaddr = __START_KERNEL_map;
-	unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
+	unsigned long vaddr_end = __START_KERNEL_map +
+				  (max_pfn_high_mapped << PAGE_SHIFT);
 	unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
 	pmd_t *pmd = level2_kernel_pgt;
 
@@ -312,6 +316,9 @@ void __init cleanup_highmap(void)
 		if (vaddr < (unsigned long) _text || vaddr > end)
 			set_pmd(pmd, __pmd(0));
 	}
+	max_pfn_high_mapped = __pa(end) >> PAGE_SHIFT;
+
+	pr_info("max_pfn_high_mapped: %lx\n", max_pfn_high_mapped);
 }
 
 static __ref void *alloc_low_page(unsigned long *phys)
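
For reference, the __va() problem described in the changelog can be shown
with a small userspace sketch. This is not kernel code: the address-space
constants below are the usual x86-64 values of this era, __ka() is just
shorthand invented for this sketch (not a kernel macro), and the program
only demonstrates the address arithmetic, i.e. that __va() always produces
an alias in the low (direct) mapping even while only the high mapping
set up by head_64.S exists.

    #include <stdio.h>

    /* Illustrative constants: base of the low (direct) mapping and of
     * the high kernel mapping on x86-64 around the time of this patch. */
    #define PAGE_OFFSET         0xffff880000000000UL
    #define __START_KERNEL_map  0xffffffff80000000UL

    /* __va() translates a physical address via the low mapping only */
    #define __va(paddr) ((unsigned long)(paddr) + PAGE_OFFSET)
    /* the high-mapping alias of the same physical address */
    #define __ka(paddr) ((unsigned long)(paddr) + __START_KERNEL_map)

    int main(void)
    {
            /* hypothetical buffer memblock might pick for the doubled
             * array without this patch, e.g. at 16M */
            unsigned long pa = 0x1000000UL;

            printf("high alias: %#lx (mapped early by head_64.S)\n",
                   __ka(pa));
            printf("__va alias: %#lx (unmapped until init_memory_mapping)\n",
                   __va(pa));
            return 0;
    }

memblock_double_array() dereferences the address __va() returns, so the
new array must lie in the low-mapped range. Mapping the first 1M early,
with max_pfn_mapped describing only that low-mapped range (the limit
memblock allocations honor via get_max_mapped()), is what makes the
doubling safe before memblock_x86_fill().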