Re: [V181,22/54] x86/cpu_entry_area: Move it out of fixmap

From: Andrei Vagin
Date: Thu Dec 21 2017 - 21:46:56 EST


Hi Thomas,

The kernel with this patch doesn't boot if CONFIG_KASAN is set:
[ 0.000000] Linux version 4.14.0-00142-g8604322546c0 (avagin@laptop) (gcc version 7.2.1 20170915 (Red Hat 7.2.1-2) (GCC)) #11 SMP Thu Dec 21 18:38:44 PST 2017
[ 0.000000] Command line: root=/dev/vda2 ro debug console=ttyS0,115200 LANG=en_US.UTF-8 slub_debug=FZP raid=noautodetect selinux=0 earlyprintk=serial,ttyS0,115200
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x008: 'MPX bounds registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x010: 'MPX CSR'
[ 0.000000] x86/fpu: xstate_offset[2]: 576, xstate_sizes[2]: 256
[ 0.000000] x86/fpu: xstate_offset[3]: 832, xstate_sizes[3]: 64
[ 0.000000] x86/fpu: xstate_offset[4]: 896, xstate_sizes[4]: 64
[ 0.000000] x86/fpu: Enabled xstate features 0x1f, context size is 960 bytes, using 'compacted' format.
[ 0.000000] e820: BIOS-provided physical RAM map:
[ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
[ 0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000000f0000-0x00000000000fffff] reserved
[ 0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000007ffd8fff] usable
[ 0.000000] BIOS-e820: [mem 0x000000007ffd9000-0x000000007fffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000feffc000-0x00000000feffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000fffc0000-0x00000000ffffffff] reserved
[ 0.000000] bootconsole [earlyser0] enabled
[ 0.000000] NX (Execute Disable) protection: active
[ 0.000000] random: fast init done
[ 0.000000] SMBIOS 2.8 present.
[ 0.000000] DMI: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014
[ 0.000000] Hypervisor detected: KVM
[ 0.000000] tsc: Fast TSC calibration using PIT
[ 0.000000] e820: update [mem 0x00000000-0x00000fff] usable ==> reserved
[ 0.000000] e820: remove [mem 0x000a0000-0x000fffff] usable
[ 0.000000] e820: last_pfn = 0x7ffd9 max_arch_pfn = 0x400000000
[ 0.000000] MTRR default type: write-back
[ 0.000000] MTRR fixed ranges enabled:
[ 0.000000] 00000-9FFFF write-back
[ 0.000000] A0000-BFFFF uncachable
[ 0.000000] C0000-FFFFF write-protect
[ 0.000000] MTRR variable ranges enabled:
[ 0.000000] 0 base 0080000000 mask FF80000000 uncachable
[ 0.000000] 1 disabled
[ 0.000000] 2 disabled
[ 0.000000] 3 disabled
[ 0.000000] 4 disabled
[ 0.000000] 5 disabled
[ 0.000000] 6 disabled
[ 0.000000] 7 disabled
[ 0.000000] x86/PAT: Configuration [0-7]: WB WC UC- UC WB WP UC- WT
[ 0.000000] found SMP MP-table at [mem 0x000f6bd0-0x000f6bdf] mapped at [ffffffffff200bd0]
[ 0.000000] Base memory trampoline at [ffff880000099000] 99000 size 24576
[ 0.000000] Using GB pages for direct mapping
[ 0.000000] BRK [0x5bf4e000, 0x5bf4efff] PGTABLE
[ 0.000000] BRK [0x5bf4f000, 0x5bf4ffff] PGTABLE
[ 0.000000] BRK [0x5bf50000, 0x5bf50fff] PGTABLE
[ 0.000000] BRK [0x5bf51000, 0x5bf51fff] PGTABLE
[ 0.000000] BRK [0x5bf52000, 0x5bf52fff] PGTABLE
[ 0.000000] ACPI: Early table checksum verification disabled
[ 0.000000] ACPI: RSDP 0x00000000000F69C0 000014 (v00 BOCHS )
[ 0.000000] ACPI: RSDT 0x000000007FFE12FF 00002C (v01 BOCHS BXPCRSDT 00000001 BXPC 00000001)
[ 0.000000] ACPI: FACP 0x000000007FFE120B 000074 (v01 BOCHS BXPCFACP 00000001 BXPC 00000001)
[ 0.000000] ACPI: DSDT 0x000000007FFE0040 0011CB (v01 BOCHS BXPCDSDT 00000001 BXPC 00000001)
[ 0.000000] ACPI: FACS 0x000000007FFE0000 000040
[ 0.000000] ACPI: APIC 0x000000007FFE127F 000080 (v01 BOCHS BXPCAPIC 00000001 BXPC 00000001)
[ 0.000000] ACPI: Local APIC address 0xfee00000
[ 0.000000] No NUMA configuration found
[ 0.000000] Faking a node at [mem 0x0000000000000000-0x000000007ffd8fff]
[ 0.000000] NODE_DATA(0) allocated [mem 0x7ffc2000-0x7ffd8fff]
[ 0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
[ 0.000000] kvm-clock: cpu 0, msr 0:7ffc1001, primary cpu clock
[ 0.000000] kvm-clock: using sched offset of 137192604594 cycles
[ 0.000000] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
[ 0.000000] Zone ranges:
[ 0.000000] DMA [mem 0x0000000000001000-0x0000000000ffffff]
[ 0.000000] DMA32 [mem 0x0000000001000000-0x000000007ffd8fff]
[ 0.000000] Normal empty
[ 0.000000] Device empty
[ 0.000000] Movable zone start for each node
[ 0.000000] Early memory node ranges
[ 0.000000] node 0: [mem 0x0000000000001000-0x000000000009efff]
[ 0.000000] node 0: [mem 0x0000000000100000-0x000000007ffd8fff]
[ 0.000000] Initmem setup node 0 [mem 0x0000000000001000-0x000000007ffd8fff]
[ 0.000000] On node 0 totalpages: 524151
[ 0.000000] DMA zone: 64 pages used for memmap
[ 0.000000] DMA zone: 21 pages reserved
[ 0.000000] DMA zone: 3998 pages, LIFO batch:0
[ 0.000000] DMA32 zone: 8128 pages used for memmap
[ 0.000000] DMA32 zone: 520153 pages, LIFO batch:31

And then it starts booting again...

On Wed, Dec 20, 2017 at 10:35:25PM +0100, Thomas Gleixner wrote:
> Put the cpu_entry_area into a separate p4d entry. The fixmap gets too big
> and 0-day already hit a case where the fixmap ptes were cleared by
> cleanup_highmap().
>
> Aside from that, the fixmap API is a pain as it's all backwards.
>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> ---
> Documentation/x86/x86_64/mm.txt | 2 +
> arch/x86/include/asm/cpu_entry_area.h | 24 ++++++++++++-
> arch/x86/include/asm/desc.h | 1
> arch/x86/include/asm/fixmap.h | 32 -----------------
> arch/x86/include/asm/pgtable_32_types.h | 15 ++++++--
> arch/x86/include/asm/pgtable_64_types.h | 47 +++++++++++++++-----------
> arch/x86/kernel/dumpstack.c | 1
> arch/x86/kernel/traps.c | 5 +-
> arch/x86/mm/cpu_entry_area.c | 57 +++++++++++++++++++++++---------
> arch/x86/mm/dump_pagetables.c | 6 ++-
> arch/x86/mm/init_32.c | 6 +++
> arch/x86/mm/kasan_init_64.c | 6 ++-
> arch/x86/mm/pgtable_32.c | 1
> arch/x86/xen/mmu_pv.c | 2 -
> 14 files changed, 128 insertions(+), 77 deletions(-)
>
> --- a/Documentation/x86/x86_64/mm.txt
> +++ b/Documentation/x86/x86_64/mm.txt
> @@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40
> ... unused hole ...
> ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
> ... unused hole ...
> +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
> ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
> ... unused hole ...
> ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
> @@ -35,6 +36,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49
> ... unused hole ...
> ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
> ... unused hole ...
> +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
> ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
> ... unused hole ...
> ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
> --- a/arch/x86/include/asm/cpu_entry_area.h
> +++ b/arch/x86/include/asm/cpu_entry_area.h
> @@ -43,10 +43,32 @@ struct cpu_entry_area {
> };
>
> #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
> -#define CPU_ENTRY_AREA_PAGES (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
> +#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
>
> DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
>
> extern void setup_cpu_entry_areas(void);
> +extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
> +
> +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
> +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
> +
> +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
> +
> +#define CPU_ENTRY_AREA_MAP_SIZE \
> + (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
> +
> +static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
> +{
> + unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
> + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
> +
> + return (struct cpu_entry_area *) va;
> +}
> +
> +static inline struct entry_stack *cpu_entry_stack(int cpu)
> +{
> + return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
> +}
>
> #endif
> --- a/arch/x86/include/asm/desc.h
> +++ b/arch/x86/include/asm/desc.h
> @@ -7,6 +7,7 @@
> #include <asm/mmu.h>
> #include <asm/fixmap.h>
> #include <asm/irq_vectors.h>
> +#include <asm/cpu_entry_area.h>
>
> #include <linux/smp.h>
> #include <linux/percpu.h>
> --- a/arch/x86/include/asm/fixmap.h
> +++ b/arch/x86/include/asm/fixmap.h
> @@ -25,7 +25,6 @@
> #else
> #include <uapi/asm/vsyscall.h>
> #endif
> -#include <asm/cpu_entry_area.h>
>
> /*
> * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
> @@ -84,7 +83,6 @@ enum fixed_addresses {
> FIX_IO_APIC_BASE_0,
> FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
> #endif
> - FIX_RO_IDT, /* Virtual mapping for read-only IDT */
> #ifdef CONFIG_X86_32
> FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
> FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
> @@ -100,9 +98,6 @@ enum fixed_addresses {
> #ifdef CONFIG_X86_INTEL_MID
> FIX_LNW_VRTC,
> #endif
> - /* Fixmap entries to remap the GDTs, one per processor. */
> - FIX_CPU_ENTRY_AREA_TOP,
> - FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
>
> #ifdef CONFIG_ACPI_APEI_GHES
> /* Used for GHES mapping from assorted contexts */
> @@ -143,7 +138,7 @@ enum fixed_addresses {
> extern void reserve_top_address(unsigned long reserve);
>
> #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
> -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
> +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
>
> extern int fixmaps_set;
>
> @@ -191,30 +186,5 @@ void __init *early_memremap_decrypted_wp
> void __early_set_fixmap(enum fixed_addresses idx,
> phys_addr_t phys, pgprot_t flags);
>
> -static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
> -{
> - BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
> -
> - return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
> -}
> -
> -#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \
> - BUILD_BUG_ON(offset % PAGE_SIZE != 0); \
> - __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \
> - })
> -
> -#define get_cpu_entry_area_index(cpu, field) \
> - __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
> -
> -static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
> -{
> - return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
> -}
> -
> -static inline struct entry_stack *cpu_entry_stack(int cpu)
> -{
> - return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
> -}
> -
> #endif /* !__ASSEMBLY__ */
> #endif /* _ASM_X86_FIXMAP_H */
> --- a/arch/x86/include/asm/pgtable_32_types.h
> +++ b/arch/x86/include/asm/pgtable_32_types.h
> @@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set
> #define LAST_PKMAP 1024
> #endif
>
> -#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
> - & PMD_MASK)
> +/*
> + * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
> + * to avoid include recursion hell
> + */
> +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
> +
> +#define CPU_ENTRY_AREA_BASE \
> + ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
> +
> +#define PKMAP_BASE \
> + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
>
> #ifdef CONFIG_HIGHMEM
> # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
> #else
> -# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
> +# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
> #endif
>
> #define MODULES_VADDR VMALLOC_START
> --- a/arch/x86/include/asm/pgtable_64_types.h
> +++ b/arch/x86/include/asm/pgtable_64_types.h
> @@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t;
> #define PGDIR_MASK (~(PGDIR_SIZE - 1))
>
> /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
> -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
> +#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
> +
> #ifdef CONFIG_X86_5LEVEL
> -#define VMALLOC_SIZE_TB _AC(16384, UL)
> -#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
> -#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
> +# define VMALLOC_SIZE_TB _AC(16384, UL)
> +# define __VMALLOC_BASE _AC(0xff92000000000000, UL)
> +# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
> #else
> -#define VMALLOC_SIZE_TB _AC(32, UL)
> -#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
> -#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
> +# define VMALLOC_SIZE_TB _AC(32, UL)
> +# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
> +# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
> #endif
> +
> #ifdef CONFIG_RANDOMIZE_MEMORY
> -#define VMALLOC_START vmalloc_base
> -#define VMEMMAP_START vmemmap_base
> +# define VMALLOC_START vmalloc_base
> +# define VMEMMAP_START vmemmap_base
> #else
> -#define VMALLOC_START __VMALLOC_BASE
> -#define VMEMMAP_START __VMEMMAP_BASE
> +# define VMALLOC_START __VMALLOC_BASE
> +# define VMEMMAP_START __VMEMMAP_BASE
> #endif /* CONFIG_RANDOMIZE_MEMORY */
> -#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
> -#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
> +
> +#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
> +
> +#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
> /* The module sections ends with the start of the fixmap */
> -#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
> -#define MODULES_LEN (MODULES_END - MODULES_VADDR)
> -#define ESPFIX_PGD_ENTRY _AC(-2, UL)
> -#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
> -#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
> -#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
> +#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
> +#define MODULES_LEN (MODULES_END - MODULES_VADDR)
> +
> +#define ESPFIX_PGD_ENTRY _AC(-2, UL)
> +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
> +
> +#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
> +#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
> +
> +#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
> +#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
>
> #define EARLY_DYNAMIC_PAGE_TABLES 64
>
> --- a/arch/x86/kernel/dumpstack.c
> +++ b/arch/x86/kernel/dumpstack.c
> @@ -18,6 +18,7 @@
> #include <linux/nmi.h>
> #include <linux/sysfs.h>
>
> +#include <asm/cpu_entry_area.h>
> #include <asm/stacktrace.h>
> #include <asm/unwind.h>
>
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -951,8 +951,9 @@ void __init trap_init(void)
> * "sidt" instruction will not leak the location of the kernel, and
> * to defend the IDT against arbitrary memory write vulnerabilities.
> * It will be reloaded in cpu_init() */
> - __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
> - idt_descr.address = fix_to_virt(FIX_RO_IDT);
> + cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
> + PAGE_KERNEL_RO);
> + idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
>
> /*
> * Should be a barrier for any external CPU state:
> --- a/arch/x86/mm/cpu_entry_area.c
> +++ b/arch/x86/mm/cpu_entry_area.c
> @@ -13,11 +13,18 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char,
> [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
> #endif
>
> +void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
> +{
> + unsigned long va = (unsigned long) cea_vaddr;
> +
> + set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
> +}
> +
> static void __init
> -set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
> +cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
> {
> - for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
> - __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
> + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
> + cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
> }
>
> /* Setup the fixmap mappings only once per-processor */
> @@ -45,10 +52,12 @@ static void __init setup_cpu_entry_area(
> pgprot_t tss_prot = PAGE_KERNEL;
> #endif
>
> - __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
> - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
> - per_cpu_ptr(&entry_stack_storage, cpu), 1,
> - PAGE_KERNEL);
> + cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
> + gdt_prot);
> +
> + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
> + per_cpu_ptr(&entry_stack_storage, cpu), 1,
> + PAGE_KERNEL);
>
> /*
> * The Intel SDM says (Volume 3, 7.2.1):
> @@ -70,10 +79,9 @@ static void __init setup_cpu_entry_area(
> BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
> offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
> BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
> - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
> - &per_cpu(cpu_tss_rw, cpu),
> - sizeof(struct tss_struct) / PAGE_SIZE,
> - tss_prot);
> + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
> + &per_cpu(cpu_tss_rw, cpu),
> + sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
>
> #ifdef CONFIG_X86_32
> per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
> @@ -83,20 +91,37 @@ static void __init setup_cpu_entry_area(
> BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
> BUILD_BUG_ON(sizeof(exception_stacks) !=
> sizeof(((struct cpu_entry_area *)0)->exception_stacks));
> - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
> - &per_cpu(exception_stacks, cpu),
> - sizeof(exception_stacks) / PAGE_SIZE,
> - PAGE_KERNEL);
> + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
> + &per_cpu(exception_stacks, cpu),
> + sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
>
> - __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
> + cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
> __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
> #endif
> }
>
> +static __init void setup_cpu_entry_area_ptes(void)
> +{
> +#ifdef CONFIG_X86_32
> + unsigned long start, end;
> +
> + BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
> + BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
> +
> + start = CPU_ENTRY_AREA_BASE;
> + end = start + CPU_ENTRY_AREA_MAP_SIZE;
> +
> + for (; start < end; start += PMD_SIZE)
> + populate_extra_pte(start);
> +#endif
> +}
> +
> void __init setup_cpu_entry_areas(void)
> {
> unsigned int cpu;
>
> + setup_cpu_entry_area_ptes();
> +
> for_each_possible_cpu(cpu)
> setup_cpu_entry_area(cpu);
> }
> --- a/arch/x86/mm/dump_pagetables.c
> +++ b/arch/x86/mm/dump_pagetables.c
> @@ -58,6 +58,7 @@ enum address_markers_idx {
> KASAN_SHADOW_START_NR,
> KASAN_SHADOW_END_NR,
> #endif
> + CPU_ENTRY_AREA_NR,
> #ifdef CONFIG_X86_ESPFIX64
> ESPFIX_START_NR,
> #endif
> @@ -81,6 +82,7 @@ static struct addr_marker address_marker
> [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
> [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
> #endif
> + [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
> #ifdef CONFIG_X86_ESPFIX64
> [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
> #endif
> @@ -104,6 +106,7 @@ enum address_markers_idx {
> #ifdef CONFIG_HIGHMEM
> PKMAP_BASE_NR,
> #endif
> + CPU_ENTRY_AREA_NR,
> FIXADDR_START_NR,
> END_OF_SPACE_NR,
> };
> @@ -116,6 +119,7 @@ static struct addr_marker address_marker
> #ifdef CONFIG_HIGHMEM
> [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
> #endif
> + [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
> [FIXADDR_START_NR] = { 0UL, "Fixmap area" },
> [END_OF_SPACE_NR] = { -1, NULL }
> };
> @@ -541,8 +545,8 @@ static int __init pt_dump_init(void)
> address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
> # endif
> address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
> + address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
> #endif
> -
> return 0;
> }
> __initcall(pt_dump_init);
> --- a/arch/x86/mm/init_32.c
> +++ b/arch/x86/mm/init_32.c
> @@ -50,6 +50,7 @@
> #include <asm/setup.h>
> #include <asm/set_memory.h>
> #include <asm/page_types.h>
> +#include <asm/cpu_entry_area.h>
> #include <asm/init.h>
>
> #include "mm_internal.h"
> @@ -766,6 +767,7 @@ void __init mem_init(void)
> mem_init_print_info(NULL);
> printk(KERN_INFO "virtual kernel memory layout:\n"
> " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
> + " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n"
> #ifdef CONFIG_HIGHMEM
> " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
> #endif
> @@ -777,6 +779,10 @@ void __init mem_init(void)
> FIXADDR_START, FIXADDR_TOP,
> (FIXADDR_TOP - FIXADDR_START) >> 10,
>
> + CPU_ENTRY_AREA_BASE,
> + CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
> + CPU_ENTRY_AREA_MAP_SIZE >> 10,
> +
> #ifdef CONFIG_HIGHMEM
> PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
> (LAST_PKMAP*PAGE_SIZE) >> 10,
> --- a/arch/x86/mm/kasan_init_64.c
> +++ b/arch/x86/mm/kasan_init_64.c
> @@ -15,6 +15,7 @@
> #include <asm/tlbflush.h>
> #include <asm/sections.h>
> #include <asm/pgtable.h>
> +#include <asm/cpu_entry_area.h>
>
> extern struct range pfn_mapped[E820_MAX_ENTRIES];
>
> @@ -330,12 +331,13 @@ void __init kasan_init(void)
> (unsigned long)kasan_mem_to_shadow(_end),
> early_pfn_to_nid(__pa(_stext)));
>
> - shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
> + shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
> shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
> shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
> PAGE_SIZE);
>
> - shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
> + shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
> + CPU_ENTRY_AREA_TOT_SIZE);
> shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
> shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
> PAGE_SIZE);
> --- a/arch/x86/mm/pgtable_32.c
> +++ b/arch/x86/mm/pgtable_32.c
> @@ -10,6 +10,7 @@
> #include <linux/pagemap.h>
> #include <linux/spinlock.h>
>
> +#include <asm/cpu_entry_area.h>
> #include <asm/pgtable.h>
> #include <asm/pgalloc.h>
> #include <asm/fixmap.h>
> --- a/arch/x86/xen/mmu_pv.c
> +++ b/arch/x86/xen/mmu_pv.c
> @@ -2261,7 +2261,6 @@ static void xen_set_fixmap(unsigned idx,
>
> switch (idx) {
> case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
> - case FIX_RO_IDT:
> #ifdef CONFIG_X86_32
> case FIX_WP_TEST:
> # ifdef CONFIG_HIGHMEM
> @@ -2272,7 +2271,6 @@ static void xen_set_fixmap(unsigned idx,
> #endif
> case FIX_TEXT_POKE0:
> case FIX_TEXT_POKE1:
> - case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
> /* All local page mappings */
> pte = pfn_pte(phys, prot);
> break;