[tip:x86/pti] x86/kaslr: Fix the vaddr_end mess

From: tip-bot for Thomas Gleixner
Date: Thu Jan 04 2018 - 17:16:43 EST


Commit-ID: 1b3ef54207f068dae9c36d891ff69dd4d37c5c2f
Gitweb: https://git.kernel.org/tip/1b3ef54207f068dae9c36d891ff69dd4d37c5c2f
Author: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
AuthorDate: Thu, 4 Jan 2018 12:32:03 +0100
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Thu, 4 Jan 2018 23:04:57 +0100

x86/kaslr: Fix the vaddr_end mess

vaddr_end for KASLR is only documented in the KASLR code itself and is
adjusted depending on config options. So it's not surprising that a change
of the memory layout causes KASLR to have the wrong vaddr_end. This can map
arbitrary stuff into other areas causing hard to understand problems.

Remove the whole ifdef magic and define the start of the cpu_entry_area to
be the end of the KASLR vaddr range.

Add documentation to that effect.

Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
Reported-by: Benjamin Gilbert <benjamin.gilbert@xxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Tested-by: Benjamin Gilbert <benjamin.gilbert@xxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: stable <stable@xxxxxxxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Garnier <thgarnie@xxxxxxxxxx>,
Cc: Alexander Kuleshov <kuleshovmail@xxxxxxxxx>
Link: alpine.DEB.2.20.1801041320360.1771@nanos">https://lkml.kernel.org/r/alpine.DEB.2.20.1801041320360.1771@nanos
---
Documentation/x86/x86_64/mm.txt | 6 ++++++
arch/x86/include/asm/pgtable_64_types.h | 8 +++++++-
arch/x86/mm/kaslr.c | 34 ++++++++++-----------------------
3 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index f7dabe1..ea91cb6 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
... unused hole ...
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
... unused hole ...
+ vaddr_end for KASLR
fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
@@ -37,6 +38,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
+ vaddr_end for KASLR
fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
@@ -71,3 +73,7 @@ during EFI runtime calls.
Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
Their order is preserved but their base will be offset early at boot time.
+
+Be very careful vs. KASLR when changing anything here. The KASLR address
+range must not overlap with anything except the KASAN shadow area, which is
+correct as KASAN disables KASLR.
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 61b4b60..6b8f73d 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -75,7 +75,13 @@ typedef struct { pteval_t pte; } pte_t;
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))

-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+/*
+ * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
+ *
+ * Be very careful vs. KASLR when changing anything here. The KASLR address
+ * range must not overlap with anything except the KASAN shadow area, which
+ * is correct as KASAN disables KASLR.
+ */
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)

#ifdef CONFIG_X86_5LEVEL
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 879ef93..b805a61 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -34,25 +34,14 @@
#define TB_SHIFT 40

/*
- * Virtual address start and end range for randomization. The end changes base
- * on configuration to have the highest amount of space for randomization.
- * It increases the possible random position for each randomized region.
+ * Virtual address start and end range for randomization.
*
- * You need to add an if/def entry if you introduce a new memory region
- * compatible with KASLR. Your entry must be in logical order with memory
- * layout. For example, ESPFIX is before EFI because its virtual address is
- * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
- * ensure that this order is correct and won't be changed.
+ * The end address could depend on more configuration options to make the
+ * highest amount of space for randomization available, but that's too hard
+ * to keep straight and caused issues already.
*/
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
-
-#if defined(CONFIG_X86_ESPFIX64)
-static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
-#elif defined(CONFIG_EFI)
-static const unsigned long vaddr_end = EFI_VA_END;
-#else
-static const unsigned long vaddr_end = __START_KERNEL_map;
-#endif
+static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;

/* Default values */
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
@@ -101,16 +90,13 @@ void __init kernel_randomize_memory(void)
unsigned long remain_entropy;

/*
- * All these BUILD_BUG_ON checks ensures the memory layout is
- * consistent with the vaddr_start/vaddr_end variables.
+ * These BUILD_BUG_ON checks ensure the memory layout is consistent
+ * with the vaddr_start/vaddr_end variables. These checks are very
+ * limited....
*/
BUILD_BUG_ON(vaddr_start >= vaddr_end);
- BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
- vaddr_end >= EFI_VA_END);
- BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
- IS_ENABLED(CONFIG_EFI)) &&
- vaddr_end >= __START_KERNEL_map);
- BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
+ BUILD_BUG_ON)(vaddr_end != CPU_ENTRY_AREA_BASE);
+- BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);

if (!kaslr_memory_enabled())
return;