[tip: x86/mm] x86/mm: Randomize per-cpu entry area

From: tip-bot2 for Peter Zijlstra
Date: Fri Oct 21 2022 - 18:26:08 EST


The following commit has been merged into the x86/mm branch of tip:

Commit-ID: 11a4f78908cb8a6cccbb49dd7d0455a94741e959
Gitweb: https://git.kernel.org/tip/11a4f78908cb8a6cccbb49dd7d0455a94741e959
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Fri, 07 Oct 2022 10:42:36 +02:00
Committer: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
CommitterDate: Fri, 21 Oct 2022 09:48:58 -07:00

x86/mm: Randomize per-cpu entry area

Seth found that the CPU entry area, the piece of per-cpu data that is
mapped into the userspace page tables for kPTI, is not subject to any
randomization, irrespective of kASLR settings.

On x86_64 a whole P4D (512 GB) of virtual address space is reserved for
this structure, which is plenty large enough to randomize things a
little.
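
For a rough sense of scale (an illustration, not part of the patch; the
exact CPU_ENTRY_AREA_SIZE depends on configuration), the number of
distinct slots available for randomization is the quantity that
init_cea_offsets() computes below:

	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;

With CPU_ENTRY_AREA_MAP_SIZE equal to P4D_SIZE (512 GB) on x86_64, one
page reserved for the read-only IDT mapping, and a per-CPU entry area on
the order of a few hundred kilobytes, that works out to roughly a million
or more candidate slots, vastly more than NR_CPUS.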

As such, use a straightforward randomization scheme that avoids
duplicates to spread the existing CPUs over the available space.
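
As an illustration, here is a minimal user-space sketch of that
duplicate-avoiding (rejection sampling) scheme; NR_DEMO_CPUS, MAX_CEA_DEMO
and rand() are placeholders standing in for the kernel's possible-CPU
count, the computed max_cea and prandom_u32_max() used in the patch below:

	#include <stdio.h>
	#include <stdlib.h>
	#include <time.h>

	#define NR_DEMO_CPUS	8	/* stand-in for the possible CPU count */
	#define MAX_CEA_DEMO	1024	/* assumed number of available slots */

	static unsigned int cea_offset[NR_DEMO_CPUS];

	int main(void)
	{
		unsigned int i, j, cea;

		srand((unsigned int)time(NULL));

		for (i = 0; i < NR_DEMO_CPUS; i++) {
	again:
			cea = (unsigned int)rand() % MAX_CEA_DEMO;

			/* Reject the pick if an already-placed CPU uses this slot. */
			for (j = 0; j < i; j++) {
				if (cea_offset[j] == cea)
					goto again;
			}

			cea_offset[i] = cea;
			printf("cpu %u -> slot %u\n", i, cea);
		}

		return 0;
	}

Each CPU simply retries until it draws a slot no earlier CPU has taken;
since the slot space is vastly larger than the CPU count, collisions are
rare and the quadratic scan is an acceptable one-time cost at boot. The
kernel version iterates the per-cpu variables themselves, which is why it
breaks out of the inner loop at i == j instead of bounding it by i.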

This makes it harder to find the addresses of important structures in
the CPU entry areas, such as the entry stacks.

[ dhansen: add minor comment in "sodding terrible" loop ]

Reported-by: Seth Jenkins <sethjenkins@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Signed-off-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Reviewed-by: Kees Cook <keescook@xxxxxxxxxxxx>
---
arch/x86/include/asm/cpu_entry_area.h | 4 +--
arch/x86/include/asm/pgtable_areas.h | 8 +++-
arch/x86/kernel/hw_breakpoint.c | 2 +-
arch/x86/mm/cpu_entry_area.c | 47 +++++++++++++++++++++++---
4 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 75efc4c..462fc34 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -130,10 +130,6 @@ struct cpu_entry_area {
};

#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
-
-/* Total size includes the readonly IDT mapping page as well: */
-#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)

DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h
index d34cce1..62e5ede 100644
--- a/arch/x86/include/asm/pgtable_areas.h
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -11,6 +11,12 @@

#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)

-#define CPU_ENTRY_AREA_MAP_SIZE	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+#ifdef CONFIG_X86_32
+#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU +		\
+					 (CPU_ENTRY_AREA_SIZE * NR_CPUS) -	\
+					 CPU_ENTRY_AREA_BASE)
+#else
+#define CPU_ENTRY_AREA_MAP_SIZE		P4D_SIZE
+#endif

#endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 668a4a6..bbb0f73 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)

 	/* CPU entry erea is always used for CPU entry */
 	if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
-			CPU_ENTRY_AREA_TOTAL_SIZE))
+			CPU_ENTRY_AREA_MAP_SIZE))
 		return true;

/*
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 6c2f1b7..ad1f750 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -15,16 +15,54 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
 DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
-#endif

-#ifdef CONFIG_X86_32
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
+
+static inline unsigned int cea_offset(unsigned int cpu)
+{
+	return per_cpu(_cea_offset, cpu);
+}
+
+static __init void init_cea_offsets(void)
+{
+	unsigned int max_cea;
+	unsigned int i, j;
+
+	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
+
+	/* O(sodding terrible) */
+	for_each_possible_cpu(i) {
+		unsigned int cea;
+
+again:
+		cea = prandom_u32_max(max_cea);
+
+		/* Make sure that no previous CPU shares the offset: */
+		for_each_possible_cpu(j) {
+			if (cea_offset(j) == cea)
+				goto again;
+
+			if (i == j)
+				break;
+		}
+
+		per_cpu(_cea_offset, i) = cea;
+	}
+}
+#else /* !X86_64 */
 DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
+
+static inline unsigned int cea_offset(unsigned int cpu)
+{
+	return cpu;
+}
+static inline void init_cea_offsets(void) { }
 #endif

 /* Is called from entry code, so must be noinstr */
 noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
 {
-	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
 	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);

 	return (struct cpu_entry_area *) va;
@@ -205,7 +243,6 @@ static __init void setup_cpu_entry_area_ptes(void)

 	/* The +1 is for the readonly IDT: */
 	BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
-	BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
 	BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);

 	start = CPU_ENTRY_AREA_BASE;
@@ -221,6 +258,8 @@ void __init setup_cpu_entry_areas(void)
 {
 	unsigned int cpu;

+	init_cea_offsets();
+
 	setup_cpu_entry_area_ptes();

 	for_each_possible_cpu(cpu)