[tip:x86/boot] x86/mm: Enable KASLR for physical mapping memory regions

From: tip-bot for Thomas Garnier
Date: Fri Jul 08 2016 - 16:38:02 EST


Commit-ID: 021182e52fe01c1f7b126f97fd6ba048dc4234fd
Gitweb: http://git.kernel.org/tip/021182e52fe01c1f7b126f97fd6ba048dc4234fd
Author: Thomas Garnier <thgarnie@xxxxxxxxxx>
AuthorDate: Tue, 21 Jun 2016 17:47:03 -0700
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Fri, 8 Jul 2016 17:35:15 +0200

x86/mm: Enable KASLR for physical mapping memory regions

Add the physical mapping in the list of randomized memory regions.

The physical memory mapping holds most allocations from boot and heap
allocators. Knowing the base address and physical memory size, an attacker
can deduce the PDE virtual address for the vDSO memory page. This attack
was demonstrated at CanSecWest 2016, in the following presentation:

"Getting Physical: Extreme Abuse of Intel Based Paged Systems":
https://github.com/n3k/CansecWest2016_Getting_Physical_Extreme_Abuse_of_Intel_Based_Paging_Systems/blob/master/Presentation/CanSec2016_Presentation.pdf

(See second part of the presentation).

The exploits used against Linux worked successfully against 4.6+ but
fail with KASLR memory enabled:

https://github.com/n3k/CansecWest2016_Getting_Physical_Extreme_Abuse_of_Intel_Based_Paging_Systems/tree/master/Demos/Linux/exploits

Similar research was done at Google leading to this patch proposal.

Variants exists to overwrite /proc or /sys objects ACLs leading to
elevation of privileges. These variants were tested against 4.6+.

The page offset used by the compressed kernel retains the static value
since it is not yet randomized during this boot stage.

Signed-off-by: Thomas Garnier <thgarnie@xxxxxxxxxx>
Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Alexander Kuleshov <kuleshovmail@xxxxxxxxx>
Cc: Alexander Popov <alpopov@xxxxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx>
Cc: Baoquan He <bhe@xxxxxxxxxx>
Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Christian Borntraeger <borntraeger@xxxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Dave Young <dyoung@xxxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Jan Beulich <JBeulich@xxxxxxxx>
Cc: Joerg Roedel <jroedel@xxxxxxx>
Cc: Jonathan Corbet <corbet@xxxxxxx>
Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Juergen Gross <jgross@xxxxxxxx>
Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Lv Zheng <lv.zheng@xxxxxxxxx>
Cc: Mark Salter <msalter@xxxxxxxxxx>
Cc: Martin Schwidefsky <schwidefsky@xxxxxxxxxx>
Cc: Matt Fleming <matt@xxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Stephen Smalley <sds@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Toshi Kani <toshi.kani@xxxxxxx>
Cc: Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx>
Cc: Yinghai Lu <yinghai@xxxxxxxxxx>
Cc: kernel-hardening@xxxxxxxxxxxxxxxxxx
Cc: linux-doc@xxxxxxxxxxxxxxx
Link: http://lkml.kernel.org/r/1466556426-32664-7-git-send-email-keescook@xxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/boot/compressed/pagetable.c | 3 +++
arch/x86/include/asm/kaslr.h | 2 ++
arch/x86/include/asm/page_64_types.h | 11 ++++++++++-
arch/x86/kernel/head_64.S | 2 +-
arch/x86/mm/kaslr.c | 18 +++++++++++++++---
5 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 6e31a6a..56589d0 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -20,6 +20,9 @@
/* These actually do the work of building the kernel identity maps. */
#include <asm/init.h>
#include <asm/pgtable.h>
+/* Use the static base for this part of the boot process */
+#undef __PAGE_OFFSET
+#define __PAGE_OFFSET __PAGE_OFFSET_BASE
#include "../../mm/ident_map.c"

/* Used by pgtable.h asm code to force instruction serialization. */
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 683c9d7..62b1b81 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -4,6 +4,8 @@
unsigned long kaslr_get_random_long(const char *purpose);

#ifdef CONFIG_RANDOMIZE_MEMORY
+extern unsigned long page_offset_base;
+
void kernel_randomize_memory(void);
#else
static inline void kernel_randomize_memory(void) { }
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index d5c2f8b..9215e05 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,6 +1,10 @@
#ifndef _ASM_X86_PAGE_64_DEFS_H
#define _ASM_X86_PAGE_64_DEFS_H

+#ifndef __ASSEMBLY__
+#include <asm/kaslr.h>
+#endif
+
#ifdef CONFIG_KASAN
#define KASAN_STACK_ORDER 1
#else
@@ -32,7 +36,12 @@
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
* what Xen requires.
*/
-#define __PAGE_OFFSET _AC(0xffff880000000000, UL)
+#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
+#ifdef CONFIG_RANDOMIZE_MEMORY
+#define __PAGE_OFFSET page_offset_base
+#else
+#define __PAGE_OFFSET __PAGE_OFFSET_BASE
+#endif /* CONFIG_RANDOMIZE_MEMORY */

#define __START_KERNEL_map _AC(0xffffffff80000000, UL)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c7920ba..9f8efc9 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -38,7 +38,7 @@

#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))

-L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
+L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)

diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index d5380a4..609ecf2 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -43,8 +43,12 @@
* before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to
* ensure that this order is correct and won't be changed.
*/
-static const unsigned long vaddr_start;
-static const unsigned long vaddr_end;
+static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
+static const unsigned long vaddr_end = VMALLOC_START;
+
+/* Default values */
+unsigned long page_offset_base = __PAGE_OFFSET_BASE;
+EXPORT_SYMBOL(page_offset_base);

/*
* Memory regions randomized by KASLR (except modules that use a separate logic
@@ -55,6 +59,7 @@ static __initdata struct kaslr_memory_region {
unsigned long *base;
unsigned long size_tb;
} kaslr_regions[] = {
+ { &page_offset_base, 64/* Maximum */ },
};

/* Get size in bytes used by the memory region */
@@ -77,13 +82,20 @@ void __init kernel_randomize_memory(void)
{
size_t i;
unsigned long vaddr = vaddr_start;
- unsigned long rand;
+ unsigned long rand, memory_tb;
struct rnd_state rand_state;
unsigned long remain_entropy;

if (!kaslr_memory_enabled())
return;

+ BUG_ON(kaslr_regions[0].base != &page_offset_base);
+ memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT);
+
+ /* Adapt phyiscal memory region size based on available memory */
+ if (memory_tb < kaslr_regions[0].size_tb)
+ kaslr_regions[0].size_tb = memory_tb;
+
/* Calculate entropy available between regions */
remain_entropy = vaddr_end - vaddr_start;
for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)