[PATCH PTI v3 03/10] x86/pti/64: Fix ESPFIX64 user mapping

From: Andy Lutomirski
Date: Tue Dec 12 2017 - 10:58:25 EST


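The ESPFIX64 user mapping belongs in pti.c just like all the other
user mappings. Move it there and make it work correctly while we're
at it. To that end, init_espfix_bsp() is now called from mm_init(),
ahead of pti_init(), so that the kernel's espfix p4d entry is already
populated when pti_setup_espfix64() copies it into the user page
tables.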

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
---
arch/x86/kernel/espfix_64.c | 16 ----------------
arch/x86/mm/pti.c | 42 +++++++++++++++++++++++++++++++++++++-----
init/main.c | 11 +++++++----
3 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 1c44e72ed1bc..9c4e7ba6870c 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -129,22 +129,6 @@ void __init init_espfix_bsp(void)
p4d = p4d_alloc(&init_mm, pgd, ESPFIX_BASE_ADDR);
p4d_populate(&init_mm, p4d, espfix_pud_page);

- /*
- * Just copy the top-level PGD that is mapping the espfix area to
- * ensure it is mapped into the user page tables.
- *
- * For 5-level paging, the espfix pgd was populated when
- * pti_init() pre-populated all the pgd entries. The above
- * p4d_alloc() would never do anything and the p4d_populate() would
- * be done to a p4d already mapped in the userspace pgd.
- */
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
- if (CONFIG_PGTABLE_LEVELS <= 4) {
- set_pgd(kernel_to_user_pgdp(pgd),
- __pgd(_KERNPG_TABLE | (p4d_pfn(*p4d) << PAGE_SHIFT)));
- }
-#endif
-
/* Randomize the locations */
init_espfix_random();

diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index f48645d2f3fd..e01c4aa3ec73 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -68,14 +68,12 @@ void __init pti_check_boottime_disable(void)
* Walk the user copy of the page tables (optionally) trying to allocate
* page table pages on the way down.
*
- * Returns a pointer to a PMD on success, or NULL on failure.
+ * Returns a pointer to a P4D on success, or NULL on failure.
*/
-static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
{
pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
- pud_t *pud;
- p4d_t *p4d;

if (address < PAGE_OFFSET) {
WARN_ONCE(1, "attempt to walk user address\n");
@@ -96,7 +94,21 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
}
BUILD_BUG_ON(pgd_large(*pgd) != 0);

- p4d = p4d_offset(pgd, address);
+ return p4d_offset(pgd, address);
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a PMD on success, or NULL on failure.
+ */
+static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+{
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
+ pud_t *pud;
+
BUILD_BUG_ON(p4d_large(*p4d) != 0);
if (p4d_none(*p4d)) {
unsigned long new_pud_page = __get_free_page(gfp);
@@ -174,6 +186,25 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
}
}

+static void __init pti_setup_espfix64(void)
+{
+#ifdef CONFIG_X86_ESPFIX64
+ /*
+ * ESPFIX64 uses a single p4d (i.e. a top-level entry on 4-level
+ * systems and a next-level entry on 5-level systems). Share that
+ * entry between the user and kernel pagetables.
+ */
+ pgd_t *kernel_pgd;
+ p4d_t *kernel_p4d, *user_p4d;
+
+ pr_err("espfix64 base = %lx\n", ESPFIX_BASE_ADDR); /* NOTE(review): debug leftover? confirm */
+ user_p4d = pti_user_pagetable_walk_p4d(ESPFIX_BASE_ADDR);
+ kernel_pgd = pgd_offset_k(ESPFIX_BASE_ADDR);
+ kernel_p4d = p4d_offset(kernel_pgd, ESPFIX_BASE_ADDR);
+ *user_p4d = *kernel_p4d; /* NOTE(review): user_p4d may be NULL; not checked */
+#endif
+}
+
/*
* Clone the populated PMDs of the user shared fixmaps into the user space
* visible page table.
@@ -212,4 +243,5 @@ void __init pti_init(void)

pti_clone_user_shared();
pti_clone_entry_text();
+ pti_setup_espfix64();
}
diff --git a/init/main.c b/init/main.c
index 64b00b89e9e1..ce18b938c382 100644
--- a/init/main.c
+++ b/init/main.c
@@ -505,6 +505,13 @@ static void __init mm_init(void)
pgtable_init();
vmalloc_init();
ioremap_huge_init();
+
+#ifdef CONFIG_X86_ESPFIX64
+ /* Should be run before the first non-init thread is created */
+ init_espfix_bsp();
+#endif
+
+ /* Should be run after espfix64 is set up. */
pti_init();
}

@@ -676,10 +683,6 @@ asmlinkage __visible void __init start_kernel(void)
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
#endif
-#ifdef CONFIG_X86_ESPFIX64
- /* Should be run before the first non-init thread is created */
- init_espfix_bsp();
-#endif
thread_stack_cache_init();
cred_init();
fork_init();
--
2.13.6