[patch 36/60] x86/mm/kpti: Add functions to clone kernel PMDs

From: Thomas Gleixner
Date: Mon Dec 04 2017 - 12:02:27 EST


From: Andy Lutomirski <luto@xxxxxxxxxx>

Provide infrastructure to:

- find a kernel PMD for a mapping which must be visible to user space for
the entry/exit code to work.

- walk an address range and share the kernel PMD with it.

This reuses a small part of the original KAISER patches to populate the
user space page table.

[ tglx: Made it universally usable so it can be used for any kind of shared
mapping. Add a mechanism to clear specific bits in the user space
visible PMD entry. ]

Originally-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

---
arch/x86/mm/kpti.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 102 insertions(+)

--- a/arch/x86/mm/kpti.c
+++ b/arch/x86/mm/kpti.c
@@ -65,6 +65,108 @@ void __init kpti_check_boottime_disable(
}

/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a PMD on success, or NULL on failure.
+ */
+static pmd_t *kpti_user_pagetable_walk_pmd(unsigned long address)
+{
+ pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ pud_t *pud;
+ p4d_t *p4d;
+
+ if (address < PAGE_OFFSET) {
+ WARN_ONCE(1, "attempt to walk user address\n");
+ return NULL;
+ }
+
+ if (pgd_none(*pgd)) {
+ WARN_ONCE(1, "All user pgds should have been populated\n");
+ return NULL;
+ }
+ BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+ p4d = p4d_offset(pgd, address);
+ BUILD_BUG_ON(p4d_large(*p4d) != 0);
+ if (p4d_none(*p4d)) {
+ unsigned long new_pud_page = __get_free_page(gfp);
+ if (!new_pud_page)
+ return NULL;
+
+ if (p4d_none(*p4d)) {
+ set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
+ new_pud_page = 0;
+ }
+ if (new_pud_page)
+ free_page(new_pud_page);
+ }
+
+ pud = pud_offset(p4d, address);
+ /* The user page tables do not use large mappings: */
+ if (pud_large(*pud)) {
+ WARN_ON(1);
+ return NULL;
+ }
+ if (pud_none(*pud)) {
+ unsigned long new_pmd_page = __get_free_page(gfp);
+ if (!new_pmd_page)
+ return NULL;
+
+ if (pud_none(*pud)) {
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+ new_pmd_page = 0;
+ }
+ if (new_pmd_page)
+ free_page(new_pmd_page);
+ }
+
+ return pmd_offset(pud, address);
+}
+
+static void __init
+kpti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
+{
+ unsigned long addr;
+
+ /*
+ * Clone the populated PMDs which cover start to end. These PMD areas
+ * can have holes.
+ */
+ for (addr = start; addr < end; addr += PMD_SIZE) {
+ pmd_t *pmd, *target_pmd;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+
+ pgd = pgd_offset_k(addr);
+ if (WARN_ON(pgd_none(*pgd)))
+ return;
+ p4d = p4d_offset(pgd, addr);
+ if (WARN_ON(p4d_none(*p4d)))
+ return;
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud))
+ continue;
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ continue;
+
+ target_pmd = kpti_user_pagetable_walk_pmd(addr);
+ if (WARN_ON(!target_pmd))
+ return;
+
+ /*
+ * Copy the PMD. That is, the kernelmode and usermode
+ * tables will share the last-level page tables of this
+ * address range
+ */
+ *target_pmd = pmd_clear_flags(*pmd, clear);
+ }
+}
+
+/*
* Ensure that the top level of the user page tables are entirely
* populated. This ensures that all processes that get forked have the
* same entries. This way, we do not have to ever go set up new entries in