Re: [PATCH 30/43] x86/mm/kaiser: Map espfix structures

From: Peter Zijlstra
Date: Mon Nov 27 2017 - 10:35:26 EST


On Mon, Nov 27, 2017 at 10:14:24AM +0100, Peter Zijlstra wrote:

> But if we can freely spill here, should we not do the kernel switch
> instead of doing this user mapping? The way I understand things, the
> less of these magic mappings we have the better.

Turns out, we don't need more scratch regs at all.

The patch below seems to survive tools/testing/selftests/x86/sigreturn_64,
which exercises the ESPFIX crud.

---
arch/x86/entry/entry_64.S | 11 ++++++++---
arch/x86/kernel/espfix_64.c | 10 ++--------
2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index df0152bee8a8..289ba2680952 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -825,7 +825,9 @@ ENTRY(native_iret)
*/

pushq %rdi /* Stash user RDI */
- SWAPGS
+ SWAPGS /* to kernel GS */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
+
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* user RAX */
movq (1*8)(%rsp), %rax /* user RIP */
@@ -841,7 +843,6 @@ ENTRY(native_iret)
/* Now RAX == RSP. */

andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */
- popq %rdi /* Restore user RDI */

/*
* espfix_stack[31:16] == 0. The page tables are set up such that
@@ -852,7 +853,11 @@ ENTRY(native_iret)
* still points to an RO alias of the ESPFIX stack.
*/
orq PER_CPU_VAR(espfix_stack), %rax
- SWAPGS
+
+ SWITCH_TO_USER_CR3 scratch_reg=%rdi /* to user CR3 */
+ SWAPGS /* to user GS */
+ popq %rdi /* Restore user RDI */
+
movq %rax, %rsp
UNWIND_HINT_IRET_REGS offset=8

diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 8bb116d73aaa..8826475d786c 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -61,8 +61,8 @@
#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)

/* This contains the *bottom* address of the espfix stack */
-DEFINE_PER_CPU_USER_MAPPED(unsigned long, espfix_stack);
-DEFINE_PER_CPU_USER_MAPPED(unsigned long, espfix_waddr);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);

/* Initialization mutex - should this be a spinlock? */
static DEFINE_MUTEX(espfix_init_mutex);
@@ -225,10 +225,4 @@ void init_espfix_ap(int cpu)
per_cpu(espfix_stack, cpu) = addr;
per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
+ (addr & ~PAGE_MASK);
- /*
- * _PAGE_GLOBAL is not really required. This is not a hot
- * path, but we do it here for consistency.
- */
- kaiser_add_mapping((unsigned long)stack_page, PAGE_SIZE,
- __PAGE_KERNEL | _PAGE_GLOBAL);
}