[PATCH 3/7] x86/enter: Use IBRS on syscall and interrupts

From: Tim Chen
Date: Thu Jan 04 2018 - 13:19:12 EST


Set IBRS upon kernel entry via syscalls and interrupts. Clear it
upon exit to user space.

If an NMI runs while exiting the kernel between DISABLE_IBRS and
SWAPGS, the NMI would have turned on IBRS bit 0 and then would have
left it enabled when exiting the NMI. IBRS bit 0 would then be left
enabled in userland until the next kernel entry.

That is only a minor inefficiency, but we can eliminate it by saving
the MSR when entering the NMI in paranoid_entry and restoring it when
exiting the NMI.

Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
---
arch/x86/entry/entry_64.S | 24 ++++++++++++++++++++++++
arch/x86/entry/entry_64_compat.S | 9 +++++++++
2 files changed, 33 insertions(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3f72f5c..0c4d542 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -37,6 +37,7 @@
#include <asm/pgtable_types.h>
#include <asm/export.h>
#include <asm/frame.h>
+#include <asm/spec_ctrl.h>
#include <linux/err.h>

#include "calling.h"
@@ -170,6 +171,8 @@ ENTRY(entry_SYSCALL_64_trampoline)

/* Load the top of the task stack into RSP */
movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+ /* Stack is usable, use the non-clobbering IBRS enable: */
+ ENABLE_IBRS

/* Start building the simulated IRET frame. */
pushq $__USER_DS /* pt_regs->ss */
@@ -213,6 +216,8 @@ ENTRY(entry_SYSCALL_64)
* is not required to switch CR3.
*/
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ /* Stack is usable, use the non-clobbering IBRS enable: */
+ ENABLE_IBRS

TRACE_IRQS_OFF

@@ -407,6 +412,7 @@ syscall_return_via_sysret:
* We are on the trampoline stack. All regs except RDI are live.
* We can do future final exit work right here.
*/
+ DISABLE_IBRS
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi

popq %rdi
@@ -745,6 +751,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
* We can do future final exit work right here.
*/

+ DISABLE_IBRS
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi

/* Restore RDI. */
@@ -832,6 +839,14 @@ native_irq_return_ldt:
SWAPGS /* to kernel GS */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */

+ /*
+ * Normally we enable IBRS when we switch to kernel's CR3.
+ * But we are going to switch back to user CR3 immediately
+ * in this routine after fixing up the ESPFIX stack. There are
+ * no vulnerable indirect branches for IBRS to protect.
+ * We don't toggle IBRS to avoid the cost of two MSR writes.
+ */
+
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* user RAX */
movq (1*8)(%rsp), %rax /* user RIP */
@@ -965,6 +980,8 @@ ENTRY(switch_to_thread_stack)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ /* Stack is usable, use the non-clobbering IBRS enable: */
+ ENABLE_IBRS
UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI

pushq 7*8(%rdi) /* regs->ss */
@@ -1265,6 +1282,7 @@ ENTRY(paranoid_entry)

1:
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+ ENABLE_IBRS_SAVE_AND_CLOBBER save_reg=%r13d

ret
END(paranoid_entry)
@@ -1288,6 +1306,7 @@ ENTRY(paranoid_exit)
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
+ RESTORE_IBRS_CLOBBER save_reg=%r13d
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
jmp .Lparanoid_exit_restore
@@ -1318,6 +1337,7 @@ ENTRY(error_entry)
SWAPGS
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ ENABLE_IBRS_CLOBBER

.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
@@ -1365,6 +1385,7 @@ ENTRY(error_entry)
*/
SWAPGS
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ ENABLE_IBRS_CLOBBER
jmp .Lerror_entry_done

.Lbstep_iret:
@@ -1379,6 +1400,7 @@ ENTRY(error_entry)
*/
SWAPGS
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ ENABLE_IBRS

/*
* Pretend that the exception came from user mode: set up pt_regs
@@ -1480,6 +1502,7 @@ ENTRY(nmi)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ ENABLE_IBRS
UNWIND_HINT_IRET_REGS base=%rdx offset=8
pushq 5*8(%rdx) /* pt_regs->ss */
pushq 4*8(%rdx) /* pt_regs->rsp */
@@ -1730,6 +1753,7 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi

+ RESTORE_IBRS_CLOBBER save_reg=%r13d
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14

testl %ebx, %ebx /* swapgs needed? */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 40f1700..88ee1c0 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -14,6 +14,7 @@
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/spec_ctrl.h>
#include <linux/linkage.h>
#include <linux/err.h>

@@ -54,6 +55,7 @@ ENTRY(entry_SYSENTER_compat)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp

movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ ENABLE_IBRS

/*
* User tracing code (ptrace or signal handlers) might assume that
@@ -224,6 +226,7 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
* preserved during the C calls inside TRACE_IRQS_OFF anyway.
*/
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ ENABLE_IBRS_CLOBBER /* clobbers %rax, %rcx, %rdx */

/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -240,6 +243,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
/* Opportunistic SYSRET */
sysret32_from_system_call:
TRACE_IRQS_ON /* User mode traces as IRQs on. */
+ /*
+ * Clobber of %rax, %rcx, %rdx is OK before register restoring.
+ * This is safe to do here because we have no indirect branches
+ * between here and the return to userspace (sysretl).
+ */
+ DISABLE_IBRS_CLOBBER
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
--
2.9.4