[PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

From: David Woodhouse
Date: Fri Jan 12 2018 - 12:49:42 EST


When we context switch from a shallow call stack to a deeper one, as we
'ret' up the deeper side we may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace. This is
problematic if we have neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel), as malicious code in userspace
may then be executed speculatively. So overwrite the CPU's return
prediction stack with calls which are predicted to return to an infinite
loop, to "capture" speculation if this happens. This is required both
for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so
much overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Acked-by: Arjan van de Ven <arjan@xxxxxxxxxxxxxxx>
---
arch/x86/entry/entry_32.S | 11 +++++++++++
arch/x86/entry/entry_64.S | 11 +++++++++++
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/kernel/cpu/bugs.c | 34 ++++++++++++++++++++++++++++++++++
4 files changed, 57 insertions(+)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a5..ef0e478 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif

+#ifdef CONFIG_RETPOLINE
+ /*
+ * When we switch from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+ * with userspace addresses. On CPUs where those concerns
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl %esi
popl %edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 59874bc..b2937d8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
#endif

+#ifdef CONFIG_RETPOLINE
+ /*
+ * When we switch from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+ * with userspace addresses. On CPUs where those concerns
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq %r15
popq %r14
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f275447..aa09559 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */

#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e4dc261..c17cce3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
#include <asm/alternative.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
+#include <asm/intel-family.h>

static void __init spectre_v2_select_mitigation(void);

@@ -155,6 +156,22 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_NONE;
}

+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+ if (boot_cpu_data.x86 == 6) {
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ return true;
+ }
+ }
+ return false;
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -213,6 +230,23 @@ static void __init spectre_v2_select_mitigation(void)

spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+ * If we don't have SMEP or KPTI, then we run the risk of hitting
+ * userspace addresses in the RSB after a context switch from a
+ * shallow call stack to a deeper one. We must must fill the entire
+ * RSB to avoid that, even when using IBRS.
+ *
+ * Skylake era CPUs have a separate issue with *underflow* of the
+ * RSB, when they will predict 'ret' targets from the generic BTB.
+ * IBRS makes that safe, but we need to fill the RSB on context
+ * switch if we're using retpoline.
+ */
+ if ((!boot_cpu_has(X86_FEATURE_PTI) &&
+ !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Filling RSB on context switch\n");
+ }
}

#undef pr_fmt
--
2.7.4