[tip: x86/bugs] x86/retpoline: Ensure default return thunk isn't used at runtime

From: tip-bot2 for Josh Poimboeuf
Date: Mon Feb 12 2024 - 09:29:20 EST


The following commit has been merged into the x86/bugs branch of tip:

Commit-ID: 4461438a8405e800f90e0e40409e5f3d07eed381
Gitweb: https://git.kernel.org/tip/4461438a8405e800f90e0e40409e5f3d07eed381
Author: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
AuthorDate: Wed, 03 Jan 2024 19:36:26 +01:00
Committer: Borislav Petkov (AMD) <bp@xxxxxxxxx>
CommitterDate: Mon, 12 Feb 2024 11:42:15 +01:00

x86/retpoline: Ensure default return thunk isn't used at runtime

Make sure the default return thunk is not used after all return
instructions have been patched by the alternatives because the default
return thunk is insufficient when it comes to mitigating Retbleed or
SRSO.

Fix based on an earlier version by David Kaplan <david.kaplan@xxxxxxx>.

[ bp: Fix the compilation error of warn_thunk_thunk being an invisible
symbol, hoist thunk macro into calling.h ]

Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Co-developed-by: Borislav Petkov (AMD) <bp@xxxxxxxxx>
Signed-off-by: Borislav Petkov (AMD) <bp@xxxxxxxxx>
Link: https://lore.kernel.org/r/20231010171020.462211-4-david.kaplan@xxxxxxx
Link: https://lore.kernel.org/r/20240104132446.GEZZaxnrIgIyat0pqf@fat_crate.local
---
arch/x86/entry/calling.h | 60 +++++++++++++++++++++++++++-
arch/x86/entry/entry.S | 4 ++-
arch/x86/entry/thunk_32.S | 34 +++------------
arch/x86/entry/thunk_64.S | 33 +---------------
arch/x86/include/asm/nospec-branch.h | 2 +-
arch/x86/kernel/cpu/bugs.c | 5 ++-
arch/x86/lib/retpoline.S | 15 ++-----
7 files changed, 85 insertions(+), 68 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 39e069b..bd31b25 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -426,3 +426,63 @@ For 32-bit we have the following conventions - kernel is built with
.endm

#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_X86_64
+
+/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
+.macro THUNK name, func
+SYM_FUNC_START(\name)
+ pushq %rbp
+ movq %rsp, %rbp
+
+ pushq %rdi
+ pushq %rsi
+ pushq %rdx
+ pushq %rcx
+ pushq %rax
+ pushq %r8
+ pushq %r9
+ pushq %r10
+ pushq %r11
+
+ call \func
+
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ popq %rbp
+ RET
+SYM_FUNC_END(\name)
+ _ASM_NOKPROBE(\name)
+.endm
+
+#else /* CONFIG_X86_32 */
+
+/* put return address in eax (arg1) */
+.macro THUNK name, func, put_ret_addr_in_eax=0
+SYM_CODE_START_NOALIGN(\name)
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+
+ .if \put_ret_addr_in_eax
+ /* Place EIP in the arg1 */
+ movl 3*4(%esp), %eax
+ .endif
+
+ call \func
+ popl %edx
+ popl %ecx
+ popl %eax
+ RET
+ _ASM_NOKPROBE(\name)
+SYM_CODE_END(\name)
+ .endm
+
+#endif
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 8c8d38f..582731f 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -7,6 +7,8 @@
#include <linux/linkage.h>
#include <asm/msr-index.h>

+#include "calling.h"
+
.pushsection .noinstr.text, "ax"

SYM_FUNC_START(entry_ibpb)
@@ -20,3 +22,5 @@ SYM_FUNC_END(entry_ibpb)
EXPORT_SYMBOL_GPL(entry_ibpb);

.popsection
+
+THUNK warn_thunk_thunk, __warn_thunk
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index 0103e10..da37f42 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -4,33 +4,15 @@
* Copyright 2008 by Steven Rostedt, Red Hat, Inc
* (inspired by Andi Kleen's thunk_64.S)
*/
- #include <linux/export.h>
- #include <linux/linkage.h>
- #include <asm/asm.h>

- /* put return address in eax (arg1) */
- .macro THUNK name, func, put_ret_addr_in_eax=0
-SYM_CODE_START_NOALIGN(\name)
- pushl %eax
- pushl %ecx
- pushl %edx
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <asm/asm.h>

- .if \put_ret_addr_in_eax
- /* Place EIP in the arg1 */
- movl 3*4(%esp), %eax
- .endif
+#include "calling.h"

- call \func
- popl %edx
- popl %ecx
- popl %eax
- RET
- _ASM_NOKPROBE(\name)
-SYM_CODE_END(\name)
- .endm
-
- THUNK preempt_schedule_thunk, preempt_schedule
- THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
- EXPORT_SYMBOL(preempt_schedule_thunk)
- EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
+THUNK preempt_schedule_thunk, preempt_schedule
+THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
+EXPORT_SYMBOL(preempt_schedule_thunk)
+EXPORT_SYMBOL(preempt_schedule_notrace_thunk)

diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index 416b400..119ebdc 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -9,39 +9,6 @@
#include "calling.h"
#include <asm/asm.h>

- /* rdi: arg1 ... normal C conventions. rax is saved/restored. */
- .macro THUNK name, func
-SYM_FUNC_START(\name)
- pushq %rbp
- movq %rsp, %rbp
-
- pushq %rdi
- pushq %rsi
- pushq %rdx
- pushq %rcx
- pushq %rax
- pushq %r8
- pushq %r9
- pushq %r10
- pushq %r11
-
- call \func
-
- popq %r11
- popq %r10
- popq %r9
- popq %r8
- popq %rax
- popq %rcx
- popq %rdx
- popq %rsi
- popq %rdi
- popq %rbp
- RET
-SYM_FUNC_END(\name)
- _ASM_NOKPROBE(\name)
- .endm
-
THUNK preempt_schedule_thunk, preempt_schedule
THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
EXPORT_SYMBOL(preempt_schedule_thunk)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 2c0679e..5575461 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -357,6 +357,8 @@ extern void entry_ibpb(void);

extern void (*x86_return_thunk)(void);

+extern void __warn_thunk(void);
+
#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
extern void call_depth_return_thunk(void);

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f277541..a78892b 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -2850,3 +2850,8 @@ ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *bu
return cpu_show_common(dev, attr, buf, X86_BUG_GDS);
}
#endif
+
+void __warn_thunk(void)
+{
+ WARN_ONCE(1, "Unpatched return thunk in use. This should not happen!\n");
+}
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 0045153..721b528 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -369,19 +369,16 @@ SYM_FUNC_END(call_depth_return_thunk)
* 'JMP __x86_return_thunk' sites are changed to something else by
* apply_returns().
*
- * This should be converted eventually to call a warning function which
- * should scream loudly when the default return thunk is called after
- * alternatives have been applied.
- *
- * That warning function cannot BUG() because the bug splat cannot be
- * displayed in all possible configurations, leading to users not really
- * knowing why the machine froze.
+ * The ALTERNATIVE below adds a really loud warning to catch the case
+ * where the insufficient default return thunk ends up getting used for
+ * whatever reason like miscompilation or failure of
+ * objtool/alternatives/etc to patch all the return sites.
*/
SYM_CODE_START(__x86_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
- ANNOTATE_UNRET_SAFE
- ret
+ ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \
+ "jmp warn_thunk_thunk", X86_FEATURE_ALWAYS
int3
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)