[RFC][PATCH 12/17] x86/cpu: Rename original retbleed return thunk

From: Peter Zijlstra
Date: Wed Aug 09 2023 - 03:27:12 EST


Rename the original retbleed return thunk from __x86_return_thunk to
zen_return_thunk, matching zen_untrain_ret.

Pull the dummy __x86_return_thunk out of the !CPU_UNRET_ENTRY case and
explicitly set x86_return_thunk to zen_return_thunk in the retbleed=unret case.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/include/asm/nospec-branch.h | 2 ++
arch/x86/kernel/cpu/bugs.c | 1 +
arch/x86/kernel/vmlinux.lds.S | 2 +-
arch/x86/lib/retpoline.S | 25 +++++++++++--------------
tools/objtool/check.c | 9 +++++++--
5 files changed, 22 insertions(+), 17 deletions(-)

--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -339,6 +339,8 @@ extern retpoline_thunk_t __x86_indirect_
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];

extern void __x86_return_thunk(void);
+
+extern void zen_return_thunk(void);
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);

--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -953,6 +953,7 @@ static void __init retbleed_select_mitig

case RETBLEED_MITIGATION_UNRET:
setup_force_cpu_cap(X86_FEATURE_UNRET);
+ x86_return_thunk = zen_return_thunk;
do_rethunk:
setup_force_cpu_cap(X86_FEATURE_RETHUNK);

--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -523,7 +523,7 @@ INIT_PER_CPU(irq_stack_backing_store);
#endif

#ifdef CONFIG_CPU_UNRET_ENTRY
-. = ASSERT((__x86_return_thunk & 0x3f) == 0, "__x86_return_thunk not cacheline-aligned");
+. = ASSERT((zen_return_thunk & 0x3f) == 0, "zen_return_thunk not cacheline-aligned");
. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
/*
* GNU ld cannot do XOR so do: (A | B) - (A & B) in order to compute the XOR
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -161,7 +161,7 @@ __EXPORT_THUNK(srso_untrain_ret_alias)

.section .text.__x86.rethunk_safe

-/* Needs a definition for the __x86_return_thunk alternative below. */
+/* Needs a definition for the zen_return_thunk alternative below. */
SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
add $8, %_ASM_SP
UNWIND_HINT_FUNC
@@ -174,7 +174,7 @@ SYM_FUNC_END(srso_safe_ret_alias)

/*
* Safety details here pertain to the AMD Zen{1,2} microarchitecture:
- * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * 1) The RET at zen_return_thunk must be on a 64 byte boundary, for
* alignment within the BTB.
* 2) The instruction at zen_untrain_ret must contain, and not
* end with, the 0xc3 byte of the RET.
@@ -182,7 +182,7 @@ SYM_FUNC_END(srso_safe_ret_alias)
* from re-poisioning the BTB prediction.
*/
.align 64
- .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc
+ .skip 64 - (zen_return_thunk - zen_untrain_ret), 0xcc
SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
ANNOTATE_NOENDBR
/*
@@ -190,16 +190,16 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL,
*
* TEST $0xcc, %bl
* LFENCE
- * JMP __x86_return_thunk
+ * JMP zen_return_thunk
*
* Executing the TEST instruction has a side effect of evicting any BTB
* prediction (potentially attacker controlled) attached to the RET, as
- * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+ * zen_return_thunk + 1 isn't an instruction boundary at the moment.
*/
.byte 0xf6

/*
- * As executed from __x86_return_thunk, this is a plain RET.
+ * As executed from zen_return_thunk, this is a plain RET.
*
* As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
*
@@ -211,13 +211,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL,
* With SMT enabled and STIBP active, a sibling thread cannot poison
* RET's prediction to a type of its choice, but can evict the
* prediction due to competitive sharing. If the prediction is
- * evicted, __x86_return_thunk will suffer Straight Line Speculation
+ * evicted, zen_return_thunk will suffer Straight Line Speculation
* which will be contained safely by the INT3.
*/
-SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+SYM_INNER_LABEL(zen_return_thunk, SYM_L_GLOBAL)
ret
int3
-SYM_CODE_END(__x86_return_thunk)
+SYM_CODE_END(zen_return_thunk)

/*
* Ensure the TEST decoding / BTB invalidation is complete.
@@ -228,7 +228,7 @@ SYM_CODE_END(__x86_return_thunk)
* Jump back and execute the RET in the middle of the TEST instruction.
* INT3 is for SLS protection.
*/
- jmp __x86_return_thunk
+ jmp zen_return_thunk
int3
SYM_FUNC_END(zen_untrain_ret)
__EXPORT_THUNK(zen_untrain_ret)
@@ -288,7 +288,7 @@ SYM_CODE_START(srso_alias_return_thunk)
ud2
SYM_CODE_END(srso_alias_return_thunk)

-#else /* CONFIG_CPU_UNRET_ENTRY */
+#endif /* CONFIG_CPU_UNRET_ENTRY */

.section .text.__x86.return_thunk

@@ -299,9 +299,6 @@ SYM_CODE_START(__x86_return_thunk)
ret
int3
SYM_CODE_END(__x86_return_thunk)
-
-#endif /* CONFIG_CPU_UNRET_ENTRY */
-
__EXPORT_THUNK(__x86_return_thunk)

#endif /* CONFIG_RETHUNK */
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -455,7 +455,12 @@ static int decode_instructions(struct ob
return -1;
}

- if (func->return_thunk || !strcmp(func->name, "srso_safe_ret") || func->alias != func)
+ /*
+ * Both zen_return_thunk() and srso_safe_ret() are embedded inside
+ * another instruction and objtool doesn't grok that. Skip validating them.
+ */
+ if (!strcmp(func->name, "zen_return_thunk") ||
+ !strcmp(func->name, "srso_safe_ret") || func->alias != func)
continue;

if (!find_insn(file, sec, func->offset)) {
@@ -1583,7 +1588,7 @@ static int add_jump_destinations(struct
* middle of another instruction. Objtool only
* knows about the outer instruction.
*/
- if (sym && sym->return_thunk) {
+ if (sym && !strcmp(sym->name, "zen_return_thunk")) {
add_return_call(file, insn, false);
continue;
}