[RFC PATCH 1/7] static_call/x86: Add __static_call_returnl0()

From: Frederic Weisbecker
Date: Mon Nov 09 2020 - 19:56:31 EST


From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

Provide a stub function that returns 0 and wire up the static call site
patching to replace the CALL with a single 5 byte instruction that
clears %RAX, the return value register.
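
For illustration only, the patched site goes from a 5 byte CALL to a
5 byte XOR of the same length (the CALL's relative target bytes are
shown as xx):

	e8 xx xx xx xx		call   <func>			# before
	66 66 48 31 c0		data16 data16 xor %rax,%rax	# after

Since both encodings are 5 bytes long, the transition can be patched
live with text_poke_bp(), which is why poke_int3_handler() below
learns to emulate the new instruction.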

The stub can be cast to any function pointer type whose single return
value fits in %RAX (including pointer returns). Also provide a version
that returns an int for convenience. The entire %RAX register is
cleared either way, whether the return value is 32 or 64 bits wide,
since %RAX is always a scratch register anyway.
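
As a purely illustrative sketch (my_hook and default_hook are made-up
names; only __static_call_return0() and the existing static call API
are real):

	#include <linux/static_call.h>

	static int default_hook(int x)
	{
		return x + 1;
	}
	DEFINE_STATIC_CALL(my_hook, default_hook);

	/*
	 * Disable the hook: with CONFIG_HAVE_STATIC_CALL_INLINE the call
	 * sites become the 5 byte xor instead of a CALL, so
	 * static_call(my_hook)(x) now returns 0 with no call/ret overhead.
	 */
	static_call_update(my_hook, (typeof(&default_hook))__static_call_return0);

The cast is what the "can be cast to any function pointer type" above
refers to; static_call_update() type-checks func against the call's
prototype.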

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxx>
[fweisbec: s/disp16/data16, integrate into text_gen_insn(), elaborate
comment on the resulting insn, emulate on int3 trap, provide validation,
uninline __static_call_return0() for HAVE_STATIC_CALL]
Signed-off-by: Frederic Weisbecker <frederic@xxxxxxxxxx>
---
 arch/x86/include/asm/text-patching.h | 26 +++++++++++++++++++++++++-
 arch/x86/kernel/alternative.c        |  5 +++++
 arch/x86/kernel/static_call.c        | 10 ++++++++--
 include/linux/static_call.h          |  9 +++++++++
 kernel/static_call.c                 | 10 ++++++++++
 5 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index b7421780e4e9..1250f440d1be 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -65,6 +65,9 @@ extern void text_poke_finish(void);
 #define JMP8_INSN_SIZE		2
 #define JMP8_INSN_OPCODE	0xEB
 
+#define XOR5RAX_INSN_SIZE	5
+#define XOR5RAX_INSN_OPCODE	0x31
+
 #define DISP32_SIZE		4
 
 static __always_inline int text_opcode_size(u8 opcode)
@@ -80,6 +83,7 @@ static __always_inline int text_opcode_size(u8 opcode)
 	__CASE(CALL);
 	__CASE(JMP32);
 	__CASE(JMP8);
+	__CASE(XOR5RAX);
 	}
 
 #undef __CASE
@@ -99,8 +103,21 @@ static __always_inline
 void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
 {
 	static union text_poke_insn insn; /* per instance */
-	int size = text_opcode_size(opcode);
+	int size;
 
+	if (opcode == XOR5RAX_INSN_OPCODE) {
+		/*
+		 * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax
+		 * The REX.W cancels the effect of any data16.
+		 */
+		static union text_poke_insn xor5rax = {
+			.text = { 0x66, 0x66, 0x48, 0x31, 0xc0 },
+		};
+
+		return &xor5rax.text;
+	}
+
+	size = text_opcode_size(opcode);
 	insn.opcode = opcode;
 
 	if (size > 1) {
@@ -165,6 +182,13 @@ void int3_emulate_ret(struct pt_regs *regs)
 	unsigned long ip = int3_emulate_pop(regs);
 	int3_emulate_jmp(regs, ip);
 }
+
+static __always_inline
+void int3_emulate_xor5rax(struct pt_regs *regs)
+{
+	regs->ax = 0;
+}
+
 #endif /* !CONFIG_UML_X86 */
 
 #endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2400ad62f330..37592f576a10 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1125,6 +1125,10 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
 		int3_emulate_jmp(regs, (long)ip + tp->rel32);
 		break;
 
+	case XOR5RAX_INSN_OPCODE:
+		int3_emulate_xor5rax(regs);
+		break;
+
 	default:
 		BUG();
 	}
@@ -1291,6 +1295,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
 	switch (tp->opcode) {
 	case INT3_INSN_OPCODE:
 	case RET_INSN_OPCODE:
+	case XOR5RAX_INSN_OPCODE:
 		break;
 
 	case CALL_INSN_OPCODE:
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index ca9a380d9c0b..3a36eaf3dd1f 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -18,7 +18,11 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
 
 	switch (type) {
 	case CALL:
-		code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
+		if (func == &__static_call_return0 ||
+		    func == &__static_call_returnl0)
+			code = text_gen_insn(XOR5RAX_INSN_OPCODE, insn, func);
+		else
+			code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
 		break;
 
 	case NOP:
@@ -54,7 +58,9 @@ static void __static_call_validate(void *insn, bool tail)
 			return;
 	} else {
 		if (opcode == CALL_INSN_OPCODE ||
-		    !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5))
+		    !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5) ||
+		    !memcmp(insn, text_gen_insn(XOR5RAX_INSN_OPCODE, NULL, NULL),
+			    XOR5RAX_INSN_SIZE))
 			return;
 	}
 
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 695da4c9b338..055544793430 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -136,6 +136,9 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool
 
 #ifdef CONFIG_HAVE_STATIC_CALL_INLINE
 
+extern int __static_call_return0(void);
+extern long __static_call_returnl0(void);
+
 extern int __init static_call_init(void);
 
 struct static_call_mod {
@@ -187,6 +190,9 @@ extern int static_call_text_reserved(void *start, void *end);
 
 #elif defined(CONFIG_HAVE_STATIC_CALL)
 
+extern int __static_call_return0(void);
+extern long __static_call_returnl0(void);
+
 static inline int static_call_init(void) { return 0; }
 
 struct static_call_key {
@@ -234,6 +240,9 @@ static inline int static_call_text_reserved(void *start, void *end)
 
 #else /* Generic implementation */
 
+static inline int __static_call_return0(void) { return 0; }
+static inline long __static_call_returnl0(void) { return 0; }
+
 static inline int static_call_init(void) { return 0; }
 
 struct static_call_key {
diff --git a/kernel/static_call.c b/kernel/static_call.c
index 84565c2a41b8..3cb371e71be6 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -438,6 +438,16 @@ int __init static_call_init(void)
 }
 early_initcall(static_call_init);
 
+int __static_call_return0(void)
+{
+	return 0;
+}
+
+long __static_call_returnl0(void)
+{
+	return 0;
+}
+
 #ifdef CONFIG_STATIC_CALL_SELFTEST
 
 static int func_a(int x)
--
2.25.1