Re: [PATCH v2 4/4] x86/static_call: Add inline static call implementation for x86-64

From: Peter Zijlstra
Date: Tue Nov 27 2018 - 03:44:05 EST


On Mon, Nov 26, 2018 at 03:26:28PM -0600, Josh Poimboeuf wrote:

> Yeah, that's probably better. I assume you also mean that we would have
> all text_poke_bp() users create a handler callback? That way the
> interface is clear and consistent for everybody. Like:

Can do, it does indeed make the interface less like a hack. It is not
like there are too many users.

> diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> index aac0c1f7e354..d4b0abe4912d 100644
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -37,6 +37,11 @@ static void bug_at(unsigned char *ip, int line)
> BUG();
> }
>
> +static inline void jump_label_bp_handler(struct pt_regs *regs, void *data)
> +{
> + regs->ip += JUMP_LABEL_NOP_SIZE - 1;
> +}
> +
> static void __ref __jump_label_transform(struct jump_entry *entry,
> enum jump_label_type type,
> void *(*poker)(void *, const void *, size_t),
> @@ -91,7 +96,7 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
> }
>
> text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE,
> - (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
> + jump_label_bp_handler, NULL);
> }
>
> void arch_jump_label_transform(struct jump_entry *entry,

Per that example..

> diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
> index d3869295b88d..e05ebc6d4db5 100644
> --- a/arch/x86/kernel/static_call.c
> +++ b/arch/x86/kernel/static_call.c
> @@ -7,24 +7,30 @@
>
> #define CALL_INSN_SIZE 5
>
> +struct static_call_bp_data {
> + unsigned long func, ret;
> +};
> +
> +static void static_call_bp_handler(struct pt_regs *regs, void *_data)
> +{
> + struct static_call_bp_data *data = _data;
> +
> + /*
> + * For inline static calls, push the return address on the stack so the
> + * "called" function will return to the location immediately after the
> + * call site.
> + *
> + * NOTE: This code will need to be revisited when kernel CET gets
> + * implemented.
> + */
> + if (data->ret) {
> + regs->sp -= sizeof(long);
> + *(unsigned long *)regs->sp = data->ret;
> + }
> +
> + /* The exception handler will 'return' to the destination function. */
> + regs->ip = data->func;
> +}

Now; if I'm not mistaken, the below @site is in fact @regs->ip - 1, no?

We already patched site with INT3, which is what we just trapped on. So
we could in fact write something like:

static void static_call_bp_handler(struct pt_regs *regs, void *data)
{
struct static_call_bp_data *scd = data;

switch (data->type) {
case CALL_INSN: /* emulate CALL instruction */
regs->sp -= sizeof(unsigned long);
*(unsigned long *)regs->sp = regs->ip + CALL_INSN_SIZE - 1;
regs->ip = data->func;
break;

case JMP_INSN: /* emulate JMP instruction */
regs->ip = data->func;
break;
}
}

> void arch_static_call_transform(void *site, void *tramp, void *func)
> {
> @@ -32,11 +38,17 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
> unsigned long insn;
> unsigned char insn_opcode;
> unsigned char opcodes[CALL_INSN_SIZE];
> + struct static_call_bp_data handler_data;
> +
> + handler_data.func = (unsigned long)func;
>
> - if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE))
> + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE)) {
> insn = (unsigned long)site;
> + handler_data.ret = insn + CALL_INSN_SIZE;
> + } else {
> insn = (unsigned long)tramp;
> + handler_data.ret = 0;
> + }

handler_data = (struct static_call_bp_data){
.type = IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) ? CALL_INSN : JMP_INSN,
.func = func,
};

> mutex_lock(&text_mutex);
>
> @@ -52,14 +64,9 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
> opcodes[0] = insn_opcode;
> memcpy(&opcodes[1], &dest_relative, CALL_INSN_SIZE - 1);
>
> /* Patch the call site: */
> text_poke_bp((void *)insn, opcodes, CALL_INSN_SIZE,
> + static_call_bp_handler, &handler_data);
>
> done:
> mutex_unlock(&text_mutex);