Re: [PATCH 2/3] x86/alternatives,jump_label: Provide better text_poke() batching interface

From: Masami Hiramatsu
Date: Thu Oct 03 2019 - 01:50:43 EST


Hi Peter,

On Tue, 27 Aug 2019 20:06:24 +0200
Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:

> Adding another text_poke_bp_batch() user made me realize the interface
> is all sorts of wrong. The text poke vector should be internal to the
> implementation.
>
> This then results in a trivial interface:
>
>   text_poke_queue()  - which has the 'normal' text_poke_bp() interface
>   text_poke_finish() - which takes no arguments and flushes any
>                        pending text_poke()s.

Looks good to me. It may be easy to apply this to optprobe as well.

Reviewed-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx>

Thank you,

>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> Cc: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
> Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
> ---
> arch/x86/include/asm/text-patching.h | 16 ++-----
> arch/x86/kernel/alternative.c | 64 +++++++++++++++++++++++++---
> arch/x86/kernel/jump_label.c | 80 +++++++++--------------------------
> 3 files changed, 84 insertions(+), 76 deletions(-)
>
> --- a/arch/x86/include/asm/text-patching.h
> +++ b/arch/x86/include/asm/text-patching.h
> @@ -25,14 +25,6 @@ static inline void apply_paravirt(struct
> */
> #define POKE_MAX_OPCODE_SIZE 5
>
> -struct text_poke_loc {
> - void *addr;
> - int len;
> - s32 rel32;
> - u8 opcode;
> - const char text[POKE_MAX_OPCODE_SIZE];
> -};
> -
> extern void text_poke_early(void *addr, const void *opcode, size_t len);
>
> /*
> @@ -53,13 +45,15 @@ extern void *text_poke(void *addr, const
> extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
> extern int poke_int3_handler(struct pt_regs *regs);
> extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
> -extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
> -extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
> - const void *opcode, size_t len, const void *emulate);
> +
> +extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate);
> +extern void text_poke_finish(void);
> +
> extern int after_bootmem;
> extern __ro_after_init struct mm_struct *poking_mm;
> extern __ro_after_init unsigned long poking_addr;
>
> +
> #ifndef CONFIG_UML_X86
> static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
> {
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -936,6 +936,14 @@ static void do_sync_core(void *info)
> sync_core();
> }
>
> +struct text_poke_loc {
> + void *addr;
> + int len;
> + s32 rel32;
> + u8 opcode;
> + const char text[POKE_MAX_OPCODE_SIZE];
> +};
> +
> static struct bp_patching_desc {
> struct text_poke_loc *vec;
> int nr_entries;
> @@ -1017,6 +1025,10 @@ int poke_int3_handler(struct pt_regs *re
> }
> NOKPROBE_SYMBOL(poke_int3_handler);
>
> +#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
> +static struct text_poke_loc tp_vec[TP_VEC_MAX];
> +static int tp_vec_nr;
> +
> /**
> * text_poke_bp_batch() -- update instructions on live kernel on SMP
> * @tp: vector of instructions to patch
> @@ -1038,7 +1050,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
> * replacing opcode
> * - sync cores
> */
> -void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
> +static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
> {
> unsigned char int3 = INT3_INSN_OPCODE;
> int patched_all_but_first = 0;
> @@ -1105,11 +1117,7 @@ void text_poke_loc_init(struct text_poke
> {
> struct insn insn;
>
> - if (!opcode)
> - opcode = (void *)tp->text;
> - else
> - memcpy((void *)tp->text, opcode, len);
> -
> + memcpy((void *)tp->text, opcode, len);
> if (!emulate)
> emulate = opcode;
>
> @@ -1147,6 +1155,50 @@ void text_poke_loc_init(struct text_poke
> }
> }
>
> +/*
> + * We hard rely on the tp_vec being ordered; ensure this is so by flushing
> + * early if needed.
> + */
> +static bool tp_order_fail(void *addr)
> +{
> + struct text_poke_loc *tp;
> +
> + if (!tp_vec_nr)
> + return false;
> +
> + if (!addr) /* force */
> + return true;
> +
> + tp = &tp_vec[tp_vec_nr - 1];
> + if ((unsigned long)tp->addr > (unsigned long)addr)
> + return true;
> +
> + return false;
> +}
> +
> +static void text_poke_flush(void *addr)
> +{
> + if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
> + text_poke_bp_batch(tp_vec, tp_vec_nr);
> + tp_vec_nr = 0;
> + }
> +}
> +
> +void text_poke_finish(void)
> +{
> + text_poke_flush(NULL);
> +}
> +
> +void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
> +{
> + struct text_poke_loc *tp;
> +
> + text_poke_flush(addr);
> +
> + tp = &tp_vec[tp_vec_nr++];
> + text_poke_loc_init(tp, addr, opcode, len, emulate);
> +}
> +
> /**
> * text_poke_bp() -- update instructions on live kernel on SMP
> * @addr: address to patch
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -35,18 +35,19 @@ static void bug_at(unsigned char *ip, in
> BUG();
> }
>
> -static void __jump_label_set_jump_code(struct jump_entry *entry,
> - enum jump_label_type type,
> - union jump_code_union *code,
> - int init)
> +static const void *
> +__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init)
> {
> + static union jump_code_union code; /* relies on text_mutex */
> const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
> const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
> const void *expect;
> int line;
>
> - code->jump = 0xe9;
> - code->offset = jump_entry_target(entry) -
> + lockdep_assert_held(&text_mutex);
> +
> + code.jump = JMP32_INSN_OPCODE;
> + code.offset = jump_entry_target(entry) -
> (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
>
> if (init) {
> @@ -54,23 +55,23 @@ static void __jump_label_set_jump_code(s
> } else if (type == JUMP_LABEL_JMP) {
> expect = ideal_nop; line = __LINE__;
> } else {
> - expect = code->code; line = __LINE__;
> + expect = code.code; line = __LINE__;
> }
>
> if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
> bug_at((void *)jump_entry_code(entry), line);
>
> if (type == JUMP_LABEL_NOP)
> - memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);
> + memcpy(&code, ideal_nop, JUMP_LABEL_NOP_SIZE);
> +
> + return &code;
> }
>
> static void __ref __jump_label_transform(struct jump_entry *entry,
> enum jump_label_type type,
> int init)
> {
> - union jump_code_union code;
> -
> - __jump_label_set_jump_code(entry, type, &code, init);
> + const void *opcode = __jump_label_set_jump_code(entry, type, init);
>
> /*
> * As long as only a single processor is running and the code is still
> @@ -84,12 +85,12 @@ static void __ref __jump_label_transform
> * always nop being the 'currently valid' instruction
> */
> if (init || system_state == SYSTEM_BOOTING) {
> - text_poke_early((void *)jump_entry_code(entry), &code,
> + text_poke_early((void *)jump_entry_code(entry), opcode,
> JUMP_LABEL_NOP_SIZE);
> return;
> }
>
> - text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
> + text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL);
> }
>
> void arch_jump_label_transform(struct jump_entry *entry,
> @@ -100,15 +101,10 @@ void arch_jump_label_transform(struct ju
> mutex_unlock(&text_mutex);
> }
>
> -#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
> -static struct text_poke_loc tp_vec[TP_VEC_MAX];
> -static int tp_vec_nr;
> -
> bool arch_jump_label_transform_queue(struct jump_entry *entry,
> enum jump_label_type type)
> {
> - struct text_poke_loc *tp;
> - void *entry_code;
> + const void *opcode;
>
> if (system_state == SYSTEM_BOOTING) {
> /*
> @@ -118,53 +114,19 @@ bool arch_jump_label_transform_queue(str
> return true;
> }
>
> - /*
> - * No more space in the vector, tell upper layer to apply
> - * the queue before continuing.
> - */
> - if (tp_vec_nr == TP_VEC_MAX)
> - return false;
> -
> - tp = &tp_vec[tp_vec_nr];
> -
> - entry_code = (void *)jump_entry_code(entry);
> -
> - /*
> - * The INT3 handler will do a bsearch in the queue, so we need entries
> - * to be sorted. We can survive an unsorted list by rejecting the entry,
> - * forcing the generic jump_label code to apply the queue. Warning once,
> - * to raise the attention to the case of an unsorted entry that is
> - * better not happen, because, in the worst case we will perform in the
> - * same way as we do without batching - with some more overhead.
> - */
> - if (tp_vec_nr > 0) {
> - int prev = tp_vec_nr - 1;
> - struct text_poke_loc *prev_tp = &tp_vec[prev];
> -
> - if (WARN_ON_ONCE(prev_tp->addr > entry_code))
> - return false;
> - }
> -
> - __jump_label_set_jump_code(entry, type,
> - (union jump_code_union *)&tp->text, 0);
> -
> - text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);
> -
> - tp_vec_nr++;
> -
> + mutex_lock(&text_mutex);
> + opcode = __jump_label_set_jump_code(entry, type, 0);
> + text_poke_queue((void *)jump_entry_code(entry),
> + opcode, JUMP_LABEL_NOP_SIZE, NULL);
> + mutex_unlock(&text_mutex);
> return true;
> }
>
> void arch_jump_label_transform_apply(void)
> {
> - if (!tp_vec_nr)
> - return;
> -
> mutex_lock(&text_mutex);
> - text_poke_bp_batch(tp_vec, tp_vec_nr);
> + text_poke_finish();
> mutex_unlock(&text_mutex);
> -
> - tp_vec_nr = 0;
> }
>
> static enum {
>
>


--
Masami Hiramatsu <mhiramat@xxxxxxxxxx>