Re: [PATCH 3/3] x86/ftrace: Use text_poke()

From: Daniel Bristot de Oliveira
Date: Wed Oct 02 2019 - 12:35:37 EST


On 27/08/2019 20:06, Peter Zijlstra wrote:
> Move ftrace over to using the generic x86 text_poke functions; this
> avoids having a second/different copy of that code around.
>
> This also avoids ftrace violating the (new) W^X rule and avoids
> fragmenting the kernel text page-tables, due to no longer having to
> toggle them RW.

I tested this patch, and it works... but it generates more IPIs than the
previous one.

> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> Cc: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> ---
> arch/x86/include/asm/ftrace.h | 2
> arch/x86/kernel/alternative.c | 4
> arch/x86/kernel/ftrace.c | 630 ++++++------------------------------------
> arch/x86/kernel/traps.c | 9
> 4 files changed, 93 insertions(+), 552 deletions(-)
>
> --- a/arch/x86/include/asm/ftrace.h
> +++ b/arch/x86/include/asm/ftrace.h

[ ... jumping to the point ...]

> -
> void ftrace_replace_code(int enable)
> {
> struct ftrace_rec_iter *iter;
> struct dyn_ftrace *rec;
> - const char *report = "adding breakpoints";
> - int count = 0;
> + const char *new, *old;
> int ret;
>
> for_ftrace_rec_iter(iter) {
> rec = ftrace_rec_iter_record(iter);
>
> - ret = add_breakpoints(rec, enable);
> - if (ret)
> - goto remove_breakpoints;
> - count++;
> - }
> -
> - run_sync();

ftrace was already batching the updates, for instance, causing 3 IPIs to enable
all functions. The text_poke() batching also works. But because of the limited
buffer [ see the reply to the patch 2/3 ], it is flushing the buffer during the
operation, causing more IPIs than the previous code. Using the 5.4-rc1 in a VM,
when enabling the function tracer, I see 250+ intermediate text_poke_finish()
because of a full buffer...

Would this be the case of trying to use a dynamically allocated buffer?

Thoughts?

-- Daniel

> -
> - report = "updating code";
> - count = 0;
> -
> - for_ftrace_rec_iter(iter) {
> - rec = ftrace_rec_iter_record(iter);
> -
> - ret = add_update(rec, enable);
> - if (ret)
> - goto remove_breakpoints;
> - count++;
> + switch (ftrace_test_record(rec, enable)) {
> + case FTRACE_UPDATE_IGNORE:
> + default:
> + continue;
> +
> + case FTRACE_UPDATE_MAKE_CALL:
> + old = ftrace_nop_replace();
> + break;
> +
> + case FTRACE_UPDATE_MODIFY_CALL:
> + case FTRACE_UPDATE_MAKE_NOP:
> + old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec));
> + break;
> + };
> +
> + ret = ftrace_verify_code(rec->ip, old);
> + if (ret) {
> + ftrace_bug(ret, rec);
> + return;
> + }
> }
>
> - run_sync();
> -
> - report = "removing breakpoints";
> - count = 0;
> -
> for_ftrace_rec_iter(iter) {
> rec = ftrace_rec_iter_record(iter);
>
> - ret = finish_update(rec, enable);
> - if (ret)
> - goto remove_breakpoints;
> - count++;
> - }
> -
> - run_sync();
> -
> - return;
> + switch (ftrace_test_record(rec, enable)) {
> + case FTRACE_UPDATE_IGNORE:
> + default:
> + continue;
> +
> + case FTRACE_UPDATE_MAKE_CALL:
> + case FTRACE_UPDATE_MODIFY_CALL:
> + new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec));
> + break;
> +
> + case FTRACE_UPDATE_MAKE_NOP:
> + new = ftrace_nop_replace();
> + break;
> + };
>
> - remove_breakpoints:
> - pr_warn("Failed on %s (%d):\n", report, count);
> - ftrace_bug(ret, rec);
> - for_ftrace_rec_iter(iter) {
> - rec = ftrace_rec_iter_record(iter);
> - /*
> - * Breakpoints are handled only when this function is in
> - * progress. The system could not work with them.
> - */
> - if (remove_breakpoint(rec))
> - BUG();
> + text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);
> + ftrace_update_record(rec, enable);
> }
> - run_sync();
> -}
> -
> -static int
> -ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
> - unsigned const char *new_code)
> -{
> - int ret;
> -
> - ret = add_break(ip, old_code);
> - if (ret)
> - goto out;
> -
> - run_sync();
> -
> - ret = add_update_code(ip, new_code);
> - if (ret)
> - goto fail_update;
> -
> - run_sync();
> -
> - ret = ftrace_write(ip, new_code, 1);
> - /*
> - * The breakpoint is handled only when this function is in progress.
> - * The system could not work if we could not remove it.
> - */
> - BUG_ON(ret);
> - out:
> - run_sync();
> - return ret;
> -
> - fail_update:
> - /* Also here the system could not work with the breakpoint */
> - if (ftrace_write(ip, old_code, 1))
> - BUG();
> - goto out;
> + text_poke_finish();
> }
>
> void arch_ftrace_update_code(int command)
> {
> - /* See comment above by declaration of modifying_ftrace_code */
> - atomic_inc(&modifying_ftrace_code);
> -
> ftrace_modify_all_code(command);
> -
> - atomic_dec(&modifying_ftrace_code);
> }
>
> int __init ftrace_dyn_arch_init(void)
> @@ -828,11 +394,7 @@ create_trampoline(struct ftrace_ops *ops
>
> set_vm_flush_reset_perms(trampoline);
>
> - /*
> - * Module allocation needs to be completed by making the page
> - * executable. The page is still writable, which is a security hazard,
> - * but anyhow ftrace breaks W^X completely.
> - */
> + set_memory_ro((unsigned long)trampoline, npages);
> set_memory_x((unsigned long)trampoline, npages);
> return (unsigned long)trampoline;
> fail:
> @@ -859,11 +421,10 @@ static unsigned long calc_trampoline_cal
> void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
> {
> ftrace_func_t func;
> - unsigned char *new;
> unsigned long offset;
> unsigned long ip;
> unsigned int size;
> - int ret, npages;
> + const char *new;
>
> if (ops->trampoline) {
> /*
> @@ -872,14 +433,11 @@ void arch_ftrace_update_trampoline(struc
> */
> if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
> return;
> - npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
> - set_memory_rw(ops->trampoline, npages);
> } else {
> ops->trampoline = create_trampoline(ops, &size);
> if (!ops->trampoline)
> return;
> ops->trampoline_size = size;
> - npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
> }
>
> offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
> @@ -887,15 +445,11 @@ void arch_ftrace_update_trampoline(struc
>
> func = ftrace_ops_get_func(ops);
>
> - ftrace_update_func_call = (unsigned long)func;
> -
> + mutex_lock(&text_mutex);
> /* Do a safe modify in case the trampoline is executing */
> new = ftrace_call_replace(ip, (unsigned long)func);
> - ret = update_ftrace_func(ip, new);
> - set_memory_ro(ops->trampoline, npages);
> -
> - /* The update should never fail */
> - WARN_ON(ret);
> + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
> + mutex_unlock(&text_mutex);
> }
>
> /* Return the address of the function the trampoline calls */
> @@ -981,19 +535,18 @@ void arch_ftrace_trampoline_free(struct
> #ifdef CONFIG_DYNAMIC_FTRACE
> extern void ftrace_graph_call(void);
>
> -static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
> +static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
> {
> - return ftrace_text_replace(0xe9, ip, addr);
> + return ftrace_text_replace(JMP32_INSN_OPCODE, ip, addr);
> }
>
> static int ftrace_mod_jmp(unsigned long ip, void *func)
> {
> - unsigned char *new;
> + const char *new;
>
> - ftrace_update_func_call = 0UL;
> new = ftrace_jmp_replace(ip, (unsigned long)func);
> -
> - return update_ftrace_func(ip, new);
> + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); // BATCH
> + return 0;
> }
>
> int ftrace_enable_ftrace_graph_caller(void)
> @@ -1019,10 +572,9 @@ int ftrace_disable_ftrace_graph_caller(v
> void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
> unsigned long frame_pointer)
> {
> + unsigned long return_hooker = (unsigned long)&return_to_handler;
> unsigned long old;
> int faulted;
> - unsigned long return_hooker = (unsigned long)
> - &return_to_handler;
>
> /*
> * When resuming from suspend-to-ram, this function can be indirectly
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -568,15 +568,6 @@ NOKPROBE_SYMBOL(do_general_protection);
>
> dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
> {
> -#ifdef CONFIG_DYNAMIC_FTRACE
> - /*
> - * ftrace must be first, everything else may cause a recursive crash.
> - * See note by declaration of modifying_ftrace_code in ftrace.c
> - */
> - if (unlikely(atomic_read(&modifying_ftrace_code)) &&
> - ftrace_int3_handler(regs))
> - return;
> -#endif
> if (poke_int3_handler(regs))
> return;
>
>
>