Re: [RFC PATCH v2 26/31] fprobe: Rewrite fprobe on function-graph tracer

From: Google
Date: Fri Nov 10 2023 - 12:44:44 EST


Hi,

On Wed, 8 Nov 2023 23:29:22 +0900
"Masami Hiramatsu (Google)" <mhiramat@xxxxxxxxxx> wrote:

> + */
> +static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip,
> + struct fprobe *fp, struct ftrace_regs *fregs,
> + void *data)
> +{
> + int ret = 0;
>
> + if (fp->entry_handler) {
> + if (fp->exit_handler && fp->entry_data_size)
> + data += sizeof(unsigned long);
> + else
> + data = NULL;
> + ret = fp->entry_handler(fp, ip, parent_ip, fregs, data);
> + }
> +
> + return ret;
> +}
> +
> +static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
> + struct fprobe *fp, struct ftrace_regs *fregs,
> + void *data)
> +{
> + int ret;
> /*
> * This user handler is shared with other kprobes and is not expected to be
> * called recursively. So if any other kprobe handler is running, this will
> @@ -108,45 +210,173 @@ static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
> */
> if (unlikely(kprobe_running())) {
> fp->nmissed++;
> - goto recursion_unlock;
> + return 0;
> }
>
> kprobe_busy_begin();
> - __fprobe_handler(ip, parent_ip, ops, fregs);
> + ret = __fprobe_handler(ip, parent_ip, fp, fregs, data);
> kprobe_busy_end();
> -
> -recursion_unlock:
> - ftrace_test_recursion_unlock(bit);
> + return ret;
> }
>
> -static void fprobe_exit_handler(struct rethook_node *rh, void *data,
> - unsigned long ret_ip, struct pt_regs *regs)
> +static int fprobe_entry(unsigned long func, unsigned long ret_ip,
> + struct ftrace_regs *fregs, struct fgraph_ops *gops)
> {
> - struct fprobe *fp = (struct fprobe *)data;
> - struct fprobe_rethook_node *fpr;
> - struct ftrace_regs *fregs = (struct ftrace_regs *)regs;
> - int bit;
> + struct fprobe_hlist_node *node, *first;
> + unsigned long header;
> + void *fgraph_data = NULL;
> + struct fprobe *fp;
> + int size, used, ret;
>
> - if (!fp || fprobe_disabled(fp))
> - return;
> + if (WARN_ON_ONCE(!fregs))
> + return 0;
>
> - fpr = container_of(rh, struct fprobe_rethook_node, node);
> + first = node = find_first_fprobe_node(func);
> + if (unlikely(!first))
> + return 0;
> +
> + size = 0;
> + hlist_for_each_entry_from_rcu(node, hlist) {
> + if (node->addr != func)
> + break;
> + fp = READ_ONCE(node->fp);
> + /*
> + * Since fprobe can be enabled until the next loop, we ignore the
> + * disabled flag in this loop.
> + */
> + if (fp && fp->exit_handler)
> + size += FPROBE_HEADER_SIZE + fp->entry_data_size;
> + }
> + node = first;
> + /* size can be 0 because fp only has entry_handler. */
> + if (size) {
> + fgraph_data = fgraph_reserve_data(size);
> + if (unlikely(!fgraph_data)) {
> + hlist_for_each_entry_from_rcu(node, hlist) {
> + if (node->addr != func)
> + break;
> + fp = READ_ONCE(node->fp);
> + if (fp && !fprobe_disabled(fp))
> + fp->nmissed++;
> + }
> + return 0;
> + }
> + }
>
> /*
> - * we need to assure no calls to traceable functions in-between the
> - * end of fprobe_handler and the beginning of fprobe_exit_handler.
> + * TODO: recursion detection has been done in the fgraph. Thus we need
> + * to add a callback to increment missed counter.
> */
> - bit = ftrace_test_recursion_trylock(fpr->entry_ip, fpr->entry_parent_ip);
> - if (bit < 0) {
> - fp->nmissed++;
> + used = 0;
> + hlist_for_each_entry_from_rcu(node, hlist) {
> + if (node->addr != func)
> + break;
> + fp = READ_ONCE(node->fp);
> + if (!fp || fprobe_disabled(fp))
> + continue;
> +
> + if (fprobe_shared_with_kprobes(fp))
> + ret = __fprobe_kprobe_handler(func, ret_ip,
> + fp, fregs, fgraph_data + used);
> + else
> + ret = __fprobe_handler(func, ret_ip, fp,
> + fregs, fgraph_data + used);


Since the fgraph callback runs under RCU lock but not with preemption
disabled, the fprobe unit test fails here. I need to add
preempt_disable_notrace() and preempt_enable_notrace() around these handler
calls. Note that kprobe_busy_begin()/end() also access per-cpu variables,
so they require preemption to be disabled as well.
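For reference, the fixup I have in mind looks roughly like the below
(only a sketch of the handler-invocation part of the loop quoted above,
not the final patch; the rest of the loop body stays as in the patch):

	hlist_for_each_entry_from_rcu(node, hlist) {
		if (node->addr != func)
			break;
		fp = READ_ONCE(node->fp);
		if (!fp || fprobe_disabled(fp))
			continue;

		/*
		 * The fgraph callback is only RCU-protected; disable
		 * preemption around the handlers because
		 * kprobe_busy_begin()/end() and the entry handlers can
		 * touch per-cpu data.
		 */
		preempt_disable_notrace();
		if (fprobe_shared_with_kprobes(fp))
			ret = __fprobe_kprobe_handler(func, ret_ip,
					fp, fregs, fgraph_data + used);
		else
			ret = __fprobe_handler(func, ret_ip, fp,
					fregs, fgraph_data + used);
		preempt_enable_notrace();

		/* ... rest of the loop unchanged ... */
	}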

Thank you,


--
Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>