Re: [PATCH v9.1 02/11] tracing/probes: Add fprobe events for tracing function entry and exit.

From: Steven Rostedt
Date: Fri May 05 2023 - 12:12:20 EST


On Tue, 2 May 2023 11:17:36 +0900
"Masami Hiramatsu (Google)" <mhiramat@xxxxxxxxxx> wrote:

> diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
> new file mode 100644
> index 000000000000..0049d9ef2402
> --- /dev/null
> +++ b/kernel/trace/trace_fprobe.c
> @@ -0,0 +1,1053 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Fprobe-based tracing events
> + * Copyright (C) 2022 Google LLC.
> + */
> +#define pr_fmt(fmt) "trace_fprobe: " fmt
> +
> +#include <linux/fprobe.h>
> +#include <linux/module.h>
> +#include <linux/rculist.h>
> +#include <linux/security.h>
> +#include <linux/uaccess.h>
> +
> +#include "trace_dynevent.h"
> +#include "trace_probe.h"
> +#include "trace_probe_kernel.h"
> +#include "trace_probe_tmpl.h"
> +
> +#define FPROBE_EVENT_SYSTEM "fprobes"
> +#define RETHOOK_MAXACTIVE_MAX 4096
> +
> +static int trace_fprobe_create(const char *raw_command);
> +static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev);
> +static int trace_fprobe_release(struct dyn_event *ev);
> +static bool trace_fprobe_is_busy(struct dyn_event *ev);
> +static bool trace_fprobe_match(const char *system, const char *event,
> + int argc, const char **argv, struct dyn_event *ev);
> +
> +static struct dyn_event_operations trace_fprobe_ops = {
> + .create = trace_fprobe_create,
> + .show = trace_fprobe_show,
> + .is_busy = trace_fprobe_is_busy,
> + .free = trace_fprobe_release,
> + .match = trace_fprobe_match,
> +};
> +
> +/*
> + * Fprobe event core functions
> + */
> +struct trace_fprobe {
> + struct dyn_event devent;
> + struct fprobe fp;
> + const char *symbol;
> + struct trace_probe tp;
> +};
> +
> +static bool is_trace_fprobe(struct dyn_event *ev)
> +{
> + return ev->ops == &trace_fprobe_ops;
> +}
> +
> +static struct trace_fprobe *to_trace_fprobe(struct dyn_event *ev)
> +{
> + return container_of(ev, struct trace_fprobe, devent);
> +}
> +
> +/**
> + * for_each_trace_fprobe - iterate over the trace_fprobe list
> + * @pos: the struct trace_fprobe * for each entry
> + * @dpos: the struct dyn_event * to use as a loop cursor
> + */
> +#define for_each_trace_fprobe(pos, dpos) \
> + for_each_dyn_event(dpos) \
> + if (is_trace_fprobe(dpos) && (pos = to_trace_fprobe(dpos)))
> +
> +static bool trace_fprobe_is_return(struct trace_fprobe *tf)
> +{
> + return tf->fp.exit_handler != NULL;
> +}
> +
> +static const char *trace_fprobe_symbol(struct trace_fprobe *tf)
> +{
> + return tf->symbol ? tf->symbol : "unknown";
> +}
> +
> +static bool trace_fprobe_is_busy(struct dyn_event *ev)
> +{
> + struct trace_fprobe *tf = to_trace_fprobe(ev);
> +
> + return trace_probe_is_enabled(&tf->tp);
> +}
> +
> +static bool trace_fprobe_match_command_head(struct trace_fprobe *tf,
> + int argc, const char **argv)
> +{
> + char buf[MAX_ARGSTR_LEN + 1];
> +
> + if (!argc)
> + return true;
> +
> + snprintf(buf, sizeof(buf), "%s", trace_fprobe_symbol(tf));
> + if (strcmp(buf, argv[0]))
> + return false;
> + argc--; argv++;
> +
> + return trace_probe_match_command_args(&tf->tp, argc, argv);
> +}
> +
> +static bool trace_fprobe_match(const char *system, const char *event,
> + int argc, const char **argv, struct dyn_event *ev)
> +{
> + struct trace_fprobe *tf = to_trace_fprobe(ev);
> +
> + return (event[0] == '\0' ||
> + strcmp(trace_probe_name(&tf->tp), event) == 0) &&
> + (!system || strcmp(trace_probe_group_name(&tf->tp), system) == 0) &&
> + trace_fprobe_match_command_head(tf, argc, argv);

The above is really hard to read, and Linus hates these kinds of
statements. Please break it up (the compiler should do the right thing).

Reversing the tests to return false:

if (event[0] != '\0' &&
strcmp(trace_probe_name(&tf->tp), event) != 0))
return false;

if (system && strcmp(trace_probe_group_name(&tf->tp), system) != 0))
return false;

return trace_fprobe_match_command_head(tf, argc, argv);


> +}
> +
> +static bool trace_fprobe_is_registered(struct trace_fprobe *tf)
> +{
> + return fprobe_is_registered(&tf->fp);
> +}
> +
> +/* Note that we don't verify it, since the code does not come from user space */

Verify what?

Hmm, I see this is a copy of the comment from both trace_kprobe.c and
trace_uprobe.c. I think this requires a bit more explanation (and also in
those locations as well).

> +static int
> +process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
> + void *base)
> +{
> + struct pt_regs *regs = rec;
> + unsigned long val;
> +
> +retry:
> + /* 1st stage: get value from context */
> + switch (code->op) {
> + case FETCH_OP_REG:
> + val = regs_get_register(regs, code->param);
> + break;
> + case FETCH_OP_STACK:
> + val = regs_get_kernel_stack_nth(regs, code->param);
> + break;
> + case FETCH_OP_STACKP:
> + val = kernel_stack_pointer(regs);
> + break;
> + case FETCH_OP_RETVAL:
> + val = regs_return_value(regs);
> + break;


> + case FETCH_OP_IMM:
> + val = code->immediate;
> + break;
> + case FETCH_OP_COMM:
> + val = (unsigned long)current->comm;
> + break;
> + case FETCH_OP_DATA:
> + val = (unsigned long)code->data;
> + break;

These are new and not part of trace_kprobe.c. Should we have a version that
the two could share?

Probably a helper function that can be called by these two.

> +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
> + case FETCH_OP_ARG:
> + val = regs_get_kernel_argument(regs, code->param);
> + break;
> +#endif
> + case FETCH_NOP_SYMBOL: /* Ignore a place holder */
> + code++;
> + goto retry;
> + default:
> + return -EILSEQ;
> + }
> + code++;
> +
> + return process_fetch_insn_bottom(code, val, dest, base);
> +}
> +NOKPROBE_SYMBOL(process_fetch_insn)
> +
> +/* function entry handler */
> +static nokprobe_inline void
> +__fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
> + struct pt_regs *regs,
> + struct trace_event_file *trace_file)
> +{
> + struct fentry_trace_entry_head *entry;
> + struct trace_event_call *call = trace_probe_event_call(&tf->tp);
> + struct trace_event_buffer fbuffer;
> + int dsize;
> +
> + WARN_ON(call != trace_file->event_call);

WARN_ON_ONCE()?

And if you are doing the check, perhaps even:

if (WARN_ON_ONCE(call != trace_file->event_call))
return;

-- Steve

> +
> + if (trace_trigger_soft_disabled(trace_file))
> + return;
> +
> + dsize = __get_data_size(&tf->tp, regs);
> +
> + entry = trace_event_buffer_reserve(&fbuffer, trace_file,
> + sizeof(*entry) + tf->tp.size + dsize);
> + if (!entry)
> + return;
> +
> + fbuffer.regs = regs;
> + entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
> + entry->ip = entry_ip;
> + store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
> +
> + trace_event_buffer_commit(&fbuffer);
> +}
> +