Re: [PATCH 22/32] tracing: Add support for 'synthetic' events

From: Namhyung Kim
Date: Sun Jul 23 2017 - 08:01:30 EST


Hi Tom,

On Mon, Jun 26, 2017 at 05:49:23PM -0500, Tom Zanussi wrote:
> Synthetic events are user-defined events generated from hist trigger
> variables saved from one or more other events.
>
> To define a synthetic event, the user writes a simple specification
> consisting of the name of the new event along with one or more
> variables and their type(s), to the tracing/synthetic_events file.
>
> For instance, the following creates a new event named 'wakeup_latency'
> with 3 fields: lat, pid, and prio:
>
> # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \
> /sys/kernel/debug/tracing/synthetic_events
>
> Reading the tracing/synthetic_events file lists all the
> currently-defined synthetic events, in this case the event we defined
> above:
>
> # cat /sys/kernel/debug/tracing/synthetic_events
> wakeup_latency u64 lat; pid_t pid; int prio
>
> At this point, the synthetic event is ready to use, and a histogram
> can be defined using it:
>
> # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
> /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
>
> The new event is created under the tracing/events/synthetic/ directory
> and looks and behaves just like any other event:
>
> # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency
> enable filter format hist id trigger
>
> Although a histogram can be defined for it, nothing will happen until
> an action tracing that event via the trace_synth() function occurs.
> The trace_synth() function is very similar to all the other trace_*
> invocations spread throughout the kernel, except in this case the
> trace_ function and its corresponding tracepoint aren't statically
> generated but defined by the user at run-time.
>
> How this can be automatically hooked up via a hist trigger 'action' is
> discussed in a subsequent patch.
>
> Signed-off-by: Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx>
> ---
> kernel/trace/trace_events_hist.c | 738 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 738 insertions(+)
>
> diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
> index 338a9d5..e11b3a3 100644
> --- a/kernel/trace/trace_events_hist.c
> +++ b/kernel/trace/trace_events_hist.c

[SNIP]
> @@ -273,6 +294,688 @@ struct action_data {
> unsigned int var_ref_idx;
> };
>
> +static LIST_HEAD(synth_event_list);
> +static DEFINE_MUTEX(synth_event_mutex);
> +
> +struct synth_trace_event {
> + struct trace_entry ent;
> + int n_fields;
> + u64 fields[];
> +};
> +
> +static int synth_event_define_fields(struct trace_event_call *call)
> +{
> + struct synth_trace_event trace;
> + int offset = offsetof(typeof(trace), fields);
> + struct synth_event *event = call->data;
> + unsigned int i, size;
> + char *name, *type;
> + bool is_signed;
> + int ret = 0;
> +
> + for (i = 0; i < event->n_fields; i++) {
> + size = event->fields[i]->size;
> + is_signed = event->fields[i]->is_signed;
> + type = event->fields[i]->type;
> + name = event->fields[i]->name;
> + ret = trace_define_field(call, type, name, offset, size,
> + is_signed, FILTER_OTHER);
> + offset += sizeof(u64);

So you assume the size of a field is at most 64 bits, right? Is that
guaranteed somewhere?


> + }
> +
> + return ret;
> +}
> +
> +static enum print_line_t print_synth_event(struct trace_iterator *iter,
> + int flags,
> + struct trace_event *event)
> +{
> + struct trace_array *tr = iter->tr;
> + struct trace_seq *s = &iter->seq;
> + struct synth_trace_event *entry;
> + struct synth_event *se;
> + unsigned int i;
> +
> + entry = (struct synth_trace_event *)iter->ent;
> + se = container_of(event, struct synth_event, call.event);
> +
> + trace_seq_printf(s, "%s: ", se->name);
> +
> + for (i = 0; i < entry->n_fields; i++) {
> + if (trace_seq_has_overflowed(s))
> + goto end;
> +
> + /* parameter types */
> + if (tr->trace_flags & TRACE_ITER_VERBOSE)
> + trace_seq_printf(s, "%s ", "u64");

Why did you hardcode 'u64' here?

> +
> + /* parameter values */
> + trace_seq_printf(s, "%s=%llu%s", se->fields[i]->name,
> + entry->fields[i],
> + i == entry->n_fields - 1 ? "" : ", ");
> + }
> +end:
> + trace_seq_putc(s, '\n');
> +
> + return trace_handle_return(s);
> +}
> +
> +static struct trace_event_functions synth_event_funcs = {
> + .trace = print_synth_event
> +};
> +

[SNIP]
> +static unsigned int synth_field_size(char *type)
> +{
> + unsigned int size = 0;
> +
> + if (strcmp(type, "s64") == 0)
> + size = sizeof(s64);
> + else if (strcmp(type, "u64") == 0)
> + size = sizeof(u64);
> + else if (strcmp(type, "s32") == 0)
> + size = sizeof(s32);
> + else if (strcmp(type, "u32") == 0)
> + size = sizeof(u32);
> + else if (strcmp(type, "s16") == 0)
> + size = sizeof(s16);
> + else if (strcmp(type, "u16") == 0)
> + size = sizeof(u16);
> + else if (strcmp(type, "s8") == 0)
> + size = sizeof(s8);
> + else if (strcmp(type, "u8") == 0)
> + size = sizeof(u8);
> + else if (strcmp(type, "char") == 0)
> + size = sizeof(char);
> + else if (strcmp(type, "unsigned char") == 0)
> + size = sizeof(unsigned char);
> + else if (strcmp(type, "int") == 0)
> + size = sizeof(int);
> + else if (strcmp(type, "unsigned int") == 0)
> + size = sizeof(unsigned int);
> + else if (strcmp(type, "long") == 0)
> + size = sizeof(long);
> + else if (strcmp(type, "unsigned long") == 0)
> + size = sizeof(unsigned long);
> + else if (strcmp(type, "pid_t") == 0)
> + size = sizeof(pid_t);
> + else if (strstr(type, "[") == 0)
> + size = sizeof(u64);

Ah ok, so you don't accept arrays and all supported fields are less
than or equal to 8 bytes.

> +
> + return size;
> +}
> +

[SNIP]
> +static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals,
> + unsigned int var_ref_idx)
> +{
> + struct tracepoint *tp = event->tp;
> +
> + if (unlikely(atomic_read(&tp->key.enabled) > 0)) {
> + struct tracepoint_func *it_func_ptr;
> + void *it_func;
> + void *__data;
> +
> + if (!(cpu_online(raw_smp_processor_id())))
> + return;
> +
> + it_func_ptr = rcu_dereference_sched((tp)->funcs);
> + if (it_func_ptr) {
> + do {
> + it_func = (it_func_ptr)->func;
> + __data = (it_func_ptr)->data;
> + ((void(*)(void *__data, u64 *var_ref_vals, unsigned int var_ref_idx))(it_func))(__data, var_ref_vals, var_ref_idx);

Ouch, why not define a function pointer?

Thanks,
Namhyung


> + } while ((++it_func_ptr)->func);
> + }
> + }
> +}