Re: [for-next][PATCH 1/5] tracepoint: Give priority to probes of tracepoints

From: Mathieu Desnoyers
Date: Thu Oct 29 2015 - 13:52:30 EST


----- On Oct 29, 2015, at 3:07 AM, rostedt rostedt@xxxxxxxxxxx wrote:

> From: "Steven Rostedt (Red Hat)" <rostedt@xxxxxxxxxxx>
>
> In order to guarantee that a probe will be called before other probes that
> are attached to a tracepoint, there needs to be a mechanism to provide
> priority of one probe over the others.
>
> Adding a prio field to the struct tracepoint_func, which lets the probes be
> sorted by the priority set in the structure. If no priority is specified,
> then a priority of 10 is given (this is a macro, and perhaps may be changed
> in the future).
>
> Now probes may be added to affect other probes that are attached to a
> tracepoint with a guaranteed order.
>
> One use case would be to allow tracing of tracepoints be able to filter by
> pid. A special (higher priority probe) may be added to the sched_switch
> tracepoint and set the necessary flags of the other tracepoints to notify
> them if they should be traced or not. In case a tracepoint is enabled at the
> sched_switch tracepoint too, the order of the two are not random.
>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
> Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx>

Sounds good to me,

Acked-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>

Thanks,

Mathieu

> ---
> include/linux/tracepoint.h | 13 ++++++++++
> kernel/tracepoint.c | 61 +++++++++++++++++++++++++++++++++++++---------
> 2 files changed, 63 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index afada369c5b7..6b79537a42b1 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -26,6 +26,7 @@ struct notifier_block;
> struct tracepoint_func {
> void *func;
> void *data;
> + int prio;
> };
>
> struct tracepoint {
> @@ -42,9 +43,14 @@ struct trace_enum_map {
> unsigned long enum_value;
> };
>
> +#define TRACEPOINT_DEFAULT_PRIO 10
> +
> extern int
> tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
> extern int
> +tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data,
> + int prio);
> +extern int
> tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data);
> extern void
> for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
> @@ -207,6 +213,13 @@ extern void syscall_unregfunc(void);
> (void *)probe, data); \
> } \
> static inline int \
> + register_trace_prio_##name(void (*probe)(data_proto), void *data,\
> + int prio) \
> + { \
> + return tracepoint_probe_register_prio(&__tracepoint_##name, \
> + (void *)probe, data, prio); \
> + } \
> + static inline int \
> unregister_trace_##name(void (*probe)(data_proto), void *data) \
> { \
> return tracepoint_probe_unregister(&__tracepoint_##name,\
> diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
> index 3490407dc7b7..ecd536de603a 100644
> --- a/kernel/tracepoint.c
> +++ b/kernel/tracepoint.c
> @@ -91,11 +91,13 @@ static void debug_print_probes(struct tracepoint_func
> *funcs)
> printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func);
> }
>
> -static struct tracepoint_func *func_add(struct tracepoint_func **funcs,
> - struct tracepoint_func *tp_func)
> +static struct tracepoint_func *
> +func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
> + int prio)
> {
> - int nr_probes = 0;
> struct tracepoint_func *old, *new;
> + int nr_probes = 0;
> + int pos = -1;
>
> if (WARN_ON(!tp_func->func))
> return ERR_PTR(-EINVAL);
> @@ -104,18 +106,33 @@ static struct tracepoint_func *func_add(struct
> tracepoint_func **funcs,
> old = *funcs;
> if (old) {
> /* (N -> N+1), (N != 0, 1) probes */
> - for (nr_probes = 0; old[nr_probes].func; nr_probes++)
> + for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
> + /* Insert before probes of lower priority */
> + if (pos < 0 && old[nr_probes].prio < prio)
> + pos = nr_probes;
> if (old[nr_probes].func == tp_func->func &&
> old[nr_probes].data == tp_func->data)
> return ERR_PTR(-EEXIST);
> + }
> }
> /* + 2 : one for new probe, one for NULL func */
> new = allocate_probes(nr_probes + 2);
> if (new == NULL)
> return ERR_PTR(-ENOMEM);
> - if (old)
> - memcpy(new, old, nr_probes * sizeof(struct tracepoint_func));
> - new[nr_probes] = *tp_func;
> + if (old) {
> + if (pos < 0) {
> + pos = nr_probes;
> + memcpy(new, old, nr_probes * sizeof(struct tracepoint_func));
> + } else {
> + /* Copy higher priority probes ahead of the new probe */
> + memcpy(new, old, pos * sizeof(struct tracepoint_func));
> + /* Copy the rest after it. */
> + memcpy(new + pos + 1, old + pos,
> + (nr_probes - pos) * sizeof(struct tracepoint_func));
> + }
> + } else
> + pos = 0;
> + new[pos] = *tp_func;
> new[nr_probes + 1].func = NULL;
> *funcs = new;
> debug_print_probes(*funcs);
> @@ -174,7 +191,7 @@ static void *func_remove(struct tracepoint_func **funcs,
> * Add the probe function to a tracepoint.
> */
> static int tracepoint_add_func(struct tracepoint *tp,
> - struct tracepoint_func *func)
> + struct tracepoint_func *func, int prio)
> {
> struct tracepoint_func *old, *tp_funcs;
>
> @@ -183,7 +200,7 @@ static int tracepoint_add_func(struct tracepoint *tp,
>
> tp_funcs = rcu_dereference_protected(tp->funcs,
> lockdep_is_held(&tracepoints_mutex));
> - old = func_add(&tp_funcs, func);
> + old = func_add(&tp_funcs, func, prio);
> if (IS_ERR(old)) {
> WARN_ON_ONCE(1);
> return PTR_ERR(old);
> @@ -240,6 +257,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,
> * @tp: tracepoint
> * @probe: probe handler
> * @data: tracepoint data
> + * @prio: priority of this function over other registered functions
> *
> * Returns 0 if ok, error value on error.
> * Note: if @tp is within a module, the caller is responsible for
> @@ -247,7 +265,8 @@ static int tracepoint_remove_func(struct tracepoint *tp,
> * performed either with a tracepoint module going notifier, or from
> * within module exit functions.
> */
> -int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
> +int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe,
> + void *data, int prio)
> {
> struct tracepoint_func tp_func;
> int ret;
> @@ -255,10 +274,30 @@ int tracepoint_probe_register(struct tracepoint *tp, void
> *probe, void *data)
> mutex_lock(&tracepoints_mutex);
> tp_func.func = probe;
> tp_func.data = data;
> - ret = tracepoint_add_func(tp, &tp_func);
> + tp_func.prio = prio;
> + ret = tracepoint_add_func(tp, &tp_func, prio);
> mutex_unlock(&tracepoints_mutex);
> return ret;
> }
> +EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio);
> +
> +/**
> + * tracepoint_probe_register - Connect a probe to a tracepoint
> + * @tp: tracepoint
> + * @probe: probe handler
> + * @data: tracepoint data
> + * @prio: priority of this function over other registered functions
> + *
> + * Returns 0 if ok, error value on error.
> + * Note: if @tp is within a module, the caller is responsible for
> + * unregistering the probe before the module is gone. This can be
> + * performed either with a tracepoint module going notifier, or from
> + * within module exit functions.
> + */
> +int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
> +{
> + return tracepoint_probe_register_prio(tp, probe, data,
> TRACEPOINT_DEFAULT_PRIO);
> +}
> EXPORT_SYMBOL_GPL(tracepoint_probe_register);
>
> /**
> --
> 2.6.1

--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/