Re: [for-next][PATCH 2/5] tracing: Add set_event_pid directory for future use

From: Paul E. McKenney
Date: Thu Nov 19 2015 - 18:24:14 EST


On Thu, Oct 29, 2015 at 03:07:56AM -0400, Steven Rostedt wrote:
> From: "Steven Rostedt (Red Hat)" <rostedt@xxxxxxxxxxx>
>
> Create a tracing directory called set_event_pid, which currently has no
> function, but will be used to filter all events for the tracing instance to
> the pids that are added to the file.
>
> The reason no functionality is added with this commit is that this commit
> focuses on the creation and removal of the pids in a safe manner. And tests
> can be made against this change to make sure things are correct before
> hooking features to the list of pids.
>
> Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxxxxxxxxxx>
> Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx>
> ---
> kernel/trace/trace.h | 7 ++
> kernel/trace/trace_events.c | 287 ++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 294 insertions(+)
>
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index fb8a61c710ea..250481043bb5 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -176,6 +176,12 @@ struct trace_options {
> struct trace_option_dentry *topts;
> };
>
> +struct trace_pid_list {
> + unsigned int nr_pids;
> + int order;
> + pid_t *pids;
> +};
> +
> /*
> * The trace array - an array of per-CPU trace arrays. This is the
> * highest level data structure that individual tracers deal with.
> @@ -201,6 +207,7 @@ struct trace_array {
> bool allocated_snapshot;
> unsigned long max_latency;
> #endif
> + struct trace_pid_list __rcu *filtered_pids;
> /*
> * max_lock is used to protect the swapping of buffers
> * when taking a max snapshot. The buffers themselves are
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index d120cfe3cca7..2ad7014707ee 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -15,8 +15,10 @@
> #include <linux/kthread.h>
> #include <linux/tracefs.h>
> #include <linux/uaccess.h>
> +#include <linux/bsearch.h>
> #include <linux/module.h>
> #include <linux/ctype.h>
> +#include <linux/sort.h>
> #include <linux/slab.h>
> #include <linux/delay.h>
>
> @@ -445,6 +447,43 @@ static void ftrace_clear_events(struct trace_array *tr)
> mutex_unlock(&event_mutex);
> }
>
> +static int cmp_pid(const void *key, const void *elt)
> +{
> + const pid_t *search_pid = key;
> + const pid_t *pid = elt;
> +
> + if (*search_pid == *pid)
> + return 0;
> + if (*search_pid < *pid)
> + return -1;
> + return 1;
> +}
> +
> +static void __ftrace_clear_event_pids(struct trace_array *tr)
> +{
> + struct trace_pid_list *pid_list;
> +
> + pid_list = rcu_dereference_protected(tr->filtered_pids,
> + lockdep_is_held(&event_mutex));
> + if (!pid_list)
> + return;
> +
> + rcu_assign_pointer(tr->filtered_pids, NULL);
> +
> + /* Wait till all users are no longer using pid filtering */
> + synchronize_sched();
> +
> + free_pages((unsigned long)pid_list->pids, pid_list->order);
> + kfree(pid_list);
> +}
> +
> +static void ftrace_clear_event_pids(struct trace_array *tr)
> +{
> + mutex_lock(&event_mutex);
> + __ftrace_clear_event_pids(tr);
> + mutex_unlock(&event_mutex);
> +}
> +
> static void __put_system(struct event_subsystem *system)
> {
> struct event_filter *filter = system->filter;
> @@ -777,6 +816,56 @@ static void t_stop(struct seq_file *m, void *p)
> mutex_unlock(&event_mutex);
> }
>
> +static void *p_start(struct seq_file *m, loff_t *pos)
> +{
> + struct trace_pid_list *pid_list;
> + struct trace_array *tr = m->private;
> +
> + /*
> + * Grab the mutex, to keep calls to p_next() having the same
> + * tr->filtered_pids as p_start() has.
> + * If we just passed the tr->filtered_pids around, then RCU would
> + * have been enough, but doing that makes things more complex.
> + */
> + mutex_lock(&event_mutex);
> + rcu_read_lock_sched();

This looks interesting... You hold event_mutex, which I am guessing
blocks changes to tr->filtered_pids. Then why is rcu_read_lock_sched()
needed as well?

Thanx, Paul

> +
> + pid_list = rcu_dereference_sched(tr->filtered_pids);
> +
> + if (!pid_list || *pos >= pid_list->nr_pids)
> + return NULL;
> +
> + return (void *)&pid_list->pids[*pos];
> +}
> +
> +static void p_stop(struct seq_file *m, void *p)
> +{
> + rcu_read_unlock_sched();
> + mutex_unlock(&event_mutex);
> +}
> +
> +static void *
> +p_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> + struct trace_array *tr = m->private;
> + struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
> +
> + (*pos)++;
> +
> + if (*pos >= pid_list->nr_pids)
> + return NULL;
> +
> + return (void *)&pid_list->pids[*pos];
> +}
> +
> +static int p_show(struct seq_file *m, void *v)
> +{
> + pid_t *pid = v;
> +
> + seq_printf(m, "%d\n", *pid);
> + return 0;
> +}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/