Re: [PATCH v4 1/3] tracing: Add support for recording tgid of tasks

From: Steven Rostedt
Date: Mon Jun 26 2017 - 15:09:35 EST


On Mon, 26 Jun 2017 12:02:12 -0700
Joel Fernandes <joelaf@xxxxxxxxxx> wrote:

> On Mon, Jun 26, 2017 at 9:52 AM, Steven Rostedt <rostedt@xxxxxxxxxxx> wrote:
> > On Sun, 25 Jun 2017 22:38:42 -0700
> > Joel Fernandes <joelaf@xxxxxxxxxx> wrote:
> >
> >> +int *tgid_map;
> >> +
> >> +void tracing_alloc_tgid_map(void)
> >> +{
> >> + if (tgid_map)
> >> + return;
> >> +
> >> + tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
> >> + GFP_KERNEL);
> >> + WARN_ONCE(!tgid_map, "Allocation of tgid_map failed\n");
> >
> > This needs to return a value. If it fails to allocate, then we must not
> > set the bit to do the recording. More below about why.
> >
> >> +}
> >> +
> >> #define SAVED_CMDLINES_DEFAULT 128
> >> #define NO_CMDLINE_MAP UINT_MAX
> >> static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> >> @@ -1722,7 +1734,7 @@ struct saved_cmdlines_buffer {
> >> static struct saved_cmdlines_buffer *savedcmd;
> >>
> >> /* temporary disable recording */
> >> -static atomic_t trace_record_cmdline_disabled __read_mostly;
> >> +static atomic_t trace_record_taskinfo_disabled __read_mostly;
> >>
> >> static inline char *get_saved_cmdlines(int idx)
> >> {
> >> @@ -1992,16 +2004,87 @@ void trace_find_cmdline(int pid, char comm[])
> >> preempt_enable();
> >> }
> >>
> >> -void tracing_record_cmdline(struct task_struct *tsk)
> >> +int trace_find_tgid(int pid)
> >> +{
> >> + if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
> >> + return 0;
> >> +
> >> + return tgid_map[pid];
> >> +}
> >> +
> >> +static int trace_save_tgid(struct task_struct *tsk)
> >> +{
> >> + if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
> >> + return 0;
> >
> > If we failed to allocate, this will always return 0.
> >
> >> +
> >> + tgid_map[tsk->pid] = tsk->tgid;
> >> + return 1;
> >> +}
> >> +
> >> +static bool tracing_record_taskinfo_skip(int flags)
> >> +{
> >> + if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
> >> + return true;
> >> + if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
> >> + return true;
> >> + if (!__this_cpu_read(trace_taskinfo_save))
> >> + return true;
> >> + return false;
> >> +}
> >> +
> >> +/**
> >> + * tracing_record_taskinfo - record the task info of a task
> >> + *
> >> + * @task - task to record
> >> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> >> + * - TRACE_RECORD_TGID for recording tgid
> >> + */
> >> +void tracing_record_taskinfo(struct task_struct *task, int flags)
> >> +{
> >> + if (tracing_record_taskinfo_skip(flags))
> >> + return;
> >> + if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
> >> + return;
> >> + if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
> >
> > With a failed allocation, this will return here.
> >
> >> + return;
> >> +
> >> + __this_cpu_write(trace_taskinfo_save, false);
> >
> > The above will never be cleared, and this callback will constantly be
> > called, slowing down the tracer.
> >
> >
> >> +}
> >> +
> >> +/**
> >> + * tracing_record_taskinfo_sched_switch - record task info for sched_switch
> >> + *
> >> + * @prev - previous task during sched_switch
> >> + * @next - next task during sched_switch
> >> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> >> + * TRACE_RECORD_TGID for recording tgid
> >> + */
> >> +void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
> >> + struct task_struct *next, int flags)
> >> {
> >> - if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
> >> + if (tracing_record_taskinfo_skip(flags))
> >> + return;
> >> +
> >> + if ((flags & TRACE_RECORD_CMDLINE) &&
> >> + (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
> >> return;
> >>
> >> - if (!__this_cpu_read(trace_cmdline_save))
> >> + if ((flags & TRACE_RECORD_TGID) &&
> >> + (!trace_save_tgid(prev) || !trace_save_tgid(next)))
> >
> > On failed allocation, this will return here.
> >
> >> return;
> >>
> >> - if (trace_save_cmdline(tsk))
> >> - __this_cpu_write(trace_cmdline_save, false);
> >> + __this_cpu_write(trace_taskinfo_save, false);
> >
> > This will not be cleared.
> >
> >> +}
> >> +
> >> +/* Helpers to record a specific task information */
> >> +void tracing_record_cmdline(struct task_struct *task)
> >> +{
> >> + tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
> >> +}
> >> +
> >> +void tracing_record_tgid(struct task_struct *task)
> >> +{
> >> + tracing_record_taskinfo(task, TRACE_RECORD_TGID);
> >> }
> >>
> >> /*
> >> @@ -3146,7 +3229,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
> >> #endif
> >>
> >> if (!iter->snapshot)
> >> - atomic_inc(&trace_record_cmdline_disabled);
> >> + atomic_inc(&trace_record_taskinfo_disabled);
> >>
> >> if (*pos != iter->pos) {
> >> iter->ent = NULL;
> >> @@ -3191,7 +3274,7 @@ static void s_stop(struct seq_file *m, void *p)
> >> #endif
> >>
> >> if (!iter->snapshot)
> >> - atomic_dec(&trace_record_cmdline_disabled);
> >> + atomic_dec(&trace_record_taskinfo_disabled);
> >>
> >> trace_access_unlock(iter->cpu_file);
> >> trace_event_read_unlock();
> >> @@ -4238,6 +4321,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
> >> if (mask == TRACE_ITER_RECORD_CMD)
> >> trace_event_enable_cmd_record(enabled);
> >>
> >> + if (mask == TRACE_ITER_RECORD_TGID)
> >> + trace_event_enable_tgid_record(enabled);
> >> +
> >> if (mask == TRACE_ITER_EVENT_FORK)
> >> trace_event_follow_fork(tr, enabled);
> >>
> >> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> >> index 39fd77330aab..90318b016fd9 100644
> >> --- a/kernel/trace/trace.h
> >> +++ b/kernel/trace/trace.h
> >> @@ -635,8 +635,13 @@ void trace_graph_return(struct ftrace_graph_ret *trace);
> >> int trace_graph_entry(struct ftrace_graph_ent *trace);
> >> void set_graph_array(struct trace_array *tr);
> >>
> >> +extern int *tgid_map;
> >> +void tracing_alloc_tgid_map(void);
> >> void tracing_start_cmdline_record(void);
> >> void tracing_stop_cmdline_record(void);
> >> +void tracing_start_tgid_record(void);
> >> +void tracing_stop_tgid_record(void);
> >> +
> >> int register_tracer(struct tracer *type);
> >> int is_tracing_stopped(void);
> >>
> >> @@ -697,6 +702,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
> >> extern u64 ftrace_now(int cpu);
> >>
> >> extern void trace_find_cmdline(int pid, char comm[]);
> >> +extern int trace_find_tgid(int pid);
> >> extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
> >>
> >> #ifdef CONFIG_DYNAMIC_FTRACE
> >> @@ -1107,6 +1113,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
> >> C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
> >> C(LATENCY_FMT, "latency-format"), \
> >> C(RECORD_CMD, "record-cmd"), \
> >> + C(RECORD_TGID, "record-tgid"), \
> >> C(OVERWRITE, "overwrite"), \
> >> C(STOP_ON_FREE, "disable_on_free"), \
> >> C(IRQ_INFO, "irq-info"), \
> >> @@ -1423,6 +1430,8 @@ struct ftrace_event_field *
> >> trace_find_event_field(struct trace_event_call *call, char *name);
> >>
> >> extern void trace_event_enable_cmd_record(bool enable);
> >> +extern void trace_event_enable_tgid_record(bool enable);
> >> +
> >> extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
> >> extern int event_trace_del_tracer(struct trace_array *tr);
> >>
> >> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> >> index e7973e10398c..240c6df95ea6 100644
> >> --- a/kernel/trace/trace_events.c
> >> +++ b/kernel/trace/trace_events.c
> >> @@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
> >> mutex_unlock(&event_mutex);
> >> }
> >>
> >> +void trace_event_enable_tgid_record(bool enable)
> >
> > This should return a value.
> >
> >> +{
> >> + struct trace_event_file *file;
> >> + struct trace_array *tr;
> >> +
> >> + mutex_lock(&event_mutex);
> >> + do_for_each_event_file(tr, file) {
> >> + if (!(file->flags & EVENT_FILE_FL_ENABLED))
> >> + continue;
> >> +
> >> + if (enable) {
> >> + tracing_start_tgid_record();
> >
> > If we fail to start, the bit should not be set, and we should return
> > failure. Note, it can only fail on the first try, as once it is
> > allocated, you don't need to worry about it failing. Thus, if it fails,
> > break out of the loop now and return failure.
> >
>
> That seems Ok with me to do, I think a valid point.
>
> I think that I should do it in the second call to
> tracing_start_tgid_record too then (__ftrace_event_enable_disable) to
> error out if the allocation fails.
>
> While going this code I again, I noticed another potential issue in
> __ftrace_event_enable_disable

set_tracer