Re: [RFC] tracing: Add support for critical section event tracing

From: Steven Rostedt
Date: Fri Jun 30 2017 - 08:52:07 EST


On Tue, 11 Apr 2017 22:38:51 -0700
Joel Fernandes <joelaf@xxxxxxxxxx> wrote:

> diff --git a/include/trace/events/critical.h b/include/trace/events/critical.h
> new file mode 100644
> index 000000000000..bfd58dd4f48f
> --- /dev/null
> +++ b/include/trace/events/critical.h
> @@ -0,0 +1,55 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM critical
> +

There should be a:

#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS

around the entire header.

> +#if !defined(_TRACE_CRITICAL_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_CRITICAL_H
> +
> +#include <linux/ktime.h>
> +#include <linux/tracepoint.h>
> +#include <linux/string.h>
> +
> +TRACE_EVENT(critical_start,
> +
> + TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +
> + TP_ARGS(ip, parent_ip),
> +
> + TP_STRUCT__entry(
> + __array(char, func, 16)
> + __array(char, parent, 16)

Save only the ip, not the name.

> + ),
> +
> + TP_fast_assign(
> + snprintf(__entry->func, 16, "%pf", (void *)ip);
> + snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);

This is very slow to have in the fast path.

> + ),
> +
> + TP_printk("caller=%s parent=%s\n",

Put in %pF here, trace-cmd and perf know how to handle those, and then
the translation from ip to function name is done in the slow path.


> + __entry->func,
> + __entry->parent)
> +);
> +
> +TRACE_EVENT(critical_end,
> +
> + TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +
> + TP_ARGS(ip, parent_ip),
> +
> + TP_STRUCT__entry(
> + __array(char, func, 16)
> + __array(char, parent, 16)
> + ),
> +
> + TP_fast_assign(
> + snprintf(__entry->func, 16, "%pf", (void *)ip);
> + snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);

Same here.

> + ),
> +
> + TP_printk("caller=%s parent=%s\n",
> + __entry->func,
> + __entry->parent)
> +);
> +#endif /* _TRACE_CRITICAL_H */
> +
> +/* This part ust be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index d5038005eb5d..986a07985360 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -208,6 +208,12 @@ config PREEMPT_TRACER
> enabled. This option and the irqs-off timing option can be
> used together or separately.)
>
> +config TRACE_CRITICAL_SECTION_EVENTS
> + bool "Trace critical sections as trace events"
> + default n
> + select IRQSOFF_TRACER
> + select PREEMPT_TRACER

Either make TRACE_CRITICAL_SECTION_EVENTS depend on irqsoff and preempt
tracers, or change trace_irqsoff.c to be able to handle it without
those tracers enabled.

"select" should be avoided when possible.

> +
> config SCHED_TRACER
> bool "Scheduling Latency Tracer"
> select GENERIC_TRACER
> diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
> index 7758bc0617cb..a14ce6185885 100644
> --- a/kernel/trace/trace_irqsoff.c
> +++ b/kernel/trace/trace_irqsoff.c
> @@ -13,6 +13,8 @@
> #include <linux/uaccess.h>
> #include <linux/module.h>
> #include <linux/ftrace.h>
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/critical.h>
>
> #include "trace.h"
>
> @@ -20,6 +22,7 @@ static struct trace_array *irqsoff_trace __read_mostly;
> static int tracer_enabled __read_mostly;
>
> static DEFINE_PER_CPU(int, tracing_cpu);
> +static DEFINE_PER_CPU(int, tracing_events_cpu);
>
> static DEFINE_RAW_SPINLOCK(max_trace_lock);
>
> @@ -362,6 +365,44 @@ check_critical_timing(struct trace_array *tr,
> __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
> }
>
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +/*
> + * Called when either preempt or irq are turned off
> + */
> +static inline void
> +start_critical_event(unsigned long ip, unsigned long parent_ip)
> +{
> + int cpu;
> +
> + cpu = raw_smp_processor_id();

Nuke the above and ...
> +
> + if (per_cpu(tracing_events_cpu, cpu))

use this_cpu_read() instead. It's much faster.

> + return;
> +
> + trace_critical_start(ip, parent_ip);
> +
> + per_cpu(tracing_events_cpu, cpu) = 1;

this_cpu_write(tracing_events_cpu, 1);

> +}
> +
> +/*
> + * Called when both preempt and irq are turned back on
> + */
> +static inline void
> +stop_critical_event(unsigned long ip, unsigned long parent_ip)
> +{
> + int cpu;
> +
> + cpu = raw_smp_processor_id();

Same here.

> +
> + if (unlikely(per_cpu(tracing_events_cpu, cpu)))
> + per_cpu(tracing_events_cpu, cpu) = 0;
> + else
> + return;
> +
> + trace_critical_end(ip, parent_ip);

Are you not worried about recursion here? There's no protection.
Wouldn't it be better to have:

if (!this_cpu_read(tracing_events_cpu))
return;

trace_critical_end(ip, parent_ip);

this_cpu_write(tracing_events_cpu, 0);

?

> +}

Add:

#else

static inline void start_critical_event(unsigned long ip,
unsigned long pip) { }
static inline void stop_critical_event(unsigned long ip,
unsigned long pip) { }

static inline int trace_critical_start_enabled() { return 0; }
static inline int trace_critical_end_enabled() { return 0; }

> +#endif
> +
> static inline void
> start_critical_timing(unsigned long ip, unsigned long parent_ip)
> {
> @@ -434,6 +475,10 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
> /* start and stop critical timings used to for stoppage (in idle) */
> void start_critical_timings(void)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (irqs_disabled() || preempt_count())
> + start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

And replace the above with:

if (trace_critical_start_enabled())
start_critical_event(CALLER_ADDR0, CALLER_ADDR1);

to get rid of the #ifdef embedded in the code.

Also, notice the use of "trace_critical_start_enabled()".

> if (preempt_trace() || irq_trace())
> start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
> }
> @@ -441,6 +486,10 @@ EXPORT_SYMBOL_GPL(start_critical_timings);
>
> void stop_critical_timings(void)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (irqs_disabled() || preempt_count())
> + stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

Same here, but for the stop functions.

> if (preempt_trace() || irq_trace())
> stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
> }
> @@ -483,6 +532,10 @@ inline void print_irqtrace_events(struct task_struct *curr)
> */
> void trace_hardirqs_on(void)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!preempt_count())
> + stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

and here.

> if (!preempt_trace() && irq_trace())
> stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
> }
> @@ -490,6 +543,10 @@ EXPORT_SYMBOL(trace_hardirqs_on);
>
> void trace_hardirqs_off(void)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!preempt_count())
> + start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

and here.

> if (!preempt_trace() && irq_trace())
> start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
> }
> @@ -497,6 +554,10 @@ EXPORT_SYMBOL(trace_hardirqs_off);
>
> __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!preempt_count())
> + stop_critical_event(CALLER_ADDR0, caller_addr);
> +#endif

and here.

> if (!preempt_trace() && irq_trace())
> stop_critical_timing(CALLER_ADDR0, caller_addr);
> }
> @@ -504,6 +565,10 @@ EXPORT_SYMBOL(trace_hardirqs_on_caller);
>
> __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!preempt_count())
> + start_critical_event(CALLER_ADDR0, caller_addr);
> +#endif

and here.

> if (!preempt_trace() && irq_trace())
> start_critical_timing(CALLER_ADDR0, caller_addr);
> }
> @@ -515,12 +580,20 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
> #ifdef CONFIG_PREEMPT_TRACER
> void trace_preempt_on(unsigned long a0, unsigned long a1)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!irqs_disabled())
> + stop_critical_event(a0, a1);
> +#endif

and here.

> if (preempt_trace() && !irq_trace())
> stop_critical_timing(a0, a1);
> }
>
> void trace_preempt_off(unsigned long a0, unsigned long a1)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!irqs_disabled())
> + start_critical_event(a0, a1);
> +#endif

and here.

(that was repetitive).

-- Steve

> if (preempt_trace() && !irq_trace())
> start_critical_timing(a0, a1);
> }