[PATCH] tracing/timerlat: Add latency threshold

From: Costa Shulyupin
Date: Sun Jul 16 2023 - 15:30:36 EST


The timerlat tracer generates a huge number of trace entries.
This degrades system performance and perturbs the very
delays we are trying to measure with timerlat.
In practice, we are usually interested in latency spikes
rather than in the many small values.

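Add a timerlat_threshold_ns option. Samples whose timer latency
is below the threshold are dropped before they are written to the
trace buffer, which filters out irrelevant traces and produces more
reliable data. The default value of 0 disables the filter.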

This patch helped debug a serious latency problem
and led to the following fix:
https://lore.kernel.org/lkml/20221208075604.811710-1-junxiao.chang@xxxxxxxxx/

Signed-off-by: Costa Shulyupin <costa.shul@xxxxxxxxxx>
---
Documentation/trace/timerlat-tracer.rst | 1 +
kernel/trace/trace_osnoise.c | 17 +++++++++++++++++
2 files changed, 18 insertions(+)

diff --git a/Documentation/trace/timerlat-tracer.rst b/Documentation/trace/timerlat-tracer.rst
index 53a56823e903..71b1c63ca403 100644
--- a/Documentation/trace/timerlat-tracer.rst
+++ b/Documentation/trace/timerlat-tracer.rst
@@ -68,6 +68,7 @@ directory. The timerlat configs are:

- cpus: CPUs at which a timerlat thread will execute.
- timerlat_period_us: the period of the timerlat thread.
+ - timerlat_threshold_ns: do not trace timer latencies below this threshold, in nanoseconds. Writing 0 disables this option.
- stop_tracing_us: stop the system tracing if a
timer latency at the *irq* context higher than the configured
value happens. Writing 0 disables this option.
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index bd0d01d00fb9..43284a1e8bea 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -346,6 +346,7 @@ static struct osnoise_data {
u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
u64 timerlat_period; /* timerlat period */
+ u64 timerlat_threshold_ns;
u64 print_stack; /* print IRQ stack if total > */
int timerlat_tracer; /* timerlat tracer */
#endif
@@ -358,6 +359,7 @@ static struct osnoise_data {
#ifdef CONFIG_TIMERLAT_TRACER
.print_stack = 0,
.timerlat_period = DEFAULT_TIMERLAT_PERIOD,
+ .timerlat_threshold_ns = 0,
.timerlat_tracer = 0,
#endif
};
@@ -597,6 +599,10 @@ static void trace_timerlat_sample(struct timerlat_sample *sample)
struct osnoise_instance *inst;
struct trace_buffer *buffer;

+ if (osnoise_data.timerlat_threshold_ns &&
+ sample->timer_latency < osnoise_data.timerlat_threshold_ns)
+ return;
+
rcu_read_lock();
list_for_each_entry_rcu(inst, &osnoise_instances, list) {
buffer = inst->tr->array_buffer.buffer;
@@ -2663,6 +2669,11 @@ static struct trace_min_max_param timerlat_period = {
.min = &timerlat_min_period,
};

+static struct trace_min_max_param timerlat_threshold = { /* backs the timerlat_threshold_ns tracefs file */
+ .lock = &interface_lock,
+ .val = &osnoise_data.timerlat_threshold_ns, /* 0 (the default) disables the latency filter */
+};
+
static const struct file_operations timerlat_fd_fops = {
.open = timerlat_fd_open,
.read = timerlat_fd_read,
@@ -2759,6 +2770,12 @@ static int init_timerlat_tracefs(struct dentry *top_dir)
if (!tmp)
return -ENOMEM;

+ tmp = tracefs_create_file("timerlat_threshold_ns", TRACE_MODE_WRITE,
+ top_dir, &timerlat_threshold,
+ &trace_min_max_fops);
+ if (!tmp)
+ return -ENOMEM;
+
retval = osnoise_create_cpu_timerlat_fd(top_dir);
if (retval)
return retval;
--
2.41.0