[PATCH v2] tracing: Add filter-buffer option

From: Steven Rostedt
Date: Fri Dec 15 2023 - 13:25:12 EST


From 62a1de0f0f9d942934565e625a7880fd85ae216a Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@xxxxxxxxxxx>
Date: Fri, 15 Dec 2023 10:26:33 -0500
Subject: [PATCH] tracing: Add filter-buffer option

Normally, when the filter is enabled, a temporary buffer is created and
the event data is copied into it so that the filtering logic can be
performed. If the filter passes and the event should be recorded, then the
event is copied from the temporary buffer into the ring buffer. If the
event is to be discarded then it is simply dropped. If another event comes
in via an interrupt, that event will not use the temporary buffer, as the
buffer is busy, and will write directly into the ring buffer.

The filter-buffer option allows the user to disable this feature. By
default, the option is enabled. When it is cleared, the temporary buffer
is not used and events are always written into the ring buffer. This will
avoid the copy when the event is to be recorded, but also adds a bit more
overhead on the discard. Also, if another event were to interrupt the
event that is to be discarded, then the discarded event will not be
removed from the ring buffer but instead converted to padding that will
not be read by the reader. Padding will still take up space on the ring
buffer.

This option can be beneficial if most events are recorded and not
discarded, or simply for debugging the discard functionality of the ring
buffer.

Also fix some whitespace (that was fixed by editing this in vscode).

Signed-off-by: Steven Rostedt (Google) <rostedt@xxxxxxxxxxx>
---
Changes since v1: https://lore.kernel.org/linux-trace-kernel/20231215102633.7a24cb77@xxxxxxxxxxxxxxxxxxxx

- Renamed "disable-filter-buffer" to "filter-buffer" and made it enabled
by default, so the user now clears the option to disable the temporary
buffer. (Mathieu Desnoyers)


Documentation/trace/ftrace.rst | 23 ++++++++++++++++++++
kernel/trace/trace.c | 39 ++++++++++++++++++++--------------
kernel/trace/trace.h | 1 +
3 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index 23572f6697c0..7ec26eb814e9 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -1239,6 +1239,29 @@ Here are the available options:
When the free_buffer is closed, tracing will
stop (tracing_on set to 0).

+ filter-buffer
+ Normally, when the filter is enabled, a temporary buffer is
+ created to copy the event data into it to perform the
+ filtering logic. If the filter passes and the event should
+ be recorded, then the event is copied from the temporary
+ buffer into the ring buffer. If the event is to be discarded
+ then it is simply dropped. If another event comes in via
+ an interrupt, it will not use the temporary buffer as it is
+ busy and will write directly into the ring buffer.
+
+ This option, when cleared, will disable the temporary buffer and
+ always write into the ring buffer. This will avoid the copy when
+ the event is to be recorded, but also adds a bit more overhead on
+ the discard. Also, if another event were to interrupt the event
+ that is to be discarded, then the discarded event will not be
+ removed from the ring buffer but instead converted to padding
+ that will not be read by the reader. Padding will still take up
+ space on the ring buffer.
+
+ This option can be beneficial if most events are recorded and
+ not discarded, or simply for debugging the discard functionality
+ of the ring buffer.
+
irq-info
Shows the interrupt, preempt count, need resched data.
When disabled, the trace looks like::
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 55dabee4c78b..e18c83104e24 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -466,7 +466,7 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export);
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
- TRACE_ITER_HASH_PTR)
+ TRACE_ITER_HASH_PTR | TRACE_ITER_FILTER_BUF)

/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
@@ -5398,6 +5398,8 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
return 0;
}

+static int __tracing_set_filter_buffering(struct trace_array *tr, bool set);
+
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
int *map;
@@ -5451,6 +5453,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
if (mask == TRACE_ITER_FUNC_FORK)
ftrace_pid_follow_fork(tr, enabled);

+ if (mask == TRACE_ITER_FILTER_BUF)
+ __tracing_set_filter_buffering(tr, !enabled);
+
if (mask == TRACE_ITER_OVERWRITE) {
ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -6464,7 +6469,7 @@ static void tracing_set_nop(struct trace_array *tr)
{
if (tr->current_trace == &nop_trace)
return;
-
+
tr->current_trace->enabled--;

if (tr->current_trace->reset)
@@ -7552,27 +7557,29 @@ u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_eve
return ring_buffer_event_time_stamp(buffer, rbe);
}

-/*
- * Set or disable using the per CPU trace_buffer_event when possible.
- */
-int tracing_set_filter_buffering(struct trace_array *tr, bool set)
+static int __tracing_set_filter_buffering(struct trace_array *tr, bool set)
{
- int ret = 0;
-
- mutex_lock(&trace_types_lock);
-
if (set && tr->no_filter_buffering_ref++)
- goto out;
+ return 0;

if (!set) {
- if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
- ret = -EINVAL;
- goto out;
- }
+ if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
+ return -EINVAL;

--tr->no_filter_buffering_ref;
}
- out:
+ return 0;
+}
+
+/*
+ * Set or disable using the per CPU trace_buffer_event when possible.
+ */
+int tracing_set_filter_buffering(struct trace_array *tr, bool set)
+{
+ int ret;
+
+ mutex_lock(&trace_types_lock);
+ ret = __tracing_set_filter_buffering(tr, set);
mutex_unlock(&trace_types_lock);

return ret;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 79180aed13ee..f82dce7ea3ff 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1251,6 +1251,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
C(EVENT_FORK, "event-fork"), \
C(PAUSE_ON_TRACE, "pause-on-trace"), \
C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \
+ C(FILTER_BUF, "filter-buffer"), \
FUNCTION_FLAGS \
FGRAPH_FLAGS \
STACK_FLAGS \
--
2.42.0