[PATCH] tracing: Add trace_trigger kernel command line option

From: Steven Rostedt
Date: Wed Oct 19 2022 - 20:01:46 EST


From: "Steven Rostedt (Google)" <rostedt@xxxxxxxxxxx>

Allow triggers to be enabled at kernel boot up. For example:

trace_trigger="sched_switch.stacktrace if prev_state == 2"

The above will enable the stacktrace trigger on top of the sched_switch
event and only trigger if its prev_state is 2 (TASK_UNINTERRUPTIBLE). Then
at boot up, a stacktrace will trigger and be recorded in the tracing ring
buffer every time the sched_switch happens where the previous state is
TASK_INTERRUPTIBLE.

As this calls into tracepoint logic during very early boot (before
interrupts are enabled), a check has to be done to see if early boot
interrupts are still disabled, and if so, avoid any call to RCU
synchronization, as that will enable interrupts and cause boot up issues.

Signed-off-by: Steven Rostedt (Google) <rostedt@xxxxxxxxxxx>
---
.../admin-guide/kernel-parameters.txt | 19 ++++++
include/linux/tracepoint.h | 4 ++
kernel/trace/trace.c | 3 +-
kernel/trace/trace_events.c | 63 ++++++++++++++++++-
kernel/tracepoint.c | 6 ++
5 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a465d5242774..ccf91a4bf113 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6257,6 +6257,25 @@
See also Documentation/trace/ftrace.rst "trace options"
section.

+ trace_trigger=[trigger-list]
+ [FTRACE] Add a event trigger on specific events.
+ Set a trigger on top of a specific event, with an optional
+ filter.
+
+ The format is is "trace_trigger=<event>.<trigger>[ if <filter>],..."
+ Where more than one trigger may be specified that are comma deliminated.
+
+ For example:
+
+ trace_trigger="sched_switch.stacktrace if prev_state == 2"
+
+ The above will enable the "stacktrace" trigger on the "sched_switch"
+ event but only trigger it if the "prev_state" of the "sched_switch"
+ event is "2" (TASK_UNINTERUPTIBLE).
+
+ See also "Event triggers" in Documentation/trace/events.rst
+
+
traceoff_on_warning
[FTRACE] enable this option to disable tracing when a
warning is hit. This turns off "tracing_on". Tracing can
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 4b33b95eb8be..a5c6b5772897 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -90,6 +90,10 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
#ifdef CONFIG_TRACEPOINTS
static inline void tracepoint_synchronize_unregister(void)
{
+ /* Early updates do not need synchronization */
+ if (early_boot_irqs_disabled)
+ return;
+
synchronize_srcu(&tracepoint_srcu);
synchronize_rcu();
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 47a44b055a1d..c03fd7037add 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2749,7 +2749,8 @@ void trace_buffered_event_disable(void)
preempt_enable();

/* Wait for all current users to finish */
- synchronize_rcu();
+ if (!early_boot_irqs_disabled)
+ synchronize_rcu();

for_each_tracing_cpu(cpu) {
free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0356cae0cf74..06554939252c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2796,6 +2796,44 @@ trace_create_new_event(struct trace_event_call *call,
return file;
}

+#ifdef CONFIG_HIST_TRIGGERS
+#define MAX_BOOT_TRIGGERS 32
+
+static struct boot_triggers {
+ const char *event;
+ char *trigger;
+} bootup_triggers[MAX_BOOT_TRIGGERS];
+
+static char bootup_trigger_buf[COMMAND_LINE_SIZE];
+static int nr_boot_triggers;
+
+static __init int setup_trace_triggers(char *str)
+{
+ char *trigger;
+ char *buf;
+ int i;
+
+ strlcpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
+ ring_buffer_expanded = true;
+ disable_tracing_selftest("running event triggers");
+
+ buf = bootup_trigger_buf;
+ for (i = 0; i < MAX_BOOT_TRIGGERS; i++) {
+ trigger = strsep(&buf, ",");
+ if (!trigger)
+ break;
+ bootup_triggers[i].event = strsep(&trigger, ".");
+ bootup_triggers[i].trigger = strsep(&trigger, ".");
+ if (!bootup_triggers[i].trigger)
+ break;
+ }
+
+ nr_boot_triggers = i;
+ return 1;
+}
+__setup("trace_trigger=", setup_trace_triggers);
+#endif
+
/* Add an event to a trace directory */
static int
__trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
@@ -2822,12 +2860,32 @@ __trace_early_add_new_event(struct trace_event_call *call,
struct trace_array *tr)
{
struct trace_event_file *file;
+ int ret;
+ int i;

file = trace_create_new_event(call, tr);
if (!file)
return -ENOMEM;

- return event_define_fields(call);
+ ret = event_define_fields(call);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_HIST_TRIGGERS
+ for (i = 0; i < nr_boot_triggers; i++) {
+ if (strcmp(trace_event_name(call), bootup_triggers[i].event))
+ continue;
+ mutex_lock(&event_mutex);
+ ret = trigger_process_regex(file, bootup_triggers[i].trigger);
+ mutex_unlock(&event_mutex);
+ if (ret)
+ pr_err("Failed to register trigger '%s' on event %s\n",
+ bootup_triggers[i].trigger,
+ bootup_triggers[i].event);
+ }
+#endif
+
+ return 0;
}

struct ftrace_module_file_ops;
@@ -3726,6 +3784,8 @@ static __init int event_trace_enable(void)
list_add(&call->list, &ftrace_events);
}

+ register_trigger_cmds();
+
/*
* We need the top trace array to have a working set of trace
* points at early init, before the debug files and directories
@@ -3740,7 +3800,6 @@ static __init int event_trace_enable(void)

register_event_cmds();

- register_trigger_cmds();

return 0;
}
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index f23144af5743..f6e4ee1e40b3 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -48,6 +48,9 @@ static void tp_rcu_get_state(enum tp_transition_sync sync)
{
struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];

+ if (early_boot_irqs_disabled)
+ return;
+
/* Keep the latest get_state snapshot. */
snapshot->rcu = get_state_synchronize_rcu();
snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu);
@@ -58,6 +61,9 @@ static void tp_rcu_cond_sync(enum tp_transition_sync sync)
{
struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];

+ if (early_boot_irqs_disabled)
+ return;
+
if (!snapshot->ongoing)
return;
cond_synchronize_rcu(snapshot->rcu);
--
2.35.1