[RFC PATCH 6/6] tracing: use sched-RCU instead of SRCU for rcuidle tracepoints

From: Michael Jeanson
Date: Fri Oct 23 2020 - 15:54:35 EST


From: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>

Considering that tracer callbacks expect RCU to be watching (for
instance, perf uses rcu_read_lock), we need rcuidle tracepoints to issue
rcu_irq_{enter,exit}_irqson around calls to the callbacks. So there is
no point in using SRCU anymore given that rcuidle tracepoints need to
ensure RCU is watching. Therefore, simply use sched-RCU like normal
tracepoints for rcuidle tracepoints.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Cc: Michael Jeanson <mjeanson@xxxxxxxxxxxx>
Cc: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Yonghong Song <yhs@xxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
Cc: bpf@xxxxxxxxxxxxxxx
---
include/linux/tracepoint.h | 33 +++++++--------------------------
kernel/tracepoint.c | 25 +++++++++----------------
2 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 0386b54cbcbb..1414b11f864b 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -13,7 +13,6 @@
*/

#include <linux/smp.h>
-#include <linux/srcu.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/cpumask.h>
@@ -33,8 +32,6 @@ struct trace_eval_map {

#define TRACEPOINT_DEFAULT_PRIO 10

-extern struct srcu_struct tracepoint_srcu;
-
extern int
tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
extern int
@@ -86,7 +83,6 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
static inline void tracepoint_synchronize_unregister(void)
{
synchronize_rcu_tasks_trace();
- synchronize_srcu(&tracepoint_srcu);
synchronize_rcu();
}
#else
@@ -175,25 +171,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
if (!(cond)) \
return; \
\
- /* srcu can't be used from NMI */ \
- WARN_ON_ONCE(rcuidle && in_nmi()); \
- \
- if (maysleep) { \
- might_sleep(); \
+ might_sleep_if(maysleep); \
+ if (rcuidle) \
+ rcu_irq_enter_irqson(); \
+ if (maysleep) \
rcu_read_lock_trace(); \
- } else { \
- /* keep srcu and sched-rcu usage consistent */ \
+ else \
preempt_disable_notrace(); \
- } \
- \
- /* \
- * For rcuidle callers, use srcu since sched-rcu \
- * doesn't work from the idle path. \
- */ \
- if (rcuidle) { \
- __idx = srcu_read_lock_notrace(&tracepoint_srcu);\
- rcu_irq_enter_irqson(); \
- } \
\
it_func_ptr = rcu_dereference_raw((tp)->funcs); \
\
@@ -205,15 +189,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
} while ((++it_func_ptr)->func); \
} \
\
- if (rcuidle) { \
- rcu_irq_exit_irqson(); \
- srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
- } \
- \
if (maysleep) \
rcu_read_unlock_trace(); \
else \
preempt_enable_notrace(); \
+ if (rcuidle) \
+ rcu_irq_exit_irqson(); \
} while (0)

#ifndef MODULE
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 8d8e41c5d8a5..68b4e50798b1 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -18,9 +18,6 @@
extern tracepoint_ptr_t __start___tracepoints_ptrs[];
extern tracepoint_ptr_t __stop___tracepoints_ptrs[];

-DEFINE_SRCU(tracepoint_srcu);
-EXPORT_SYMBOL_GPL(tracepoint_srcu);
-
/* Set to 1 to enable tracepoint debug output */
static const int tracepoint_debug;

@@ -65,14 +62,9 @@ static void rcu_tasks_trace_free_old_probes(struct rcu_head *head)
kfree(container_of(head, struct tp_probes, rcu));
}

-static void srcu_free_old_probes(struct rcu_head *head)
-{
- call_rcu_tasks_trace(head, rcu_tasks_trace_free_old_probes);
-}
-
static void rcu_free_old_probes(struct rcu_head *head)
{
- call_srcu(&tracepoint_srcu, head, srcu_free_old_probes);
+ call_rcu_tasks_trace(head, rcu_tasks_trace_free_old_probes);
}

static __init int release_early_probes(void)
@@ -90,7 +82,7 @@ static __init int release_early_probes(void)
return 0;
}

-/* SRCU and Tasks Trace RCU are initialized at core_initcall */
+/* Tasks Trace RCU is initialized at core_initcall */
postcore_initcall(release_early_probes);

static inline void release_probes(struct tracepoint_func *old)
@@ -100,9 +92,8 @@ static inline void release_probes(struct tracepoint_func *old)
struct tp_probes, probes[0]);

/*
- * We can't free probes if SRCU and Tasks Trace RCU are not
- * initialized yet. Postpone the freeing till after both are
- * initialized.
+ * We can't free probes if Tasks Trace RCU is not initialized yet.
+ * Postpone the freeing till after Tasks Trace RCU is initialized.
*/
if (unlikely(!ok_to_free_tracepoints)) {
tp_probes->rcu.next = early_probes;
@@ -111,9 +102,11 @@ static inline void release_probes(struct tracepoint_func *old)
}

/*
- * Tracepoint probes are protected by sched RCU, SRCU and
- * Tasks Trace RCU by chaining the callbacks we cover all three
- * cases and wait for all three grace periods.
+ * Tracepoint probes are protected by both sched RCU and
+ * Tasks Trace RCU, by calling the Tasks Trace RCU callback in
+ * the sched RCU callback we cover both cases. So let us chain
+ * the Tasks Trace RCU and sched RCU callbacks to wait for both
+ * grace periods.
*/
call_rcu(&tp_probes->rcu, rcu_free_old_probes);
}
--
2.25.1