[PATCH rcu 09/10] rcu: Allow expedited RCU CPU stall warnings to dump task stacks

From: Paul E. McKenney
Date: Wed Jan 04 2023 - 19:24:00 EST


This commit introduces the rcupdate.rcu_exp_stall_task_details kernel
boot parameter, which cause expedited RCU CPU stall warnings to dump
the stacks of any tasks blocking the current expedited grace period.

Reported-by: David Howells <dhowells@xxxxxxxxxx>
Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxx>
---
.../admin-guide/kernel-parameters.txt | 5 +++
kernel/rcu/rcu.h | 1 +
kernel/rcu/tree_exp.h | 41 +++++++++++++++++++
kernel/rcu/update.c | 2 +
4 files changed, 49 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6cfa6e3996cf7..aa453f9202d89 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5113,6 +5113,11 @@
rcupdate.rcu_cpu_stall_timeout to be used (after
conversion from seconds to milliseconds).

+ rcupdate.rcu_exp_stall_task_details= [KNL]
+ Print stack dumps of any tasks blocking the
+ current expedited RCU grace period during an
+ expedited RCU CPU stall warning.
+
rcupdate.rcu_expedited= [KNL]
Use expedited grace-period primitives, for
example, synchronize_rcu_expedited() instead
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index c5aa934de59b0..fa640c45172e9 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -224,6 +224,7 @@ extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
extern int rcu_exp_cpu_stall_timeout;
+extern bool rcu_exp_stall_task_details __read_mostly;
int rcu_jiffies_till_stall_check(void);
int rcu_exp_jiffies_till_stall_check(void);

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 927abaf6c822e..249c2967d9e6c 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -11,6 +11,7 @@

static void rcu_exp_handler(void *unused);
static int rcu_print_task_exp_stall(struct rcu_node *rnp);
+static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp);

/*
* Record the start of an expedited grace period.
@@ -671,6 +672,7 @@ static void synchronize_rcu_expedited_wait(void)
dump_cpu_task(cpu);
preempt_enable();
}
+ rcu_exp_print_detail_task_stall_rnp(rnp);
}
jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
panic_on_rcu_stall();
@@ -813,6 +815,36 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
return ndetected;
}

+/*
+ * Scan the current list of tasks blocked within RCU read-side critical
+ * sections, dumping the stack of each that is blocking the current
+ * expedited grace period.
+ */
+static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
+{
+ unsigned long flags;
+ struct task_struct *t;
+
+ if (!rcu_exp_stall_task_details)
+ return;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ if (!READ_ONCE(rnp->exp_tasks)) {
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ return;
+ }
+ t = list_entry(rnp->exp_tasks->prev,
+ struct task_struct, rcu_node_entry);
+ list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+ /*
+ * We could be printing a lot while holding a spinlock.
+ * Avoid triggering hard lockup.
+ */
+ touch_nmi_watchdog();
+ sched_show_task(t);
+ }
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+}
+
#else /* #ifdef CONFIG_PREEMPT_RCU */

/* Request an expedited quiescent state. */
@@ -885,6 +917,15 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
return 0;
}

+/*
+ * Because preemptible RCU does not exist, we never have to print out
+ * tasks blocked within RCU read-side critical sections that are blocking
+ * the current expedited grace period.
+ */
+static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
+{
+}
+
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

/**
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 587b97c401914..6ed5020aee6d1 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -509,6 +509,8 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_timeout, int, 0644);
int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
module_param(rcu_exp_cpu_stall_timeout, int, 0644);
+bool rcu_exp_stall_task_details __read_mostly;
+module_param(rcu_exp_stall_task_details, bool, 0644);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */

// Suppress boot-time RCU CPU stall warnings and rcutorture writer stall
--
2.31.1.189.g2e36527f23