Re: [PATCH 6/9] lockdep: Introduce wait-type checks

From: Peter Zijlstra
Date: Tue Mar 31 2020 - 10:56:20 EST


On Tue, Mar 31, 2020 at 03:25:21PM +0200, Geert Uytterhoeven wrote:
> On arm64 (e.g. R-Car H3 ES2.0):
>
> +=============================
> +[ BUG: Invalid wait context ]
> +5.6.0-salvator-x-09423-gb29514ba13a9c459-dirty #679 Not tainted
> +-----------------------------
> +swapper/5/0 is trying to lock:
> +ffffff86ff76f398 (&pool->lock){..-.}-{3:3}, at: __queue_work+0x134/0x430
> +other info that might help us debug this:
> +1 lock held by swapper/5/0:
> + #0: ffffffc01103a4a0 (rcu_read_lock){....}-{1:3}, at:
> rcu_lock_acquire.constprop.59+0x0/0x38
> +stack backtrace:
> +CPU: 5 PID: 0 Comm: swapper/5 Not tainted
> 5.6.0-salvator-x-09423-gb29514ba13a9c459-dirty #679
> +Hardware name: Renesas Salvator-X 2nd version board based on r8a77951 (DT)
> +Call trace:
> + dump_backtrace+0x0/0x180
> + show_stack+0x14/0x1c
> + dump_stack+0xdc/0x12c
> + __lock_acquire+0x37c/0xf9c
> + lock_acquire+0x258/0x288
> + _raw_spin_lock+0x34/0x48
> + __queue_work+0x134/0x430
> + queue_work_on+0x48/0x8c
> + timers_update_nohz+0x24/0x2c
> + tick_nohz_activate.isra.15.part.16+0x5c/0x80
> + tick_setup_sched_timer+0xe0/0xf0
> + hrtimer_run_queues+0x88/0xf8

So this is complaining that it cannot take pool->lock, which is
WAIT_CONFIG, while holding RCU, which presents a WAIT_CONFIG context.

This seems to imply that something is amiss, because that should be
allowed. What it doesn't print is the context, which in the above
case is a (hrtimer) interrupt.

I suspect this really is a hardirq context and the next patch won't cure
things. It looks to be nohz (full?) related.

Frederic, can you untangle this?

---

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 1511690e4de7..ac10db66cc63 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3952,10 +3952,36 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return ret;
}

+static inline short task_wait_context(struct task_struct *curr)
+{
+ /*
+ * Set appropriate wait type for the context; for IRQs we have to take
+ * into account force_irqthread as that is implied by PREEMPT_RT.
+ */
+ if (curr->hardirq_context) {
+ /*
+ * Check if force_irqthreads will run us threaded.
+ */
+ if (curr->hardirq_threaded || curr->irq_config)
+ return LD_WAIT_CONFIG;
+
+ return LD_WAIT_SPIN;
+ } else if (curr->softirq_context) {
+ /*
+ * Softirqs are always threaded.
+ */
+ return LD_WAIT_CONFIG;
+ }
+
+ return LD_WAIT_MAX;
+}
+
static int
print_lock_invalid_wait_context(struct task_struct *curr,
struct held_lock *hlock)
{
+ short curr_inner;
+
if (!debug_locks_off())
return 0;
if (debug_locks_silent)
@@ -3971,6 +3997,10 @@ print_lock_invalid_wait_context(struct task_struct *curr,
print_lock(hlock);

pr_warn("other info that might help us debug this:\n");
+
+ curr_inner = task_wait_context(curr);
+ pr_warn("context-{%d:%d}\n", curr_inner, curr_inner);
+
lockdep_print_held_locks(curr);

pr_warn("stack backtrace:\n");
@@ -4017,26 +4047,7 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
}
depth++;

- /*
- * Set appropriate wait type for the context; for IRQs we have to take
- * into account force_irqthread as that is implied by PREEMPT_RT.
- */
- if (curr->hardirq_context) {
- /*
- * Check if force_irqthreads will run us threaded.
- */
- if (curr->hardirq_threaded || curr->irq_config)
- curr_inner = LD_WAIT_CONFIG;
- else
- curr_inner = LD_WAIT_SPIN;
- } else if (curr->softirq_context) {
- /*
- * Softirqs are always threaded.
- */
- curr_inner = LD_WAIT_CONFIG;
- } else {
- curr_inner = LD_WAIT_MAX;
- }
+ curr_inner = task_wait_context(curr);

for (; depth < curr->lockdep_depth; depth++) {
struct held_lock *prev = curr->held_locks + depth;