[PATCH] rcu: Fix race in set and clear TICK_DEP_BIT_RCU_EXP bitmask

From: Zqiang
Date: Tue Dec 20 2022 - 06:19:54 EST


For the kernel bulit with CONFIG_NO_HZ_FULL enabled and the following
cpus is nohz_full cpus:

CPU1 CPU2
rcu_report_exp_cpu_mult synchronize_rcu_expedited_wait
acquires rnp->lock mask = rnp->expmask;
for_each_leaf_node_cpu_mask(rnp, cpu, mask)
rnp->expmask = rnp->expmask & ~mask; rdp = per_cpu_ptr(&rcu_data, cpu1);
for_each_leaf_node_cpu_mask(rnp, cpu, mask)
rdp = per_cpu_ptr(&rcu_data, cpu1);
if (!rdp->rcu_forced_tick_exp)
continue; rdp->rcu_forced_tick_exp = true;
tick_dep_set_cpu(cpu1, TICK_DEP_BIT_RCU_EXP);

In the above scenario, after CPU1 reported the quiescent state, CPU1
misses the opportunity to clear the TICK_DEP_BIT_RCU_EXP bitmask, it
will not be cleared until the next expedited grace period starts and
the CPU1 quiescent state is reported again. during this window period,
the CPU1 whose tick can not be stopped, if CPU1 has only one runnable
task and this task has aggressive real-time response constraints, this
task may have one of the worst response times.

Therefore, this commit add rnp->lock when set TICK_DEP_BIT_RCU_EXP
bitmask to fix this race.

Signed-off-by: Zqiang <qiang1.zhang@xxxxxxxxx>
---
kernel/rcu/tree_exp.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 927abaf6c822..e5fe0099488b 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -593,6 +593,7 @@ static void synchronize_rcu_expedited_wait(void)
struct rcu_data *rdp;
struct rcu_node *rnp;
struct rcu_node *rnp_root = rcu_get_root();
+ unsigned long flags;

trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
jiffies_stall = rcu_exp_jiffies_till_stall_check();
@@ -601,17 +602,17 @@ static void synchronize_rcu_expedited_wait(void)
if (synchronize_rcu_expedited_wait_once(1))
return;
rcu_for_each_leaf_node(rnp) {
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
mask = READ_ONCE(rnp->expmask);
for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
rdp = per_cpu_ptr(&rcu_data, cpu);
if (rdp->rcu_forced_tick_exp)
continue;
rdp->rcu_forced_tick_exp = true;
- preempt_disable();
if (cpu_online(cpu))
tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
- preempt_enable();
}
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
j = READ_ONCE(jiffies_till_first_fqs);
if (synchronize_rcu_expedited_wait_once(j + HZ))
--
2.25.1