Re: [PATCH v9 01/13] rcu: Fix missing nocb gp wake on rcu_barrier()

From: Paul E. McKenney
Date: Sun Oct 16 2022 - 14:23:31 EST


On Sun, Oct 16, 2022 at 12:25:32PM -0400, Joel Fernandes wrote:
> On Sun, Oct 16, 2022 at 12:23 PM Joel Fernandes (Google)
> <joel@xxxxxxxxxxxxxxxxx> wrote:
> >
> > From: Frederic Weisbecker <frederic@xxxxxxxxxx>
> >
> > In preparation of RCU lazy changes, wake up the RCU nocb gp thread if
> > needed after an entrain. Otherwise, the RCU barrier callback can wait in
> > the queue for several seconds before the lazy callbacks in front of it
> > are serviced.
> >
> > Reported-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
> > Signed-off-by: Frederic Weisbecker <frederic@xxxxxxxxxx>
> > Signed-off-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
> > Change-Id: I830269cd41b18862a1a58b26ce3292c6c4457bc7
>
> Ah, sorry for the Change-id Paul! If you don't mind, please edit this
> out from the change log for this and the next patch (it is only in
> these 2!)

That I can safely do. ;-)

But not today, too many timezones yesterday.

Thanx, Paul

> - Joel
>
>
> > ---
> > kernel/rcu/tree.c | 11 +++++++++++
> > kernel/rcu/tree.h | 1 +
> > kernel/rcu/tree_nocb.h | 5 +++++
> > 3 files changed, 17 insertions(+)
> >
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 5ec97e3f7468..67a1ae5151f5 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -3894,6 +3894,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
> > {
> > unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
> > unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
> > + bool wake_nocb = false;
> > + bool was_alldone = false;
> >
> > lockdep_assert_held(&rcu_state.barrier_lock);
> > if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
> > @@ -3902,7 +3904,14 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
> > rdp->barrier_head.func = rcu_barrier_callback;
> > debug_rcu_head_queue(&rdp->barrier_head);
> > rcu_nocb_lock(rdp);
> > + /*
> > + * Flush bypass and wakeup rcuog if we add callbacks to an empty regular
> > + * queue. This way we don't wait for bypass timer that can reach seconds
> > + * if it's fully lazy.
> > + */
> > + was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist);
> > WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
> > + wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
> > if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
> > atomic_inc(&rcu_state.barrier_cpu_count);
> > } else {
> > @@ -3910,6 +3919,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
> > rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
> > }
> > rcu_nocb_unlock(rdp);
> > + if (wake_nocb)
> > + wake_nocb_gp(rdp, false);
> > smp_store_release(&rdp->barrier_seq_snap, gseq);
> > }
> >
> > diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> > index d4a97e40ea9c..925dd98f8b23 100644
> > --- a/kernel/rcu/tree.h
> > +++ b/kernel/rcu/tree.h
> > @@ -439,6 +439,7 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
> > static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
> > static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
> > static void rcu_init_one_nocb(struct rcu_node *rnp);
> > +static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
> > static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
> > unsigned long j);
> > static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
> > diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> > index f77a6d7e1356..094fd454b6c3 100644
> > --- a/kernel/rcu/tree_nocb.h
> > +++ b/kernel/rcu/tree_nocb.h
> > @@ -1558,6 +1558,11 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
> > {
> > }
> >
> > +static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
> > +{
> > + return false;
> > +}
> > +
> > static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
> > unsigned long j)
> > {
> > --
> > 2.38.0.413.g74048e4d9e-goog
> >