[PATCH rcu 05/12] rcu: Make polled grace-period API account for expedited grace periods

From: Paul E. McKenney
Date: Mon Jun 20 2022 - 18:52:23 EST


Currently, this code could splat:

oldstate = get_state_synchronize_rcu();
synchronize_rcu_expedited();
WARN_ON_ONCE(!poll_state_synchronize_rcu(oldstate));

This situation is counter-intuitive and user-unfriendly. After all, there
really was a perfectly valid full grace period right after the call to
get_state_synchronize_rcu(), so why shouldn't poll_state_synchronize_rcu()
know about it?

This commit therefore makes the polled grace-period API aware of expedited
grace periods in addition to the normal grace periods that it is already
aware of. With this change, the above code is guaranteed not to splat.

Please note that the above code can still splat due to counter wrap on the
one hand and situations involving partially overlapping normal/expedited
grace periods on the other. On 64-bit systems, the second is of course
much more likely than the first. It is possible to modify this approach
to prevent overlapping grace periods from causing splats, but only at
the expense of greatly increasing the probability of counter wrap, as
in within milliseconds on 32-bit systems and within minutes on 64-bit
systems.

This commit is in preparation for polled expedited grace periods.

Link: https://lore.kernel.org/all/20220121142454.1994916-1-bfoster@xxxxxxxxxx/
Link: https://docs.google.com/document/d/1RNKWW9jQyfjxw2E8dsXVTdvZYh0HnYeSHDKog9jhdN8/edit?usp=sharing
Cc: Brian Foster <bfoster@xxxxxxxxxx>
Cc: Dave Chinner <david@xxxxxxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Cc: Ian Kent <raven@xxxxxxxxxx>
Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxx>
---
kernel/rcu/tree.c | 9 +++++----
kernel/rcu/tree.h | 1 +
kernel/rcu/tree_exp.h | 16 ++++++++++++++--
3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 637e8f9454573..251eb9a8cd925 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1812,6 +1812,7 @@ static void rcu_poll_gp_seq_end(unsigned long *snap)
if (*snap && *snap == rcu_state.gp_seq_polled) {
rcu_seq_end(&rcu_state.gp_seq_polled);
rcu_state.gp_seq_polled_snap = 0;
+ rcu_state.gp_seq_polled_exp_snap = 0;
} else {
*snap = 0;
}
@@ -3913,10 +3914,10 @@ void synchronize_rcu(void)
"Illegal synchronize_rcu() in RCU read-side critical section");
if (rcu_blocking_is_gp()) {
// Note well that this code runs with !PREEMPT && !SMP.
- // In addition, all code that advances grace periods runs
- // at process level. Therefore, this GP overlaps with other
- // GPs only by being fully nested within them, which allows
- // reuse of ->gp_seq_polled_snap.
+ // In addition, all code that advances grace periods runs at
+ // process level. Therefore, this normal GP overlaps with
+ // other normal GPs only by being fully nested within them,
+ // which allows reuse of ->gp_seq_polled_snap.
rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_snap);
rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_snap);
if (rcu_init_invoked())
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 9c853033f159d..5634e76106c48 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -325,6 +325,7 @@ struct rcu_state {
unsigned long gp_wake_seq; /* ->gp_seq at ^^^. */
unsigned long gp_seq_polled; /* GP seq for polled API. */
unsigned long gp_seq_polled_snap; /* ->gp_seq_polled at normal GP start. */
+ unsigned long gp_seq_polled_exp_snap; /* ->gp_seq_polled at expedited GP start. */

/* End of fields guarded by root rcu_node's lock. */

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 0f70f62039a90..e0258066b881e 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -18,6 +18,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp);
static void rcu_exp_gp_seq_start(void)
{
rcu_seq_start(&rcu_state.expedited_sequence);
+ rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_exp_snap);
}

/*
@@ -34,6 +35,7 @@ static __maybe_unused unsigned long rcu_exp_gp_seq_endval(void)
*/
static void rcu_exp_gp_seq_end(void)
{
+ rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_exp_snap);
rcu_seq_end(&rcu_state.expedited_sequence);
smp_mb(); /* Ensure that consecutive grace periods serialize. */
}
@@ -913,8 +915,18 @@ void synchronize_rcu_expedited(void)
"Illegal synchronize_rcu_expedited() in RCU read-side critical section");

/* Is the state is such that the call is a grace period? */
- if (rcu_blocking_is_gp())
- return;
+ if (rcu_blocking_is_gp()) {
+ // Note well that this code runs with !PREEMPT && !SMP.
+ // In addition, all code that advances grace periods runs
+ // at process level. Therefore, this expedited GP overlaps
+ // with other expedited GPs only by being fully nested within
+ // them, which allows reuse of ->gp_seq_polled_exp_snap.
+ rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_exp_snap);
+ rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_exp_snap);
+ if (rcu_init_invoked())
+ cond_resched();
+ return; // Context allows vacuous grace periods.
+ }

/* If expedited grace periods are prohibited, fall back to normal. */
if (rcu_gp_is_normal()) {
--
2.31.1.189.g2e36527f23