[PATCH tip/core/rcu 26/40] srcu: Use rcu_segcblist to track SRCU callbacks

From: Paul E. McKenney
Date: Wed Apr 12 2017 - 13:41:46 EST


This commit switches SRCU from custom-built callback queues to the new
rcu_segcblist structure. This change associates grace-period sequence
numbers with groups of callbacks, which will be needed for efficient
processing of per-CPU callbacks.

Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
---
include/linux/rcu_segcblist.h | 678 ++++++++++++++++++++++++++++++++++++++++++
include/linux/srcu.h | 24 +-
kernel/rcu/rcu.h | 6 +
kernel/rcu/rcu_segcblist.h | 671 -----------------------------------------
kernel/rcu/srcu.c | 159 ++--------
kernel/rcu/tree.h | 2 +-
6 files changed, 721 insertions(+), 819 deletions(-)
create mode 100644 include/linux/rcu_segcblist.h
delete mode 100644 kernel/rcu/rcu_segcblist.h

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
new file mode 100644
index 000000000000..74b1e7243955
--- /dev/null
+++ b/include/linux/rcu_segcblist.h
@@ -0,0 +1,678 @@
+/*
+ * RCU segmented callback lists
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Authors: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
+ */
+
+#ifndef __KERNEL_RCU_SEGCBLIST_H
+#define __KERNEL_RCU_SEGCBLIST_H
+
+/* Simple unsegmented callback lists. */
+struct rcu_cblist {
+ struct rcu_head *head;
+ struct rcu_head **tail;
+ long len;
+ long len_lazy;
+};
+
+#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head }
+
+/* Initialize simple callback list. */
+static inline void rcu_cblist_init(struct rcu_cblist *rclp)
+{
+ rclp->head = NULL;
+ rclp->tail = &rclp->head;
+ rclp->len = 0;
+ rclp->len_lazy = 0;
+}
+
+/* Is simple callback list empty? */
+static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
+{
+ return !rclp->head;
+}
+
+/* Return number of callbacks in simple callback list. */
+static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
+{
+ return rclp->len;
+}
+
+/* Return number of lazy callbacks in simple callback list. */
+static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
+{
+ return rclp->len_lazy;
+}
+
+/*
+ * Debug function to actually count the number of callbacks.
+ * If the number exceeds the limit specified, return -1.
+ */
+static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
+{
+ int cnt = 0;
+ struct rcu_head **rhpp = &rclp->head;
+
+ for (;;) {
+ if (!*rhpp)
+ return cnt;
+ if (++cnt > lim)
+ return -1;
+ rhpp = &(*rhpp)->next;
+ }
+}
+
+/*
+ * Dequeue the oldest rcu_head structure from the specified callback
+ * list. This function assumes that the callback is non-lazy, but
+ * the caller can later invoke rcu_cblist_dequeued_lazy() if it
+ * finds otherwise (and if it cares about laziness). This allows
+ * different users to have different ways of determining laziness.
+ */
+static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
+{
+ struct rcu_head *rhp;
+
+ rhp = rclp->head;
+ if (!rhp)
+ return NULL;
+ rclp->len--;
+ rclp->head = rhp->next;
+ if (!rclp->head)
+ rclp->tail = &rclp->head;
+ return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
+{
+ rclp->len_lazy--;
+}
+
+/*
+ * Interim function to return rcu_cblist head pointer. Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
+{
+ return rclp->head;
+}
+
+/*
+ * Interim function to return rcu_cblist head pointer. Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
+{
+ WARN_ON_ONCE(rcu_cblist_empty(rclp));
+ return rclp->tail;
+}
+
+/* Complicated segmented callback lists. ;-) */
+
+/*
+ * Index values for segments in rcu_segcblist structure.
+ *
+ * The segments are as follows:
+ *
+ * [head, *tails[RCU_DONE_TAIL]):
+ * Callbacks whose grace period has elapsed, and thus can be invoked.
+ * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]):
+ * Callbacks waiting for the current GP from the current CPU's viewpoint.
+ * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]):
+ * Callbacks that arrived before the next GP started, again from
+ * the current CPU's viewpoint. These can be handled by the next GP.
+ * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]):
+ * Callbacks that might have arrived after the next GP started.
+ * There is some uncertainty as to when a given GP starts and
+ * ends, but a CPU knows the exact times if it is the one starting
+ * or ending the GP. Other CPUs know that the previous GP ends
+ * before the next one starts.
+ *
+ * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also
+ * empty.
+ *
+ * The ->gp_seq[] array contains the grace-period number at which the
+ * corresponding segment of callbacks will be ready to invoke. A given
+ * element of this array is meaningful only when the corresponding segment
+ * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks
+ * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have
+ * not yet been assigned a grace-period number).
+ */
+#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
+#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
+#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */
+#define RCU_NEXT_TAIL 3
+#define RCU_CBLIST_NSEGS 4
+
+struct rcu_segcblist {
+ struct rcu_head *head;
+ struct rcu_head **tails[RCU_CBLIST_NSEGS];
+ unsigned long gp_seq[RCU_CBLIST_NSEGS];
+ long len;
+ long len_lazy;
+};
+
+#define RCU_SEGCBLIST_INITIALIZER(n) \
+{ \
+ .head = NULL, \
+ .tails[RCU_DONE_TAIL] = &n.head, \
+ .tails[RCU_WAIT_TAIL] = &n.head, \
+ .tails[RCU_NEXT_READY_TAIL] = &n.head, \
+ .tails[RCU_NEXT_TAIL] = &n.head, \
+}
+
+/*
+ * Initialize an rcu_segcblist structure.
+ */
+static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp)
+{
+ int i;
+
+ BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
+ BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
+ rsclp->head = NULL;
+ for (i = 0; i < RCU_CBLIST_NSEGS; i++)
+ rsclp->tails[i] = &rsclp->head;
+ rsclp->len = 0;
+ rsclp->len_lazy = 0;
+}
+
+/*
+ * Is the specified rcu_segcblist structure empty?
+ *
+ * But careful! The fact that the ->head field is NULL does not
+ * necessarily imply that there are no callbacks associated with
+ * this structure. When callbacks are being invoked, they are
+ * removed as a group. If callback invocation must be preempted,
+ * the remaining callbacks will be added back to the list. Either
+ * way, the counts are updated later.
+ *
+ * So it is often the case that rcu_segcblist_n_cbs() should be used
+ * instead.
+ */
+static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
+{
+ return !rsclp->head;
+}
+
+/* Return number of callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
+{
+ return READ_ONCE(rsclp->len);
+}
+
+/* Return number of lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
+{
+ return rsclp->len_lazy;
+}
+
+/* Return number of lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
+{
+ return rsclp->len - rsclp->len_lazy;
+}
+
+/*
+ * Is the specified rcu_segcblist enabled, for example, not corresponding
+ * to an offline or callback-offloaded CPU?
+ */
+static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
+{
+ return !!rsclp->tails[RCU_NEXT_TAIL];
+}
+
+/*
+ * Disable the specified rcu_segcblist structure, so that callbacks can
+ * no longer be posted to it. This structure must be empty.
+ */
+static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
+{
+ WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
+ WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
+ WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
+ rsclp->tails[RCU_NEXT_TAIL] = NULL;
+}
+
+/*
+ * Is the specified segment of the specified rcu_segcblist structure
+ * empty of callbacks?
+ */
+static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
+{
+ if (seg == RCU_DONE_TAIL)
+ return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
+ return rsclp->tails[seg - 1] == rsclp->tails[seg];
+}
+
+/*
+ * Are all segments following the specified segment of the specified
+ * rcu_segcblist structure empty of callbacks? (The specified
+ * segment might well contain callbacks.)
+ */
+static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
+{
+ return !*rsclp->tails[seg];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are ready to be invoked?
+ */
+static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
+{
+ return rcu_segcblist_is_enabled(rsclp) &&
+ &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are still pending, that is, not yet ready to be invoked?
+ */
+static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
+{
+ return rcu_segcblist_is_enabled(rsclp) &&
+ !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
+}
+
+/*
+ * Dequeue and return the first ready-to-invoke callback. If there
+ * are no ready-to-invoke callbacks, return NULL. Disables interrupts
+ * to avoid interference. Does not protect from interference from other
+ * CPUs or tasks.
+ */
+static inline struct rcu_head *
+rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
+{
+ unsigned long flags;
+ int i;
+ struct rcu_head *rhp;
+
+ local_irq_save(flags);
+ if (!rcu_segcblist_ready_cbs(rsclp)) {
+ local_irq_restore(flags);
+ return NULL;
+ }
+ rhp = rsclp->head;
+ BUG_ON(!rhp);
+ rsclp->head = rhp->next;
+ for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
+ if (rsclp->tails[i] != &rhp->next)
+ break;
+ rsclp->tails[i] = &rsclp->head;
+ }
+ smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
+ WRITE_ONCE(rsclp->len, rsclp->len - 1);
+ local_irq_restore(flags);
+ return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ rsclp->len_lazy--;
+ local_irq_restore(flags);
+}
+
+/*
+ * Return a pointer to the first callback in the specified rcu_segcblist
+ * structure. This is useful for diagnostics.
+ */
+static inline struct rcu_head *
+rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
+{
+ if (rcu_segcblist_is_enabled(rsclp))
+ return rsclp->head;
+ return NULL;
+}
+
+/*
+ * Return a pointer to the first pending callback in the specified
+ * rcu_segcblist structure. This is useful just after posting a given
+ * callback -- if that callback is the first pending callback, then
+ * you cannot rely on someone else having already started up the required
+ * grace period.
+ */
+static inline struct rcu_head *
+rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
+{
+ if (rcu_segcblist_is_enabled(rsclp))
+ return *rsclp->tails[RCU_DONE_TAIL];
+ return NULL;
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * have not yet been processed beyond having been posted, that is,
+ * does it contain callbacks in its last segment?
+ */
+static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
+{
+ return rcu_segcblist_is_enabled(rsclp) &&
+ !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
+}
+
+/*
+ * Enqueue the specified callback onto the specified rcu_segcblist
+ * structure, updating accounting as needed. Note that the ->len
+ * field may be accessed locklessly, hence the WRITE_ONCE().
+ * The ->len field is used by rcu_barrier() and friends to determine
+ * if it must post a callback on this structure, and it is OK
+ * for rcu_barrier() to sometimes post callbacks needlessly, but
+ * absolutely not OK for it to ever miss posting a callback.
+ */
+static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
+ struct rcu_head *rhp, bool lazy)
+{
+ WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
+ if (lazy)
+ rsclp->len_lazy++;
+ smp_mb(); /* Ensure counts are updated before callback is enqueued. */
+ rhp->next = NULL;
+ *rsclp->tails[RCU_NEXT_TAIL] = rhp;
+ rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
+}
+
+/*
+ * Extract only the counts from the specified rcu_segcblist structure,
+ * and place them in the specified rcu_cblist structure. This function
+ * supports both callback orphaning and invocation, hence the separation
+ * of counts and callbacks. (Callbacks ready for invocation must be
+ * orphaned and adopted separately from pending callbacks, but counts
+ * apply to all callbacks. Locking must be used to make sure that
+ * both orphaned-callbacks lists are consistent.)
+ */
+static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
+ struct rcu_cblist *rclp)
+{
+ rclp->len_lazy += rsclp->len_lazy;
+ rclp->len += rsclp->len;
+ rsclp->len_lazy = 0;
+ WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
+}
+
+/*
+ * Extract only those callbacks ready to be invoked from the specified
+ * rcu_segcblist structure and place them in the specified rcu_cblist
+ * structure.
+ */
+static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
+ struct rcu_cblist *rclp)
+{
+ int i;
+
+ if (!rcu_segcblist_ready_cbs(rsclp))
+ return; /* Nothing to do. */
+ *rclp->tail = rsclp->head;
+ rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
+ *rsclp->tails[RCU_DONE_TAIL] = NULL;
+ rclp->tail = rsclp->tails[RCU_DONE_TAIL];
+ for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
+ if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
+ rsclp->tails[i] = &rsclp->head;
+}
+
+/*
+ * Extract only those callbacks still pending (not yet ready to be
+ * invoked) from the specified rcu_segcblist structure and place them in
+ * the specified rcu_cblist structure. Note that this loses information
+ * about any callbacks that might have been partway done waiting for
+ * their grace period. Too bad! They will have to start over.
+ */
+static inline void
+rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
+ struct rcu_cblist *rclp)
+{
+ int i;
+
+ if (!rcu_segcblist_pend_cbs(rsclp))
+ return; /* Nothing to do. */
+ *rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
+ rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
+ *rsclp->tails[RCU_DONE_TAIL] = NULL;
+ for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
+ rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Move the entire contents of the specified rcu_segcblist structure,
+ * counts, callbacks, and all, to the specified rcu_cblist structure.
+ * @@@ Why do we need this??? Moving early-boot CBs to NOCB lists?
+ * @@@ Memory barrier needed? (Not if only used at boot time...)
+ */
+static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp,
+ struct rcu_cblist *rclp)
+{
+ rcu_segcblist_extract_done_cbs(rsclp, rclp);
+ rcu_segcblist_extract_pend_cbs(rsclp, rclp);
+ rcu_segcblist_extract_count(rsclp, rclp);
+}
+
+/*
+ * Insert counts from the specified rcu_cblist structure in the
+ * specified rcu_segcblist structure.
+ */
+static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
+ struct rcu_cblist *rclp)
+{
+ rsclp->len_lazy += rclp->len_lazy;
+ /* ->len sampled locklessly. */
+ WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
+ rclp->len_lazy = 0;
+ rclp->len = 0;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the beginning of the
+ * done-callbacks segment of the specified rcu_segcblist.
+ */
+static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
+ struct rcu_cblist *rclp)
+{
+ int i;
+
+ if (!rclp->head)
+ return; /* No callbacks to move. */
+ *rclp->tail = rsclp->head;
+ rsclp->head = rclp->head;
+ for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
+ if (&rsclp->head == rsclp->tails[i])
+ rsclp->tails[i] = rclp->tail;
+ else
+ break;
+ rclp->head = NULL;
+ rclp->tail = &rclp->head;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the end of the
+ * new-callbacks segment of the specified rcu_segcblist.
+ */
+static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
+ struct rcu_cblist *rclp)
+{
+ if (!rclp->head)
+ return; /* Nothing to do. */
+ *rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
+ rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
+ rclp->head = NULL;
+ rclp->tail = &rclp->head;
+}
+
+/*
+ * Advance the callbacks in the specified rcu_segcblist structure based
+ * on the current value passed in for the grace-period counter.
+ */
+static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
+ unsigned long seq)
+{
+ int i, j;
+
+ WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+ WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
+
+ /*
+ * Find all callbacks whose ->gp_seq numbers indicate that they
+ * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
+ */
+ for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
+ if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+ break;
+ rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
+ }
+
+ /* If no callbacks moved, nothing more need be done. */
+ if (i == RCU_WAIT_TAIL)
+ return;
+
+ /* Clean up tail pointers that might have been misordered above. */
+ for (j = RCU_WAIT_TAIL; j < i; j++)
+ rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
+
+ /*
+ * Callbacks moved, so clean up the misordered ->tails[] pointers
+ * that now point into the middle of the list of ready-to-invoke
+ * callbacks. The overall effect is to copy down the later pointers
+ * into the gap that was created by the now-ready segments.
+ */
+ for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
+ if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
+ break; /* No more callbacks. */
+ rsclp->tails[j] = rsclp->tails[i];
+ rsclp->gp_seq[j] = rsclp->gp_seq[i];
+ }
+}
+
+/*
+ * "Accelerate" callbacks based on more-accurate grace-period information.
+ * The reason for this is that RCU does not synchronize the beginnings and
+ * ends of grace periods, and that callbacks are posted locally. This in
+ * turn means that the callbacks must be labelled conservatively early
+ * on, as getting exact information would degrade both performance and
+ * scalability. When more accurate grace-period information becomes
+ * available, previously posted callbacks can be "accelerated", marking
+ * them to complete at the end of the earlier grace period.
+ *
+ * This function operates on an rcu_segcblist structure, and also the
+ * grace-period sequence number at which new callbacks would become
+ * ready to invoke.
+ */
+static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
+ unsigned long seq)
+{
+ int i;
+
+ WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+ WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
+
+ /*
+ * Find the segment preceding the oldest segment of callbacks
+ * whose ->gp_seq[] completion is at or after that passed in via
+ * "seq", skipping any empty segments. This oldest segment, along
+ * with any later segments, can be merged in with any newly arrived
+ * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
+ * as their ->gp_seq[] grace-period completion sequence number.
+ */
+ for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
+ if (rsclp->tails[i] != rsclp->tails[i - 1] &&
+ ULONG_CMP_LT(rsclp->gp_seq[i], seq))
+ break;
+
+ /*
+ * If all the segments contain callbacks that correspond to
+ * earlier grace-period sequence numbers than "seq", leave.
+ * Assuming that the rcu_segcblist structure has enough
+ * segments in its arrays, this can only happen if some of
+ * the non-done segments contain callbacks that really are
+ * ready to invoke. This situation will get straightened
+ * out by the next call to rcu_segcblist_advance().
+ *
+ * Also advance to the oldest segment of callbacks whose
+ * ->gp_seq[] completion is at or after that passed in via "seq",
+ * skipping any empty segments.
+ */
+ if (++i >= RCU_NEXT_TAIL)
+ return false;
+
+ /*
+ * Merge all later callbacks, including newly arrived callbacks,
+ * into the segment located by the for-loop above. Assign "seq"
+ * as the ->gp_seq[] value in order to correctly handle the case
+ * where there were no pending callbacks in the rcu_segcblist
+ * structure other than in the RCU_NEXT_TAIL segment.
+ */
+ for (; i < RCU_NEXT_TAIL; i++) {
+ rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
+ rsclp->gp_seq[i] = seq;
+ }
+ return true;
+}
+
+/*
+ * Scan the specified rcu_segcblist structure for callbacks that need
+ * a grace period later than the one specified by "seq". We don't look
+ * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
+ * have a grace-period sequence number.
+ */
+static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
+ unsigned long seq)
+{
+ int i;
+
+ for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
+ if (rsclp->tails[i - 1] != rsclp->tails[i] &&
+ ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+ return true;
+ return false;
+}
+
+/*
+ * Interim function to return rcu_segcblist head pointer. Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp)
+{
+ return rsclp->head;
+}
+
+/*
+ * Interim function to return rcu_segcblist head pointer. Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
+{
+ WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
+ return rsclp->tails[RCU_NEXT_TAIL];
+}
+
+#endif /* __KERNEL_RCU_SEGCBLIST_H */
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 047ac8c28a4e..ad154a7bc114 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -22,7 +22,7 @@
* Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
*
* For detailed explanation of Read-Copy Update mechanism see -
- * Documentation/RCU/ *.txt
+ * Documentation/RCU/ *.txt
*
*/

@@ -32,31 +32,20 @@
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
+#include <linux/rcu_segcblist.h>

struct srcu_array {
unsigned long lock_count[2];
unsigned long unlock_count[2];
};

-struct rcu_batch {
- struct rcu_head *head, **tail;
-};
-
-#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
-
struct srcu_struct {
unsigned long completed;
unsigned long srcu_gp_seq;
struct srcu_array __percpu *per_cpu_ref;
- spinlock_t queue_lock; /* protect ->batch_queue, ->running */
+ spinlock_t queue_lock; /* protect ->srcu_cblist, ->srcu_state */
int srcu_state;
- /* callbacks just queued */
- struct rcu_batch batch_queue;
- /* callbacks try to do the first check_zero */
- struct rcu_batch batch_check0;
- /* callbacks done with the first check_zero and the flip */
- struct rcu_batch batch_check1;
- struct rcu_batch batch_done;
+ struct rcu_segcblist srcu_cblist;
struct delayed_work work;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
@@ -97,10 +86,7 @@ void process_srcu(struct work_struct *work);
.per_cpu_ref = &name##_srcu_array, \
.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
.srcu_state = SRCU_STATE_IDLE, \
- .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
- .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \
- .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \
- .batch_done = RCU_BATCH_INIT(name.batch_done), \
+ .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\
.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
__SRCU_DEP_MAP_INIT(name) \
}
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 0bc1313c49e2..a943b42a9cf7 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -87,6 +87,12 @@ static inline unsigned long rcu_seq_snap(unsigned long *sp)
return s;
}

+/* Return the current value the update side's sequence number, no ordering. */
+static inline unsigned long rcu_seq_current(unsigned long *sp)
+{
+ return READ_ONCE(*sp);
+}
+
/*
* Given a snapshot from rcu_seq_snap(), determine whether or not a
* full update-side operation has occurred.
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
deleted file mode 100644
index c32d13b368ac..000000000000
--- a/kernel/rcu/rcu_segcblist.h
+++ /dev/null
@@ -1,671 +0,0 @@
-/*
- * RCU segmented callback lists
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * Copyright IBM Corporation, 2017
- *
- * Authors: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
- */
-
-#ifndef __KERNEL_RCU_SEGCBLIST_H
-#define __KERNEL_RCU_SEGCBLIST_H
-
-/* Simple unsegmented callback lists. */
-struct rcu_cblist {
- struct rcu_head *head;
- struct rcu_head **tail;
- long len;
- long len_lazy;
-};
-
-#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head }
-
-/* Initialize simple callback list. */
-static inline void rcu_cblist_init(struct rcu_cblist *rclp)
-{
- rclp->head = NULL;
- rclp->tail = &rclp->head;
- rclp->len = 0;
- rclp->len_lazy = 0;
-}
-
-/* Is simple callback list empty? */
-static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
-{
- return !rclp->head;
-}
-
-/* Return number of callbacks in simple callback list. */
-static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
-{
- return rclp->len;
-}
-
-/* Return number of lazy callbacks in simple callback list. */
-static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
-{
- return rclp->len_lazy;
-}
-
-/*
- * Debug function to actually count the number of callbacks.
- * If the number exceeds the limit specified, return -1.
- */
-static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
-{
- int cnt = 0;
- struct rcu_head **rhpp = &rclp->head;
-
- for (;;) {
- if (!*rhpp)
- return cnt;
- if (++cnt > lim)
- return -1;
- rhpp = &(*rhpp)->next;
- }
-}
-
-/*
- * Dequeue the oldest rcu_head structure from the specified callback
- * list. This function assumes that the callback is non-lazy, but
- * the caller can later invoke rcu_cblist_dequeued_lazy() if it
- * finds otherwise (and if it cares about laziness). This allows
- * different users to have different ways of determining laziness.
- */
-static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
-{
- struct rcu_head *rhp;
-
- rhp = rclp->head;
- if (!rhp)
- return NULL;
- prefetch(rhp);
- rclp->len--;
- rclp->head = rhp->next;
- if (!rclp->head)
- rclp->tail = &rclp->head;
- return rhp;
-}
-
-/*
- * Account for the fact that a previously dequeued callback turned out
- * to be marked as lazy.
- */
-static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
-{
- rclp->len_lazy--;
-}
-
-/*
- * Interim function to return rcu_cblist head pointer. Longer term, the
- * rcu_cblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
-{
- return rclp->head;
-}
-
-/*
- * Interim function to return rcu_cblist head pointer. Longer term, the
- * rcu_cblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
-{
- WARN_ON_ONCE(rcu_cblist_empty(rclp));
- return rclp->tail;
-}
-
-/* Complicated segmented callback lists. ;-) */
-
-/*
- * Index values for segments in rcu_segcblist structure.
- *
- * The segments are as follows:
- *
- * [head, *tails[RCU_DONE_TAIL]):
- * Callbacks whose grace period has elapsed, and thus can be invoked.
- * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]):
- * Callbacks waiting for the current GP from the current CPU's viewpoint.
- * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]):
- * Callbacks that arrived before the next GP started, again from
- * the current CPU's viewpoint. These can be handled by the next GP.
- * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]):
- * Callbacks that might have arrived after the next GP started.
- * There is some uncertainty as to when a given GP starts and
- * ends, but a CPU knows the exact times if it is the one starting
- * or ending the GP. Other CPUs know that the previous GP ends
- * before the next one starts.
- *
- * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also
- * empty.
- *
- * The ->gp_seq[] array contains the grace-period number at which the
- * corresponding segment of callbacks will be ready to invoke. A given
- * element of this array is meaningful only when the corresponding segment
- * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks
- * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have
- * not yet been assigned a grace-period number).
- */
-#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
-#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
-#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */
-#define RCU_NEXT_TAIL 3
-#define RCU_CBLIST_NSEGS 4
-
-struct rcu_segcblist {
- struct rcu_head *head;
- struct rcu_head **tails[RCU_CBLIST_NSEGS];
- unsigned long gp_seq[RCU_CBLIST_NSEGS];
- long len;
- long len_lazy;
-};
-
-/*
- * Initialize an rcu_segcblist structure.
- */
-static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp)
-{
- int i;
-
- BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
- BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
- rsclp->head = NULL;
- for (i = 0; i < RCU_CBLIST_NSEGS; i++)
- rsclp->tails[i] = &rsclp->head;
- rsclp->len = 0;
- rsclp->len_lazy = 0;
-}
-
-/*
- * Is the specified rcu_segcblist structure empty?
- *
- * But careful! The fact that the ->head field is NULL does not
- * necessarily imply that there are no callbacks associated with
- * this structure. When callbacks are being invoked, they are
- * removed as a group. If callback invocation must be preempted,
- * the remaining callbacks will be added back to the list. Either
- * way, the counts are updated later.
- *
- * So it is often the case that rcu_segcblist_n_cbs() should be used
- * instead.
- */
-static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
-{
- return !rsclp->head;
-}
-
-/* Return number of callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
-{
- return READ_ONCE(rsclp->len);
-}
-
-/* Return number of lazy callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
-{
- return rsclp->len_lazy;
-}
-
-/* Return number of lazy callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
-{
- return rsclp->len - rsclp->len_lazy;
-}
-
-/*
- * Is the specified rcu_segcblist enabled, for example, not corresponding
- * to an offline or callback-offloaded CPU?
- */
-static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
-{
- return !!rsclp->tails[RCU_NEXT_TAIL];
-}
-
-/*
- * Disable the specified rcu_segcblist structure, so that callbacks can
- * no longer be posted to it. This structure must be empty.
- */
-static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
-{
- WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
- WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
- WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
- rsclp->tails[RCU_NEXT_TAIL] = NULL;
-}
-
-/*
- * Is the specified segment of the specified rcu_segcblist structure
- * empty of callbacks?
- */
-static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
-{
- if (seg == RCU_DONE_TAIL)
- return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
- return rsclp->tails[seg - 1] == rsclp->tails[seg];
-}
-
-/*
- * Are all segments following the specified segment of the specified
- * rcu_segcblist structure empty of callbacks? (The specified
- * segment might well contain callbacks.)
- */
-static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
-{
- return !*rsclp->tails[seg];
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * are ready to be invoked?
- */
-static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
-{
- return rcu_segcblist_is_enabled(rsclp) &&
- &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * are still pending, that is, not yet ready to be invoked?
- */
-static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
-{
- return rcu_segcblist_is_enabled(rsclp) &&
- !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
-}
-
-/*
- * Dequeue and return the first ready-to-invoke callback. If there
- * are no ready-to-invoke callbacks, return NULL. Disables interrupts
- * to avoid interference. Does not protect from interference from other
- * CPUs or tasks.
- */
-static inline struct rcu_head *
-rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
-{
- unsigned long flags;
- int i;
- struct rcu_head *rhp;
-
- local_irq_save(flags);
- if (!rcu_segcblist_ready_cbs(rsclp)) {
- local_irq_restore(flags);
- return NULL;
- }
- rhp = rsclp->head;
- BUG_ON(!rhp);
- rsclp->head = rhp->next;
- for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
- if (rsclp->tails[i] != &rhp->next)
- break;
- rsclp->tails[i] = &rsclp->head;
- }
- smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
- WRITE_ONCE(rsclp->len, rsclp->len - 1);
- local_irq_restore(flags);
- return rhp;
-}
-
-/*
- * Account for the fact that a previously dequeued callback turned out
- * to be marked as lazy.
- */
-static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- rsclp->len_lazy--;
- local_irq_restore(flags);
-}
-
-/*
- * Return a pointer to the first callback in the specified rcu_segcblist
- * structure. This is useful for diagnostics.
- */
-static inline struct rcu_head *
-rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
-{
- if (rcu_segcblist_is_enabled(rsclp))
- return rsclp->head;
- return NULL;
-}
-
-/*
- * Return a pointer to the first pending callback in the specified
- * rcu_segcblist structure. This is useful just after posting a given
- * callback -- if that callback is the first pending callback, then
- * you cannot rely on someone else having already started up the required
- * grace period.
- */
-static inline struct rcu_head *
-rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
-{
- if (rcu_segcblist_is_enabled(rsclp))
- return *rsclp->tails[RCU_DONE_TAIL];
- return NULL;
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * have not yet been processed beyond having been posted, that is,
- * does it contain callbacks in its last segment?
- */
-static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
-{
- return rcu_segcblist_is_enabled(rsclp) &&
- !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
-}
-
-/*
- * Enqueue the specified callback onto the specified rcu_segcblist
- * structure, updating accounting as needed. Note that the ->len
- * field may be accessed locklessly, hence the WRITE_ONCE().
- * The ->len field is used by rcu_barrier() and friends to determine
- * if it must post a callback on this structure, and it is OK
- * for rcu_barrier() to sometimes post callbacks needlessly, but
- * absolutely not OK for it to ever miss posting a callback.
- */
-static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
- struct rcu_head *rhp, bool lazy)
-{
- WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
- if (lazy)
- rsclp->len_lazy++;
- smp_mb(); /* Ensure counts are updated before callback is enqueued. */
- rhp->next = NULL;
- *rsclp->tails[RCU_NEXT_TAIL] = rhp;
- rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
-}
-
-/*
- * Extract only the counts from the specified rcu_segcblist structure,
- * and place them in the specified rcu_cblist structure. This function
- * supports both callback orphaning and invocation, hence the separation
- * of counts and callbacks. (Callbacks ready for invocation must be
- * orphaned and adopted separately from pending callbacks, but counts
- * apply to all callbacks. Locking must be used to make sure that
- * both orphaned-callbacks lists are consistent.)
- */
-static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
- struct rcu_cblist *rclp)
-{
- rclp->len_lazy += rsclp->len_lazy;
- rclp->len += rsclp->len;
- rsclp->len_lazy = 0;
- WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
-}
-
-/*
- * Extract only those callbacks ready to be invoked from the specified
- * rcu_segcblist structure and place them in the specified rcu_cblist
- * structure.
- */
-static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
- struct rcu_cblist *rclp)
-{
- int i;
-
- if (!rcu_segcblist_ready_cbs(rsclp))
- return; /* Nothing to do. */
- *rclp->tail = rsclp->head;
- rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
- *rsclp->tails[RCU_DONE_TAIL] = NULL;
- rclp->tail = rsclp->tails[RCU_DONE_TAIL];
- for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
- if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
- rsclp->tails[i] = &rsclp->head;
-}
-
-/*
- * Extract only those callbacks still pending (not yet ready to be
- * invoked) from the specified rcu_segcblist structure and place them in
- * the specified rcu_cblist structure. Note that this loses information
- * about any callbacks that might have been partway done waiting for
- * their grace period. Too bad! They will have to start over.
- */
-static inline void
-rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
- struct rcu_cblist *rclp)
-{
- int i;
-
- if (!rcu_segcblist_pend_cbs(rsclp))
- return; /* Nothing to do. */
- *rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
- rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
- *rsclp->tails[RCU_DONE_TAIL] = NULL;
- for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
- rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
-}
-
-/*
- * Move the entire contents of the specified rcu_segcblist structure,
- * counts, callbacks, and all, to the specified rcu_cblist structure.
- * @@@ Why do we need this??? Moving early-boot CBs to NOCB lists?
- * @@@ Memory barrier needed? (Not if only used at boot time...)
- */
-static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp,
- struct rcu_cblist *rclp)
-{
- rcu_segcblist_extract_done_cbs(rsclp, rclp);
- rcu_segcblist_extract_pend_cbs(rsclp, rclp);
- rcu_segcblist_extract_count(rsclp, rclp);
-}
-
-/*
- * Insert counts from the specified rcu_cblist structure in the
- * specified rcu_segcblist structure.
- */
-static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
- struct rcu_cblist *rclp)
-{
-
- rsclp->len_lazy += rclp->len_lazy;
- /* ->len sampled locklessly. */
- WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
- rclp->len_lazy = 0;
- rclp->len = 0;
-}
-
-/*
- * Move callbacks from the specified rcu_cblist to the beginning of the
- * done-callbacks segment of the specified rcu_segcblist.
- */
-static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
- struct rcu_cblist *rclp)
-{
- int i;
-
- if (!rclp->head)
- return; /* No callbacks to move. */
- *rclp->tail = rsclp->head;
- rsclp->head = rclp->head;
- for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
- if (&rsclp->head == rsclp->tails[i])
- rsclp->tails[i] = rclp->tail;
- else
- break;
- rclp->head = NULL;
- rclp->tail = &rclp->head;
-}
-
-/*
- * Move callbacks from the specified rcu_cblist to the end of the
- * new-callbacks segment of the specified rcu_segcblist.
- */
-static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
- struct rcu_cblist *rclp)
-{
- if (!rclp->head)
- return; /* Nothing to do. */
- *rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
- rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
- rclp->head = NULL;
- rclp->tail = &rclp->head;
-}
-
-/*
- * Advance the callbacks in the specified rcu_segcblist structure based
- * on the current value passed in for the grace-period counter.
- */
-static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
- unsigned long seq)
-{
- int i, j;
-
- WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
- WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
-
- /*
- * Find all callbacks whose ->gp_seq numbers indicate that they
- * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
- */
- for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
- if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
- break;
- rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
- }
-
- /* If no callbacks moved, nothing more need be done. */
- if (i == RCU_WAIT_TAIL)
- return;
-
- /* Clean up tail pointers that might have been misordered above. */
- for (j = RCU_WAIT_TAIL; j < i; j++)
- rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
-
- /*
- * Callbacks moved, so clean up the misordered ->tails[] pointers
- * that now point into the middle of the list of ready-to-invoke
- * callbacks. The overall effect is to copy down the later pointers
- * into the gap that was created by the now-ready segments.
- */
- for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
- if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
- break; /* No more callbacks. */
- rsclp->tails[j] = rsclp->tails[i];
- rsclp->gp_seq[j] = rsclp->gp_seq[i];
- }
-}
-
-/*
- * "Accelerate" callbacks based on more-accurate grace-period information.
- * The reason for this is that RCU does not synchronize the beginnings and
- * ends of grace periods, and that callbacks are posted locally. This in
- * turn means that the callbacks must be labelled conservatively early
- * on, as getting exact information would degrade both performance and
- * scalability. When more accurate grace-period information becomes
- * available, previously posted callbacks can be "accelerated", marking
- * them to complete at the end of the earlier grace period.
- *
- * This function operates on an rcu_segcblist structure, and also the
- * grace-period sequence number at which new callbacks would become
- * ready to invoke.
- */
-static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
- unsigned long seq)
-{
- int i;
-
- WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
- WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
-
- /*
- * Find the segment preceding the oldest segment of callbacks
- * whose ->gp_seq[] completion is at or after that passed in via
- * "seq", skipping any empty segments. This oldest segment, along
- * with any later segments, can be merged in with any newly arrived
- * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
- * as their ->gp_seq[] grace-period completion sequence number.
- */
- for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
- if (rsclp->tails[i] != rsclp->tails[i - 1] &&
- ULONG_CMP_LT(rsclp->gp_seq[i], seq))
- break;
-
- /*
- * If all the segments contain callbacks that correspond to
- * earlier grace-period sequence numbers than "seq", leave.
- * Assuming that the rcu_segcblist structure has enough
- * segments in its arrays, this can only happen if some of
- * the non-done segments contain callbacks that really are
- * ready to invoke. This situation will get straightened
- * out by the next call to rcu_segcblist_advance().
- *
- * Also advance to the oldest segment of callbacks whose
- * ->gp_seq[] completion is at or after that passed in via "seq",
- * skipping any empty segments.
- */
- if (++i >= RCU_NEXT_TAIL)
- return false;
-
- /*
- * Merge all later callbacks, including newly arrived callbacks,
- * into the segment located by the for-loop above. Assign "seq"
- * as the ->gp_seq[] value in order to correctly handle the case
- * where there were no pending callbacks in the rcu_segcblist
- * structure other than in the RCU_NEXT_TAIL segment.
- */
- for (; i < RCU_NEXT_TAIL; i++) {
- rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
- rsclp->gp_seq[i] = seq;
- }
- return true;
-}
-
-/*
- * Scan the specified rcu_segcblist structure for callbacks that need
- * a grace period later than the one specified by "seq". We don't look
- * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
- * have a grace-period sequence number.
- */
-static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
- unsigned long seq)
-{
- int i;
-
- for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
- if (rsclp->tails[i - 1] != rsclp->tails[i] &&
- ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
- return true;
- return false;
-}
-
-/*
- * Interim function to return rcu_segcblist head pointer. Longer term, the
- * rcu_segcblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp)
-{
- return rsclp->head;
-}
-
-/*
- * Interim function to return rcu_segcblist head pointer. Longer term, the
- * rcu_segcblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
-{
- WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
- return rsclp->tails[RCU_NEXT_TAIL];
-}
-
-#endif /* __KERNEL_RCU_SEGCBLIST_H */
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index ed7c3d082b9f..132cd57c07f1 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -22,7 +22,7 @@
* Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
*
* For detailed explanation of Read-Copy Update mechanism see -
- * Documentation/RCU/ *.txt
+ * Documentation/RCU/ *.txt
*
*/

@@ -38,85 +38,13 @@

#include "rcu.h"

-/*
- * Initialize an rcu_batch structure to empty.
- */
-static inline void rcu_batch_init(struct rcu_batch *b)
-{
- b->head = NULL;
- b->tail = &b->head;
-}
-
-/*
- * Enqueue a callback onto the tail of the specified rcu_batch structure.
- */
-static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
-{
- *b->tail = head;
- b->tail = &head->next;
-}
-
-/*
- * Is the specified rcu_batch structure empty?
- */
-static inline bool rcu_batch_empty(struct rcu_batch *b)
-{
- return b->tail == &b->head;
-}
-
-/*
- * Are all batches empty for the specified srcu_struct?
- */
-static inline bool rcu_all_batches_empty(struct srcu_struct *sp)
-{
- return rcu_batch_empty(&sp->batch_done) &&
- rcu_batch_empty(&sp->batch_check1) &&
- rcu_batch_empty(&sp->batch_check0) &&
- rcu_batch_empty(&sp->batch_queue);
-}
-
-/*
- * Remove the callback at the head of the specified rcu_batch structure
- * and return a pointer to it, or return NULL if the structure is empty.
- */
-static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
-{
- struct rcu_head *head;
-
- if (rcu_batch_empty(b))
- return NULL;
-
- head = b->head;
- b->head = head->next;
- if (b->tail == &head->next)
- rcu_batch_init(b);
-
- return head;
-}
-
-/*
- * Move all callbacks from the rcu_batch structure specified by "from" to
- * the structure specified by "to".
- */
-static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
-{
- if (!rcu_batch_empty(from)) {
- *to->tail = from->head;
- to->tail = from->tail;
- rcu_batch_init(from);
- }
-}
-
static int init_srcu_struct_fields(struct srcu_struct *sp)
{
sp->completed = 0;
sp->srcu_gp_seq = 0;
spin_lock_init(&sp->queue_lock);
sp->srcu_state = SRCU_STATE_IDLE;
- rcu_batch_init(&sp->batch_queue);
- rcu_batch_init(&sp->batch_check0);
- rcu_batch_init(&sp->batch_check1);
- rcu_batch_init(&sp->batch_done);
+ rcu_segcblist_init(&sp->srcu_cblist);
INIT_DELAYED_WORK(&sp->work, process_srcu);
sp->per_cpu_ref = alloc_percpu(struct srcu_array);
return sp->per_cpu_ref ? 0 : -ENOMEM;
@@ -262,7 +190,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
{
if (WARN_ON(srcu_readers_active(sp)))
return; /* Leakage unless caller handles error. */
- if (WARN_ON(!rcu_all_batches_empty(sp)))
+ if (WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)))
return; /* Leakage unless caller handles error. */
flush_delayed_work(&sp->work);
if (WARN_ON(READ_ONCE(sp->srcu_state) != SRCU_STATE_IDLE))
@@ -318,6 +246,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
*/
static void srcu_gp_start(struct srcu_struct *sp)
{
+ rcu_segcblist_accelerate(&sp->srcu_cblist,
+ rcu_seq_snap(&sp->srcu_gp_seq));
WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1);
rcu_seq_start(&sp->srcu_gp_seq);
}
@@ -365,6 +295,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
{
rcu_seq_end(&sp->srcu_gp_seq);
WRITE_ONCE(sp->srcu_state, SRCU_STATE_DONE);
+
+ spin_lock_irq(&sp->queue_lock);
+ rcu_segcblist_advance(&sp->srcu_cblist,
+ rcu_seq_current(&sp->srcu_gp_seq));
+ spin_unlock_irq(&sp->queue_lock);
}

/*
@@ -403,7 +338,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
head->func = func;
spin_lock_irqsave(&sp->queue_lock, flags);
smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
- rcu_batch_queue(&sp->batch_queue, head);
+ rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
srcu_gp_start(sp);
queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
@@ -439,13 +374,13 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
/* steal the processing owner */
+ rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
srcu_gp_start(sp);
- rcu_batch_queue(&sp->batch_check0, head);
spin_unlock_irq(&sp->queue_lock);
/* give the processing owner to work_struct */
srcu_reschedule(sp, 0);
} else {
- rcu_batch_queue(&sp->batch_queue, head);
+ rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
spin_unlock_irq(&sp->queue_lock);
}

@@ -543,19 +478,6 @@ EXPORT_SYMBOL_GPL(srcu_batches_completed);
#define SRCU_INTERVAL 1

/*
- * Move any new SRCU callbacks to the first stage of the SRCU grace
- * period pipeline.
- */
-static void srcu_collect_new(struct srcu_struct *sp)
-{
- if (!rcu_batch_empty(&sp->batch_queue)) {
- spin_lock_irq(&sp->queue_lock);
- rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
- spin_unlock_irq(&sp->queue_lock);
- }
-}
-
-/*
* Core SRCU state machine. Advance callbacks from ->batch_check0 to
* ->batch_check1 and then to ->batch_done as readers drain.
*/
@@ -580,26 +502,7 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
idx = 1 ^ (sp->completed & 1);
if (!try_check_zero(sp, idx, trycount))
return; /* readers present, retry after SRCU_INTERVAL */
-
- /*
- * The callbacks in ->batch_check1 have already done
- * with their first zero check and flip back when they were
- * enqueued on ->batch_check0 in a previous invocation of
- * srcu_advance_batches(). (Presumably try_check_zero()
- * returned false during that invocation, leaving the
- * callbacks stranded on ->batch_check1.) They are therefore
- * ready to invoke, so move them to ->batch_done.
- */
- rcu_batch_move(&sp->batch_done, &sp->batch_check1);
srcu_flip(sp);
-
- /*
- * The callbacks in ->batch_check0 just finished their
- * first check zero and flip, so move them to ->batch_check1
- * for future checking on the other idx.
- */
- rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
-
WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN2);
}

@@ -613,14 +516,6 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
trycount = trycount < 2 ? 2 : trycount;
if (!try_check_zero(sp, idx, trycount))
return; /* readers present, retry after SRCU_INTERVAL */
-
- /*
- * The callbacks in ->batch_check1 have now waited for
- * all pre-existing readers using both idx values. They are
- * therefore ready to invoke, so move them to ->batch_done.
- */
- rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-
srcu_gp_end(sp);
}
}
@@ -633,17 +528,26 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
*/
static void srcu_invoke_callbacks(struct srcu_struct *sp)
{
- int i;
- struct rcu_head *head;
+ struct rcu_cblist ready_cbs;
+ struct rcu_head *rhp;

- for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
- head = rcu_batch_dequeue(&sp->batch_done);
- if (!head)
- break;
+ spin_lock_irq(&sp->queue_lock);
+ if (!rcu_segcblist_ready_cbs(&sp->srcu_cblist)) {
+ spin_unlock_irq(&sp->queue_lock);
+ return;
+ }
+ rcu_cblist_init(&ready_cbs);
+ rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs);
+ spin_unlock_irq(&sp->queue_lock);
+ rhp = rcu_cblist_dequeue(&ready_cbs);
+ for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
local_bh_disable();
- head->func(head);
+ rhp->func(rhp);
local_bh_enable();
}
+ spin_lock_irq(&sp->queue_lock);
+ rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs);
+ spin_unlock_irq(&sp->queue_lock);
}

/*
@@ -654,9 +558,9 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
{
bool pending = true;

- if (rcu_all_batches_empty(sp)) {
+ if (rcu_segcblist_empty(&sp->srcu_cblist)) {
spin_lock_irq(&sp->queue_lock);
- if (rcu_all_batches_empty(sp) &&
+ if (rcu_segcblist_empty(&sp->srcu_cblist) &&
READ_ONCE(sp->srcu_state) == SRCU_STATE_DONE) {
WRITE_ONCE(sp->srcu_state, SRCU_STATE_IDLE);
pending = false;
@@ -677,7 +581,6 @@ void process_srcu(struct work_struct *work)

sp = container_of(work, struct srcu_struct, work.work);

- srcu_collect_new(sp);
srcu_advance_batches(sp, 1);
srcu_invoke_callbacks(sp);
srcu_reschedule(sp, SRCU_INTERVAL);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 93889ff21dbb..4f62651588ea 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -30,7 +30,7 @@
#include <linux/seqlock.h>
#include <linux/swait.h>
#include <linux/stop_machine.h>
-#include "rcu_segcblist.h"
+#include <linux/rcu_segcblist.h>

/*
* Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
--
2.5.2