[PATCH 1/9] locking/pvqspinlock: Code relocation and extraction

From: Waiman Long
Date: Tue Jul 07 2020 - 20:46:51 EST


Move pv_kick_node() and the unlock functions further up in the file, and
extract the hash-and-lock code from pv_wait_head_or_lock() into a new
helper, pv_hash_lock(). There is no functional change.
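
Roughly, after this change the hashing step in the wait loop of
pv_wait_head_or_lock() reads as follows (simplified sketch of the diff
below; pv_hash_lock() returns true when it found the lock free and
took it):

	if (!hashed) {	/* ONCE */
		if (pv_hash_lock(lock, pn))
			goto gotlock;
		hashed = true;
	}

This keeps the hashing, the memory-ordering comment and the xchg() of
_Q_SLOW_VAL together in one helper instead of inline in the wait loop.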

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
kernel/locking/qspinlock_paravirt.h | 302 ++++++++++++++--------------
1 file changed, 156 insertions(+), 146 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index e84d21aa0722..8eec58320b85 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -55,6 +55,7 @@ struct pv_node {

/*
* Hybrid PV queued/unfair lock
+ * ----------------------------
*
* By replacing the regular queued_spin_trylock() with the function below,
* it will be called once when a lock waiter enter the PV slowpath before
@@ -259,6 +260,156 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
BUG();
}

+/*
+ * Insert lock into hash and set _Q_SLOW_VAL.
+ * Return true if lock acquired.
+ */
+static inline bool pv_hash_lock(struct qspinlock *lock, struct pv_node *node)
+{
+	struct qspinlock **lp = pv_hash(lock, node);
+
+	/*
+	 * We must hash before setting _Q_SLOW_VAL, such that
+	 * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
+	 * we'll be sure to be able to observe our hash entry.
+	 *
+	 *   [S] <hash>                 [Rmw] l->locked == _Q_SLOW_VAL
+	 *       MB                           RMB
+	 * [RmW] l->locked = _Q_SLOW_VAL [L] <unhash>
+	 *
+	 * Matches the smp_rmb() in __pv_queued_spin_unlock().
+	 */
+	if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) {
+		/*
+		 * The lock was free and now we own the lock.
+		 * Change the lock value back to _Q_LOCKED_VAL
+		 * and unhash the table.
+		 */
+		WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+		WRITE_ONCE(*lp, NULL);
+		return true;
+	}
+	return false;
+}
+
+/*
+ * Called after setting next->locked = 1 when we're the lock owner.
+ *
+ * Instead of waking the waiters stuck in pv_wait_node() advance their state
+ * such that they're waiting in pv_wait_head_or_lock(), this avoids a
+ * wake/sleep cycle.
+ */
+static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
+{
+	struct pv_node *pn = (struct pv_node *)node;
+
+	/*
+	 * If the vCPU is indeed halted, advance its state to match that of
+	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
+	 * observe its next->locked value and advance itself.
+	 *
+	 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
+	 *
+	 * The write to next->locked in arch_mcs_spin_unlock_contended()
+	 * must be ordered before the read of pn->state in the cmpxchg()
+	 * below for the code to work correctly. To guarantee full ordering
+	 * irrespective of the success or failure of the cmpxchg(),
+	 * a relaxed version with explicit barrier is used. The control
+	 * dependency will order the reading of pn->state before any
+	 * subsequent writes.
+	 */
+	smp_mb__before_atomic();
+	if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
+	    != vcpu_halted)
+		return;
+
+	/*
+	 * Put the lock into the hash table and set the _Q_SLOW_VAL.
+	 *
+	 * As this is the same vCPU that will check the _Q_SLOW_VAL value and
+	 * the hash table later on at unlock time, no atomic instruction is
+	 * needed.
+	 */
+	WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
+	(void)pv_hash(lock, pn);
+}
+
+/*
+ * PV versions of the unlock fastpath and slowpath functions to be used
+ * instead of queued_spin_unlock().
+ */
+__visible void
+__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
+{
+	struct pv_node *node;
+
+	if (unlikely(locked != _Q_SLOW_VAL)) {
+		WARN(!debug_locks_silent,
+		     "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n",
+		     (unsigned long)lock, atomic_read(&lock->val));
+		return;
+	}
+
+	/*
+	 * A failed cmpxchg doesn't provide any memory-ordering guarantees,
+	 * so we need a barrier to order the read of the node data in
+	 * pv_unhash *after* we've read the lock being _Q_SLOW_VAL.
+	 *
+	 * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL.
+	 */
+	smp_rmb();
+
+	/*
+	 * Since the above failed to release, this must be the SLOW path.
+	 * Therefore start by looking up the blocked node and unhashing it.
+	 */
+	node = pv_unhash(lock);
+
+	/*
+	 * Now that we have a reference to the (likely) blocked pv_node,
+	 * release the lock.
+	 */
+	smp_store_release(&lock->locked, 0);
+
+	/*
+	 * At this point the memory pointed at by lock can be freed/reused,
+	 * however we can still use the pv_node to kick the CPU.
+	 * The other vCPU may not really be halted, but kicking an active
+	 * vCPU is harmless other than the additional latency in completing
+	 * the unlock.
+	 */
+	lockevent_inc(pv_kick_unlock);
+	pv_kick(node->cpu);
+}
+
+/*
+ * Include the architecture specific callee-save thunk of the
+ * __pv_queued_spin_unlock(). This thunk is put together with
+ * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
+ * function close to each other sharing consecutive instruction cachelines.
+ * Alternatively, architecture specific version of __pv_queued_spin_unlock()
+ * can be defined.
+ */
+#include <asm/qspinlock_paravirt.h>
+
+#ifndef __pv_queued_spin_unlock
+__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+{
+	u8 locked;
+
+	/*
+	 * We must not unlock if SLOW, because in that case we must first
+	 * unhash. Otherwise it would be possible to have multiple @lock
+	 * entries, which would be BAD.
+	 */
+	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
+	if (likely(locked == _Q_LOCKED_VAL))
+		return;
+
+	__pv_queued_spin_unlock_slowpath(lock, locked);
+}
+#endif /* __pv_queued_spin_unlock */
+
/*
* Return true if when it is time to check the previous node which is not
* in a running state.
@@ -350,48 +501,6 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
*/
}

-/*
- * Called after setting next->locked = 1 when we're the lock owner.
- *
- * Instead of waking the waiters stuck in pv_wait_node() advance their state
- * such that they're waiting in pv_wait_head_or_lock(), this avoids a
- * wake/sleep cycle.
- */
-static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
-{
-	struct pv_node *pn = (struct pv_node *)node;
-
-	/*
-	 * If the vCPU is indeed halted, advance its state to match that of
-	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
-	 * observe its next->locked value and advance itself.
-	 *
-	 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
-	 *
-	 * The write to next->locked in arch_mcs_spin_unlock_contended()
-	 * must be ordered before the read of pn->state in the cmpxchg()
-	 * below for the code to work correctly. To guarantee full ordering
-	 * irrespective of the success or failure of the cmpxchg(),
-	 * a relaxed version with explicit barrier is used. The control
-	 * dependency will order the reading of pn->state before any
-	 * subsequent writes.
-	 */
-	smp_mb__before_atomic();
-	if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
-	    != vcpu_halted)
-		return;
-
-	/*
-	 * Put the lock into the hash table and set the _Q_SLOW_VAL.
-	 *
-	 * As this is the same vCPU that will check the _Q_SLOW_VAL value and
-	 * the hash table later on at unlock time, no atomic instruction is
-	 * needed.
-	 */
-	WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
-	(void)pv_hash(lock, pn);
-}
-
/*
* Wait for l->locked to become clear and acquire the lock;
* halt the vcpu after a short spin.
@@ -403,16 +512,13 @@ static u32
 pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
-	struct qspinlock **lp = NULL;
 	int waitcnt = 0;
 	int loop;
-
 	/*
-	 * If pv_kick_node() already advanced our state, we don't need to
+	 * If pv_kick_node() had already advanced our state, we don't need to
 	 * insert ourselves into the hash table anymore.
 	 */
-	if (READ_ONCE(pn->state) == vcpu_hashed)
-		lp = (struct qspinlock **)1;
+	bool hashed = (READ_ONCE(pn->state) == vcpu_hashed);

 	/*
 	 * Tracking # of slowpath locking operations
@@ -439,30 +545,10 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 		clear_pending(lock);


-		if (!lp) { /* ONCE */
-			lp = pv_hash(lock, pn);
-
-			/*
-			 * We must hash before setting _Q_SLOW_VAL, such that
-			 * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
-			 * we'll be sure to be able to observe our hash entry.
-			 *
-			 *   [S] <hash>                 [Rmw] l->locked == _Q_SLOW_VAL
-			 *       MB                           RMB
-			 * [RmW] l->locked = _Q_SLOW_VAL [L] <unhash>
-			 *
-			 * Matches the smp_rmb() in __pv_queued_spin_unlock().
-			 */
-			if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) {
-				/*
-				 * The lock was free and now we own the lock.
-				 * Change the lock value back to _Q_LOCKED_VAL
-				 * and unhash the table.
-				 */
-				WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
-				WRITE_ONCE(*lp, NULL);
+		if (!hashed) { /* ONCE */
+			if (pv_hash_lock(lock, pn))
 				goto gotlock;
-			}
+			hashed = true;
 		}
 		WRITE_ONCE(pn->state, vcpu_hashed);
 		lockevent_inc(pv_wait_head);
@@ -484,79 +570,3 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
gotlock:
return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL);
}
-
-/*
- * PV versions of the unlock fastpath and slowpath functions to be used
- * instead of queued_spin_unlock().
- */
-__visible void
-__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
-{
-	struct pv_node *node;
-
-	if (unlikely(locked != _Q_SLOW_VAL)) {
-		WARN(!debug_locks_silent,
-		     "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n",
-		     (unsigned long)lock, atomic_read(&lock->val));
-		return;
-	}
-
-	/*
-	 * A failed cmpxchg doesn't provide any memory-ordering guarantees,
-	 * so we need a barrier to order the read of the node data in
-	 * pv_unhash *after* we've read the lock being _Q_SLOW_VAL.
-	 *
-	 * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL.
-	 */
-	smp_rmb();
-
-	/*
-	 * Since the above failed to release, this must be the SLOW path.
-	 * Therefore start by looking up the blocked node and unhashing it.
-	 */
-	node = pv_unhash(lock);
-
-	/*
-	 * Now that we have a reference to the (likely) blocked pv_node,
-	 * release the lock.
-	 */
-	smp_store_release(&lock->locked, 0);
-
-	/*
-	 * At this point the memory pointed at by lock can be freed/reused,
-	 * however we can still use the pv_node to kick the CPU.
-	 * The other vCPU may not really be halted, but kicking an active
-	 * vCPU is harmless other than the additional latency in completing
-	 * the unlock.
-	 */
-	lockevent_inc(pv_kick_unlock);
-	pv_kick(node->cpu);
-}
-
-/*
- * Include the architecture specific callee-save thunk of the
- * __pv_queued_spin_unlock(). This thunk is put together with
- * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
- * function close to each other sharing consecutive instruction cachelines.
- * Alternatively, architecture specific version of __pv_queued_spin_unlock()
- * can be defined.
- */
-#include <asm/qspinlock_paravirt.h>
-
-#ifndef __pv_queued_spin_unlock
-__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
-{
-	u8 locked;
-
-	/*
-	 * We must not unlock if SLOW, because in that case we must first
-	 * unhash. Otherwise it would be possible to have multiple @lock
-	 * entries, which would be BAD.
-	 */
-	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
-	if (likely(locked == _Q_LOCKED_VAL))
-		return;
-
-	__pv_queued_spin_unlock_slowpath(lock, locked);
-}
-#endif /* __pv_queued_spin_unlock */
--
2.18.1

