Re: Crash with PREEMPT_RT on aarch64 machine

From: Mel Gorman
Date: Thu Dec 01 2022 - 12:09:55 EST


On Wed, Nov 30, 2022 at 08:22:10PM +0000, Mel Gorman wrote:
> > extern void rt_mutex_base_init(struct rt_mutex_base *rtb);
> > diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> > index 7779ee8abc2a0..e3cc673e0c988 100644
> > --- a/kernel/locking/rtmutex.c
> > +++ b/kernel/locking/rtmutex.c
> > @@ -97,7 +97,7 @@ rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
> > if (rt_mutex_has_waiters(lock))
> > val |= RT_MUTEX_HAS_WAITERS;
> >
> > - WRITE_ONCE(lock->owner, (struct task_struct *)val);
> > + WARN_ON_ONCE(cmpxchg_acquire(&lock->owner, RT_MUTEX_HAS_WAITERS, val) != RT_MUTEX_HAS_WAITERS);
> > }
> >
> > static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
> > @@ -106,6 +106,17 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
> > ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
> > }
> >
> > +static __always_inline void
> > +rt_mutex_set_owner_pi(struct rt_mutex_base *lock, struct task_struct *owner)
> > +{
>
> What does pi mean in this context? I think the naming here might
> misleading. rt_mutex_set_owner_pi is used when initialising and when
> clearing the owner. rt_mutex_set_owner is set when acquiring the lock.
>
> Consider renaming rt_mutex_set_owner_pi to rt_mutex_clear_owner. The init
> could still use rt_mutex_set_owner as an extra barrier is not a big deal
> during init if the straight assignment was unpopular. The init could also
> do a plain assignment because it cannot have any waiters yet.
>
> What is less obvious is if rt_mutex_clear_owner should have explicit release
> semantics to pair with rt_mutex_set_owner. It looks like it might not
> matter because at least some paths end up having release semantics anyway
> due to a spinlock but I didn't check all cases and it's potentially fragile.
>

There are not release semantics so, this? It passed 1 iteration but will
leave it running 10 times overnight

---
kernel/locking/rtmutex.c | 48 +++++++++++++++++++++++++++++++++++---------
kernel/locking/rtmutex_api.c | 6 +++---
2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 7779ee8abc2a..35212f260148 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -89,15 +89,33 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
* set this bit before looking at the lock.
*/

-static __always_inline void
-rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
+static __always_inline struct task_struct *
+rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
{
unsigned long val = (unsigned long)owner;

if (rt_mutex_has_waiters(lock))
val |= RT_MUTEX_HAS_WAITERS;

- WRITE_ONCE(lock->owner, (struct task_struct *)val);
+ return (struct task_struct *)val;
+}
+
+
+static __always_inline void
+rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
+{
+ /*
+ * lock->wait_lock is held but explicit acquire semantics are needed
+ * for a new lock owner so WRITE_ONCE is insufficient.
+ */
+ xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
+}
+
+static __always_inline void
+rt_mutex_clear_owner(struct rt_mutex_base *lock)
+{
+ /* lock->wait_lock is held so the unlock provides release semantics. */
+ WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
}

static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
@@ -106,7 +124,8 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

-static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
+static __always_inline void
+fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
{
unsigned long owner, *p = (unsigned long *) &lock->owner;

@@ -172,8 +191,19 @@ static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
* still set.
*/
owner = READ_ONCE(*p);
- if (owner & RT_MUTEX_HAS_WAITERS)
- WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
+ if (owner & RT_MUTEX_HAS_WAITERS) {
+ /*
+ * See comments in rt_mutex_set_owner and
+ * rt_mutex_clear_owner on why xchg_acquire is used for
+ * updating owner for locking and WRITE_ONCE for unlocking.
+ * WRITE_ONCE would work for both here although other lock
+ * acquisitions may enter the slow path unnecessarily.
+ */
+ if (acquire_lock)
+ xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
+ else
+ WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
+ }
}

/*
@@ -1243,7 +1273,7 @@ static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
* try_to_take_rt_mutex() sets the lock waiters bit
* unconditionally. Clean this up.
*/
- fixup_rt_mutex_waiters(lock);
+ fixup_rt_mutex_waiters(lock, true);

return ret;
}
@@ -1604,7 +1634,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
* try_to_take_rt_mutex() sets the waiter bit
* unconditionally. We might have to fix that up.
*/
- fixup_rt_mutex_waiters(lock);
+ fixup_rt_mutex_waiters(lock, true);

trace_contention_end(lock, ret);

@@ -1719,7 +1749,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
* try_to_take_rt_mutex() sets the waiter bit unconditionally.
* We might have to fix that up:
*/
- fixup_rt_mutex_waiters(lock);
+ fixup_rt_mutex_waiters(lock, true);
debug_rt_mutex_free_waiter(&waiter);

trace_contention_end(lock, 0);
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
index 900220941caa..cb9fdff76a8a 100644
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -267,7 +267,7 @@ void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
{
debug_rt_mutex_proxy_unlock(lock);
- rt_mutex_set_owner(lock, NULL);
+ rt_mutex_clear_owner(lock);
}

/**
@@ -382,7 +382,7 @@ int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
* have to fix that up.
*/
- fixup_rt_mutex_waiters(lock);
+ fixup_rt_mutex_waiters(lock, true);
raw_spin_unlock_irq(&lock->wait_lock);

return ret;
@@ -438,7 +438,7 @@ bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
* have to fix that up.
*/
- fixup_rt_mutex_waiters(lock);
+ fixup_rt_mutex_waiters(lock, false);

raw_spin_unlock_irq(&lock->wait_lock);