Re: PI futexes + lock stealing woes

From: Peter Zijlstra
Date: Thu Dec 07 2017 - 05:45:27 EST


On Wed, Dec 06, 2017 at 08:09:28PM -0600, Gratian Crisan wrote:
>
> Peter Zijlstra writes:
>
> > On Wed, Nov 29, 2017 at 11:56:05AM -0600, Julia Cartwright wrote:
> >
> >> fixup_owner() used to have additional seemingly relevant checks in place
> >> that were removed 73d786bd043eb ("futex: Rework inconsistent
> >> rt_mutex/futex_q state").
> >
> > *groan*... yes. I completely missed that extra case also applied to
> > requeue_pi (requeue always did hurt my brain).
>
> FWIW I have been testing for about two days now with the fixup_owner()
> hunk of 73d786bd043eb ("futex: Rework inconsistent rt_mutex/futex_q
> state") reverted. So far it hasn't hit the race/deadlock. It normally
> takes around 8 hours to reproduce.

Yeah, that should more-or-less work I think. But I'm trying to see if
there's anything saner we can do, but so far my brain keeps slipping
off.

At the very least I want to kill the various wait_lock lockbreaks in
there, those hurt my brain and make me nervous as hell. That fixup does
_3_ consecutive wait_lock sections, and it becomes a very complicated
story to argue why that's not riddled with holes.

For now I have something like the below; which obviously doesn't
compile yet. Let me grab lunch and such things before attempting more.

---
kernel/futex.c | 33 ++++++++++++++++++++++++++++++++-
kernel/locking/rtmutex.c | 27 +++++++++++++++++++--------
kernel/locking/rtmutex_common.h | 1 +
3 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 76ed5921117a..8ad5221fbd84 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2303,14 +2303,35 @@ static void unqueue_me_pi(struct futex_q *q)
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
struct task_struct *newowner)
{
- u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
struct futex_pi_state *pi_state = q->pi_state;
u32 uval, uninitialized_var(curval), newval;
struct task_struct *oldowner;
+ u32 newtid;
int ret;

raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

+ if (!newowner) {
+ if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
+ ret = 0;
+ goto out_unlock;
+ }
+
+ newowner = rt_mutex_owner(&pi_state->pi_mutex);
+ if (WARN_ON_ONCE(!newowner)) {
+ /*
+ * We just attempted a trylock; since that failed there
+ * must be an owner, right?
+ */
+ ret = -EFUCKED; /* XXX: check return paths */
+ goto out_unlock;
+ }
+
+ /* OK we have a newowner, fixup uval */
+ }
+
+ newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
+
oldowner = pi_state->owner;
/* Owner died? */
if (!pi_state->owner)
@@ -2443,6 +2464,16 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
goto out;
}

+ /*
+ * If we didn't get the lock; check if nobody stole it from us.
+ * In that case, we need to fix up the uval to point to them
+ * instead of us, otherwise bad things happen.
+ */
+ if (q->pi_state->owner == current) {
+ ret = fixup_pi_state_owner(uaddr, q, NULL);
+ goto out;
+ }
+
/*
* Paranoia check. If we did not take the lock, then we should not be
* the owner of the rt_mutex.
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6f3dba6e4e9e..21705f2fae1c 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1290,13 +1290,25 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
return ret;
}

+static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock)
+{
+ int ret = try_to_take_rt_mutex(lock, current, NULL);
+
+ /*
+ * try_to_take_rt_mutex() sets the lock waiters bit
+ * unconditionally. Clean this up.
+ */
+ fixup_rt_mutex_waiters(lock);
+
+ return ret;
+}
+
/*
* Slow path try-lock function:
*/
static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
{
unsigned long flags;
- int ret;

/*
* If the lock already has an owner we fail to get the lock.
@@ -1312,13 +1324,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
*/
raw_spin_lock_irqsave(&lock->wait_lock, flags);

- ret = try_to_take_rt_mutex(lock, current, NULL);
-
- /*
- * try_to_take_rt_mutex() sets the lock waiters bit
- * unconditionally. Clean this up.
- */
- fixup_rt_mutex_waiters(lock);
+ ret = __rt_mutex_slowtrylock(lock);

raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

@@ -1505,6 +1511,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
return rt_mutex_slowtrylock(lock);
}

+int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
+{
+ return __rt_mutex_slowtrylock(lock);
+}
+
/**
* rt_mutex_timed_lock - lock a rt_mutex interruptible
* the timeout structure is provided
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 124e98ca0b17..68686b3ec3c1 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -148,6 +148,7 @@ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter);

extern int rt_mutex_futex_trylock(struct rt_mutex *l);
+extern int __rt_mutex_futex_trylock(struct rt_mutex *l);

extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,