Re: [PATCH 8/8] io_uring: add support for vectored futex waits

From: Jens Axboe
Date: Tue Sep 26 2023 - 02:41:32 EST


After discussing this one with Thomas yesterday at KR2023, I had this
nagging feeling that something was still amiss. I took a closer look at
it, and there is an issue with the odd case of
futex_wait_multiple_setup() returning 1. It does so if a wakeup was
triggered during setup. That is fine, except that it then also unqueues
ALL the futexes at that point, which is unlike the normal wakeup path on
the io_uring side.

It'd be nice to unify those and leave the cleanup to the caller, but
since we also re-loop in that setup handler if nobody was woken AND we
use futex_unqueue_multiple() to see if we were woken to begin with,
I think it's cleaner to just note this fact in io_uring and deal with
it.

I'm folding in the below incremental for now. It has a few cleanups in
there too that I spotted while doing that, but the important bit is the
->futexv_unqueued part.

diff --git a/io_uring/futex.c b/io_uring/futex.c
index 0c07df8668aa..8a2b0a260d5b 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -23,6 +23,7 @@ struct io_futex {
unsigned long futexv_owned;
u32 futex_flags;
unsigned int futex_nr;
+ bool futexv_unqueued;
};

struct io_futex_data {
@@ -71,25 +72,21 @@ static void io_futexv_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
struct futex_vector *futexv = req->async_data;
- struct io_ring_ctx *ctx = req->ctx;
- int res = 0;

- io_tw_lock(ctx, ts);
+ io_tw_lock(req->ctx, ts);
+
+ if (!iof->futexv_unqueued) {
+ int res = futex_unqueue_multiple(futexv, iof->futex_nr);

- res = futex_unqueue_multiple(futexv, iof->futex_nr);
- if (res != -1)
- io_req_set_res(req, res, 0);
+ if (res != -1)
+ io_req_set_res(req, res, 0);
+ }

kfree(req->async_data);
req->flags &= ~REQ_F_ASYNC_DATA;
__io_futex_complete(req, ts);
}

-static bool io_futexv_claimed(struct io_futex *iof)
-{
- return test_bit(0, &iof->futexv_owned);
-}
-
static bool io_futexv_claim(struct io_futex *iof)
{
if (test_bit(0, &iof->futexv_owned) ||
@@ -238,6 +235,7 @@ int io_futexv_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}

iof->futexv_owned = 0;
+ iof->futexv_unqueued = 0;
req->flags |= REQ_F_ASYNC_DATA;
req->async_data = futexv;
return 0;
@@ -278,6 +276,18 @@ int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags)

ret = futex_wait_multiple_setup(futexv, iof->futex_nr, &woken);

+ /*
+ * We got woken while setting up, let that side do the completion.
+ * Note that futex_wait_multiple_setup() will have unqueued all
+ * the futexes in this case. Mark us as having done that already,
+ * since this is different from normal wakeup.
+ */
+ if (ret == 1) {
+ iof->futexv_unqueued = 1;
+ io_req_set_res(req, woken, 0);
+ goto skip;
+ }
+
/*
* The above call leaves us potentially non-running. This is fine
* for the sync syscall as it'll be blocking unless we already got
@@ -287,29 +297,23 @@ int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags)
__set_current_state(TASK_RUNNING);

/*
- * We got woken while setting up, let that side do the completion
+ * 0 return means that we successfully setup the waiters, and that
+ * nobody triggered a wakeup while we were doing so. If the wakeup
+ * happened post setup, the task_work will be run post this issue
+ * and under the submission lock.
*/
- if (io_futexv_claimed(iof)) {
+ if (!ret) {
+ hlist_add_head(&req->hash_node, &ctx->futex_list);
skip:
io_ring_submit_unlock(ctx, issue_flags);
return IOU_ISSUE_SKIP_COMPLETE;
}

/*
- * 0 return means that we successfully setup the waiters, and that
- * nobody triggered a wakeup while we were doing so. < 0 or 1 return
- * is either an error or we got a wakeup while setting up.
+ * Error case, ret is < 0. Mark the request as failed.
*/
- if (!ret) {
- hlist_add_head(&req->hash_node, &ctx->futex_list);
- goto skip;
- }
-
io_ring_submit_unlock(ctx, issue_flags);
- if (ret < 0)
- req_set_fail(req);
- else if (woken != -1)
- ret = woken;
+ req_set_fail(req);
io_req_set_res(req, ret, 0);
kfree(futexv);
req->flags &= ~REQ_F_ASYNC_DATA;

--
Jens Axboe