Re: [PATCH 4/8] io_uring: add support for futex wake and wait

From: Jens Axboe
Date: Wed Sep 27 2023 - 08:05:44 EST


On 9/27/23 3:05 AM, Peter Zijlstra wrote:
> On Thu, Sep 21, 2023 at 12:29:04PM -0600, Jens Axboe wrote:
>
>> +int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>> +{
>> + struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
>> + u32 flags;
>> +
>> + if (unlikely(sqe->fd || sqe->len || sqe->buf_index || sqe->file_index))
>> + return -EINVAL;
>> +
>> + iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
>> + iof->futex_val = READ_ONCE(sqe->addr2);
>> + iof->futex_mask = READ_ONCE(sqe->addr3);
>> + flags = READ_ONCE(sqe->futex_flags);
>> +
>> + if (flags & ~FUTEX2_VALID_MASK)
>> + return -EINVAL;
>> +
>> + iof->futex_flags = futex2_to_flags(flags);
>
> So prep does the flags conversion..
>
>> + if (!futex_flags_valid(iof->futex_flags))
>> + return -EINVAL;
>> +
>> + if (!futex_validate_input(iof->futex_flags, iof->futex_val) ||
>> + !futex_validate_input(iof->futex_flags, iof->futex_mask))
>> + return -EINVAL;
>> +
>> + return 0;
>> +}
>
>> +int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
>> +{
>> + struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
>> + struct io_ring_ctx *ctx = req->ctx;
>> + struct io_futex_data *ifd = NULL;
>> + struct futex_hash_bucket *hb;
>> + int ret;
>> +
>> + if (!iof->futex_mask) {
>> + ret = -EINVAL;
>> + goto done;
>> + }
>> +
>> + io_ring_submit_lock(ctx, issue_flags);
>> + ifd = io_alloc_ifd(ctx);
>> + if (!ifd) {
>> + ret = -ENOMEM;
>> + goto done_unlock;
>> + }
>> +
>> + req->async_data = ifd;
>> + ifd->q = futex_q_init;
>> + ifd->q.bitset = iof->futex_mask;
>> + ifd->q.wake = io_futex_wake_fn;
>> + ifd->req = req;
>> +
>> + ret = futex_wait_setup(iof->uaddr, iof->futex_val,
>> + futex2_to_flags(iof->futex_flags), &ifd->q, &hb);
>
> But then wait and..
>
>> + if (!ret) {
>> + hlist_add_head(&req->hash_node, &ctx->futex_list);
>> + io_ring_submit_unlock(ctx, issue_flags);
>> +
>> + futex_queue(&ifd->q, hb);
>> + return IOU_ISSUE_SKIP_COMPLETE;
>> + }
>> +
>> +done_unlock:
>> + io_ring_submit_unlock(ctx, issue_flags);
>> +done:
>> + if (ret < 0)
>> + req_set_fail(req);
>> + io_req_set_res(req, ret, 0);
>> + kfree(ifd);
>> + return IOU_OK;
>> +}
>> +
>> +int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags)
>> +{
>> + struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
>> + int ret;
>> +
>> + ret = futex_wake(iof->uaddr, futex2_to_flags(iof->futex_flags),
>
> ... wake do it both again?

Oops, good catch - yes, only the prep side should do the flags conversion,
of course. I'll fix that up.

> Also, I think we want to have wake do:
>
> 'FLAGS_STRICT | iof->futex_flags'
>
> See 43adf8449510 ("futex: FLAGS_STRICT"), I'm thinking that waking 0
> futexes should honour that request by waking 0, not 1 :-)

Thanks for the pointer, yeah agree that sounds sane. Most syscalls that
take a number/size that is zero will indeed return zero. I'll add a
test case for that too.

--
Jens Axboe