Re: [BUG] io_uring: defer logic based on shared data

From: Pavel Begunkov
Date: Fri Oct 25 2019 - 12:22:16 EST


On 25/10/2019 19:03, Jens Axboe wrote:
> On 10/25/19 3:55 AM, Pavel Begunkov wrote:
>> I found 2 problems with __io_sequence_defer().
>>
>> 1. it uses @sq_dropped, but doesn't consider @cq_overflow
>> 2. @sq_dropped and @cq_overflow are write-shared with userspace, so
>> it can be maliciously changed.
>>
>> see sent liburing test (test/defer *_hung()), which left an unkillable
>> process for me
>
> OK, how about the below. I'll split this in two, as it's really two
> separate fixes.
cached_sq_dropped is good, but I was concerned about cached_cq_overflow.
io_cqring_fill_event() can be called in async, so shouldn't we do some
synchronisation then?

>
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 5d10984381cf..5d9d960c1c17 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -191,6 +191,7 @@ struct io_ring_ctx {
> unsigned sq_entries;
> unsigned sq_mask;
> unsigned sq_thread_idle;
> + unsigned cached_sq_dropped;
> struct io_uring_sqe *sq_sqes;
>
> struct list_head defer_list;
> @@ -208,6 +209,7 @@ struct io_ring_ctx {
>
> struct {
> unsigned cached_cq_tail;
> + unsigned cached_cq_overflow;
> unsigned cq_entries;
> unsigned cq_mask;
> struct wait_queue_head cq_wait;
> @@ -419,7 +421,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
> static inline bool __io_sequence_defer(struct io_ring_ctx *ctx,
> struct io_kiocb *req)
> {
> - return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
> + return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped
> + + ctx->cached_cq_overflow;
> }
>
> static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
> @@ -590,9 +593,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
> WRITE_ONCE(cqe->res, res);
> WRITE_ONCE(cqe->flags, 0);
> } else {
> - unsigned overflow = READ_ONCE(ctx->rings->cq_overflow);
> -
> - WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1);
> + ctx->cached_cq_overflow++;
> + WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);
> }
> }
>
> @@ -2601,7 +2603,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
>
> /* drop invalid entries */
> ctx->cached_sq_head++;
> - rings->sq_dropped++;
> + ctx->cached_sq_dropped++;
> + WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped);
> return false;
> }
>
> @@ -2685,6 +2688,7 @@ static int io_sq_thread(void *data)
>
> timeout = inflight = 0;
> while (!kthread_should_park()) {
> + unsigned prev_cq, cur_cq;
> bool mm_fault = false;
> unsigned int to_submit;
>
> @@ -2767,8 +2771,12 @@ static int io_sq_thread(void *data)
> }
>
> to_submit = min(to_submit, ctx->sq_entries);
> + prev_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow;
> inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
> mm_fault);
> + cur_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow;
> + if ((ctx->flags & IORING_SETUP_IOPOLL) && (prev_cq != cur_cq))
> + inflight -= cur_cq - prev_cq;
>
> /* Commit SQ ring head once we've consumed all SQEs */
> io_commit_sqring(ctx);
>

--
Yours sincerely,
Pavel Begunkov

Attachment: signature.asc
Description: OpenPGP digital signature