Re: [BUG] io_uring: defer logic based on shared data

From: Jens Axboe
Date: Fri Oct 25 2019 - 12:03:27 EST


On 10/25/19 3:55 AM, Pavel Begunkov wrote:
> I found 2 problems with __io_sequence_defer().
>
> 1. It uses @sq_dropped but doesn't consider @cq_overflow.
> 2. @sq_dropped and @cq_overflow are write-shared with userspace, so
> they can be maliciously changed.
>
> See the liburing test I sent (test/defer, the *_hung() cases), which
> left an unkillable process for me.
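
Right, both of those counters live in ring memory that gets mmap'ed
writable into the application, so the kernel can't trust anything it
reads back from there. For reference, corrupting sq_dropped from
userspace takes little more than the below (rough sketch, untested,
error handling omitted; 'ring_fd' and 'p' are assumed to come from a
prior io_uring_setup() call):

#include <sys/mman.h>
#include <linux/io_uring.h>

static void scribble_on_sq_dropped(int ring_fd, struct io_uring_params *p)
{
	size_t sz = p->sq_off.array + p->sq_entries * sizeof(__u32);
	unsigned char *sq_ring;
	unsigned *dropped;

	/* The same SQ ring mapping every io_uring application sets up */
	sq_ring = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED,
		       ring_fd, IORING_OFF_SQ_RING);
	dropped = (unsigned *)(sq_ring + p->sq_off.dropped);

	/*
	 * __io_sequence_defer() adds this straight into the expected
	 * sequence, so a bogus value here means req->sequence never
	 * matches and deferred requests hang forever.
	 */
	*dropped = 0xdeadbeef;
}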

OK, how about the below. I'll split this in two, as it's really two
separate fixes.
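
The common thread in the first fix is to keep a kernel-private cached
copy of each counter and treat the shared ring field as write-only:
bump the cached value, mirror it out for the application to see, and
never read the shared field back. The pattern, lifted from the diff:

	ctx->cached_cq_overflow++;
	WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);

with the deferral logic then keying off the cached values only.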


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5d10984381cf..5d9d960c1c17 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -191,6 +191,7 @@ struct io_ring_ctx {
unsigned sq_entries;
unsigned sq_mask;
unsigned sq_thread_idle;
+ unsigned cached_sq_dropped;
struct io_uring_sqe *sq_sqes;

struct list_head defer_list;
@@ -208,6 +209,7 @@ struct io_ring_ctx {

struct {
unsigned cached_cq_tail;
+ unsigned cached_cq_overflow;
unsigned cq_entries;
unsigned cq_mask;
struct wait_queue_head cq_wait;
@@ -419,7 +421,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
static inline bool __io_sequence_defer(struct io_ring_ctx *ctx,
struct io_kiocb *req)
{
- return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
+ return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped
+ + ctx->cached_cq_overflow;
}

static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
@@ -590,9 +593,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, 0);
} else {
- unsigned overflow = READ_ONCE(ctx->rings->cq_overflow);
-
- WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1);
+ ctx->cached_cq_overflow++;
+ WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);
}
}

@@ -2601,7 +2603,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)

/* drop invalid entries */
ctx->cached_sq_head++;
- rings->sq_dropped++;
+ ctx->cached_sq_dropped++;
+ WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped);
return false;
}

@@ -2685,6 +2688,7 @@ static int io_sq_thread(void *data)

timeout = inflight = 0;
while (!kthread_should_park()) {
+ unsigned prev_cq, cur_cq;
bool mm_fault = false;
unsigned int to_submit;

@@ -2767,8 +2771,12 @@ static int io_sq_thread(void *data)
}

to_submit = min(to_submit, ctx->sq_entries);
+ prev_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow;
inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
mm_fault);
+ cur_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow;
+ if ((ctx->flags & IORING_SETUP_IOPOLL) && (prev_cq != cur_cq))
+ inflight -= cur_cq - prev_cq;

/* Commit SQ ring head once we've consumed all SQEs */
io_commit_sqring(ctx);
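
The last hunk is the second fix: with IORING_SETUP_IOPOLL, the sq_thread
only decrements inflight for completions it reaps through polling, so a
request that completes (or overflows) straight to the CQ ring during
submission would be waited on forever. Comparing cached_cq_tail +
cached_cq_overflow around io_submit_sqes() catches those inline events.
A worked example (numbers made up):

	prev_cq = 100;	/* before submitting 8 SQEs */
	/* 2 requests fail during submit and post CQEs inline */
	cur_cq = 102;
	inflight += 8;	/* return value of io_submit_sqes() */
	inflight -= 2;	/* cur_cq - prev_cq */

which leaves the poll loop waiting only on the 6 requests that will
actually surface through io_iopoll_check().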

--
Jens Axboe