[PATCH 5.3 138/197] io_uring: Fix broken links with offloading

From: Greg Kroah-Hartman
Date: Sun Oct 27 2019 - 17:27:11 EST


From: Pavel Begunkov <asml.silence@xxxxxxxxx>

[ Upstream commit fb5ccc98782f654778cb8d96ba8a998304f9a51f ]

io_sq_thread() processes sqes by 8 without considering links. As a
result, links will be randomely subdivided.

The easiest way to fix it is to call io_get_sqring() inside
io_submit_sqes() as do io_ring_submit().

Downsides:
1. This removes optimisation of not grabbing mm_struct for fixed files
2. It submitting all sqes in one go, without finer-grained sheduling
with cq processing.

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
fs/io_uring.c | 58 +++++++++++++++++++++++++++------------------------
1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 79f9c9f7b298e..518042cc6628b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -687,6 +687,14 @@ static unsigned io_cqring_events(struct io_cq_ring *ring)
return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
}

+static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
+{
+ struct io_sq_ring *ring = ctx->sq_ring;
+
+ /* make sure SQ entry isn't read before tail */
+ return smp_load_acquire(&ring->r.tail) - ctx->cached_sq_head;
+}
+
/*
* Find and free completed poll iocbs
*/
@@ -2268,8 +2276,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
return false;
}

-static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
- unsigned int nr, bool has_user, bool mm_fault)
+static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
+ bool has_user, bool mm_fault)
{
struct io_submit_state state, *statep = NULL;
struct io_kiocb *link = NULL;
@@ -2282,6 +2290,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
}

for (i = 0; i < nr; i++) {
+ struct sqe_submit s;
+
+ if (!io_get_sqring(ctx, &s))
+ break;
+
/*
* If previous wasn't linked and we have a linked command,
* that's the end of the chain. Submit the previous link.
@@ -2290,16 +2303,16 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
io_queue_sqe(ctx, link, &link->submit);
link = NULL;
}
- prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
+ prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;

if (unlikely(mm_fault)) {
- io_cqring_add_event(ctx, sqes[i].sqe->user_data,
+ io_cqring_add_event(ctx, s.sqe->user_data,
-EFAULT);
} else {
- sqes[i].has_user = has_user;
- sqes[i].needs_lock = true;
- sqes[i].needs_fixed_file = true;
- io_submit_sqe(ctx, &sqes[i], statep, &link);
+ s.has_user = has_user;
+ s.needs_lock = true;
+ s.needs_fixed_file = true;
+ io_submit_sqe(ctx, &s, statep, &link);
submitted++;
}
}
@@ -2314,7 +2327,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,

static int io_sq_thread(void *data)
{
- struct sqe_submit sqes[IO_IOPOLL_BATCH];
struct io_ring_ctx *ctx = data;
struct mm_struct *cur_mm = NULL;
mm_segment_t old_fs;
@@ -2329,8 +2341,8 @@ static int io_sq_thread(void *data)

timeout = inflight = 0;
while (!kthread_should_park()) {
- bool all_fixed, mm_fault = false;
- int i;
+ bool mm_fault = false;
+ unsigned int to_submit;

if (inflight) {
unsigned nr_events = 0;
@@ -2363,7 +2375,8 @@ static int io_sq_thread(void *data)
timeout = jiffies + ctx->sq_thread_idle;
}

- if (!io_get_sqring(ctx, &sqes[0])) {
+ to_submit = io_sqring_entries(ctx);
+ if (!to_submit) {
/*
* We're polling. If we're within the defined idle
* period, then let us spin without work before going
@@ -2394,7 +2407,8 @@ static int io_sq_thread(void *data)
/* make sure to read SQ tail after writing flags */
smp_mb();

- if (!io_get_sqring(ctx, &sqes[0])) {
+ to_submit = io_sqring_entries(ctx);
+ if (!to_submit) {
if (kthread_should_park()) {
finish_wait(&ctx->sqo_wait, &wait);
break;
@@ -2412,19 +2426,8 @@ static int io_sq_thread(void *data)
ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP;
}

- i = 0;
- all_fixed = true;
- do {
- if (all_fixed && io_sqe_needs_user(sqes[i].sqe))
- all_fixed = false;
-
- i++;
- if (i == ARRAY_SIZE(sqes))
- break;
- } while (io_get_sqring(ctx, &sqes[i]));
-
/* Unless all new commands are FIXED regions, grab mm */
- if (!all_fixed && !cur_mm) {
+ if (!cur_mm) {
mm_fault = !mmget_not_zero(ctx->sqo_mm);
if (!mm_fault) {
use_mm(ctx->sqo_mm);
@@ -2432,8 +2435,9 @@ static int io_sq_thread(void *data)
}
}

- inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL,
- mm_fault);
+ to_submit = min(to_submit, ctx->sq_entries);
+ inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
+ mm_fault);

/* Commit SQ ring head once we've consumed all SQEs */
io_commit_sqring(ctx);
--
2.20.1