Re: [RESEND PATCH] fs/aio: Replace kmap{,_atomic}() with kmap_local_page()

From: Fabio M. De Francesco
Date: Thu Dec 01 2022 - 09:29:27 EST


On domenica 16 ottobre 2022 17:06:56 CET Fabio M. De Francesco wrote:
> The use of kmap() and kmap_atomic() are being deprecated in favor of
> kmap_local_page().
>
> There are two main problems with kmap(): (1) It comes with an overhead as
> the mapping space is restricted and protected by a global lock for
> synchronization and (2) it also requires global TLB invalidation when the
> kmap’s pool wraps and it might block when the mapping space is fully
> utilized until a slot becomes available.
>
> With kmap_local_page() the mappings are per thread, CPU local, can take
> page faults, and can be called from any context (including interrupts).
> It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore,
> the tasks can be preempted and, when they are scheduled to run again, the
> kernel virtual addresses are restored and still valid.
>
> Since its use in fs/aio.c is safe everywhere, it should be preferred.
>
> Therefore, replace kmap() and kmap_atomic() with kmap_local_page() in
> fs/aio.c.
>
> Tested with xfstests on a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel
> with HIGHMEM64GB enabled.
>
> Cc: "Venkataramanan, Anirudh" <anirudh.venkataramanan@xxxxxxxxx>
> Suggested-by: Ira Weiny <ira.weiny@xxxxxxxxx>
> Reviewed-by: Ira Weiny <ira.weiny@xxxxxxxxx>
Reviewed-by: Jeff Moyer <jmoyer@xxxxxxxxxx>
> Signed-off-by: Fabio M. De Francesco <fmdefrancesco@xxxxxxxxx>
> ---

I'm sorry to resend again. Last time I forgot to forward the "Reviewed-by:"
tag from Jeff (thanks!).

>
> I've tested with "./check -g aio". The tests in this group fail 3/26
> times, with and without my patch. Therefore, these changes don't introduce
> further errors. I'm not aware of any further tests I may run, so that
> any suggestions would be precious and much appreciated :-)
>
> I'm resending this patch because some recipients were missing in the
> previous submissions. In the meantime I'm also adding some more information
> in the commit message. There are no changes in the code.
>
> fs/aio.c | 32 ++++++++++++++++----------------
> 1 file changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/fs/aio.c b/fs/aio.c
> index 3c249b938632..343fea0c6d1a 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -567,7 +567,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned
int
> nr_events) ctx->user_id = ctx->mmap_base;
> ctx->nr_events = nr_events; /* trusted copy */
>
> - ring = kmap_atomic(ctx->ring_pages[0]);
> + ring = kmap_local_page(ctx->ring_pages[0]);
> ring->nr = nr_events; /* user copy */
> ring->id = ~0U;
> ring->head = ring->tail = 0;
> @@ -575,7 +575,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned
int
> nr_events) ring->compat_features = AIO_RING_COMPAT_FEATURES;
> ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
> ring->header_length = sizeof(struct aio_ring);
> - kunmap_atomic(ring);
> + kunmap_local(ring);
> flush_dcache_page(ctx->ring_pages[0]);
>
> return 0;
> @@ -678,9 +678,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct
> mm_struct *mm) * we are protected from page migration
> * changes ring_pages by -
>ring_lock.
> */
> - ring = kmap_atomic(ctx-
>ring_pages[0]);
> + ring = kmap_local_page(ctx-
>ring_pages[0]);
> ring->id = ctx->id;
> - kunmap_atomic(ring);
> + kunmap_local(ring);
> return 0;
> }
>
> @@ -1024,9 +1024,9 @@ static void user_refill_reqs_available(struct kioctx
> *ctx) * against ctx->completed_events below will make sure we do the
> * safe/right thing.
> */
> - ring = kmap_atomic(ctx->ring_pages[0]);
> + ring = kmap_local_page(ctx->ring_pages[0]);
> head = ring->head;
> - kunmap_atomic(ring);
> + kunmap_local(ring);
>
> refill_reqs_available(ctx, head, ctx->tail);
> }
> @@ -1132,12 +1132,12 @@ static void aio_complete(struct aio_kiocb *iocb)
> if (++tail >= ctx->nr_events)
> tail = 0;
>
> - ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
> + ev_page = kmap_local_page(ctx->ring_pages[pos /
AIO_EVENTS_PER_PAGE]);
> event = ev_page + pos % AIO_EVENTS_PER_PAGE;
>
> *event = iocb->ki_res;
>
> - kunmap_atomic(ev_page);
> + kunmap_local(ev_page);
> flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
>
> pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
> @@ -1151,10 +1151,10 @@ static void aio_complete(struct aio_kiocb *iocb)
>
> ctx->tail = tail;
>
> - ring = kmap_atomic(ctx->ring_pages[0]);
> + ring = kmap_local_page(ctx->ring_pages[0]);
> head = ring->head;
> ring->tail = tail;
> - kunmap_atomic(ring);
> + kunmap_local(ring);
> flush_dcache_page(ctx->ring_pages[0]);
>
> ctx->completed_events++;
> @@ -1214,10 +1214,10 @@ static long aio_read_events_ring(struct kioctx *ctx,
> mutex_lock(&ctx->ring_lock);
>
> /* Access to ->ring_pages here is protected by ctx->ring_lock. */
> - ring = kmap_atomic(ctx->ring_pages[0]);
> + ring = kmap_local_page(ctx->ring_pages[0]);
> head = ring->head;
> tail = ring->tail;
> - kunmap_atomic(ring);
> + kunmap_local(ring);
>
> /*
> * Ensure that once we've read the current tail pointer, that
> @@ -1249,10 +1249,10 @@ static long aio_read_events_ring(struct kioctx *ctx,
> avail = min(avail, nr - ret);
> avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
>
> - ev = kmap(page);
> + ev = kmap_local_page(page);
> copy_ret = copy_to_user(event + ret, ev + pos,
> sizeof(*ev) * avail);
> - kunmap(page);
> + kunmap_local(ev);
>
> if (unlikely(copy_ret)) {
> ret = -EFAULT;
> @@ -1264,9 +1264,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
> head %= ctx->nr_events;
> }
>
> - ring = kmap_atomic(ctx->ring_pages[0]);
> + ring = kmap_local_page(ctx->ring_pages[0]);
> ring->head = head;
> - kunmap_atomic(ring);
> + kunmap_local(ring);
> flush_dcache_page(ctx->ring_pages[0]);
>
> pr_debug("%li h%u t%u\n", ret, head, tail);
> --
> 2.36.1