Re: [for-next PATCH 4/5] RDMA/rxe: refactor iova_to_vaddr

From: Fabio M. De Francesco
Date: Wed Nov 16 2022 - 07:49:59 EST


On venerdì 11 novembre 2022 05:30:29 CET Li Zhijian wrote:
> For IB_MR_TYPE_USER MR, iova_to_vaddr() will call kmap_local_page() to
> map page.
>
> Signed-off-by: Li Zhijian <lizhijian@xxxxxxxxxxx>
> ---
> drivers/infiniband/sw/rxe/rxe_loc.h | 1 +
> drivers/infiniband/sw/rxe/rxe_mr.c | 38 +++++++++++++++------------
> drivers/infiniband/sw/rxe/rxe_resp.c | 1 +
> drivers/infiniband/sw/rxe/rxe_verbs.h | 5 +++-
> 4 files changed, 27 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h
> b/drivers/infiniband/sw/rxe/rxe_loc.h index c2a5c8814a48..22a8c44d39c8
100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -73,6 +73,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
int
> length, int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info
> *dma, void *addr, int length, enum rxe_mr_copy_dir dir);
> void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length);
> +void rxe_unmap_vaddr(struct rxe_mr *mr, void *vaddr);
> struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
> enum rxe_mr_lookup_type type);
> int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c
> b/drivers/infiniband/sw/rxe/rxe_mr.c index a4e786b657b7..d26a4a33119c 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mr.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
> @@ -120,9 +120,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64
> length, u64 iova, struct ib_umem *umem;
> struct sg_page_iter sg_iter;
> int num_buf;
> - void *vaddr;
> int err;
> - int i;
>
> umem = ib_umem_get(&rxe->ib_dev, start, length, access);
> if (IS_ERR(umem)) {
> @@ -159,18 +157,9 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start,
u64
> length, u64 iova, num_buf = 0;
> }
>
> - vaddr =
page_address(sg_page_iter_page(&sg_iter));
> - if (!vaddr) {
> - pr_warn("%s: Unable to get virtual
address\n",
> - __func__);
> - err = -ENOMEM;
> - goto err_cleanup_map;
> - }
> -
> - buf->addr = (uintptr_t)vaddr;
> + buf->page = sg_page_iter_page(&sg_iter);
> num_buf++;
> buf++;
> -
> }
> }
>
> @@ -182,10 +171,6 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start,
u64
> length, u64 iova,
>
> return 0;
>
> -err_cleanup_map:
> - for (i = 0; i < mr->num_map; i++)
> - kfree(mr->map[i]);
> - kfree(mr->map);
> err_release_umem:
> ib_umem_release(umem);
> err_out:
> @@ -246,6 +231,12 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova,
int
> *m_out, int *n_out, }
> }
>
> +void rxe_unmap_vaddr(struct rxe_mr *mr, void *vaddr)
> +{
> + if (mr->ibmr.type == IB_MR_TYPE_USER)
> + kunmap_local(vaddr);
> +}
> +
> static void *__iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
> {
> size_t offset;
> @@ -258,9 +249,21 @@ static void *__iova_to_vaddr(struct rxe_mr *mr, u64
iova,
> int length) return NULL;
> }
>
> - return (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
> + if (mr->ibmr.type == IB_MR_TYPE_USER) {
> + char *paddr;
> + struct page *pg = mr->map[m]->buf[n].page;
> +
> + paddr = kmap_local_page(pg);
> + if (paddr == NULL) {
> + pr_warn("Failed to map page");
> + return NULL;
> + }

I know nothing about this code but I am here as a result of regular checks for
changes to HIGHMEM mappings across the entire kernel. So please forgive me if
I'm objecting on correct changes.

1) It looks like this code had a call to page_address() and you converted it
to mapping with kmap_local_page().

If page_address() is related and it used to work properly, the page you are
mapping cannot come from ZONE_HIGHMEM. Therefore, kmap_local_page() looks like
an overkill.

I'm probably missing something...

2) What made you think that kmap_local_page() might fail and return NULL?
AFAIK, kmap_local_page() won't ever return NULL, therefore there is no need to
check.

Regards,

Fabio

P.S.: I just noticed that the second entry in my list was missing from the
other email which should be discarded.



> + return paddr + offset;
> + } else
> + return (void *)(uintptr_t)mr->map[m]->buf[n].addr +
offset;
> }
>
> +/* must call rxe_unmap_vaddr to unmap vaddr */
> void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
> {
> if (mr->state != RXE_MR_STATE_VALID) {
> @@ -326,6 +329,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
> int length, dest = (dir == RXE_TO_MR_OBJ) ? va : addr;
>
> memcpy(dest, src, bytes);
> + rxe_unmap_vaddr(mr, va);
>
> length -= bytes;
> addr += bytes;
> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c
> b/drivers/infiniband/sw/rxe/rxe_resp.c index 483043dc4e89..765cb9f8538a
> 100644
> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
> @@ -636,6 +636,7 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
>
> *vaddr = value;
> spin_unlock_bh(&atomic_ops_lock);
> + rxe_unmap_vaddr(mr, vaddr);
>
> qp->resp.msn++;
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h
> b/drivers/infiniband/sw/rxe/rxe_verbs.h index acab785ba7e2..6080a4b32f09
> 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -280,7 +280,10 @@ enum rxe_mr_lookup_type {
> #define RXE_BUF_PER_MAP (PAGE_SIZE / sizeof(struct
rxe_phys_buf))
>
> struct rxe_phys_buf {
> - u64 addr;
> + union {
> + u64 addr; /* IB_MR_TYPE_MEM_REG */
> + struct page *page; /* IB_MR_TYPE_USER */
> + };
> };
>
> struct rxe_map {
> --
> 2.31.1