Re: [PATCH for-next v5 5/7] RDMA/rxe: Allow registering MRs for On-Demand Paging

From: Jason Gunthorpe
Date: Mon Jun 12 2023 - 12:19:02 EST


On Thu, May 18, 2023 at 05:21:50PM +0900, Daisuke Matsuda wrote:

> +static void rxe_mr_set_xarray(struct rxe_mr *mr, unsigned long start,
> + unsigned long end, unsigned long *pfn_list)
> +{
> + unsigned long lower, upper, idx;
> + struct page *page;
> +
> + lower = rxe_mr_iova_to_index(mr, start);
> + upper = rxe_mr_iova_to_index(mr, end);
> +
> + /* make pages visible in xarray. no sleep while taking the lock */
> + spin_lock(&mr->page_list.xa_lock);
> + for (idx = lower; idx <= upper; idx++) {
> + page = hmm_pfn_to_page(pfn_list[idx]);
> + __xa_store(&mr->page_list, idx, page, GFP_ATOMIC);

The performance of all of these loops can be improved a lot by using
xas loops (the advanced XArray API) instead of calling __xa_store()
for each index.

> unsigned long cur_seq)
> @@ -54,3 +72,105 @@ static bool rxe_ib_invalidate_range(struct mmu_interval_notifier *mni,
> const struct mmu_interval_notifier_ops rxe_mn_ops = {
> .invalidate = rxe_ib_invalidate_range,
> };
> +
> +#define RXE_PAGEFAULT_RDONLY BIT(1)
> +#define RXE_PAGEFAULT_SNAPSHOT BIT(2)
> +static int rxe_odp_do_pagefault(struct rxe_mr *mr, u64 user_va, int bcnt, u32 flags)
> +{
> + int np;
> + u64 access_mask;
> + bool fault = !(flags & RXE_PAGEFAULT_SNAPSHOT);
> + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
> +
> + access_mask = ODP_READ_ALLOWED_BIT;
> + if (umem_odp->umem.writable && !(flags & RXE_PAGEFAULT_RDONLY))
> + access_mask |= ODP_WRITE_ALLOWED_BIT;
> +
> + /*
> + * ib_umem_odp_map_dma_and_lock() locks umem_mutex on success.
> + * Callers must release the lock later to let invalidation handler
> + * do its work again.
> + */
> + np = ib_umem_odp_map_dma_and_lock(umem_odp, user_va, bcnt,
> + access_mask, fault);
> + if (np < 0)
> + return np;
> +
> + /* umem_mutex is still locked here, so we can use hmm_pfn_to_page()
> + * safely to fetch pages in the range.

All the comments should follow the style of the first one (opening
"/*" on its own line), not the second.

> + */
> + rxe_mr_set_xarray(mr, user_va, user_va + bcnt, umem_odp->pfn_list);
> +
> + return np;
> +}
> +
> +static int rxe_odp_init_pages(struct rxe_mr *mr)
> +{
> + int ret;
> + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
> +
> + ret = rxe_odp_do_pagefault(mr, mr->umem->address, mr->umem->length,
> + RXE_PAGEFAULT_SNAPSHOT);

Probably suffix this with "and_lock"

> + mr->odp_enabled = true;
> + mr->umem = &umem_odp->umem;
> + mr->access = access_flags;
> + mr->ibmr.length = length;
> + mr->ibmr.iova = iova;
> + mr->page_offset = ib_umem_offset(&umem_odp->umem);
> +
> + err = rxe_odp_init_pages(mr);
> + if (err) {
> + ib_umem_odp_release(umem_odp);
> + return err;
> + }
> +
> + err = rxe_mr_fill_pages_from_sgt(mr, &umem_odp->umem.sgt_append.sgt);

Uh? What is this? The sgt is not used in the ODP mode?

> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> index b6fbd9b3d086..de5a982c7c7e 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -333,6 +333,8 @@ struct rxe_mr {
> u32 nbuf;
>
> struct xarray page_list;
> +
> + bool odp_enabled;

You can tell from the umem; there is no need for a flag.

Jason