Re: [PATCH RFC 3/8] memory-provider: dmabuf devmem memory provider

From: Mina Almasry
Date: Mon Nov 13 2023 - 08:42:38 EST


On Mon, Nov 13, 2023 at 5:00 AM Yunsheng Lin <linyunsheng@xxxxxxxxxx> wrote:
>
> From: Mina Almasry <almasrymina@xxxxxxxxxx>
>
> Implement a memory provider that allocates dmabuf devmem page_pool_iovs.
>
> Support of PP_FLAG_DMA_MAP and PP_FLAG_DMA_SYNC_DEV is omitted for
> simplicity.
>
> The provider receives a reference to the struct netdev_dmabuf_binding
> via the pool->mp_priv pointer. The driver needs to set this pointer for
> the provider in the page_pool_params.
>
> The provider obtains a reference on the netdev_dmabuf_binding, which
> guarantees the binding and the underlying mapping remain alive until
> the provider is destroyed.
>
> Signed-off-by: Willem de Bruijn <willemb@xxxxxxxxxx>
> Signed-off-by: Kaiyuan Zhang <kaiyuanz@xxxxxxxxxx>
> Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx>
> Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
> ---
> include/net/page_pool/types.h | 28 +++++++++++
> net/core/page_pool.c | 93 +++++++++++++++++++++++++++++++++++
> 2 files changed, 121 insertions(+)
>
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index 5e4fcd45ba50..52e4cf98ebc6 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -124,6 +124,7 @@ struct mem_provider;
>
> enum pp_memory_provider_type {
> __PP_MP_NONE, /* Use system allocator directly */
> + PP_MP_DMABUF_DEVMEM, /* dmabuf devmem provider */
> };
>
> struct pp_memory_provider_ops {
> @@ -134,6 +135,33 @@ struct pp_memory_provider_ops {
> void (*free_pages)(struct page_pool *pool, struct page *page);
> };
>
> +extern const struct pp_memory_provider_ops dmabuf_devmem_ops;
> +
> +struct page_pool_iov {
> + unsigned long res0;
> + unsigned long pp_magic;
> + struct page_pool *pp;
> + struct page *page; /* dmabuf memory provider specific field */
> + unsigned long dma_addr;
> + atomic_long_t pp_frag_count;
> + unsigned int res1;
> + refcount_t _refcount;
> +};
> +
> +#define PAGE_POOL_MATCH(pg, iov) \
> + static_assert(offsetof(struct page, pg) == \
> + offsetof(struct page_pool_iov, iov))
> +PAGE_POOL_MATCH(flags, res0);
> +PAGE_POOL_MATCH(pp_magic, pp_magic);
> +PAGE_POOL_MATCH(pp, pp);
> +PAGE_POOL_MATCH(_pp_mapping_pad, page);
> +PAGE_POOL_MATCH(dma_addr, dma_addr);
> +PAGE_POOL_MATCH(pp_frag_count, pp_frag_count);
> +PAGE_POOL_MATCH(_mapcount, res1);
> +PAGE_POOL_MATCH(_refcount, _refcount);
> +#undef PAGE_POOL_MATCH
> +static_assert(sizeof(struct page_pool_iov) <= sizeof(struct page));
> +

You're doing exactly what I think you're doing, and what was nacked in RFC v1.

You've converted 'struct page_pool_iov' to essentially become a
duplicate of 'struct page'. Then you're casting page_pool_iov* into
struct page* in mp_dmabuf_devmem_alloc_pages(), and calling mm APIs
like page_ref_*() on the page_pool_iov*, because you've fooled the mm
stack into thinking dma-buf memory is a struct page.

RFC v1 was almost exactly the same, except that it just allocated
'struct page' directly, instead of allocating another struct that is
identical to struct page and casting it into struct page.

I don't think what you're doing here reverses the nacks I got in RFC
v1. You also did not CC on this proposal any dma-buf or mm people who
would bring up these concerns again.

> struct page_pool {
> struct page_pool_params p;
>
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index 6c502bea842b..1bd7a2306f09 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -231,6 +231,9 @@ static int page_pool_init(struct page_pool *pool,
> switch (pool->p.memory_provider) {
> case __PP_MP_NONE:
> break;
> + case PP_MP_DMABUF_DEVMEM:
> + pool->mp_ops = &dmabuf_devmem_ops;
> + break;
> default:
> err = -EINVAL;
> goto free_ptr_ring;
> @@ -1010,3 +1013,93 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
> }
> }
> EXPORT_SYMBOL(page_pool_update_nid);
> +
> +/*** "Dmabuf devmem memory provider" ***/
> +
> +static int mp_dmabuf_devmem_init(struct page_pool *pool)
> +{
> + if (pool->p.flags & PP_FLAG_DMA_MAP ||
> + pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
> + return -EOPNOTSUPP;
> + return 0;
> +}
> +
> +static struct page *mp_dmabuf_devmem_alloc_pages(struct page_pool *pool,
> + gfp_t gfp)
> +{
> + struct page_pool_iov *ppiov;
> + struct page *page;
> + dma_addr_t dma;
> +
> + ppiov = kvmalloc(sizeof(*ppiov), gfp | __GFP_ZERO);
> + if (!ppiov)
> + return NULL;
> +
> + page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
> + if (!page) {
> + kvfree(ppiov);
> + return NULL;
> + }
> +
> + dma = dma_map_page_attrs(pool->p.dev, page, 0,
> + (PAGE_SIZE << pool->p.order),
> + pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC |
> + DMA_ATTR_WEAK_ORDERING);
> + if (dma_mapping_error(pool->p.dev, dma)) {
> + put_page(page);
> + kvfree(ppiov);
> + return NULL;
> + }
> +
> + ppiov->pp = pool;
> + ppiov->pp_magic = PP_SIGNATURE;
> + ppiov->page = page;
> + refcount_set(&ppiov->_refcount, 1);
> + page_pool_fragment_page((struct page *)ppiov, 1);
> + page_pool_set_dma_addr((struct page *)ppiov, dma);
> + pool->pages_state_hold_cnt++;
> + trace_page_pool_state_hold(pool, (struct page *)ppiov,
> + pool->pages_state_hold_cnt);
> + return (struct page *)ppiov;
> +}
> +
> +static void mp_dmabuf_devmem_destroy(struct page_pool *pool)
> +{
> +}
> +
> +static void mp_dmabuf_devmem_release_page(struct page_pool *pool,
> + struct page *page)
> +{
> + struct page_pool_iov *ppiov = (struct page_pool_iov *)page;
> + dma_addr_t dma;
> +
> + dma = page_pool_get_dma_addr(page);
> +
> + /* When page is unmapped, it cannot be returned to our pool */
> + dma_unmap_page_attrs(pool->p.dev, dma,
> + PAGE_SIZE << pool->p.order, pool->p.dma_dir,
> + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
> + page_pool_set_dma_addr(page, 0);
> +
> + put_page(ppiov->page);
> +}
> +
> +static void mp_dmabuf_devmem_free_pages(struct page_pool *pool,
> + struct page *page)
> +{
> + int count;
> +
> + count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
> + trace_page_pool_state_release(pool, page, count);
> +
> + kvfree(page);
> +}
> +
> +const struct pp_memory_provider_ops dmabuf_devmem_ops = {
> + .init = mp_dmabuf_devmem_init,
> + .destroy = mp_dmabuf_devmem_destroy,
> + .alloc_pages = mp_dmabuf_devmem_alloc_pages,
> + .release_page = mp_dmabuf_devmem_release_page,
> + .free_pages = mp_dmabuf_devmem_free_pages,
> +};
> +EXPORT_SYMBOL(dmabuf_devmem_ops);
> --
> 2.33.0
>


--
Thanks,
Mina