Re: [net-next v1 02/16] net: page_pool: create hooks for custom page providers

From: Ilias Apalodimas
Date: Tue Dec 12 2023 - 03:08:06 EST


Hi Mina,

Apologies for not participating in the party earlier.

On Fri, 8 Dec 2023 at 02:52, Mina Almasry <almasrymina@xxxxxxxxxx> wrote:
>
> From: Jakub Kicinski <kuba@xxxxxxxxxx>
>
> The page providers which try to reuse the same pages will
> need to hold onto the ref, even if page gets released from
> the pool - as in releasing the page from the pp just transfers
> the "ownership" reference from pp to the provider, and provider
> will wait for other references to be gone before feeding this
> page back into the pool.
>
> Signed-off-by: Jakub Kicinski <kuba@xxxxxxxxxx>
> Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx>
>
> ---
>
> This is implemented by Jakub in his RFC:
> https://lore.kernel.org/netdev/f8270765-a27b-6ccf-33ea-cda097168d79@xxxxxxxxxx/T/
>
> I take no credit for the idea or implementation; I only added minor
> edits to make this workable with device memory TCP, and removed some
> hacky test code. This is a critical dependency of device memory TCP
> and thus I'm pulling it into this series to make it revewable and
> mergable.
>
> RFC v3 -> v1
> - Removed unusued mem_provider. (Yunsheng).
> - Replaced memory_provider & mp_priv with netdev_rx_queue (Jakub).
>
> ---
> include/net/page_pool/types.h | 12 ++++++++++
> net/core/page_pool.c | 43 +++++++++++++++++++++++++++++++----
> 2 files changed, 50 insertions(+), 5 deletions(-)
>
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index ac286ea8ce2d..0e9fa79a5ef1 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -51,6 +51,7 @@ struct pp_alloc_cache {
> * @dev: device, for DMA pre-mapping purposes
> * @netdev: netdev this pool will serve (leave as NULL if none or multiple)
> * @napi: NAPI which is the sole consumer of pages, otherwise NULL
> + * @queue: struct netdev_rx_queue this page_pool is being created for.
> * @dma_dir: DMA mapping direction
> * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
> * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
> @@ -63,6 +64,7 @@ struct page_pool_params {
> int nid;
> struct device *dev;
> struct napi_struct *napi;
> + struct netdev_rx_queue *queue;
> enum dma_data_direction dma_dir;
> unsigned int max_len;
> unsigned int offset;
> @@ -125,6 +127,13 @@ struct page_pool_stats {
> };
> #endif
>
> +struct memory_provider_ops {
> + int (*init)(struct page_pool *pool);
> + void (*destroy)(struct page_pool *pool);
> + struct page *(*alloc_pages)(struct page_pool *pool, gfp_t gfp);
> + bool (*release_page)(struct page_pool *pool, struct page *page);
> +};
> +
> struct page_pool {
> struct page_pool_params_fast p;
>
> @@ -174,6 +183,9 @@ struct page_pool {
> */
> struct ptr_ring ring;
>
> + void *mp_priv;
> + const struct memory_provider_ops *mp_ops;
> +
> #ifdef CONFIG_PAGE_POOL_STATS
> /* recycle stats are per-cpu to avoid locking */
> struct page_pool_recycle_stats __percpu *recycle_stats;
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index ca1b3b65c9b5..f5c84d2a4510 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -25,6 +25,8 @@
>
> #include "page_pool_priv.h"
>
> +static DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);

We could add the existing page pool mechanisms as another 'provider',
but I assume this is coded like this for performance reasons (IOW skip
the expensive ptr call for the default case?)

> +
> #define DEFER_TIME (msecs_to_jiffies(1000))
> #define DEFER_WARN_INTERVAL (60 * HZ)
>
> @@ -174,6 +176,7 @@ static int page_pool_init(struct page_pool *pool,
> const struct page_pool_params *params)
> {
> unsigned int ring_qsize = 1024; /* Default */
> + int err;
>
> memcpy(&pool->p, &params->fast, sizeof(pool->p));
> memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
> @@ -234,10 +237,25 @@ static int page_pool_init(struct page_pool *pool,
> /* Driver calling page_pool_create() also call page_pool_destroy() */
> refcount_set(&pool->user_cnt, 1);
>
> + if (pool->mp_ops) {
> + err = pool->mp_ops->init(pool);
> + if (err) {
> + pr_warn("%s() mem-provider init failed %d\n",
> + __func__, err);
> + goto free_ptr_ring;
> + }
> +
> + static_branch_inc(&page_pool_mem_providers);
> + }
> +
> if (pool->p.flags & PP_FLAG_DMA_MAP)
> get_device(pool->p.dev);
>
> return 0;
> +
> +free_ptr_ring:
> + ptr_ring_cleanup(&pool->ring, NULL);
> + return err;
> }
>
> static void page_pool_uninit(struct page_pool *pool)
> @@ -519,7 +537,10 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
> return page;
>
> /* Slow-path: cache empty, do real allocation */
> - page = __page_pool_alloc_pages_slow(pool, gfp);
> + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)

Why do we need && pool->mp_ops? On the init function, we only bump
page_pool_mem_providers if the ops are there

> + page = pool->mp_ops->alloc_pages(pool, gfp);
> + else
> + page = __page_pool_alloc_pages_slow(pool, gfp);
> return page;
> }
> EXPORT_SYMBOL(page_pool_alloc_pages);
> @@ -576,10 +597,13 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
> void page_pool_return_page(struct page_pool *pool, struct page *page)
> {
> int count;
> + bool put;
>
> - __page_pool_release_page_dma(pool, page);
> -
> - page_pool_clear_pp_info(page);
> + put = true;
> + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)

ditto

> + put = pool->mp_ops->release_page(pool, page);
> + else
> + __page_pool_release_page_dma(pool, page);
>
> /* This may be the last page returned, releasing the pool, so
> * it is not safe to reference pool afterwards.
> @@ -587,7 +611,10 @@ void page_pool_return_page(struct page_pool *pool, struct page *page)
> count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
> trace_page_pool_state_release(pool, page, count);
>
> - put_page(page);
> + if (put) {
> + page_pool_clear_pp_info(page);
> + put_page(page);
> + }
> /* An optimization would be to call __free_pages(page, pool->p.order)
> * knowing page is not part of page-cache (thus avoiding a
> * __page_cache_release() call).
> @@ -857,6 +884,12 @@ static void __page_pool_destroy(struct page_pool *pool)
>
> page_pool_unlist(pool);
> page_pool_uninit(pool);
> +
> + if (pool->mp_ops) {

Same here. Using a mix of pool->mp_ops and page_pool_mem_providers
will work, but since we always check the ptr on init, can't we simply
rely on page_pool_mem_providers for the rest of the code?

Thanks
/Ilias
> + pool->mp_ops->destroy(pool);
> + static_branch_dec(&page_pool_mem_providers);
> + }
> +
> kfree(pool);
> }
>
> --
> 2.43.0.472.g3155946c3a-goog
>