Re: [RFC PATCH v3 04/12] netdev: support binding dma-buf to netdevice

From: Paolo Abeni
Date: Thu Nov 09 2023 - 03:30:49 EST


On Sun, 2023-11-05 at 18:44 -0800, Mina Almasry wrote:
[...]
> +int netdev_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
> + struct netdev_dmabuf_binding **out)
> +{
> + struct netdev_dmabuf_binding *binding;
> + struct scatterlist *sg;
> + struct dma_buf *dmabuf;
> + unsigned int sg_idx, i;
> + unsigned long virtual;
> + int err;
> +
> + if (!capable(CAP_NET_ADMIN))
> + return -EPERM;
> +
> + dmabuf = dma_buf_get(dmabuf_fd);
> + if (IS_ERR_OR_NULL(dmabuf))
> + return -EBADFD;
> +
> + binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
> + dev_to_node(&dev->dev));
> + if (!binding) {
> + err = -ENOMEM;
> + goto err_put_dmabuf;
> + }
> +
> + xa_init_flags(&binding->bound_rxq_list, XA_FLAGS_ALLOC);
> +
> + refcount_set(&binding->ref, 1);
> +
> + binding->dmabuf = dmabuf;
> +
> + binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
> + if (IS_ERR(binding->attachment)) {
> + err = PTR_ERR(binding->attachment);
> + goto err_free_binding;
> + }
> +
> + binding->sgt = dma_buf_map_attachment(binding->attachment,
> + DMA_BIDIRECTIONAL);
> + if (IS_ERR(binding->sgt)) {
> + err = PTR_ERR(binding->sgt);
> + goto err_detach;
> + }
> +
> + /* For simplicity we expect to make PAGE_SIZE allocations, but the
> + * binding can be much more flexible than that. We may be able to
> + * allocate MTU sized chunks here. Leave that for future work...
> + */
> + binding->chunk_pool = gen_pool_create(PAGE_SHIFT,
> + dev_to_node(&dev->dev));
> + if (!binding->chunk_pool) {
> + err = -ENOMEM;
> + goto err_unmap;
> + }
> +
> + virtual = 0;
> + for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
> + dma_addr_t dma_addr = sg_dma_address(sg);
> + struct dmabuf_genpool_chunk_owner *owner;
> + size_t len = sg_dma_len(sg);
> + struct page_pool_iov *ppiov;
> +
> + owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
> + dev_to_node(&dev->dev));
> + owner->base_virtual = virtual;
> + owner->base_dma_addr = dma_addr;
> + owner->num_ppiovs = len / PAGE_SIZE;
> + owner->binding = binding;
> +
> + err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
> + dma_addr, len, dev_to_node(&dev->dev),
> + owner);
> + if (err) {
> + err = -EINVAL;
> + goto err_free_chunks;
> + }
> +
> + owner->ppiovs = kvmalloc_array(owner->num_ppiovs,
> + sizeof(*owner->ppiovs),
> + GFP_KERNEL);
> + if (!owner->ppiovs) {
> + err = -ENOMEM;
> + goto err_free_chunks;
> + }
> +
> + for (i = 0; i < owner->num_ppiovs; i++) {
> + ppiov = &owner->ppiovs[i];
> + ppiov->owner = owner;
> + refcount_set(&ppiov->refcount, 1);
> + }
> +
> + dma_addr += len;

I'm trying to wrap my head around the whole infrastructure... The above line
is confusing. Why do you increment dma_addr? It will be re-initialized from
sg_dma_address(sg) at the start of the next iteration.

Cheers,

Paolo