RE: [PATCH V2,net-next, 2/3] net: mana: Enable RX path to handle various MTU sizes

From: Haiyang Zhang
Date: Wed Apr 12 2023 - 10:39:36 EST




> -----Original Message-----
> From: Jacob Keller <jacob.e.keller@xxxxxxxxx>
> Sent: Tuesday, April 11, 2023 7:11 PM
> To: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx>; linux-hyperv@xxxxxxxxxxxxxxx;
> netdev@xxxxxxxxxxxxxxx
> Cc: Dexuan Cui <decui@xxxxxxxxxxxxx>; KY Srinivasan <kys@xxxxxxxxxxxxx>;
> Paul Rosswurm <paulros@xxxxxxxxxxxxx>; olaf@xxxxxxxxx;
> vkuznets@xxxxxxxxxx; davem@xxxxxxxxxxxxx; wei.liu@xxxxxxxxxx;
> edumazet@xxxxxxxxxx; kuba@xxxxxxxxxx; pabeni@xxxxxxxxxx;
> leon@xxxxxxxxxx; Long Li <longli@xxxxxxxxxxxxx>;
> ssengar@xxxxxxxxxxxxxxxxxxx; linux-rdma@xxxxxxxxxxxxxxx;
> daniel@xxxxxxxxxxxxx; john.fastabend@xxxxxxxxx; bpf@xxxxxxxxxxxxxxx;
> ast@xxxxxxxxxx; Ajay Sharma <sharmaajay@xxxxxxxxxxxxx>;
> hawk@xxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx
> Subject: Re: [PATCH V2,net-next, 2/3] net: mana: Enable RX path to handle
> various MTU sizes
>
>
>
> On 4/7/2023 1:59 PM, Haiyang Zhang wrote:
> > Update RX data path to allocate and use RX queue DMA buffers with
> > proper size based on potentially various MTU sizes.
> >
> > Signed-off-by: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx>
> >
> > ---
> > V2:
> > Refactored to multiple patches for readability. Suggested by Yunsheng Lin.
> >
> > ---
> > drivers/net/ethernet/microsoft/mana/mana_en.c | 188 +++++++++++-------
> > include/net/mana/mana.h | 13 +-
> > 2 files changed, 124 insertions(+), 77 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> > index 112c642dc89b..e5d5dea763f2 100644
> > --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> > +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> > @@ -1185,10 +1185,10 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq)
> > WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
> > }
> >
> > -static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
> > - struct xdp_buff *xdp)
> > +static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
> > + uint pkt_len, struct xdp_buff *xdp)
> > {
> > - struct sk_buff *skb = napi_build_skb(buf_va, PAGE_SIZE);
> > + struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size);
> >
> > if (!skb)
> > return NULL;
> > @@ -1196,11 +1196,12 @@ static struct sk_buff *mana_build_skb(void
> *buf_va, uint pkt_len,
> > if (xdp->data_hard_start) {
> > skb_reserve(skb, xdp->data - xdp->data_hard_start);
> > skb_put(skb, xdp->data_end - xdp->data);
> > - } else {
> > - skb_reserve(skb, XDP_PACKET_HEADROOM);
> > - skb_put(skb, pkt_len);
> > + return skb;
> > }
> >
> > + skb_reserve(skb, rxq->headroom);
> > + skb_put(skb, pkt_len);
> > +
> > return skb;
> > }
> >
> > @@ -1233,7 +1234,7 @@ static void mana_rx_skb(void *buf_va, struct
> mana_rxcomp_oob *cqe,
> > if (act != XDP_PASS && act != XDP_TX)
> > goto drop_xdp;
> >
> > - skb = mana_build_skb(buf_va, pkt_len, &xdp);
> > + skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp);
> >
> > if (!skb)
> > goto drop;
> > @@ -1282,14 +1283,72 @@ static void mana_rx_skb(void *buf_va, struct
> mana_rxcomp_oob *cqe,
> > u64_stats_update_end(&rx_stats->syncp);
> >
> > drop:
> > - WARN_ON_ONCE(rxq->xdp_save_page);
> > - rxq->xdp_save_page = virt_to_page(buf_va);
> > + WARN_ON_ONCE(rxq->xdp_save_va);
> > + /* Save for reuse */
> > + rxq->xdp_save_va = buf_va;
> >
> > ++ndev->stats.rx_dropped;
> >
> > return;
> > }
> >
> > +static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
> > + dma_addr_t *da, bool is_napi)
> > +{
> > + struct page *page;
> > + void *va;
> > +
> > + /* Reuse XDP dropped page if available */
> > + if (rxq->xdp_save_va) {
> > + va = rxq->xdp_save_va;
> > + rxq->xdp_save_va = NULL;
> > + } else if (rxq->alloc_size > PAGE_SIZE) {
> > + if (is_napi)
> > + va = napi_alloc_frag(rxq->alloc_size);
> > + else
> > + va = netdev_alloc_frag(rxq->alloc_size);
> > +
> > + if (!va)
> > + return NULL;
> > + } else {
> > + page = dev_alloc_page();
> > + if (!page)
> > + return NULL;
> > +
> > + va = page_to_virt(page);
> > + }
> > +
> > + *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
> > + DMA_FROM_DEVICE);
> > +
> > + if (dma_mapping_error(dev, *da)) {
> > + put_page(virt_to_head_page(va));
> > + return NULL;
> > + }
> > +
> > + return va;
> > +}
> > +
> > +/* Allocate frag for rx buffer, and save the old buf */
> > +static void mana_refill_rxoob(struct device *dev, struct mana_rxq *rxq,
> > + struct mana_recv_buf_oob *rxoob, void **old_buf)
> > +{
> > + dma_addr_t da;
> > + void *va;
> > +
> > + va = mana_get_rxfrag(rxq, dev, &da, true);
> > +
> > + if (!va)
> > + return;
> > +
> > + dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
> > + DMA_FROM_DEVICE);
> > + *old_buf = rxoob->buf_va;
> > +
> > + rxoob->buf_va = va;
> > + rxoob->sgl[0].address = da;
> > +}
> > +
>
> So you're pulling out these functions from the code below, which is
> good, but it makes it hard to tell what code actually changed.
>
> > static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
> > struct gdma_comp *cqe)
> > {
> > @@ -1299,10 +1358,8 @@ static void mana_process_rx_cqe(struct
> mana_rxq *rxq, struct mana_cq *cq,
> > struct mana_recv_buf_oob *rxbuf_oob;
> > struct mana_port_context *apc;
> > struct device *dev = gc->dev;
> > - void *new_buf, *old_buf;
> > - struct page *new_page;
> > + void *old_buf = NULL;
> > u32 curr, pktlen;
> > - dma_addr_t da;
> >
> > apc = netdev_priv(ndev);
> >
> > @@ -1345,40 +1402,11 @@ static void mana_process_rx_cqe(struct
> mana_rxq *rxq, struct mana_cq *cq,
> > rxbuf_oob = &rxq->rx_oobs[curr];
> > WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
> >
> > - /* Reuse XDP dropped page if available */
> > - if (rxq->xdp_save_page) {
> > - new_page = rxq->xdp_save_page;
> > - rxq->xdp_save_page = NULL;
> > - } else {
> > - new_page = alloc_page(GFP_ATOMIC);
> > - }
> > -
> > - if (new_page) {
> > - da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
> > - DMA_FROM_DEVICE);
> > -
> > - if (dma_mapping_error(dev, da)) {
> > - __free_page(new_page);
> > - new_page = NULL;
> > - }
> > - }
> > -
> > - new_buf = new_page ? page_to_virt(new_page) : NULL;
> > -
> > - if (new_buf) {
> > - dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize,
> > - DMA_FROM_DEVICE);
> > -
> > - old_buf = rxbuf_oob->buf_va;
> > -
> > - /* refresh the rxbuf_oob with the new page */
> > - rxbuf_oob->buf_va = new_buf;
> > - rxbuf_oob->buf_dma_addr = da;
> > - rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr;
> > - } else {
> > - old_buf = NULL; /* drop the packet if no memory */
> > - }
>
> Could you do this split into helper functions first in a separate change
> before adding support for handling various MTU sizes?
>
> Doing it that way would make it much easier to review what actually
> changes in that block of code.

Will do.

Thanks,
- Haiyang