[RFC PATCH net-next 11/11] net: enetc: add TX support for zero-copy XDP sockets

From: Vladimir Oltean
Date: Mon Feb 06 2023 - 05:10:14 EST


Schedule NAPI by hand from enetc_xsk_wakeup(), and send frames from the
XSK TX queue from NAPI context. Add them to the completion queue from
the enetc_clean_tx_ring() procedure which is common for all kinds of
traffic.

We reuse one of the TX rings for XDP (XDP_TX/XDP_REDIRECT) for XSK as
well. They are already cropped from the TX rings that the network stack
can use when XDP is enabled (with or without AF_XDP).

As for XDP_REDIRECT calling enetc's ndo_xdp_xmit, I'm not sure if that
can run simultaneously with enetc_poll() (on different CPUs, but towards
the same TXQ). I guess it probably can, but idk what to do about it.
The problem is that enetc_xdp_xmit() sends to
priv->xdp_tx_ring[smp_processor_id()], while enetc_xsk_xmit() and XDP_TX
send to priv->xdp_tx_ring[NAPI instance]. So when the NAPI instance runs
on a different CPU that the one it is numerically equal to, we should
have a lock that serializes XDP_REDIRECT with the others.

Signed-off-by: Vladimir Oltean <vladimir.oltean@xxxxxxx>
---
drivers/net/ethernet/freescale/enetc/enetc.c | 102 ++++++++++++++++++-
drivers/net/ethernet/freescale/enetc/enetc.h | 2 +
2 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 3990c006c011..bc0db788afc7 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -84,7 +84,7 @@ static void enetc_free_tx_swbd(struct enetc_bdr *tx_ring,
struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd);
struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd);

- if (tx_swbd->dma)
+ if (!tx_swbd->is_xsk && tx_swbd->dma)
enetc_unmap_tx_buff(tx_ring, tx_swbd);

if (xdp_frame) {
@@ -817,7 +817,8 @@ static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring,
rx_ring->xdp.xdp_tx_in_flight--;
}

-static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
+static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget,
+ int *xsk_confirmed)
{
int tx_frm_cnt = 0, tx_byte_cnt = 0, tx_win_drop = 0;
struct net_device *ndev = tx_ring->ndev;
@@ -854,7 +855,9 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
tx_win_drop++;
}

- if (tx_swbd->is_xdp_tx)
+ if (tx_swbd->is_xsk)
+ (*xsk_confirmed)++;
+ else if (tx_swbd->is_xdp_tx)
enetc_recycle_xdp_tx_buff(tx_ring, tx_swbd);
else if (likely(tx_swbd->dma))
enetc_unmap_tx_buff(tx_ring, tx_swbd);
@@ -1465,6 +1468,58 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
}
EXPORT_SYMBOL_GPL(enetc_xdp_xmit);

+static void enetc_xsk_map_tx_desc(struct enetc_tx_swbd *tx_swbd,
+ const struct xdp_desc *xsk_desc,
+ struct xsk_buff_pool *pool)
+{
+ dma_addr_t dma;
+
+ dma = xsk_buff_raw_get_dma(pool, xsk_desc->addr);
+ xsk_buff_raw_dma_sync_for_device(pool, dma, xsk_desc->len);
+
+ tx_swbd->dma = dma;
+ tx_swbd->len = xsk_desc->len;
+ tx_swbd->is_xsk = true;
+ tx_swbd->is_eof = true;
+}
+
+static bool enetc_xsk_xmit(struct net_device *ndev, struct xsk_buff_pool *pool,
+ u32 queue_id)
+{
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ struct xdp_desc *xsk_descs = pool->tx_descs;
+ struct enetc_tx_swbd tx_swbd = {0};
+ struct enetc_bdr *tx_ring;
+ u32 budget, batch;
+ int i, k;
+
+ tx_ring = priv->xdp_tx_ring[queue_id];
+
+ /* Shouldn't race with anyone because we are running in the softirq
+ * of the only CPU that sends packets to this TX ring
+ */
+ budget = min(enetc_bd_unused(tx_ring) - 1, ENETC_XSK_TX_BATCH);
+
+ batch = xsk_tx_peek_release_desc_batch(pool, budget);
+ if (!batch)
+ return true;
+
+ i = tx_ring->next_to_use;
+
+ for (k = 0; k < batch; k++) {
+ enetc_xsk_map_tx_desc(&tx_swbd, &xsk_descs[k], pool);
+ enetc_xdp_map_tx_buff(tx_ring, i, &tx_swbd, tx_swbd.len);
+ enetc_bdr_idx_inc(tx_ring, &i);
+ }
+
+ tx_ring->next_to_use = i;
+
+ xsk_tx_release(pool);
+ enetc_update_tx_ring_tail(tx_ring);
+
+ return budget != batch;
+}
+
static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
struct xdp_buff *xdp_buff, u16 size)
{
@@ -1881,6 +1936,7 @@ static int enetc_poll(struct napi_struct *napi, int budget)
struct enetc_bdr *rx_ring = &v->rx_ring;
struct xsk_buff_pool *pool;
struct bpf_prog *prog;
+ int xsk_confirmed = 0;
bool complete = true;
int work_done;
int i;
@@ -1888,7 +1944,8 @@ static int enetc_poll(struct napi_struct *napi, int budget)
enetc_lock_mdio();

for (i = 0; i < v->count_tx_rings; i++)
- if (!enetc_clean_tx_ring(&v->tx_ring[i], budget))
+ if (!enetc_clean_tx_ring(&v->tx_ring[i], budget,
+ &xsk_confirmed))
complete = false;

prog = rx_ring->xdp.prog;
@@ -1901,6 +1958,17 @@ static int enetc_poll(struct napi_struct *napi, int budget)
else
work_done = enetc_clean_rx_ring(rx_ring, napi, budget);

+ if (pool) {
+ if (xsk_confirmed)
+ xsk_tx_completed(pool, xsk_confirmed);
+
+ if (xsk_uses_need_wakeup(pool))
+ xsk_set_tx_need_wakeup(pool);
+
+ if (!enetc_xsk_xmit(rx_ring->ndev, pool, rx_ring->index))
+ complete = false;
+ }
+
if (work_done == budget)
complete = false;
if (work_done)
@@ -3122,7 +3190,31 @@ static int enetc_setup_xsk_pool(struct net_device *ndev,

int enetc_xsk_wakeup(struct net_device *ndev, u32 queue_id, u32 flags)
{
- /* xp_assign_dev() wants this; nothing needed for RX */
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ struct enetc_int_vector *v;
+ struct enetc_bdr *rx_ring;
+
+ if (!netif_running(ndev) || !netif_carrier_ok(ndev))
+ return -ENETDOWN;
+
+ if (queue_id >= priv->bdr_int_num)
+ return -ERANGE;
+
+ v = priv->int_vector[queue_id];
+ rx_ring = &v->rx_ring;
+
+ if (!rx_ring->xdp.xsk_pool || !rx_ring->xdp.prog)
+ return -EINVAL;
+
+ /* No way to kick TX by triggering a hardirq right away =>
+ * raise softirq. This might schedule NAPI on a CPU different than the
+ * smp_affinity of its IRQ would suggest, but that would happen anyway
+ * if, say, we change that affinity under heavy traffic.
+ * So enetc_poll() has to be prepared for it anyway.
+ */
+ if (!napi_if_scheduled_mark_missed(&v->napi))
+ napi_schedule(&v->napi);
+
return 0;
}

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index e1a746e37c9a..403f40473b52 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -36,6 +36,7 @@ struct enetc_tx_swbd {
u8 is_eof:1;
u8 is_xdp_tx:1;
u8 is_xdp_redirect:1;
+ u8 is_xsk:1;
u8 qbv_en:1;
};

@@ -86,6 +87,7 @@ struct enetc_xdp_data {
#define ENETC_RX_RING_DEFAULT_SIZE 2048
#define ENETC_TX_RING_DEFAULT_SIZE 2048
#define ENETC_DEFAULT_TX_WORK (ENETC_TX_RING_DEFAULT_SIZE / 2)
+#define ENETC_XSK_TX_BATCH ENETC_DEFAULT_TX_WORK

struct enetc_bdr_resource {
/* Input arguments saved for teardown */
--
2.34.1