Re: [PATCH v3] Drop Tx network packet when Tx TmFIFO is full

From: Hans de Goede
Date: Mon Jan 22 2024 - 06:35:28 EST


Hi,

On 1/11/24 18:31, Liming Sun wrote:
> Starting from Linux 5.16 kernel, Tx timeout mechanism was added
> in the virtio_net driver which prints the "Tx timeout" warning
> message when a packet stays in Tx queue for too long. Below is an
> example of the reported message:
>
> "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
> queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
> last trans: 3079892256".
>
> This issue could happen when external host driver which drains the
> FIFO is restared, stopped or upgraded. To avoid such confusing
> "Tx timeout" messages, this commit adds logic to drop the outstanding
> Tx packet if it's not able to transmit in two seconds due to Tx FIFO
> full, which can be considered as congestion or out-of-resource drop.
>
> This commit also handles the special case that the packet is half-
> transmitted into the Tx FIFO. In such case, the packet is discarded
> with remaining length stored in vring->rem_padding. So paddings with
> zeros can be sent out when Tx space is available to maintain the
> integrity of the packet format. The padded packet will be dropped on
> the receiving side.
>
> Signed-off-by: Liming Sun <limings@xxxxxxxxxx>

Thank you for your patch/series, I've applied this patch
(series) to my review-hans branch:
https://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git/log/?h=review-hans

Note it will show up in the pdx86 review-hans branch once I've
pushed my local branch there, which might take a while.

I will include this patch in my next fixes pull-req to Linus
for the current kernel development cycle.

Regards,

Hans




> ---
> v2->v3:
> Updates for Ilpo's comments:
> - Revises commit message to avoid confusion.
> v2: Fixed formatting warning
> v1: Initial version
> ---
> drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++
> 1 file changed, 67 insertions(+)
>
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
> index 5c683b4eaf10..f39b7b9d2bfe 100644
> --- a/drivers/platform/mellanox/mlxbf-tmfifo.c
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> @@ -47,6 +47,9 @@
> /* Message with data needs at least two words (for header & data). */
> #define MLXBF_TMFIFO_DATA_MIN_WORDS 2
>
> +/* Tx timeout in milliseconds. */
> +#define TMFIFO_TX_TIMEOUT 2000
> +
> /* ACPI UID for BlueField-3. */
> #define TMFIFO_BF3_UID 1
>
> @@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
> * @drop_desc: dummy desc for packet dropping
> * @cur_len: processed length of the current descriptor
> * @rem_len: remaining length of the pending packet
> + * @rem_padding: remaining bytes to send as paddings
> * @pkt_len: total length of the pending packet
> * @next_avail: next avail descriptor id
> * @num: vring size (number of descriptors)
> * @align: vring alignment size
> * @index: vring index
> * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
> + * @tx_timeout: expire time of last tx packet
> * @fifo: pointer to the tmfifo structure
> */
> struct mlxbf_tmfifo_vring {
> @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
> struct vring_desc drop_desc;
> int cur_len;
> int rem_len;
> + int rem_padding;
> u32 pkt_len;
> u16 next_avail;
> int num;
> int align;
> int index;
> int vdev_id;
> + unsigned long tx_timeout;
> struct mlxbf_tmfifo *fifo;
> };
>
> @@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
> return true;
> }
>
> +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring)
> +{
> + unsigned long flags;
> +
> + /* Only handle Tx timeout for network vdev. */
> + if (vring->vdev_id != VIRTIO_ID_NET)
> + return;
> +
> + /* Initialize the timeout or return if not expired. */
> + if (!vring->tx_timeout) {
> + /* Initialize the timeout. */
> + vring->tx_timeout = jiffies +
> + msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
> + return;
> + } else if (time_before(jiffies, vring->tx_timeout)) {
> + /* Return if not timeout yet. */
> + return;
> + }
> +
> + /*
> + * Drop the packet after timeout. The outstanding packet is
> + * released and the remaining bytes will be sent with padding byte 0x00
> + * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
> + * either dropped directly, or appended into existing outstanding packet
> + * thus dropped as corrupted network packet.
> + */
> + vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
> + mlxbf_tmfifo_release_pkt(vring);
> + vring->cur_len = 0;
> + vring->rem_len = 0;
> + vring->fifo->vring[0] = NULL;
> +
> + /*
> + * Make sure the load/store are in order before
> + * returning back to virtio.
> + */
> + virtio_mb(false);
> +
> + /* Notify upper layer. */
> + spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
> + vring_interrupt(0, vring->vq);
> + spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
> +}
> +
> /* Rx & Tx processing of a queue. */
> static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> {
> @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> return;
>
> do {
> +retry:
> /* Get available FIFO space. */
> if (avail == 0) {
> if (is_rx)
> @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> break;
> }
>
> + /* Insert paddings for discarded Tx packet. */
> + if (!is_rx) {
> + vring->tx_timeout = 0;
> + while (vring->rem_padding >= sizeof(u64)) {
> + writeq(0, vring->fifo->tx.data);
> + vring->rem_padding -= sizeof(u64);
> + if (--avail == 0)
> + goto retry;
> + }
> + }
> +
> /* Console output always comes from the Tx buffer. */
> if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
> mlxbf_tmfifo_console_tx(fifo, avail);
> @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> /* Handle one descriptor. */
> more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
> } while (more);
> +
> + /* Check Tx timeout. */
> + if (avail <= 0 && !is_rx)
> + mlxbf_tmfifo_check_tx_timeout(vring);
> }
>
> /* Handle Rx or Tx queues. */