Re: [RFC PATCH v3] can: virtio: Initial virtio CAN driver.

From: Harald Mommer
Date: Fri May 12 2023 - 08:30:02 EST


On 11.05.23 17:40, Simon Horman wrote:
On Thu, May 11, 2023 at 05:14:44PM +0200, Mikhail Golubev-Ciuchea wrote:
From: Harald Mommer<harald.mommer@xxxxxxxxxxxxxxx>

- CAN Control

- "ip link set up can0" starts the virtual CAN controller,
- "ip link set up can0" stops the virtual CAN controller

- CAN RX

Receive CAN frames. CAN frames can be standard or extended, classic or
CAN FD. Classic CAN RTR frames are supported.

- CAN TX

Send CAN frames. CAN frames can be standard or extended, classic or
CAN FD. Classic CAN RTR frames are supported.

- CAN BusOff indication

CAN BusOff is handled by a bit in the configuration space.

Signed-off-by: Harald Mommer<Harald.Mommer@xxxxxxxxxxxxxxx>
Signed-off-by: Mikhail Golubev-Ciuchea<Mikhail.Golubev-Ciuchea@xxxxxxxxxxxxxxx>
Co-developed-by: Marc Kleine-Budde<mkl@xxxxxxxxxxxxxx>
Signed-off-by: Marc Kleine-Budde<mkl@xxxxxxxxxxxxxx>
Cc: Damir Shaikhutdinov<Damir.Shaikhutdinov@xxxxxxxxxxxxxxx>
Hi Mikhail,

thanks for your patch.
Some minor feedback from my side.

...

diff --git a/drivers/net/can/virtio_can.c b/drivers/net/can/virtio_can.c
...

+/* Send a control message with message type either
+ *
+ * - VIRTIO_CAN_SET_CTRL_MODE_START or
+ * - VIRTIO_CAN_SET_CTRL_MODE_STOP.
+ *
+ * Unlike AUTOSAR CAN Driver Can_SetControllerMode() there is no requirement
+ * for this Linux driver to have an asynchronous implementation of the mode
+ * setting function so in order to keep things simple the function is
+ * implemented as synchronous function. Design pattern is
+ * virtio_console.c/__send_control_msg() & virtio_net.c/virtnet_send_command().
+ */
+static u8 virtio_can_send_ctrl_msg(struct net_device *ndev, u16 msg_type)
+{
+ struct virtio_can_priv *priv = netdev_priv(ndev);
+ struct device *dev = &priv->vdev->dev;
+ struct virtqueue *vq = priv->vqs[VIRTIO_CAN_QUEUE_CONTROL];
+ struct scatterlist sg_out[1];
+ struct scatterlist sg_in[1];
+ struct scatterlist *sgs[2];
+ int err;
+ unsigned int len;
nit: For networking code please arrange local variables in reverse xmas
tree order - longest line to shortest.

You can check this using:https://github.com/ecree-solarflare/xmastree

In this case I think it would be:

struct virtio_can_priv *priv = netdev_priv(ndev);
struct device *dev = &priv->vdev->dev;
struct scatterlist sg_out[1];
struct scatterlist sg_in[1];
struct scatterlist *sgs[2];
struct virtqueue *vq;
unsigned int len;
int err;

vq = priv->vqs[VIRTIO_CAN_QUEUE_CONTROL];

...

https://docs.kernel.org/process/maintainer-netdev.html

I see, I see...

+static netdev_tx_t virtio_can_start_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct virtio_can_priv *priv = netdev_priv(dev);
+ struct canfd_frame *cf = (struct canfd_frame *)skb->data;
+ struct virtio_can_tx *can_tx_msg;
+ struct virtqueue *vq = priv->vqs[VIRTIO_CAN_QUEUE_TX];
+ struct scatterlist sg_out[1];
+ struct scatterlist sg_in[1];
+ struct scatterlist *sgs[2];
+ unsigned long flags;
+ u32 can_flags;
+ int err;
+ int putidx;
+ netdev_tx_t xmit_ret = NETDEV_TX_OK;
+ const unsigned int hdr_size = offsetof(struct virtio_can_tx_out, sdu);
+
+ if (can_dev_dropped_skb(dev, skb))
+ goto kick; /* No way to return NET_XMIT_DROP here */
+
+ /* No local check for CAN_RTR_FLAG or FD frame against negotiated
+ * features. The device will reject those anyway if not supported.
+ */
+
+ can_tx_msg = kzalloc(sizeof(*can_tx_msg), GFP_ATOMIC);
+ if (!can_tx_msg)
+ goto kick; /* No way to return NET_XMIT_DROP here */
+
+ can_tx_msg->tx_out.msg_type = cpu_to_le16(VIRTIO_CAN_TX);
+ can_flags = 0;
+
+ if (cf->can_id & CAN_EFF_FLAG) {
+ can_flags |= VIRTIO_CAN_FLAGS_EXTENDED;
+ can_tx_msg->tx_out.can_id = cpu_to_le32(cf->can_id & CAN_EFF_MASK);
+ } else {
+ can_tx_msg->tx_out.can_id = cpu_to_le32(cf->can_id & CAN_SFF_MASK);
+ }
+ if (cf->can_id & CAN_RTR_FLAG)
+ can_flags |= VIRTIO_CAN_FLAGS_RTR;
+ else
+ memcpy(can_tx_msg->tx_out.sdu, cf->data, cf->len);
+ if (can_is_canfd_skb(skb))
+ can_flags |= VIRTIO_CAN_FLAGS_FD;
+
+ can_tx_msg->tx_out.flags = cpu_to_le32(can_flags);
+ can_tx_msg->tx_out.length = cpu_to_le16(cf->len);
+
+ /* Prepare sending of virtio message */
+ sg_init_one(&sg_out[0], &can_tx_msg->tx_out, hdr_size + cf->len);
+ sg_init_one(&sg_in[0], &can_tx_msg->tx_in, sizeof(can_tx_msg->tx_in));
+ sgs[0] = sg_out;
+ sgs[1] = sg_in;
+
+ putidx = virtio_can_alloc_tx_idx(priv);
+
+ if (unlikely(putidx < 0)) {
+ netif_stop_queue(dev);
+ kfree(can_tx_msg);
+ netdev_warn(dev, "TX: Stop queue, no putidx available\n");
If I understand things correctly, this code is on the datapath.
So perhaps these should be rate limited, or only logged once.
Likewise elsewhere in this function.

This block is almost a never. Like all blocks returning NETDEV_TX_BUSY.

virtio_can_alloc_tx_idx() may return on error -ENOSPC or -ENOMEM.

The queue is stopped under "/* Normal queue stop when no transmission slots are left */" before -ENOSPC is seen here. When -ENOMEM is returned we have more severe problems as some trace, considered as a theoretical issue only. So if the trace is seen most likely flow control got broken, needs to be fixed and I won't want to miss this.

+ xmit_ret = NETDEV_TX_BUSY;
+ goto kick;
+ }
+
+ can_tx_msg->putidx = (unsigned int)putidx;
+
+ /* Protect list operation */
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ list_add_tail(&can_tx_msg->list, &priv->tx_list);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+ /* Push loopback echo. Will be looped back on TX interrupt/TX NAPI */
+ can_put_echo_skb(skb, dev, can_tx_msg->putidx, 0);
+
+ /* Protect queue and list operations */
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ err = virtqueue_add_sgs(vq, sgs, 1u, 1u, can_tx_msg, GFP_ATOMIC);
+ if (err != 0) { /* unlikely when vq->num_free was considered */
+ list_del(&can_tx_msg->list);
+ can_free_echo_skb(dev, can_tx_msg->putidx, NULL);
+ virtio_can_free_tx_idx(priv, can_tx_msg->putidx);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+ netif_stop_queue(dev);
+ kfree(can_tx_msg);
+ if (err == -ENOSPC)
+ netdev_dbg(dev, "TX: Stop queue, no space left\n");
+ else
+ netdev_warn(dev, "TX: Stop queue, reason = %d\n", err);
+ xmit_ret = NETDEV_TX_BUSY;
+ goto kick;
+ }
Below is the "normal flow control" block which is likely to be entered under load. Everything else returning NETDEV_TX_BUSY is "never" or at least "almost never".
+ /* Normal queue stop when no transmission slots are left */
+ if (atomic_read(&priv->tx_inflight) >= priv->can.echo_skb_max ||
+ vq->num_free == 0 || (vq->num_free < 2 &&
+ !virtio_has_feature(vq->vdev, VIRTIO_RING_F_INDIRECT_DESC))) {
+ netif_stop_queue(dev);
+ netdev_dbg(dev, "TX: Normal stop queue\n");
+ }
+
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+kick:
+ if (netif_queue_stopped(dev) || !netdev_xmit_more()) {
+ if (!virtqueue_kick(vq))
+ netdev_err(dev, "%s(): Kick failed\n", __func__);
+ }
+
+ return xmit_ret;
+}
...