Re: [PATCH v9] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc

From: Andy Shevchenko
Date: Wed Feb 13 2019 - 13:11:21 EST


On Wed, Feb 13, 2019 at 3:27 PM Liming Sun <lsun@xxxxxxxxxxxx> wrote:
>
> This commit adds the TmFifo platform driver for Mellanox BlueField
> Soc. TmFifo is a shared FIFO which enables external host machine
> to exchange data with the SoC via USB or PCIe. The driver is based
> on virtio framework and has console and network access enabled.

Thanks for an update, my comments below.

Again, to Mellanox: guys, please, establish internal mailing list for
review and don't come with such quality of code.

Next time I would like to see Reviewed-by from Mellanox people I know,
like Vadim or Leon.

> +config MLXBF_TMFIFO
> + tristate "Mellanox BlueField SoC TmFifo platform driver"

> + depends on ARM64 && ACPI && VIRTIO_CONSOLE && VIRTIO_NET

Split this to three logical parts.

> + help
> + Say y here to enable TmFifo support. The TmFifo driver provides
> + platform driver support for the TmFifo which supports console
> + and networking based on the virtio framework.

> obj-$(CONFIG_MLXREG_HOTPLUG) += mlxreg-hotplug.o
> obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
> +obj-$(CONFIG_MLXBF_TMFIFO) += mlxbf-tmfifo.o

I would suggest to keep it sorted.

> +#define MLXBF_TMFIFO_TX_DATA 0x0

I suggest to use same fixed format for offsets.
Here, for example, 0x00 would be better.

> +#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK 0x1ff
> +#define MLXBF_TMFIFO_TX_STS__COUNT_MASK 0x1ff

#include <linux/bits.h>
...
GENMASK()

> +#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK 0xff
> +#define MLXBF_TMFIFO_TX_CTL__LWM_MASK 0xff

> +#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK 0xff
> +#define MLXBF_TMFIFO_TX_CTL__HWM_MASK 0xff00

> +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
> +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK 0x1ff00000000ULL

GENMASK() / GENMASK_ULL()

> +#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK 0x1ff
> +#define MLXBF_TMFIFO_RX_STS__COUNT_MASK 0x1ff

GENMASK()

> +#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK 0xff
> +#define MLXBF_TMFIFO_RX_CTL__LWM_MASK 0xff

> +#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK 0xff
> +#define MLXBF_TMFIFO_RX_CTL__HWM_MASK 0xff00

> +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
> +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK 0x1ff00000000ULL

Ditto.

> +#include <linux/acpi.h>
> +#include <linux/byteorder/generic.h>
> +#include <linux/bitfield.h>
> +#include <linux/cache.h>
> +#include <linux/device.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/efi.h>
> +#include <linux/io.h>
> +#include <linux/interrupt.h>
> +#include <linux/irq.h>
> +#include <linux/kernel.h>
> +#include <linux/math64.h>
> +#include <linux/module.h>
> +#include <linux/moduleparam.h>
> +#include <linux/mutex.h>
> +#include <linux/platform_device.h>
> +#include <linux/resource.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +#include <linux/version.h>
> +#include <linux/virtio.h>
> +#include <linux/virtio_config.h>
> +#include <linux/virtio_console.h>
> +#include <linux/virtio_ids.h>
> +#include <linux/virtio_net.h>
> +#include <linux/virtio_ring.h>

Do you need all of them?

> +#define MLXBF_TMFIFO_VRING_SIZE 1024

SZ_1K ?

> +/* Console Tx buffer size. */
> +#define MLXBF_TMFIFO_CONS_TX_BUF_SIZE (32 * 1024)

SZ_32K ?

> +/* House-keeping timer interval. */
> +static int mlxbf_tmfifo_timer_interval = HZ / 10;

> +/* Global lock. */

Noise. Either explain what it protects, or remove.

> +static DEFINE_MUTEX(mlxbf_tmfifo_lock);

> +/* Struct declaration. */

Noise.

> +/* Structure to maintain the ring state. */
> +struct mlxbf_tmfifo_vring {
> + void *va; /* virtual address */
> + dma_addr_t dma; /* dma address */
> + struct virtqueue *vq; /* virtqueue pointer */
> + struct vring_desc *desc; /* current desc */
> + struct vring_desc *desc_head; /* current desc head */
> + int cur_len; /* processed len in current desc */
> + int rem_len; /* remaining length to be processed */
> + int size; /* vring size */
> + int align; /* vring alignment */
> + int id; /* vring id */
> + int vdev_id; /* TMFIFO_VDEV_xxx */
> + u32 pkt_len; /* packet total length */
> + u16 next_avail; /* next avail desc id */
> + struct mlxbf_tmfifo *fifo; /* pointer back to the tmfifo */
> +};

Perhaps kernel-doc?

> +/* Interrupt types. */
> +enum {
> + MLXBF_TM_RX_LWM_IRQ, /* Rx low water mark irq */
> + MLXBF_TM_RX_HWM_IRQ, /* Rx high water mark irq */
> + MLXBF_TM_TX_LWM_IRQ, /* Tx low water mark irq */
> + MLXBF_TM_TX_HWM_IRQ, /* Tx high water mark irq */
> + MLXBF_TM_IRQ_CNT

CNT...

> +};
> +
> +/* Ring types (Rx & Tx). */
> +enum {
> + MLXBF_TMFIFO_VRING_RX, /* Rx ring */
> + MLXBF_TMFIFO_VRING_TX, /* Tx ring */
> + MLXBF_TMFIFO_VRING_NUM

...NUM

Perhaps one style for max numbers?

> +};

> +
> +/* Structure for the virtual device. */
> +struct mlxbf_tmfifo_vdev {
> + struct virtio_device vdev; /* virtual device */
> + u8 status;
> + u64 features;
> + union { /* virtio config space */
> + struct virtio_console_config cons;
> + struct virtio_net_config net;
> + } config;

Describe, which field allows to distinguish what type of the data is in a union.

> + struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_NUM];
> + u8 *tx_buf; /* tx buffer */
> + u32 tx_head; /* tx buffer head */
> + u32 tx_tail; /* tx buffer tail */
> +};

kernel-doc?

> +/* Structure of the interrupt information. */
> +struct mlxbf_tmfifo_irq_info {
> + struct mlxbf_tmfifo *fifo; /* tmfifo structure */
> + int irq; /* interrupt number */
> + int index; /* array index */
> +};

Ditto.

> +
> +/* Structure of the TmFifo information. */
> +struct mlxbf_tmfifo {
> + struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX]; /* devices */
> + struct platform_device *pdev; /* platform device */
> + struct mutex lock; /* fifo lock */
> + void __iomem *rx_base; /* mapped register base */
> + void __iomem *tx_base; /* mapped register base */
> + int tx_fifo_size; /* number of entries of the Tx FIFO */
> + int rx_fifo_size; /* number of entries of the Rx FIFO */
> + unsigned long pend_events; /* pending bits for deferred process */
> + struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_IRQ_CNT]; /* irq info */
> + struct work_struct work; /* work struct for deferred process */
> + struct timer_list timer; /* keepalive timer */
> + struct mlxbf_tmfifo_vring *vring[2]; /* current Tx/Rx ring */
> + bool is_ready; /* ready flag */
> + spinlock_t spin_lock; /* spin lock */
> +};

Ditto.

> +/* Use a union struction for 64-bit little/big endian. */

What does this mean?

> +union mlxbf_tmfifo_data_64bit {
> + u64 data;
> + __le64 data_le;
> +};
> +
> +/* Message header used to demux data in the TmFifo. */
> +union mlxbf_tmfifo_msg_hdr {
> + struct {
> + u8 type; /* message type */
> + __be16 len; /* payload length */
> + u8 unused[5]; /* reserved, set to 0 */
> + } __packed;

It's already packed. No?

> + union mlxbf_tmfifo_data_64bit u; /* 64-bit data */
> +};

> +/* MTU setting of the virtio-net interface. */
> +#define MLXBF_TMFIFO_NET_MTU 1500

Don't we have this globally defined?

> +/* Supported virtio-net features. */
> +#define MLXBF_TMFIFO_NET_FEATURES ((1UL << VIRTIO_NET_F_MTU) | \
> + (1UL << VIRTIO_NET_F_STATUS) | \
> + (1UL << VIRTIO_NET_F_MAC))

BIT_UL() ?

> +/* Function declarations. */

Noise.

> +static int mlxbf_tmfifo_remove(struct platform_device *pdev);

Why do you need forward declaration for this?

> +/* Return the consumed Tx buffer space. */
> +static int mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *vdev)
> +{
> + return ((vdev->tx_tail >= vdev->tx_head) ?
> + (vdev->tx_tail - vdev->tx_head) :
> + (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - vdev->tx_head +
> + vdev->tx_tail));

Split this for better reading.

> +}
> +
> +/* Return the available Tx buffer space. */
> +static int mlxbf_tmfifo_vdev_tx_buf_avail(struct mlxbf_tmfifo_vdev *vdev)
> +{
> + return (MLXBF_TMFIFO_CONS_TX_BUF_RSV_SIZE -
> + mlxbf_tmfifo_vdev_tx_buf_len(vdev));

Redundant parens.
Moreover, you might consider temporary variable for better reading.

> +}
> +
> +/* Update Rx/Tx buffer index pointer. */
> +static void mlxbf_tmfifo_vdev_tx_buf_index_inc(u32 *index, u32 len)
> +{
> + *index += len;
> + if (*index >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
> + *index -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE;
> +}
> +
> +/* Allocate vrings for the fifo. */
> +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> + struct mlxbf_tmfifo_vdev *tm_vdev,
> + int vdev_id)
> +{
> + struct mlxbf_tmfifo_vring *vring;
> + dma_addr_t dma;
> + int i, size;
> + void *va;
> +
> + for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> + vring = &tm_vdev->vrings[i];
> + vring->fifo = fifo;
> + vring->size = MLXBF_TMFIFO_VRING_SIZE;
> + vring->align = SMP_CACHE_BYTES;
> + vring->id = i;
> + vring->vdev_id = vdev_id;
> +

> + size = PAGE_ALIGN(vring_size(vring->size, vring->align));

Why do you need this?
dma_alloc_coherent() allocates memory on page granularity anyway.

> + va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
> + GFP_KERNEL);
> + if (!va) {

> + dev_err(tm_vdev->vdev.dev.parent,

Would be much easy if you have temporary variable for this device.

> + "dma_alloc_coherent failed\n");
> + return -ENOMEM;
> + }
> +
> + vring->va = va;
> + vring->dma = dma;
> + }
> +
> + return 0;
> +}

> +/* Interrupt handler. */
> +static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
> +{
> + struct mlxbf_tmfifo_irq_info *irq_info;
> +
> + irq_info = (struct mlxbf_tmfifo_irq_info *)arg;

Useless casting.
Assignment can be done in definition block.

> + if (irq_info->index < MLXBF_TM_IRQ_CNT &&
> + !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
> + schedule_work(&irq_info->fifo->work);
> +
> + return IRQ_HANDLED;
> +}
> +
> +/* Get the next packet descriptor from the vring. */
> +static struct vring_desc *mlxbf_tmfifo_get_next_desc(struct virtqueue *vq)
> +{
> + struct mlxbf_tmfifo_vring *vring;
> + unsigned int idx, head;
> + struct vring *vr;
> +
> + vr = (struct vring *)virtqueue_get_vring(vq);

Return type is different? Is it safe to cast? Why?

> + if (!vr)
> + return NULL;

+ blank line

> + vring = (struct mlxbf_tmfifo_vring *)vq->priv;

Do you need explicit casting?

> + if (vring->next_avail == virtio16_to_cpu(vq->vdev, vr->avail->idx))
> + return NULL;

+blank line

> + idx = vring->next_avail % vr->num;
> + head = virtio16_to_cpu(vq->vdev, vr->avail->ring[idx]);
> + if (WARN_ON(head >= vr->num))
> + return NULL;
> + vring->next_avail++;
> +
> + return &vr->desc[head];
> +}
> +
> +/* Release virtio descriptor. */
> +static void mlxbf_tmfifo_release_desc(struct virtio_device *vdev,
> + struct vring *vr, struct vring_desc *desc,
> + u32 len)
> +{
> + u16 idx, vr_idx;
> +
> + vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
> + idx = vr_idx % vr->num;
> + vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
> + vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
> +
> + /* Virtio could poll and check the 'idx' to decide
> + * whether the desc is done or not. Add a memory
> + * barrier here to make sure the update above completes
> + * before updating the idx.
> + */

Multi-line comment style is broken.

> + mb();
> + vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
> +}

> +/* House-keeping timer. */
> +static void mlxbf_tmfifo_timer(struct timer_list *arg)
> +{
> + struct mlxbf_tmfifo *fifo;

> + fifo = container_of(arg, struct mlxbf_tmfifo, timer);

Can't be done in the definition block?

> + /*
> + * Wake up the work handler to poll the Rx FIFO in case interrupt
> + * missing or any leftover bytes stuck in the FIFO.
> + */
> + test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);

How do you utilize test results?

> +
> + /*
> + * Wake up Tx handler in case virtio has queued too many packets
> + * and are waiting for buffer return.
> + */
> + test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);

Ditto.

> +
> + schedule_work(&fifo->work);
> +
> + mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval);
> +}

> + /* Adjust the size to available space. */
> + if (size + sizeof(hdr) > avail * sizeof(u64))
> + size = avail * sizeof(u64) - sizeof(hdr);

Can avail be 0?

> + /* Write header. */
> + hdr.u.data = 0;
> + hdr.type = VIRTIO_ID_CONSOLE;
> + hdr.len = htons(size);
> + hdr.u.data_le = cpu_to_le64(hdr.u.data);

> + writeq(hdr.u.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);

So, this one is not protected anyhow? Potential race condition?

> +
> + spin_lock_irqsave(&fifo->spin_lock, flags);
> +
> + while (size > 0) {
> + addr = cons->tx_buf + cons->tx_head;
> +
> + if (cons->tx_head + sizeof(u64) <=
> + MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
> + memcpy(&data, addr, sizeof(u64));
> + } else {
> + partial = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_head;
> + memcpy(&data, addr, partial);
> + memcpy((u8 *)&data + partial, cons->tx_buf,
> + sizeof(u64) - partial);
> + }
> + writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +
> + if (size >= sizeof(u64)) {
> + mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
> + sizeof(u64));
> + size -= sizeof(u64);
> + } else {
> + mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
> + size);
> + size = 0;
> + }
> + }
> +
> + spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +}

> + /* Rx/Tx one word (8 bytes) if not done. */
> + if (vring->cur_len != len)
> + mlxbf_tmfifo_rxtx_word(fifo, vdev, vring, desc, is_rx, avail,
> + len);

In such case better to keep it in one line.

> +/* Get the array of feature bits for this device. */
> +static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
> +{
> + struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> + tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> + return tm_vdev->features;
> +}
> +
> +/* Confirm device features to use. */
> +static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
> +{
> + struct mlxbf_tmfifo_vdev *tm_vdev;
> +

> + tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);

This is candidate to be a macro

#define mlxbt_vdev_to_tmfifo(...) ...

> + tm_vdev->features = vdev->features;
> +
> + return 0;
> +}

> +/* Create vdev type in a tmfifo. */
> +static int mlxbf_tmfifo_create_vdev(struct device *dev,
> + struct mlxbf_tmfifo *fifo,
> + int vdev_id, u64 features,
> + void *config, u32 size)
> +{
> + struct mlxbf_tmfifo_vdev *tm_vdev;
> + int ret = 0;
> +
> + mutex_lock(&fifo->lock);
> +
> + tm_vdev = fifo->vdev[vdev_id];
> + if (tm_vdev) {
> + dev_err(dev, "vdev %d already exists\n", vdev_id);
> + ret = -EEXIST;
> + goto fail;
> + }
> +
> + tm_vdev = devm_kzalloc(dev, sizeof(*tm_vdev), GFP_KERNEL);
> + if (!tm_vdev) {
> + ret = -ENOMEM;
> + goto fail;
> + }
> +
> + tm_vdev->vdev.id.device = vdev_id;
> + tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
> + tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
> + tm_vdev->features = features;
> + if (config)
> + memcpy(&tm_vdev->config, config, size);
> + if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
> + dev_err(dev, "unable to allocate vring\n");
> + ret = -ENOMEM;
> + goto fail;
> + }
> + if (vdev_id == VIRTIO_ID_CONSOLE)

> + tm_vdev->tx_buf = devm_kmalloc(dev,
> + MLXBF_TMFIFO_CONS_TX_BUF_SIZE,
> + GFP_KERNEL);

Are you sure devm_ suits here?

> + fifo->vdev[vdev_id] = tm_vdev;
> +
> + /* Register the virtio device. */
> + ret = register_virtio_device(&tm_vdev->vdev);
> + if (ret) {
> + dev_err(&fifo->pdev->dev, "register_virtio_device failed\n");
> + goto register_fail;
> + }
> +
> + mutex_unlock(&fifo->lock);
> + return 0;
> +
> +register_fail:
> + mlxbf_tmfifo_free_vrings(fifo, vdev_id);
> + fifo->vdev[vdev_id] = NULL;
> +fail:
> + mutex_unlock(&fifo->lock);
> + return ret;
> +}

> +/* Read the configured network MAC address from efi variable. */
> +static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
> +{
> + efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> + efi_status_t status;
> + unsigned long size;
> + u8 buf[6];
> +
> + size = sizeof(buf);
> + status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> + buf);
> + if (status == EFI_SUCCESS && size == sizeof(buf))
> + memcpy(mac, buf, sizeof(buf));
> +}

Shouldn't be rather helper in EFI lib in kernel?

> +/* Probe the TMFIFO. */
> +static int mlxbf_tmfifo_probe(struct platform_device *pdev)
> +{
> + struct virtio_net_config net_config;
> + struct resource *rx_res, *tx_res;
> + struct mlxbf_tmfifo *fifo;
> + int i, ret;
> +
> + /* Get the resource of the Rx FIFO. */
> + rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> + if (!rx_res)
> + return -ENODEV;
> +
> + /* Get the resource of the Tx FIFO. */
> + tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> + if (!tx_res)
> + return -ENODEV;
> +
> + if (!devm_request_mem_region(&pdev->dev, rx_res->start,
> + resource_size(rx_res), "bf-tmfifo"))
> + return -EBUSY;
> +
> + if (!devm_request_mem_region(&pdev->dev, tx_res->start,
> + resource_size(tx_res), "bf-tmfifo"))
> + return -EBUSY;
> +
> + fifo = devm_kzalloc(&pdev->dev, sizeof(*fifo), GFP_KERNEL);
> + if (!fifo)
> + return -ENOMEM;
> +
> + fifo->pdev = pdev;
> + platform_set_drvdata(pdev, fifo);
> +
> + spin_lock_init(&fifo->spin_lock);
> + INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
> +
> + timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
> +
> + for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
> + fifo->irq_info[i].index = i;
> + fifo->irq_info[i].fifo = fifo;

> + fifo->irq_info[i].irq = platform_get_irq(pdev, i);


> + ret = devm_request_irq(&pdev->dev, fifo->irq_info[i].irq,
> + mlxbf_tmfifo_irq_handler, 0,
> + "tmfifo", &fifo->irq_info[i]);
> + if (ret) {
> + dev_err(&pdev->dev, "devm_request_irq failed\n");
> + fifo->irq_info[i].irq = 0;
> + return ret;
> + }
> + }
> +

> + fifo->rx_base = devm_ioremap(&pdev->dev, rx_res->start,
> + resource_size(rx_res));
> + if (!fifo->rx_base)
> + return -ENOMEM;
> +
> + fifo->tx_base = devm_ioremap(&pdev->dev, tx_res->start,
> + resource_size(tx_res));
> + if (!fifo->tx_base)
> + return -ENOMEM;

Switch to devm_ioremap_resource().
However, I think you probably need memremap().

> + mutex_init(&fifo->lock);

Isn't too late for initializing this one?


> +/* Device remove function. */
> +static int mlxbf_tmfifo_remove(struct platform_device *pdev)
> +{
> + struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
> +

> + if (fifo)

How is it possible to be not true?

> + mlxbf_tmfifo_cleanup(fifo);
> +

> + platform_set_drvdata(pdev, NULL);

Redundant.

> +
> + return 0;
> +}

> +MODULE_LICENSE("GPL");

Is it correct?

--
With Best Regards,
Andy Shevchenko