Re: [PATCH v4 1/1] dmaengine: switchtec-dma: Introduce Switchtec DMA engine PCI driver

From: Christoph Hellwig
Date: Wed May 03 2023 - 02:31:45 EST


On Sun, Apr 23, 2023 at 02:37:17PM -0700, Kelvin Cao wrote:
> Implement core PCI driver skeleton and register DMA engine callbacks.

I only noticed this now, but this sentence reads a bit odd. What does
it try to say?

> +struct chan_fw_regs {
> + u32 valid_en_se;

...

> + u16 cq_phase;
> +} __packed;

Everything here seems nicely naturally aligned, what is the reason
for the __packed attribute?

> +struct switchtec_dma_hw_se_desc {
> + u8 opc;
> + u8 ctrl;
> + __le16 tlp_setting;
> + __le16 rsvd1;
> + __le16 cid;
> + __le32 byte_cnt;
> + union {
> + __le32 saddr_lo;
> + __le32 widata_lo;
> + };
> + union {
> + __le32 saddr_hi;
> + __le32 widata_hi;
> + };

What is the point for unions of identical data types?

> + p = (int *)ce;
> + for (i = 0; i < sizeof(*ce)/4; i++) {
> + dev_err(chan_dev, "CE DW%d: 0x%08x\n", i,
> + le32_to_cpu((__force __le32)*p));
> + p++;
> + }

Why is this casting to an int that is never used and the back to CE?
Maybe a function that actually dumps the registers with names and
is type safe might be a better idea? If not just using
print_hex_dump would be a simpler, although that would always printk
in little endian representation (which might be easier to read anyway).

> + struct pci_dev *pdev;
> + struct switchtec_dma_chan *swdma_chan = to_switchtec_dma_chan(chan);
> + int rc;
> +
> + rcu_read_lock();
> + pdev = rcu_dereference(swdma_chan->swdma_dev->pdev);
> + rcu_read_unlock();
> +
> + if (pdev)
> + synchronize_irq(swdma_chan->irq);

At this point pdev might be freed as you're outside the RCU critical
section, and the irq number could have been reused.

> + switch (type) {
> + case MEMCPY:
> + if (len > SWITCHTEC_DESC_MAX_SIZE)
> + goto err_unlock;
> + break;
> + case WIMM:
> + if (len != 8)
> + break;
> +
> + if (dma_dst & ((1 << DMAENGINE_ALIGN_8_BYTES) - 1)) {
> + dev_err(chan_dev,
> + "QW WIMM dst addr 0x%08x_%08x not QW aligned!\n",
> + upper_32_bits(dma_dst), lower_32_bits(dma_dst));
> + goto err_unlock;
> + }
> + break;
> + default:
> + goto err_unlock;
> + }

IT looks like these checks could easily be done without the lock,
and in the respective callers.

> + if (type == MEMCPY) {
> + desc->hw->opc = SWITCHTEC_DMA_OPC_MEMCPY;
> + desc->hw->saddr_lo = cpu_to_le32(lower_32_bits(dma_src));
> + desc->hw->saddr_hi = cpu_to_le32(upper_32_bits(dma_src));
> + } else {
> + desc->hw->opc = SWITCHTEC_DMA_OPC_WRIMM;
> + desc->hw->widata_lo = cpu_to_le32(lower_32_bits(data));
> + desc->hw->widata_hi = cpu_to_le32(upper_32_bits(data));
> + }

... and then instead of the type I'd just pass the opcode directly,
simplifying the logic quite a bit.

> +static irqreturn_t switchtec_dma_isr(int irq, void *chan)
> +{
> + struct switchtec_dma_chan *swdma_chan = chan;
> +
> + if (swdma_chan->comp_ring_active)
> + tasklet_schedule(&swdma_chan->desc_task);
> +
> + return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t switchtec_dma_chan_status_isr(int irq, void *dma)
> +{
> + struct switchtec_dma_dev *swdma_dev = dma;
> +
> + tasklet_schedule(&swdma_dev->chan_status_task);
> +
> + return IRQ_HANDLED;
> +}

Same comment as last time - irq + tasklet seems quite hack and
inefficient over just using threaded interrupts. I'd like to see
a really good rationale for using it, and a Cc to the interrupt
maintainers that would love to kill off tasklets

> + addr = swdma_dev->bar + SWITCHTEC_DMAC_CHAN_CFG_STS_OFFSET;
> + addr += i * SWITCHTEC_DMA_CHAN_FW_REGS_SIZE;
> + chan_fw = (struct chan_fw_regs __iomem *)addr;
> +
> + addr = swdma_dev->bar + SWITCHTEC_DMAC_CHAN_CTRL_OFFSET;
> + addr += i * SWITCHTEC_DMA_CHAN_HW_REGS_SIZE;
> + chan_hw = (struct chan_hw_regs __iomem *)addr;
> +
> + swdma_dev->swdma_chans[i] = swdma_chan;
> + swdma_chan->mmio_chan_fw = chan_fw;
> + swdma_chan->mmio_chan_hw = chan_hw;

No need for the casts above. This could simply become:

swdma_chan->mmio_chan_fw =
swdma_dev->bar + SWITCHTEC_DMAC_CHAN_CFG_STS_OFFSET +
i * SWITCHTEC_DMA_CHAN_FW_REGS_SIZE;
swdma_chan->mmio_chan_hw =
swdma_dev->bar + SWITCHTEC_DMAC_CHAN_CTRL_OFFSET +
i * SWITCHTEC_DMA_CHAN_HW_REGS_SIZE;

> + rc = pause_reset_channel(swdma_chan);
> + if (rc)
> + goto free_and_exit;
> +
> + rcu_read_lock();
> + pdev = rcu_dereference(swdma_dev->pdev);
> + if (!pdev) {
> + rc = -ENODEV;
> + goto unlock_and_free;
> + }

The pdev can't go away while you're in ->probe as that is synchronized
vs ->remove and ->shutdown.

> + irq = pci_irq_vector(pdev, irq);
> + if (irq < 0) {
> + rc = irq;
> + goto unlock_and_free;
> + }
> +
> + rcu_read_unlock();
> +
> + rc = request_irq(irq, switchtec_dma_isr, 0, KBUILD_MODNAME, swdma_chan);
> + if (rc)
> + goto free_and_exit;

I'd just use pci_request_irq here.

> +#define SWITCHTEC_DMA_DEVICE(device_id) \
> + { \
> + .vendor = PCI_VENDOR_ID_MICROSEMI, \
> + .device = device_id, \
> + .subvendor = PCI_ANY_ID, \
> + .subdevice = PCI_ANY_ID, \
> + .class = PCI_CLASS_SYSTEM_OTHER << 8, \
> + .class_mask = 0xFFFFFFFF, \
> + }
> +
> +static const struct pci_device_id switchtec_dma_pci_tbl[] = {
> + SWITCHTEC_DMA_DEVICE(0x4000), /* PFX 100XG4 */

This should use the common PCI_DEVICE() macro instead, i.e.

PCI_DEVICE(PCI_VENDOR_ID_MICROSEMI, 0x4000), /* PFX 100XG4 */
...