Re: [PATCH v2 1/1] dmaengine: switchtec-dma: Introduce Switchtec DMA engine PCI driver

From: Kelvin.Cao
Date: Thu Apr 13 2023 - 18:41:01 EST


On Mon, 2023-04-10 at 18:42 +0200, Christoph Hellwig wrote:
> > EXTERNAL EMAIL: Do not click links or open attachments unless you >
> > know the content is safe
> >
> > On Mon, Apr 03, 2023 at 11:06:28AM -0700, Kelvin Cao wrote:
> > > > +#define HALT_RETRY 100
> > > > +static int halt_channel(struct switchtec_dma_chan *swdma_chan)
> > > > +{
> > > > +     u32 status;
> > > > +     struct chan_hw_regs __iomem *chan_hw = > > swdma_chan-
> > > > >mmio_chan_hw;
> > > > +     int retry = HALT_RETRY;
> > > > +     struct pci_dev *pdev;
> > > > +     int ret;
> > > > +
> > > > +     rcu_read_lock();
> > > > +     pdev = rcu_dereference(swdma_chan->swdma_dev->pdev);
> > > > +     if (!pdev) {
> > > > +             ret = -ENODEV;
> > > > +             goto unlock_and_exit;
> > > > +     }
> >
> > This whole RCU critical section around every access to ->pdev
> > scheme
> > looks a bit bothersome to me.  This means that all the low-level
> > PCI ops are done in RCU critical section.  Is this something you
> > came up with or is it copied from other drivers?
> >
> > Normally we'd do an unregistration from the dmaengine subsystem
> > first, which might do a RCU synchronization at a high level,
> > and then we're sure that none of the methods gets called again
> > on the unregistered device.
> >
> > Can't this driver (and the dmaengine core) support an operation
> > mode where you set a shutdown flag at the beginning
> > > > +
> > > > +     spin_lock(&swdma_chan->hw_ctrl_lock);
> > > > +     writeb(SWITCHTEC_CHAN_CTRL_HALT, &chan_hw->ctrl);
> > > > +
> > > > +     ret = -EIO;
> > > > +     do {
> > > > +             status = readl(&chan_hw->status);
> > > > +
> > > > +             if (status & SWITCHTEC_CHAN_STS_HALTED) {
> > > > +                     ret = 0;
> > > > +                     break;
> > > > +             }
> > > > +
> > > > +             udelay(1000);
> > > > +     } while (retry--);
> > > > +     spin_unlock(&swdma_chan->hw_ctrl_lock);
> >
> > Why is the lock needed while reading the status and waiting
> > for it with long delays?
There's (low) chance of access to the same ctrl register from other
paths which might change the value of status in an unexpected way. It
also prevents the hardware operation from being interrupted until it
indicates it has finished by a bit set in the status register.
> >
> > > > +     spin_lock(&swdma_chan->hw_ctrl_lock);
> > > > +     ctrl = SWITCHTEC_CHAN_CTRL_RESET;
> > > > +     ctrl |= SWITCHTEC_CHAN_CTRL_ERR_PAUSE;
> > > > +     writel(ctrl, &chan_hw->ctrl);
> > > > +
> > > > +     udelay(1000);
> > > > +
> > > > +     ctrl = SWITCHTEC_CHAN_CTRL_ERR_PAUSE;
> > > > +     writel(ctrl, &chan_hw->ctrl);
> > > > +     spin_unlock(&swdma_chan->hw_ctrl_lock);
> >
> > This looks broken.  PCIe MMIO Write TLPs an might not never
> > reach the device until you do a read from it.  So instead of
> > the udelay you probably need to do a read* operation to some
> > register that has no side effects.
> >
Good point.

> > Also what is the point of the ctrl local variable?  Just passing
> > the values directly would be shorted and easier to read.

OK
> >
> > > > +     spin_lock(&swdma_chan->hw_ctrl_lock);
> > > > +     writeb(SWITCHTEC_CHAN_CTRL_PAUSE, &chan_hw->ctrl);
> > > > +     spin_unlock(&swdma_chan->hw_ctrl_lock);
> > > > +
> > > > +     rcu_read_unlock();
> > > > +
> > > > +     /* wait 60ms to ensure no pending CEs */
> > > > +     msleep(60);
> >
> > Without a previous read* to flush the posted writes you're not
> > actually waiting 60ms here from the device POV.
Yes.
> >
> > > > +     spin_lock(&swdma_chan->hw_ctrl_lock);
> > > > +     writeb(SWITCHTEC_CHAN_CTRL_PAUSE, &chan_hw->ctrl);
> > > > +
> > > > +     ret = -EIO;
> > > > +     do {
> > > > +             status = readl(&chan_hw->status);
> > > > +
> > > > +             if (status & SWITCHTEC_CHAN_STS_PAUSED) {
> > > > +                     ret = 0;
> > > > +                     break;
> > > > +             }
> > > > +
> > > > +             udelay(1000);
> > > > +     } while (retry--);
> > > > +     spin_unlock(&swdma_chan->hw_ctrl_lock);
> >
> > Same comment about waiting under a spinlock here.
> >
> > > > +     do {
> > > > +             status = readl(&chan_hw->status);
> > > > +
> > > > +             if (!(status & SWITCHTEC_CHAN_STS_PAUSED)) {
> > > > +                     ret = 0;
> > > > +                     break;
> > > > +             }
> > > > +
> > > > +             udelay(1000);
> > > > +     } while (retry--);
> > > > +     spin_unlock(&swdma_chan->hw_ctrl_lock);
> >
> > .. and here.
> >
> > It might also be useful to have a helper to wait for a certain
> > bit to be set or not set in the status register rather than
> > duplicating the code.

Will update.
> >
> > > > +static void switchtec_dma_synchronize(struct dma_chan *chan)
> > > > +{
> > > > +     struct pci_dev *pdev;
> > > > +     struct switchtec_dma_chan *swdma_chan = > >
> > > > to_switchtec_dma_chan(chan);
> > > > +     int rc;
> > > > +
> > > > +     rcu_read_lock();
> > > > +     pdev = rcu_dereference(swdma_chan->swdma_dev->pdev);
> > > > +     if (pdev)
> > > > +             synchronize_irq(swdma_chan->irq);
> > > > +     rcu_read_unlock();
> >
> > synchronize_irq can sleep and must not be called in a RCU critical
> > section.

Will fix.
> >
> > > > +     if (type >= UNKNOWN_TRANSACTION)
> > > > +             goto err_unlock;
> > > > +
> > > > +     if (type == MEMCPY)
> > > > +             if (len > SWITCHTEC_DESC_MAX_SIZE)
> > > > +                     goto err_unlock;
> > > > +
> > > > +     if ((type == WIMM) && (len == 8))
> >
> > Maybe do a switch on the type to make the code more readable?
> >
> > Also no need for the inner braces in the last line here.
> >
> > > > +static irqreturn_t switchtec_dma_isr(int irq, void *chan)
> > > > +{
> > > > +     struct switchtec_dma_chan *swdma_chan = chan;
> > > > +
> > > > +     if (swdma_chan->comp_ring_active)
> > > > +             tasklet_schedule(&swdma_chan->desc_task);
> > > > +
> > > > +     return IRQ_HANDLED;
> > > > +}
> > > > +
> > > > +static irqreturn_t switchtec_dma_chan_status_isr(int irq, void
> > > > > > *dma)
> > > > +{
> > > > +     struct switchtec_dma_dev *swdma_dev = dma;
> > > > +
> > > > +     tasklet_schedule(&swdma_dev->chan_status_task);
> >
> > What speaks against simply using threaded irqs here instead of the
> > tasklets?
> >
> > > > +     swdma_chan->hw_cq = > > dma_alloc_coherent(swdma_dev-
> > > > >dma_dev.dev, size,
> > > > +                                            > > &swdma_chan-
> > > > >dma_addr_cq,
> > > > +                                            GFP_KERNEL);
> > > > +     if (!swdma_chan->hw_cq) {
> > > > +             rc = -ENOMEM;
> > > > +             goto free_and_exit;
> > > > +     }
> > > > +
> > > > +     memset(swdma_chan->hw_cq, 0, size);
> >
> > dma_alloc_coherent always returns zeroed memory, no need to
> > zero it again.
> >
> > > > +
> > > > +     /* reset host phase tag */
> > > > +     swdma_chan->phase_tag = 0;
> > > > +
> > > > +     size = sizeof(*swdma_chan->desc_ring);
> > > > +     swdma_chan->desc_ring = kcalloc(SWITCHTEC_DMA_RING_SIZE,
> > > > > > size,
> > > > +                                     GFP_KERNEL);
> > > > +     if (!swdma_chan->desc_ring) {
> > > > +             rc = -ENOMEM;
> > > > +             goto free_and_exit;
> > > > +     }
> > > > +
> > > > +     memset(swdma_chan->desc_ring, 0, SWITCHTEC_DMA_RING_SIZE
> > > > * > > size);
> >
> > kcalloc also already zeroes the memory.

Memset has been fixed in v3 posted earlier.

Thanks,
Kelvin