Re: [PATCH 06/11] RISC-V: drivers/iommu/riscv: Add command, fault, page-req queues

From: Robin Murphy
Date: Wed Aug 16 2023 - 14:49:52 EST


On 2023-07-19 20:33, Tomasz Jeznach wrote:
Enables message-signaled or wire-signaled interrupts for PCIe and platform devices.

Co-developed-by: Nick Kossifidis <mick@xxxxxxxxxxxx>
Signed-off-by: Nick Kossifidis <mick@xxxxxxxxxxxx>
Signed-off-by: Tomasz Jeznach <tjeznach@xxxxxxxxxxxx>
---
drivers/iommu/riscv/iommu-pci.c | 72 ++++
drivers/iommu/riscv/iommu-platform.c | 66 +++
drivers/iommu/riscv/iommu.c | 604 ++++++++++++++++++++++++++-
drivers/iommu/riscv/iommu.h | 28 ++
4 files changed, 769 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/riscv/iommu-pci.c b/drivers/iommu/riscv/iommu-pci.c
index c91f963d7a29..9ea0647f7b92 100644
--- a/drivers/iommu/riscv/iommu-pci.c
+++ b/drivers/iommu/riscv/iommu-pci.c
@@ -34,6 +34,7 @@ static int riscv_iommu_pci_probe(struct pci_dev *pdev, const struct pci_device_i
{
struct device *dev = &pdev->dev;
struct riscv_iommu_device *iommu;
+ u64 icvec;
int ret;
ret = pci_enable_device_mem(pdev);
@@ -67,14 +68,84 @@ static int riscv_iommu_pci_probe(struct pci_dev *pdev, const struct pci_device_i
iommu->dev = dev;
dev_set_drvdata(dev, iommu);
+ /* Check device reported capabilities. */
+ iommu->cap = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAP);
+
+ /* The PCI driver only uses MSIs, make sure the IOMMU supports this */
+ switch (FIELD_GET(RISCV_IOMMU_CAP_IGS, iommu->cap)) {
+ case RISCV_IOMMU_CAP_IGS_MSI:
+ case RISCV_IOMMU_CAP_IGS_BOTH:
+ break;
+ default:
+ dev_err(dev, "unable to use message-signaled interrupts\n");
+ ret = -ENODEV;
+ goto fail;
+ }
+
dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
pci_set_master(pdev);
+ /* Allocate and assign IRQ vectors for the various events */
+ ret = pci_alloc_irq_vectors(pdev, 1, RISCV_IOMMU_INTR_COUNT, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ dev_err(dev, "unable to allocate irq vectors\n");
+ goto fail;
+ }
+
+ ret = -ENODEV;
+
+ iommu->irq_cmdq = msi_get_virq(dev, RISCV_IOMMU_INTR_CQ);
+ if (!iommu->irq_cmdq) {
+ dev_warn(dev, "no MSI vector %d for the command queue\n",
+ RISCV_IOMMU_INTR_CQ);
+ goto fail;
+ }
+
+ iommu->irq_fltq = msi_get_virq(dev, RISCV_IOMMU_INTR_FQ);
+ if (!iommu->irq_fltq) {
+ dev_warn(dev, "no MSI vector %d for the fault/event queue\n",
+ RISCV_IOMMU_INTR_FQ);
+ goto fail;
+ }
+
+ if (iommu->cap & RISCV_IOMMU_CAP_HPM) {
+ iommu->irq_pm = msi_get_virq(dev, RISCV_IOMMU_INTR_PM);
+ if (!iommu->irq_pm) {
+ dev_warn(dev,
+ "no MSI vector %d for performance monitoring\n",
+ RISCV_IOMMU_INTR_PM);
+ goto fail;
+ }
+ }
+
+ if (iommu->cap & RISCV_IOMMU_CAP_ATS) {
+ iommu->irq_priq = msi_get_virq(dev, RISCV_IOMMU_INTR_PQ);
+ if (!iommu->irq_priq) {
+ dev_warn(dev,
+ "no MSI vector %d for page-request queue\n",
+ RISCV_IOMMU_INTR_PQ);
+ goto fail;
+ }
+ }
+
+ /* Set simple 1:1 mapping for MSI vectors */
+ icvec = FIELD_PREP(RISCV_IOMMU_IVEC_CIV, RISCV_IOMMU_INTR_CQ) |
+ FIELD_PREP(RISCV_IOMMU_IVEC_FIV, RISCV_IOMMU_INTR_FQ);
+
+ if (iommu->cap & RISCV_IOMMU_CAP_HPM)
+ icvec |= FIELD_PREP(RISCV_IOMMU_IVEC_PMIV, RISCV_IOMMU_INTR_PM);
+
+ if (iommu->cap & RISCV_IOMMU_CAP_ATS)
+ icvec |= FIELD_PREP(RISCV_IOMMU_IVEC_PIV, RISCV_IOMMU_INTR_PQ);
+
+ riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IVEC, icvec);
+
ret = riscv_iommu_init(iommu);
if (!ret)
return ret;
fail:
+ pci_free_irq_vectors(pdev);
pci_clear_master(pdev);
pci_release_regions(pdev);
pci_disable_device(pdev);
@@ -85,6 +156,7 @@ static int riscv_iommu_pci_probe(struct pci_dev *pdev, const struct pci_device_i
static void riscv_iommu_pci_remove(struct pci_dev *pdev)
{
riscv_iommu_remove(dev_get_drvdata(&pdev->dev));
+ pci_free_irq_vectors(pdev);
pci_clear_master(pdev);
pci_release_regions(pdev);
pci_disable_device(pdev);
diff --git a/drivers/iommu/riscv/iommu-platform.c b/drivers/iommu/riscv/iommu-platform.c
index e4e8ca6711e7..35935d3c7ef4 100644
--- a/drivers/iommu/riscv/iommu-platform.c
+++ b/drivers/iommu/riscv/iommu-platform.c
@@ -20,6 +20,8 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct riscv_iommu_device *iommu = NULL;
struct resource *res = NULL;
+ u32 fctl = 0;
+ int irq = 0;
int ret = 0;
iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
@@ -53,6 +55,70 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
goto fail;
}
+ iommu->cap = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAP);
+
+ /* For now we only support WSIs until we have AIA support */
+ ret = FIELD_GET(RISCV_IOMMU_CAP_IGS, iommu->cap);
+ if (ret == RISCV_IOMMU_CAP_IGS_MSI) {
+ dev_err(dev, "IOMMU only supports MSIs\n");
+ goto fail;
+ }
+
+ /* Parse IRQ assignment */
+ irq = platform_get_irq_byname_optional(pdev, "cmdq");
+ if (irq > 0)
+ iommu->irq_cmdq = irq;
+ else {
+ dev_err(dev, "no IRQ provided for the command queue\n");
+ goto fail;
+ }
+
+ irq = platform_get_irq_byname_optional(pdev, "fltq");
+ if (irq > 0)
+ iommu->irq_fltq = irq;
+ else {
+ dev_err(dev, "no IRQ provided for the fault/event queue\n");
+ goto fail;
+ }
+
+ if (iommu->cap & RISCV_IOMMU_CAP_HPM) {
+ irq = platform_get_irq_byname_optional(pdev, "pm");
+ if (irq > 0)
+ iommu->irq_pm = irq;
+ else {
+ dev_err(dev, "no IRQ provided for performance monitoring\n");
+ goto fail;
+ }
+ }
+
+ if (iommu->cap & RISCV_IOMMU_CAP_ATS) {
+ irq = platform_get_irq_byname_optional(pdev, "priq");
+ if (irq > 0)
+ iommu->irq_priq = irq;
+ else {
+ dev_err(dev, "no IRQ provided for the page-request queue\n");
+ goto fail;
+ }
+ }
+
+ /* Make sure fctl.WSI is set */
+ fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL);
+ fctl |= RISCV_IOMMU_FCTL_WSI;
+ riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, fctl);
+
+ /* Parse queue lengths */
+ ret = of_property_read_u32(pdev->dev.of_node, "cmdq_len", &iommu->cmdq_len);
+ if (!ret)
+ dev_info(dev, "command queue length set to %i\n", iommu->cmdq_len);
+
+ ret = of_property_read_u32(pdev->dev.of_node, "fltq_len", &iommu->fltq_len);
+ if (!ret)
+ dev_info(dev, "fault/event queue length set to %i\n", iommu->fltq_len);
+
+ ret = of_property_read_u32(pdev->dev.of_node, "priq_len", &iommu->priq_len);
+ if (!ret)
+ dev_info(dev, "page request queue length set to %i\n", iommu->priq_len);

These properties are not documented in the binding, but are clearly Linux-specific driver policy which does not belong in DT anyway.

+
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
return riscv_iommu_init(iommu);
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 31dc3c458e13..5c4cf9875302 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -45,6 +45,18 @@ static int ddt_mode = RISCV_IOMMU_DDTP_MODE_BARE;
module_param(ddt_mode, int, 0644);
MODULE_PARM_DESC(ddt_mode, "Device Directory Table mode.");
+static int cmdq_length = 1024; /* command queue entry count used when iommu->cmdq_len is left at zero */
+module_param(cmdq_length, int, 0644);
+MODULE_PARM_DESC(cmdq_length, "Command queue length.");
+
+static int fltq_length = 1024; /* fault/event queue entry count used when iommu->fltq_len is left at zero */
+module_param(fltq_length, int, 0644);
+MODULE_PARM_DESC(fltq_length, "Fault queue length.");
+
+static int priq_length = 1024; /* page-request queue entry count used when iommu->priq_len is left at zero */
+module_param(priq_length, int, 0644);
+MODULE_PARM_DESC(priq_length, "Page request interface queue length.");
+
/* IOMMU PSCID allocation namespace. */
#define RISCV_IOMMU_MAX_PSCID (1U << 20)
static DEFINE_IDA(riscv_iommu_pscids);
@@ -65,6 +77,497 @@ static DEFINE_IDA(riscv_iommu_pscids);
static const struct iommu_domain_ops riscv_iommu_domain_ops;
static const struct iommu_ops riscv_iommu_ops;
+/*
+ * Common queue management routines
+ */
+
+/*
+ * Note: offsets are the same for all queues, so the head and tail registers
+ * of any queue are located by adding the command queue CQH/CQT distance from
+ * CQB to that queue's own base register offset (q->qbr).
+ */
+#define Q_HEAD(q) ((q)->qbr + (RISCV_IOMMU_REG_CQH - RISCV_IOMMU_REG_CQB))
+#define Q_TAIL(q) ((q)->qbr + (RISCV_IOMMU_REG_CQT - RISCV_IOMMU_REG_CQB))
+
+/*
+ * Report how many contiguous entries of a device-produced queue are ready.
+ *
+ * Reads the queue tail register and stores the index of the first
+ * unprocessed entry (q->lui) in *ready. Returns the number of entries that
+ * can be read starting at that index without running past the end of the
+ * ring: when the tail has wrapped behind q->lui only the entries up to the
+ * end of the ring are counted, and the remainder is picked up by the next
+ * call once the caller has released this batch.
+ *
+ * A tail value outside the ring is reported once through WARN and treated as
+ * "nothing to consume" rather than halting the machine with BUG_ON().
+ */
+static unsigned riscv_iommu_queue_consume(struct riscv_iommu_device *iommu,
+					  struct riscv_iommu_queue *q, unsigned *ready)
+{
+	u32 tail = riscv_iommu_readl(iommu, Q_TAIL(q));
+
+	*ready = q->lui;
+
+	/* The tail comes from the device; never size a batch from garbage. */
+	if (WARN_ON_ONCE(tail >= q->cnt))
+		return 0;
+
+	if (q->lui <= tail)
+		return tail - q->lui;
+	return q->cnt - q->lui;
+}
+
+/*
+ * Hand 'count' processed entries back to the device: advance the software
+ * index, wrap it with the (cnt - 1) mask and publish it in the head register.
+ */
+static void riscv_iommu_queue_release(struct riscv_iommu_device *iommu,
+				      struct riscv_iommu_queue *q, unsigned count)
+{
+	const u32 wrap_mask = q->cnt - 1;
+	u32 new_head = q->lui + count;
+
+	new_head &= wrap_mask;
+	q->lui = new_head;
+	riscv_iommu_writel(iommu, Q_HEAD(q), new_head);
+}
+
+/*
+ * Write 'val' to the queue control/status register and poll it until the
+ * BUSY bit clears or RISCV_IOMMU_TIMEOUT cycles have passed.
+ *
+ * Returns the last value read back; callers test RISCV_IOMMU_QUEUE_BUSY in
+ * it to detect a timeout.
+ */
+static u32 riscv_iommu_queue_ctrl(struct riscv_iommu_device *iommu,
+				  struct riscv_iommu_queue *q, u32 val)
+{
+	const cycles_t deadline = get_cycles() + RISCV_IOMMU_TIMEOUT;
+	u32 csr;
+
+	riscv_iommu_writel(iommu, q->qcr, val);
+
+	for (;;) {
+		csr = riscv_iommu_readl(iommu, q->qcr);
+		if (!(csr & RISCV_IOMMU_QUEUE_BUSY))
+			return csr;
+
+		cpu_relax();
+
+		if (get_cycles() >= deadline)
+			return csr;
+	}
+}
+
+/*
+ * Disable a queue and release its interrupt and ring buffer.
+ *
+ * Safe to call more than once and on a queue that was never set up: the
+ * fields describing released resources are cleared, and a queue whose
+ * control register offset was never assigned is left untouched.
+ */
+static void riscv_iommu_queue_free(struct riscv_iommu_device *iommu,
+				   struct riscv_iommu_queue *q)
+{
+	size_t size = q->len * q->cnt;
+
+	/*
+	 * q->qcr is only assigned by riscv_iommu_queue_init(). With it still
+	 * zero the write below would land on register offset 0 instead of a
+	 * queue control register.
+	 */
+	if (!q->qcr)
+		return;
+
+	/* Stop the hardware from using the ring before tearing anything down. */
+	riscv_iommu_queue_ctrl(iommu, q, 0);
+
+	/* Release the interrupt first so no handler runs against a freed ring. */
+	if (q->irq) {
+		free_irq(q->irq, q);
+		q->irq = 0;
+	}
+
+	if (q->base) {
+		if (q->in_iomem)
+			iounmap(q->base);
+		else
+			dmam_free_coherent(iommu->dev, size, q->base, q->base_dma);
+		q->base = NULL;
+	}
+}
+
+static irqreturn_t riscv_iommu_cmdq_irq_check(int irq, void *data);
+static irqreturn_t riscv_iommu_cmdq_process(int irq, void *data);
+static irqreturn_t riscv_iommu_fltq_irq_check(int irq, void *data);
+static irqreturn_t riscv_iommu_fltq_process(int irq, void *data);
+static irqreturn_t riscv_iommu_priq_irq_check(int irq, void *data);
+static irqreturn_t riscv_iommu_priq_process(int irq, void *data);
+
+/*
+ * Allocate, program and enable one hardware queue.
+ *
+ * @iommu:    device owning the queue
+ * @queue_id: one of enum riscv_queue_ids
+ *
+ * Picks the queue descriptor, entry size, requested length, interrupt and
+ * register offsets for @queue_id, allocates the ring, writes the queue base
+ * register and verifies the value read back. If the hardware did not accept
+ * the value, the ring is re-created with the size (and, for a hardwired PPN,
+ * the I/O memory location) the register reports. Finally the threaded
+ * interrupt handler is installed and the queue is enabled.
+ *
+ * Returns 0 on success or a negative errno. On failure everything acquired
+ * here has been released again.
+ */
+static int riscv_iommu_queue_init(struct riscv_iommu_device *iommu, int queue_id)
+{
+	struct device *dev = iommu->dev;
+	struct riscv_iommu_queue *q = NULL;
+	size_t queue_size = 0;
+	irq_handler_t irq_check;
+	irq_handler_t irq_process;
+	const char *name;
+	int count = 0;
+	int irq = 0;
+	unsigned order = 0;
+	u64 qbr_val = 0;
+	u64 qbr_readback = 0;
+	u64 qbr_paddr = 0;
+	u32 csr;
+	int ret = 0;
+
+	switch (queue_id) {
+	case RISCV_IOMMU_COMMAND_QUEUE:
+		q = &iommu->cmdq;
+		q->len = sizeof(struct riscv_iommu_command);
+		count = iommu->cmdq_len;
+		irq = iommu->irq_cmdq;
+		irq_check = riscv_iommu_cmdq_irq_check;
+		irq_process = riscv_iommu_cmdq_process;
+		q->qbr = RISCV_IOMMU_REG_CQB;
+		q->qcr = RISCV_IOMMU_REG_CQCSR;
+		name = "cmdq";
+		break;
+	case RISCV_IOMMU_FAULT_QUEUE:
+		q = &iommu->fltq;
+		q->len = sizeof(struct riscv_iommu_fq_record);
+		count = iommu->fltq_len;
+		irq = iommu->irq_fltq;
+		irq_check = riscv_iommu_fltq_irq_check;
+		irq_process = riscv_iommu_fltq_process;
+		q->qbr = RISCV_IOMMU_REG_FQB;
+		q->qcr = RISCV_IOMMU_REG_FQCSR;
+		name = "fltq";
+		break;
+	case RISCV_IOMMU_PAGE_REQUEST_QUEUE:
+		q = &iommu->priq;
+		q->len = sizeof(struct riscv_iommu_pq_record);
+		count = iommu->priq_len;
+		irq = iommu->irq_priq;
+		irq_check = riscv_iommu_priq_irq_check;
+		irq_process = riscv_iommu_priq_process;
+		q->qbr = RISCV_IOMMU_REG_PQB;
+		q->qcr = RISCV_IOMMU_REG_PQCSR;
+		name = "priq";
+		break;
+	default:
+		dev_err(dev, "invalid queue interrupt index in queue_init!\n");
+		return -EINVAL;
+	}
+
+	/* Polling not implemented */
+	if (!irq)
+		return -ENODEV;
+
+	/*
+	 * The size field is written as (log2(entries) - 1), so fewer than two
+	 * entries cannot be encoded; zero or negative lengths would also feed
+	 * ilog2() an invalid argument.
+	 */
+	if (count < 2) {
+		dev_err(dev, "invalid %s queue length: %d\n", name, count);
+		return -EINVAL;
+	}
+
+	/* Allocate queue in memory and set the base register */
+	order = ilog2(count);
+	do {
+		queue_size = q->len * (1ULL << order);
+		q->base = dmam_alloc_coherent(dev, queue_size, &q->base_dma, GFP_KERNEL);
+		if (q->base || queue_size < PAGE_SIZE)
+			break;
+
+		order--;
+	} while (1);
+
+	if (!q->base) {
+		dev_err(dev, "failed to allocate %s queue (cnt: %u)\n", name, count);
+		return -ENOMEM;
+	}
+
+	q->cnt = 1ULL << order;
+
+	qbr_val = phys_to_ppn(q->base_dma) |
+		  FIELD_PREP(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, order - 1);
+
+	riscv_iommu_writeq(iommu, q->qbr, qbr_val);
+
+	/*
+	 * Queue base registers are WARL, so it's possible that whatever we wrote
+	 * there was illegal/not supported by the hw in which case we need to make
+	 * sure we set a supported PPN and/or queue size.
+	 */
+	qbr_readback = riscv_iommu_readq(iommu, q->qbr);
+	if (qbr_readback == qbr_val)
+		goto irq;
+
+	dmam_free_coherent(dev, queue_size, q->base, q->base_dma);
+
+	/* Get supported queue size */
+	order = FIELD_GET(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, qbr_readback) + 1;
+	q->cnt = 1ULL << order;
+	queue_size = q->len * q->cnt;

Um... What? We allocate an arbitrarily-sized queue, free it again, *then* check what the hardware actually supports, and maybe allocate another queue? I can't help thinking there's a much better way...

+
+	/*
+	 * In case we also failed to set PPN, it means the field is hardcoded and the
+	 * queue resides in I/O memory instead, so get its physical address and
+	 * ioremap it.
+	 */
+	qbr_paddr = ppn_to_phys(qbr_readback);
+	if (qbr_paddr != q->base_dma) {
+		dev_info(dev,
+			 "hardcoded ppn in %s base register, using io memory for the queue\n",
+			 name);
+		dev_info(dev, "queue length for %s set to %i\n", name, q->cnt);
+		q->in_iomem = true;
+		q->base = ioremap(qbr_paddr, queue_size);
+		if (!q->base) {
+			dev_err(dev, "failed to map %s queue (cnt: %u)\n", name, q->cnt);
+			return -ENOMEM;
+		}
+		q->base_dma = qbr_paddr;
+	} else {
+		/*
+		 * We only failed to set the queue size, re-try to allocate memory with
+		 * the queue size supported by the hw.
+		 */
+		dev_info(dev, "hardcoded queue size in %s base register\n", name);
+		dev_info(dev, "retrying with queue length: %i\n", q->cnt);
+		q->base = dmam_alloc_coherent(dev, queue_size, &q->base_dma, GFP_KERNEL);

Note that dma_alloc_coherent only guarantees natural alignment here, so if you need a minimum alignment of 4KB as the spec claims you should really clamp your minimum allocation size to that.

+		if (!q->base) {
+			dev_err(dev, "failed to allocate %s queue (cnt: %u)\n",
+				name, q->cnt);
+			return -ENOMEM;
+		}
+	}
+
+	qbr_val = phys_to_ppn(q->base_dma) |
+		  FIELD_PREP(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, order - 1);
+	riscv_iommu_writeq(iommu, q->qbr, qbr_val);
+
+	/* Final check to make sure hw accepted our write */
+	qbr_readback = riscv_iommu_readq(iommu, q->qbr);
+	if (qbr_readback != qbr_val) {
+		dev_err(dev, "failed to set base register for %s\n", name);
+		ret = -ENODEV;
+		goto fail;
+	}
+
+ irq:
+	ret = request_threaded_irq(irq, irq_check, irq_process, IRQF_ONESHOT | IRQF_SHARED,
+				   dev_name(dev), q);
+	if (ret) {
+		dev_err(dev, "fail to request irq %d for %s\n", irq, name);
+		goto fail;
+	}
+
+	q->irq = irq;
+
+	/* Note: All RIO_xQ_EN/IE fields are in the same offsets */
+	csr = riscv_iommu_queue_ctrl(iommu, q,
+				     RISCV_IOMMU_QUEUE_ENABLE |
+				     RISCV_IOMMU_QUEUE_INTR_ENABLE);
+	if (csr & RISCV_IOMMU_QUEUE_BUSY) {
+		dev_err(dev, "%s init timeout\n", name);
+		ret = -EBUSY;
+		goto fail;
+	}
+
+	return 0;
+
+ fail:
+	riscv_iommu_queue_free(iommu, q);
+	/*
+	 * The error path of riscv_iommu_init() frees every queue again, so
+	 * make sure nothing released above can be released a second time.
+	 */
+	q->base = NULL;
+	q->irq = 0;
+	return ret;
+}
+
+/*
+ * I/O MMU Command queue chapter 3.1
+ *
+ * The helpers below build a struct riscv_iommu_command in place.
+ * riscv_iommu_cmd_inval_vma() initialises both dwords with the IOTINVAL
+ * opcode and the VMA function; the riscv_iommu_cmd_inval_set_*() helpers
+ * then add optional operands to a command prepared this way.
+ */
+
+static inline void riscv_iommu_cmd_inval_vma(struct riscv_iommu_command *cmd)
+{
+ cmd->dword0 =
+ FIELD_PREP(RISCV_IOMMU_CMD_OPCODE,
+ RISCV_IOMMU_CMD_IOTINVAL_OPCODE) | FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
+ RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);

Interesting indentation... :/

+ cmd->dword1 = 0; /* no address operand until riscv_iommu_cmd_inval_set_addr() stores one */
+}
+
+/*
+ * Attach an address operand to an invalidation command: the address goes
+ * into dword1 and the AV flag in dword0 marks it as present.
+ */
+static inline void riscv_iommu_cmd_inval_set_addr(struct riscv_iommu_command *cmd,
+						  u64 addr)
+{
+	cmd->dword1 = addr;
+	cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV;
+}
+
+/* Add a PSCID operand, together with its PSCV flag, to an invalidation command. */
+static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
+						   unsigned pscid)
+{
+	u64 operand = RISCV_IOMMU_CMD_IOTINVAL_PSCV;
+
+	operand |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_PSCID, pscid);
+	cmd->dword0 |= operand;
+}
+
+/* Add a GSCID operand, together with its GV flag, to an invalidation command. */
+static inline void riscv_iommu_cmd_inval_set_gscid(struct riscv_iommu_command *cmd,
+						   unsigned gscid)
+{
+	u64 operand = RISCV_IOMMU_CMD_IOTINVAL_GV;
+
+	operand |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_GSCID, gscid);
+	cmd->dword0 |= operand;
+}
+
+/* Initialise 'cmd' as an IOFENCE command with the C function and no operands. */
+static inline void riscv_iommu_cmd_iofence(struct riscv_iommu_command *cmd)
+{
+	const u64 opcode = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE);
+	const u64 func = FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C);
+
+	cmd->dword1 = 0;
+	cmd->dword0 = opcode | func;
+}
+
+/*
+ * Initialise 'cmd' as an IOFENCE command (C function) carrying a data value
+ * and the AV flag in dword0, with the address shifted right by two bits in
+ * dword1.
+ */
+static inline void riscv_iommu_cmd_iofence_set_av(struct riscv_iommu_command *cmd,
+						  u64 addr, u32 data)
+{
+	/* Start from the plain fence, then layer the operands on top. */
+	riscv_iommu_cmd_iofence(cmd);
+	cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOFENCE_DATA, data) |
+		       RISCV_IOMMU_CMD_IOFENCE_AV;
+	cmd->dword1 = addr >> 2;
+}
+
+/* Initialise 'cmd' as an IODIR command with the INVAL_DDT function. */
+static inline void riscv_iommu_cmd_iodir_inval_ddt(struct riscv_iommu_command *cmd)
+{
+	const u64 opcode = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE);
+	const u64 func = FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT);
+
+	cmd->dword1 = 0;
+	cmd->dword0 = opcode | func;
+}
+
+/* Initialise 'cmd' as an IODIR command with the INVAL_PDT function. */
+static inline void riscv_iommu_cmd_iodir_inval_pdt(struct riscv_iommu_command *cmd)
+{
+	const u64 opcode = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE);
+	const u64 func = FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT);
+
+	cmd->dword1 = 0;
+	cmd->dword0 = opcode | func;
+}
+
+/* Add a device ID operand, together with its DV flag, to an IODIR command. */
+static inline void riscv_iommu_cmd_iodir_set_did(struct riscv_iommu_command *cmd,
+						 unsigned devid)
+{
+	u64 operand = RISCV_IOMMU_CMD_IODIR_DV;
+
+	operand |= FIELD_PREP(RISCV_IOMMU_CMD_IODIR_DID, devid);
+	cmd->dword0 |= operand;
+}
+
+/*
+ * Post one command to the command queue, optionally waiting for it.
+ *
+ * Under cq_lock the command is copied into the ring slot at the software
+ * index (cmdq.lui) and the CQT register is advanced. If advancing would make
+ * the tail equal to the head read from CQH, the ring is treated as full:
+ * nothing is written and false is returned.
+ *
+ * With 'sync' set, the function then polls CQH until it has moved past the
+ * posted slot, giving up with false after RISCV_IOMMU_TIMEOUT cycles.
+ *
+ * Returns true when the command was queued (and, for sync, consumed).
+ *
+ * TODO: Convert into lock-less MPSC implementation.
+ */
+static bool riscv_iommu_post_sync(struct riscv_iommu_device *iommu,
+ struct riscv_iommu_command *cmd, bool sync)
+{
+ u32 head, tail, next, last;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->cq_lock, flags);
+ head = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQH) & (iommu->cmdq.cnt - 1);
+ tail = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQT) & (iommu->cmdq.cnt - 1);
+ last = iommu->cmdq.lui;
+ if (tail != last) {
+ spin_unlock_irqrestore(&iommu->cq_lock, flags);
+ /*
+ * FIXME: This is a workaround for dropped MMIO writes/reads on QEMU platform.
+ * While debugging of the problem is still ongoing, this provides
+ * a simple implementation of a try-again policy.
+ * Will be changed to a lock-less algorithm in the future.
+ */
+ dev_dbg(iommu->dev, "IOMMU CQT: %x != %x (1st)\n", last, tail);
+ spin_lock_irqsave(&iommu->cq_lock, flags);
+ tail =
+ riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQT) & (iommu->cmdq.cnt - 1);
+ last = iommu->cmdq.lui;
+ if (tail != last) {
+ spin_unlock_irqrestore(&iommu->cq_lock, flags);
+ dev_dbg(iommu->dev, "IOMMU CQT: %x != %x (2nd)\n", last, tail);
+ spin_lock_irqsave(&iommu->cq_lock, flags);
+ }
+ }
+
+ next = (last + 1) & (iommu->cmdq.cnt - 1);
+ if (next != head) { /* room left: one slot always stays unused */
+ struct riscv_iommu_command *ptr = iommu->cmdq.base;
+ ptr[last] = *cmd;
+ wmb(); /* order the ring store before the tail register write */
+ riscv_iommu_writel(iommu, RISCV_IOMMU_REG_CQT, next);
+ iommu->cmdq.lui = next;
+ }
+
+ spin_unlock_irqrestore(&iommu->cq_lock, flags);
+
+ if (sync && head != next) {
+ cycles_t start_time = get_cycles();
+ while (1) {
+ last = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQH) &
+ (iommu->cmdq.cnt - 1);
+ if (head < next && last >= next) /* no wrap between head and next */
+ break;
+ if (head > next && last < head && last >= next) /* next wrapped around the ring end */
+ break;
+ if (RISCV_IOMMU_TIMEOUT < (get_cycles() - start_time)) {
+ dev_err(iommu->dev, "IOFENCE TIMEOUT\n");
+ return false;
+ }
+ cpu_relax();
+ }
+ }
+
+ return next != head;
+}
+
+/*
+ * Queue a command without waiting for the hardware to consume it.
+ * Returns what riscv_iommu_post_sync() returns for a non-synchronous post.
+ */
+static bool riscv_iommu_post(struct riscv_iommu_device *iommu,
+			     struct riscv_iommu_command *cmd)
+{
+	const bool wait_for_completion = false;
+
+	return riscv_iommu_post_sync(iommu, cmd, wait_for_completion);
+}
+
+/*
+ * Post an IOFENCE command and wait until the command queue has consumed it.
+ * Returns the result of the synchronous post.
+ */
+static bool riscv_iommu_iofence_sync(struct riscv_iommu_device *iommu)
+{
+	struct riscv_iommu_command fence;
+	const bool wait_for_completion = true;
+
+	riscv_iommu_cmd_iofence(&fence);
+
+	return riscv_iommu_post_sync(iommu, &fence, wait_for_completion);
+}
+
+/*
+ * Command queue primary interrupt handler.
+ *
+ * Wakes the handler thread when the command queue bit (CIP) is set in the
+ * interrupt pending register; otherwise reports the interrupt as not ours.
+ */
+static irqreturn_t riscv_iommu_cmdq_irq_check(int irq, void *data)
+{
+	struct riscv_iommu_queue *queue = data;
+	struct riscv_iommu_device *iommu =
+		container_of(queue, struct riscv_iommu_device, cmdq);
+
+	if (!(riscv_iommu_readl(iommu, RISCV_IOMMU_REG_IPSR) & RISCV_IOMMU_IPSR_CIP))
+		return IRQ_NONE;
+
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Command queue interrupt handler thread function.
+ *
+ * Logs command queue error conditions reported in CQCSR, writes the value
+ * read back to the register, and then clears the command queue bit in the
+ * interrupt pending register.
+ */
+static irqreturn_t riscv_iommu_cmdq_process(int irq, void *data)
+{
+	struct riscv_iommu_queue *queue = data;
+	struct riscv_iommu_device *iommu =
+		container_of(queue, struct riscv_iommu_device, cmdq);
+	const unsigned error_bits = RISCV_IOMMU_CQCSR_CQMF |
+				    RISCV_IOMMU_CQCSR_CMD_TO |
+				    RISCV_IOMMU_CQCSR_CMD_ILL;
+	unsigned csr = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_CQCSR);
+
+	/* Error reporting, clear error reports if any. */
+	if (csr & error_bits) {
+		int mem_fault = !!(csr & RISCV_IOMMU_CQCSR_CQMF);
+		int timed_out = !!(csr & RISCV_IOMMU_CQCSR_CMD_TO);
+		int illegal = !!(csr & RISCV_IOMMU_CQCSR_CMD_ILL);
+
+		riscv_iommu_queue_ctrl(iommu, &iommu->cmdq, csr);
+		dev_warn_ratelimited(iommu->dev,
+				     "Command queue error: fault: %d tout: %d err: %d\n",
+				     mem_fault, timed_out, illegal);
+	}
+
+	/* Acknowledge the command queue interrupt. */
+	riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, RISCV_IOMMU_IPSR_CIP);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Fault/event queue, chapter 3.2
+ */
+
+/*
+ * Log one fault queue record: the cause and device ID decoded from the
+ * record header plus the two iotval words, rate limited.
+ */
+static void riscv_iommu_fault_report(struct riscv_iommu_device *iommu,
+				     struct riscv_iommu_fq_record *event)
+{
+	unsigned cause = FIELD_GET(RISCV_IOMMU_FQ_HDR_CAUSE, event->hdr);
+	unsigned source = FIELD_GET(RISCV_IOMMU_FQ_HDR_DID, event->hdr);
+
+	dev_warn_ratelimited(iommu->dev,
+			     "Fault %d devid: %d" " iotval: %llx iotval2: %llx\n",
+			     cause, source, event->iotval, event->iotval2);
+}
+
+/*
+ * Fault/event queue primary interrupt handler.
+ *
+ * Wakes the handler thread when the fault queue bit (FIP) is set in the
+ * interrupt pending register; otherwise reports the interrupt as not ours.
+ */
+static irqreturn_t riscv_iommu_fltq_irq_check(int irq, void *data)
+{
+	struct riscv_iommu_queue *queue = data;
+	struct riscv_iommu_device *iommu =
+		container_of(queue, struct riscv_iommu_device, fltq);
+
+	if (!(riscv_iommu_readl(iommu, RISCV_IOMMU_REG_IPSR) & RISCV_IOMMU_IPSR_FIP))
+		return IRQ_NONE;
+
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Fault queue interrupt handler thread function.
+ *
+ * Logs fault queue error conditions reported in FQCSR and writes the value
+ * read back to the register, clears the fault queue bit in the interrupt
+ * pending register, then reports and releases queued fault records until
+ * the queue is empty.
+ */
+static irqreturn_t riscv_iommu_fltq_process(int irq, void *data)
+{
+	struct riscv_iommu_queue *q = data;
+	struct riscv_iommu_device *iommu =
+		container_of(q, struct riscv_iommu_device, fltq);
+	struct riscv_iommu_fq_record *records = q->base;
+	unsigned batch, first, i, csr;
+
+	/* Error reporting, clear error reports if any. */
+	csr = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FQCSR);
+	if (csr & (RISCV_IOMMU_FQCSR_FQMF | RISCV_IOMMU_FQCSR_FQOF)) {
+		int mem_fault = !!(csr & RISCV_IOMMU_FQCSR_FQMF);
+		int overflow = !!(csr & RISCV_IOMMU_FQCSR_FQOF);
+
+		riscv_iommu_queue_ctrl(iommu, &iommu->fltq, csr);
+		dev_warn_ratelimited(iommu->dev,
+				     "Fault queue error: fault: %d full: %d\n",
+				     mem_fault, overflow);
+	}
+
+	/* Clear fault interrupt pending. */
+	riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, RISCV_IOMMU_IPSR_FIP);
+
+	/* Report fault events, one contiguous batch at a time. */
+	while ((batch = riscv_iommu_queue_consume(iommu, q, &first)) != 0) {
+		for (i = 0; i < batch; i++)
+			riscv_iommu_fault_report(iommu, &records[first + i]);
+		riscv_iommu_queue_release(iommu, q, batch);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Page request queue, chapter 3.3
+ */
+
/*
* Register device for IOMMU tracking.
*/
@@ -97,6 +600,54 @@ static void riscv_iommu_add_device(struct riscv_iommu_device *iommu, struct devi
mutex_unlock(&iommu->eps_mutex);
}
+/*
+ * Page request interface queue primary interrupt handler.
+ *
+ * Wakes the handler thread when the page request bit (PIP) is set in the
+ * interrupt pending register; otherwise reports the interrupt as not ours.
+ */
+static irqreturn_t riscv_iommu_priq_irq_check(int irq, void *data)
+{
+	struct riscv_iommu_queue *queue = data;
+	struct riscv_iommu_device *iommu =
+		container_of(queue, struct riscv_iommu_device, priq);
+
+	if (!(riscv_iommu_readl(iommu, RISCV_IOMMU_REG_IPSR) & RISCV_IOMMU_IPSR_PIP))
+		return IRQ_NONE;
+
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Page request interface queue interrupt handler thread function.
+ *
+ * Logs page request queue error conditions reported in PQCSR and writes the
+ * value read back to the register, clears the page request bit in the
+ * interrupt pending register, then drains the queue. Requests are not
+ * handled here: each batch is only counted in a warning and released.
+ */
+static irqreturn_t riscv_iommu_priq_process(int irq, void *data)
+{
+	struct riscv_iommu_queue *q = data;
+	struct riscv_iommu_device *iommu =
+		container_of(q, struct riscv_iommu_device, priq);
+	struct riscv_iommu_pq_record *requests = q->base;
+	unsigned batch, first, csr;
+
+	/* Error reporting, clear error reports if any. */
+	csr = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_PQCSR);
+	if (csr & (RISCV_IOMMU_PQCSR_PQMF | RISCV_IOMMU_PQCSR_PQOF)) {
+		int mem_fault = !!(csr & RISCV_IOMMU_PQCSR_PQMF);
+		int overflow = !!(csr & RISCV_IOMMU_PQCSR_PQOF);
+
+		riscv_iommu_queue_ctrl(iommu, &iommu->priq, csr);
+		dev_warn_ratelimited(iommu->dev,
+				     "Page request queue error: fault: %d full: %d\n",
+				     mem_fault, overflow);
+	}
+
+	/* Clear page request interrupt pending. */
+	riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, RISCV_IOMMU_IPSR_PIP);
+
+	/* Process page requests. */
+	while ((batch = riscv_iommu_queue_consume(iommu, q, &first)) != 0) {
+		dev_warn(iommu->dev, "unexpected %u page requests\n", batch);
+		riscv_iommu_queue_release(iommu, q, batch);
+	}
+
+	return IRQ_HANDLED;
+}
+
/*
* Endpoint management
*/
@@ -350,7 +901,29 @@ static void riscv_iommu_flush_iotlb_range(struct iommu_domain *iommu_domain,
unsigned long *start, unsigned long *end,
size_t *pgsize)
{
- /* Command interface not implemented */
+ struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
+ struct riscv_iommu_command cmd;
+ unsigned long iova;
+
+ if (domain->mode == RISCV_IOMMU_DC_FSC_MODE_BARE)

That should probably not happen - things shouldn't be calling TLB ops on identity domains (and ideally your identity domains wouldn't even *have* iotlb callbacks...)

+ return;
+
+ /* Domain not attached to an IOMMU! */
+ BUG_ON(!domain->iommu);

However I'm not sure how iommu_create_device_direct_mappings() isn't hitting that?

Thanks,
Robin.

+
+ riscv_iommu_cmd_inval_vma(&cmd);
+ riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
+
+ if (start && end && pgsize) {
+ /* Cover only the range that is needed */
+ for (iova = *start; iova <= *end; iova += *pgsize) {
+ riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+ riscv_iommu_post(domain->iommu, &cmd);
+ }
+ } else {
+ riscv_iommu_post(domain->iommu, &cmd);
+ }
+ riscv_iommu_iofence_sync(domain->iommu);
}
static void riscv_iommu_flush_iotlb_all(struct iommu_domain *iommu_domain)
@@ -610,6 +1183,9 @@ void riscv_iommu_remove(struct riscv_iommu_device *iommu)
iommu_device_unregister(&iommu->iommu);
iommu_device_sysfs_remove(&iommu->iommu);
riscv_iommu_enable(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
+ riscv_iommu_queue_free(iommu, &iommu->cmdq);
+ riscv_iommu_queue_free(iommu, &iommu->fltq);
+ riscv_iommu_queue_free(iommu, &iommu->priq);
}
int riscv_iommu_init(struct riscv_iommu_device *iommu)
@@ -632,6 +1208,16 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
}
#endif
+ /*
+ * Assign queue lengths from module parameters if not already
+ * set on the device tree.
+ */
+ if (!iommu->cmdq_len)
+ iommu->cmdq_len = cmdq_length;
+ if (!iommu->fltq_len)
+ iommu->fltq_len = fltq_length;
+ if (!iommu->priq_len)
+ iommu->priq_len = priq_length;
/* Clear any pending interrupt flag. */
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR,
RISCV_IOMMU_IPSR_CIP |
@@ -639,7 +1225,20 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
RISCV_IOMMU_IPSR_PMIP | RISCV_IOMMU_IPSR_PIP);
spin_lock_init(&iommu->cq_lock);
mutex_init(&iommu->eps_mutex);
+ ret = riscv_iommu_queue_init(iommu, RISCV_IOMMU_COMMAND_QUEUE);
+ if (ret)
+ goto fail;
+ ret = riscv_iommu_queue_init(iommu, RISCV_IOMMU_FAULT_QUEUE);
+ if (ret)
+ goto fail;
+ if (!(iommu->cap & RISCV_IOMMU_CAP_ATS))
+ goto no_ats;
+
+ ret = riscv_iommu_queue_init(iommu, RISCV_IOMMU_PAGE_REQUEST_QUEUE);
+ if (ret)
+ goto fail;
+ no_ats:
ret = riscv_iommu_enable(iommu, RISCV_IOMMU_DDTP_MODE_BARE);
if (ret) {
@@ -663,5 +1262,8 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
return 0;
fail:
riscv_iommu_enable(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
+ riscv_iommu_queue_free(iommu, &iommu->priq);
+ riscv_iommu_queue_free(iommu, &iommu->fltq);
+ riscv_iommu_queue_free(iommu, &iommu->cmdq);
return ret;
}
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index 7dc9baa59a50..04148a2a8ffd 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -28,6 +28,24 @@
#define IOMMU_PAGE_SIZE_1G BIT_ULL(30)
#define IOMMU_PAGE_SIZE_512G BIT_ULL(39)
+struct riscv_iommu_queue { /* software state of one hardware ring (command, fault or page-request) */
+ dma_addr_t base_dma; /* ring buffer bus address; the physical address from the base register when in_iomem */
+ void *base; /* ring buffer CPU pointer (coherent DMA memory, or an ioremap mapping when in_iomem) */
+ size_t len; /* size of a single queue entry, in bytes */
+ u32 cnt; /* number of entries in the ring, always a power of two */
+ u32 lui; /* last used index, consumer/producer share */
+ unsigned qbr; /* queue base register offset */
+ unsigned qcr; /* queue control and status register offset */
+ int irq; /* registered interrupt number, zero until the handler is installed */
+ bool in_iomem; /* indicates queue data are in I/O memory */
+};
+
+enum riscv_queue_ids { /* queue selector passed to riscv_iommu_queue_init() */
+ RISCV_IOMMU_COMMAND_QUEUE = 0, /* selects iommu->cmdq */
+ RISCV_IOMMU_FAULT_QUEUE = 1, /* selects iommu->fltq */
+ RISCV_IOMMU_PAGE_REQUEST_QUEUE = 2 /* selects iommu->priq */
+};
+
struct riscv_iommu_device {
struct iommu_device iommu; /* iommu core interface */
struct device *dev; /* iommu hardware */
@@ -42,6 +60,11 @@ struct riscv_iommu_device {
int irq_pm;
int irq_priq;
+ /* Queue lengths */
+ int cmdq_len;
+ int fltq_len;
+ int priq_len;
+
/* supported and enabled hardware capabilities */
u64 cap;
@@ -53,6 +76,11 @@ struct riscv_iommu_device {
unsigned ddt_mode;
bool ddtp_in_iomem;
+ /* hardware queues */
+ struct riscv_iommu_queue cmdq;
+ struct riscv_iommu_queue fltq;
+ struct riscv_iommu_queue priq;
+
/* Connected end-points */
struct rb_root eps;
struct mutex eps_mutex;