[PATCH 07/11] RISC-V: drivers/iommu/riscv: Add device context support

From: Tomasz Jeznach
Date: Wed Jul 19 2023 - 15:35:24 EST


Introduces per-device translation context, with 1-, 2- or 3-level
device directory tree structures.

Signed-off-by: Tomasz Jeznach <tjeznach@xxxxxxxxxxxx>
---
drivers/iommu/riscv/iommu.c | 163 ++++++++++++++++++++++++++++++++++--
drivers/iommu/riscv/iommu.h | 1 +
2 files changed, 158 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 5c4cf9875302..9ee7d2b222b5 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -41,7 +41,7 @@ MODULE_ALIAS("riscv-iommu");
MODULE_LICENSE("GPL v2");

/* Global IOMMU params. */
-static int ddt_mode = RISCV_IOMMU_DDTP_MODE_BARE;
+static int ddt_mode = RISCV_IOMMU_DDTP_MODE_3LVL;
module_param(ddt_mode, int, 0644);
MODULE_PARM_DESC(ddt_mode, "Device Directory Table mode.");

@@ -452,6 +452,14 @@ static bool riscv_iommu_post(struct riscv_iommu_device *iommu,
return riscv_iommu_post_sync(iommu, cmd, false);
}

+static bool riscv_iommu_iodir_inv_devid(struct riscv_iommu_device *iommu, unsigned devid)
+{ /* Post a device-directory invalidation command for @devid; returns riscv_iommu_post()'s result. */
+ struct riscv_iommu_command inval;
+ riscv_iommu_cmd_iodir_inval_ddt(&inval); /* presumably encodes IODIR.INVAL_DDT - confirm in helper */
+ riscv_iommu_cmd_iodir_set_did(&inval, devid); /* restrict the command to this device id */
+ return riscv_iommu_post(iommu, &inval);
+}
+
static bool riscv_iommu_iofence_sync(struct riscv_iommu_device *iommu)
{
struct riscv_iommu_command cmd;
@@ -671,6 +679,94 @@ static bool riscv_iommu_capable(struct device *dev, enum iommu_cap cap)
return false;
}

+/* TODO: implement proper device context management, e.g. teardown flow */
+
+/* Walk the device directory tree for @devid, allocating missing non-leaf levels; returns the device context slot or NULL. */
+static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iommu,
+ unsigned devid)
+{
+ const bool base_format = !(iommu->cap & RISCV_IOMMU_CAP_MSI_FLAT); /* base format when MSI_FLAT is not advertised */
+ unsigned depth = iommu->ddt_mode - RISCV_IOMMU_DDTP_MODE_1LVL; /* levels above the leaf; only used once the mode is validated */
+ u8 ddi_bits[3] = { 0 }; /* cumulative widths: ddi_bits[n] is the first bit above DDI[n] */
+ u64 *ddtp = NULL, ddt;
+
+ if (iommu->ddt_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
+ iommu->ddt_mode == RISCV_IOMMU_DDTP_MODE_BARE)
+ return NULL; /* no device directory in these modes */
+
+ /* Reject modes above the maximum; depth would otherwise index past ddi_bits[] */
+ if (iommu->ddt_mode > RISCV_IOMMU_DDTP_MODE_MAX)
+ return NULL;
+
+ /*
+ * Device id partitioning for base format (ddi_bits[] holds the running totals):
+ * DDI[0]: bits 0 - 6 (1st level) (7 bits)
+ * DDI[1]: bits 7 - 15 (2nd level) (9 bits)
+ * DDI[2]: bits 16 - 23 (3rd level) (8 bits)
+ *
+ * For extended format:
+ * DDI[0]: bits 0 - 5 (1st level) (6 bits)
+ * DDI[1]: bits 6 - 14 (2nd level) (9 bits)
+ * DDI[2]: bits 15 - 23 (3rd level) (9 bits)
+ */
+ if (base_format) {
+ ddi_bits[0] = 7;
+ ddi_bits[1] = 7 + 9;
+ ddi_bits[2] = 7 + 9 + 8;
+ } else {
+ ddi_bits[0] = 6;
+ ddi_bits[1] = 6 + 9;
+ ddi_bits[2] = 6 + 9 + 9;
+ }
+
+ /* Make sure device id fits in the bits covered by the configured number of levels */
+ if (devid >= (1 << ddi_bits[depth]))
+ return NULL;
+
+ /* Walk the non-leaf levels, from the root table down to the leaf page holding the device context */
+ for (ddtp = (u64 *) iommu->ddtp; depth-- > 0;) {
+ const int split = ddi_bits[depth];
+ /*
+ * Each non-leaf entry is 64 bits wide. Shifting devid right by the combined
+ * width of the lower fields and masking to 9 bits selects this level's entry.
+ */
+ ddtp += (devid >> split) & 0x1FF;
+
+ retry:
+ /*
+ * Check if this entry has been populated and if not
+ * allocate a zeroed page for the next level and install it.
+ */
+ ddt = READ_ONCE(*ddtp);
+ if (ddt & RISCV_IOMMU_DDTE_VALID) {
+ ddtp = __va(ppn_to_phys(ddt)); /* follow the entry to the next-level page */
+ } else {
+ u64 old, new = get_zeroed_page(GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ old = cmpxchg64_relaxed(ddtp, ddt, /* NOTE(review): relaxed ordering - confirm the zeroed page is visible before the entry */
+ phys_to_ppn(__pa(new)) |
+ RISCV_IOMMU_DDTE_VALID);
+
+ if (old != ddt) { /* entry changed since it was read: drop our page and re-read */
+ free_page(new);
+ goto retry;
+ }
+
+ ddtp = (u64 *) new; /* our page is now the next level */
+ }
+ }
+
+ /*
+ * Index the leaf page by DDI[0] (7 bits for base format, 6 bits for extended). ddtp is a
+ * u64 pointer, so the index is scaled by the context size in 64-bit words: base format is
+ * 4 * 64bits (shift by 2), extended format is 8 * 64bits (shift by 3), hence the (3 - base_format) below.
+ */
+ ddtp += (devid & ((64 << base_format) - 1)) << (3 - base_format);
+ return (struct riscv_iommu_dc *)ddtp;
+}
+
static struct iommu_device *riscv_iommu_probe_device(struct device *dev)
{
struct riscv_iommu_device *iommu;
@@ -708,6 +804,9 @@ static struct iommu_device *riscv_iommu_probe_device(struct device *dev)
ep->iommu = iommu;
ep->dev = dev;

+ /* Initial DC pointer can be NULL if IOMMU is configured in OFF or BARE mode */
+ ep->dc = riscv_iommu_get_dc(iommu, ep->devid);
+
dev_info(iommu->dev, "adding device to iommu with devid %i in domain %i\n",
ep->devid, ep->domid);

@@ -734,6 +833,16 @@ static void riscv_iommu_release_device(struct device *dev)
list_del(&ep->domain);
mutex_unlock(&ep->lock);

+ if (ep->dc) {
+ // this should be already done by domain detach.
+ ep->dc->tc = 0ULL;
+ wmb();
+ ep->dc->fsc = 0ULL;
+ ep->dc->iohgatp = 0ULL;
+ wmb();
+ riscv_iommu_iodir_inv_devid(iommu, ep->devid);
+ }
+
/* Remove endpoint from IOMMU tracking structures */
mutex_lock(&iommu->eps_mutex);
rb_erase(&ep->node, &iommu->eps);
@@ -853,11 +962,21 @@ static int riscv_iommu_domain_finalize(struct riscv_iommu_domain *domain,
return 0;
}

+static u64 riscv_iommu_domain_atp(struct riscv_iommu_domain *domain)
+{ /* Compose the FSC value for @domain: the mode field, plus the root table PPN unless the mode is BARE. */
+ const u64 fsc_mode = FIELD_PREP(RISCV_IOMMU_DC_FSC_MODE, domain->mode);
+ if (domain->mode == RISCV_IOMMU_DC_FSC_MODE_BARE)
+ return fsc_mode; /* BARE: pgd_root is not encoded */
+ return fsc_mode | FIELD_PREP(RISCV_IOMMU_DC_FSC_PPN, virt_to_pfn(domain->pgd_root));
+}
+
static int riscv_iommu_attach_dev(struct iommu_domain *iommu_domain, struct device *dev)
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
struct riscv_iommu_endpoint *ep = dev_iommu_priv_get(dev);
+ struct riscv_iommu_dc *dc = ep->dc;
int ret;
+ u64 val;

/* PSCID not valid */
if ((int)domain->pscid < 0)
@@ -880,17 +999,44 @@ static int riscv_iommu_attach_dev(struct iommu_domain *iommu_domain, struct devi
return ret;
}

- if (ep->iommu->ddt_mode != RISCV_IOMMU_DDTP_MODE_BARE ||
- domain->domain.type != IOMMU_DOMAIN_IDENTITY) {
- dev_warn(dev, "domain type %d not supported\n",
- domain->domain.type);
+ if (ep->iommu->ddt_mode == RISCV_IOMMU_DDTP_MODE_BARE &&
+ domain->domain.type == IOMMU_DOMAIN_IDENTITY) {
+ dev_info(dev, "domain type %d attached w/ PSCID %u\n",
+ domain->domain.type, domain->pscid);
+ return 0;
+ }
+
+ if (!dc) {
return -ENODEV;
}

+ /*
+ * S-Stage translation table. G-Stage remains unmodified (BARE).
+ */
+ val = FIELD_PREP(RISCV_IOMMU_DC_TA_PSCID, domain->pscid);
+
+ dc->ta = cpu_to_le64(val);
+ dc->fsc = cpu_to_le64(riscv_iommu_domain_atp(domain));
+
+ wmb();
+
+ /* Mark device context as valid, synchronise device context cache. */
+ val = RISCV_IOMMU_DC_TC_V;
+
+ if (ep->iommu->cap & RISCV_IOMMU_CAP_AMO) {
+ val |= RISCV_IOMMU_DC_TC_GADE |
+ RISCV_IOMMU_DC_TC_SADE;
+ }
+
+ dc->tc = cpu_to_le64(val);
+ wmb();
+
list_add_tail(&ep->domain, &domain->endpoints);
mutex_unlock(&ep->lock);
mutex_unlock(&domain->lock);

+ riscv_iommu_iodir_inv_devid(ep->iommu, ep->devid);
+
dev_info(dev, "domain type %d attached w/ PSCID %u\n",
domain->domain.type, domain->pscid);

@@ -1239,7 +1385,12 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
goto fail;

no_ats:
- ret = riscv_iommu_enable(iommu, RISCV_IOMMU_DDTP_MODE_BARE);
+ if (iommu_default_passthrough()) {
+ dev_info(dev, "iommu set to passthrough mode\n");
+ ret = riscv_iommu_enable(iommu, RISCV_IOMMU_DDTP_MODE_BARE);
+ } else {
+ ret = riscv_iommu_enable(iommu, ddt_mode);
+ }

if (ret) {
dev_err(dev, "cannot enable iommu device (%d)\n", ret);
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index 04148a2a8ffd..9140df71e17b 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -105,6 +105,7 @@ struct riscv_iommu_endpoint {
unsigned devid; /* PCI bus:device:function number */
unsigned domid; /* PCI domain number, segment */
struct rb_node node; /* device tracking node (lookup by devid) */
+ struct riscv_iommu_dc *dc; /* device context pointer */
struct riscv_iommu_device *iommu; /* parent iommu device */

struct mutex lock;
--
2.34.1