Re: [PATCH 11/11] RISC-V: drivers/iommu/riscv: Add G-Stage translation support

From: Robin Murphy
Date: Wed Aug 16 2023 - 17:14:26 EST


On 2023-07-19 20:33, Tomasz Jeznach wrote:
This change introduces 2nd stage translation configuration
support, enabling nested translation for IOMMU hardware.
Pending integration with VMM IOMMUFD interfaces to manage
1st stage translation and IOMMU virtualization interfaces.

Signed-off-by: Tomasz Jeznach <tjeznach@xxxxxxxxxxxx>
---
drivers/iommu/riscv/iommu.c | 58 ++++++++++++++++++++++++++++---------
drivers/iommu/riscv/iommu.h | 3 +-
2 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 7b3e3e135cf6..3ca2f0194d3c 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -1418,6 +1418,19 @@ static struct iommu_domain *riscv_iommu_domain_alloc(unsigned type)
return &domain->domain;
}
+/* mark domain as second-stage translation */
+static int riscv_iommu_enable_nesting(struct iommu_domain *iommu_domain)

Please don't add more instances of enable_nesting. It's a dead end that has never actually been used and should be removed fairly soon. The new nesting infrastructure is all still in flight, but the current patchsets should give a good idea of what you'd want to work towards:

https://lore.kernel.org/linux-iommu/20230724110406.107212-1-yi.l.liu@xxxxxxxxx/
https://lore.kernel.org/linux-iommu/20230724111335.107427-1-yi.l.liu@xxxxxxxxx/
https://lore.kernel.org/linux-iommu/cover.1683688960.git.nicolinc@xxxxxxxxxx/

Thanks,
Robin.

+{
+ struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
+
+ mutex_lock(&domain->lock);
+ if (list_empty(&domain->endpoints))
+ domain->g_stage = true;
+ mutex_unlock(&domain->lock);
+
+ return domain->g_stage ? 0 : -EBUSY;
+}
+
static void riscv_iommu_domain_free(struct iommu_domain *iommu_domain)
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
@@ -1433,7 +1446,7 @@ static void riscv_iommu_domain_free(struct iommu_domain *iommu_domain)
free_io_pgtable_ops(&domain->pgtbl.ops);
if (domain->pgd_root)
- free_pages((unsigned long)domain->pgd_root, 0);
+ free_pages((unsigned long)domain->pgd_root, domain->g_stage ? 2 : 0);
if ((int)domain->pscid > 0)
ida_free(&riscv_iommu_pscids, domain->pscid);
@@ -1483,7 +1496,8 @@ static int riscv_iommu_domain_finalize(struct riscv_iommu_domain *domain,
/* TODO: Fix this for RV32 */
domain->mode = satp_mode >> 60;
- domain->pgd_root = (pgd_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, 0);
+ domain->pgd_root = (pgd_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ domain->g_stage ? 2 : 0);
if (!domain->pgd_root)
return -ENOMEM;
@@ -1499,6 +1513,8 @@ static u64 riscv_iommu_domain_atp(struct riscv_iommu_domain *domain)
u64 atp = FIELD_PREP(RISCV_IOMMU_DC_FSC_MODE, domain->mode);
if (domain->mode != RISCV_IOMMU_DC_FSC_MODE_BARE)
atp |= FIELD_PREP(RISCV_IOMMU_DC_FSC_PPN, virt_to_pfn(domain->pgd_root));
+ if (domain->g_stage)
+ atp |= FIELD_PREP(RISCV_IOMMU_DC_IOHGATP_GSCID, domain->pscid);
return atp;
}
@@ -1541,20 +1557,30 @@ static int riscv_iommu_attach_dev(struct iommu_domain *iommu_domain, struct devi
if (!dc)
return -ENODEV;
- /*
- * S-Stage translation table. G-Stage remains unmodified (BARE).
- */
- val = FIELD_PREP(RISCV_IOMMU_DC_TA_PSCID, domain->pscid);
-
- if (ep->pasid_enabled) {
- ep->pc[0].ta = cpu_to_le64(val | RISCV_IOMMU_PC_TA_V);
- ep->pc[0].fsc = cpu_to_le64(riscv_iommu_domain_atp(domain));
+ if (domain->g_stage) {
+ /*
+ * Enable G-Stage translation with initial pass-through mode
+ * for S-Stage. VMM is responsible for more restrictive
+ * guest VA translation scheme configuration.
+ */
dc->ta = 0;
- dc->fsc = cpu_to_le64(virt_to_pfn(ep->pc) |
- FIELD_PREP(RISCV_IOMMU_DC_FSC_MODE, RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8));
+ dc->fsc = 0ULL; /* RISCV_IOMMU_DC_FSC_MODE_BARE */ ;
+ dc->iohgatp = cpu_to_le64(riscv_iommu_domain_atp(domain));
} else {
- dc->ta = cpu_to_le64(val);
- dc->fsc = cpu_to_le64(riscv_iommu_domain_atp(domain));
+ /* S-Stage translation table. G-Stage remains unmodified. */
+ if (ep->pasid_enabled) {
+ val = FIELD_PREP(RISCV_IOMMU_DC_TA_PSCID, domain->pscid);
+ ep->pc[0].ta = cpu_to_le64(val | RISCV_IOMMU_PC_TA_V);
+ ep->pc[0].fsc = cpu_to_le64(riscv_iommu_domain_atp(domain));
+ dc->ta = 0;
+ val = FIELD_PREP(RISCV_IOMMU_DC_FSC_MODE,
+ RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8);
+ dc->fsc = cpu_to_le64(val | virt_to_pfn(ep->pc));
+ } else {
+ val = FIELD_PREP(RISCV_IOMMU_DC_TA_PSCID, domain->pscid);
+ dc->ta = cpu_to_le64(val);
+ dc->fsc = cpu_to_le64(riscv_iommu_domain_atp(domain));
+ }
}
wmb();
@@ -1599,6 +1625,9 @@ static int riscv_iommu_set_dev_pasid(struct iommu_domain *iommu_domain,
if (!iommu_domain || !iommu_domain->mm)
return -EINVAL;
+ if (domain->g_stage)
+ return -EINVAL;
+
/* Driver uses TC.DPE mode, PASID #0 is incorrect. */
if (pasid == 0)
return -EINVAL;
@@ -1969,6 +1998,7 @@ static const struct iommu_domain_ops riscv_iommu_domain_ops = {
.iotlb_sync = riscv_iommu_iotlb_sync,
.iotlb_sync_map = riscv_iommu_iotlb_sync_map,
.flush_iotlb_all = riscv_iommu_flush_iotlb_all,
+ .enable_nesting = riscv_iommu_enable_nesting,
};
static const struct iommu_ops riscv_iommu_ops = {
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index 55418a1144fb..55e5aafea5bc 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -102,8 +102,9 @@ struct riscv_iommu_domain {
struct riscv_iommu_device *iommu;
unsigned mode; /* RIO_ATP_MODE_* enum */
- unsigned pscid; /* RISC-V IOMMU PSCID */
+ unsigned pscid; /* RISC-V IOMMU PSCID / GSCID */
ioasid_t pasid; /* IOMMU_DOMAIN_SVA: Cached PASID */
+ bool g_stage; /* 2nd stage translation domain */
pgd_t *pgd_root; /* page table root pointer */
};