[RFC PATCH 20/21] iommu/amd: Introduce vIOMMU ioctl for setting up guest CR3

From: Suravee Suthikulpanit
Date: Wed Jun 21 2023 - 19:57:53 EST


This ioctl interface sets up the guest CR3 (gCR3) table, which
is defined by the guest IOMMU driver. It also enables nested
I/O page translation in the host.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
drivers/iommu/amd/amd_iommu.h | 12 ++++
drivers/iommu/amd/iommu.c | 107 ++++++++++++++++++++++++++++++++++
drivers/iommu/amd/viommu.c | 36 ++++++++++++
include/linux/iommu.h | 1 +
include/uapi/linux/iommufd.h | 20 +++++++
5 files changed, 176 insertions(+)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index fccae07e8c9f..463cd59127b7 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -84,6 +84,18 @@ extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
unsigned long cr3);
+/*
+ * Set up the guest CR3 table for @udom from @user_data, which the
+ * implementation reads as a struct iommu_hwpt_amd_v2.
+ */
+extern int amd_viommu_user_gcr3_update(const void *user_data,
+ struct iommu_domain *udom);
+/*
+ * Allocate a zeroed GCR3 table page for @udom and write it into the
+ * device table entry of @pdev. Fails with -EINVAL if @udom already
+ * has a table or @pasids needs more levels than supported.
+ */
+extern int amd_iommu_setup_gcr3_table(struct amd_iommu *iommu,
+ struct pci_dev *pdev,
+ struct iommu_domain *dom,
+ struct iommu_domain *udom,
+ int pasids, bool giov);
+/*
+ * Call __set_gcr3() for (@udom, @pasid, @cr3) under the locks of both
+ * domains and, on success, flush the device table entry of @pdev.
+ */
+extern int amd_iommu_user_set_gcr3(struct amd_iommu *iommu,
+ struct iommu_domain *dom,
+ struct iommu_domain *udom,
+ struct pci_dev *pdev, u32 pasid,
+ unsigned long cr3);
extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
extern void amd_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index f22b2a5a8bfc..bff53977f8f7 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -80,6 +80,8 @@ struct kmem_cache *amd_iommu_irq_cache;

static void detach_device(struct device *dev);
static int domain_enable_v2(struct protection_domain *domain, int pasids, bool giov);
+static int __set_gcr3(struct protection_domain *domain, u32 pasid,
+ unsigned long cr3);

/****************************************************************************
*
@@ -2525,10 +2527,43 @@ static void *amd_iommu_hw_info(struct device *dev, u32 *length)
return hwinfo;
}

+/*
+ * amd_iommu_domain_alloc_user - allocate a domain on behalf of IOMMUFD.
+ * @dev:       device whose bus is handed to iommu_domain_alloc()
+ * @hwpt_type: requested HWPT type; not examined by this function
+ * @parent:    parent domain, or NULL for a plain (non-nested) domain
+ * @user_data: vendor data describing the guest CR3 table; only used
+ *             when @parent is set
+ *
+ * The parent is not null only when external driver calls IOMMUFD kAPI
+ * to create IOMMUFD_OBJ_HW_PAGETABLE to attach a bound device to IOAS.
+ * This is for nested (v2) page table. In that case the new domain is
+ * marked IOMMU_DOMAIN_NESTED and its guest CR3 table is set up from
+ * @user_data. Without a parent the freshly allocated domain is returned
+ * as is.
+ *
+ * TODO: Currently, only support nested table w/ 1 pasid for GIOV use case.
+ * Add support for multiple pasids.
+ *
+ * Return: the new domain, or NULL if allocation or gCR3 setup failed.
+ */
+static struct iommu_domain *
+amd_iommu_domain_alloc_user(struct device *dev,
+			    enum iommu_hwpt_type hwpt_type,
+			    struct iommu_domain *parent,
+			    const union iommu_domain_user_data *user_data)
+{
+	struct iommu_domain *dom = iommu_domain_alloc(dev->bus);
+
+	if (!dom || !parent)
+		return dom;
+
+	/*
+	 * amd_viommu_user_gcr3_update() dereferences @user_data
+	 * unconditionally, so a nested request without it must be
+	 * rejected here instead of crashing there.
+	 */
+	if (!user_data)
+		goto err_out;
+
+	dom->type = IOMMU_DOMAIN_NESTED;
+
+	if (amd_viommu_user_gcr3_update(user_data, dom))
+		goto err_out;
+
+	return dom;
+
+err_out:
+	iommu_domain_free(dom);
+	return NULL;
+}
+
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.hw_info = amd_iommu_hw_info,
.domain_alloc = amd_iommu_domain_alloc,
+ .domain_alloc_user = amd_iommu_domain_alloc_user,
.probe_device = amd_iommu_probe_device,
.release_device = amd_iommu_release_device,
.probe_finalize = amd_iommu_probe_finalize,
@@ -2537,6 +2572,7 @@ const struct iommu_ops amd_iommu_ops = {
.is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
.def_domain_type = amd_iommu_def_domain_type,
+ .hw_info_type = IOMMU_HW_INFO_TYPE_AMD,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = amd_iommu_attach_device,
.map_pages = amd_iommu_map_pages,
@@ -2639,6 +2675,77 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids, bool giov)
}
EXPORT_SYMBOL(amd_iommu_domain_enable_v2);

+/*
+ * amd_iommu_setup_gcr3_table - allocate a GCR3 table for @udom and
+ * program it into the device table entry of @pdev.
+ * @iommu:  IOMMU whose device table is updated and flushed
+ * @pdev:   device whose DTE (and aliases) are rewritten
+ * @dom:    domain passed to set_dte_entry() together with @udom
+ * @udom:   user domain that receives the GCR3 table
+ * @pasids: number of PASIDs the table must cover
+ * @giov:   when true, PD_GIOV_MASK is set on @udom in addition to
+ *          PD_IOMMUV2_MASK
+ *
+ * Return: 0 on success, -EINVAL if @udom already has a GCR3 table or
+ * @pasids needs more levels than amd_iommu_max_glx_val, -ENOMEM if the
+ * table page cannot be allocated.
+ */
+int amd_iommu_setup_gcr3_table(struct amd_iommu *iommu, struct pci_dev *pdev,
+			       struct iommu_domain *dom,
+			       struct iommu_domain *udom,
+			       int pasids, bool giov)
+{
+	struct protection_domain *pdom = to_pdomain(dom);
+	struct protection_domain *updom = to_pdomain(udom);
+	struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+	int levels = 0;
+
+	/* An existing table is never replaced. */
+	if (updom->gcr3_tbl)
+		return -EINVAL;
+
+	/* Number of GCR3 table levels required: one more per 9 PASID bits */
+	while ((pasids - 1) & ~0x1ff) {
+		levels++;
+		pasids >>= 9;
+	}
+
+	if (levels > amd_iommu_max_glx_val)
+		return -EINVAL;
+
+	updom->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
+	if (!updom->gcr3_tbl)
+		return -ENOMEM;
+
+	updom->glx = levels;
+	updom->flags |= PD_IOMMUV2_MASK;
+	if (giov)
+		updom->flags |= PD_GIOV_MASK;
+
+	/* Write the DTE first, mirror it to aliases, then flush and wait. */
+	set_dte_entry(iommu, dev_data->devid, pdom, updom, updom->gcr3_tbl,
+		      dev_data->ats.enabled, false);
+	clone_aliases(iommu, dev_data->dev);
+
+	iommu_flush_dte(iommu, dev_data->devid);
+	iommu_completion_wait(iommu);
+
+	return 0;
+}
+
+/*
+ * amd_iommu_user_set_gcr3 - set the GCR3 entry of @udom for @pasid.
+ * @iommu: IOMMU on which completion is awaited after the flush
+ * @dom:   domain whose lock is taken as the outer lock
+ * @udom:  user domain whose GCR3 table is updated
+ * @pdev:  device whose device table entry is flushed on success
+ * @pasid: PASID passed to __set_gcr3()
+ * @cr3:   value passed to __set_gcr3()
+ *
+ * Note: For vIOMMU, the guest could be using a different
+ * GCR3 table for each VFIO pass-through device.
+ * Therefore, we need a per-device GCR3 table.
+ *
+ * Return: the result of __set_gcr3().
+ */
+int amd_iommu_user_set_gcr3(struct amd_iommu *iommu,
+			    struct iommu_domain *dom,
+			    struct iommu_domain *udom,
+			    struct pci_dev *pdev, u32 pasid,
+			    unsigned long cr3)
+{
+	struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+	struct protection_domain *domain = to_pdomain(dom);
+	struct protection_domain *udomain = to_pdomain(udom);
+	unsigned long flags;
+	int ret;
+
+	/*
+	 * Only the outer lock saves and restores the IRQ state. Taking the
+	 * inner lock with spin_lock_irqsave() and the same @flags would
+	 * overwrite the saved state with "IRQs off", so the final restore
+	 * would leave interrupts disabled.
+	 *
+	 * NOTE(review): both locks belong to struct protection_domain; if
+	 * they share a lockdep class, a nesting annotation may be needed.
+	 */
+	spin_lock_irqsave(&domain->lock, flags);
+	spin_lock(&udomain->lock);
+
+	ret = __set_gcr3(udomain, pasid, cr3);
+	if (!ret) {
+		device_flush_dte(dev_data);
+		iommu_completion_wait(iommu);
+	}
+
+	spin_unlock(&udomain->lock);
+	spin_unlock_irqrestore(&domain->lock, flags);
+
+	return ret;
+}
+
static int __flush_pasid(struct protection_domain *domain, u32 pasid,
u64 address, bool size)
{
diff --git a/drivers/iommu/amd/viommu.c b/drivers/iommu/amd/viommu.c
index 1bd4282384c4..8ce3ee3d6bf5 100644
--- a/drivers/iommu/amd/viommu.c
+++ b/drivers/iommu/amd/viommu.c
@@ -1072,3 +1072,39 @@ int amd_viommu_cmdbuf_update(struct amd_viommu_cmdbuf_data *data)
return -EINVAL;
}
EXPORT_SYMBOL(amd_viommu_cmdbuf_update);
+
+/*
+ * amd_viommu_user_gcr3_update - set up the guest CR3 table of a user domain.
+ * @user_data: struct iommu_hwpt_amd_v2 describing the guest table
+ * @udom:      user domain that receives the GCR3 table
+ *
+ * Pins the page at @gcr3_va, translates (@gid, @gdev_id) into a host
+ * device, allocates a GCR3 table for @udom on that device and sets
+ * @gcr3 for PASID 0.
+ *
+ * On success the page at @gcr3_va stays pinned.
+ * NOTE(review): no matching release is visible in this patch; confirm
+ * where the page is dropped when the domain goes away.
+ *
+ * Return: 0 on success or a negative errno. On failure the pinned page
+ * and the PCI device reference are released again.
+ */
+int amd_viommu_user_gcr3_update(const void *user_data, struct iommu_domain *udom)
+{
+	const struct iommu_hwpt_amd_v2 *hwpt = user_data;
+	struct iommu_domain *dom;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	struct page *page;
+	u16 hdev_id;
+	int npinned;
+	int ret;
+
+	iommu = get_amd_iommu_from_devid(hwpt->iommu_id);
+	if (!iommu)
+		return -ENODEV;
+
+	hdev_id = viommu_get_hdev_id(iommu, hwpt->gid, hwpt->gdev_id);
+
+	pr_debug("%s: gid=%u, hdev_id=%#x, gcr3_va=%#llx\n",
+		 __func__, hwpt->gid, hdev_id, (unsigned long long) hwpt->gcr3_va);
+
+	/*
+	 * get_user_pages_fast() returns an int: the number of pages pinned
+	 * or a negative errno. Keep it signed so errors are not mistaken
+	 * for a successful pin.
+	 */
+	npinned = get_user_pages_fast(hwpt->gcr3_va, 1, FOLL_WRITE, &page);
+	if (npinned != 1) {
+		pr_err("Failure locking grc3 page (%#llx).\n", hwpt->gcr3_va);
+		return npinned < 0 ? npinned : -EINVAL;
+	}
+
+	/* Takes a reference on the device; dropped before returning. */
+	pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(hdev_id),
+					   hdev_id & 0xff);
+	if (!pdev) {
+		ret = -ENODEV;
+		goto err_put_page;
+	}
+
+	dom = iommu_get_domain_for_dev(&pdev->dev);
+	if (!dom) {
+		ret = -EINVAL;
+		goto err_put_pdev;
+	}
+
+	/* Allocate gcr3 table */
+	/* TODO: Only support 1 pasid (zero) for now */
+	ret = amd_iommu_setup_gcr3_table(iommu, pdev, dom, udom, 1,
+					 iommu_feature(iommu, FEATURE_GIOSUP));
+	if (ret) {
+		pr_err("%s: Fail to enable gcr3 (devid=%#x)\n", __func__, pci_dev_id(pdev));
+		goto err_put_pdev;
+	}
+
+	ret = amd_iommu_user_set_gcr3(iommu, dom, udom, pdev, 0, hwpt->gcr3);
+	if (ret)
+		goto err_put_pdev;
+
+	pci_dev_put(pdev);
+	return 0;
+
+err_put_pdev:
+	pci_dev_put(pdev);
+err_put_page:
+	put_page(page);
+	return ret;
+}
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 4116f12d5f97..9239cd01d77c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -236,6 +236,7 @@ union iommu_domain_user_data {
#endif
struct iommu_hwpt_vtd_s1 vtd;
struct iommu_hwpt_arm_smmuv3 smmuv3;
+ struct iommu_hwpt_amd_v2 amdv2;
};

/**
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index f8ea9faf6770..4147171429e1 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -408,6 +408,23 @@ struct iommu_hwpt_arm_smmuv3 {
__aligned_u64 out_event_uptr;
};

+/**
+ * struct iommu_hwpt_amd_v2 - AMD IOMMU specific user-managed
+ * v2 I/O page table data
+ * @gcr3: GCR3 guest physical address
+ * @gcr3_va: GCR3 host virtual address
+ * @gid: Guest ID
+ * @iommu_id: IOMMU host device ID
+ * @gdev_id: Guest device ID
+ *
+ * The members add up to 26 bytes, so the compiler inserts implicit
+ * tail padding after @gdev_id; its size depends on the alignment the
+ * ABI gives __u64.
+ */
+struct iommu_hwpt_amd_v2 {
+ __u64 gcr3;
+ __u64 gcr3_va;
+ __u32 gid;
+ __u32 iommu_id;
+ __u16 gdev_id;
+};
+
/**
* enum iommu_hwpt_type - IOMMU HWPT Type
* @IOMMU_HWPT_TYPE_DEFAULT: default
@@ -418,6 +435,7 @@ enum iommu_hwpt_type {
IOMMU_HWPT_TYPE_DEFAULT,
IOMMU_HWPT_TYPE_VTD_S1,
IOMMU_HWPT_TYPE_ARM_SMMUV3,
+ IOMMU_HWPT_TYPE_AMD_V2,
};

/**
@@ -523,11 +541,13 @@ struct iommu_hw_info_amd {
* enum iommu_hw_info_type - IOMMU Hardware Info Types
* @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
* @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
+ * @IOMMU_HW_INFO_TYPE_AMD: AMD IOMMU info type
*/
enum iommu_hw_info_type {
IOMMU_HW_INFO_TYPE_NONE,
IOMMU_HW_INFO_TYPE_INTEL_VTD,
IOMMU_HW_INFO_TYPE_ARM_SMMUV3,
+ IOMMU_HW_INFO_TYPE_AMD,
};

/**
--
2.34.1