[RFC PATCH 6/6] iommu/amd: Introduce nested translation support

From: Suravee Suthikulpanit
Date: Tue Dec 12 2023 - 11:02:38 EST


To support nested translation on AMD IOMMU, the driver needs to
program DTE[GCR3 Table Root Pointer] with the address provided by
the guest via struct iommu_hwpt_amd_v2, which is passed as a parameter
of the struct iommu_ops.domain_alloc_user() with the flag
IOMMU_HWPT_ALLOC_NEST_PARENT.

Note that the current implementation only supports GCR3TRPMode for
nested translation, which uses a GPA to program the GCR3 Table Root Pointer.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
drivers/iommu/amd/Makefile | 2 +-
drivers/iommu/amd/amd_iommu.h | 8 +++
drivers/iommu/amd/amd_iommu_types.h | 3 +
drivers/iommu/amd/iommu.c | 63 ++++++++++++++--
drivers/iommu/amd/nested.c | 107 ++++++++++++++++++++++++++++
5 files changed, 175 insertions(+), 8 deletions(-)
create mode 100644 drivers/iommu/amd/nested.c

diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index f454fbb1569e..447cb6bb48eb 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o nested.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 55479a6efaae..6ea146a964df 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -7,6 +7,7 @@
#ifndef AMD_IOMMU_H
#define AMD_IOMMU_H

+#include <uapi/linux/iommufd.h>
#include <linux/iommu.h>

#include "amd_iommu_types.h"
@@ -75,6 +76,8 @@ void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data,
ioasid_t pasid);

void amd_iommu_build_efr(u64 *efr, u64 *efr2);
+int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev);
+void amd_iommu_domain_free(struct iommu_domain *dom);

#ifdef CONFIG_IRQ_REMAP
int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
@@ -190,4 +193,9 @@ int amd_iommu_vminfo_alloc(struct amd_iommu *iommu, struct amd_iommu_vminfo *vmi
void amd_iommu_vminfo_free(struct amd_iommu *iommu, struct amd_iommu_vminfo *vminfo);
struct amd_iommu_vminfo *amd_iommu_get_vminfo(int gid);

+/* NESTED */
+struct protection_domain *to_pdomain(struct iommu_domain *dom);
+struct iommu_domain *amd_iommu_nested_domain_alloc(struct device *dev,
+ struct iommu_hwpt_amd_v2 *hwpt);
+
#endif
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 1b150e0cb689..c2055b476a97 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -114,6 +114,8 @@
#define FEATURE_PASMAX_MASK (0x1FULL << FEATURE_PASMAX_SHIFT)

/* Extended Feature 2 Bits */
+#define FEATURE_GCR3TRPMODE BIT_ULL(3)
+
#define FEATURE_SNPAVICSUP_SHIFT 5
#define FEATURE_SNPAVICSUP_MASK (0x07ULL << FEATURE_SNPAVICSUP_SHIFT)
#define FEATURE_SNPAVICSUP_GAM(x) \
@@ -1058,6 +1060,7 @@ struct amd_irte_ops {
struct amd_iommu_vminfo {
u16 gid;
struct hlist_node hnode;
+ u64 *devid_table;
};

#ifdef CONFIG_IRQ_REMAP
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 8bf12674dc84..2a7e29e8c112 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -260,7 +260,7 @@ static struct amd_iommu *rlookup_amd_iommu(struct device *dev)
return __rlookup_amd_iommu(seg, PCI_SBDF_TO_DEVID(devid));
}

-static struct protection_domain *to_pdomain(struct iommu_domain *dom)
+struct protection_domain *to_pdomain(struct iommu_domain *dom)
{
return container_of(dom, struct protection_domain, domain);
}
@@ -2526,21 +2526,70 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type)
return domain;
}

+/*
+ * Copy the AMD v2 HWPT descriptor supplied by user space into @hwpt.
+ *
+ * Returns -EINVAL when no user data was supplied, -EOPNOTSUPP when the
+ * data is tagged with a type other than IOMMU_HWPT_DATA_AMD_V2, and
+ * otherwise the result of iommu_copy_struct_from_user().
+ */
+static int udata_to_iommu_hwpt_amd_v2(const struct iommu_user_data *user_data,
+				      struct iommu_hwpt_amd_v2 *hwpt)
+{
+	int rc;
+
+	if (!user_data) {
+		rc = -EINVAL;
+	} else if (user_data->type != IOMMU_HWPT_DATA_AMD_V2) {
+		rc = -EOPNOTSUPP;
+	} else {
+		rc = iommu_copy_struct_from_user(hwpt, user_data,
+						 IOMMU_HWPT_DATA_AMD_V2,
+						 guest_paging_mode);
+	}
+
+	return rc;
+}
+
+/*
+ * Report whether a domain with the given allocation @flags can be created.
+ *
+ * Requests without IOMMU_HWPT_ALLOC_NEST_PARENT always pass.  A nest-parent
+ * request passes only when FEATURE_GT, FEATURE_GIOSUP and
+ * FEATURE_GCR3TRPMODE are all reported.
+ */
+static bool check_nested_support(u32 flags)
+{
+	bool nest_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
+
+	if (!nest_parent)
+		return true;
+
+	return check_feature(FEATURE_GT) &&
+	       check_feature(FEATURE_GIOSUP) &&
+	       check_feature2(FEATURE_GCR3TRPMODE);
+}
+
+/*
+ * Allocate a domain on behalf of iommufd.
+ *
+ * With a @parent, @user_data must carry a struct iommu_hwpt_amd_v2 and a
+ * nested domain is returned.  Without a parent, a regular unmanaged domain
+ * is allocated; only IOMMU_HWPT_ALLOC_NEST_PARENT and
+ * IOMMU_HWPT_ALLOC_DIRTY_TRACKING are accepted in @flags.
+ *
+ * Returns the new domain or an ERR_PTR() encoded errno.
+ */
static struct iommu_domain *
amd_iommu_domain_alloc_user(struct device *dev, u32 flags,
struct iommu_domain *parent,
const struct iommu_user_data *user_data)
-
{
unsigned int type = IOMMU_DOMAIN_UNMANAGED;

- if ((flags & ~IOMMU_HWPT_ALLOC_DIRTY_TRACKING) || parent || user_data)
+	if (parent) {
+		struct iommu_hwpt_amd_v2 hwpt;
+		int ret;
+
+		/* The parent must be one of this driver's own domains. */
+		if (parent->ops != amd_iommu_ops.default_domain_ops)
+			return ERR_PTR(-EINVAL);
+
+		ret = udata_to_iommu_hwpt_amd_v2(user_data, &hwpt);
+		if (ret)
+			return ERR_PTR(ret);
+
+		return amd_iommu_nested_domain_alloc(dev, &hwpt);
+	}
+
+	/* Check supported flags */
+	if (flags & (~(IOMMU_HWPT_ALLOC_NEST_PARENT |
+		       IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (!check_nested_support(flags))
return ERR_PTR(-EOPNOTSUPP);

+	/*
+	 * Keep allocating through do_iommu_domain_alloc() so that @dev and
+	 * @flags (IOMMU_HWPT_ALLOC_DIRTY_TRACKING in particular) still reach
+	 * the allocator; iommu_domain_alloc(dev->bus) takes no flags, so the
+	 * accepted dirty-tracking request would be silently ignored.
+	 */
return do_iommu_domain_alloc(type, dev, flags);
}

-static void amd_iommu_domain_free(struct iommu_domain *dom)
+void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain;
unsigned long flags;
@@ -2559,7 +2608,7 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
protection_domain_free(domain);
}

-static int amd_iommu_attach_device(struct iommu_domain *dom,
+int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
diff --git a/drivers/iommu/amd/nested.c b/drivers/iommu/amd/nested.c
new file mode 100644
index 000000000000..332f7efcdc92
--- /dev/null
+++ b/drivers/iommu/amd/nested.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include <linux/iommu.h>
+#include <uapi/linux/iommufd.h>
+
+#include "amd_iommu.h"
+
+/*
+ * Walk the IOMMUs visited by for_each_iommu() and return the first one
+ * whose devid equals @devid, or NULL when none matches.
+ */
+static struct amd_iommu *get_amd_iommu_from_devid(u16 devid)
+{
+	struct amd_iommu *cur;
+
+	for_each_iommu(cur) {
+		if (cur->devid != devid)
+			continue;
+
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Layout of the per-VM DevID mapping table as consumed below: every
+ * Guest-DevID owns a 16-byte entry (two u64 words), and the Host-DevID is
+ * held in bits 39:24 of the entry's first word.
+ */
+#define NESTED_GDEV_ENTRY_QWORDS	2
+#define NESTED_HDEVID_SHIFT		24
+#define NESTED_HDEVID_MASK		0xFFFFULL
+
+/*
+ * Note:
+ * Host-DevID is stored in the per-VM DevID mapping table,
+ * which is indexed by the Guest-DevID.
+ *
+ * Returns the Host-DevID (0..0xFFFF) on success, or -ENOENT when no VM info
+ * exists for @guestId or the VM has no DevID table.  A negative errno is
+ * used instead of a u16 sentinel because 0xFFFF is itself a representable
+ * DevID and the caller could not tell the two apart.
+ *
+ * @iommu is currently unused.
+ *
+ * NOTE(review): the size of devid_table is not visible here, so @gdev_id is
+ * not bounds-checked against it -- confirm the table covers all 64K IDs.
+ */
+static int get_hdev_id(struct amd_iommu *iommu, u16 guestId, u16 gdev_id)
+{
+	struct amd_iommu_vminfo *vminfo;
+	u64 entry;
+
+	vminfo = amd_iommu_get_vminfo(guestId);
+	if (!vminfo || !vminfo->devid_table)
+		return -ENOENT;
+
+	entry = vminfo->devid_table[(size_t)gdev_id * NESTED_GDEV_ENTRY_QWORDS];
+
+	return (entry >> NESTED_HDEVID_SHIFT) & NESTED_HDEVID_MASK;
+}
+
+/*
+ * Resolve the host device described by @hwpt (IOMMU ID, guest ID and
+ * Guest-DevID) and hand its GCR3 settings to amd_iommu_set_gcr3tbl_trp().
+ *
+ * @udom is currently unused.
+ *
+ * Returns 0 on success, -EOPNOTSUPP without GCR3TRPMode, -ENODEV when no
+ * IOMMU matches hwpt->iommu_id, -ENOENT when the Guest-DevID cannot be
+ * translated, -EINVAL when the host PCI device is not found, or the error
+ * returned by amd_iommu_set_gcr3tbl_trp().
+ */
+static int nested_gcr3_update(struct iommu_hwpt_amd_v2 *hwpt, struct iommu_domain *udom)
+{
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	int hdev_id;
+	int ret;
+
+	/*
+	 * Note: Currently only support GCR3TRPMode with nested translation.
+	 * Checked first so that no device reference is held on this path.
+	 */
+	if (!check_feature2(FEATURE_GCR3TRPMODE))
+		return -EOPNOTSUPP;
+
+	/* iommu_id comes from user space; it may not match any IOMMU. */
+	iommu = get_amd_iommu_from_devid(hwpt->iommu_id);
+	if (!iommu)
+		return -ENODEV;
+
+	hdev_id = get_hdev_id(iommu, hwpt->gid, hwpt->gdev_id);
+	if (hdev_id < 0)
+		return hdev_id;
+
+	pr_debug("%s: gid=%u, hdev_id=%#x, gcr3=%#llx\n",
+		 __func__, hwpt->gid, hdev_id,
+		 (unsigned long long) hwpt->gcr3);
+
+	/* Only PCI segment 0 is looked up; the low byte of the DevID is devfn. */
+	pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(hdev_id),
+					   hdev_id & 0xff);
+	if (!pdev)
+		return -EINVAL;
+
+	ret = amd_iommu_set_gcr3tbl_trp(iommu, pdev, hwpt->gcr3, hwpt->glx,
+					hwpt->guest_paging_mode);
+	if (ret) {
+		pr_err("%s: Fail to enable gcr3 (devid=%#x)\n", __func__,
+		       pci_dev_id(pdev));
+	}
+
+	/*
+	 * pci_get_domain_bus_and_slot() returned a counted reference; drop it.
+	 * NOTE(review): assumes amd_iommu_set_gcr3tbl_trp() does not keep
+	 * @pdev beyond the call -- confirm against its implementation.
+	 */
+	pci_dev_put(pdev);
+
+	return ret;
+}
+
+/*
+ * Domain callbacks installed on nested domains by
+ * amd_iommu_nested_domain_alloc(); both entries point at the driver's
+ * amd_iommu_attach_device() and amd_iommu_domain_free() handlers.
+ */
+static const struct iommu_domain_ops nested_domain_ops = {
+	.free		= amd_iommu_domain_free,
+	.attach_dev	= amd_iommu_attach_device,
+};
+
+/*
+ * Allocate a domain of type IOMMU_DOMAIN_NESTED for @dev and apply the
+ * guest settings in @hwpt through nested_gcr3_update().
+ *
+ * Returns the new domain, ERR_PTR(-ENOMEM) when the allocation fails, or
+ * ERR_PTR() of the error reported by nested_gcr3_update(); the domain is
+ * freed again in that case.
+ */
+struct iommu_domain *amd_iommu_nested_domain_alloc(struct device *dev,
+						   struct iommu_hwpt_amd_v2 *hwpt)
+{
+	struct iommu_domain *dom;
+	int ret;
+
+	dom = iommu_domain_alloc(dev->bus);
+	if (!dom)
+		return ERR_PTR(-ENOMEM);
+
+	dom->type = IOMMU_DOMAIN_NESTED;
+	dom->ops = &nested_domain_ops;
+
+	ret = nested_gcr3_update(hwpt, dom);
+	if (ret) {
+		iommu_domain_free(dom);
+		/* Propagate the real cause instead of flattening it to -EINVAL. */
+		return ERR_PTR(ret);
+	}
+
+	return dom;
+}
--
2.34.1