[RFC PATCH 5/6] iommu/amd: Introduce helper functions to setup GCR3TRPMode

From: Suravee Suthikulpanit
Date: Tue Dec 12 2023 - 11:02:24 EST


GCR3TRPMode allows the IOMMU hardware to treat the GCR3 table root pointer
(GCR3TRP) programmed in the DTE as a guest physical address (GPA). The
IOMMU translates this GPA using the v1 page table referenced by
DTE[Host Page Table Root Pointer].

Please see the AMD IOMMU Specification for more detail.
(https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_IOMMU.pdf)

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
drivers/iommu/amd/amd_iommu.h | 2 +
drivers/iommu/amd/amd_iommu_types.h | 1 +
drivers/iommu/amd/iommu.c | 132 +++++++++++++++++++++++++++-
3 files changed, 131 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 7783a933ad14..55479a6efaae 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -56,6 +56,8 @@ void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev);
int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data,
ioasid_t pasid, unsigned long gcr3);
int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid);
+int amd_iommu_set_gcr3tbl_trp(struct amd_iommu *iommu, struct pci_dev *pdev,
+ u64 gcr3_tbl, u16 glx, u16 guest_paging_mode);

/*
* This function flushes all internal caches of
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index a00731673c50..1b150e0cb689 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -541,6 +541,7 @@ struct gcr3_tbl_info {
u64 *gcr3_tbl; /* Guest CR3 table */
int glx; /* Number of levels for GCR3 table */
u32 pasid_cnt; /* Track attached PASIDs */
+ bool trp; /* TRP support */
};

struct amd_io_pgtable {
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index d18b23ac6357..8bf12674dc84 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -93,6 +93,9 @@ static void detach_device(struct device *dev);
static void set_dte_entry(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data);

+static void amd_iommu_clear_gcr3tbl_trp(struct amd_iommu *iommu,
+ struct iommu_dev_data *dev_data);
+
/****************************************************************************
*
* Helper functions
@@ -2146,15 +2149,25 @@ static int do_attach(struct iommu_dev_data *dev_data,

static void do_detach(struct iommu_dev_data *dev_data)
{
+ struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
struct protection_domain *domain = dev_data->domain;
struct amd_iommu *iommu;

iommu = get_amd_iommu_from_dev(dev_data->dev);

- /* Clear GCR3 table */
- if (domain->pd_mode == PD_MODE_V2) {
- __clear_gcr3(dev_data, 0);
- free_gcr3_table(dev_data);
+ if (gcr3_info->gcr3_tbl) {
+ if (gcr3_info->trp) {
+ /*
+ * In GCR3TRPMode, the GCR3 table contains GPAs and is
+ * set up by the guest kernel. Therefore, we only need to
+ * clean up the DTE settings for guest translation.
+ */
+ amd_iommu_clear_gcr3tbl_trp(iommu, dev_data);
+ } else {
+ /* Clear GCR3 table */
+ __clear_gcr3(dev_data, 0);
+ free_gcr3_table(dev_data);
+ }
}

/* Update data structures */
@@ -2951,6 +2964,117 @@ const struct iommu_ops amd_iommu_ops = {
}
};

+/*
+ * For GCR3TRPMode, user-space provides GPA for the GCR3 Root Pointer Table.
+ * Program it, together with @glx and @guest_paging_mode, into the DTE of @pdev
+ * and enable guest translation (GV, GIOV) for the device.
+ *
+ * Returns 0 on success, -ENODEV if @pdev has no IOMMU private data, or -EINVAL
+ * if 5-level guest paging is requested but not supported by the host. Neither
+ * the DTE nor dev_data->gcr3_info is modified on failure.
+ */
+int amd_iommu_set_gcr3tbl_trp(struct amd_iommu *iommu, struct pci_dev *pdev,
+			      u64 gcr3_tbl, u16 glx, u16 guest_paging_mode)
+{
+	struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+	struct dev_table_entry *dev_table = get_dev_table(iommu);
+	int devid = pci_dev_id(pdev);
+	struct gcr3_tbl_info *gcr3_info;
+	u64 data0, data1, data2;
+
+	if (!dev_data)
+		return -ENODEV;
+
+	/* Check 5-level support for the host before touching any state */
+	if (guest_paging_mode == GUEST_PGTABLE_5_LEVEL &&
+	    check_feature_gpt_level() < GUEST_PGTABLE_5_LEVEL) {
+		pr_err("Cannot support 5-level v2 page table.\n");
+		return -EINVAL;
+	}
+
+	pr_debug("%s: devid=%d, glx=%#x, gcr3_tbl=%#llx\n",
+		 __func__, devid, glx, gcr3_tbl);
+
+	gcr3_info = &dev_data->gcr3_info;
+	WARN_ON(gcr3_info->trp);
+
+	/* gcr3_tbl holds a GPA here; do_detach() only tests it for non-NULL */
+	gcr3_info->trp = true;
+	gcr3_info->gcr3_tbl = (u64 *)gcr3_tbl;
+
+	data0 = dev_table[devid].data[0];
+	data1 = dev_table[devid].data[1];
+	data2 = dev_table[devid].data[2];
+
+	/* First mask out possible old values for GLX and the GCR3 table */
+	data0 &= ~((u64)DTE_GLX_MASK << DTE_GLX_SHIFT);
+	data0 &= ~(DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A);
+	data1 &= ~(DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B);
+	data1 &= ~(DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C);
+
+	/* Enable guest translation; encode GLX and GCR3 table GPA into DTE */
+	data0 |= DTE_FLAG_GV | DTE_FLAG_GIOV;
+	data0 |= ((u64)glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
+	data0 |= DTE_GCR3_VAL_A(gcr3_tbl) << DTE_GCR3_SHIFT_A;
+	data1 |= DTE_GCR3_VAL_B(gcr3_tbl) << DTE_GCR3_SHIFT_B;
+	data1 |= DTE_GCR3_VAL_C(gcr3_tbl) << DTE_GCR3_SHIFT_C;
+
+	/* Replace the old GuestPagingMode */
+	data2 &= ~(0x3ULL << DTE_GPT_LEVEL_SHIFT);
+	data2 |= (u64)guest_paging_mode << DTE_GPT_LEVEL_SHIFT;
+
+	dev_table[devid].data[2] = data2;
+	dev_table[devid].data[1] = data1;
+	dev_table[devid].data[0] = data0;
+
+	device_flush_dte(dev_data);
+	iommu_completion_wait(iommu);
+
+	return 0;
+}
+
+/*
+ * Undo amd_iommu_set_gcr3tbl_trp(): strip the guest translation settings from
+ * the DTE and reset dev_data->gcr3_info. Does nothing if TRP is not active.
+ * NOTE(review): no DTE flush is issued here; presumably the caller
+ * (do_detach()) flushes afterwards - confirm.
+ */
+static void amd_iommu_clear_gcr3tbl_trp(struct amd_iommu *iommu,
+					struct iommu_dev_data *dev_data)
+{
+	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+	struct dev_table_entry *dev_table = get_dev_table(iommu);
+	int devid = dev_data->devid;
+	u64 data0, data1, data2;
+
+	if (!gcr3_info->trp)
+		return;
+
+	pr_debug("%s: devid=%#x, gcr3_tbl=%#llx\n", __func__, devid,
+		 (unsigned long long)gcr3_info->gcr3_tbl);
+
+	data0 = dev_table[devid].data[0];
+	data1 = dev_table[devid].data[1];
+	data2 = dev_table[devid].data[2];
+
+	/* Disable guest translation; mask out GLX and the GCR3 table pointer */
+	data0 &= ~(DTE_FLAG_GV | DTE_FLAG_GIOV);
+	data0 &= ~((u64)DTE_GLX_MASK << DTE_GLX_SHIFT);
+	data0 &= ~(DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A);
+	data1 &= ~(DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B);
+	data1 &= ~(DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C);
+
+	/* Mask out old values for GuestPagingMode */
+	data2 &= ~(0x3ULL << DTE_GPT_LEVEL_SHIFT);
+
+	dev_table[devid].data[2] = data2;
+	dev_table[devid].data[1] = data1;
+	dev_table[devid].data[0] = data0;
+
+	gcr3_info->trp = false;
+	gcr3_info->gcr3_tbl = NULL;
+}
+
#ifdef CONFIG_IRQ_REMAP

/*****************************************************************************
--
2.34.1