[PATCH v3 07/10] iommu/vt-d: Add iotlb flush for nested domain

From: Yi Liu
Date: Thu May 11 2023 - 10:53:18 EST


The stage-1 page table of a nested domain is maintained outside the
iommu subsystem, so the IOMMU driver needs to support IOTLB flush
requests for it.

This adds the data structure for flushing iotlb for the nested domain
allocated with IOMMU_HWPT_TYPE_VTD_S1 type and the related callback
to accept iotlb flush request from IOMMUFD.

This only exposes the interface for invalidating the IOTLB, but not for
the device-TLB, as device-TLB invalidation will be covered automatically
by the IOTLB invalidation if the affected device is ATS-capable.

Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
Signed-off-by: Yi Liu <yi.l.liu@xxxxxxxxx>
---
drivers/iommu/intel/iommu.c | 10 +++---
drivers/iommu/intel/iommu.h | 6 ++++
drivers/iommu/intel/nested.c | 69 ++++++++++++++++++++++++++++++++++++
drivers/iommu/iommufd/main.c | 6 ++++
include/uapi/linux/iommufd.h | 59 ++++++++++++++++++++++++++++++
5 files changed, 145 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index e6536a43dd82..5f27cee4656a 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1474,10 +1474,10 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
spin_unlock_irqrestore(&domain->lock, flags);
}

-static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
- struct dmar_domain *domain,
- unsigned long pfn, unsigned int pages,
- int ih, int map)
+void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ unsigned long pfn, unsigned int pages,
+ int ih, int map)
{
unsigned int aligned_pages = __roundup_pow_of_two(pages);
unsigned int mask = ilog2(aligned_pages);
@@ -1550,7 +1550,7 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu,
iommu_flush_write_buffer(iommu);
}

-static void intel_flush_iotlb_all(struct iommu_domain *domain)
+void intel_flush_iotlb_all(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct iommu_domain_info *info;
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index ccb93aed6cf2..581596d90c1b 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -859,6 +859,12 @@ int prepare_domain_attach_device(struct iommu_domain *domain,
struct device *dev);
bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain);
void domain_update_iommu_cap(struct dmar_domain *domain);
+void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ unsigned long pfn, unsigned int pages,
+ int ih, int map);
+void intel_flush_iotlb_all(struct iommu_domain *domain);
+

int dmar_ir_support(void);

diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index fd38424b78f0..d13fbcd3f5a6 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -64,8 +64,77 @@ static void intel_nested_domain_free(struct iommu_domain *domain)
kfree(to_dmar_domain(domain));
}

+/*
+ * Flush the IOTLB entries of @domain on the IOMMU that serves @dev.
+ *
+ * @addr:   start address of the range; shifted down to a VT-d pfn below
+ * @npages: number of pages to invalidate
+ * @ih:     invalidation hint handed to iommu_flush_iotlb_psi(); non-zero
+ *          only when the caller asked for a leaf-only invalidation
+ *
+ * A request with @addr == 0 and @npages == U64_MAX asks for the whole domain
+ * to be flushed. iommu_flush_iotlb_psi() takes the page count as an
+ * unsigned int, so a count that does not fit is widened to a whole-domain
+ * flush as well instead of being silently truncated to a smaller range.
+ */
+static void intel_nested_invalidate(struct device *dev,
+				    struct dmar_domain *domain,
+				    u64 addr, u64 npages, int ih)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct intel_iommu *iommu = info->iommu;
+
+	if ((addr == 0 && npages == U64_MAX) || npages > UINT_MAX)
+		intel_flush_iotlb_all(&domain->domain);
+	else
+		iommu_flush_iotlb_psi(iommu, domain,
+				      addr >> VTD_PAGE_SHIFT,
+				      npages, ih, 0);
+}
+
+/*
+ * Handle a batch of user-supplied stage-1 cache invalidation requests.
+ *
+ * @user_data points to a struct iommu_hwpt_invalidate_intel_vtd describing a
+ * user array of struct iommu_hwpt_invalidate_request_intel_vtd. The requests
+ * are fetched and applied one at a time; processing stops at the first one
+ * that cannot be fetched or fails validation.
+ *
+ * Return: 0 when every request was applied, -EFAULT when user memory cannot
+ * be accessed, -EINVAL for a malformed header or request, or the error from
+ * copy_struct_from_user(). Once the request count has been read and found
+ * non-zero, any later failure writes the number of requests already applied
+ * back through entry_nr_uptr.
+ */
+static int intel_nested_cache_invalidate_user(struct iommu_domain *domain,
+					      void *user_data)
+{
+	struct iommu_hwpt_invalidate_intel_vtd *inv_info = user_data;
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	/*
+	 * Requests are fetched into their own buffer so that they never
+	 * overwrite the header that @user_data points to.
+	 */
+	struct iommu_hwpt_invalidate_request_intel_vtd req;
+	struct device_domain_info *info;
+	unsigned int entry_size;
+	u32 entry_nr, index = 0;
+	unsigned long flags;
+	u64 uptr, nr_uptr;
+	int ret = 0;
+	int ih;
+
+	/* Check before the first dereference, and do not report success. */
+	if (WARN_ON(!user_data))
+		return -EINVAL;
+
+	entry_size = inv_info->entry_size;
+	uptr = inv_info->inv_data_uptr;
+	nr_uptr = inv_info->entry_nr_uptr;
+
+	if (get_user(entry_nr, (uint32_t __user *)u64_to_user_ptr(nr_uptr)))
+		return -EFAULT;
+
+	if (!entry_nr)
+		return -EINVAL;
+
+	/*
+	 * The header flags are documented as "must be 0", and a zero
+	 * entry_size would make every fetch yield an all-zero request.
+	 */
+	if (inv_info->flags || !entry_size) {
+		ret = -EINVAL;
+		goto out_report;
+	}
+
+	for (index = 0; index < entry_nr; index++) {
+		/* Widen before multiplying so the offset cannot wrap at 32 bits. */
+		ret = copy_struct_from_user(&req, sizeof(req),
+				u64_to_user_ptr(uptr + (u64)index * entry_size),
+				entry_size);
+		if (ret) {
+			pr_err_ratelimited("Failed to fetch invalidation request\n");
+			break;
+		}
+
+		if (req.__reserved || (req.flags & ~IOMMU_VTD_QI_FLAGS_LEAF) ||
+		    !req.npages || !IS_ALIGNED(req.addr, VTD_PAGE_SIZE)) {
+			ret = -EINVAL;
+			break;
+		}
+
+		/*
+		 * Only hint that non-leaf caches may be preserved when user
+		 * space explicitly asked for a leaf-only invalidation.
+		 */
+		ih = !!(req.flags & IOMMU_VTD_QI_FLAGS_LEAF);
+
+		/*
+		 * NOTE(review): the flush helpers run with dmar_domain->lock
+		 * held and interrupts off. iommu_flush_dev_iotlb() in iommu.c
+		 * takes the same lock; confirm that neither flush path reaches
+		 * it, otherwise this self-deadlocks. The whole-domain flush
+		 * takes no per-device argument, so it is also repeated for
+		 * every attached device.
+		 */
+		spin_lock_irqsave(&dmar_domain->lock, flags);
+		list_for_each_entry(info, &dmar_domain->devices, link)
+			intel_nested_invalidate(info->dev, dmar_domain,
+						req.addr, req.npages, ih);
+		spin_unlock_irqrestore(&dmar_domain->lock, flags);
+	}
+
+out_report:
+	/* Tell user space how many requests were applied before the failure. */
+	if (ret && put_user(index, (uint32_t __user *)u64_to_user_ptr(nr_uptr)))
+		return -EFAULT;
+
+	return ret;
+}
+
static const struct iommu_domain_ops intel_nested_domain_ops = {
.attach_dev = intel_nested_attach_dev,
+ .cache_invalidate_user = intel_nested_cache_invalidate_user, /* stage-1 IOTLB flush requests */
+ .cache_invalidate_user_data_len =
+ sizeof(struct iommu_hwpt_invalidate_intel_vtd), /* the callback reads user_data as this type */
.free = intel_nested_domain_free,
.enforce_cache_coherency = intel_iommu_enforce_cache_coherency,
};
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 39922f83ce34..b338b082950b 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -282,6 +282,12 @@ union ucmd_buffer {
#ifdef CONFIG_IOMMUFD_TEST
struct iommu_test_cmd test;
#endif
+ /*
+ * hwpt_type specific structure used in the cache invalidation
+ * path.
+ */
+ struct iommu_hwpt_invalidate_intel_vtd vtd;
+ struct iommu_hwpt_invalidate_request_intel_vtd req_vtd;
};

struct iommufd_ioctl_op {
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index c2658394827a..2e658fa346ad 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -505,6 +505,63 @@ struct iommu_hw_info {
};
#define IOMMU_DEVICE_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DEVICE_GET_HW_INFO)

+/**
+ * enum iommu_hwpt_intel_vtd_invalidate_flags - Flags for Intel VT-d
+ * stage-1 page table cache
+ * invalidation
+ * @IOMMU_VTD_QI_FLAGS_LEAF: Set when only the leaf PTE caching needs to
+ * be invalidated and the other paging
+ * structure caches can be preserved. Clear
+ * when those caches must be invalidated too.
+ */
+enum iommu_hwpt_intel_vtd_invalidate_flags {
+ IOMMU_VTD_QI_FLAGS_LEAF = 1 << 0,
+};
+
+/**
+ * struct iommu_hwpt_invalidate_request_intel_vtd - Intel VT-d cache invalidation request
+ * @addr: Start address of the range to be invalidated. Must be aligned
+ * to the VT-d page size.
+ * @npages: Number of contiguous 4K pages to be invalidated.
+ * @flags: Combination of enum iommu_hwpt_intel_vtd_invalidate_flags
+ * @__reserved: Must be 0
+ *
+ * The Intel VT-d specific invalidation data for user-managed stage-1 cache
+ * invalidation under nested translation. Userspace uses this structure to
+ * tell the host which caches are affected after it has modified the stage-1
+ * page table.
+ *
+ * To invalidate all the caches related to the hw_pagetable, set
+ * @addr to 0 and @npages to (__u64)-1.
+ */
+struct iommu_hwpt_invalidate_request_intel_vtd {
+ __u64 addr;
+ __u64 npages;
+ __u32 flags;
+ __u32 __reserved;
+};
+
+/**
+ * struct iommu_hwpt_invalidate_intel_vtd - Intel VT-d cache invalidation info
+ * @flags: Must be 0
+ * @entry_size: Size in bytes of each cache invalidation request
+ * @entry_nr_uptr: User pointer to the number of invalidation requests.
+ * The kernel reads it to get the number of requests. When
+ * a request fails, the kernel overwrites it with the
+ * number of requests that were processed successfully
+ * before the failure. This pointer must point to a __u32
+ * type of memory location.
+ * @inv_data_uptr: User pointer to the array of cache invalidation requests,
+ * laid out @entry_size bytes apart
+ *
+ * The Intel VT-d specific invalidation data for a set of cache invalidation
+ * requests. The kernel handles the requests one by one and stops at the
+ * first failure. The number of handled requests is then reported to the
+ * user by writing the buffer pointed to by @entry_nr_uptr.
+ */
+struct iommu_hwpt_invalidate_intel_vtd {
+ __u32 flags;
+ __u32 entry_size;
+ __u64 entry_nr_uptr;
+ __u64 inv_data_uptr;
+};
+
/**
* struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
* @size: sizeof(struct iommu_hwpt_invalidate)
@@ -520,6 +577,8 @@ struct iommu_hw_info {
* +==============================+========================================+
* | @hwpt_type | Data structure in @data_uptr |
* +------------------------------+----------------------------------------+
+ * | IOMMU_HWPT_TYPE_VTD_S1 | struct iommu_hwpt_invalidate_intel_vtd |
+ * +------------------------------+----------------------------------------+
*/
struct iommu_hwpt_invalidate {
__u32 size;
--
2.34.1