[PATCH 1/2] iommu/vt-d: Use rbtree to track iommu probed devices

From: Lu Baolu
Date: Thu Feb 15 2024 - 02:29:04 EST


Use a red-black tree (rbtree) to track devices probed by the driver's
probe_device callback. These devices need to be looked up quickly by
a source ID when the hardware reports a fault, either recoverable or
unrecoverable.

Fault reporting paths are performance-critical. Searching a list in
this scenario is inefficient, with a time complexity of O(n). An
rbtree is a self-balancing binary search tree, offering an average
search time complexity of O(log n). This significant improvement
makes an rbtree the better choice.

Furthermore, the rbtree is maintained on a per-iommu basis, which
eliminates the need for a global search and further speeds up the
critical fault paths. The rbtree is protected by a spinlock with
interrupts disabled to ensure safe access even from interrupt
context.
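
A fault handler can then resolve a source ID with a single rb_find()
under the lock. The actual lookup user is added by a later patch in
this series; the minimal sketch below, using the hypothetical helper
name device_rbtree_find(), only illustrates the intended pattern:

	/*
	 * Sketch only (the real lookup helper lands in a later patch):
	 * resolve a PCI source ID to its device_domain_info under the
	 * rbtree lock, safe to call from the fault (interrupt) path.
	 */
	static struct device_domain_info *
	device_rbtree_find(struct intel_iommu *iommu, u16 rid)
	{
		struct device_domain_info *info = NULL;
		unsigned long flags;
		struct rb_node *node;

		spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
		node = rb_find(&rid, &iommu->device_rbtree,
			       device_rid_cmp_key);
		if (node)
			info = rb_entry(node, struct device_domain_info,
					node);
		spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);

		return info;
	}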

Co-developed-by: Huang Jiaqing <jiaqing.huang@xxxxxxxxx>
Signed-off-by: Huang Jiaqing <jiaqing.huang@xxxxxxxxx>
Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
---
drivers/iommu/intel/iommu.h | 7 +++++
drivers/iommu/intel/dmar.c | 3 +-
drivers/iommu/intel/iommu.c | 62 +++++++++++++++++++++++++++++++++++--
3 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index cf9a28c7fab8..54eeaa8e35a9 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -716,6 +716,11 @@ struct intel_iommu {
struct q_inval *qi; /* Queued invalidation info */
u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/

+ /* rb tree for all probed devices */
+ struct rb_root device_rbtree;
+ /* protect the device_rbtree */
+ spinlock_t device_rbtree_lock;
+
#ifdef CONFIG_IRQ_REMAP
struct ir_table *ir_table; /* Interrupt remapping info */
struct irq_domain *ir_domain;
@@ -749,6 +754,8 @@ struct device_domain_info {
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
struct pasid_table *pasid_table; /* pasid table */
+ /* device tracking node (lookup by PCI RID) */
+ struct rb_node node;
#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
struct dentry *debugfs_dentry; /* pointer to device directory dentry */
#endif
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 23cb80d62a9a..f9b63c2875f7 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1095,7 +1095,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->agaw = agaw;
iommu->msagaw = msagaw;
iommu->segment = drhd->segment;
-
+ iommu->device_rbtree = RB_ROOT;
+ spin_lock_init(&iommu->device_rbtree_lock);
iommu->node = NUMA_NO_NODE;

ver = readl(iommu->reg + DMAR_VER_REG);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index a81a2be9b870..09009d96e553 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -96,6 +96,55 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
return re->hi & VTD_PAGE_MASK;
}

+static int device_rid_cmp_key(const void *key, const struct rb_node *node)
+{
+ struct device_domain_info *info =
+ rb_entry(node, struct device_domain_info, node);
+ const u16 *rid_lhs = key;
+
+ if (*rid_lhs < PCI_DEVID(info->bus, info->devfn))
+ return -1;
+
+ if (*rid_lhs > PCI_DEVID(info->bus, info->devfn))
+ return 1;
+
+ return 0;
+}
+
+static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs)
+{
+ struct device_domain_info *info =
+ rb_entry(lhs, struct device_domain_info, node);
+ u16 key = PCI_DEVID(info->bus, info->devfn);
+
+ return device_rid_cmp_key(&key, rhs);
+}
+
+static int device_rbtree_insert(struct intel_iommu *iommu,
+ struct device_domain_info *info)
+{
+ struct rb_node *curr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
+ curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
+ spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
+ if (curr)
+ dev_warn(info->dev, "device already in rbtree\n");
+
+ return curr ? -EEXIST : 0;
+}
+
+static void device_rbtree_remove(struct device_domain_info *info)
+{
+ struct intel_iommu *iommu = info->iommu;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
+ rb_erase(&info->node, &iommu->device_rbtree);
+ spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
+}
+
/*
* This domain is a statically identity mapping domain.
* 1. This domain creats a static 1:1 mapping to all usable memory.
@@ -4264,25 +4313,34 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}

dev_iommu_priv_set(dev, info);
+ ret = device_rbtree_insert(iommu, info);
+ if (ret)
+ goto free;

if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
ret = intel_pasid_alloc_table(dev);
if (ret) {
dev_err(dev, "PASID table allocation failed\n");
- kfree(info);
- return ERR_PTR(ret);
+ goto clear_rbtree;
}
}

intel_iommu_debugfs_create_dev(info);

return &iommu->iommu;
+clear_rbtree:
+ device_rbtree_remove(info);
+free:
+ kfree(info);
+
+ return ERR_PTR(ret);
}

static void intel_iommu_release_device(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);

+ device_rbtree_remove(info);
dmar_remove_one_dev_info(dev);
intel_pasid_free_table(dev);
intel_iommu_debugfs_remove_dev(info);
--
2.34.1