Re: [PATCH v2 2/2] iommu: Probe right iommu_ops for device

From: Robin Murphy
Date: Mon Jan 29 2024 - 09:58:18 EST


On 2024-01-26 10:53 am, Lu Baolu wrote:
Previously, in the iommu probe device path, __iommu_probe_device() gets
the iommu_ops for the device from dev->iommu->fwspec if this field has
been initialized before probing. Otherwise, it is assumed that only one
of Intel, AMD, s390, PAMU or legacy SMMUv2 can be present, hence it's
feasible to lookup the global iommu device list and use the iommu_ops of
the first iommu device which has no dev->iommu->fwspec.

The assumption mentioned above is no longer correct with the introduction
of the IOMMUFD mock device on x86 platforms. There might be multiple
instances of iommu drivers, none of which have the dev->iommu->fwspec
field populated.

Probe the right iommu_ops for device by iterating over all the global
drivers and call their probe function to find a match.

This will now break several drivers which are no longer expecting to see a ->probe_device call without having seen the corresponding ->of_xlate call first.

Can we please just do the trivial fix to iommufd itself? At this point I don't mind if it's your v1, the even simpler one I proposed 2 months ago[1], or anything else similarly self-contained.

Thanks,
Robin.

[1] https://lore.kernel.org/linux-iommu/e365c08b21a8d0b60e6f5d1411be6701c1a06a53.1701165201.git.robin.murphy@xxxxxxx/

Fixes: 17de3f5fdd35 ("iommu: Retire bus ops")
Cc: Robin Murphy <robin.murphy@xxxxxxx>
Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
---
drivers/iommu/iommu.c | 76 +++++++++++++++++++++++++++----------------
1 file changed, 48 insertions(+), 28 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0af0b5544072..2cdb01e411fa 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -396,30 +396,69 @@ void dev_iommu_priv_set(struct device *dev, void *priv)
}
EXPORT_SYMBOL_GPL(dev_iommu_priv_set);
+static struct iommu_device *
+__iommu_do_probe_device(struct device *dev, const struct iommu_ops *ops)
+{
+ struct iommu_device *iommu_dev;
+
+ if (!try_module_get(ops->owner))
+ return ERR_PTR(-EINVAL);
+
+ iommu_dev = ops->probe_device(dev);
+ if (IS_ERR(iommu_dev))
+ module_put(ops->owner);
+
+ return iommu_dev;
+}
+
+static struct iommu_device *iommu_probe_device_ops(struct device *dev)
+{
+ struct iommu_device *iter, *iommu_dev = ERR_PTR(-ENODEV);
+ struct iommu_fwspec *fwspec;
+
+ /*
+ * For FDT-based systems and ACPI IORT/VIOT, drivers register IOMMU
+ * instances with non-NULL fwnodes, and client devices should have been
+ * identified with a fwspec by this point. Otherwise, we will iterate
+ * over all the global drivers and call their probe function to find a
+ * match.
+ */
+ fwspec = dev_iommu_fwspec_get(dev);
+ if (fwspec && fwspec->ops)
+ return __iommu_do_probe_device(dev, fwspec->ops);
+
+ mutex_lock(&iommu_device_lock);
+ list_for_each_entry(iter, &iommu_device_list, list) {
+ iommu_dev = __iommu_do_probe_device(dev, iter->ops);
+ if (!IS_ERR(iommu_dev))
+ break;
+ }
+ mutex_unlock(&iommu_device_lock);
+
+ return iommu_dev;
+}
+
/*
* Init the dev->iommu and dev->iommu_group in the struct device and get the
* driver probed
*/
-static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
+static int iommu_init_device(struct device *dev)
{
struct iommu_device *iommu_dev;
+ const struct iommu_ops *ops;
struct iommu_group *group;
int ret;
if (!dev_iommu_get(dev))
return -ENOMEM;
- if (!try_module_get(ops->owner)) {
- ret = -EINVAL;
- goto err_free;
- }
-
- iommu_dev = ops->probe_device(dev);
+ iommu_dev = iommu_probe_device_ops(dev);
if (IS_ERR(iommu_dev)) {
ret = PTR_ERR(iommu_dev);
- goto err_module_put;
+ goto err_free;
}
dev->iommu->iommu_dev = iommu_dev;
+ ops = dev_iommu_ops(dev);
ret = iommu_device_link(iommu_dev, dev);
if (ret)
@@ -444,7 +483,6 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
err_release:
if (ops->release_device)
ops->release_device(dev);
-err_module_put:
module_put(ops->owner);
err_free:
dev->iommu->iommu_dev = NULL;
@@ -499,28 +537,10 @@ DEFINE_MUTEX(iommu_probe_device_lock);
static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
- const struct iommu_ops *ops;
- struct iommu_fwspec *fwspec;
struct iommu_group *group;
struct group_device *gdev;
int ret;
- /*
- * For FDT-based systems and ACPI IORT/VIOT, drivers register IOMMU
- * instances with non-NULL fwnodes, and client devices should have been
- * identified with a fwspec by this point. Otherwise, we can currently
- * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
- * be present, and that any of their registered instances has suitable
- * ops for probing, and thus cheekily co-opt the same mechanism.
- */
- fwspec = dev_iommu_fwspec_get(dev);
- if (fwspec && fwspec->ops)
- ops = fwspec->ops;
- else
- ops = iommu_ops_from_fwnode(NULL);
-
- if (!ops)
- return -ENODEV;
/*
* Serialise to avoid races between IOMMU drivers registering in
* parallel and/or the "replay" calls from ACPI/OF code via client
@@ -534,7 +554,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
if (dev->iommu_group)
return 0;
- ret = iommu_init_device(dev, ops);
+ ret = iommu_init_device(dev);
if (ret)
return ret;