Re: [PATCH v2] iommu: Fix iommu_sva_bind_device to the same domain

From: Jason Gunthorpe
Date: Wed Feb 21 2024 - 08:18:05 EST


On Wed, Feb 21, 2024 at 11:06:58AM +0000, Zhangfei Gao wrote:
> The accelerator device can provide multi-queue and bind to
> the same domain in multi-thread for better performance,
> and domain refcount takes care of it.
>
> 'commit 092edaddb660 ("iommu: Support mm PASID 1:n with sva domains")'
> removes the possibility, so fix it
>
> Fixs: '092edaddb660 ("iommu: Support mm PASID 1:n with sva domains")'
> Signed-off-by: Zhangfei Gao <zhangfei.gao@xxxxxxxxxx>
> ---
> v2: Instead of checking ret == -EBUSY,
> change iommu_attach_device_pasid return value from -EBUSY to 0
> when pasid entry is found, and refcount++ when return
>
> drivers/iommu/iommu-sva.c | 2 +-
> drivers/iommu/iommu.c | 2 +-
> 2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
> index c3fc9201d0be..20b232c7675d 100644
> --- a/drivers/iommu/iommu-sva.c
> +++ b/drivers/iommu/iommu-sva.c
> @@ -141,8 +141,8 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
> struct device *dev = handle->dev;
>
> mutex_lock(&iommu_sva_lock);
> - iommu_detach_device_pasid(domain, dev, iommu_mm->pasid);
> if (--domain->users == 0) {
> + iommu_detach_device_pasid(domain, dev, iommu_mm->pasid);
> list_del(&domain->next);
> iommu_domain_free(domain);
> }

The users refcount is not to provide for sharing of the same PASID it
is to provide for sharing the domain across devices. This change would
break that because we loose the 'dev' that needs to be detached in a
multi-device case if we don't immediately call detach_device_pasid
here.

You'd need to build something much more complicated here to allow
PASID sharing.

I wonder if this case is common enough to warrant the core code to get
involved. I suppose maybe, does idxd have the same problem? It can
only open it's cdev once because of this - that doesn't seem like what
the code intends for a non-wq_dedicated?

More like this:

diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index c3fc9201d0be97..aec11e5cde6b0e 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -41,6 +41,7 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
}
iommu_mm->pasid = pasid;
INIT_LIST_HEAD(&iommu_mm->sva_domains);
+ INIT_LIST_HEAD(&iommu_mm->sva_handles);
/*
* Make sure the write to mm->iommu_mm is not reordered in front of
* initialization to iommu_mm fields. If it does, readers may see a
@@ -82,6 +83,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
goto out_unlock;
}

+ list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) {
+ if (handle->dev == dev && handle->domain->mm == mm) {
+ refcount_inc(&handle->users);
+ return handle;
+ }
+ }
+
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
if (!handle) {
ret = -ENOMEM;
@@ -109,6 +117,7 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
goto out_free_domain;
domain->users = 1;
list_add(&domain->next, &mm->iommu_mm->sva_domains);
+ list_add(&handle->handle_item, &mm->iommu_mm->sva_handles);

out:
mutex_unlock(&iommu_sva_lock);
@@ -141,6 +150,12 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
struct device *dev = handle->dev;

mutex_lock(&iommu_sva_lock);
+ if (!refcount_dec_and_test(&handle->users)) {
+ mutex_unlock(&iommu_sva_lock);
+ return;
+ }
+ list_del(&handle->handle_item);
+
iommu_detach_device_pasid(domain, dev, iommu_mm->pasid);
if (--domain->users == 0) {
list_del(&domain->next);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 1ea2a820e1eb03..5e27cb3a3be99b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -892,11 +892,14 @@ struct iommu_fwspec {
struct iommu_sva {
struct device *dev;
struct iommu_domain *domain;
+ struct list_head handle_item;
+ refcount_t users;
};

struct iommu_mm_data {
u32 pasid;
struct list_head sva_domains;
+ struct list_head sva_handles;
};

int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,