Re: [PATCH] iommu/vt-d: Check for non-NULL domain on device release

From: Jason Gunthorpe
Date: Tue Jan 16 2024 - 10:22:26 EST


On Sat, Jan 13, 2024 at 10:17:13AM -0800, Eric Badger wrote:
> In the kdump kernel, the IOMMU will operate in deferred_attach mode. In
> this mode, info->domain may not yet be assigned by the time the
> release_device function is called, which leads to the following crash in
> the crashkernel:

This never worked right? But why are you getting to release in a crash
kernel in the first place, that seems kinda weird..

> BUG: kernel NULL pointer dereference, address: 000000000000003c
> ...
> RIP: 0010:do_raw_spin_lock+0xa/0xa0
> ...
> _raw_spin_lock_irqsave+0x1b/0x30
> intel_iommu_release_device+0x96/0x170
> iommu_deinit_device+0x39/0xf0
> __iommu_group_remove_device+0xa0/0xd0
> iommu_bus_notifier+0x55/0xb0
> notifier_call_chain+0x5a/0xd0
> blocking_notifier_call_chain+0x41/0x60
> bus_notify+0x34/0x50
> device_del+0x269/0x3d0
> pci_remove_bus_device+0x77/0x100
> p2sb_bar+0xae/0x1d0
> ...
> i801_probe+0x423/0x740
>
> Signed-off-by: Eric Badger <ebadger@xxxxxxxxxxxxxxx>

It should have a fixes line, but I'm not sure what it is..

> ---
> drivers/iommu/intel/iommu.c | 10 ++++++----
> 1 file changed, 6 insertions(+), 4 deletions(-)

Unfortunately this issue is likely systemic in all the drivers.

Something I've been thinking about for a while now is to have the
option for the core code release to set the driver to a specific
releasing domain (probably a blocking or identity global static)

A lot of drivers are open coding this internally in their release
functions, like Intel. But it really doesn't deserve to be special.

Obviously if we structure things like that then this problem goes away
too. It looks to me like domain_context_clear_one() is doing BLOCKED
to me..

> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index 6fb5f6fceea1..26e450259889 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -3750,7 +3750,6 @@ static void domain_context_clear(struct device_domain_info *info)
> static void dmar_remove_one_dev_info(struct device *dev)
> {
> struct device_domain_info *info = dev_iommu_priv_get(dev);
> - struct dmar_domain *domain = info->domain;
> struct intel_iommu *iommu = info->iommu;
> unsigned long flags;
>
> @@ -3763,11 +3762,14 @@ static void dmar_remove_one_dev_info(struct device *dev)
> domain_context_clear(info);
> }
>
> - spin_lock_irqsave(&domain->lock, flags);
> + if (!info->domain)
> + return;
> +
> + spin_lock_irqsave(&info->domain->lock, flags);
> list_del(&info->link);
> - spin_unlock_irqrestore(&domain->lock, flags);
> + spin_unlock_irqrestore(&info->domain->lock, flags);
>
> - domain_detach_iommu(domain, iommu);
> + domain_detach_iommu(info->domain, iommu);
> info->domain = NULL;
> }

This function is almost a copy of device_block_translation(), with
some bugs added :\

Lu can they be made the same? It seems to me BLOCKED could always
clear the domain context, even in scalable mode?

Anyhow, my suggestion is more like:

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 15699fe5418e3d..6c683fd4bc05ec 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3747,30 +3747,6 @@ static void domain_context_clear(struct device_domain_info *info)
&domain_context_clear_one_cb, info);
}

-static void dmar_remove_one_dev_info(struct device *dev)
-{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct dmar_domain *domain = info->domain;
- struct intel_iommu *iommu = info->iommu;
- unsigned long flags;
-
- if (!dev_is_real_dma_subdevice(info->dev)) {
- if (dev_is_pci(info->dev) && sm_supported(iommu))
- intel_pasid_tear_down_entry(iommu, info->dev,
- IOMMU_NO_PASID, false);
-
- iommu_disable_pci_caps(info);
- domain_context_clear(info);
- }
-
- spin_lock_irqsave(&domain->lock, flags);
- list_del(&info->link);
- spin_unlock_irqrestore(&domain->lock, flags);
-
- domain_detach_iommu(domain, iommu);
- info->domain = NULL;
-}
-
/*
* Clear the page table pointer in context or pasid table entries so that
* all DMA requests without PASID from the device are blocked. If the page
@@ -4284,7 +4260,6 @@ static void intel_iommu_release_device(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);

- dmar_remove_one_dev_info(dev);
intel_pasid_free_table(dev);
intel_iommu_debugfs_remove_dev(info);
kfree(info);
@@ -4743,6 +4718,7 @@ static const struct iommu_dirty_ops intel_dirty_ops = {

const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
+ .release_domain = &blocking_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 068f6508e57c9b..9fbbd83a82d4e9 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -442,6 +442,8 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
err_unlink:
iommu_device_unlink(iommu_dev, dev);
err_release:
+ if (ops->release_domain)
+ ops->release_domain->ops->attach_dev(ops->release_domain, dev);
if (ops->release_device)
ops->release_device(dev);
err_module_put:
@@ -461,6 +463,15 @@ static void iommu_deinit_device(struct device *dev)

iommu_device_unlink(dev->iommu->iommu_dev, dev);

+ /*
+ * If the iommu driver provides release_domain then the core code
+ * ensures that domain is attached prior to calling release_device.
+ * Drivers can use this to enforce a translation on the idle iommu.
+ * Usually the global static blocked_domain is a good choice.
+ */
+ if (ops->release_domain)
+ ops->release_domain->ops->attach_dev(ops->release_domain, dev);
+
/*
* release_device() must stop using any attached domain on the device.
* If there are still other devices in the group they are not effected
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c34d0ef1703d68..1e8be165c878d8 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -511,6 +511,7 @@ struct iommu_ops {
struct module *owner;
struct iommu_domain *identity_domain;
struct iommu_domain *blocked_domain;
+ struct iommu_domain *release_domain;
struct iommu_domain *default_domain;
};