Re: [PATCH 3/3] iommu/arm-smmu-v3: support suspend/resume

From: Thomas Gleixner
Date: Tue Mar 26 2024 - 16:11:13 EST


On Mon, Mar 25 2024 at 18:29, Thomas Gleixner wrote:
> On Mon, Mar 25 2024 at 10:48, Jason Gunthorpe wrote:
>> On Sun, Mar 24, 2024 at 08:29:00PM +0800, Peng Fan (OSS) wrote:
>>> + if (smmu->features & ARM_SMMU_FEAT_PRI) {
>>> + desc = irq_get_msi_desc(smmu->priq.q.irq);
>>> + if (desc) {
>>> + get_cached_msi_msg(smmu->priq.q.irq, &msg);
>>> + arm_smmu_write_msi_msg(desc, &msg);
>>> + }
>>> + }
>>> +}
>>
>> I wonder if this should be done instead by converting the driver away
>> from platform MSI to the new MSI mechanism?
>
> There is work in progress for that. Should come around in the next
> weeks.

But that won't solve it. The above is a horrible hack and I think this
should be solved completely differently.

On suspend the core interrupt code disables all interrupts which are not
marked as wakeup interrupts. On resume it reenables them including
restoring the affinity setting.

So this should just work, but it does not because the MSI message is
only written once, when the interrupt is activated. Further affinity
changes affect only the ITS table and do not result in a message write.

The most trivial way, w/o changing any core code, would be to free the
interrupts on suspend and request them again on resume, because that
would deactivate and reactivate them.

Though that does not work if the IOMMU is resumed in the early resume
path. But we can enforce writing the message in the resume path.
See the untested patch below.

Thanks,

tglx
---
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -223,6 +223,7 @@ struct irq_data {
* irqchip have flag IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND set.
* IRQD_RESEND_WHEN_IN_PROGRESS - Interrupt may fire when already in progress in which
* case it must be resent at the next available opportunity.
+ * IRQD_RESUMING - Interrupt is resumed after suspend
*/
enum {
IRQD_TRIGGER_MASK = 0xf,
@@ -249,6 +250,7 @@ enum {
IRQD_AFFINITY_ON_ACTIVATE = BIT(28),
IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(29),
IRQD_RESEND_WHEN_IN_PROGRESS = BIT(30),
+ IRQD_RESUMING = BIT(31),
};

#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -443,6 +445,11 @@ static inline bool irqd_needs_resend_whe
return __irqd_to_state(d) & IRQD_RESEND_WHEN_IN_PROGRESS;
}

+static inline bool irqd_resuming(struct irq_data *d)
+{
+ return __irqd_to_state(d) & IRQD_RESUMING;
+}
+
#undef __irqd_to_state

static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -617,6 +617,7 @@ static unsigned int msi_domain_get_hwsiz
static inline void irq_chip_write_msi_msg(struct irq_data *data,
struct msi_msg *msg)
{
+ irq_data_get_msi_desc(data)->msg = *msg;
data->chip->irq_write_msi_msg(data, msg);
}

@@ -652,7 +653,10 @@ int msi_domain_set_affinity(struct irq_d
int ret;

ret = parent->chip->irq_set_affinity(parent, mask, force);
- if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
+ if (ret < 0)
+ return ret;
+
+ if (ret != IRQ_SET_MASK_OK_DONE || irqd_resuming(irq_data)) {
BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
msi_check_level(irq_data->domain, msg);
irq_chip_write_msi_msg(irq_data, msg);
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -177,7 +177,13 @@ static void resume_irq(struct irq_desc *
irq_state_set_masked(desc);
resume:
desc->istate &= ~IRQS_SUSPENDED;
+ /*
+ * Ensure that MSI messages get rewritten on resume. The device
+ * might have lost it due to power disabling.
+ */
+ irqd_set(&desc->irq_data, IRQD_RESUMING);
__enable_irq(desc);
+ irqd_clear(&desc->irq_data, IRQD_RESUMING);
}

static void resume_irqs(bool want_early)