[PATCH 2/3] genirq: fasteoi supports resend on concurrent invoke

From: James Gowans
Date: Thu Jun 08 2023 - 08:03:31 EST


Update the generic handle_fasteoi_irq() to cater for the case where the
next interrupt arrives while the previous handler is still running.
Currently, when that happens, the irq_may_run() early out causes the
next IRQ to be lost. Support marking the interrupt as pending in that
case so that it can be resent before handle_fasteoi_irq() returns. This
is inspired by handle_edge_irq().

Generally it should not be possible for the next interrupt to arrive
while the previous handler is still running: the CPU will not preempt
an interrupt with another one from the same source or at the same
priority. However, there is a race: if the interrupt affinity is
changed while the previous handler is running, the next interrupt can
arrive on a different CPU while the previous handler is still running
on the original CPU. In that case there will be a concurrent
invocation and the early out will be taken.

For example:

            CPU 0            |            CPU 1
-----------------------------|-----------------------------
interrupt start              |
  handle_fasteoi_irq         | set_affinity(CPU 1)
    handler                  |
    ...                      | interrupt start
    ...                      |   handle_fasteoi_irq -> early out
  handle_fasteoi_irq return  | interrupt end
interrupt end                |

This can happen on interrupt controllers which do not have a global
active state; the next commit will enable the new
IRQD_RESEND_WHEN_IN_PROGRESS flag for the controllers known to be
impacted.
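
As an illustration only (not part of this patch), an affected irqchip
driver would opt in by setting the flag on its irq_data when the
interrupt is allocated. The callback name and the surrounding driver
code below are made-up placeholders; only
irqd_set_resend_when_in_progress() comes from this patch:

  /* Hypothetical irq_domain alloc callback of an affected irqchip. */
  static int my_chip_irq_domain_alloc(struct irq_domain *domain,
                                      unsigned int virq,
                                      unsigned int nr_irqs, void *arg)
  {
          unsigned int i;

          for (i = 0; i < nr_irqs; i++) {
                  /* ... hwirq translation, irq_domain_set_info(), etc ... */

                  /* Opt in to the resend-when-in-progress handling. */
                  irqd_set_resend_when_in_progress(irq_get_irq_data(virq + i));
          }
          return 0;
  }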

Implementation notes:

It is believed that it's NOT necessary to mask the interrupt in
handle_fasteoi_irq() the way that handle_edge_irq() does. This is
because handle_edge_irq() caters for controllers which are too simple to
gate interrupts from the same source, so the kernel explicitly masks the
interrupt if it re-occurs [0].
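
For reference, the relevant early out in handle_edge_irq() looks
roughly like this (abridged from kernel/irq/chip.c; details may vary
between kernel versions) - it marks the interrupt pending and
additionally masks it, because such controllers cannot gate the next
occurrence themselves:

  /* handle_edge_irq(), abridged: mark pending AND mask+ack. */
  if (unlikely(irqd_irq_disabled(&desc->irq_data) ||
               irqd_irq_inprogress(&desc->irq_data) || !desc->action)) {
          if (!irq_check_poll(desc)) {
                  desc->istate |= IRQS_PENDING;
                  mask_ack_irq(desc);
                  goto out_unlock;
          }
  }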

The resend-on-concurrent-invoke logic is gated behind a flag which the
interrupt controller can set if it is susceptible to this problem. It
is not desirable to resend unconditionally: a wake-up source, for
example, has no need to be resent.
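
When the race does hit, the resend itself goes through the existing
check_irq_resend() machinery, which only acts on interrupts marked
IRQS_PENDING. Roughly (abridged from kernel/irq/resend.c, not part of
this patch; the exact helpers differ between kernel versions) it clears
the pending bit, asks the chip to retrigger the interrupt in hardware
where possible, and otherwise falls back to a software resend:

  /* check_irq_resend(), abridged. */
  if (irq_settings_is_level(desc)) {
          /* Level interrupts are re-raised by the hardware itself. */
          desc->istate &= ~IRQS_PENDING;
          return -EINVAL;
  }
  if (!(desc->istate & IRQS_PENDING) && !inject)
          return 0;

  desc->istate &= ~IRQS_PENDING;
  if (!try_retrigger(desc))          /* chip->irq_retrigger, if available */
          err = irq_sw_resend(desc); /* otherwise resend from a tasklet */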

[0] https://lore.kernel.org/all/bf94a380-fadd-8c38-cc51-4b54711d84b3@xxxxxxxxxx/

Suggested-by: Marc Zyngier <maz@xxxxxxxxxx>
Signed-off-by: James Gowans <jgowans@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Marc Zyngier <maz@xxxxxxxxxx>
Cc: KarimAllah Raslan <karahmed@xxxxxxxxxx>
Cc: Yipeng Zou <zouyipeng@xxxxxxxxxx>
Cc: Zhang Jianhua <chris.zjh@xxxxxxxxxx>
---
 include/linux/irq.h  | 13 +++++++++++++
 kernel/irq/chip.c    | 16 +++++++++++++++-
 kernel/irq/debugfs.c |  2 ++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index b1b28affb32a..cb77da1ac4c6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -223,6 +223,8 @@ struct irq_data {
  *				  irq_chip::irq_set_affinity() when deactivated.
  * IRQD_IRQ_ENABLED_ON_SUSPEND	- Interrupt is enabled on suspend by irq pm if
  *				  irqchip have flag IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND set.
+ * IRQD_RESEND_WHEN_IN_PROGRESS	- Interrupt may fire when already in progress in which
+ *				  case it must be resent at the next available opportunity.
  */
 enum {
 	IRQD_TRIGGER_MASK		= 0xf,
@@ -249,6 +251,7 @@ enum {
 	IRQD_HANDLE_ENFORCE_IRQCTX	= (1 << 28),
 	IRQD_AFFINITY_ON_ACTIVATE	= (1 << 29),
 	IRQD_IRQ_ENABLED_ON_SUSPEND	= (1 << 30),
+	IRQD_RESEND_WHEN_IN_PROGRESS	= (1U << 31),
 };
 
 #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -448,6 +451,16 @@ static inline bool irqd_affinity_on_activate(struct irq_data *d)
 	return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE;
 }
 
+static inline void irqd_set_resend_when_in_progress(struct irq_data *d)
+{
+	__irqd_to_state(d) |= IRQD_RESEND_WHEN_IN_PROGRESS;
+}
+
+static inline bool irqd_needs_resend_when_in_progress(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_RESEND_WHEN_IN_PROGRESS;
+}
+
 #undef __irqd_to_state
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 49e7bc871fec..57cd8f475302 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -692,8 +692,16 @@ void handle_fasteoi_irq(struct irq_desc *desc)
 
 	raw_spin_lock(&desc->lock);
 
-	if (!irq_may_run(desc))
+	/*
+	 * When an affinity change races with IRQ handling, the next interrupt
+	 * can arrive on the new CPU before the original CPU has completed
+	 * handling the previous one - it may need to be resent.
+	 */
+	if (!irq_may_run(desc)) {
+		if (irqd_needs_resend_when_in_progress(&desc->irq_data))
+			desc->istate |= IRQS_PENDING;
 		goto out;
+	}
 
 	desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
 
@@ -715,6 +723,12 @@ void handle_fasteoi_irq(struct irq_desc *desc)
 
 	cond_unmask_eoi_irq(desc, chip);
 
+	/*
+	 * When the race described above happens this will resend the interrupt.
+	 */
+	if (unlikely(desc->istate & IRQS_PENDING))
+		check_irq_resend(desc, false);
+
 	raw_spin_unlock(&desc->lock);
 	return;
 out:
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index bbcaac64038e..5971a66be034 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -133,6 +133,8 @@ static const struct irq_bit_descr irqdata_states[] = {
 	BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX),
 
 	BIT_MASK_DESCR(IRQD_IRQ_ENABLED_ON_SUSPEND),
+
+	BIT_MASK_DESCR(IRQD_RESEND_WHEN_IN_PROGRESS),
 };
 
 static const struct irq_bit_descr irqdesc_states[] = {
--
2.25.1