[PATCH RFC 11/13] iommu/vt-d: Add an irq_chip for posted MSIs

From: Jacob Pan
Date: Sat Nov 11 2023 - 23:13:05 EST


With posted MSIs, end of interrupt is handled by the notification
handler. Each MSI handler does not go through local APIC IRR, ISR
processing. There's no need to do apic_eoi() in those handlers.

Add a new acpi_ack_irq_no_eoi() for the posted MSI IR chip. At runtime
the call trace looks like:

__sysvec_posted_msi_notification() {
irq_chip_ack_parent() {
apic_ack_irq_no_eoi();
}
handle_irq_event() {
handle_irq_event_percpu() {
driver_handler()
}
}

IO-APIC IR is excluded the from posted MSI, we need to make sure it
still performs EOI.

Signed-off-by: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/apic.h | 1 +
arch/x86/kernel/apic/io_apic.c | 2 +-
arch/x86/kernel/apic/vector.c | 5 ++++
drivers/iommu/intel/irq_remapping.c | 38 ++++++++++++++++++++++++++++-
4 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 5af4ec1a0f71..a88015d5638b 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -485,6 +485,7 @@ static inline void apic_setup_apic_calls(void) { }
#endif /* CONFIG_X86_LOCAL_APIC */

extern void apic_ack_irq(struct irq_data *data);
+extern void apic_ack_irq_no_eoi(struct irq_data *data);

static inline bool lapic_vector_set_in_irr(unsigned int vector)
{
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 00da6cf6b07d..ca398ee9075b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1993,7 +1993,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
.irq_startup = startup_ioapic_irq,
.irq_mask = mask_ioapic_irq,
.irq_unmask = unmask_ioapic_irq,
- .irq_ack = irq_chip_ack_parent,
+ .irq_ack = apic_ack_irq,
.irq_eoi = ioapic_ir_ack_level,
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 14fc33cfdb37..01223ac4f57a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -911,6 +911,11 @@ void apic_ack_irq(struct irq_data *irqd)
apic_eoi();
}

+void apic_ack_irq_no_eoi(struct irq_data *irqd)
+{
+ irq_move_irq(irqd);
+}
+
void apic_ack_edge(struct irq_data *irqd)
{
irq_complete_move(irqd_cfg(irqd));
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 29b9e55dcf26..f2870d3c8313 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1233,6 +1233,42 @@ static struct irq_chip intel_ir_chip = {
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
};

+/*
+ * With posted MSIs, all vectors are multiplexed into a single notification
+ * vector. Devices MSIs are then dispatched in a demux loop where
+ * EOIs can be coalesced as well.
+ *
+ * IR chip "INTEL-IR-POST" does not do EOI on ACK instead letting posted
+ * interrupt notification handler to perform EOI.
+ *
+ * For the example below, 3 MSIs are coalesced in one CPU notification. Only
+ * one apic_eoi() is needed.
+ *
+ * __sysvec_posted_msi_notification() {
+ * irq_enter()
+ * handle_edge_irq()
+ * irq_chip_ack_parent()
+ * apic_ack_irq_no_eoi()
+ * handle_irq()
+ * handle_edge_irq()
+ * irq_chip_ack_parent()
+ * apic_ack_irq_no_eoi()
+ * handle_irq()
+ * handle_edge_irq()
+ * irq_chip_ack_parent()
+ * apic_ack_irq_no_eoi()
+ * handle_irq()
+ * apic_eoi()
+ * irq_exit()
+ */
+static struct irq_chip intel_ir_chip_post_msi = {
+ .name = "INTEL-IR-POST",
+ .irq_ack = apic_ack_irq_no_eoi,
+ .irq_set_affinity = intel_ir_set_affinity,
+ .irq_compose_msi_msg = intel_ir_compose_msi_msg,
+ .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
+};
+
static void fill_msi_msg(struct msi_msg *msg, u32 index, u32 subhandle)
{
memset(msg, 0, sizeof(*msg));
@@ -1361,7 +1397,7 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain,

irq_data->hwirq = (index << 16) + i;
irq_data->chip_data = ird;
- irq_data->chip = &intel_ir_chip;
+ irq_data->chip = posted_msi_supported() ? &intel_ir_chip_post_msi : &intel_ir_chip;
intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i);
irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
}
--
2.25.1