DMAR interrupt reservations exceed available resources on 32-socket Intel SPR

From: Dimitri Sivanich
Date: Wed Jan 17 2024 - 14:35:52 EST


We are currently running Sapphire Rapids systems with 32 sockets. On systems
of this size we see the following warnings:
DMAR-IR: Enabled IRQ remapping in x2apic mode
Interrupt reservation exceeds available resources
DMAR: Can't request irq
DMAR: DRHD 97bfc000: failed to enable fault, interrupt, ret -28

There are 10 DMAR units per socket on these systems, so 320 DMAR units in
total. It appears that each is being allocated its own vector on the first CPU,
because this happens before the other CPUs are started. At the time the DMAR
vector allocations begin, CPU 0 has 186 vectors available (as does the global
set), and 186 is also the number of DMAR fault interrupts that show up as
allocated in /proc/interrupts after boot; requests beyond that fail with
ret -28 (-ENOSPC), as in the log above.

As a simple experiment, I patched the kernel to only allocate node 0 DMAR fault
interrupt vectors while only the boot cpu is running, then to allocate fault
interrupt vectors for the rest of the DMAR units as cpus on their respective
nodes are coming up. With that change, all 320 interrupts were allocated, and
the interrupt affinity was set to the first cpu on each DMAR's respective node.

Does this seem like a sensible approach to fixing this, or is there another
avenue that we should be looking at?

For illustrative purposes, here's the patch for the experiment described above:

Index: linux/drivers/iommu/intel/dmar.c
===================================================================
--- linux.orig/drivers/iommu/intel/dmar.c
+++ linux/drivers/iommu/intel/dmar.c
@@ -2055,6 +2055,38 @@ int dmar_set_interrupt(struct intel_iomm
return ret;
}

+int enable_remaining_drhd_fault_handling(int node)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+
+ /*
+ * Enable the fault control interrupt of each DMAR unit on @node. Not __init: the caller, start_secondary(), is not __init code.
+ */
+ for_each_iommu(iommu, drhd) {
+ u32 fault_status;
+ int ret;
+ if (iommu->node != node) /* leave units that belong to other nodes alone */
+ continue;
+ ret = dmar_set_interrupt(iommu);
+
+ if (ret) {
+ pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
+ (unsigned long long)drhd->reg_base_addr, ret);
+ return -1; /* stop at the first unit whose interrupt setup fails */
+ }
+
+ /*
+ * Clear any previous faults: run the fault handler once, then write the fault status value just read back to the same register.
+ */
+ dmar_fault(iommu->irq, iommu);
+ fault_status = readl(iommu->reg + DMAR_FSTS_REG);
+ writel(fault_status, iommu->reg + DMAR_FSTS_REG);
+ }
+
+ return 0; /* no unit on @node failed */
+}
+
int __init enable_drhd_fault_handling(void)
{
struct dmar_drhd_unit *drhd;
@@ -2065,7 +2097,10 @@ int __init enable_drhd_fault_handling(vo
*/
for_each_iommu(iommu, drhd) {
u32 fault_status;
- int ret = dmar_set_interrupt(iommu);
+ int ret;
+ if (iommu->node != 0)
+ continue;
+ ret = dmar_set_interrupt(iommu);

if (ret) {
pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
Index: linux/arch/x86/kernel/smpboot.c
===================================================================
--- linux.orig/arch/x86/kernel/smpboot.c
+++ linux/arch/x86/kernel/smpboot.c
@@ -210,6 +210,7 @@ static void smp_callin(void)
cpumask_set_cpu(cpuid, cpu_callin_mask);
}

+extern int __init enable_remaining_drhd_fault_handling(int cpu);
static int cpu0_logical_apicid;
static int enable_start_cpu0;
/*
@@ -263,6 +264,7 @@ static void notrace start_secondary(void

x86_cpuinit.setup_percpu_clockev();

+ enable_remaining_drhd_fault_handling(cpu_to_node(smp_processor_id()));
wmb();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}