[PATCH 10/10] irqchip/gic-v4: Make the doorbells managed affinity interrupts

From: Marc Zyngier
Date: Tue Oct 10 2017 - 08:52:35 EST


We so far allocate the doorbell interrupts without taking any
special measure regarding the affinity of these interrupts. We
simply move them around as required when the vcpu gets scheduled
on a different CPU.

But that fails to account for userspace (and the evil irqbalance), which
can try to move the VPE interrupt around, causing the ITS code
to emit VMOVP commands and remap the doorbell to another redistributor.
Worse, this can happen while the vcpu is running, causing all kinds
of trouble as the VPE is not resident on this redistributor.

So let's take a definitive action and prevent userspace from messing
with us. This is just a matter of passing an affinity to the IRQ
allocator, and all the VPE interrupts will get the IRQD_AFFINITY_MANAGED
flag, which leaves the kernel in sole control of the affinity.

Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
---
drivers/irqchip/irq-gic-v4.c | 35 +++++++++++++++++++++++++++++++++--
include/linux/irqchip/arm-gic-v4.h | 1 +
2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index cd0bcc3b7e33..bd5382799788 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -86,7 +86,10 @@
* - mask/unmask do what is expected on the doorbell interrupt.
*
* - irq_set_affinity is used to move a VPE from one redistributor to
- * another.
+ * another. Note that we make the doorbells a set of "managed
+ * affinity" interrupts in order to prevent userspace from messing
+ * with the affinity (you really don't want irqbalance to bounce
+ * them around while the VPE is running).
*
* - irq_set_vcpu_affinity once again gets hijacked for the purpose of
* creating a new sub-API, namely scheduling/descheduling a VPE
@@ -97,10 +100,36 @@
static struct irq_domain *gic_domain;
static const struct irq_domain_ops *vpe_domain_ops;

+static void its_free_affinity_masks(struct its_vm *vm)
+{
+ int i;
+
+ if (!vm->affinity_masks)
+ return;
+
+ for (i = 0; i < vm->nr_vpes; i++)
+ if (vm->affinity_masks[i])
+ free_cpumask_var(vm->affinity_masks[i]);
+
+ kfree(vm->affinity_masks);
+}
+
int its_alloc_vcpu_irqs(struct its_vm *vm)
{
int vpe_base_irq, i;

+ vm->affinity_masks = kzalloc(vm->nr_vpes * sizeof(*vm->affinity_masks),
+ GFP_KERNEL);
+ if (!vm->affinity_masks)
+ goto err;
+
+ for (i = 0; i < vm->nr_vpes; i++) {
+ if (!alloc_cpumask_var(&vm->affinity_masks[i], GFP_KERNEL))
+ goto err;
+
+ cpumask_copy(vm->affinity_masks[i], cpu_possible_mask);
+ }
+
vm->fwnode = irq_domain_alloc_named_id_fwnode("GICv4-vpe",
task_pid_nr(current));
if (!vm->fwnode)
@@ -119,7 +148,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm)

vpe_base_irq = __irq_domain_alloc_irqs(vm->domain, -1, vm->nr_vpes,
NUMA_NO_NODE, vm,
- false, NULL);
+ false, *vm->affinity_masks);
if (vpe_base_irq <= 0)
goto err;

@@ -133,6 +162,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm)
irq_domain_remove(vm->domain);
if (vm->fwnode)
irq_domain_free_fwnode(vm->fwnode);
+ its_free_affinity_masks(vm);

return -ENOMEM;
}
@@ -142,6 +172,7 @@ void its_free_vcpu_irqs(struct its_vm *vm)
irq_domain_free_irqs(vm->vpes[0]->irq, vm->nr_vpes);
irq_domain_remove(vm->domain);
irq_domain_free_fwnode(vm->fwnode);
+ its_free_affinity_masks(vm);
}

static int its_send_vpe_cmd(struct its_vpe *vpe, struct its_cmd_info *info)
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index 43cde15f221b..1052d654a863 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -32,6 +32,7 @@ struct its_vm {
struct irq_domain *domain;
struct page *vprop_page;
struct its_vpe **vpes;
+ cpumask_var_t *affinity_masks;
int nr_vpes;
irq_hw_number_t db_lpi_base;
unsigned long *db_bitmap;
--
2.11.0