[patch 29/30] xen/events: Implement irq distribution

From: Thomas Gleixner
Date: Thu Dec 10 2020 - 14:46:14 EST


Keep track of the assignment of event channels to CPUs and select the
online CPU with the fewest assigned channels from the affinity mask which is
handed to irq_chip::irq_set_affinity() by the core code.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Cc: Juergen Gross <jgross@xxxxxxxx>
Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx>
Cc: xen-devel@xxxxxxxxxxxxxxxxxxxx
---
drivers/xen/events/events_base.c | 72 ++++++++++++++++++++++++++++++++++-----
1 file changed, 64 insertions(+), 8 deletions(-)

--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -96,6 +96,7 @@ struct irq_info {
struct list_head eoi_list;
short refcnt;
u8 spurious_cnt;
+ u8 is_accounted;
enum xen_irq_type type; /* type */
unsigned irq;
evtchn_port_t evtchn; /* event channel */
@@ -161,6 +162,9 @@ static DEFINE_PER_CPU(int [NR_VIRQS], vi
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};

+/*
+ * Event channel distribution data: one counter per possible CPU number,
+ * holding how many event channels are currently accounted to that CPU.
+ */
+static atomic_t channels_on_cpu[NR_CPUS];
+
static int **evtchn_to_irq;
#ifdef CONFIG_X86
static unsigned long *pirq_eoi_map;
@@ -257,6 +261,32 @@ static void set_info_for_irq(unsigned in
irq_set_chip_data(irq, info);
}

+/*
+ * Per CPU channel accounting
+ *
+ * Remove @info from the channel count of the CPU recorded in @info->cpu.
+ * Does nothing when @info is not marked as accounted.
+ */
+static void channels_on_cpu_dec(struct irq_info *info)
+{
+	if (info->is_accounted) {
+		/* The mark is dropped even if the CPU number proves bogus */
+		info->is_accounted = 0;
+
+		if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+			return;
+
+		/* The count is never taken below zero; warn if it was zero */
+		WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu],
+						-1, 0));
+	}
+}
+
+/*
+ * Add @info to the channel count of the CPU recorded in @info->cpu and mark
+ * it as accounted. The mark is left untouched, and a warning is issued, when
+ * the CPU number is out of range or the count already sits at INT_MAX.
+ */
+static void channels_on_cpu_inc(struct irq_info *info)
+{
+	atomic_t *count;
+
+	if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+		return;
+
+	count = &channels_on_cpu[info->cpu];
+
+	/* Only mark the channel as accounted if the increment took place */
+	if (!WARN_ON_ONCE(!atomic_add_unless(count, 1, INT_MAX)))
+		info->is_accounted = 1;
+}
+
/* Constructors for packed IRQ information. */
static int xen_irq_info_common_setup(struct irq_info *info,
unsigned irq,
@@ -339,6 +369,7 @@ static void xen_irq_info_cleanup(struct
{
set_evtchn_to_irq(info->evtchn, -1);
info->evtchn = 0;
+ channels_on_cpu_dec(info);
}

/*
@@ -449,7 +480,9 @@ static void bind_evtchn_to_cpu(evtchn_po

xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);

+ channels_on_cpu_dec(info);
info->cpu = cpu;
+ channels_on_cpu_inc(info);
}

/**
@@ -622,11 +655,6 @@ static void xen_irq_init(unsigned irq)
{
struct irq_info *info;

-#ifdef CONFIG_SMP
- /* By default all event channels notify CPU#0. */
- cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
-#endif
-
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (info == NULL)
panic("Unable to allocate metadata for IRQ%d\n", irq);
@@ -1691,10 +1719,34 @@ static int xen_rebind_evtchn_to_cpu(evtc
return 0;
}

+/*
+ * Find the online CPU within the @dest mask which has the fewest event
+ * channels assigned. The choice is not precise as the per CPU counts can be
+ * modified concurrently while the mask is walked.
+ *
+ * Returns the number of the selected CPU. When @dest contains no online CPU
+ * the selection is redone over all online CPUs, so the caller never gets an
+ * offline or out of range CPU number back.
+ */
+static unsigned int select_target_cpu(const struct cpumask *dest)
+{
+	unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
+
+	for_each_cpu_and(cpu, dest, cpu_online_mask) {
+		unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
+
+		if (curch < minch) {
+			minch = curch;
+			best_cpu = cpu;
+		}
+	}
+
+	/*
+	 * Catch the unlikely case that @dest contains no online CPU. This is
+	 * a property of the mask, not of the accounting. Retry with the
+	 * online mask itself, which cannot recurse a second time.
+	 */
+	if (best_cpu == UINT_MAX)
+		return select_target_cpu(cpu_online_mask);
+
+	return best_cpu;
+}
+
static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
bool force)
{
- unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);
+ unsigned int tcpu = select_target_cpu(dest);
int ret;

ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
@@ -1922,8 +1974,12 @@ void xen_irq_resume(void)
xen_evtchn_resume();

/* No IRQ <-> event-channel mappings. */
- list_for_each_entry(info, &xen_irq_list_head, list)
- info->evtchn = 0; /* zap event-channel binding */
+ list_for_each_entry(info, &xen_irq_list_head, list) {
+ /* Zap event-channel binding */
+ info->evtchn = 0;
+ /* Adjust accounting */
+ channels_on_cpu_dec(info);
+ }

clear_evtchn_to_irq_all();