[PATCHv2] armpmu: broadcast overflow irq on multi-core system having one muxed SPI for PMU.

From: Hoeun Ryu
Date: Sun May 13 2018 - 22:26:57 EST


From: Hoeun Ryu <hoeun.ryu@xxxxxxx>

Some SoCs, such as i.MX6DL/QL, have only one muxed SPI for the PMUs of a
multi-core system. On such systems, a CPU can be interrupted by an overflow
irq even though the overflow actually occurred on another CPU.
This patch broadcasts the irq using smp_call_function_single_async() so that
the other CPUs can check and handle their own overflows when an overflow did
not actually occur on the interrupted CPU. Per-cpu call_single_data structures
are allocated in the arm_pmu structure for this purpose during initialization.

The callback for smp_call_function_single_async() is __armpmu_handle_irq(),
which calls armpmu->handle_irq() with an invalid irq_num (-1), because
smp_call_func_t has only one parameter and that parameter is used to hand
over the dev pointer from which the armpmu pointer is obtained. This would be
a problem if handlers used the irq_num parameter, but no handler uses it for
now. An alternative approach would be to remove the irq_num argument from
handle_irq() altogether.

Signed-off-by: Hoeun Ryu <hoeun.ryu@xxxxxxx>
---
drivers/perf/arm_pmu.c | 62 ++++++++++++++++++++++++++++++++++++++++++--
include/linux/perf/arm_pmu.h | 3 +++
2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 1a0d340..df024a0 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -322,6 +322,29 @@ validate_group(struct perf_event *event)
return 0;
}

+static void __armpmu_handle_irq(void *dev)
+{
+ struct arm_pmu *armpmu;
+ u64 start_clock, finish_clock;
+ irqreturn_t ret;
+
+ armpmu = *(void **)dev;
+ start_clock = sched_clock();
+ /*
+ * Cross-call callback: run this CPU's overflow handler on behalf of the
+ * CPU that took the muxed PMU interrupt. dev is the same pointer that
+ * armpmu_dispatch_irq() received. smp_call_func_t takes only one
+ * argument, which is used for dev, so no real irq number is available
+ * and -1 is passed instead. Handlers must not rely on irq_num; passing
+ * a valid one would need a wrapper struct holding irq_num and dev.
+ */
+ ret = armpmu->handle_irq(-1, armpmu);
+ if (ret == IRQ_HANDLED) {
+ finish_clock = sched_clock();
+ perf_sample_event_took(finish_clock - start_clock);
+ }
+}
+
static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
{
struct arm_pmu *armpmu;
@@ -340,9 +363,34 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)

start_clock = sched_clock();
ret = armpmu->handle_irq(irq, armpmu);
- finish_clock = sched_clock();
+ /*
+ * The handler returns IRQ_NONE when it checks for an overflow and
+ * finds that none occurred on this CPU.
+ *
+ * Some SoCs like i.MX6 have a single muxed SPI on a multi-core system.
+ * On such systems, the irq must be broadcast to the other CPUs so that
+ * they can check their own PMU overflow.
+ */
+ if (ret == IRQ_HANDLED) {
+ finish_clock = sched_clock();
+ perf_sample_event_took(finish_clock - start_clock);
+ } else if (ret == IRQ_NONE) {
+ int cpu;
+ struct cpumask mask;
+
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(raw_smp_processor_id(), &mask);
+ for_each_cpu(cpu, &mask) {
+ call_single_data_t *csd =
+ per_cpu_ptr(armpmu->ov_brdcast_csd, cpu);
+
+ csd->func = __armpmu_handle_irq;
+ csd->info = dev;
+
+ smp_call_function_single_async(cpu, csd);
+ }
+ }

- perf_sample_event_took(finish_clock - start_clock);
return ret;
}

@@ -790,6 +838,13 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
goto out_free_pmu;
}

+ pmu->ov_brdcast_csd = alloc_percpu_gfp(call_single_data_t, flags);
+ if (!pmu->ov_brdcast_csd) {
+ pr_info("failed to allocate per-cpu "
+ "overflow broadcasting call single data.\n");
+ goto out_free_hw_events;
+ }
+
pmu->pmu = (struct pmu) {
.pmu_enable = armpmu_enable,
.pmu_disable = armpmu_disable,
@@ -824,6 +879,8 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)

return pmu;

+out_free_hw_events:
+ free_percpu(pmu->hw_events);
out_free_pmu:
kfree(pmu);
out:
@@ -844,6 +901,7 @@ struct arm_pmu *armpmu_alloc_atomic(void)
void armpmu_free(struct arm_pmu *pmu)
{
 free_percpu(pmu->hw_events);
+ free_percpu(pmu->ov_brdcast_csd); /* per-cpu csd from __armpmu_alloc() */
 kfree(pmu);
}

diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 40036a5..a63da63 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -107,6 +107,9 @@ struct arm_pmu {

/* Only to be used by ACPI probing code */
unsigned long acpi_cpuid;
+
+ /* per-cpu call single data for overflow broadcasting */
+ call_single_data_t __percpu *ov_brdcast_csd;
};

#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
--
2.1.4