Re: [PATCH v3 1/3] perf/core: Rework forwarding of {task|cpu}-clock events

From: Peter Zijlstra
Date: Tue May 02 2023 - 11:35:02 EST


On Tue, Apr 25, 2023 at 07:52:03PM +0530, Ravi Bangoria wrote:
> Currently, PERF_TYPE_SOFTWARE is treated specially since task-clock and
> cpu-clock events are interfaced through it but internally gets forwarded
> to their own pmus.
>
> Rework this by overwriting event->attr.type in perf_swevent_init() which
> will cause perf_init_event() to retry with updated type and event will
> automatically get forwarded to right pmu. With the change, SW pmu no
> longer needs to be treated specially and can be included in 'pmu_idr'
> list.
>
> Suggested-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxx>
> ---
> include/linux/perf_event.h | 11 ++++++
> kernel/events/core.c | 69 ++++++++++++++++++++------------------
> 2 files changed, 47 insertions(+), 33 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index d5628a7b5eaa..40647d707fb3 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -322,6 +322,9 @@ struct pmu {
> /* number of address filters this PMU can do */
> unsigned int nr_addr_filters;
>
> + /* Skip creating pmu device and sysfs interface. */
> + bool skip_sysfs_dev;
> +
> /*
> * Fully disable/enable this PMU, can be used to protect from the PMI
> * as well as for lazy/batch writing of the MSRs.

Does this make sense?

---
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -295,6 +295,8 @@ struct perf_event_pmu_context;

struct perf_output_handle;

+#define PMU_NULL_DEV ((void *)(~0))
+
/**
* struct pmu - generic performance monitoring unit
*/
@@ -322,9 +324,6 @@ struct pmu {
/* number of address filters this PMU can do */
unsigned int nr_addr_filters;

- /* Skip creating pmu device and sysfs interface. */
- bool skip_sysfs_dev;
-
/*
* Fully disable/enable this PMU, can be used to protect from the PMI
* as well as for lazy/batch writing of the MSRs.
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11113,7 +11113,7 @@ static struct pmu perf_cpu_clock = {
.task_ctx_nr = perf_sw_context,

.capabilities = PERF_PMU_CAP_NO_NMI,
- .skip_sysfs_dev = true,
+ .dev = PMU_NULL_DEV,

.event_init = cpu_clock_event_init,
.add = cpu_clock_event_add,
@@ -11195,7 +11195,7 @@ static struct pmu perf_task_clock = {
.task_ctx_nr = perf_sw_context,

.capabilities = PERF_PMU_CAP_NO_NMI,
- .skip_sysfs_dev = true,
+ .dev = PMU_NULL_DEV,

.event_init = task_clock_event_init,
.add = task_clock_event_add,
@@ -11442,7 +11442,7 @@ int perf_pmu_register(struct pmu *pmu, c
type = ret;
pmu->type = type;

- if (pmu_bus_running && !pmu->skip_sysfs_dev) {
+ if (pmu_bus_running && !pmu->dev) {
ret = pmu_dev_alloc(pmu);
if (ret)
goto free_idr;
@@ -11524,7 +11524,7 @@ void perf_pmu_unregister(struct pmu *pmu

free_percpu(pmu->pmu_disable_count);
idr_remove(&pmu_idr, pmu->type);
- if (pmu_bus_running) {
+ if (pmu_bus_running && pmu->dev != PMU_NULL_DEV) {
if (pmu->nr_addr_filters)
device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
device_del(pmu->dev);
@@ -13687,7 +13687,7 @@ static int __init perf_event_sysfs_init(
goto unlock;

list_for_each_entry(pmu, &pmus, entry) {
- if (pmu->skip_sysfs_dev)
+ if (pmu->dev)
continue;

ret = pmu_dev_alloc(pmu);