Re: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor module

From: Suzuki K Poulose
Date: Tue Apr 18 2023 - 07:07:15 EST

Next message: Kal Cutter Conley: "Re: [PATCH bpf-next v3 1/3] xsk: Support UMEM chunk_size > PAGE_SIZE"
Previous message: Nuno Sá: "Re: [PATCH v2] Fix IRQ issue by setting IRQ_DISABLE_UNLAZY flag"
In reply to: Besar Wicaksono: "[PATCH v2] perf: arm_cspmu: Separate Arm and vendor module"
Next in thread: Besar Wicaksono: "RE: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor module"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

On 18/04/2023 07:20, Besar Wicaksono wrote:

Arm Coresight PMU driver consists of main standard code and vendor
backend code. Both are currently built as a single module.
This patch adds vendor registration API to separate the two to
keep things modular. Vendor module shall register to the main
module on loading and trigger device reprobe.

Signed-off-by: Besar Wicaksono <bwicaksono@xxxxxxxxxx>
---

Changes from v1:
* Added separate Kconfig entry for nvidia backend
* Added lock to protect accesses to the lists
* Added support for matching subset devices from a vendor
* Added state tracking to avoid reprobe when a device is in use
v1: ttps://lore.kernel.org/linux-arm-kernel/20230403163905.20354-1-bwicaksono@xxxxxxxxxx/T/#u

---
drivers/perf/arm_cspmu/Kconfig | 9 +-
drivers/perf/arm_cspmu/Makefile | 6 +-
drivers/perf/arm_cspmu/arm_cspmu.c | 280 +++++++++++++++++++++++---
drivers/perf/arm_cspmu/arm_cspmu.h | 32 ++-
drivers/perf/arm_cspmu/nvidia_cspmu.c | 39 +++-
drivers/perf/arm_cspmu/nvidia_cspmu.h | 17 --
6 files changed, 325 insertions(+), 58 deletions(-)
delete mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h

diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig
index 0b316fe69a45..8ce7b45a0075 100644
--- a/drivers/perf/arm_cspmu/Kconfig
+++ b/drivers/perf/arm_cspmu/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
#
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
tristate "ARM Coresight Architecture PMU"
@@ -11,3 +11,10 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
based on ARM CoreSight PMU architecture. Note that this PMU
architecture does not have relationship with the ARM CoreSight
Self-Hosted Tracing.
+
+config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+ tristate "NVIDIA Coresight Architecture PMU"
+ depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+ help
+ Provides NVIDIA specific attributes for performance monitoring unit
+ (PMU) devices based on ARM CoreSight PMU architecture.
diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
index fedb17df982d..f8ae22411d59 100644
--- a/drivers/perf/arm_cspmu/Makefile
+++ b/drivers/perf/arm_cspmu/Makefile
@@ -1,6 +1,6 @@
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
-arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
+obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu.o
+obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index e31302ab7e37..c55ea2b74454 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -16,7 +16,7 @@
* The user should refer to the vendor technical documentation to get details
* about the supported events.
*
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
@@ -25,13 +25,14 @@
#include <linux/ctype.h>
#include <linux/interrupt.h>
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/list.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <acpi/processor.h>
#include "arm_cspmu.h"
-#include "nvidia_cspmu.h"
#define PMUNAME "arm_cspmu"
#define DRVNAME "arm-cs-arch-pmu"
@@ -117,11 +118,52 @@
*/
#define HILOHI_MAX_POLL 1000
-/* JEDEC-assigned JEP106 identification code */
-#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B
-
static unsigned long arm_cspmu_cpuhp_state;
+/* List of Coresight PMU instances in the system. */
+static LIST_HEAD(arm_cspmus);
+
+/* List of registered vendor backends. */
+static LIST_HEAD(arm_cspmu_impls);
+
+static DEFINE_MUTEX(arm_cspmu_lock);
+
+/*
+ * State of the generic driver.
+ * 0 => registering backend.
+ * 1 => ready to use.
+ * 2 or more => in use.
+ */
+#define ARM_CSPMU_STATE_REG 0
+#define ARM_CSPMU_STATE_READY 1
+static atomic_t arm_cspmu_state;
+
+static void arm_cspmu_state_ready(void)
+{
+ atomic_set(&arm_cspmu_state, ARM_CSPMU_STATE_READY);
+}
+
+static bool try_arm_cspmu_state_reg(void)
+{
+ const int old = ARM_CSPMU_STATE_READY;
+ const int new = ARM_CSPMU_STATE_REG;
+
+ return atomic_cmpxchg(&arm_cspmu_state, old, new) == old;
+}
+
+static bool try_arm_cspmu_state_get(void)
+{
+ return atomic_inc_not_zero(&arm_cspmu_state);
+}
+
+static void arm_cspmu_state_put(void)
+{
+ int ret;
+
+ ret = atomic_dec_if_positive(&arm_cspmu_state);
+ WARN_ON(ret < 0);
+}
+

As long as the vendor module is set for the PMU instance, it won't be unloaded as long as there are any perf events and thus the specific
driver cannot be unloaded. So, you don't need explicit refcount
maintenance for each pmu callbacks.

/*
* In CoreSight PMU architecture, all of the MMIO registers are 32-bit except
* counter register. The counter register can be implemented as 32-bit or 64-bit
@@ -380,26 +422,161 @@ static struct attribute_group arm_cspmu_cpumask_attr_group = {
};
struct impl_match {
- u32 pmiidr;
- u32 mask;
- int (*impl_init_ops)(struct arm_cspmu *cspmu);
+ struct list_head next;
+ struct arm_cspmu_impl_param param;
};
-static const struct impl_match impl_match[] = {
- {
- .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
- .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
- .impl_init_ops = nv_cspmu_init_ops
- },
- {}
-};
+static struct arm_cspmu_impl_param to_impl_param(const struct arm_cspmu *cspmu)
+{
+ struct arm_cspmu_impl_param ret = {0};
+ u32 pmiidr = cspmu->impl.pmiidr;
+
+ ret.impl_id = FIELD_GET(ARM_CSPMU_PMIIDR_IMPLEMENTER, pmiidr);
+ ret.pvr = FIELD_GET(ARM_CSPMU_PMIIDR_PVR, pmiidr);
+ ret.pvr_mask = GENMASK(31, 0);
+
+ return ret;
+}
+
+static bool impl_param_match(const struct arm_cspmu_impl_param *A,
+ const struct arm_cspmu_impl_param *B)
+{
+ /*
+ * Match criteria:
+ * - Implementer id should match.
+ * - A's device id is within B's range, or vice versa. This allows
+ * vendor to register backend for a range of devices.
+ */
+ if ((A->impl_id == B->impl_id) &&
+ (((A->pvr & A->pvr_mask) == (B->pvr & A->pvr_mask)) ||
+ ((A->pvr & B->pvr_mask) == (B->pvr & B->pvr_mask))))
+ return true;
+

nit: Please do not use CAPITAL letters for variable names. Could this
simply accept a pmiidr and a impl_match and match the fields with that
of the mask/value pair. See more below.

+ return false;
+}
+
+static struct impl_match *impl_match_find(
+ const struct arm_cspmu_impl_param *impl_param)
+{
+ struct impl_match *impl_match;
+
+ list_for_each_entry(impl_match, &arm_cspmu_impls, next) {
+ if (impl_param_match(impl_param, &impl_match->param))
+ return impl_match;
+ }
+
+ return NULL;
+}
+
+static int arm_cspmu_impl_reprobe(
+ const struct arm_cspmu_impl_param *impl_param)
+{
+ struct arm_cspmu *cspmu, *temp;
+ LIST_HEAD(reprobe_list);
+ int ret = 0;
+
+ mutex_lock(&arm_cspmu_lock);
+
+ /* Move the matching devices to temp list to avoid recursive lock. */
+ list_for_each_entry_safe(cspmu, temp, &arm_cspmus, next) {
+ struct arm_cspmu_impl_param match_param = to_impl_param(cspmu);

Also, does this work if the pvr and pvr_mask were provided by the
backend driver ? to_impl_param() takes the pmiidr which is either
read from the device or from the ACPI table, unfiltered. Could we
not change impl_param_match() to :
impl_param_match(cspmu->impl.pmiidr, impl_param) ?

+
+ if (impl_param_match(impl_param, &match_param))
+ list_move(&cspmu->next, &reprobe_list);
+ }
+
+ mutex_unlock(&arm_cspmu_lock);
+
+ /* Reprobe the devices. */
+ list_for_each_entry_safe(cspmu, temp, &reprobe_list, next) {
+ ret = device_reprobe(cspmu->dev);
+ if (ret) {
+ pr_err("arm_cspmu fail reprobe err: %d\n", ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int arm_cspmu_impl_register(const struct arm_cspmu_impl_param *impl_param)
+{
+ struct impl_match *match;
+ int ret = 0;
+
+ if (!try_arm_cspmu_state_reg()) {
+ pr_err("arm_cspmu reg failed, device(s) is in use\n");
+ return -EBUSY;
+ }
+
+ mutex_lock(&arm_cspmu_lock);
+
+ match = impl_match_find(impl_param);
+ if (match) {
+ pr_err("arm_cspmu reg failed, impl: 0x%x, pvr: 0x%x, pvr_mask: 0x%x already exists\n",
+ match->param.impl_id, match->param.pvr,
+ match->param.pvr_mask);
+ mutex_unlock(&arm_cspmu_lock);
+ arm_cspmu_state_ready();
+ return -EINVAL;
+ }
+
+ match = kzalloc(sizeof(struct impl_match), GFP_KERNEL);
+ if (!match) {
+ mutex_unlock(&arm_cspmu_lock);
+ arm_cspmu_state_ready();
+ return -ENOMEM;
+ }
+
+ memcpy(&match->param, impl_param, sizeof(match->param));

nit: match->param = *impl_param; ?

+ list_add(&match->next, &arm_cspmu_impls);
+
+ mutex_unlock(&arm_cspmu_lock);
+
+ /* Replace generic backend with vendor implementation. */
+ ret = arm_cspmu_impl_reprobe(impl_param);
+
+ if (ret)
+ arm_cspmu_impl_unregister(impl_param);
+
+ arm_cspmu_state_ready();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_impl_register);
+
+void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_param *impl_param)
+{
+ struct impl_match *match;
+
+ mutex_lock(&arm_cspmu_lock);
+
+ match = impl_match_find(impl_param);
+ if (!match) {
+ pr_err("arm_cspmu unreg failed, unable to find impl: 0x%x, pvr: 0x%x, pvr_mask: 0x%x\n",
+ impl_param->impl_id, impl_param->pvr,
+ impl_param->pvr_mask);
+ mutex_unlock(&arm_cspmu_lock);
+ return;
+ }
+
+ list_del(&match->next);
+ kfree(match);
+
+ mutex_unlock(&arm_cspmu_lock);
+
+ /* Re-attach devices to standard driver. */
+ arm_cspmu_impl_reprobe(impl_param);
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
{
- int ret;
+ int ret = 0;
struct acpi_apmt_node *apmt_node = cspmu->apmt_node;
struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
- const struct impl_match *match = impl_match;
+ struct arm_cspmu_impl_param match_param = {0};
+ const struct impl_match *match;
/*
* Get PMU implementer and product id from APMT node.
@@ -410,19 +587,23 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
(apmt_node->impl_id) ? apmt_node->impl_id :
readl(cspmu->base0 + PMIIDR);
- /* Find implementer specific attribute ops. */
- for (; match->pmiidr; match++) {
- const u32 mask = match->mask;
+ cspmu->impl.module = THIS_MODULE;
- if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
- ret = match->impl_init_ops(cspmu);
- if (ret)
- return ret;
+ mutex_lock(&arm_cspmu_lock);
- break;
- }
+ /* Find implementer specific attribute ops. */
+ match_param = to_impl_param(cspmu);
+ match = impl_match_find(&match_param);
+ if (match) {
+ cspmu->impl.module = match->param.module;
+ ret = match->param.impl_init_ops(cspmu);
}
+ mutex_unlock(&arm_cspmu_lock);
+
+ if (ret)
+ return ret;
+
/* Use default callbacks if implementer doesn't provide one. */
CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs);
@@ -639,6 +820,11 @@ static int arm_cspmu_event_init(struct perf_event *event)
struct arm_cspmu *cspmu;
struct hw_perf_event *hwc = &event->hw;
+ if (!try_arm_cspmu_state_get()) {
+ pr_err("arm_cspmu event_init fail: driver is reprobing\n");
+ return -EBUSY;
+ }
+
cspmu = to_arm_cspmu(event->pmu);
/*
@@ -648,12 +834,14 @@ static int arm_cspmu_event_init(struct perf_event *event)
if (is_sampling_event(event)) {
dev_dbg(cspmu->pmu.dev,
"Can't support sampling events\n");
+ arm_cspmu_state_put();
return -EOPNOTSUPP;
}
if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
dev_dbg(cspmu->pmu.dev,
"Can't support per-task counters\n");
+ arm_cspmu_state_put();
return -EINVAL;
}
@@ -664,16 +852,21 @@ static int arm_cspmu_event_init(struct perf_event *event)
if (!cpumask_test_cpu(event->cpu, &cspmu->associated_cpus)) {
dev_dbg(cspmu->pmu.dev,
"Requested cpu is not associated with the PMU\n");
+ arm_cspmu_state_put();
return -EINVAL;
}
/* Enforce the current active CPU to handle the events in this PMU. */
event->cpu = cpumask_first(&cspmu->active_cpu);
- if (event->cpu >= nr_cpu_ids)
+ if (event->cpu >= nr_cpu_ids) {
+ arm_cspmu_state_put();
return -EINVAL;
+ }
- if (!arm_cspmu_validate_group(event))
+ if (!arm_cspmu_validate_group(event)) {
+ arm_cspmu_state_put();
return -EINVAL;
+ }
/*
* The logical counter id is tracked with hw_perf_event.extra_reg.idx.
@@ -686,6 +879,8 @@ static int arm_cspmu_event_init(struct perf_event *event)
hwc->extra_reg.idx = -1;
hwc->config = cspmu->impl.ops.event_type(event);
+ arm_cspmu_state_put();
+
return 0;
}
@@ -864,13 +1059,22 @@ static int arm_cspmu_add(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
int idx;
+ if (!try_arm_cspmu_state_get()) {
+ pr_err("arm_cspmu event_init fail: driver is reprobing\n");
+ return -EBUSY;
+ }
+
if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
- &cspmu->associated_cpus)))
+ &cspmu->associated_cpus))) {
+ arm_cspmu_state_put();
return -ENOENT;
+ }
idx = arm_cspmu_get_event_idx(hw_events, event);
- if (idx < 0)
+ if (idx < 0) {
+ arm_cspmu_state_put();
return idx;
+ }
hw_events->events[idx] = event;
hwc->idx = to_phys_idx(cspmu, idx);
@@ -900,6 +1104,8 @@ static void arm_cspmu_del(struct perf_event *event, int flags)
clear_bit(idx, hw_events->used_ctrs);
perf_event_update_userpage(event);
+
+ arm_cspmu_state_put();
}
static void arm_cspmu_read(struct perf_event *event)
@@ -1154,7 +1360,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
cspmu->pmu = (struct pmu){
.task_ctx_nr = perf_invalid_context,
- .module = THIS_MODULE,
+ .module = cspmu->impl.module,
.pmu_enable = arm_cspmu_enable,
.pmu_disable = arm_cspmu_disable,
.event_init = arm_cspmu_event_init,
@@ -1205,6 +1411,10 @@ static int arm_cspmu_device_probe(struct platform_device *pdev)
if (ret)
return ret;
+ mutex_lock(&arm_cspmu_lock);
+ list_add(&cspmu->next, &arm_cspmus);
+ mutex_unlock(&arm_cspmu_lock);
+
return 0;
}
@@ -1212,6 +1422,10 @@ static int arm_cspmu_device_remove(struct platform_device *pdev)
{
struct arm_cspmu *cspmu = platform_get_drvdata(pdev);
+ mutex_lock(&arm_cspmu_lock);
+ list_del(&cspmu->next);
+ mutex_unlock(&arm_cspmu_lock);
+
perf_pmu_unregister(&cspmu->pmu);
cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node);
@@ -1281,6 +1495,8 @@ static int __init arm_cspmu_init(void)
{
int ret;
+ arm_cspmu_state_ready();
+
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
"perf/arm/cspmu:online",
arm_cspmu_cpu_online,
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
index 51323b175a4a..cf3458d9fc63 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.h
+++ b/drivers/perf/arm_cspmu/arm_cspmu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0
*
* ARM CoreSight Architecture PMU driver.
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
@@ -68,7 +68,10 @@
/* PMIIDR register field */
#define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0)
+#define ARM_CSPMU_PMIIDR_REVISION GENMASK(15, 12)
+#define ARM_CSPMU_PMIIDR_VARIANT GENMASK(19, 16)
#define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20)
+#define ARM_CSPMU_PMIIDR_PVR GENMASK(31, 12)
struct arm_cspmu;
@@ -107,15 +110,36 @@ struct arm_cspmu_impl_ops {
struct attribute *attr, int unused);
};
+/* Vendor/implementer registration parameter. */
+struct arm_cspmu_impl_param {
+ /* JEDEC assigned implementer id of the vendor. */
+ u32 impl_id;
+ /*
+ * The pvr value and mask describes the device ids covered by the
+ * vendor backend. pvr contains the pattern of acceptable product,
+ * variant, and revision bits from device's PMIIDR. pvr_mask contains
+ * the relevant bits when comparing pvr. 0 value on the mask means any
+ * pvr value is supported.
+ */
+ u32 pvr;
+ u32 pvr_mask;

Do we need to separate pvr from the vendor_id ? we could simply have:

pmiidr_val; /* includes Vendor id and any other fields of the pmiidr */
pmiidr_mask;

Rest looks fine to me.

Suzuki

Next message: Kal Cutter Conley: "Re: [PATCH bpf-next v3 1/3] xsk: Support UMEM chunk_size > PAGE_SIZE"
Previous message: Nuno Sá: "Re: [PATCH v2] Fix IRQ issue by setting IRQ_DISABLE_UNLAZY flag"
In reply to: Besar Wicaksono: "[PATCH v2] perf: arm_cspmu: Separate Arm and vendor module"
Next in thread: Besar Wicaksono: "RE: [PATCH v2] perf: arm_cspmu: Separate Arm and vendor module"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]