Re: [PATCH 1/2] drivers/perf: hisi: Add Support for CPA PMU

From: John Garry
Date: Tue Feb 15 2022 - 05:48:08 EST


On 14/02/2022 11:42, Qi Liu wrote:
On HiSilicon Hip09 platform, there is a CPA(Coherency Protocol Agent) on
each SICL (Super I/O Cluster) which implements packet format translation,
route parsing and traffic statistic.

CPA PMU has separate PMU registers which the driver can program freely and
interrupt is supported to handle counter overflow. Let's support its driver
under the framework of HiSilicon uncore PMU driver.


Any documentation required?

Signed-off-by: Qi Liu <liuqi115@xxxxxxxxxx>
---
drivers/perf/hisilicon/Makefile | 2 +-
drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 403 +++++++++++++++++++
include/linux/cpuhotplug.h | 1 +
3 files changed, 405 insertions(+), 1 deletion(-)
create mode 100644 drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c

diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index 506ed39e3266..6be83517acaa 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
- hisi_uncore_pa_pmu.o
+ hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o
obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
new file mode 100644
index 000000000000..a28796e93ae1
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC CPA(Coherency Protocol Agent) uncore Hardware event counters support
+ *
+ * Copyright (C) 2022 HiSilicon Limited
+ * Author: Qi Liu <liuqi115@xxxxxxxxxx>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* CPA register definition */
+#define CPA_PERF_CTRL 0x1C00
+#define CPA_EVENT_CTRL 0x1C04
+#define CPA_INT_MASK 0x1C70
+#define CPA_INT_STATUS 0x1C78
+#define CPA_INT_CLEAR 0x1C7C
+#define CPA_EVENT_TYPE0 0x1C80
+#define CPA_VERSION 0x1CF0
+#define CPA_CNT0_LOWER 0x1D00
+#define CPA_CFG_REG 0x0534

nit: some people prefer lower case for hex numbers, but not a big deal.

+
+/* CPA operation command */
+#define CPA_PERF_CTRL_EN BIT_ULL(0)
+#define CPA_EVTYPE_MASK 0xFFUL
+#define CPA_PM_CTRL BIT_ULL(9)
+
+#define CPA_COUNTER_BITS 64
+#define CPA_NR_EVENTS 0xFF
+#define CPA_REG_OFFSET 0x8
+
+static u32 hisi_cpa_pmu_get_counter_offset(int idx)
+{
+ return (CPA_CNT0_LOWER + idx * CPA_REG_OFFSET);
+}
+
+static u64 hisi_cpa_pmu_read_counter(struct hisi_pmu *cpa_pmu,
+ struct hw_perf_event *hwc)
+{
+ return readq(cpa_pmu->base + hisi_cpa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_cpa_pmu_write_counter(struct hisi_pmu *cpa_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ writeq(val, cpa_pmu->base + hisi_cpa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_cpa_pmu_write_evtype(struct hisi_pmu *cpa_pmu, int idx, u32 type)
+{
+ u32 reg, reg_idx, shift, val;
+
+ /*
+ * Select the appropriate event select register(CPA_EVENT_TYPE0/1).
+ * There are 2 event select registers for the 8 hardware counters.
+ * Event code is 8-bits and for the former 4 hardware counters,
+ * CPA_EVENT_TYPE0 is chosen. For the latter 4 hardware counters,
+ * CPA_EVENT_TYPE1 is chosen.
+ */
+ reg = CPA_EVENT_TYPE0 + (idx / 4) * 4;

rounddown(,4) could be used, I think. Or something like it.

+ reg_idx = idx % 4;
+ shift = CPA_REG_OFFSET * reg_idx;
+
+ /* Write event code to CPA_EVENT_TYPEx Register */
+ val = readl(cpa_pmu->base + reg);
+ val &= ~(CPA_EVTYPE_MASK << shift);
+ val |= (type << shift);

no need for ()

+ writel(val, cpa_pmu->base + reg);
+}
+
+static void hisi_cpa_pmu_start_counters(struct hisi_pmu *cpa_pmu)
+{
+ u32 val;
+
+ val = readl(cpa_pmu->base + CPA_PERF_CTRL);
+ val |= CPA_PERF_CTRL_EN;
+ writel(val, cpa_pmu->base + CPA_PERF_CTRL);
+}
+
+static void hisi_cpa_pmu_stop_counters(struct hisi_pmu *cpa_pmu)
+{
+ u32 val;
+
+ val = readl(cpa_pmu->base + CPA_PERF_CTRL);
+ val &= ~(CPA_PERF_CTRL_EN);
+ writel(val, cpa_pmu->base + CPA_PERF_CTRL);
+}
+
+static void hisi_cpa_pmu_disable_pm(struct hisi_pmu *cpa_pmu)
+{
+ u32 val;
+
+ val = readl(cpa_pmu->base + CPA_CFG_REG);
+ val |= CPA_PM_CTRL;
+ writel(val, cpa_pmu->base + CPA_CFG_REG);
+}
+
+static void hisi_cpa_pmu_enable_pm(struct hisi_pmu *cpa_pmu)
+{
+ u32 val;
+
+ val = readl(cpa_pmu->base + CPA_CFG_REG);
+ val &= ~CPA_PM_CTRL;
+ writel(val, cpa_pmu->base + CPA_CFG_REG);
+}
+
+static void hisi_cpa_pmu_enable_counter(struct hisi_pmu *cpa_pmu, struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Enable counter index in CPA_EVENT_CTRL register */
+ val = readl(cpa_pmu->base + CPA_EVENT_CTRL);
+ val |= 1 << hwc->idx;
+ writel(val, cpa_pmu->base + CPA_EVENT_CTRL);
+
+ /* Power management should be disabled before counting. */
+ hisi_cpa_pmu_disable_pm(cpa_pmu);
+}
+
+static void hisi_cpa_pmu_disable_counter(struct hisi_pmu *cpa_pmu, struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ hisi_cpa_pmu_enable_pm(cpa_pmu);
+
+ /* Clear counter index in CPA_EVENT_CTRL register */
+ val = readl(cpa_pmu->base + CPA_EVENT_CTRL);
+ val &= ~(1UL << hwc->idx);
+ writel(val, cpa_pmu->base + CPA_EVENT_CTRL);
+}
+
+static void hisi_cpa_pmu_enable_counter_int(struct hisi_pmu *cpa_pmu, struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Write 0 to enable interrupt */
+ val = readl(cpa_pmu->base + CPA_INT_MASK);
+ val &= ~(1UL << hwc->idx);
+ writel(val, cpa_pmu->base + CPA_INT_MASK);
+}
+
+static void hisi_cpa_pmu_disable_counter_int(struct hisi_pmu *cpa_pmu, struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Write 1 to mask interrupt */
+ val = readl(cpa_pmu->base + CPA_INT_MASK);
+ val |= 1 << hwc->idx;
+ writel(val, cpa_pmu->base + CPA_INT_MASK);
+}
+
+static u32 hisi_cpa_pmu_get_int_status(struct hisi_pmu *cpa_pmu)
+{
+ return readl(cpa_pmu->base + CPA_INT_STATUS);
+}
+
+static void hisi_cpa_pmu_clear_int_status(struct hisi_pmu *cpa_pmu, int idx)
+{
+ writel(1 << idx, cpa_pmu->base + CPA_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_cpa_pmu_acpi_match[] = {
+ { "HISI0281", },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_cpa_pmu_acpi_match);
+
+static int hisi_cpa_pmu_init_data(struct platform_device *pdev,
+ struct hisi_pmu *cpa_pmu)
+{
+ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+ &cpa_pmu->sccl_id)) {
+ dev_err(&pdev->dev, "Can not read cpa_pmu sccl-id\n");
+ return -EINVAL;
+ }
+
+ cpa_pmu->ccl_id = -1;
+
+ if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
+ &cpa_pmu->index_id)) {
+ dev_err(&pdev->dev, "Cannot read idx-id\n");
+ return -EINVAL;
+ }
+
+ cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(cpa_pmu->base)) {
+ dev_err(&pdev->dev, "ioremap failed for cpa_pmu resource\n");

I think that devm_platform_ioremap_resource() gives error messages already

+ return PTR_ERR(cpa_pmu->base);
+ }
+
+ cpa_pmu->identifier = readl(cpa_pmu->base + CPA_VERSION);
+
+ return 0;
+}
+
+static struct attribute *hisi_cpa_pmu_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-15"),
+ NULL
+};
+
+static const struct attribute_group hisi_cpa_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_cpa_pmu_format_attr,
+};
+
+static struct attribute *hisi_cpa_pmu_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(cpa_cycles, 0x00),
+ HISI_PMU_EVENT_ATTR(p1_wr_dat, 0x61),
+ HISI_PMU_EVENT_ATTR(p1_rd_dat, 0x62),
+ HISI_PMU_EVENT_ATTR(p0_wr_dat, 0xE1),
+ HISI_PMU_EVENT_ATTR(p0_rd_dat, 0xE2),

would it be better to prefix them all with cpa_? That might be future-proofing any event alias clashes with other PMU types

+ NULL
+};
+
+static const struct attribute_group hisi_cpa_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_cpa_pmu_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_cpa_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static const struct attribute_group hisi_cpa_pmu_cpumask_attr_group = {
+ .attrs = hisi_cpa_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_cpa_pmu_identifier_attr =
+ __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_cpa_pmu_identifier_attrs[] = {
+ &hisi_cpa_pmu_identifier_attr.attr,
+ NULL
+};
+
+static const struct attribute_group hisi_cpa_pmu_identifier_group = {
+ .attrs = hisi_cpa_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_cpa_pmu_attr_groups[] = {
+ &hisi_cpa_pmu_format_group,
+ &hisi_cpa_pmu_events_group,
+ &hisi_cpa_pmu_cpumask_attr_group,
+ &hisi_cpa_pmu_identifier_group,
+ NULL,

nit: no need to ',' on sentinel

+};
+
+static const struct hisi_uncore_ops hisi_uncore_cpa_pmu_ops = {
+ .write_evtype = hisi_cpa_pmu_write_evtype,
+ .get_event_idx = hisi_uncore_pmu_get_event_idx,
+ .start_counters = hisi_cpa_pmu_start_counters,
+ .stop_counters = hisi_cpa_pmu_stop_counters,
+ .enable_counter = hisi_cpa_pmu_enable_counter,
+ .disable_counter = hisi_cpa_pmu_disable_counter,
+ .enable_counter_int = hisi_cpa_pmu_enable_counter_int,
+ .disable_counter_int = hisi_cpa_pmu_disable_counter_int,
+ .write_counter = hisi_cpa_pmu_write_counter,
+ .read_counter = hisi_cpa_pmu_read_counter,
+ .get_int_status = hisi_cpa_pmu_get_int_status,
+ .clear_int_status = hisi_cpa_pmu_clear_int_status,
+};
+
+static int hisi_cpa_pmu_dev_probe(struct platform_device *pdev,
+ struct hisi_pmu *cpa_pmu)
+{
+ int ret;
+
+ ret = hisi_cpa_pmu_init_data(pdev, cpa_pmu);

nit: I don't see the need for this separate function. One issue is that we need to study the code there to see if we're missing some tidy-up if the hisi_uncore_pmu_init_irq call fails, below.

+ if (ret)
+ return ret;
+
+ ret = hisi_uncore_pmu_init_irq(cpa_pmu, pdev);
+ if (ret)
+ return ret;
+
+ cpa_pmu->counter_bits = CPA_COUNTER_BITS;
+ cpa_pmu->check_event = CPA_NR_EVENTS;
+ cpa_pmu->pmu_events.attr_groups = hisi_cpa_pmu_attr_groups;
+ cpa_pmu->ops = &hisi_uncore_cpa_pmu_ops;
+ cpa_pmu->num_counters = CPA_NR_COUNTERS;
+ cpa_pmu->dev = &pdev->dev;
+ cpa_pmu->on_cpu = -1;
+
+ return 0;
+}
+
+static int hisi_cpa_pmu_probe(struct platform_device *pdev)
+{
+ struct hisi_pmu *cpa_pmu;
+ char *name;
+ int ret;
+
+ cpa_pmu = devm_kzalloc(&pdev->dev, sizeof(*cpa_pmu), GFP_KERNEL);
+ if (!cpa_pmu)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, cpa_pmu);

nit: normally we do this at the end, right?

+
+ ret = hisi_cpa_pmu_dev_probe(pdev, cpa_pmu);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
+ &cpa_pmu->node);
+ if (ret) {
+ dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+ return ret;
+ }
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%u", cpa_pmu->sccl_id - 1,

why subtract 1? Looks dangerous if sccl_id == 0

+ cpa_pmu->index_id);
+
+ cpa_pmu->pmu = (struct pmu) {
+ .name = name,
+ .module = THIS_MODULE,
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = hisi_uncore_pmu_event_init,
+ .pmu_enable = hisi_uncore_pmu_enable,
+ .pmu_disable = hisi_uncore_pmu_disable,
+ .add = hisi_uncore_pmu_add,
+ .del = hisi_uncore_pmu_del,
+ .start = hisi_uncore_pmu_start,
+ .stop = hisi_uncore_pmu_stop,
+ .read = hisi_uncore_pmu_read,
+ .attr_groups = cpa_pmu->pmu_events.attr_groups,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ };
+
+ ret = perf_pmu_register(&cpa_pmu->pmu, name, -1);
+ if (ret) {
+ dev_err(cpa_pmu->dev, "CPA PMU register failed\n");
+ cpuhp_state_remove_instance_nocalls(
+ CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, &cpa_pmu->node);
+ }
+
+ return ret;
+}
+
+static int hisi_cpa_pmu_remove(struct platform_device *pdev)
+{
+ struct hisi_pmu *cpa_pmu = platform_get_drvdata(pdev);
+
+ perf_pmu_unregister(&cpa_pmu->pmu);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
+ &cpa_pmu->node);
+ return 0;
+}
+
+static struct platform_driver hisi_cpa_pmu_driver = {
+ .driver = {
+ .name = "hisi_cpa_pmu",
+ .acpi_match_table = ACPI_PTR(hisi_cpa_pmu_acpi_match),
+ .suppress_bind_attrs = true,
+ },
+ .probe = hisi_cpa_pmu_probe,
+ .remove = hisi_cpa_pmu_remove,
+};
+
+static int __init hisi_cpa_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
+ "AP_PERF_ARM_HISI_CPA_ONLINE",
+ hisi_uncore_pmu_online_cpu,
+ hisi_uncore_pmu_offline_cpu);
+ if (ret) {
+ pr_err("CPA PMU: setup hotplug: %d\n", ret);
+ return ret;
+ }
+
+ ret = platform_driver_register(&hisi_cpa_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE);

I am not being totally serious, but this pattern of registering a CPU hotplug handler and then a driver is so common that we could nearly add a wrapper for it.

+
+ return ret;
+}
+module_init(hisi_cpa_pmu_module_init);
+
+static void __exit hisi_cpa_pmu_module_exit(void)
+{
+ platform_driver_unregister(&hisi_cpa_pmu_driver);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE);
+}
+module_exit(hisi_cpa_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC CPA uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Qi Liu <liuqi115@xxxxxxxxxx>");
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 411a428ace4d..d55063719904 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -220,6 +220,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,
CPUHP_AP_PERF_ARM_CCN_ONLINE,
+ CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,