[RFC PATCH v1 4/4] perf tool: Add HiSilicon PMCU data decoding support

From: Jie Zhan
Date: Mon Feb 06 2023 - 01:53:21 EST


Support for dumping raw trace of HiSilicon PMCU data using 'perf-report'
or 'perf-script'

Example usage:

# perf report -D

Output will contain the raw PMCU data with notes, such as:

. ... HISI PMCU data: size 0x9630 bytes
. ... Header: size 0x30 bytes
. 00000000: 00 00 40 00 04 00 00 00 08 00 00 00 00 00 00 00
. 00000010: 80 01 00 00 01 00 00 00 04 00 00 00 10 00 00 00
. 00000020: 11 00 00 00 12 00 00 00 13 00 00 00 00 00 00 00
. Auxtrace buffer max size: 0x400000
. Number of PMU counters in parallel: 4
. Number of monitored CPUs: 8
. Compatible mode: no
. Subsample size: 0x180
. Number of subsamples per sample: 1
. Number of events: 4
. Event 0: 0x0010
. Event 1: 0x0011
. Event 2: 0x0012
. Event 3: 0x0013
. ... Data: size 0x9600 bytes
. Sample 0
. Subsample 0
. 00000030: 00000000 PMCID0SR CPU 0
. 00000034: 00000000 PMCID0SR CPU 1
. 00000038: 00000000 PMCID0SR CPU 2
. 0000003c: 00000000 PMCID0SR CPU 3
. 00000040: 00000000 PMCID0SR CPU 4
. 00000044: 00000000 PMCID0SR CPU 5
. 00000048: 00000000 PMCID0SR CPU 6
. 0000004c: 00000000 PMCID0SR CPU 7
. 00000050: 000000ba PMCID1SR CPU 0
. 00000054: 000056fe PMCID1SR CPU 1
. 00000058: 00000000 PMCID1SR CPU 2
. 0000005c: 00000000 PMCID1SR CPU 3
. 00000060: 00000195 PMCID1SR CPU 4
. 00000064: 000056fc PMCID1SR CPU 5
. 00000068: 00000000 PMCID1SR CPU 6
. 0000006c: 00000000 PMCID1SR CPU 7
. 00000070: 0000000000000000 Event 0010 CPU 0
. 00000078: 0000000000000000 Event 0010 CPU 1
. 00000080: 0000000000000000 Event 0010 CPU 2
. 00000088: 0000000000000000 Event 0010 CPU 3
. 00000090: 0000000000000000 Event 0010 CPU 4
. 00000098: 0000000000000001 Event 0010 CPU 5
. 000000a0: 0000000000000000 Event 0010 CPU 6
. 000000a8: 0000000000000000 Event 0010 CPU 7
. 000000b0: 0000000000000000 Event 0011 CPU 0
. 000000b8: 0000000000000000 Event 0011 CPU 1
. 000000c0: 0000000000000000 Event 0011 CPU 2
. 000000c8: 0000000000000000 Event 0011 CPU 3
. 000000d0: 000000000000d614 Event 0011 CPU 4
. 000000d8: 000000000000046b Event 0011 CPU 5
. 000000e0: 0000000000000000 Event 0011 CPU 6
. 000000e8: 0000000000000000 Event 0011 CPU 7
. 000000f0: 0000000000000000 Event 0012 CPU 0
. 000000f8: 0000000000000000 Event 0012 CPU 1
. 00000100: 0000000000000000 Event 0012 CPU 2
. 00000108: 0000000000000000 Event 0012 CPU 3
. 00000110: 00000000000000f4 Event 0012 CPU 4
. 00000118: 0000000000000003 Event 0012 CPU 5
. 00000120: 0000000000000000 Event 0012 CPU 6
. 00000128: 0000000000000000 Event 0012 CPU 7
. 00000130: 0000000000000000 Event 0013 CPU 0
. 00000138: 0000000000000000 Event 0013 CPU 1
. 00000140: 0000000000000000 Event 0013 CPU 2
. 00000148: 0000000000000000 Event 0013 CPU 3
. 00000150: 00000000000000f4 Event 0013 CPU 4
. 00000158: 0000000000000004 Event 0013 CPU 5
. 00000160: 0000000000000000 Event 0013 CPU 6
. 00000168: 0000000000000000 Event 0013 CPU 7
. 00000170: 000000000000d614 Cycle count CPU 0
. 00000178: 000000000000d614 Cycle count CPU 1
. 00000180: 0000000000000000 Cycle count CPU 2
. 00000188: 0000000000000000 Cycle count CPU 3
. 00000190: 000000000000d614 Cycle count CPU 4
. 00000198: 000000000000d614 Cycle count CPU 5
. 000001a0: 0000000000000000 Cycle count CPU 6
. 000001a8: 0000000000000000 Cycle count CPU 7
(...more data follows)

Signed-off-by: Jie Zhan <zhanjie9@xxxxxxxxxxxxx>
---
tools/perf/util/Build | 1 +
tools/perf/util/auxtrace.c | 4 +
tools/perf/util/hisi-pmcu.c | 305 ++++++++++++++++++++++++++++++++++++
tools/perf/util/hisi-pmcu.h | 2 +
4 files changed, 312 insertions(+)
create mode 100644 tools/perf/util/hisi-pmcu.c

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e315ecaec323..e062a2c1b962 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -120,6 +120,7 @@ perf-$(CONFIG_AUXTRACE) += arm-spe.o
perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/
perf-$(CONFIG_AUXTRACE) += hisi-ptt.o
perf-$(CONFIG_AUXTRACE) += hisi-ptt-decoder/
+perf-$(CONFIG_AUXTRACE) += hisi-pmcu.o
perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o

ifdef CONFIG_LIBOPENCSD
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 46ada5ec3f9a..ac19220d307e 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -53,6 +53,7 @@
#include "intel-bts.h"
#include "arm-spe.h"
#include "hisi-ptt.h"
+#include "hisi-pmcu.h"
#include "s390-cpumsf.h"
#include "util/mmap.h"

@@ -1324,6 +1325,9 @@ int perf_event__process_auxtrace_info(struct perf_session *session,
case PERF_AUXTRACE_HISI_PTT:
err = hisi_ptt_process_auxtrace_info(event, session);
break;
+ case PERF_AUXTRACE_HISI_PMCU:
+ err = hisi_pmcu_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_UNKNOWN:
default:
return -EINVAL;
diff --git a/tools/perf/util/hisi-pmcu.c b/tools/perf/util/hisi-pmcu.c
new file mode 100644
index 000000000000..7e0b41cd464d
--- /dev/null
+++ b/tools/perf/util/hisi-pmcu.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon Performance Monitor Control Unit (PMCU) support
+ *
+ * Copyright (C) 2022 HiSilicon Limited
+ */
+
+#include <errno.h>
+#include <linux/math.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+#include <perf/event.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "auxtrace.h"
+#include "color.h"
+#include "debug.h"
+#include "event.h"
+#include "evsel.h"
+#include "hisi-pmcu.h"
+#include "session.h"
+#include "tool.h"
+#include <internal/lib.h>
+
+#define HISI_PMCU_AUX_HEADER_ALIGN 0x10
+#define HISI_PMCU_NR_CPU_CLUSTER 8
+#define dump_print(fmt, ...) \
+ color_fprintf(stdout, PERF_COLOR_BLUE, fmt, ##__VA_ARGS__)
+
+enum hisi_pmcu_auxtrace_header_index {
+ HISI_PMCU_HEADER_BUFFER_SIZE,
+ HISI_PMCU_HEADER_NR_PMU,
+ HISI_PMCU_HEADER_NR_CPU,
+ HISI_PMCU_HEADER_COMP_MODE,
+ HISI_PMCU_HEADER_SUBSAMPLE_SIZE,
+ HISI_PMCU_HEADER_NR_SUBSAMPLE_PER_SAMPLE,
+ HISI_PMCU_HEADER_NR_EVENT,
+ HISI_PMCU_HEADER_MAX
+};
+
+struct hisi_pmcu_aux_header_info {
+ u32 buffer_size;
+ u32 nr_pmu;
+ u32 nr_cpu;
+ u32 comp_mode;
+ u32 subsample_size;
+ u32 nr_subsample_per_sample;
+ u32 nr_event;
+ u32 events[];
+};
+
+struct hisi_pmcu_process {
+ u32 pmu_type;
+ struct auxtrace auxtrace;
+ struct hisi_pmcu_aux_header_info *header;
+};
+
+static int hisi_pmcu_process_event(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static int hisi_pmcu_process_header(struct hisi_pmcu_process *pmcu,
+ const unsigned char *__data, u64 size)
+{
+ struct hisi_pmcu_aux_header_info *header;
+ const u32 *data = (const u32 *) __data;
+ unsigned int i, j;
+ u32 read_size;
+
+ read_size = HISI_PMCU_HEADER_MAX * sizeof(*data);
+ if (size < read_size)
+ return -EINVAL;
+
+ read_size += data[HISI_PMCU_HEADER_NR_EVENT] * sizeof(*data);
+ if (size < read_size)
+ return -EINVAL;
+
+ pmcu->header = malloc(read_size);
+ header = pmcu->header;
+ memcpy(header, data, read_size);
+ read_size = round_up(read_size, HISI_PMCU_AUX_HEADER_ALIGN);
+
+ dump_print(". ... Header: size 0x%lx bytes\n", read_size);
+ for (i = 0; i < read_size; i += HISI_PMCU_AUX_HEADER_ALIGN) {
+ dump_print(". %08lx: ", i);
+ for (j = 0; j < HISI_PMCU_AUX_HEADER_ALIGN; j++)
+ dump_print("%02x ", __data[i + j]);
+ dump_print("\n");
+ }
+
+ dump_print(". Auxtrace buffer max size: 0x%lx\n", header->buffer_size);
+ dump_print(". Number of PMU counters in parallel: %d\n", header->nr_pmu);
+ dump_print(". Number of monitored CPUs: %d\n", header->nr_cpu);
+ dump_print(". Compatible mode: %s\n", header->comp_mode ? "yes" : "no");
+ dump_print(". Subsample size: 0x%lx\n", header->subsample_size);
+ dump_print(". Number of subsamples per sample: %d\n", header->nr_subsample_per_sample);
+ dump_print(". Number of events: %d\n", header->nr_event);
+
+ for (i = 0; i < header->nr_event; i++)
+ dump_print(". Event %3d: 0x%04x\n", i, header->events[i]);
+
+ return read_size;
+}
+
+static int hisi_pmcu_dump_subsample(struct hisi_pmcu_aux_header_info *header,
+ const unsigned char *data, u64 offset,
+ u32 evoffset)
+{
+ int nr_cluster, core, cid, i;
+ u32 pos = 0, event;
+
+ nr_cluster = header->nr_cpu / HISI_PMCU_NR_CPU_CLUSTER;
+
+ for (cid = 0; cid < 2; cid++) {
+ for (core = 0; core < HISI_PMCU_NR_CPU_CLUSTER; core++) {
+ for (i = 0; i < nr_cluster; i++) {
+ dump_print(". %08lx: %08lx PMCID%dSR CPU %d\n",
+ offset + pos, *(u32 *) (data + pos),
+ cid,
+ core + i * HISI_PMCU_NR_CPU_CLUSTER);
+ pos += sizeof(u32);
+ }
+ }
+ }
+
+ for (event = 0; event < header->nr_pmu; event++) {
+ for (core = 0; core < HISI_PMCU_NR_CPU_CLUSTER; core++) {
+ for (i = 0; i < nr_cluster; i++) {
+ dump_print(". %08lx: %016llx Event %04lx CPU %d\n",
+ offset + pos, *(u64 *) (data + pos),
+ header->events[event + evoffset],
+ core + i * HISI_PMCU_NR_CPU_CLUSTER);
+ pos += sizeof(u64);
+ }
+ }
+ }
+
+ if (!header->comp_mode) {
+ for (core = 0; core < HISI_PMCU_NR_CPU_CLUSTER; core++) {
+ for (i = 0; i < nr_cluster; i++) {
+ dump_print(". %08lx: %016llx Cycle count CPU %d\n",
+ offset + pos, *(u64 *) (data + pos),
+ core + i * HISI_PMCU_NR_CPU_CLUSTER);
+ pos += sizeof(u64);
+ }
+ }
+ }
+
+ return pos;
+}
+
+static int hisi_pmcu_dump_sample(struct hisi_pmcu_aux_header_info *header,
+ const unsigned char *data, u64 offset)
+{
+ u32 pos = 0, i = 0;
+
+ while (i < header->nr_subsample_per_sample) {
+ dump_print(". Subsample %d\n", i + 1);
+ pos += hisi_pmcu_dump_subsample(header, data + pos,
+ offset + pos,
+ i * header->nr_pmu);
+ i++;
+ }
+
+ return pos;
+}
+
+static int hisi_pmcu_dump_data(struct hisi_pmcu_process *pmcu,
+ const unsigned char *data, u64 size)
+{
+ struct hisi_pmcu_aux_header_info *header;
+ u32 sample_size;
+ u32 nr_sample;
+ u64 pos = 0;
+ int ret;
+
+ dump_print(". ... HISI PMCU data: size 0x%lx bytes\n", size);
+
+ ret = hisi_pmcu_process_header(pmcu, data, size);
+ if (ret < 0)
+ return ret;
+
+ pos += ret;
+
+ header = pmcu->header;
+ sample_size = header->subsample_size * header->nr_subsample_per_sample;
+ nr_sample = 1;
+ dump_print(". ... Data: size 0x%lx bytes\n", size - pos);
+ while (pos < size) {
+ u32 buf_remain;
+
+ dump_print(". Sample %d\n", nr_sample);
+ pos += hisi_pmcu_dump_sample(header, data + pos, pos);
+ nr_sample++;
+
+ // Skip gap at the end of an auxtrace buffer
+ buf_remain = header->buffer_size - pos % header->buffer_size;
+ if (buf_remain < sample_size)
+ pos += buf_remain;
+ }
+
+ return 0;
+}
+
+static int hisi_pmcu_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct hisi_pmcu_process *pmcu_process;
+ void *data;
+ u64 size;
+ int fd;
+
+ if (!dump_trace)
+ return 0;
+
+ size = event->auxtrace.size;
+ if (!size)
+ return 0;
+
+ data = malloc(size);
+ if (!data)
+ return -errno;
+
+ fd = perf_data__fd(session->data);
+
+ if (readn(fd, data, size) < 0) {
+ free(data);
+ return -errno;
+ }
+
+ pmcu_process = container_of(session->auxtrace,
+ struct hisi_pmcu_process, auxtrace);
+
+ return hisi_pmcu_dump_data(pmcu_process, data, size);
+}
+
+static int hisi_pmcu_flush_events(struct perf_session *session __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static void hisi_pmcu_free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static void hisi_pmcu_free(struct perf_session *session)
+{
+ struct hisi_pmcu_process *pmcu_process;
+
+ pmcu_process = container_of(session->auxtrace,
+ struct hisi_pmcu_process, auxtrace);
+
+ session->auxtrace = NULL;
+ free(pmcu_process);
+}
+
+static bool hisi_pmcu_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct hisi_pmcu_process *pmcu_process;
+
+ pmcu_process = container_of(session->auxtrace,
+ struct hisi_pmcu_process, auxtrace);
+
+ return evsel->core.attr.type == pmcu_process->pmu_type;
+}
+
+int hisi_pmcu_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_record_auxtrace_info *auxtrace_info;
+ struct hisi_pmcu_process *pmcu_process;
+
+ auxtrace_info = &event->auxtrace_info;
+
+ if (auxtrace_info->header.size < sizeof(*auxtrace_info) +
+ HISI_PMCU_AUXTRACE_PRIV_SIZE)
+ return -EINVAL;
+
+ pmcu_process = zalloc(sizeof(*pmcu_process));
+ if (!pmcu_process)
+ return -ENOMEM;
+
+ pmcu_process->pmu_type = auxtrace_info->priv[0];
+
+ pmcu_process->auxtrace = (struct auxtrace) {
+ .process_event = hisi_pmcu_process_event,
+ .process_auxtrace_event = hisi_pmcu_process_auxtrace_event,
+ .flush_events = hisi_pmcu_flush_events,
+ .free_events = hisi_pmcu_free_events,
+ .free = hisi_pmcu_free,
+ .evsel_is_auxtrace = hisi_pmcu_evsel_is_auxtrace,
+ };
+
+ session->auxtrace = &pmcu_process->auxtrace;
+
+ return 0;
+}
diff --git a/tools/perf/util/hisi-pmcu.h b/tools/perf/util/hisi-pmcu.h
index d46d523a3aee..8df74695164b 100644
--- a/tools/perf/util/hisi-pmcu.h
+++ b/tools/perf/util/hisi-pmcu.h
@@ -14,4 +14,6 @@
struct auxtrace_record *hisi_pmcu_recording_init(int *err,
struct perf_pmu *hisi_pmcu_pmu);

+int hisi_pmcu_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
#endif
--
2.30.0