Re: [PATCH] perf mem: Fix perf mem error on hybrid

From: Ian Rogers
Date: Thu Nov 30 2023 - 15:36:39 EST


On Wed, Nov 29, 2023 at 1:15 PM Liang, Kan <kan.liang@xxxxxxxxxxxxxxx> wrote:
>
>
>
> On 2023-11-29 11:17 a.m., Ian Rogers wrote:
> > On Wed, Nov 29, 2023 at 5:52 AM Liang, Kan <kan.liang@xxxxxxxxxxxxxxx> wrote:
> >>
> >>
> >>
> >> On 2023-11-29 1:24 a.m., Ian Rogers wrote:
> >>> On Tue, Nov 28, 2023 at 12:39 PM <kan.liang@xxxxxxxxxxxxxxx> wrote:
> >>>>
> >>>> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
> >>>>
> >>>> The below error can be triggered on a hybrid machine.
> >>>>
> >>>> $ perf mem record -t load sleep 1
> >>>> event syntax error: 'breakpoint/mem-loads,ldlat=30/P'
> >>>> \___ Bad event or PMU
> >>>>
> >>>> Unable to find PMU or event on a PMU of 'breakpoint'
> >>>>
> >>>> In the perf_mem_events__record_args(), the current perf never checks the
> >>>> availability of a mem event on a given PMU. All the PMUs will be added
> >>>> to the perf mem event list. Perf errors out for the unsupported PMU.
> >>>>
> >>>> Extend perf_mem_event__supported() and take a PMU into account. Check
> >>>> the mem event for each PMU before adding it to the perf mem event list.
> >>>>
> >>>> Optimize the perf_mem_events__init() a little bit. The function is to
> >>>> check whether the mem events are supported in the system. It doesn't
> >>>> need to scan all PMUs. Just return with the first supported PMU is good
> >>>> enough.
> >>>>
> >>>> Fixes: 5752c20f3787 ("perf mem: Scan all PMUs instead of just core ones")
> >>>> Reported-by: Ammy Yi <ammy.yi@xxxxxxxxx>
> >>>> Tested-by: Ammy Yi <ammy.yi@xxxxxxxxx>
> >>>> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
> >>>> ---
> >>>> tools/perf/util/mem-events.c | 25 ++++++++++++++-----------
> >>>> 1 file changed, 14 insertions(+), 11 deletions(-)
> >>>>
> >>>> diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
> >>>> index 954b235e12e5..3a2e3687878c 100644
> >>>> --- a/tools/perf/util/mem-events.c
> >>>> +++ b/tools/perf/util/mem-events.c
> >>>> @@ -100,11 +100,14 @@ int perf_mem_events__parse(const char *str)
> >>>> return -1;
> >>>> }
> >>>>
> >>>> -static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
> >>>> +static bool perf_mem_event__supported(const char *mnt, struct perf_pmu *pmu,
> >>>> + struct perf_mem_event *e)
> >>>> {
> >>>> + char sysfs_name[100];
> >>>> char path[PATH_MAX];
> >>>> struct stat st;
> >>>>
> >>>> + scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name);
> >>>
> >>> Not sure if this is right. Looking at sysfs_name values:
> >>> https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/mem-events.c?h=perf-tools-next#n23
> >>> "cpu/events/mem-loads" and "cpu/events/mem-stores", so won't pmu->name
> >>> never be used?
> >>> Is there a missed change to change the cpu to %s?
> >>
> >> There is a X86 specific perf_mem_events__ptr(), which uses the
> >> "%s/mem-loads,ldlat=%u/P" and "%s/events/mem-loads" for Intel platforms.
> >> https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/arch/x86/util/mem-events.c?h=perf-tools-next#n20
> >> The pmu->name is used especially for the hybrid platforms.
> >
> > Right, that seems wrong. For one thing we're losing the compiler's
> > format string argument checking, but hardcoding PMU names just seems
> > to be something that will keep needing maintenance. This patch set
> > looks to fix an Intel issue but in general it is increasing tech debt
> > (or at least churning it) that will need cleaning up to do something
> > with better error checking and more generic. perf_mem_event looks like
> > a bad abstraction and then there are the integers whose special values
> > hold meaning. Could this fix come with some cleanup? It wouldn't seem
> > wrong to me to add notions of memory events to the PMU abstraction. As
> > it stands this scnprintf looks wrong in non-Intel cases.
> >
>
> The problem is that different ARCHs check different things. Arm and AMD
> checks the PMU name, while Intel and Power checks the specific events.
> It's hard to have a unified scnprintf.
>
> But we can abstract them into two cases, PMU name and event name. We use
> a different scnprintf to handle them.
> How about something as below?
>
> diff --git a/tools/perf/arch/x86/util/mem-events.c
> b/tools/perf/arch/x86/util/mem-events.c
> index 191b372f9a2d..4ef70fb9132b 100644
> --- a/tools/perf/arch/x86/util/mem-events.c
> +++ b/tools/perf/arch/x86/util/mem-events.c
> @@ -17,8 +17,8 @@ static char mem_stores_name[100];
> #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
>
> static struct perf_mem_event
> perf_mem_events_intel[PERF_MEM_EVENTS__MAX] = {
> - E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "%s/events/mem-loads"),
> - E("ldlat-stores", "%s/mem-stores/P", "%s/events/mem-stores"),
> + E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "events/mem-loads"),
> + E("ldlat-stores", "%s/mem-stores/P", "events/mem-stores"),
> E(NULL, NULL, NULL),
> };
>
> diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
> index 3a2e3687878c..ba88cb3d804f 100644
> --- a/tools/perf/util/mem-events.c
> +++ b/tools/perf/util/mem-events.c
> @@ -8,6 +8,7 @@
> #include <unistd.h>
> #include <api/fs/fs.h>
> #include <linux/kernel.h>
> +#include <linux/string.h>
> #include "map_symbol.h"
> #include "mem-events.h"
> #include "debug.h"
> @@ -20,8 +21,8 @@ unsigned int perf_mem_events__loads_ldlat = 30;
> #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
>
> static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
> - E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"),
> - E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"),
> + E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "events/mem-loads"),
> + E("ldlat-stores", "cpu/mem-stores/P", "events/mem-stores"),
> E(NULL, NULL, NULL),
> };
> #undef E
> @@ -103,12 +104,14 @@ int perf_mem_events__parse(const char *str)
> static bool perf_mem_event__supported(const char *mnt, struct perf_pmu
> *pmu,
> struct perf_mem_event *e)
> {
> - char sysfs_name[100];
> char path[PATH_MAX];
> struct stat st;
>
> - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name);
> - scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
> + if (strstarts(e->sysfs_name, "event/"))
> + scnprintf(path, PATH_MAX, "%s/devices/%s/%s", mnt, pmu->name,
> e->sysfs_name);
> + else
> + scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, e->sysfs_name);
> +
> return !stat(path, &st);
> }

Thanks Kan, how about we move forward with the patch as is. I'm just
moaning as I think there is a longer term tech debt issue we should be
cleaning up. What I'm imagining in some crude hacking for just the
mem-events list case is:

```
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -149,17 +149,19 @@ int perf_mem_events__init(void)

void perf_mem_events__list(void)
{
- int j;
-
- for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
- struct perf_mem_event *e = perf_mem_events__ptr(j);
+ static const char *mem_event_names[PERF_MEM_EVENTS__MAX] = {
+ [PERF_MEM_EVENTS__LOAD] = "...load...",
+ [PERF_MEM_EVENTS__STORE] = "...store...",
+ [PERF_MEM_EVENTS__ACCESS] = "...access...",
+ };
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ if (!perf_pmu__mem_events_supported(pmu))
+ continue;

- fprintf(stderr, "%-*s%-*s%s",
- e->tag ? 13 : 0,
- e->tag ? : "",
- e->tag && verbose > 0 ? 25 : 0,
- e->tag && verbose > 0 ?
perf_mem_events__name(j, NULL) : "",
- e->supported ? ": available\n" : "");
+ for (int i = 0; i < PERF_MEM_EVENTS__MAX; i++) {
+ if (perf_pmu__mem_event_supported(pmu, i))
+ printf("%s\n", mem_event_names[i]);
+ }
}
}

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 3c9609944a2f..e7f4f7d3d082 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1578,6 +1578,34 @@ int perf_pmu__find_event(struct perf_pmu *pmu,
const char *event, void *state, p
&args, find_event_callback);
}

+bool perf_pmu__mem_events_supported(const struct perf_pmu *pmu)
+{
+ if (pmu->is_core && is_intel()) {
+ return true;
+ }
+ if (strcmp(pmu->name, "ibs_op")) {
+ return true;
+ }
+ if (ARM...) {
+ }
+ return false;
+}
+
+bool perf_pmu__mem_event_supported(const struct perf_pmu *pmu, enum
mem_event_type type)
+{
+ if (pmu->is_core && is_intel()) {
+ switch (type) {
+ case PERF_MEM_EVENTS__LOAD:
+ return perf_pmu__have_event(pmu, "mem-loads");
+ case PERF_MEM_EVENTS__STORES:
+ return perf_pmu__have_event(pmu, "mem-stores");
+ default:
+ return false;
+ }
+ }
+ ...
+}
+
static void perf_pmu__del_formats(struct list_head *formats)
{
struct perf_pmu_format *fmt, *tmp;
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 424c3fee0949..414f0fbd77a8 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -26,6 +26,13 @@ enum {
#define PERF_PMU_FORMAT_BITS 64
#define MAX_PMU_NAME_LEN 128

+enum mem_event_type {
+ PERF_MEM_EVENTS__LOAD,
+ PERF_MEM_EVENTS__STORE,
+ PERF_MEM_EVENTS__ACCESS,
+ PERF_MEM_EVENTS__MAX,
+};
+
struct perf_event_attr;

struct perf_pmu_caps {
@@ -204,6 +211,8 @@ int perf_pmu__check_alias(struct perf_pmu *pmu,
struct parse_events_terms *head_
struct perf_pmu_info *info, bool *rewrote_terms,
struct parse_events_error *err);
int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void
*state, pmu_event_callback cb);
+bool perf_pmu__mem_events_supported(const struct perf_pmu *pmu);
+bool perf_pmu__mem_event_supported(const struct perf_pmu *pmu, enum
mem_event_types type);

int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load);
void perf_pmu_format__set_value(void *format, int config, unsigned long *bits);
```

or maybe we can have some state in struct pmu and have the
perf_pmu__arch_init set that up. Like a bitmask of supported mem
events.

I'd kind of like the arch init perf pmu code to be as little as
possible. So if you did user space emulation of ARM on Intel (Intel
does this with houdini), then you could still read memory bandwidth
numbers from the Intel PMU as the logic isn't hidden and is generic.
Of course "cpu" is something of a special case PMU.

Anyway, if you don't feel like the refactor we can move forward with
this or a v2, let me know.

Thanks,
Ian

> Thanks,
> Kan
>
> > Thanks,
> > Ian
> >
> >> Thanks,
> >> Kan
> >>>
> >>> Thanks,
> >>> Ian
> >>>
> >>>> scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
> >>>> return !stat(path, &st);
> >>>> }
> >>>> @@ -120,7 +123,6 @@ int perf_mem_events__init(void)
> >>>>
> >>>> for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
> >>>> struct perf_mem_event *e = perf_mem_events__ptr(j);
> >>>> - char sysfs_name[100];
> >>>> struct perf_pmu *pmu = NULL;
> >>>>
> >>>> /*
> >>>> @@ -136,12 +138,12 @@ int perf_mem_events__init(void)
> >>>> * of core PMU.
> >>>> */
> >>>> while ((pmu = perf_pmus__scan(pmu)) != NULL) {
> >>>> - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name);
> >>>> - e->supported |= perf_mem_event__supported(mnt, sysfs_name);
> >>>> + e->supported |= perf_mem_event__supported(mnt, pmu, e);
> >>>> + if (e->supported) {
> >>>> + found = true;
> >>>> + break;
> >>>> + }
> >>>> }
> >>>> -
> >>>> - if (e->supported)
> >>>> - found = true;
> >>>> }
> >>>>
> >>>> return found ? 0 : -ENOENT;
> >>>> @@ -167,13 +169,10 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
> >>>> int idx)
> >>>> {
> >>>> const char *mnt = sysfs__mount();
> >>>> - char sysfs_name[100];
> >>>> struct perf_pmu *pmu = NULL;
> >>>>
> >>>> while ((pmu = perf_pmus__scan(pmu)) != NULL) {
> >>>> - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
> >>>> - pmu->name);
> >>>> - if (!perf_mem_event__supported(mnt, sysfs_name)) {
> >>>> + if (!perf_mem_event__supported(mnt, pmu, e)) {
> >>>> pr_err("failed: event '%s' not supported\n",
> >>>> perf_mem_events__name(idx, pmu->name));
> >>>> }
> >>>> @@ -183,6 +182,7 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
> >>>> int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
> >>>> char **rec_tmp, int *tmp_nr)
> >>>> {
> >>>> + const char *mnt = sysfs__mount();
> >>>> int i = *argv_nr, k = 0;
> >>>> struct perf_mem_event *e;
> >>>>
> >>>> @@ -211,6 +211,9 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
> >>>> while ((pmu = perf_pmus__scan(pmu)) != NULL) {
> >>>> const char *s = perf_mem_events__name(j, pmu->name);
> >>>>
> >>>> + if (!perf_mem_event__supported(mnt, pmu, e))
> >>>> + continue;
> >>>> +
> >>>> rec_argv[i++] = "-e";
> >>>> if (s) {
> >>>> char *copy = strdup(s);
> >>>> --
> >>>> 2.35.1
> >>>>
> >