Re: [REGRESSION] Perf (userspace) broken on big.LITTLE systems since v6.5

From: Ian Rogers
Date: Wed Nov 22 2023 - 23:33:25 EST


On Wed, Nov 22, 2023 at 8:59 AM Ian Rogers <irogers@xxxxxxxxxx> wrote:
>
> On Wed, Nov 22, 2023 at 8:55 AM Arnaldo Carvalho de Melo
> <acme@xxxxxxxxxx> wrote:
> >
> > Em Wed, Nov 22, 2023 at 08:29:58AM -0800, Ian Rogers escreveu:
> > > I can look at doing an event parser change like:
> > >
> > > ```
> > > diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> > > index aa2f5c6fc7fc..9a18fda525d2 100644
> > > --- a/tools/perf/util/parse-events.c
> > > +++ b/tools/perf/util/parse-events.c
> > > @@ -986,7 +986,8 @@ static int config_term_pmu(struct perf_event_attr *attr,
> > > err_str,
> > > /*help=*/NULL);
> > > return -EINVAL;
> > > }
> > > - if (perf_pmu__supports_legacy_cache(pmu)) {
> > > + if (perf_pmu__supports_legacy_cache(pmu) &&
> > > + !perf_pmu__have_event(pmu, term->val.str)) {
> > > attr->type = PERF_TYPE_HW_CACHE;
> > > return
> > > parse_events__decode_legacy_cache(term->config, pmu->type,
> > > &attr->config);
> > > @@ -1004,10 +1005,15 @@ static int config_term_pmu(struct perf_event_attr *attr,
> > > err_str,
> > > /*help=*/NULL);
> > > return -EINVAL;
> > > }
> > > - attr->type = PERF_TYPE_HARDWARE;
> > > - attr->config = term->val.num;
> > > - if (perf_pmus__supports_extended_type())
> > > - attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
> > > + if (perf_pmu__have_event(pmu, term->val.str)) {
> > > + /* If the PMU has a sysfs or json event prefer
> > > it over legacy. ARM requires this. */
> > > + term->term_type = PARSE_EVENTS__TERM_TYPE_USER;
> > > + } else {
> > > + attr->type = PERF_TYPE_HARDWARE;
> > > + attr->config = term->val.num;
> > > + if (perf_pmus__supports_extended_type())
> > > + attr->config |= (__u64)pmu->type <<
> > > PERF_PMU_TYPE_SHIFT;
> > > + }
> > > return 0;
> > > }
> > > if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER ||
> > > ```
> > > (note: this is incomplete as term->val.str isn't populated for
> > > PARSE_EVENTS__TERM_TYPE_HARDWARE)
> >
> > Yeah, I had to apply manually as your MUA mangled it, then it didn't
> > build, had to remove some consts, then there was a struct member
> > mistake, after all fixed I get to the patch below, but it now segfaults,
> > probably what you mention...
> >
> > root@roc-rk3399-pc:~# strace -e perf_event_open taskset -c 4,5 perf stat -v -e cycles,armv8_cortex_a53/cycles/,armv8_cortex_a72/cycles/ echo
> > Using CPUID 0x00000000410fd082
> > perf_event_open({type=PERF_TYPE_HARDWARE, size=0 /* PERF_ATTR_SIZE_??? */, config=0x7<<32|PERF_COUNT_HW_CPU_CYCLES, sample_period=0, sample_type=0, read_format=0, disabled=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, -1, PERF_FLAG_FD_CLOEXEC) = -1 ENOENT (No such file or directory)
> > --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=NULL} ---
> > +++ killed by SIGSEGV +++
> > Segmentation fault
> > root@roc-rk3399-pc:~#
>
> Right, I have something further along that fails tests. I'll try to
> send out an RFC today, but given the Intel behavior change ¯\_(ツ)_/¯
> But Intel don't appear to have an issue having two things called, for
> example, cycles and them both being a cycles event so they may not
> care. It is only ARM's PMUs that appear broken in this way.

To work around the PMU bug, I posted:
https://lore.kernel.org/lkml/20231123042922.834425-1-irogers@xxxxxxxxxx/

Thanks,
Ian

> Thanks,
> Ian
>
> > - Arnaldo
> >
> > diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> > index aa2f5c6fc7fc..1e648454cc49 100644
> > --- a/tools/perf/util/parse-events.c
> > +++ b/tools/perf/util/parse-events.c
> > @@ -976,7 +976,7 @@ static int config_term_pmu(struct perf_event_attr *attr,
> > struct parse_events_error *err)
> > {
> > if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) {
> > - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
> > + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
> >
> > if (!pmu) {
> > char *err_str;
> > @@ -986,7 +986,8 @@ static int config_term_pmu(struct perf_event_attr *attr,
> > err_str, /*help=*/NULL);
> > return -EINVAL;
> > }
> > - if (perf_pmu__supports_legacy_cache(pmu)) {
> > + if (perf_pmu__supports_legacy_cache(pmu) &&
> > + !perf_pmu__have_event(pmu, term->val.str)) {
> > attr->type = PERF_TYPE_HW_CACHE;
> > return parse_events__decode_legacy_cache(term->config, pmu->type,
> > &attr->config);
> > @@ -994,7 +995,7 @@ static int config_term_pmu(struct perf_event_attr *attr,
> > term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
> > }
> > if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) {
> > - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
> > + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
> >
> > if (!pmu) {
> > char *err_str;
> > @@ -1004,10 +1005,15 @@ static int config_term_pmu(struct perf_event_attr *attr,
> > err_str, /*help=*/NULL);
> > return -EINVAL;
> > }
> > - attr->type = PERF_TYPE_HARDWARE;
> > - attr->config = term->val.num;
> > - if (perf_pmus__supports_extended_type())
> > - attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
> > + if (perf_pmu__have_event(pmu, term->val.str)) {
> > + /* If the PMU has a sysfs or JSON event prefer it over legacy. ARM requires this. */
> > + term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
> > + } else {
> > + attr->type = PERF_TYPE_HARDWARE;
> > + attr->config = term->val.num;
> > + if (perf_pmus__supports_extended_type())
> > + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
> > + }
> > return 0;
> > }
> > if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER ||