Re: [PATCH v1] perf pmus: Make PMU alias name loading lazy

From: Ian Rogers
Date: Thu Sep 28 2023 - 01:19:22 EST


On Wed, Sep 27, 2023 at 10:00 PM Namhyung Kim <namhyung@xxxxxxxxxx> wrote:
>
> Hi Ian,
>
> On Sun, Sep 24, 2023 at 11:24 PM Ian Rogers <irogers@xxxxxxxxxx> wrote:
> >
> > PMU alias names were computed when the first perf_pmu is created,
> > scanning all PMUs in event sources for a file called alias that
> > generally doesn't exist. Switch to trying to load the file when all
> > PMU related files are loaded in lookup. This would cause a PMU name
> > lookup of an alias name to fail if no PMUs were loaded, so in that
> > case all PMUs are loaded and the find repeated. The overhead is
> > similar but in the (very) general case not all PMUs are scanned for
> > the alias file.
> >
> > As the overhead occurs once per invocation it doesn't show in perf
> > bench internals pmu-scan. On a tigerlake machine, the number of openat
> > system calls for an event of cpu/cycles/ with perf stat reduces from
> > 94 to 69 (ie 25 fewer openat calls).
>
> I think the pmu-scan bench could show the difference as it
> calls perf_pmu__destroy() in the loop body. So every call to
> perf_pmu__scan() should start from nothing, right?

The PMU alias name list was funny. It is/was maintained in the x86
specific PMU code and the destroy didn't clear the list. This change
adds an openat to loading a PMU for the alias, so pmu-scan shows a
very small slow down. However, in the more normal cases we're reducing
the number of openats by 25%.

Thanks,
Ian

> >
> > Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
>
> Maybe we can load event aliases and formats lazily later.
> Anyway, it looks good to me.
>
> Acked-by: Namhyung Kim <namhyung@xxxxxxxxxx>
>
> Thanks,
> Namhyung
>
>
> > ---
> > tools/perf/arch/x86/util/pmu.c | 139 ---------------------------------
> > tools/perf/util/pmu.c | 39 ++++-----
> > tools/perf/util/pmu.h | 2 -
> > tools/perf/util/pmus.c | 10 +++
> > 4 files changed, 31 insertions(+), 159 deletions(-)
> >
> > diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> > index f428cffb0378..8b53ca468a50 100644
> > --- a/tools/perf/arch/x86/util/pmu.c
> > +++ b/tools/perf/arch/x86/util/pmu.c
> > @@ -17,15 +17,6 @@
> > #include "../../../util/pmus.h"
> > #include "env.h"
> >
> > -struct pmu_alias {
> > - char *name;
> > - char *alias;
> > - struct list_head list;
> > -};
> > -
> > -static LIST_HEAD(pmu_alias_name_list);
> > -static bool cached_list;
> > -
> > struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> > {
> > #ifdef HAVE_AUXTRACE_SUPPORT
> > @@ -41,136 +32,6 @@ struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __mayb
> > return NULL;
> > }
> >
> > -static void pmu_alias__delete(struct pmu_alias *pmu_alias)
> > -{
> > - if (!pmu_alias)
> > - return;
> > -
> > - zfree(&pmu_alias->name);
> > - zfree(&pmu_alias->alias);
> > - free(pmu_alias);
> > -}
> > -
> > -static struct pmu_alias *pmu_alias__new(char *name, char *alias)
> > -{
> > - struct pmu_alias *pmu_alias = zalloc(sizeof(*pmu_alias));
> > -
> > - if (pmu_alias) {
> > - pmu_alias->name = strdup(name);
> > - if (!pmu_alias->name)
> > - goto out_delete;
> > -
> > - pmu_alias->alias = strdup(alias);
> > - if (!pmu_alias->alias)
> > - goto out_delete;
> > - }
> > - return pmu_alias;
> > -
> > -out_delete:
> > - pmu_alias__delete(pmu_alias);
> > - return NULL;
> > -}
> > -
> > -static int setup_pmu_alias_list(void)
> > -{
> > - int fd, dirfd;
> > - DIR *dir;
> > - struct dirent *dent;
> > - struct pmu_alias *pmu_alias;
> > - char buf[MAX_PMU_NAME_LEN];
> > - FILE *file;
> > - int ret = -ENOMEM;
> > -
> > - dirfd = perf_pmu__event_source_devices_fd();
> > - if (dirfd < 0)
> > - return -1;
> > -
> > - dir = fdopendir(dirfd);
> > - if (!dir)
> > - return -errno;
> > -
> > - while ((dent = readdir(dir))) {
> > - if (!strcmp(dent->d_name, ".") ||
> > - !strcmp(dent->d_name, ".."))
> > - continue;
> > -
> > - fd = perf_pmu__pathname_fd(dirfd, dent->d_name, "alias", O_RDONLY);
> > - if (fd < 0)
> > - continue;
> > -
> > - file = fdopen(fd, "r");
> > - if (!file)
> > - continue;
> > -
> > - if (!fgets(buf, sizeof(buf), file)) {
> > - fclose(file);
> > - continue;
> > - }
> > -
> > - fclose(file);
> > -
> > - /* Remove the last '\n' */
> > - buf[strlen(buf) - 1] = 0;
> > -
> > - pmu_alias = pmu_alias__new(dent->d_name, buf);
> > - if (!pmu_alias)
> > - goto close_dir;
> > -
> > - list_add_tail(&pmu_alias->list, &pmu_alias_name_list);
> > - }
> > -
> > - ret = 0;
> > -
> > -close_dir:
> > - closedir(dir);
> > - return ret;
> > -}
> > -
> > -static const char *__pmu_find_real_name(const char *name)
> > -{
> > - struct pmu_alias *pmu_alias;
> > -
> > - list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
> > - if (!strcmp(name, pmu_alias->alias))
> > - return pmu_alias->name;
> > - }
> > -
> > - return name;
> > -}
> > -
> > -const char *pmu_find_real_name(const char *name)
> > -{
> > - if (cached_list)
> > - return __pmu_find_real_name(name);
> > -
> > - setup_pmu_alias_list();
> > - cached_list = true;
> > -
> > - return __pmu_find_real_name(name);
> > -}
> > -
> > -static const char *__pmu_find_alias_name(const char *name)
> > -{
> > - struct pmu_alias *pmu_alias;
> > -
> > - list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
> > - if (!strcmp(name, pmu_alias->name))
> > - return pmu_alias->alias;
> > - }
> > - return NULL;
> > -}
> > -
> > -const char *pmu_find_alias_name(const char *name)
> > -{
> > - if (cached_list)
> > - return __pmu_find_alias_name(name);
> > -
> > - setup_pmu_alias_list();
> > - cached_list = true;
> > -
> > - return __pmu_find_alias_name(name);
> > -}
> > -
> > int perf_pmus__num_mem_pmus(void)
> > {
> > /* AMD uses IBS OP pmu and not a core PMU for perf mem/c2c */
> > diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> > index 0d81c059c91c..0f5c6ed257a8 100644
> > --- a/tools/perf/util/pmu.c
> > +++ b/tools/perf/util/pmu.c
> > @@ -937,16 +937,27 @@ perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> > return NULL;
> > }
> >
> > -const char * __weak
> > -pmu_find_real_name(const char *name)
> > +static char *pmu_find_alias_name(struct perf_pmu *pmu, int dirfd)
> > {
> > - return name;
> > -}
> > + FILE *file = perf_pmu__open_file_at(pmu, dirfd, "alias");
> > + char *line = NULL;
> > + size_t line_len = 0;
> > + ssize_t ret;
> >
> > -const char * __weak
> > -pmu_find_alias_name(const char *name __maybe_unused)
> > -{
> > - return NULL;
> > + if (!file)
> > + return NULL;
> > +
> > + ret = getline(&line, &line_len, file);
> > + if (ret < 0) {
> > + fclose(file);
> > + return NULL;
> > + }
> > + /* Remove trailing newline. */
> > + if (ret > 0 && line[ret - 1] == '\n')
> > + line[--ret] = '\0';
> > +
> > + fclose(file);
> > + return line;
> > }
> >
> > static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
> > @@ -957,12 +968,10 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
> > return max_precise;
> > }
> >
> > -struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name)
> > +struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name)
> > {
> > struct perf_pmu *pmu;
> > __u32 type;
> > - const char *name = pmu_find_real_name(lookup_name);
> > - const char *alias_name;
> >
> > pmu = zalloc(sizeof(*pmu));
> > if (!pmu)
> > @@ -995,18 +1004,12 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
> > pmu->is_core = is_pmu_core(name);
> > pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core);
> >
> > - alias_name = pmu_find_alias_name(name);
> > - if (alias_name) {
> > - pmu->alias_name = strdup(alias_name);
> > - if (!pmu->alias_name)
> > - goto err;
> > - }
> > -
> > pmu->type = type;
> > pmu->is_uncore = pmu_is_uncore(dirfd, name);
> > if (pmu->is_uncore)
> > pmu->id = pmu_id(name);
> > pmu->max_precise = pmu_max_precise(dirfd, pmu);
> > + pmu->alias_name = pmu_find_alias_name(pmu, dirfd);
> > pmu->events_table = perf_pmu__find_events_table(pmu);
> > pmu_add_sys_aliases(pmu);
> > list_add_tail(&pmu->list, pmus);
> > diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
> > index 04b317b17d66..bc807729a7cd 100644
> > --- a/tools/perf/util/pmu.h
> > +++ b/tools/perf/util/pmu.h
> > @@ -251,8 +251,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
> >
> > int perf_pmu__match(const char *pattern, const char *name, const char *tok);
> >
> > -const char *pmu_find_real_name(const char *name);
> > -const char *pmu_find_alias_name(const char *name);
> > double perf_pmu__cpu_slots_per_cycle(void);
> > int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
> > int perf_pmu__pathname_scnprintf(char *buf, size_t size,
> > diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
> > index 64e798e68a2d..ce4931461741 100644
> > --- a/tools/perf/util/pmus.c
> > +++ b/tools/perf/util/pmus.c
> > @@ -37,6 +37,8 @@ static LIST_HEAD(other_pmus);
> > static bool read_sysfs_core_pmus;
> > static bool read_sysfs_all_pmus;
> >
> > +static void pmu_read_sysfs(bool core_only);
> > +
> > int pmu_name_len_no_suffix(const char *str, unsigned long *num)
> > {
> > int orig_len, len;
> > @@ -124,6 +126,14 @@ struct perf_pmu *perf_pmus__find(const char *name)
> > pmu = perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name);
> > close(dirfd);
> >
> > + if (!pmu) {
> > + /*
> > + * Looking up an inidividual PMU failed. This may mean name is
> > + * an alias, so read the PMUs from sysfs and try to find again.
> > + */
> > + pmu_read_sysfs(core_pmu);
> > + pmu = pmu_find(name);
> > + }
> > return pmu;
> > }
> >
> > --
> > 2.42.0.515.g380fc7ccd1-goog
> >