Re: [PATCH 6/8] perf stat,metrics: New metricgroup output for the default mode

From: Ian Rogers
Date: Tue Jun 13 2023 - 16:17:01 EST


On Wed, Jun 7, 2023 at 9:27 AM <kan.liang@xxxxxxxxxxxxxxx> wrote:
>
> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>
> In the default mode, the current output of the metricgroup include both
> events and metrics, which is not necessary and just makes the output
> hard to read. Since different ARCHs (even different generations in the
> same ARCH) may use different events. The output also vary on different
> platforms.
>
> For a metricgroup, only outputting the value of each metric is good
> enough.
>
> Current perf may append different metric groups to the same leader
> event, or append the metrics from the same metricgroup to different
> events. That could bring confusion when perf only prints the
> metricgroup output mode. For example, print the same metricgroup name
> several times.
> Reorganize metricgroup for the default mode and make sure that
> a metricgroup can only be appended to one event.
> Sort the metricgroup for the default mode by the name of the
> metricgroup.
>
> Add a new field default_metricgroup in evsel to indicate an event of
> the default metricgroup. For those events, printout() should print
> the metricgroup name rather than events.
>
> Add print_metricgroup_header() to print out the metricgroup name in
> different output formats.
>
> On SPR
> Before:
>
> ./perf_old stat sleep 1
>
> Performance counter stats for 'sleep 1':
>
> 0.54 msec task-clock:u # 0.001 CPUs utilized
> 0 context-switches:u # 0.000 /sec
> 0 cpu-migrations:u # 0.000 /sec
> 68 page-faults:u # 125.445 K/sec
> 540,970 cycles:u # 0.998 GHz
> 556,325 instructions:u # 1.03 insn per cycle
> 123,602 branches:u # 228.018 M/sec
> 6,889 branch-misses:u # 5.57% of all branches
> 3,245,820 TOPDOWN.SLOTS:u # 18.4 % tma_backend_bound
> # 17.2 % tma_retiring
> # 23.1 % tma_bad_speculation
> # 41.4 % tma_frontend_bound
> 564,859 topdown-retiring:u
> 1,370,999 topdown-fe-bound:u
> 603,271 topdown-be-bound:u
> 744,874 topdown-bad-spec:u
> 12,661 INT_MISC.UOP_DROPPING:u # 23.357 M/sec
>
> 1.001798215 seconds time elapsed
>
> 0.000193000 seconds user
> 0.001700000 seconds sys
>
> After:
>
> $ ./perf stat sleep 1
>
> Performance counter stats for 'sleep 1':
>
> 0.51 msec task-clock:u # 0.001 CPUs utilized
> 0 context-switches:u # 0.000 /sec
> 0 cpu-migrations:u # 0.000 /sec
> 68 page-faults:u # 132.683 K/sec
> 545,228 cycles:u # 1.064 GHz
> 555,509 instructions:u # 1.02 insn per cycle
> 123,574 branches:u # 241.120 M/sec
> 6,957 branch-misses:u # 5.63% of all branches
> TopdownL1 # 17.5 % tma_backend_bound
> # 22.6 % tma_bad_speculation
> # 42.7 % tma_frontend_bound
> # 17.1 % tma_retiring
> TopdownL2 # 21.8 % tma_branch_mispredicts
> # 11.5 % tma_core_bound
> # 13.4 % tma_fetch_bandwidth
> # 29.3 % tma_fetch_latency
> # 2.7 % tma_heavy_operations
> # 14.5 % tma_light_operations
> # 0.8 % tma_machine_clears
> # 6.1 % tma_memory_bound
>
> 1.001712086 seconds time elapsed
>
> 0.000151000 seconds user
> 0.001618000 seconds sys
>
>
> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
> ---
> tools/perf/builtin-stat.c | 1 +
> tools/perf/util/evsel.h | 1 +
> tools/perf/util/metricgroup.c | 106 ++++++++++++++++++++++++++++++++-
> tools/perf/util/metricgroup.h | 1 +
> tools/perf/util/stat-display.c | 69 ++++++++++++++++++++-
> 5 files changed, 172 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 2269b3e90e9b..b274cc264d56 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -2172,6 +2172,7 @@ static int add_default_attributes(void)
>
> evlist__for_each_entry(metric_evlist, metric_evsel) {
> metric_evsel->skippable = true;
> + metric_evsel->default_metricgroup = true;
> }
> evlist__splice_list_tail(evsel_list, &metric_evlist->core.entries);
> evlist__delete(metric_evlist);
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 36a32e4ca168..61b1385108f4 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -130,6 +130,7 @@ struct evsel {
> bool reset_group;
> bool errored;
> bool needs_auxtrace_mmap;
> + bool default_metricgroup;

A comment would be useful here, something like:

If running perf stat, is this evsel a member of a Default metric group metric.

> struct hashmap *per_pkg_mask;
> int err;
> struct {
> diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> index efafa02db5e5..22181ce4f27f 100644
> --- a/tools/perf/util/metricgroup.c
> +++ b/tools/perf/util/metricgroup.c
> @@ -79,6 +79,7 @@ static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
> return NULL;
> memcpy(me, entry, sizeof(struct metric_event));
> me->evsel = ((struct metric_event *)entry)->evsel;
> + me->default_metricgroup_name = NULL;
> INIT_LIST_HEAD(&me->head);
> return &me->nd;
> }
> @@ -1133,14 +1134,19 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm,
> /**
> * metric_list_cmp - list_sort comparator that sorts metrics with more events to
> * the front. tool events are excluded from the count.
> + * For the default metrics, sort them by metricgroup name.
> */
> -static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
> +static int metric_list_cmp(void *priv, const struct list_head *l,
> const struct list_head *r)
> {
> const struct metric *left = container_of(l, struct metric, nd);
> const struct metric *right = container_of(r, struct metric, nd);
> struct expr_id_data *data;
> int i, left_count, right_count;
> + bool is_default = *(bool *)priv;
> +
> + if (is_default && left->default_metricgroup_name && right->default_metricgroup_name)
> + return strcmp(left->default_metricgroup_name, right->default_metricgroup_name);

This breaks the comment above. The events are now sorted prioritizing
default metric group names. This potentially will have an effect of
reducing sharing of events between groups, it will also break
assumptions within that code that there are always the same number of
fewer events in a metric as you process the list. To remedy this I
think you need to re-sort the metrics after the event sharing has had
a chance to share events between groups.


>
> left_count = hashmap__size(left->pctx->ids);
> perf_tool_event__for_each_event(i) {
> @@ -1497,6 +1503,91 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
> return ret;
> }
>
> +static struct metric_event *
> +metricgroup__lookup_default_metricgroup(struct rblist *metric_events,
> + struct evsel *evsel,
> + struct metric *m)
> +{
> + struct metric_event *me;
> + char *name;
> + int err;
> +
> + me = metricgroup__lookup(metric_events, evsel, true);
> + if (!me->default_metricgroup_name) {
> + if (m->pmu && strcmp(m->pmu, "cpu"))
> + err = asprintf(&name, "%s (%s)", m->default_metricgroup_name, m->pmu);
> + else
> + err = asprintf(&name, "%s", m->default_metricgroup_name);
> + if (err < 0)
> + return NULL;
> + me->default_metricgroup_name = name;
> + }
> + if (!strncmp(m->default_metricgroup_name,
> + me->default_metricgroup_name,
> + strlen(m->default_metricgroup_name)))
> + return me;
> +
> + return NULL;
> +}

A function comment would be useful as the name is confusing, why
lookup? Doesn't it create the value? Leak sanitizer isn't happy here:

```
==1545918==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 10 byte(s) in 1 object(s) allocated from:
#0 0x7f2755a7077b in __interceptor_strdup
../../../../src/libsanitizer/asan/asan_interceptors.cpp:439
#1 0x564986a8df31 in asprintf util/util.c:566
#2 0x5649869b5901 in metricgroup__lookup_default_metricgroup
util/metricgroup.c:1520
#3 0x5649869b5e57 in metricgroup__lookup_create util/metricgroup.c:1579
#4 0x5649869b6ddc in parse_groups util/metricgroup.c:1698
#5 0x5649869b7714 in metricgroup__parse_groups util/metricgroup.c:1771
#6 0x5649867da9d5 in add_default_attributes tools/perf/builtin-stat.c:2164
#7 0x5649867ddbfb in cmd_stat tools/perf/builtin-stat.c:2707
#8 0x5649868fa5a2 in run_builtin tools/perf/perf.c:323
#9 0x5649868fab13 in handle_internal_command tools/perf/perf.c:377
#10 0x5649868faedb in run_argv tools/perf/perf.c:421
#11 0x5649868fb443 in main tools/perf/perf.c:537
#12 0x7f2754846189 in __libc_start_call_main
../sysdeps/nptl/libc_start_call_main.h:58
```

> +static struct metric_event *
> +metricgroup__lookup_create(struct rblist *metric_events,
> + struct evsel **evsel,
> + struct list_head *metric_list,
> + struct metric *m,
> + bool is_default)
> +{
> + struct metric_event *me;
> + struct metric *cur;
> + struct evsel *ev;
> + size_t i;
> +
> + if (!is_default)
> + return metricgroup__lookup(metric_events, evsel[0], true);
> +
> + /*
> + * If the metric group has been attached to a previous
> + * event/metric, use that metric event.
> + */
> + list_for_each_entry(cur, metric_list, nd) {
> + if (cur == m)
> + break;
> + if (cur->pmu && strcmp(m->pmu, cur->pmu))
> + continue;
> + if (strncmp(m->default_metricgroup_name,
> + cur->default_metricgroup_name,
> + strlen(m->default_metricgroup_name)))
> + continue;
> + if (!cur->evlist)
> + continue;
> + evlist__for_each_entry(cur->evlist, ev) {
> + me = metricgroup__lookup(metric_events, ev, false);
> + if (!strncmp(m->default_metricgroup_name,
> + me->default_metricgroup_name,
> + strlen(m->default_metricgroup_name)))
> + return me;
> + }
> + }
> +
> + /*
> + * Different metric groups may append to the same leader event.
> + * For example, TopdownL1 and TopdownL2 are appended to the
> + * TOPDOWN.SLOTS event.
> + * Split it and append the new metric group to the next available
> + * event.
> + */
> + me = metricgroup__lookup_default_metricgroup(metric_events, evsel[0], m);
> + if (me)
> + return me;
> +
> + for (i = 1; i < hashmap__size(m->pctx->ids); i++) {
> + me = metricgroup__lookup_default_metricgroup(metric_events, evsel[i], m);
> + if (me)
> + return me;
> + }
> + return NULL;
> +}
> +

I have a hard time understanding this function, does it just go away
if you do the two sorts that I proposed above? Should this be
metric_event__lookup_create? A function comment saying what the code
is trying to achieve would be useful.

This appears to be trying to correct output issues by changing how
metrics are associated with events, shouldn't output issues be
resolved by fixing the output code? If not, why don't we apply this
logic to TopdownL1, why just Default?

> static int parse_groups(struct evlist *perf_evlist,
> const char *pmu, const char *str,
> bool metric_no_group,
> @@ -1512,6 +1603,7 @@ static int parse_groups(struct evlist *perf_evlist,
> LIST_HEAD(metric_list);
> struct metric *m;
> bool tool_events[PERF_TOOL_MAX] = {false};
> + bool is_default = !strcmp(str, "Default");
> int ret;
>
> if (metric_events_list->nr_entries == 0)
> @@ -1523,7 +1615,7 @@ static int parse_groups(struct evlist *perf_evlist,
> goto out;
>
> /* Sort metrics from largest to smallest. */
> - list_sort(NULL, &metric_list, metric_list_cmp);
> + list_sort((void *)&is_default, &metric_list, metric_list_cmp);
>
> if (!metric_no_merge) {
> struct expr_parse_ctx *combined = NULL;
> @@ -1603,7 +1695,15 @@ static int parse_groups(struct evlist *perf_evlist,
> goto out;
> }
>
> - me = metricgroup__lookup(metric_events_list, metric_events[0], true);
> + me = metricgroup__lookup_create(metric_events_list,
> + metric_events,
> + &metric_list, m,
> + is_default);
> + if (!me) {
> + pr_err("Cannot create metric group for default!\n");
> + ret = -EINVAL;
> + goto out;
> + }
>
> expr = malloc(sizeof(struct metric_expr));
> if (!expr) {
> diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
> index bf18274c15df..e3609b853213 100644
> --- a/tools/perf/util/metricgroup.h
> +++ b/tools/perf/util/metricgroup.h
> @@ -22,6 +22,7 @@ struct cgroup;
> struct metric_event {
> struct rb_node nd;
> struct evsel *evsel;
> + char *default_metricgroup_name;
> struct list_head head; /* list of metric_expr */
> };
>
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index a2bbdc25d979..efe5fd04c033 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -21,10 +21,12 @@
> #include "iostat.h"
> #include "pmu.h"
> #include "pmus.h"
> +#include "metricgroup.h"

This is bringing metric code from stat-shadow, which is kind of the
whole reason there is a separation and stat-shadow exists. Should the
logic exist in stat-shadow instead?

>
> #define CNTR_NOT_SUPPORTED "<not supported>"
> #define CNTR_NOT_COUNTED "<not counted>"
>
> +#define MGROUP_LEN 50
> #define METRIC_LEN 38
> #define EVNAME_LEN 32
> #define COUNTS_LEN 18
> @@ -707,6 +709,55 @@ static bool evlist__has_hybrid(struct evlist *evlist)
> return false;
> }
>
> +static void print_metricgroup_header_json(struct perf_stat_config *config,
> + struct outstate *os __maybe_unused,
> + const char *metricgroup_name)
> +{
> + fprintf(config->output, "\"metricgroup\" : \"%s\"}", metricgroup_name);
> + new_line_json(config, (void *)os);
> +}
> +

Should the output part of this patch be separate from the
evsel/evlist/meitric modifications?

Thanks,
Ian

> +static void print_metricgroup_header_csv(struct perf_stat_config *config,
> + struct outstate *os,
> + const char *metricgroup_name)
> +{
> + int i;
> +
> + for (i = 0; i < os->nfields; i++)
> + fputs(config->csv_sep, os->fh);
> + fprintf(config->output, "%s", metricgroup_name);
> + new_line_csv(config, (void *)os);
> +}
> +
> +static void print_metricgroup_header_std(struct perf_stat_config *config,
> + struct outstate *os __maybe_unused,
> + const char *metricgroup_name)
> +{
> + int n = fprintf(config->output, " %*s", EVNAME_LEN, metricgroup_name);
> +
> + fprintf(config->output, "%*s", MGROUP_LEN - n - 1, "");
> +}
> +
> +static void print_metricgroup_header(struct perf_stat_config *config,
> + struct outstate *os,
> + struct evsel *counter,
> + double noise, u64 run, u64 ena,
> + const char *metricgroup_name)
> +{
> + aggr_printout(config, os->evsel, os->id, os->aggr_nr);
> +
> + print_noise(config, counter, noise, /*before_metric=*/true);
> + print_running(config, run, ena, /*before_metric=*/true);
> +
> + if (config->json_output) {
> + print_metricgroup_header_json(config, os, metricgroup_name);
> + } else if (config->csv_output) {
> + print_metricgroup_header_csv(config, os, metricgroup_name);
> + } else
> + print_metricgroup_header_std(config, os, metricgroup_name);
> +
> +}
> +
> static void printout(struct perf_stat_config *config, struct outstate *os,
> double uval, u64 run, u64 ena, double noise, int aggr_idx)
> {
> @@ -751,10 +802,17 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
> out.force_header = false;
>
> if (!config->metric_only) {
> - abs_printout(config, os->id, os->aggr_nr, counter, uval, ok);
> + if (counter->default_metricgroup) {
> + struct metric_event *me;
>
> - print_noise(config, counter, noise, /*before_metric=*/true);
> - print_running(config, run, ena, /*before_metric=*/true);
> + me = metricgroup__lookup(&config->metric_events, counter, false);
> + print_metricgroup_header(config, os, counter, noise, run, ena,
> + me->default_metricgroup_name);
> + } else {
> + abs_printout(config, os->id, os->aggr_nr, counter, uval, ok);
> + print_noise(config, counter, noise, /*before_metric=*/true);
> + print_running(config, run, ena, /*before_metric=*/true);
> + }
> }
>
> if (ok) {
> @@ -883,6 +941,11 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
> if (counter->merged_stat)
> return;
>
> + /* Only print the metric group for the default mode */
> + if (counter->default_metricgroup &&
> + !metricgroup__lookup(&config->metric_events, counter, false))
> + return;
> +
> uniquify_counter(config, counter);
>
> val = aggr->counts.val;
> --
> 2.35.1
>