[PATCH] perf stat: Take cgroups into account for shadow stats

From: Namhyung Kim
Date: Fri Nov 13 2020 - 21:37:14 EST


As of now it doesn't consider cgroups when collecting shadow stats and
metrics so counter values from different cgroups will be saved in a
same slot. This resulted in an incorrect numbers when those cgroups
have different workloads.

For example, let's look at the below - the cgroup A and C runs same
workload which burns a cpu while cgroup B runs a light workload.

$ perf stat -a -e cycles,instructions --for-each-cgroup A,B,C sleep 1

Performance counter stats for 'system wide':

3,958,116,522 cycles A
6,722,650,929 instructions A # 2.53 insn per cycle
1,132,741 cycles B
571,743 instructions B # 0.00 insn per cycle
4,007,799,935 cycles C
6,793,181,523 instructions C # 2.56 insn per cycle

1.001050869 seconds time elapsed

When I run perf stat with single workload, it usually shows IPC around 1.7.
We can verify it (6,722,650,929.0 / 3,958,116,522 = 1.698) for cgroup A.

But in this case, since cgroups are ignored, cycles are averaged so it
used the lower value for IPC calculation and resulted in around 2.5.

avg cycle: (3958116522 + 1132741 + 4007799935) / 3 = 2655683066
IPC (A) : 6722650929 / 2655683066 = 2.531
IPC (B) : 571743 / 2655683066 = 0.0002
IPC (C) : 6793181523 / 2655683066 = 2.557

We can simply compare cgroup pointers in the evsel and it'll be NULL
when cgroups are not specified. With this patch, I can see correct
numbers like below:

$ perf stat -a -e cycles,instructions --for-each-cgroup A,B,C sleep 1

Performance counter stats for 'system wide':

4,171,051,687 cycles A
7,219,793,922 instructions A # 1.73 insn per cycle
1,051,189 cycles B
583,102 instructions B # 0.55 insn per cycle
4,171,124,710 cycles C
7,192,944,580 instructions C # 1.72 insn per cycle

1.007909814 seconds time elapsed

Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
tools/perf/util/stat-shadow.c | 243 ++++++++++++++++++----------------
1 file changed, 132 insertions(+), 111 deletions(-)

diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 901265127e36..10d0f5a0fd4a 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -8,6 +8,7 @@
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
+#include "cgroup.h"
#include <linux/zalloc.h>

/*
@@ -28,6 +29,7 @@ struct saved_value {
enum stat_type type;
int ctx;
int cpu;
+ struct cgroup *cgrp;
struct runtime_stat *stat;
struct stats stats;
u64 metric_total;
@@ -57,6 +59,9 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
if (a->ctx != b->ctx)
return a->ctx - b->ctx;

+ if (a->cgrp != b->cgrp)
+ return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1;
+
if (a->evsel == NULL && b->evsel == NULL) {
if (a->stat == b->stat)
return 0;
@@ -100,7 +105,8 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
bool create,
enum stat_type type,
int ctx,
- struct runtime_stat *st)
+ struct runtime_stat *st,
+ struct cgroup *cgrp)
{
struct rblist *rblist;
struct rb_node *nd;
@@ -110,6 +116,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
.type = type,
.ctx = ctx,
.stat = st,
+ .cgrp = cgrp,
};

rblist = &st->value_list;
@@ -193,10 +200,11 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)

static void update_runtime_stat(struct runtime_stat *st,
enum stat_type type,
- int ctx, int cpu, u64 count)
+ int ctx, int cpu, u64 count,
+ struct cgroup *cgrp)
{
struct saved_value *v = saved_value_lookup(NULL, cpu, true,
- type, ctx, st);
+ type, ctx, st, cgrp);

if (v)
update_stats(&v->stats, count);
@@ -212,80 +220,81 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
{
int ctx = evsel_context(counter);
u64 count_ns = count;
+ struct cgroup *cgrp = counter->cgrp;
struct saved_value *v;

count *= counter->scale;

if (evsel__is_clock(counter))
- update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
+ update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns, cgrp);
else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
- update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
+ update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
- update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
+ update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, TRANSACTION_START))
- update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
+ update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, ELISION_START))
- update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
+ update_runtime_stat(st, STAT_ELISION, ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
- ctx, cpu, count);
+ ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
- ctx, cpu, count);
+ ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
- ctx, cpu, count);
+ ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
- ctx, cpu, count);
+ ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
- ctx, cpu, count);
+ ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
- ctx, cpu, count);
+ ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
- ctx, cpu, count);
+ ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
- ctx, cpu, count);
+ ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
- ctx, cpu, count);
+ ctx, cpu, count, cgrp);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
- ctx, cpu, count);
+ ctx, cpu, count, cgrp);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
- ctx, cpu, count);
+ ctx, cpu, count, cgrp);
else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
- update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
+ update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count, cgrp);
else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
- update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
+ update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count, cgrp);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
- update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
+ update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count, cgrp);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
- update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
+ update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count, cgrp);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
- update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
+ update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count, cgrp);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
- update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
+ update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count, cgrp);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
- update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
+ update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, SMI_NUM))
- update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
+ update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count, cgrp);
else if (perf_stat_evsel__is(counter, APERF))
- update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
+ update_runtime_stat(st, STAT_APERF, ctx, cpu, count, cgrp);

if (counter->collect_stat) {
- v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
+ v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st, cgrp);
update_stats(&v->stats, count);
if (counter->metric_leader)
v->metric_total += count;
} else if (counter->metric_leader) {
v = saved_value_lookup(counter->metric_leader,
- cpu, true, STAT_NONE, 0, st);
+ cpu, true, STAT_NONE, 0, st, cgrp);
v->metric_total += count;
v->metric_other++;
}
@@ -422,11 +431,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
}

static double runtime_stat_avg(struct runtime_stat *st,
- enum stat_type type, int ctx, int cpu)
+ enum stat_type type, int ctx, int cpu,
+ struct cgroup *cgrp)
{
struct saved_value *v;

- v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+ v = saved_value_lookup(NULL, cpu, false, type, ctx, st, cgrp);
if (!v)
return 0.0;

@@ -434,11 +444,12 @@ static double runtime_stat_avg(struct runtime_stat *st,
}

static double runtime_stat_n(struct runtime_stat *st,
- enum stat_type type, int ctx, int cpu)
+ enum stat_type type, int ctx, int cpu,
+ struct cgroup *cgrp)
{
struct saved_value *v;

- v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+ v = saved_value_lookup(NULL, cpu, false, type, ctx, st, cgrp);
if (!v)
return 0.0;

@@ -455,7 +466,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
const char *color;
int ctx = evsel_context(evsel);

- total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu, evsel->cgrp);

if (total)
ratio = avg / total * 100.0;
@@ -479,7 +490,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
const char *color;
int ctx = evsel_context(evsel);

- total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu, evsel->cgrp);

if (total)
ratio = avg / total * 100.0;
@@ -500,7 +511,7 @@ static void print_branch_misses(struct perf_stat_config *config,
const char *color;
int ctx = evsel_context(evsel);

- total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu, evsel->cgrp);

if (total)
ratio = avg / total * 100.0;
@@ -522,7 +533,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
const char *color;
int ctx = evsel_context(evsel);

- total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu, evsel->cgrp);

if (total)
ratio = avg / total * 100.0;
@@ -544,7 +555,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
const char *color;
int ctx = evsel_context(evsel);

- total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu, evsel->cgrp);

if (total)
ratio = avg / total * 100.0;
@@ -564,7 +575,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
const char *color;
int ctx = evsel_context(evsel);

- total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu, evsel->cgrp);

if (total)
ratio = avg / total * 100.0;
@@ -584,7 +595,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
const char *color;
int ctx = evsel_context(evsel);

- total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu, evsel->cgrp);

if (total)
ratio = avg / total * 100.0;
@@ -604,7 +615,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config,
const char *color;
int ctx = evsel_context(evsel);

- total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu, evsel->cgrp);

if (total)
ratio = avg / total * 100.0;
@@ -662,56 +673,61 @@ static double sanitize_val(double x)
return x;
}

-static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
+static double td_total_slots(int ctx, int cpu, struct runtime_stat *st,
+ struct cgroup *cgrp)
{
- return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
+ return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu, cgrp);
}

-static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
+static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st,
+ struct cgroup *cgrp)
{
double bad_spec = 0;
double total_slots;
double total;

- total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
- runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
- runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu, cgrp) -
+ runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu, cgrp) +
+ runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu, cgrp);

- total_slots = td_total_slots(ctx, cpu, st);
+ total_slots = td_total_slots(ctx, cpu, st, cgrp);
if (total_slots)
bad_spec = total / total_slots;
return sanitize_val(bad_spec);
}

-static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
+static double td_retiring(int ctx, int cpu, struct runtime_stat *st,
+ struct cgroup *cgrp)
{
double retiring = 0;
- double total_slots = td_total_slots(ctx, cpu, st);
+ double total_slots = td_total_slots(ctx, cpu, st, cgrp);
double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
- ctx, cpu);
+ ctx, cpu, cgrp);

if (total_slots)
retiring = ret_slots / total_slots;
return retiring;
}

-static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
+static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st,
+ struct cgroup *cgrp)
{
double fe_bound = 0;
- double total_slots = td_total_slots(ctx, cpu, st);
+ double total_slots = td_total_slots(ctx, cpu, st, cgrp);
double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
- ctx, cpu);
+ ctx, cpu, cgrp);

if (total_slots)
fe_bound = fetch_bub / total_slots;
return fe_bound;
}

-static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
+static double td_be_bound(int ctx, int cpu, struct runtime_stat *st,
+ struct cgroup *cgrp)
{
- double sum = (td_fe_bound(ctx, cpu, st) +
- td_bad_spec(ctx, cpu, st) +
- td_retiring(ctx, cpu, st));
+ double sum = (td_fe_bound(ctx, cpu, st, cgrp) +
+ td_bad_spec(ctx, cpu, st, cgrp) +
+ td_retiring(ctx, cpu, st, cgrp));
if (sum == 0)
return 0;
return sanitize_val(1.0 - sum);
@@ -724,13 +740,14 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)

static double td_metric_ratio(int ctx, int cpu,
enum stat_type type,
- struct runtime_stat *stat)
+ struct runtime_stat *stat,
+ struct cgroup *cgrp)
{
- double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
- runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
- double d = runtime_stat_avg(stat, type, ctx, cpu);
+ double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu, cgrp) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu, cgrp) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu, cgrp) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu, cgrp);
+ double d = runtime_stat_avg(stat, type, ctx, cpu, cgrp);

if (sum)
return d / sum;
@@ -743,17 +760,18 @@ static double td_metric_ratio(int ctx, int cpu,
*/

static bool full_td(int ctx, int cpu,
- struct runtime_stat *stat)
+ struct runtime_stat *stat,
+ struct cgroup *cgrp)
{
int c = 0;

- if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu, cgrp) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu, cgrp) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu, cgrp) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu, cgrp) > 0)
c++;
return c >= 2;
}
@@ -766,10 +784,11 @@ static void print_smi_cost(struct perf_stat_config *config,
double smi_num, aperf, cycles, cost = 0.0;
int ctx = evsel_context(evsel);
const char *color = NULL;
+ struct cgroup *cgrp = evsel->cgrp;

- smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
- aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
- cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+ smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu, cgrp);
+ aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu, cgrp);
+ cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu, cgrp);

if ((cycles == 0) || (aperf == 0))
return;
@@ -804,7 +823,8 @@ static int prepare_metric(struct evsel **metric_events,
scale = 1e-9;
} else {
v = saved_value_lookup(metric_events[i], cpu, false,
- STAT_NONE, 0, st);
+ STAT_NONE, 0, st,
+ metric_events[i]->cgrp);
if (!v)
break;
stats = &v->stats;
@@ -931,11 +951,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
double total, ratio = 0.0, total2;
const char *color = NULL;
int ctx = evsel_context(evsel);
+ struct cgroup *cgrp = evsel->cgrp;
struct metric_event *me;
int num = 1;

if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
- total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu, cgrp);

if (total) {
ratio = avg / total;
@@ -946,11 +967,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
}

total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
- ctx, cpu);
+ ctx, cpu, cgrp);

total = max(total, runtime_stat_avg(st,
STAT_STALLED_CYCLES_BACK,
- ctx, cpu));
+ ctx, cpu, cgrp));

if (total && avg) {
out->new_line(config, ctxp);
@@ -960,7 +981,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
ratio);
}
} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
- if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
+ if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu, cgrp) != 0)
print_branch_misses(config, cpu, evsel, avg, out, st);
else
print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
@@ -970,7 +991,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

- if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
+ if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu, cgrp) != 0)
print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
@@ -980,7 +1001,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

- if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
+ if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu, cgrp) != 0)
print_l1_icache_misses(config, cpu, evsel, avg, out, st);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
@@ -990,7 +1011,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

- if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
+ if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu, cgrp) != 0)
print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
else
print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
@@ -1000,7 +1021,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

- if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
+ if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu, cgrp) != 0)
print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
else
print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
@@ -1010,17 +1031,17 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

- if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
+ if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu, cgrp) != 0)
print_ll_cache_misses(config, cpu, evsel, avg, out, st);
else
print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
- total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu, cgrp);

if (total)
ratio = avg * 100 / total;

- if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
+ if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu, cgrp) != 0)
print_metric(config, ctxp, NULL, "%8.3f %%",
"of all cache refs", ratio);
else
@@ -1030,7 +1051,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
- total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
+ total = runtime_stat_avg(st, STAT_NSECS, 0, cpu, cgrp);

if (total) {
ratio = avg / total;
@@ -1039,7 +1060,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
}
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
- total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu, cgrp);

if (total)
print_metric(config, ctxp, NULL,
@@ -1049,8 +1070,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "transactional cycles",
0);
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
- total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
- total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
+ total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu, cgrp);
+ total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu, cgrp);

if (total2 < avg)
total2 = avg;
@@ -1061,12 +1082,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
- ctx, cpu);
+ ctx, cpu, cgrp);

if (avg)
ratio = total / avg;

- if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
+ if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu, cgrp) != 0)
print_metric(config, ctxp, NULL, "%8.0f",
"cycles / transaction", ratio);
else
@@ -1074,7 +1095,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
0);
} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
- ctx, cpu);
+ ctx, cpu, cgrp);

if (avg)
ratio = total / avg;
@@ -1087,28 +1108,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
- double fe_bound = td_fe_bound(ctx, cpu, st);
+ double fe_bound = td_fe_bound(ctx, cpu, st, cgrp);

if (fe_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
- double retiring = td_retiring(ctx, cpu, st);
+ double retiring = td_retiring(ctx, cpu, st, cgrp);

if (retiring > 0.7)
color = PERF_COLOR_GREEN;
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
- double bad_spec = td_bad_spec(ctx, cpu, st);
+ double bad_spec = td_bad_spec(ctx, cpu, st, cgrp);

if (bad_spec > 0.1)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
- double be_bound = td_be_bound(ctx, cpu, st);
+ double be_bound = td_be_bound(ctx, cpu, st, cgrp);
const char *name = "backend bound";
static int have_recovery_bubbles = -1;

@@ -1121,43 +1142,43 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,

if (be_bound > 0.2)
color = PERF_COLOR_RED;
- if (td_total_slots(ctx, cpu, st) > 0)
+ if (td_total_slots(ctx, cpu, st, cgrp) > 0)
print_metric(config, ctxp, color, "%8.1f%%", name,
be_bound * 100.);
else
print_metric(config, ctxp, NULL, NULL, name, 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
- full_td(ctx, cpu, st)) {
+ full_td(ctx, cpu, st, cgrp)) {
double retiring = td_metric_ratio(ctx, cpu,
- STAT_TOPDOWN_RETIRING, st);
-
+ STAT_TOPDOWN_RETIRING, st,
+ cgrp);
if (retiring > 0.7)
color = PERF_COLOR_GREEN;
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
- full_td(ctx, cpu, st)) {
+ full_td(ctx, cpu, st, cgrp)) {
double fe_bound = td_metric_ratio(ctx, cpu,
- STAT_TOPDOWN_FE_BOUND, st);
-
+ STAT_TOPDOWN_FE_BOUND, st,
+ cgrp);
if (fe_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
- full_td(ctx, cpu, st)) {
+ full_td(ctx, cpu, st, cgrp)) {
double be_bound = td_metric_ratio(ctx, cpu,
- STAT_TOPDOWN_BE_BOUND, st);
-
+ STAT_TOPDOWN_BE_BOUND, st,
+ cgrp);
if (be_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
be_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
- full_td(ctx, cpu, st)) {
+ full_td(ctx, cpu, st, cgrp)) {
double bad_spec = td_metric_ratio(ctx, cpu,
- STAT_TOPDOWN_BAD_SPEC, st);
-
+ STAT_TOPDOWN_BAD_SPEC, st,
+ cgrp);
if (bad_spec > 0.1)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
@@ -1165,11 +1186,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
} else if (evsel->metric_expr) {
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
- } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
+ } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu, cgrp) != 0) {
char unit = 'M';
char unit_buf[10];

- total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
+ total = runtime_stat_avg(st, STAT_NSECS, 0, cpu, cgrp);

if (total)
ratio = 1000.0 * avg / total;
--
2.29.2.299.gdc1121823c-goog