[PATCH 1/2] perf tool AMD: Use non-precise cycles as default event on certain Zen2 processors

From: Ravi Bangoria
Date: Tue Nov 07 2023 - 03:34:13 EST


By default, Perf uses precise cycles event when no explicit event is
specified by user. Precise cycles event is forwarded to ibs_op// pmu
on AMD. However, IBS has hw issue on certain Zen2 processors where
it might raise NMI without sample_valid bit set, which causes Unknown
NMI warnings. (Erratum #1215: IBS (Instruction Based Sampling) Counter
Valid Value May be Incorrect After Exit From Core C6 (CC6) State.) So,
use non-precise cycles as default event on affected processors.

This does not prevent user to use explicit precise cycles event or
ibs_op// pmu directly.

Suggested-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxx>
---
tools/perf/arch/x86/util/evlist.c | 34 +++++++++++++++++++++++++++++++
tools/perf/builtin-record.c | 2 +-
tools/perf/builtin-top.c | 2 +-
tools/perf/util/evlist.c | 12 ++++++++++-
tools/perf/util/evlist.h | 2 ++
5 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index b1ce0c52d88d..f4478179c91b 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -5,6 +5,8 @@
#include "util/evlist.h"
#include "util/parse-events.h"
#include "util/event.h"
+#include "util/env.h"
+#include "linux/string.h"
#include "topdown.h"
#include "evsel.h"

@@ -92,3 +94,35 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
/* Default ordering by insertion index. */
return lhs->core.idx - rhs->core.idx;
}
+
+/*
+ * Precise cycles event is forwarded to ibs_op// pmu on AMD. However, IBS
+ * has hw issue on certain Zen2 processors where it might raise NMI without
+ * sample_valid bit set, which causes Unknown NMI warnings. So default to
+ * non-precise cycles event on affected processors.
+ */
+const char *arch_evlist__default_cycles_event(bool can_profile_kernel)
+{
+ struct perf_env env = { .total_mem = 0, };
+ unsigned int family, model, stepping;
+ bool is_amd;
+ int ret;
+
+ perf_env__cpuid(&env);
+ is_amd = env.cpuid && strstarts(env.cpuid, "AuthenticAMD");
+ if (!is_amd)
+ goto out;
+
+ ret = sscanf(env.cpuid, "%*[^,],%u,%u,%u", &family, &model, &stepping);
+ if (ret == 3 && family == 0x17 && (
+ (model >= 0x30 && model <= 0x3f) ||
+ (model >= 0x60 && model <= 0x7f) ||
+ (model >= 0x90 && model <= 0x9f))) {
+ perf_env__exit(&env);
+ return can_profile_kernel ? "cycles" : "cycles:u";
+ }
+
+out:
+ perf_env__exit(&env);
+ return can_profile_kernel ? "cycles:P" : "cycles:Pu";
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index dcf288a4fb9a..e58d8ac8a77b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -4150,7 +4150,7 @@ int cmd_record(int argc, const char **argv)
if (rec->evlist->core.nr_entries == 0) {
bool can_profile_kernel = perf_event_paranoid_check(1);

- err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+ err = parse_event(rec->evlist, evlist__default_cycles_event(can_profile_kernel));
if (err)
goto out;
}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ea8c7eca5eee..21368421eddd 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1666,7 +1666,7 @@ int cmd_top(int argc, const char **argv)

if (!top.evlist->core.nr_entries) {
bool can_profile_kernel = perf_event_paranoid_check(1);
- int err = parse_event(top.evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+ int err = parse_event(top.evlist, evlist__default_cycles_event(can_profile_kernel));

if (err)
goto out_delete_evlist;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e36da58522ef..406ed851cafc 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -90,6 +90,16 @@ struct evlist *evlist__new(void)
return evlist;
}

+const char * __weak arch_evlist__default_cycles_event(bool can_profile_kernel)
+{
+ return can_profile_kernel ? "cycles:P" : "cycles:Pu";
+}
+
+const char *evlist__default_cycles_event(bool can_profile_kernel)
+{
+ return arch_evlist__default_cycles_event(can_profile_kernel);
+}
+
struct evlist *evlist__new_default(void)
{
struct evlist *evlist = evlist__new();
@@ -100,7 +110,7 @@ struct evlist *evlist__new_default(void)
return NULL;

can_profile_kernel = perf_event_paranoid_check(1);
- err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+ err = parse_event(evlist, evlist__default_cycles_event(can_profile_kernel));
if (err) {
evlist__delete(evlist);
evlist = NULL;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 98e7ddb2bd30..7267b4fb1981 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -91,6 +91,8 @@ struct evsel_str_handler {

struct evlist *evlist__new(void);
struct evlist *evlist__new_default(void);
+const char *arch_evlist__default_cycles_event(bool can_profile_kernel);
+const char *evlist__default_cycles_event(bool can_profile_kernel);
struct evlist *evlist__new_dummy(void);
void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
--
2.41.0