[PATCH V16 8/8] perf: test: Extend branch stack sampling test for Arm64 BRBE

From: Anshuman Khandual
Date: Thu Jan 25 2024 - 04:44:52 EST


From: James Clark <james.clark@xxxxxxx>

Add Arm64 BRBE-specific testing to the existing branch stack sampling test.
The test currently passes on the Arm FVP RevC model, but no hardware has
been tested yet.

Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: linux-perf-users@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Co-developed-by: German Gomez <german.gomez@xxxxxxx>
Signed-off-by: German Gomez <german.gomez@xxxxxxx>
Signed-off-by: James Clark <james.clark@xxxxxxx>
Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx>
---
tools/perf/tests/builtin-test.c | 1 +
tools/perf/tests/shell/test_brstack.sh | 42 ++++++++++++++++++++++++--
tools/perf/tests/tests.h | 1 +
tools/perf/tests/workloads/Build | 2 ++
tools/perf/tests/workloads/traploop.c | 39 ++++++++++++++++++++++++
5 files changed, 82 insertions(+), 3 deletions(-)
create mode 100644 tools/perf/tests/workloads/traploop.c

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 4a5973f9bb9b..bd7202ff5cca 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -139,6 +139,7 @@ static struct test_workload *workloads[] = {
&workload__sqrtloop,
&workload__brstack,
&workload__datasym,
+ &workload__traploop
};

static int num_subtests(const struct test_suite *t)
diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh
index 928790f35747..6a4069c930e8 100755
--- a/tools/perf/tests/shell/test_brstack.sh
+++ b/tools/perf/tests/shell/test_brstack.sh
@@ -53,12 +53,43 @@ test_user_branches() {
grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$" $TMPDIR/perf.script
grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$" $TMPDIR/perf.script
grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$" $TMPDIR/perf.script
+
+ if is_arm64; then
+ # on arm64 with BRBE, IRQ entries are recorded as well; an interrupt can
+ # be taken at any point in the process, so only the branch type is matched
+ grep -m1 "/IRQ/" $TMPDIR/perf.script
+ fi
set +x

# some branch types are still not being tested:
# IND COND_CALL COND_RET SYSCALL SYSRET IRQ SERROR NO_TX
}

+# Record the traploop workload with the "any,save_type,u" branch filter and
+# check that the branch stack contains both a trap leaving trap_bench and an
+# exception return back into trap_bench.
+test_arm64_trap_eret_branches() {
+ echo "Testing trap & eret branches (arm64 brbe)"
+ perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- \
+ perf test -w traploop 250
+ # put one branch entry per line so the anchored (^) patterns below can match
+ perf script -i $TMPDIR/perf.data --fields brstacksym | tr ' ' '\n' > $TMPDIR/perf.script
+ set -x
+ # BRBINF<n>.TYPE == TRAP are mapped to PERF_BR_SYSCALL by the BRBE driver
+ grep -E -m1 "^trap_bench\+[^ ]*/\[unknown\][^ ]*/SYSCALL/" $TMPDIR/perf.script
+ grep -E -m1 "^\[unknown\][^ ]*/trap_bench\+[^ ]*/ERET/" $TMPDIR/perf.script
+ set +x
+}
+
+# Record a short command with the kernel-only ("any,k") branch filter and check
+# the addresses in the resulting branch stack: at least one 0xffff... address
+# must be present and no 0x0000... address may appear. The test is skipped
+# (returns 0) when perf is not allowed to record kernel branches.
+test_arm64_kernel_branches() {
+	echo "Testing kernel branches (arm64 brbe)"
+	# skip if perf doesn't have enough privileges
+	if ! perf record --branch-filter any,k -o- -- true > /dev/null; then
+		echo "[skipped: not enough privileges]"
+		return 0
+	fi
+	perf record -o $TMPDIR/perf.data --branch-filter any,k -- uname -a
+	# one branch entry per line
+	perf script -i $TMPDIR/perf.data --fields brstack | tr ' ' '\n' > $TMPDIR/perf.script
+	grep -E -m1 "0xffff[0-9a-f]{12}" $TMPDIR/perf.script
+	# plain "grep -E": egrep is obsolescent and already implies -E
+	! grep -E -m1 "0x0000[0-9a-f]{12}" $TMPDIR/perf.script
+}
+
# first argument <arg0> is the argument passed to "--branch-stack <arg0>,save_type,u"
# second argument are the expected branch types for the given filter
test_filter() {
@@ -81,11 +112,16 @@ set -e

test_user_branches

-test_filter "any_call" "CALL|IND_CALL|COND_CALL|SYSCALL|IRQ"
+# the BRBE trap/eret and kernel-branch checks are only run when is_arm64 succeeds
+if is_arm64; then
+ test_arm64_trap_eret_branches
+ test_arm64_kernel_branches
+fi
+
+test_filter "any_call" "CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|FAULT_DATA|FAULT_INST"
test_filter "call" "CALL|SYSCALL"
test_filter "cond" "COND"
test_filter "any_ret" "RET|COND_RET|SYSRET|ERET"

test_filter "call,cond" "CALL|SYSCALL|COND"
-test_filter "any_call,cond" "CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND"
-test_filter "cond,any_call,any_ret" "COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET"
+test_filter "any_call,cond" "CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND|FAULT_DATA|FAULT_INST"
+test_filter "cond,any_call,any_ret" "COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET|FAULT_DATA|FAULT_INST"
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index dad3d7414142..6d3d575352d5 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -205,6 +205,7 @@ DECLARE_WORKLOAD(leafloop);
DECLARE_WORKLOAD(sqrtloop);
DECLARE_WORKLOAD(brstack);
DECLARE_WORKLOAD(datasym);
+DECLARE_WORKLOAD(traploop);

extern const char *dso_to_test;
extern const char *test_objdump_path;
diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
index a1f34d5861e3..a9dc93d8468b 100644
--- a/tools/perf/tests/workloads/Build
+++ b/tools/perf/tests/workloads/Build
@@ -6,8 +6,10 @@ perf-y += leafloop.o
perf-y += sqrtloop.o
perf-y += brstack.o
perf-y += datasym.o
+perf-y += traploop.o

CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE
CFLAGS_brstack.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
CFLAGS_datasym.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
+CFLAGS_traploop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
diff --git a/tools/perf/tests/workloads/traploop.c b/tools/perf/tests/workloads/traploop.c
new file mode 100644
index 000000000000..7dac94897e49
--- /dev/null
+++ b/tools/perf/tests/workloads/traploop.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include "../tests.h"
+
+/* Iteration count used by traploop() when no count is passed as an argument */
+#define BENCH_RUNS 999999
+
+/* Loop counter incremented by traploop() */
+static volatile int cnt;
+
+#ifdef __aarch64__
+/*
+ * Read an ID register from userspace. Per the TRAP + ERET note below, the
+ * access is expected to trap into the kernel and return, which gives the
+ * branch stack test a trap and an exception return with trap_bench as one
+ * endpoint of each branch.
+ */
+static void trap_bench(void)
+{
+	unsigned long val;
+
+	/*
+	 * volatile: val is never read, so without it the compiler is free to
+	 * discard the asm (and with it the trap) as soon as optimisation is
+	 * enabled for this file.
+	 */
+	asm volatile("mrs %0, ID_AA64ISAR0_EL1" : "=r" (val)); /* TRAP + ERET */
+}
+#else
+/* Empty stub so the workload still builds when __aarch64__ is not defined. */
+static void trap_bench(void)
+{
+}
+#endif
+
+/*
+ * Workload entry point: call trap_bench() num_loops times.
+ *
+ * argv[0], when present (argc > 0), overrides the default iteration count of
+ * BENCH_RUNS. It is parsed with atoi(), so an argument that is not a number
+ * results in zero iterations. Always returns 0.
+ */
+static int traploop(int argc, const char **argv)
+{
+	int num_loops = BENCH_RUNS;
+
+	if (argc > 0)
+		num_loops = atoi(argv[0]);
+
+	/*
+	 * Test with "<" before each call so that a count of N means exactly N
+	 * calls; the previous post-increment ">" test ran N + 1 of them.
+	 */
+	while (cnt++ < num_loops)
+		trap_bench();
+
+	return 0;
+}
+
+DEFINE_WORKLOAD(traploop);
--
2.25.1