Re: [PATCH 05/12] perf test: Add coresight test to check all threads get some data logged

From: Leo Yan
Date: Mon Jan 03 2022 - 02:07:13 EST


On Wed, Dec 15, 2021 at 04:03:56PM +0000, carsten.haitzler@xxxxxxxxxxxx wrote:
> From: Carsten Haitzler <carsten.haitzler@xxxxxxx>
>
> This adds a test and test scripts to check that all threads in the
> target binary end up logging some kind of coresight aux data and that
> they are not missing.
>
> Signed-off-by: Carsten Haitzler <carsten.haitzler@xxxxxxx>
> ---
> .../coresight_thread_loop_check_tid_10.sh | 19 ++++
> .../coresight_thread_loop_check_tid_2.sh | 19 ++++
> .../coresight_thread_loop_check_tid_250.sh | 19 ++++
> .../perf/tests/shell/tools/coresight/Makefile | 3 +-
> .../tools/coresight/thread_loop/Makefile | 29 +++++++
> .../tools/coresight/thread_loop/thread_loop.c | 86 +++++++++++++++++++
> 6 files changed, 174 insertions(+), 1 deletion(-)
> create mode 100755 tools/perf/tests/shell/coresight_thread_loop_check_tid_10.sh
> create mode 100755 tools/perf/tests/shell/coresight_thread_loop_check_tid_2.sh
> create mode 100755 tools/perf/tests/shell/coresight_thread_loop_check_tid_250.sh
> create mode 100644 tools/perf/tests/shell/tools/coresight/thread_loop/Makefile
> create mode 100644 tools/perf/tests/shell/tools/coresight/thread_loop/thread_loop.c
>
> diff --git a/tools/perf/tests/shell/coresight_thread_loop_check_tid_10.sh b/tools/perf/tests/shell/coresight_thread_loop_check_tid_10.sh
> new file mode 100755
> index 000000000000..283ad9facdee
> --- /dev/null
> +++ b/tools/perf/tests/shell/coresight_thread_loop_check_tid_10.sh
> @@ -0,0 +1,19 @@
> +#!/bin/sh -e
> +# Coresight / Thread Loop 10 Threads - Check TID
> +
> +# SPDX-License-Identifier: GPL-2.0
> +# Carsten Haitzler <carsten.haitzler@xxxxxxx>, 2021
> +
> +TEST="thread_loop"
> +. $(dirname $0)/lib/coresight.sh
> +ARGS="10 2000"
> +DATV="check-tid-10th"
> +DATA="$DATD/perf-$TEST-$DATV.data"
> +STDO="$DATD/perf-$TEST-$DATV.stdout"
> +
> +SHOW_TID=1 perf record -s $PERFRECOPT -o "$DATA" "$BIN" $ARGS > $STDO
> +
> +perf_dump_aux_tid_verify "$DATA" "$STDO"
> +
> +err=$?
> +exit $err
> diff --git a/tools/perf/tests/shell/coresight_thread_loop_check_tid_2.sh b/tools/perf/tests/shell/coresight_thread_loop_check_tid_2.sh
> new file mode 100755
> index 000000000000..ce8ba534bba2
> --- /dev/null
> +++ b/tools/perf/tests/shell/coresight_thread_loop_check_tid_2.sh
> @@ -0,0 +1,19 @@
> +#!/bin/sh -e
> +# Coresight / Thread Loop 2 Threads - Check TID
> +
> +# SPDX-License-Identifier: GPL-2.0
> +# Carsten Haitzler <carsten.haitzler@xxxxxxx>, 2021
> +
> +TEST="thread_loop"
> +. $(dirname $0)/lib/coresight.sh
> +ARGS="2 4000"
> +DATV="check-tid-2th"
> +DATA="$DATD/perf-$TEST-$DATV.data"
> +STDO="$DATD/perf-$TEST-$DATV.stdout"
> +
> +SHOW_TID=1 perf record -s $PERFRECOPT -o "$DATA" "$BIN" $ARGS > $STDO
> +
> +perf_dump_aux_tid_verify "$DATA" "$STDO"
> +
> +err=$?
> +exit $err
> diff --git a/tools/perf/tests/shell/coresight_thread_loop_check_tid_250.sh b/tools/perf/tests/shell/coresight_thread_loop_check_tid_250.sh
> new file mode 100755
> index 000000000000..cb14581c1e68
> --- /dev/null
> +++ b/tools/perf/tests/shell/coresight_thread_loop_check_tid_250.sh
> @@ -0,0 +1,19 @@
> +#!/bin/sh -e
> +# Coresight / Thread Loop 250 Threads - Check TID
> +
> +# SPDX-License-Identifier: GPL-2.0
> +# Carsten Haitzler <carsten.haitzler@xxxxxxx>, 2021
> +
> +TEST="thread_loop"
> +. $(dirname $0)/lib/coresight.sh
> +ARGS="250 100"
> +DATV="check-tid-250th"
> +DATA="$DATD/perf-$TEST-$DATV.data"
> +STDO="$DATD/perf-$TEST-$DATV.stdout"
> +
> +SHOW_TID=1 perf record -s $PERFRECOPT -o "$DATA" "$BIN" $ARGS > $STDO
> +
> +perf_dump_aux_tid_verify "$DATA" "$STDO"
> +
> +err=$?
> +exit $err

From this case I start to understand why lib/coresight.sh sets the
AUX buffer to 250MB: a large buffer can capture trace data for all
threads, especially when there is a large number of threads.

It seems to me that this case can run in a short time on a server, but
I think (sorry if I am wrong) it might take much longer on an embedded
system, which might cause the test to fail because of two factors:

- The resources (e.g. the large memory size required) put pressure on
an embedded system;
- The execution time (IIRC, every test case should finish within
5 minutes).

Do you think it makes sense for us to use only 32 or 64 threads for
the testing? That would give us good test coverage, and we would not
need to maintain multiple cases for 2/10/250 threads.

Thanks,
Leo

> diff --git a/tools/perf/tests/shell/tools/coresight/Makefile b/tools/perf/tests/shell/tools/coresight/Makefile
> index 723006ea827c..1edab729db76 100644
> --- a/tools/perf/tests/shell/tools/coresight/Makefile
> +++ b/tools/perf/tests/shell/tools/coresight/Makefile
> @@ -5,7 +5,8 @@ include ../../../../../../tools/scripts/Makefile.arch
> include ../../../../../../tools/scripts/utilities.mak
>
> SUBDIRS = \
> - asm_pure_loop
> + asm_pure_loop \
> + thread_loop
>
> all: $(SUBDIRS)
> $(SUBDIRS):
> diff --git a/tools/perf/tests/shell/tools/coresight/thread_loop/Makefile b/tools/perf/tests/shell/tools/coresight/thread_loop/Makefile
> new file mode 100644
> index 000000000000..424df4e8b0e6
> --- /dev/null
> +++ b/tools/perf/tests/shell/tools/coresight/thread_loop/Makefile
> @@ -0,0 +1,29 @@
> +# SPDX-License-Identifier: GPL-2.0
> +# Carsten Haitzler <carsten.haitzler@xxxxxxx>, 2021
> +include ../Makefile.miniconfig
> +
> +BIN=thread_loop
> +LIB=-pthread
> +
> +all: $(BIN)
> +
> +$(BIN): $(BIN).c
> +ifdef CORESIGHT
> +ifeq ($(ARCH),arm64)
> + $(Q)$(CC) $(BIN).c -o $(BIN) $(LIB)
> +endif
> +endif
> +
> +install-tests: all
> +ifdef CORESIGHT
> +ifeq ($(ARCH),arm64)
> + $(call QUIET_INSTALL, tests) \
> + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/tools/$(BIN)'; \
> + $(INSTALL) $(BIN) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/tools/$(BIN)/$(BIN)'
> +endif
> +endif
> +
> +clean:
> + $(Q)$(RM) -f $(BIN)
> +
> +.PHONY: all clean install-tests
> diff --git a/tools/perf/tests/shell/tools/coresight/thread_loop/thread_loop.c b/tools/perf/tests/shell/tools/coresight/thread_loop/thread_loop.c
> new file mode 100644
> index 000000000000..c0158fac7d0b
> --- /dev/null
> +++ b/tools/perf/tests/shell/tools/coresight/thread_loop/thread_loop.c
> @@ -0,0 +1,86 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Carsten Haitzler <carsten.haitzler@xxxxxxx>, 2021
> +
> +// define this for gettid()
> +#define _GNU_SOURCE
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <pthread.h>
> +#include <sys/syscall.h>
> +#ifndef SYS_gettid
> +// gettid is 178 on arm64
> +# define SYS_gettid 178
> +#endif
> +#define gettid() syscall(SYS_gettid)
> +
> +struct args {
> + unsigned int loops;
> + pthread_t th;
> + void *ret;
> +};
> +
> +static void *thrfn(void *arg)
> +{
> + struct args *a = arg;
> + int i = 0, len = a->loops;
> +
> + if (getenv("SHOW_TID")) {
> + unsigned long long tid = gettid();
> +
> + printf("%llu\n", tid);
> + }
> + asm volatile(
> + "loop:\n"
> + "add %[i], %[i], #1\n"
> + "cmp %[i], %[len]\n"
> + "blt loop\n"
> + : /* out */
> + : /* in */ [i] "r" (i), [len] "r" (len)
> + : /* clobber */
> + );
> + return (void *)(long)i;
> +}
> +
> +static pthread_t new_thr(void *(*fn) (void *arg), void *arg)
> +{
> + pthread_t t;
> + pthread_attr_t attr;
> +
> + pthread_attr_init(&attr);
> + pthread_create(&t, &attr, fn, arg);
> + return t;
> +}
> +
> +int main(int argc, char **argv)
> +{
> + unsigned int i, len, thr;
> + pthread_t threads[256];
> + struct args args[256];
> +
> + if (argc < 3) {
> + printf("ERR: %s [numthreads] [numloops (millions)]\n", argv[0]);
> + exit(1);
> + }
> +
> + thr = atoi(argv[1]);
> + if ((thr < 1) || (thr > 256)) {
> + printf("ERR: threads 1-256\n");
> + exit(1);
> + }
> + len = atoi(argv[2]);
> + if ((len < 1) || (len > 4000)) {
> + printf("ERR: max loops 4000 (millions)\n");
> + exit(1);
> + }
> + len *= 1000000;
> + for (i = 0; i < thr; i++) {
> + args[i].loops = len;
> + args[i].th = new_thr(thrfn, &(args[i]));
> + }
> + for (i = 0; i < thr; i++)
> + pthread_join(args[i].th, &(args[i].ret));
> + return 0;
> +}
> --
> 2.32.0
>