Re: [PATCH] perf stat: Add event-interval-print option

From: Aaron Lindsay
Date: Thu Aug 24 2017 - 16:39:21 EST


Does anyone have any feedback on this patch or the idea in general?

Also, (for our bookkeeping - not trying to rush things) is there any
chance this will still make it in for the 4.14 merge window, or is it
4.15 material at the earliest?

-Aaron

On Aug 16 11:08, Aaron Lindsay wrote:
> This allows for printing counts at regular intervals based on <n>
> instances of an arbitrary event (currently the first event provided).
>
> This can be useful when comparing `perf stat` runs of fixed-work
> workloads. For instance, you may want to compare interval-by-interval
> stats between two runs based on instruction count rather than time
> intervals to ensure the same sections of the userspace component of code
> are aligned when comparing them (which wouldn't necessarily be the case
> if the kernel was misbehaving in only one of the compared runs).
>
> Signed-off-by: Aaron Lindsay <alindsay@xxxxxxxxxxxxxx>
> Tested-by: Digant Desai <digantd@xxxxxxxxxxxxxx>
> ---
> tools/perf/builtin-stat.c | 114 ++++++++++++++++++++++++++++++++++++++++++----
> tools/perf/util/evsel.h | 1 +
> tools/perf/util/stat.h | 1 +
> 3 files changed, 107 insertions(+), 9 deletions(-)
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 48ac53b..e400cd3 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -71,7 +71,9 @@
> #include <api/fs/fs.h>
> #include <errno.h>
> #include <signal.h>
> +#include <poll.h>
> #include <stdlib.h>
> +#include <sys/mman.h>
> #include <sys/prctl.h>
> #include <inttypes.h>
> #include <locale.h>
> @@ -224,7 +226,8 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
> * Some events get initialized with sample_(period/type) set,
> * like tracepoints. Clear it up for counting.
> */
> - attr->sample_period = 0;
> + if (!evsel->event_interval)
> + attr->sample_period = 0;
>
> /*
> * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
> @@ -562,6 +565,7 @@ static int store_counter_ids(struct perf_evsel *counter)
> static int __run_perf_stat(int argc, const char **argv)
> {
> int interval = stat_config.interval;
> + int event_interval = stat_config.event_interval;
> char msg[BUFSIZ];
> unsigned long long t0, t1;
> struct perf_evsel *counter;
> @@ -571,6 +575,9 @@ static int __run_perf_stat(int argc, const char **argv)
> const bool forks = (argc > 0);
> bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
> struct perf_evsel_config_term *err_term;
> + bool first_counter = true;
> +
> + struct pollfd interval_pollfd = {.fd = 0, .events = POLLIN};
>
> if (interval) {
> ts.tv_sec = interval / USEC_PER_MSEC;
> @@ -594,6 +601,32 @@ static int __run_perf_stat(int argc, const char **argv)
>
> evlist__for_each_entry(evsel_list, counter) {
> try_again:
> + if (first_counter && event_interval) {
> + /*
> + * Setup this counter as an event interval by pinning
> + * it, setting it to sample based on the user-specified
> + * period, and to return from polling on expiration
> + * (i.e. when any data is written to the buffer)
> + */
> + int nthreads = thread_map__nr(evsel_list->threads);
> + int ncpus = perf_evsel__nr_cpus(counter);
> +
> + if (nthreads > 1 || ncpus > 1) {
> + perror("Failed to setup event-interval-print "
> + "because the event has more than one "
> + "CPU or thread.");
> + return -1;
> + }
> +
> + counter->attr.pinned = 1;
> + counter->attr.watermark = 1;
> + counter->attr.wakeup_watermark = 1;
> + counter->attr.sample_period = event_interval;
> + counter->attr.freq = 0;
> + counter->event_interval = true;
> +
> + pr_debug2("Polling on %s\n", perf_evsel__name(counter));
> + }
> if (create_perf_stat_counter(counter) < 0) {
> /*
> * PPC returns ENXIO for HW counters until 2.6.37
> @@ -633,6 +666,33 @@ static int __run_perf_stat(int argc, const char **argv)
>
> if (STAT_RECORD && store_counter_ids(counter))
> return -1;
> +
> + if (first_counter && event_interval) {
> + void *m;
> +
> + interval_pollfd.fd =
> + *(int *)xyarray__entry(counter->fd, 0, 0);
> +
> + /*
> + * Do not set PROT_WRITE since the kernel will stop
> + * writing to the mmap buffer when it fills if
> + * data_tail is not set. Map 2 pages since the kernel
> + * requires the first for metadata.
> + */
> + m = mmap(NULL, 2 * getpagesize(), PROT_READ, MAP_SHARED,
> + interval_pollfd.fd, 0);
> + if (m == (void *) -1) {
> + perror("Failed to mmap print-event-interval "
> + "fd");
> + return -1;
> + }
> + }
> +
> + first_counter = false;
> + }
> + if (event_interval && !interval_pollfd.fd) {
> + perror("Failed to initialize print-event-interval fd");
> + return -1;
> }
>
> if (perf_evlist__apply_filters(evsel_list, &counter)) {
> @@ -677,7 +737,18 @@ static int __run_perf_stat(int argc, const char **argv)
> perf_evlist__start_workload(evsel_list);
> enable_counters();
>
> - if (interval) {
> + if (event_interval) {
> + while (!waitpid(child_pid, &status, WNOHANG)) {
> + /*
> + * Wait for the event to expire for at most 1
> + * second
> + */
> + if (poll(&interval_pollfd, 1, 1000) > 0) {
> + if (interval_pollfd.revents & (POLLIN))
> + process_interval();
> + }
> + }
> + } else if (interval) {
> while (!waitpid(child_pid, &status, WNOHANG)) {
> nanosleep(&ts, NULL);
> process_interval();
> @@ -696,9 +767,20 @@ static int __run_perf_stat(int argc, const char **argv)
> } else {
> enable_counters();
> while (!done) {
> - nanosleep(&ts, NULL);
> - if (interval)
> - process_interval();
> + if (event_interval) {
> + /*
> + * Wait for the event to expire for at most 1
> + * second
> + */
> + if (poll(&interval_pollfd, 1, 1000) > 0) {
> + if (interval_pollfd.revents & (POLLIN))
> + process_interval();
> + }
> + } else {
> + nanosleep(&ts, NULL);
> + if (interval)
> + process_interval();
> + }
> }
> }
>
> @@ -1614,7 +1696,7 @@ static void print_footer(void)
>
> static void print_counters(struct timespec *ts, int argc, const char **argv)
> {
> - int interval = stat_config.interval;
> + int interval = stat_config.interval || stat_config.event_interval;
> struct perf_evsel *counter;
> char buf[64], *prefix = NULL;
>
> @@ -1781,6 +1863,8 @@ static int enable_metric_only(const struct option *opt __maybe_unused,
> "command to run after to the measured command"),
> OPT_UINTEGER('I', "interval-print", &stat_config.interval,
> "print counts at regular interval in ms (>= 10)"),
> + OPT_UINTEGER('E', "event-interval-print", &stat_config.event_interval,
> + "print counts every <n> of the first event specified"),
> OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
> "aggregate counts per processor socket", AGGR_SOCKET),
> OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
> @@ -2546,7 +2630,7 @@ int cmd_stat(int argc, const char **argv)
> int status = -EINVAL, run_idx;
> const char *mode;
> FILE *output = stderr;
> - unsigned int interval;
> + unsigned int interval, event_interval;
> const char * const stat_subcommands[] = { "record", "report" };
>
> setlocale(LC_ALL, "");
> @@ -2577,6 +2661,7 @@ int cmd_stat(int argc, const char **argv)
> return __cmd_report(argc, argv);
>
> interval = stat_config.interval;
> + event_interval = stat_config.event_interval;
>
> /*
> * For record command the -o is already taken care of.
> @@ -2704,6 +2789,17 @@ int cmd_stat(int argc, const char **argv)
> if (stat_config.aggr_mode == AGGR_THREAD)
> thread_map__read_comms(evsel_list->threads);
>
> + if (interval && event_interval) {
> + pr_err("interval-print and event-interval-print options "
> + "cannot be used together\n");
> + goto out;
> + }
> +
> + if (event_interval && !no_inherit) {
> + pr_err("The event-interval-print option requires no-inherit\n");
> + goto out;
> + }
> +
> if (interval && interval < 100) {
> if (interval < 10) {
> pr_err("print interval must be >= 10ms\n");
> @@ -2715,7 +2811,7 @@ int cmd_stat(int argc, const char **argv)
> "Please proceed with caution.\n");
> }
>
> - if (perf_evlist__alloc_stats(evsel_list, interval))
> + if (perf_evlist__alloc_stats(evsel_list, interval || event_interval))
> goto out;
>
> if (perf_stat_init_aggr_mode())
> @@ -2747,7 +2843,7 @@ int cmd_stat(int argc, const char **argv)
> }
> }
>
> - if (!forever && status != -1 && !interval)
> + if (!forever && status != -1 && !interval && !event_interval)
> print_counters(NULL, argc, argv);
>
> if (STAT_RECORD) {
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index d101695..44a65e6 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -121,6 +121,7 @@ struct perf_evsel {
> bool per_pkg;
> bool precise_max;
> bool ignore_missing_thread;
> + bool event_interval;
> /* parse modifier helper */
> int exclude_GH;
> int nr_members;
> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
> index 7522bf1..db4dda4 100644
> --- a/tools/perf/util/stat.h
> +++ b/tools/perf/util/stat.h
> @@ -46,6 +46,7 @@ struct perf_stat_config {
> bool scale;
> FILE *output;
> unsigned int interval;
> + unsigned int event_interval;
> };
>
> void update_stats(struct stats *stats, u64 val);
> --
> Qualcomm Datacenter Technologies as an affiliate of Qualcomm Technologies, Inc.
> Qualcomm Technologies, Inc. is a member of the
> Code Aurora Forum, a Linux Foundation Collaborative Project.
>

--
Qualcomm Datacenter Technologies as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.