[PATCH 1/2] tracing: Add .percent suffix option to histogram values

From: Masami Hiramatsu (Google)
Date: Sun Jul 31 2022 - 23:01:05 EST


From: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>

Add .percent suffix option to show the histogram values in percentage.
This feature is useful when we need yo undersntand the overall trend
for the histograms of large values.
E.g. this shows the runtime percentage for each tasks.

------
# cd /sys/kernel/debug/tracing/
# echo hist:keys=pid:vals=hitcount,runtime.percent:sort=pid > \
events/sched/sched_stat_runtime/trigger
# sleep 10
# cat events/sched/sched_stat_runtime/hist
# event histogram
#
# trigger info: hist:keys=pid:vals=hitcount,runtime.percent:sort=pid:size=2048 [active]
#

{ pid: 14 } hitcount: 9 runtime: 2.48
{ pid: 16 } hitcount: 38 runtime: 5.11
{ pid: 59 } hitcount: 30 runtime: 10.30
{ pid: 61 } hitcount: 73 runtime: 13.19
{ pid: 64 } hitcount: 1 runtime: 0.22
{ pid: 65 } hitcount: 13 runtime: 2.53
{ pid: 67 } hitcount: 11 runtime: 2.35
{ pid: 69 } hitcount: 8 runtime: 1.40
{ pid: 77 } hitcount: 7 runtime: 1.83
{ pid: 145 } hitcount: 41 runtime: 33.03
{ pid: 152 } hitcount: 8 runtime: 11.90
{ pid: 153 } hitcount: 6 runtime: 8.09
{ pid: 154 } hitcount: 5 runtime: 7.50

Totals:
Hits: 250
Entries: 13
Dropped: 0
-----

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>
---
kernel/trace/trace.c | 3 +-
kernel/trace/trace_events_hist.c | 66 ++++++++++++++++++++++++++++++++++----
2 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7eb5bce62500..4f54f2494370 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5700,7 +5700,8 @@ static const char readme_msg[] =
"\t .syscall display a syscall id as a syscall name\n"
"\t .log2 display log2 value rather than raw number\n"
"\t .buckets=size display values in groups of size rather than raw number\n"
- "\t .usecs display a common_timestamp in microseconds\n\n"
+ "\t .usecs display a common_timestamp in microseconds\n"
+ "\t .percent display a number of percentage value\n\n"
"\t The 'pause' parameter can be used to pause an existing hist\n"
"\t trigger or to start a hist trigger but not log any events\n"
"\t until told to do so. 'continue' can be used to start or\n"
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index fdf784620c28..1103b9eb0a74 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -477,6 +477,7 @@ enum hist_field_flags {
HIST_FIELD_FL_ALIAS = 1 << 16,
HIST_FIELD_FL_BUCKET = 1 << 17,
HIST_FIELD_FL_CONST = 1 << 18,
+ HIST_FIELD_FL_PERCENT = 1 << 19,
};

struct var_defs {
@@ -1682,6 +1683,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field)
flags_str = "buckets";
else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
flags_str = "usecs";
+ else if (hist_field->flags & HIST_FIELD_FL_PERCENT)
+ flags_str = "percent";

return flags_str;
}
@@ -2290,6 +2293,10 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
if (ret || !(*buckets))
goto error;
*flags |= HIST_FIELD_FL_BUCKET;
+ } else if (strncmp(modifier, "percent", 7) == 0) {
+ if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY))
+ goto error;
+ *flags |= HIST_FIELD_FL_PERCENT;
} else {
error:
hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier));
@@ -4254,8 +4261,13 @@ static int create_val_fields(struct hist_trigger_data *hist_data,
if (!field_str)
break;

- if (strcmp(field_str, "hitcount") == 0)
+ if (strncmp(field_str, "hitcount", 8) == 0 &&
+ (field_str[8] == '.' || field_str[8] == '\0')) {
+ if (strncmp(field_str + 8, ".percent", 8) == 0)
+ hist_data->fields[HITCOUNT_IDX]->flags |=
+ HIST_FIELD_FL_PERCENT;
continue;
+ }

ret = create_val_field(hist_data, j++, file, field_str);
if (ret)
@@ -5190,18 +5202,34 @@ static void hist_trigger_print_key(struct seq_file *m,
seq_puts(m, "}");
}

+/* Get the 100 times of the percentage of @val in @total */
+static inline unsigned int __get_percentage(u64 val, u64 total)
+{
+ if (val < (U64_MAX / 10000))
+ return (unsigned int)(val * 10000 / total);
+ else
+ return val / (total / 10000);
+}
+
static void hist_trigger_entry_print(struct seq_file *m,
struct hist_trigger_data *hist_data,
+ u64 *totals,
void *key,
struct tracing_map_elt *elt)
{
const char *field_name;
- unsigned int i;
+ unsigned int i, pc;
+ u64 val;

hist_trigger_print_key(m, hist_data, key, elt);

- seq_printf(m, " hitcount: %10llu",
- tracing_map_read_sum(elt, HITCOUNT_IDX));
+ i = HITCOUNT_IDX;
+ val = tracing_map_read_sum(elt, i);
+ if (hist_data->fields[i]->flags & HIST_FIELD_FL_PERCENT) {
+ pc = __get_percentage(val, totals[i]);
+ seq_printf(m, " hitcount: %7u.%02u", pc / 100, pc % 100);
+ } else
+ seq_printf(m, " hitcount: %10llu", val);

for (i = 1; i < hist_data->n_vals; i++) {
field_name = hist_field_name(hist_data->fields[i], 0);
@@ -5210,7 +5238,12 @@ static void hist_trigger_entry_print(struct seq_file *m,
hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR)
continue;

- if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
+ if (hist_data->fields[i]->flags & HIST_FIELD_FL_PERCENT) {
+ val = tracing_map_read_sum(elt, i);
+ pc = __get_percentage(val, totals[i]);
+ seq_printf(m, " %s: %7u.%02u", field_name,
+ pc / 100, pc % 100);
+ } else if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
seq_printf(m, " %s: %10llx", field_name,
tracing_map_read_sum(elt, i));
} else {
@@ -5229,7 +5262,8 @@ static int print_entries(struct seq_file *m,
{
struct tracing_map_sort_entry **sort_entries = NULL;
struct tracing_map *map = hist_data->map;
- int i, n_entries;
+ int i, j, n_entries;
+ u64 *totals = NULL;

n_entries = tracing_map_sort_entries(map, hist_data->sort_keys,
hist_data->n_sort_keys,
@@ -5237,11 +5271,29 @@ static int print_entries(struct seq_file *m,
if (n_entries < 0)
return n_entries;

+ for (j = 0; j < hist_data->n_vals; j++) {
+ if (!(hist_data->fields[j]->flags & HIST_FIELD_FL_PERCENT))
+ continue;
+ if (!totals) {
+ totals = kcalloc(hist_data->n_vals, sizeof(u64),
+ GFP_KERNEL);
+ if (!totals) {
+ n_entries = -ENOMEM;
+ goto out;
+ }
+ }
+ for (i = 0; i < n_entries; i++)
+ totals[j] += tracing_map_read_sum(
+ sort_entries[i]->elt, j);
+ }
+
for (i = 0; i < n_entries; i++)
- hist_trigger_entry_print(m, hist_data,
+ hist_trigger_entry_print(m, hist_data, totals,
sort_entries[i]->key,
sort_entries[i]->elt);

+ kfree(totals);
+out:
tracing_map_destroy_sort_entries(sort_entries, n_entries);

return n_entries;