[PATCH 13/15] tracing: Add sorting to hist triggers

From: Tom Zanussi
Date: Mon Mar 02 2015 - 11:04:47 EST


Add support for sorting to hist triggers, which without this will
display entries in hash order, which is to say randomly.

Currently, support is implemented for just a primary sort key which is
by default ascending.

To specify the sort key for a trigger, append ':sort=<sort_key>',
where sort_key can be any value specified in the values= param, or the
special value 'hitcount', which is a sum of the number of times each
entry in the hashtable was hit.

With these changes, even if the user doesn't explicitly specify a sort
key, the table will be sorted ascending on hitcount. To sort in
descending order, append the .descending modifier to the sort field,
as such:

':sort=<sort_key>.descending'

Signed-off-by: Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx>
---
kernel/trace/trace.c | 2 +-
kernel/trace/trace_events_trigger.c | 279 +++++++++++++++++++++++++++++++++++-
2 files changed, 278 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 683048f..2fa41ef 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3777,7 +3777,7 @@ static const char readme_msg[] =
"\t The 'sort' param can be used to specify a value field to sort\n"
"\t on. The default if unspecified is 'hitcount' and the.\n"
"\t default sort order is 'ascending'. To sort in the opposite\n"
- "\t direction, append .descending' to the sort key.\n"
+ "\t direction, append .descending' to the sort key.\n\n"
"\t The 'pause' param can be used to pause an existing hist\n"
"\t trigger or to start a hist trigger but not log any events\n"
"\t until told to do so. 'continue' can be used to start or\n"
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 80805f3..ae75528 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -23,6 +23,7 @@
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
+#include <linux/sort.h>

#include <linux/bpf.h>

@@ -1479,6 +1480,7 @@ DEFINE_HIST_FIELD_FN(u8);
#define HIST_TRIGGER_BITS_MAX 17
#define HIST_TRIGGER_BITS_MIN 7
#define HIST_VALS_MAX 8
+#define HIST_SORT_KEYS_MAX 2

#define HIST_KEY_STRING_MAX 64

@@ -1491,9 +1493,16 @@ enum hist_field_flags {
HIST_FIELD_SYSCALL = 32,
};

+struct hist_trigger_sort_key {
+ bool use_hitcount;
+ bool descending;
+ unsigned int idx;
+};
+
struct hist_trigger_attrs {
char *keys_str;
char *vals_str;
+ char *sort_keys_str;
bool pause;
bool cont;
bool clear;
@@ -1509,6 +1518,8 @@ struct hist_trigger_data {
struct ftrace_event_file *event_file;
atomic64_t drops;
struct hist_trigger_attrs *attrs;
+ struct hist_trigger_sort_key *sort_keys[HIST_SORT_KEYS_MAX];
+ struct hist_trigger_sort_key *sort_key_cur;
unsigned int map_bits;
struct bpf_map *map;
union bpf_attr map_attr;
@@ -1521,6 +1532,11 @@ struct hist_trigger_entry {
char *comm;
};

+struct hist_trigger_sort_entry {
+ void *key;
+ struct hist_trigger_entry *entry;
+};
+
#define HIST_STACKTRACE_DEPTH 16
#define HIST_STACKTRACE_SKIP 5

@@ -1671,6 +1687,106 @@ static void destroy_hist_fields(struct hist_trigger_data *hist_data)
}
}

+static inline struct hist_trigger_sort_key *create_default_sort_key(void)
+{
+ struct hist_trigger_sort_key *sort_key;
+
+ sort_key = kzalloc(sizeof(*sort_key), GFP_KERNEL);
+ if (!sort_key)
+ return ERR_PTR(-ENOMEM);
+
+ sort_key->use_hitcount = true;
+
+ return sort_key;
+}
+
+static inline struct hist_trigger_sort_key *
+create_sort_key(char *field_name, struct hist_trigger_data *hist_data)
+{
+ struct hist_trigger_sort_key *sort_key;
+ unsigned int i;
+
+ if (!strcmp(field_name, "hitcount"))
+ return create_default_sort_key();
+
+ for (i = 0; i < hist_data->n_vals; i++)
+ if (!strcmp(field_name, hist_data->vals[i]->field->name))
+ goto out;
+
+ return ERR_PTR(-EINVAL);
+ out:
+ sort_key = kzalloc(sizeof(*sort_key), GFP_KERNEL);
+ if (!sort_key)
+ return ERR_PTR(-ENOMEM);
+
+ sort_key->idx = i;
+
+ return sort_key;
+}
+
+static int create_sort_keys(struct hist_trigger_data *hist_data)
+{
+ char *fields_str = hist_data->attrs->sort_keys_str;
+ struct hist_trigger_sort_key *sort_key;
+ char *field_str, *field_name;
+ unsigned int i;
+ int ret = 0;
+
+ if (!fields_str) {
+ sort_key = create_default_sort_key();
+ if (IS_ERR(sort_key)) {
+ ret = PTR_ERR(sort_key);
+ goto out;
+ }
+ hist_data->sort_keys[0] = sort_key;
+ goto out;
+ }
+
+ strsep(&fields_str, "=");
+ if (!fields_str) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ for (i = 0; i < HIST_SORT_KEYS_MAX; i++) {
+ field_str = strsep(&fields_str, ",");
+ if (!field_str) {
+ if (i == 0) {
+ ret = -EINVAL;
+ goto free;
+ } else
+ break;
+ }
+
+ field_name = strsep(&field_str, ".");
+ sort_key = create_sort_key(field_name, hist_data);
+ if (IS_ERR(sort_key)) {
+ ret = PTR_ERR(sort_key);
+ goto free;
+ }
+
+ if (field_str) {
+ if (!strcmp(field_str, "descending"))
+ sort_key->descending = true;
+ else if (strcmp(field_str, "ascending")) {
+ ret = -EINVAL;
+ goto free;
+ }
+ }
+ hist_data->sort_keys[i] = sort_key;
+ }
+out:
+ return ret;
+free:
+ for (i = 0; i < HIST_SORT_KEYS_MAX; i++) {
+ if (!hist_data->sort_keys[i])
+ break;
+ kfree(hist_data->sort_keys[i]);
+ hist_data->sort_keys[i] = NULL;
+ }
+ goto out;
+}
+
static int create_key_field(struct hist_trigger_data *hist_data,
struct ftrace_event_file *file,
char *field_str)
@@ -1785,6 +1901,8 @@ static int create_hist_fields(struct hist_trigger_data *hist_data,
if (ret)
goto out;
}
+
+ ret = create_sort_keys(hist_data);
out:
return ret;
}
@@ -1803,6 +1921,7 @@ static u32 get_key_size(struct hist_trigger_data *hist_data)

static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs)
{
+ kfree(attrs->sort_keys_str);
kfree(attrs->keys_str);
kfree(attrs->vals_str);
kfree(attrs);
@@ -1852,6 +1971,8 @@ static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
!strncmp(str, "vals", strlen("vals")) ||
!strncmp(str, "val", strlen("val")))
attrs->vals_str = kstrdup(str, GFP_KERNEL);
+ else if (!strncmp(str, "sort", strlen("sort")))
+ attrs->sort_keys_str = kstrdup(str, GFP_KERNEL);
else if (!strncmp(str, "pause", strlen("pause")))
attrs->pause = true;
else if (!strncmp(str, "continue", strlen("continue")) ||
@@ -2093,8 +2214,42 @@ static void hist_trigger_entry_print(struct seq_file *m,
seq_puts(m, "\n");
}

-static int print_entries(struct seq_file *m,
- struct hist_trigger_data *hist_data)
+static int cmp_entries(const struct hist_trigger_sort_entry **a,
+ const struct hist_trigger_sort_entry **b)
+{
+ const struct hist_trigger_entry *entry_a, *entry_b;
+ struct hist_trigger_sort_key *sort_key;
+ struct hist_trigger_data *hist_data;
+ u64 val_a, val_b;
+ int ret = 0;
+
+ entry_a = (*a)->entry;
+ entry_b = (*b)->entry;
+
+ hist_data = entry_a->hist_data;
+ sort_key = hist_data->sort_key_cur;
+
+ if (sort_key->use_hitcount) {
+ val_a = atomic64_read(&entry_a->hitcount);
+ val_b = atomic64_read(&entry_b->hitcount);
+ } else {
+ val_a = atomic64_read(&entry_a->sums[sort_key->idx]);
+ val_b = atomic64_read(&entry_b->sums[sort_key->idx]);
+ }
+
+ if (val_a > val_b)
+ ret = 1;
+ else if (val_a < val_b)
+ ret = -1;
+
+ if (sort_key->descending)
+ ret = -ret;
+
+ return ret;
+}
+
+static int print_entries_unsorted(struct seq_file *m,
+ struct hist_trigger_data *hist_data)
{
void *key, *next_key, *tmp_key = NULL;
struct hist_trigger_entry *entry;
@@ -2124,6 +2279,106 @@ free:
return ret;
}

+static void destroy_sort_entry(struct hist_trigger_sort_entry *entry)
+{
+ if (!entry)
+ return;
+
+ kfree(entry->key);
+ kfree(entry);
+}
+
+static void destroy_sort_entries(struct hist_trigger_sort_entry **entries,
+ unsigned int entries_size)
+{
+ unsigned int i;
+
+ for (i = 0; i < entries_size; i++)
+ destroy_sort_entry(entries[i]);
+}
+
+static struct hist_trigger_sort_entry *
+create_sort_entry(void *key, struct hist_trigger_entry *entry)
+{
+ struct hist_trigger_sort_entry *sort_entry;
+
+ sort_entry = kmalloc(sizeof(*sort_entry), GFP_KERNEL);
+ if (!sort_entry)
+ return ERR_PTR(-ENOMEM);
+
+ sort_entry->key = kmalloc(entry->hist_data->map_attr.key_size,
+ GFP_KERNEL);
+ if (!sort_entry->key) {
+ destroy_sort_entry(sort_entry);
+ return ERR_PTR(-ENOMEM);
+ }
+ memcpy(sort_entry->key, key, entry->hist_data->map_attr.key_size);
+
+ sort_entry->entry = entry;
+
+ return sort_entry;
+}
+
+static int print_entries(struct seq_file *m,
+ struct hist_trigger_data *hist_data)
+{
+ struct hist_trigger_sort_entry **sort_entries;
+ void *key, *next_key = NULL, *tmp_key = NULL;
+ struct hist_trigger_sort_entry *sort_entry;
+ struct hist_trigger_entry *entry;
+ unsigned int sort_entries_size;
+ unsigned int i = 0, j;
+ int ret = 0;
+
+ hist_data->map_attr.key_size = get_key_size(hist_data);
+
+ sort_entries_size = hist_data->map_attr.max_entries;
+ sort_entries_size *= sizeof(sort_entry);
+ sort_entries = kzalloc(sort_entries_size, GFP_KERNEL);
+
+ if (!sort_entries)
+ return -ENOMEM;
+
+ key = kzalloc(hist_data->map_attr.key_size, GFP_KERNEL);
+ if (!key) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ next_key = kzalloc(hist_data->map_attr.key_size, GFP_KERNEL);
+ if (!next_key) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ while (tracing_map_get_next_key(hist_data->map, key, next_key) == 0) {
+ tracing_map_lookup_elem(hist_data->map, next_key, &entry);
+ sort_entries[i] = create_sort_entry(next_key, entry);
+ if (!sort_entries[i++])
+ goto free;
+
+ tmp_key = key;
+ key = next_key;
+ next_key = tmp_key;
+ }
+
+ hist_data->sort_key_cur = hist_data->sort_keys[0];
+ sort(sort_entries, i, sizeof(struct hist_trigger_entry *),
+ (int (*)(const void *, const void *))cmp_entries, NULL);
+
+ for (j = 0; j < i; j++) {
+ hist_trigger_entry_print(m, hist_data, sort_entries[j]->key,
+ sort_entries[j]->entry);
+ }
+free:
+ destroy_sort_entries(sort_entries, hist_data->map_attr.max_entries);
+
+ kfree(key);
+ kfree(next_key);
+
+ return ret;
+}
+
static int hist_show(struct seq_file *m, void *v)
{
struct event_trigger_data *test, *data = NULL;
@@ -2157,6 +2412,8 @@ static int hist_show(struct seq_file *m, void *v)
hist_data = data->private_data;

ret = print_entries(m, hist_data);
+ if (ret)
+ print_entries_unsorted(m, hist_data);

seq_printf(m, "\nTotals:\n Hits: %lu\n Entries: %u\n Dropped: %lu\n",
atomic64_read(&hist_data->total_hits),
@@ -2229,6 +2486,24 @@ static int event_hist_trigger_print(struct seq_file *m,
hist_field_print(m, hist_data->vals[i]);
}

+ for (i = 0; i < HIST_SORT_KEYS_MAX; i++) {
+ if (!hist_data->sort_keys[i])
+ break;
+
+ if (i == 0)
+ seq_puts(m, ":sort=");
+ else
+ seq_puts(m, ",");
+
+ if (hist_data->sort_keys[i]->use_hitcount)
+ seq_puts(m, "hitcount");
+ else {
+ unsigned int idx = hist_data->sort_keys[i]->idx;
+
+ hist_field_print(m, hist_data->vals[idx]);
+ }
+ }
+
seq_printf(m, ":size=%u", (1 << hist_data->map_bits));

if (data->filter_str)
--
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/