[PATCH] perf/sdt: Directly record cached SDT events

From: Hemant Kumar
Date: Fri Apr 29 2016 - 09:41:05 EST


This patch adds support for directly recording SDT events which are
present in the probe cache. This patch is based on current SDT
enablement patchset (v5) by Masami :
https://lkml.org/lkml/2016/4/27/828
and it implements two points in the TODO list mentioned in the
cover note :
"- (perf record) Support SDT event recording directly"
"- (perf record) Try to unregister SDT events after record."

Without this patch, we could probe into SDT events using
"perf probe" and "perf record". With this patch, we can probe
the SDT events directly using "perf record".

For example :

# perf list sdt // List the SDT events
...
sdt_mysql:update__row__done [SDT event]
sdt_mysql:update__row__start [SDT event]
sdt_mysql:update__start [SDT event]
sdt_python:function__entry [SDT event]
sdt_python:function__return [SDT event]
sdt_test:marker1 [SDT event]
sdt_test:marker2 [SDT event]
...

# perf record -e %sdt_test:marker1 -e %sdt_test:marker2 -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 2.087 MB perf.data (22 samples) ]

# perf script
test_sdt 29230 [002] 405550.548017: sdt_test:marker1: (400534)
test_sdt 29230 [002] 405550.548064: sdt_test:marker2: (40053f)
test_sdt 29231 [002] 405550.962806: sdt_test:marker1: (400534)
test_sdt 29231 [002] 405550.962841: sdt_test:marker2: (40053f)
test_sdt 29232 [001] 405551.379327: sdt_test:marker1: (400534)
...

After invoking "perf record", behind the scenes, it checks whether the
event specified is an SDT event using the flag '%'. After that, it
does a lookup of the probe cache to find out the SDT event. If its not
present, it throws an error. Otherwise, it goes on and writes the event
into the uprobe_events file and sets up the probe event, trace events,
etc and starts recording. It also maintains a list of the event names
that were written to uprobe_events file. After finishing the record
session, it removes the events from the uprobe_events file using the
maintained name list.

Signed-off-by: Hemant Kumar <hemant@xxxxxxxxxxxxxxxxxx>
---
tools/perf/builtin-probe.c | 44 +++++++++++++++++++---
tools/perf/builtin-record.c | 24 ++++++++++++
tools/perf/util/bpf-loader.c | 2 +-
tools/perf/util/build-id.c | 1 +
tools/perf/util/parse-events.c | 36 +++++++++++++++++-
tools/perf/util/parse-events.h | 2 +
tools/perf/util/probe-event.c | 24 +++++++++---
tools/perf/util/probe-event.h | 4 +-
tools/perf/util/probe-file.c | 85 ++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/probe-file.h | 8 ++++
10 files changed, 217 insertions(+), 13 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 4a86aea..560cce3 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -322,7 +322,7 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
if (ret < 0)
return ret;

- ret = convert_perf_probe_events(pevs, npevs);
+ ret = convert_perf_probe_events(pevs, npevs, false);
if (ret < 0)
goto out_cleanup;

@@ -387,7 +387,7 @@ static int del_perf_probe_caches(struct strfilter *filter)
return 0;
}

-static int perf_del_probe_events(struct strfilter *filter)
+static int perf_del_probe_events(struct strfilter *filter, bool show)
{
int ret, ret2, ufd = -1, kfd = -1;
char *str = strfilter__string(filter);
@@ -417,7 +417,8 @@ static int perf_del_probe_events(struct strfilter *filter)
ret = probe_file__get_events(kfd, filter, klist);
if (ret == 0) {
strlist__for_each(ent, klist)
- pr_info("Removed event: %s\n", ent->s);
+ if (show)
+ pr_info("Removed event: %s\n", ent->s);

ret = probe_file__del_strlist(kfd, klist);
if (ret < 0)
@@ -427,7 +428,8 @@ static int perf_del_probe_events(struct strfilter *filter)
ret2 = probe_file__get_events(ufd, filter, ulist);
if (ret2 == 0) {
strlist__for_each(ent, ulist)
- pr_info("Removed event: %s\n", ent->s);
+ if (show)
+ pr_info("Removed event: %s\n", ent->s);

ret2 = probe_file__del_strlist(ufd, ulist);
if (ret2 < 0)
@@ -452,6 +454,38 @@ out:
return ret;
}

+/*
+ * Record session for SDT events has ended. Delete the SDT events
+ * from uprobe_events file that were created initially.
+ */
+void remove_sdt_event_list(struct list_head *sdt_events)
+{
+ struct sdt_event_list *event;
+ struct strfilter *filter = NULL;
+ const char *err = NULL;
+ int ret = 0;
+
+ if (list_empty(sdt_events))
+ return;
+
+ list_for_each_entry(event, sdt_events, list) {
+ if (!filter) {
+ filter = strfilter__new(event->event_name, &err);
+ if (!filter)
+ goto free_list;
+ } else {
+ ret = strfilter__or(filter, event->event_name, &err);
+ }
+ }
+
+ ret = perf_del_probe_events(filter, false);
+ if (ret)
+ pr_err("Error in deleting the SDT list\n");
+
+free_list:
+ free_sdt_list(sdt_events);
+}
+
static int
__cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
{
@@ -635,7 +669,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
return ret;
#endif
case 'd':
- ret = perf_del_probe_events(params.filter);
+ ret = perf_del_probe_events(params.filter, true);
if (ret < 0) {
pr_err_with_code(" Error: Failed to delete events.", ret);
return ret;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 515510e..7691a8e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -34,6 +34,7 @@
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "asm/bug.h"
+#include "util/probe-file.h"

#include <unistd.h>
#include <sched.h>
@@ -56,6 +57,7 @@ struct record {
bool no_buildid_cache_set;
bool buildid_all;
unsigned long long samples;
+ struct list_head sdt_event_list;
};

static int record__write(struct record *rec, void *bf, size_t size)
@@ -1077,6 +1079,26 @@ static struct record record = {
},
};

+void sdt_event_list__add(struct list_head *sdt_event_list)
+{
+ if (list_empty(sdt_event_list))
+ return;
+ list_splice(sdt_event_list, &record.sdt_event_list);
+}
+
+bool is_cmd_record(void)
+{
+ return (record.evlist != NULL);
+}
+
+static void sdt_event_list__remove(struct list_head *sdt_event_list)
+{
+ if (list_empty(sdt_event_list))
+ return;
+
+ remove_sdt_event_list(sdt_event_list);
+}
+
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
"\n\t\t\t\tDefault: fp";

@@ -1231,6 +1253,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
if (rec->evlist == NULL)
return -ENOMEM;

+ INIT_LIST_HEAD(&rec->sdt_event_list);
perf_config(perf_record_config, rec);

argc = parse_options(argc, argv, record_options, record_usage,
@@ -1330,6 +1353,7 @@ out_symbol_exit:
perf_evlist__delete(rec->evlist);
symbol__exit();
auxtrace_record__free(rec->itr);
+ sdt_event_list__remove(&rec->sdt_event_list);
return err;
}

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 0967ce6..72b5be2 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -612,7 +612,7 @@ int bpf__probe(struct bpf_object *obj)
goto out;
pev = &priv->pev;

- err = convert_perf_probe_events(pev, 1);
+ err = convert_perf_probe_events(pev, 1, false);
if (err < 0) {
pr_debug("bpf_probe: failed to convert perf probe events");
goto out;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 10e0f06..889f93d 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -569,6 +569,7 @@ static void build_id_cache__add_sdt_cache(const char *sbuild_id,
probe_cache__commit(cache);
probe_cache__delete(cache);
}
+
#else
#define build_id_cache__add_sdt_cache(sbuild_id, realname) do { } while (0)
#endif
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4033dce..cc017a5 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1687,7 +1687,41 @@ int parse_events_option(const struct option *opt, const char *str,
{
struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct parse_events_error err = { .idx = 0, };
- int ret = parse_events(evlist, str, &err);
+ int ret = 0;
+
+ if (*str == '%' && is_cmd_record()) {
+ char *ptr = NULL;
+ struct list_head *sdt_list;
+
+ ptr = zalloc(strlen(str));
+ if (ptr == NULL)
+ return -ENOMEM;
+ strcpy(ptr, str);
+
+ sdt_list = zalloc(sizeof(*sdt_list));
+ if (!sdt_list) {
+ free(ptr);
+ pr_err("Error in sdt_list memory allocation\n");
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(sdt_list);
+
+ /*
+ * If there is an error in this call, no need to free
+ * up sdt_list, its already free'ed up in the previous
+ * call. Free up 'ptr' though.
+ */
+ ret = add_sdt_event(ptr, sdt_list);
+ if (!ret)
+ sdt_event_list__add(sdt_list);
+ free(ptr);
+ }
+ if (!ret) {
+ ret = parse_events(evlist, str + 1, &err);
+ if (ret > 0)
+ ret = 0;
+ } else
+ ret = parse_events(evlist, str, &err);

if (ret)
parse_events_print_error(&err, str);
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index c08daa9..98503c6 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -189,4 +189,6 @@ int is_valid_tracepoint(const char *event_string);
int valid_event_mount(const char *eventfs);
char *parse_events_formats_error_string(char *additional_terms);

+void sdt_event_list__add(struct list_head *sdt_event_list);
+bool is_cmd_record(void);
#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index bb9fc34..34f6bab 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1144,7 +1144,7 @@ err:
return err;
}

-static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
+int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
{
char *ptr;

@@ -3008,7 +3008,8 @@ out:
}

static int convert_to_probe_trace_events(struct perf_probe_event *pev,
- struct probe_trace_event **tevs)
+ struct probe_trace_event **tevs,
+ bool rec_enabled)
{
int ret;

@@ -3033,6 +3034,17 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
ret = find_probe_trace_events_from_cache(pev, tevs);
if (ret > 0)
return ret; /* Found in probe cache */
+ else if (rec_enabled) {
+ /*
+ * Can't find in probe cache, and, since,
+ * recording for sdt events is only enabled for
+ * cached events, we need to return throwing an error.
+ */
+ pr_err("Can't find %s in cache.\n", pev->event);
+ pr_err("Use \"perf list sdt\" to see the available events\n");
+ return -EINVAL;
+ }
+

if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
ret = find_probe_trace_events_from_map(pev, tevs);
@@ -3048,7 +3060,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
return find_probe_trace_events_from_map(pev, tevs);
}

-int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs,
+ bool rec_enabled)
{
int i, ret;

@@ -3058,7 +3071,8 @@ int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs)
if (!pevs[i].uprobes)
kprobe_blacklist__init();
/* Convert with or without debuginfo */
- ret = convert_to_probe_trace_events(&pevs[i], &pevs[i].tevs);
+ ret = convert_to_probe_trace_events(&pevs[i], &pevs[i].tevs,
+ rec_enabled);
if (ret < 0)
return ret;
pevs[i].ntevs = ret;
@@ -3106,7 +3120,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
if (ret < 0)
return ret;

- ret = convert_perf_probe_events(pevs, npevs);
+ ret = convert_perf_probe_events(pevs, npevs, false);
if (ret == 0)
ret = apply_perf_probe_events(pevs, npevs);

diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 39b5a35..98e954d 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -146,7 +146,8 @@ void line_range__clear(struct line_range *lr);
int line_range__init(struct line_range *lr);

int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs,
+ bool rec);
int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
int del_perf_probe_events(struct strfilter *filter);
@@ -173,4 +174,5 @@ int e_snprintf(char *str, size_t size, const char *format, ...)
int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
struct perf_probe_arg *pvar);

+int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev);
#endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 896d645..917b67a 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -27,6 +27,7 @@
#include "probe-event.h"
#include "probe-file.h"
#include "session.h"
+#include "probe-finder.h"

#define MAX_CMDLEN 256

@@ -816,3 +817,87 @@ int probe_cache__show_all_caches(struct strfilter *filter)

return 0;
}
+
+void free_sdt_list(struct list_head *sdt_events)
+{
+ struct sdt_event_list *tmp, *ptr;
+
+ if (list_empty(sdt_events))
+ return;
+ list_for_each_entry_safe(tmp, ptr, sdt_events, list) {
+ list_del(&tmp->list);
+ free(tmp->event_name);
+ free(tmp);
+ }
+}
+
+/*
+ * Find the SDT event from the cache and if found add it/them
+ * to the uprobe_events file
+ */
+int add_sdt_event(char *event, struct list_head *sdt_events)
+{
+ struct perf_probe_event *pev;
+ int ret, i;
+ char *str = event + 1;
+ struct sdt_event_list *tmp;
+
+ pev = zalloc(sizeof(*pev));
+ if (!pev)
+ return -ENOMEM;
+
+ pev->sdt = true;
+ pev->uprobes = true;
+
+ /*
+ * Parse str to find the group name and event name of
+ * the sdt event.
+ */
+ ret = parse_perf_probe_event_name(&str, pev);
+ if (ret) {
+ pr_err("Error in parsing sdt event %s\n", str);
+ free(pev);
+ return ret;
+ }
+
+ probe_conf.max_probes = MAX_PROBES;
+ probe_conf.force_add = 1;
+
+ /*
+ * Find the sdt event from the cache, only cached SDT
+ * events can be directly recorded.
+ */
+ ret = convert_perf_probe_events(pev, 1, true);
+ if (ret < 0) {
+ goto free_pev;
+ } else if (!ret) {
+ ret = apply_perf_probe_events(pev, 1);
+ if (ret) {
+ pr_err("Error in adding SDT event : %s\n", event);
+ goto free_pev;
+ }
+ }
+
+ if (pev->ntevs) {
+ /* Add the event name to "sdt_events" */
+ for (i = 0; i < pev->ntevs; i++) {
+ tmp = zalloc(sizeof(*tmp));
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto free_pev;
+ }
+ INIT_LIST_HEAD(&tmp->list);
+ tmp->event_name = strdup(pev->tevs[i].event);
+ if (!tmp->event_name) {
+ free_sdt_list(sdt_events);
+ ret = -ENOMEM;
+ goto free_pev;
+ }
+ list_add(&tmp->list, sdt_events);
+ }
+ }
+
+free_pev:
+ cleanup_perf_probe_events(pev, 1);
+ return 0;
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
index a02bbbd..f120614 100644
--- a/tools/perf/util/probe-file.h
+++ b/tools/perf/util/probe-file.h
@@ -32,6 +32,11 @@ struct probe_cache {
struct list_head list;
};

+struct sdt_event_list {
+ char *event_name;
+ struct list_head list;
+};
+
int probe_cache_entry__get_event(struct probe_cache_entry *entry,
struct probe_trace_event **tevs);
#define for_each_probe_cache_entry(entry, pcache) \
@@ -51,4 +56,7 @@ struct probe_cache_entry *probe_cache__find(struct probe_cache *pcache,
struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
const char *group, const char *event);
int probe_cache__show_all_caches(struct strfilter *filter);
+int add_sdt_event(char *event, struct list_head *sdt_event_list);
+void remove_sdt_event_list(struct list_head *sdt_event_list);
+void free_sdt_list(struct list_head *sdt_events);
#endif
--
1.9.3