[PATCH] perf data: Add JSON export

From: Nicholas Fraser
Date: Tue Apr 06 2021 - 05:35:52 EST


This adds a feature to export perf data to JSON. It uses a minimal
inline JSON encoding, no external dependencies. Currently it only
outputs some headers and sample metadata but it's easily extensible.

Use it like this:

perf data convert --to-json out.json

Signed-off-by: Nicholas Fraser <nfraser@xxxxxxxxxxxxxxx>
---
tools/perf/Documentation/perf-data.txt | 5 +-
tools/perf/builtin-data.c | 26 +-
tools/perf/util/Build | 1 +
tools/perf/util/data-convert-bt.c | 2 +-
tools/perf/util/data-convert-bt.h | 11 -
tools/perf/util/data-convert-json.c | 384 +++++++++++++++++++++++++
tools/perf/util/data-convert.h | 10 +
7 files changed, 418 insertions(+), 21 deletions(-)
delete mode 100644 tools/perf/util/data-convert-bt.h
create mode 100644 tools/perf/util/data-convert-json.c

diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index 726b9bc9e1a7..417bf17e265c 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -17,7 +17,7 @@ Data file related processing.
COMMANDS
--------
convert::
- Converts perf data file into another format (only CTF [1] format is support by now).
+ Converts perf data file into another format.
It's possible to set data-convert debug variable to get debug messages from conversion,
like:
perf --debug data-convert data convert ...
@@ -27,6 +27,9 @@ OPTIONS for 'convert'
--to-ctf::
Triggers the CTF conversion, specify the path of CTF data directory.

+--to-json::
+ Triggers JSON conversion. Specify the JSON filename to output.
+
--tod::
Convert time to wall clock time.

diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index 8d23b8d6ee8e..15ca23675ef0 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -7,7 +7,6 @@
#include "debug.h"
#include <subcmd/parse-options.h>
#include "data-convert.h"
-#include "data-convert-bt.h"

typedef int (*data_cmd_fn_t)(int argc, const char **argv);

@@ -55,7 +54,8 @@ static const char * const data_convert_usage[] = {

static int cmd_data_convert(int argc, const char **argv)
{
- const char *to_ctf = NULL;
+ const char *to_json = NULL;
+ const char *to_ctf = NULL;
struct perf_data_convert_opts opts = {
.force = false,
.all = false,
@@ -63,6 +63,7 @@ static int cmd_data_convert(int argc, const char **argv)
const struct option options[] = {
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_STRING(0, "to-json", &to_json, NULL, "Convert to JSON format"),
#ifdef HAVE_LIBBABELTRACE_SUPPORT
OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"),
@@ -72,11 +73,6 @@ static int cmd_data_convert(int argc, const char **argv)
OPT_END()
};

-#ifndef HAVE_LIBBABELTRACE_SUPPORT
- pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
- return -1;
-#endif
-
argc = parse_options(argc, argv, options,
data_convert_usage, 0);
if (argc) {
@@ -84,11 +80,25 @@ static int cmd_data_convert(int argc, const char **argv)
return -1;
}

+ if (to_json && to_ctf) {
+ pr_err("You cannot specify both --to-ctf and --to-json.\n");
+ return -1;
+ }
+ if (!to_json && !to_ctf) {
+ pr_err("You must specify one of --to-ctf or --to-json.\n");
+ return -1;
+ }
+
+ if (to_json)
+ return bt_convert__perf2json(input_name, to_json, &opts);
+
if (to_ctf) {
#ifdef HAVE_LIBBABELTRACE_SUPPORT
return bt_convert__perf2ctf(input_name, to_ctf, &opts);
#else
- pr_err("The libbabeltrace support is not compiled in.\n");
+ pr_err("The libbabeltrace support is not compiled in. perf should be "
+ "compiled with environment variables LIBBABELTRACE=1 and "
+ "LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
return -1;
#endif
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e2563d0154eb..de9ac182b25a 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -163,6 +163,7 @@ perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o

perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+perf-y += data-convert-json.o

perf-y += scripting-engines/

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 27c5fef9ad54..803102207a8b 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -21,7 +21,7 @@
#include <babeltrace/ctf/events.h>
#include <traceevent/event-parse.h>
#include "asm/bug.h"
-#include "data-convert-bt.h"
+#include "data-convert.h"
#include "session.h"
#include "debug.h"
#include "tool.h"
diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h
deleted file mode 100644
index 821674d63c4e..000000000000
--- a/tools/perf/util/data-convert-bt.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __DATA_CONVERT_BT_H
-#define __DATA_CONVERT_BT_H
-#include "data-convert.h"
-#ifdef HAVE_LIBBABELTRACE_SUPPORT
-
-int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
- struct perf_data_convert_opts *opts);
-
-#endif /* HAVE_LIBBABELTRACE_SUPPORT */
-#endif /* __DATA_CONVERT_BT_H */
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
new file mode 100644
index 000000000000..b57c48d35503
--- /dev/null
+++ b/tools/perf/util/data-convert-json.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * JSON export.
+ *
+ * Copyright (C) 2021, CodeWeavers Inc. <nfraser@xxxxxxxxxxxxxxx>
+ */
+
+#include "data-convert.h"
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "linux/compiler.h"
+#include "linux/err.h"
+#include "util/auxtrace.h"
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/event.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/header.h"
+#include "util/map.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/tool.h"
+
+struct convert_json {
+ struct perf_tool tool;
+ FILE *out;
+ bool first;
+ u64 events_count;
+};
+
+// Outputs a JSON-encoded string surrounded by quotes with characters escaped.
+static void output_json_string(FILE *out, const char *s)
+{
+ fputc('"', out);
+ while (*s) {
+ switch (*s) {
+
+ // required escapes with special forms as per RFC 8259
+ case '"': fputs("\\\"", out); break;
+ case '\\': fputs("\\\\", out); break;
+ case '\b': fputs("\\b", out); break;
+ case '\f': fputs("\\f", out); break;
+ case '\n': fputs("\\n", out); break;
+ case '\r': fputs("\\r", out); break;
+ case '\t': fputs("\\t", out); break;
+
+ default:
+ // all other control characters must be escaped by hex code
+ if (*s <= 0x1f)
+ fprintf(out, "\\u%04x", *s);
+ else
+ fputc(*s, out);
+ break;
+ }
+
+ ++s;
+ }
+ fputc('"', out);
+}
+
+// Outputs an optional comma, newline and indentation to delimit a new value
+// from the previous one in a JSON object or array.
+static void output_json_delimiters(FILE *out, bool comma, int depth)
+{
+ int i;
+
+ if (comma)
+ fputc(',', out);
+ fputc('\n', out);
+ for (i = 0; i < depth; ++i)
+ fputc('\t', out);
+}
+
+// Outputs a printf format string (with delimiter) as a JSON value.
+__printf(4, 5)
+static void output_json_format(FILE *out, bool comma, int depth, const char *format, ...)
+{
+ va_list args;
+
+ output_json_delimiters(out, comma, depth);
+ va_start(args, format);
+ vfprintf(out, format, args);
+ va_end(args);
+}
+
+// Outputs a JSON key-value pair where the value is a string.
+static void output_json_key_string(FILE *out, bool comma, int depth,
+ const char *key, const char *value)
+{
+ output_json_delimiters(out, comma, depth);
+ output_json_string(out, key);
+ fputs(": ", out);
+ output_json_string(out, value);
+}
+
+// Outputs a JSON key-value pair where the value is a printf format string.
+__(printf, 5, 6)
+static void output_json_key_format(FILE *out, bool comma, int depth,
+ const char *key, const char *format, ...)
+{
+ va_list args;
+
+ output_json_delimiters(out, comma, depth);
+ output_json_string(out, key);
+ fputs(": ", out);
+ va_start(args, format);
+ vfprintf(out, format, args);
+ va_end(args);
+}
+
+static void output_sample_callchain_entry(struct perf_tool *tool,
+ u64 ip, struct addr_location *al)
+{
+ struct convert_json *c = container_of(tool, struct convert_json, tool);
+ FILE *out = c->out;
+
+ output_json_format(out, false, 4, "{");
+ output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip);
+
+ if (al && al->sym && al->sym->name && strlen(al->sym->name) > 0) {
+ fputc(',', out);
+ output_json_key_string(out, false, 5, "symbol", al->sym->name);
+
+ if (al->map && al->map->dso) {
+ const char *dso = al->map->dso->short_name;
+
+ if (dso && strlen(dso) > 0) {
+ fputc(',', out);
+ output_json_key_string(out, false, 5, "dso", dso);
+ }
+ }
+ }
+
+ output_json_format(out, false, 4, "}");
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct evsel *evsel __maybe_unused,
+ struct machine *machine)
+{
+ struct convert_json *c = container_of(tool, struct convert_json, tool);
+ FILE *out = c->out;
+ struct addr_location al, tal;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_err("Sample resolution failed!\n");
+ return -1;
+ }
+
+ ++c->events_count;
+
+ if (c->first)
+ c->first = false;
+ else
+ fputc(',', out);
+ output_json_format(out, false, 2, "{");
+
+ output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time);
+ output_json_key_format(out, true, 3, "pid", "%i", al.thread->pid_);
+ output_json_key_format(out, true, 3, "tid", "%i", al.thread->tid);
+
+ if (al.thread->cpu >= 0)
+ output_json_key_format(out, true, 3, "cpu", "%i", al.thread->cpu);
+
+ output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread));
+
+ output_json_key_format(out, true, 3, "callchain", "[");
+ if (sample->callchain) {
+ unsigned int i;
+ bool ok;
+ bool first_callchain = true;
+
+ for (i = 0; i < sample->callchain->nr; ++i) {
+ u64 ip = sample->callchain->ips[i];
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ switch (ip) {
+ case PERF_CONTEXT_HV:
+ cpumode = PERF_RECORD_MISC_HYPERVISOR;
+ break;
+ case PERF_CONTEXT_KERNEL:
+ cpumode = PERF_RECORD_MISC_KERNEL;
+ break;
+ case PERF_CONTEXT_USER:
+ cpumode = PERF_RECORD_MISC_USER;
+ break;
+ default:
+ pr_debug("invalid callchain context: %"
+ PRId64 "\n", (s64) ip);
+ break;
+ }
+ continue;
+ }
+
+ if (first_callchain)
+ first_callchain = false;
+ else
+ fputc(',', out);
+
+ ok = thread__find_symbol(al.thread, cpumode, ip, &tal);
+ output_sample_callchain_entry(tool, ip, ok ? &tal : NULL);
+ }
+ } else {
+ output_sample_callchain_entry(tool, sample->ip, &al);
+ }
+ output_json_format(out, false, 3, "]");
+
+ output_json_format(out, false, 2, "}");
+ return 0;
+}
+
+static void output_headers(struct perf_session *session, struct convert_json *c)
+{
+ struct stat st;
+ struct perf_header *header = &session->header;
+ int ret;
+ int fd = perf_data__fd(session->data);
+ int i;
+ FILE *out = c->out;
+
+ output_json_key_format(out, false, 2, "header-version", "%u", header->version);
+
+ ret = fstat(fd, &st);
+ if (ret >= 0) {
+ time_t stctime = st.st_mtime;
+ char buf[256];
+
+ strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&stctime));
+ output_json_key_string(out, true, 2, "captured-on", buf);
+ } else {
+ pr_debug("Failed to get mtime of source file, not writing captured-on");
+ }
+
+ output_json_key_format(out, true, 2, "data-offset", "%" PRIu64, header->data_offset);
+ output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size);
+ output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset);
+
+ output_json_key_string(out, true, 2, "hostname", header->env.hostname);
+ output_json_key_string(out, true, 2, "os-release", header->env.os_release);
+ output_json_key_string(out, true, 2, "arch", header->env.arch);
+
+ output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc);
+ output_json_key_string(out, true, 2, "cpuid", header->env.cpuid);
+ output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online);
+ output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail);
+
+ if (header->env.clock.enabled) {
+ output_json_key_format(out, true, 2, "clockid",
+ "%u", header->env.clock.clockid);
+ output_json_key_format(out, true, 2, "clock-time",
+ "%" PRIu64, header->env.clock.clockid_ns);
+ output_json_key_format(out, true, 2, "real-time",
+ "%" PRIu64, header->env.clock.tod_ns);
+ }
+
+ output_json_key_string(out, true, 2, "perf-version", header->env.version);
+
+ output_json_key_format(out, true, 2, "cmdline", "[");
+ for (i = 0; i < header->env.nr_cmdline; i++) {
+ output_json_delimiters(out, i != 0, 3);
+ output_json_string(c->out, header->env.cmdline_argv[i]);
+ }
+ output_json_format(out, false, 2, "]");
+}
+
+int bt_convert__perf2json(const char *input_name, const char *output_name,
+ struct perf_data_convert_opts *opts __maybe_unused)
+{
+ struct perf_session *session;
+ int fd;
+ int ret = -1;
+
+ struct convert_json c = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
+ .cgroup = perf_event__process_cgroup,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .lost = perf_event__process_lost,
+ .tracing_data = perf_event__process_tracing_data,
+ .build_id = perf_event__process_build_id,
+ .id_index = perf_event__process_id_index,
+ .auxtrace_info = perf_event__process_auxtrace_info,
+ .auxtrace = perf_event__process_auxtrace,
+ .event_update = perf_event__process_event_update,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+ .first = true,
+ .events_count = 0,
+ };
+
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ .path = input_name,
+ .force = opts->force,
+ };
+
+ if (opts->all) {
+ pr_err("--all is currently unsupported for JSON output.\n");
+ goto err;
+ }
+ if (opts->tod) {
+ pr_err("--tod is currently unsupported for JSON output.\n");
+ goto err;
+ }
+
+ fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? O_TRUNC : O_EXCL), 0666);
+ if (fd == -1) {
+ if (errno == EEXIST)
+ pr_err("Output file exists. Use --force to overwrite it.\n");
+ else
+ pr_err("Error opening output file!\n");
+ goto err;
+ }
+
+ c.out = fdopen(fd, "w");
+ if (!c.out) {
+ fprintf(stderr, "Error opening output file!\n");
+ close(fd);
+ goto err;
+ }
+
+ session = perf_session__new(&data, false, &c.tool);
+ if (IS_ERR(session)) {
+ fprintf(stderr, "Error creating perf session!\n");
+ goto err_fclose;
+ }
+
+ if (symbol__init(&session->header.env) < 0) {
+ fprintf(stderr, "Symbol init error!\n");
+ goto err_session_delete;
+ }
+
+ // The opening brace is printed manually because it isn't delimited from a
+ // previous value (i.e. we don't want a leading newline)
+ fputc('{', c.out);
+
+ // Version number for future-proofing. Most additions should be able to be
+ // done in a backwards-compatible way so this should only need to be bumped
+ // if some major breaking change must be made.
+ output_json_format(c.out, false, 1, "\"linux-perf-json-version\": 1");
+
+ // Output headers
+ output_json_format(c.out, true, 1, "\"headers\": {");
+ output_headers(session, &c);
+ output_json_format(c.out, false, 1, "}");
+
+ // Output samples
+ output_json_format(c.out, true, 1, "\"samples\": [");
+ perf_session__process_events(session);
+ output_json_format(c.out, false, 1, "]");
+ output_json_format(c.out, false, 0, "}");
+ fputc('\n', c.out);
+
+ fprintf(stderr,
+ "[ perf data convert: Converted '%s' into JSON data '%s' ]\n",
+ data.path, output_name);
+
+ fprintf(stderr,
+ "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n",
+ (ftell(c.out)) / 1024.0 / 1024.0, c.events_count);
+
+ ret = 0;
+err_session_delete:
+ perf_session__delete(session);
+err_fclose:
+ fclose(c.out);
+err:
+ return ret;
+}
diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
index feab5f114e37..1b4c5f598415 100644
--- a/tools/perf/util/data-convert.h
+++ b/tools/perf/util/data-convert.h
@@ -2,10 +2,20 @@
#ifndef __DATA_CONVERT_H
#define __DATA_CONVERT_H

+#include <stdbool.h>
+
struct perf_data_convert_opts {
bool force;
bool all;
bool tod;
};

+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
+ struct perf_data_convert_opts *opts);
+#endif /* HAVE_LIBBABELTRACE_SUPPORT */
+
+int bt_convert__perf2json(const char *input_name, const char *to_ctf,
+ struct perf_data_convert_opts *opts);
+
#endif /* __DATA_CONVERT_H */
--
2.31.1