[PATCH v2] perf scripting python: expose symbol offset and source information

From: Eelco Chaudron
Date: Tue Feb 22 2022 - 10:11:31 EST


This change adds the symbol offset to the data exported for each
call-chain entry. This can not be calculated from the script and
only the ip value, and no related mmap information.

In addition, also export the source file and line information, if
available, to avoid an external lookup if this information is needed.

Signed-off-by: Eelco Chaudron <echaudro@xxxxxxxxxx>
---
v2:
- Fixed small code nit
- Included new features in auto generated scripts

.../util/scripting-engines/trace-event-python.c | 49 +++++++++++++++-----
1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index e752e1f4a5f0..86a9c8614231 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -392,6 +392,18 @@ static const char *get_dsoname(struct map *map)
return dsoname;
}

+static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
+{
+ unsigned long offset;
+
+ if (al->addr < sym->end)
+ offset = al->addr - sym->start;
+ else
+ offset = al->addr - al->map->start - sym->start;
+
+ return offset;
+}
+
static PyObject *python_process_callchain(struct perf_sample *sample,
struct evsel *evsel,
struct addr_location *al)
@@ -443,6 +455,25 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
_PyUnicode_FromStringAndSize(node->ms.sym->name,
node->ms.sym->namelen));
pydict_set_item_string_decref(pyelem, "sym", pysym);
+
+ if (node->ms.map) {
+ struct map *map = node->ms.map;
+ struct addr_location node_al;
+ unsigned long offset;
+
+ node_al.addr = map->map_ip(map, node->ip);
+ node_al.map = map;
+ offset = get_offset(node->ms.sym, &node_al);
+
+ pydict_set_item_string_decref(
+ pyelem, "sym_off",
+ PyLong_FromUnsignedLongLong(offset));
+ }
+ if (node->srcline && strcmp(":0", node->srcline)) {
+ pydict_set_item_string_decref(
+ pyelem, "sym_srcline",
+ _PyUnicode_FromString(node->srcline));
+ }
}

if (node->ms.map) {
@@ -520,18 +551,6 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
return pylist;
}

-static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
-{
- unsigned long offset;
-
- if (al->addr < sym->end)
- offset = al->addr - sym->start;
- else
- offset = al->addr - al->map->start - sym->start;
-
- return offset;
-}
-
static int get_symoff(struct symbol *sym, struct addr_location *al,
bool print_off, char *bf, int size)
{
@@ -2073,7 +2092,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile

fprintf(ofp, "\t\tfor node in common_callchain:");
fprintf(ofp, "\n\t\t\tif 'sym' in node:");
- fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))");
+ fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x] %%s%%s%%s%%s\" %% (");
+ fprintf(ofp, "\n\t\t\t\t\tnode['ip'], node['sym']['name'],");
+ fprintf(ofp, "\n\t\t\t\t\t\"+0x{:x}\".format(node['sym_off']) if 'sym_off' in node else \"\",");
+ fprintf(ofp, "\n\t\t\t\t\t\" ({})\".format(node['dso']) if 'dso' in node else \"\",");
+ fprintf(ofp, "\n\t\t\t\t\t\" \" + node['sym_srcline'] if 'sym_srcline' in node else \"\"))");
fprintf(ofp, "\n\t\t\telse:");
fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n");
fprintf(ofp, "\t\tprint()\n\n");