Re: [RFC 3/6] perf annotate: Enable cross arch annotate

From: Arnaldo Carvalho de Melo
Date: Mon Jun 27 2016 - 13:16:39 EST


Em Fri, Jun 24, 2016 at 05:23:57PM +0530, Ravi Bangoria escreveu:
> Change current data structures and function to enable cross arch annotate
> and add support for x86 and arm instructions.
>
> Current implementation does not contain logic of recording on one arch
> and annotating on other. This remote annotate is partially possible with
> current implementation for x86 (or may be arm as well) only. But, to make
> remote annotation work properly, all architecture instruction tables need
> to be included in the perf binary. And while annotating, look for
> instruction table where perf.data was recorded.
>
> For arm, few instructions were defined under #if __arm__ which I've used
> as a table for arm. But I'm not sure whether instruction defined outside
> of that also contains arm instructions.
>
> Note:
> Here arch_ins is global var. And init_arch_ins will be called every
> time when we annotate symbol. So I still need to optimize this.
> May be make arch_ins per session. Please suggest best way to do it.
>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxxxxxxxxxxxxx>
> ---
> tools/perf/builtin-top.c | 2 +-
> tools/perf/ui/browsers/annotate.c | 5 +-
> tools/perf/ui/gtk/annotate.c | 6 +-
> tools/perf/util/annotate.c | 116 +++++++++++++++++++++++++++++---------
> tools/perf/util/annotate.h | 3 +-
> tools/perf/util/evsel.c | 7 +++
> tools/perf/util/evsel.h | 2 +
> 7 files changed, 108 insertions(+), 33 deletions(-)
>
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index 07fc792..d4fd947 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -128,7 +128,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
> return err;
> }
>
> - err = symbol__annotate(sym, map, 0);
> + err = symbol__annotate(sym, map, 0, NULL);
> if (err == 0) {
> out_assign:
> top->sym_filter_entry = he;
> diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
> index 0e106bb..b65a979 100644
> --- a/tools/perf/ui/browsers/annotate.c
> +++ b/tools/perf/ui/browsers/annotate.c
> @@ -1031,6 +1031,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
> int ret = -1;
> int nr_pcnt = 1;
> size_t sizeof_bdl = sizeof(struct browser_disasm_line);
> + char *target_arch = NULL;
>
> if (sym == NULL)
> return -1;
> @@ -1052,7 +1053,9 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
> (nr_pcnt - 1);
> }
>
> - if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
> + target_arch = perf_evsel__env_arch(evsel);
> +
> + if (symbol__annotate(sym, map, sizeof_bdl, target_arch) < 0) {
> ui__error("%s", ui_helpline__last_msg);
> goto out_free_offsets;
> }
> diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
> index 9c7ff8d..e468c1a 100644
> --- a/tools/perf/ui/gtk/annotate.c
> +++ b/tools/perf/ui/gtk/annotate.c
> @@ -4,7 +4,6 @@
> #include "util/evsel.h"
> #include "ui/helpline.h"
>
> -
> enum {
> ANN_COL__PERCENT,
> ANN_COL__OFFSET,
> @@ -162,11 +161,14 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
> GtkWidget *notebook;
> GtkWidget *scrolled_window;
> GtkWidget *tab_label;
> + char *target_arch = NULL;
>
> if (map->dso->annotate_warned)
> return -1;
>
> - if (symbol__annotate(sym, map, 0) < 0) {
> + target_arch = perf_evsel__env_arch(evsel);
> +
> + if (symbol__annotate(sym, map, 0, target_arch) < 0) {
> ui__error("%s", ui_helpline__current);
> return -1;
> }
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index b2c7ae4..e0dc7b2 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -20,6 +20,8 @@
> #include <regex.h>
> #include <pthread.h>
> #include <linux/bitops.h>
> +#include <sys/utsname.h>
> +#include "../arch/common.h"
>
> const char *disassembler_style;
> const char *objdump_path;
> @@ -28,6 +30,13 @@ static regex_t file_lineno;
> static struct ins *ins__find(const char *name);
> static int disasm_line__parse(char *line, char **namep, char **rawp);
>
> +static struct arch_instructions {
> + const char *arch;
> + int nmemb;
> + struct ins *instructions;
> + struct ins *(*ins__find)(const char *);

Why do we need arch specific find functions? Why not pass the
instructions pointer to it, just like you did with ins__sort().

Probably it is not needed to be global, you just pick the right
instructions table + its ARRAY_SIZE and pass it around, again, like you
did in ins__sort().

- Arnaldo

> +} arch_ins;
> +
> static void ins__delete(struct ins_operands *ops)
> {
> if (ops == NULL)
> @@ -183,7 +192,7 @@ static int lock__parse(struct ins_operands *ops)
> if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0)
> goto out_free_ops;
>
> - ops->locked.ins = ins__find(name);
> + ops->locked.ins = arch_ins.ins__find(name);
> free(name);
>
> if (ops->locked.ins == NULL)
> @@ -354,26 +363,12 @@ static struct ins_ops nop_ops = {
> .scnprintf = nop__scnprintf,
> };
>
> -static struct ins instructions[] = {
> +static struct ins instructions_x86[] = {
> { .name = "add", .ops = &mov_ops, },
> { .name = "addl", .ops = &mov_ops, },
> { .name = "addq", .ops = &mov_ops, },
> { .name = "addw", .ops = &mov_ops, },
> { .name = "and", .ops = &mov_ops, },
> -#ifdef __arm__
> - { .name = "b", .ops = &jump_ops, }, // might also be a call
> - { .name = "bcc", .ops = &jump_ops, },
> - { .name = "bcs", .ops = &jump_ops, },
> - { .name = "beq", .ops = &jump_ops, },
> - { .name = "bge", .ops = &jump_ops, },
> - { .name = "bgt", .ops = &jump_ops, },
> - { .name = "bhi", .ops = &jump_ops, },
> - { .name = "bl", .ops = &call_ops, },
> - { .name = "bls", .ops = &jump_ops, },
> - { .name = "blt", .ops = &jump_ops, },
> - { .name = "blx", .ops = &call_ops, },
> - { .name = "bne", .ops = &jump_ops, },
> -#endif
> { .name = "bts", .ops = &mov_ops, },
> { .name = "call", .ops = &call_ops, },
> { .name = "callq", .ops = &call_ops, },
> @@ -446,6 +441,21 @@ static struct ins instructions[] = {
> { .name = "xbeginq", .ops = &jump_ops, },
> };
>
> +static struct ins instructions_arm[] = {
> + { .name = "b", .ops = &jump_ops, }, /* might also be a call */
> + { .name = "bcc", .ops = &jump_ops, },
> + { .name = "bcs", .ops = &jump_ops, },
> + { .name = "beq", .ops = &jump_ops, },
> + { .name = "bge", .ops = &jump_ops, },
> + { .name = "bgt", .ops = &jump_ops, },
> + { .name = "bhi", .ops = &jump_ops, },
> + { .name = "bl", .ops = &call_ops, },
> + { .name = "bls", .ops = &jump_ops, },
> + { .name = "blt", .ops = &jump_ops, },
> + { .name = "blx", .ops = &call_ops, },
> + { .name = "bne", .ops = &jump_ops, },
> +};
> +
> static int ins__key_cmp(const void *name, const void *insp)
> {
> const struct ins *ins = insp;
> @@ -461,24 +471,69 @@ static int ins__cmp(const void *a, const void *b)
> return strcmp(ia->name, ib->name);
> }
>
> -static void ins__sort(void)
> +static void ins__sort(struct ins *instructions, int nmemb)
> {
> - const int nmemb = ARRAY_SIZE(instructions);
> -
> qsort(instructions, nmemb, sizeof(struct ins), ins__cmp);
> }
>
> static struct ins *ins__find(const char *name)
> {
> - const int nmemb = ARRAY_SIZE(instructions);
> - static bool sorted;
> + return bsearch(name, arch_ins.instructions, arch_ins.nmemb,
> + sizeof(struct ins), ins__key_cmp);
> +}
> +
> +static void __init_arch_ins(const char *arch, struct ins *instructions,
> + int size, struct ins *(*func)(const char *))
> +{
> + ins__sort(instructions, size);
> +
> + arch_ins.arch = arch;
> + arch_ins.nmemb = size;
> + arch_ins.instructions = instructions;
> + arch_ins.ins__find = func;
> +}
> +
> +static int _init_arch_ins(const char *norm_arch)
> +{
> + if (!strcmp(norm_arch, PERF_ARCH_X86))
> + __init_arch_ins(norm_arch, instructions_x86,
> + ARRAY_SIZE(instructions_x86),
> + ins__find);
> + else if (!strcmp(norm_arch, PERF_ARCH_ARM))
> + __init_arch_ins(norm_arch, instructions_arm,
> + ARRAY_SIZE(instructions_arm),
> + ins__find);
> + else
> + return -1;
> +
> + return 0;
> +}
> +
> +static int init_arch_ins(char *target_arch)
> +{
> + const char *norm_arch = NULL;
> + struct utsname uts;
>
> - if (!sorted) {
> - ins__sort();
> - sorted = true;
> + if (!target_arch) { /* Assume we are annotating locally. */
> + if (uname(&uts) < 0) {
> + pr_err("Can not annotate. Could not determine architecture.");
> + return -1;
> + }
> + target_arch = uts.machine;
> }
>
> - return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__key_cmp);
> + norm_arch = normalize_arch(target_arch);
> +
> + /* retuen if already initialized. */
> + if (arch_ins.arch && !strcmp(norm_arch, arch_ins.arch))
> + return 0;
> +
> + if (_init_arch_ins(norm_arch)) {
> + pr_err("perf annotate not supported by %s arch\n", target_arch);
> + return -1;
> + }
> +
> + return 0;
> }
>
> int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym)
> @@ -707,7 +762,7 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip)
>
> static void disasm_line__init_ins(struct disasm_line *dl)
> {
> - dl->ins = ins__find(dl->name);
> + dl->ins = arch_ins.ins__find(dl->name);
>
> if (dl->ins == NULL)
> return;
> @@ -1113,7 +1168,8 @@ static void delete_last_nop(struct symbol *sym)
> }
> }
>
> -int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
> +int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize,
> + char *target_arch)
> {
> struct dso *dso = map->dso;
> char *filename = dso__build_id_filename(dso, NULL, 0);
> @@ -1258,6 +1314,9 @@ fallback:
> goto out_remove_tmp;
> }
>
> + if (init_arch_ins(target_arch) < 0)
> + goto out_arch_err;
> +
> nline = 0;
> while (!feof(file)) {
> if (symbol__parse_objdump_line(sym, map, file, privsize,
> @@ -1269,6 +1328,7 @@ fallback:
> if (nline == 0)
> pr_err("No output from %s\n", command);
>
> +out_arch_err:
> /*
> * kallsyms does not have symbol sizes so there may a nop at the end.
> * Remove it.
> @@ -1655,7 +1715,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
> struct rb_root source_line = RB_ROOT;
> u64 len;
>
> - if (symbol__annotate(sym, map, 0) < 0)
> + if (symbol__annotate(sym, map, 0, perf_evsel__env_arch(evsel)) < 0)
> return -1;
>
> len = symbol__size(sym);
> diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
> index 82f3781..f7b669e 100644
> --- a/tools/perf/util/annotate.h
> +++ b/tools/perf/util/annotate.h
> @@ -154,7 +154,8 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
> int symbol__alloc_hist(struct symbol *sym);
> void symbol__annotate_zero_histograms(struct symbol *sym);
>
> -int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
> +int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize,
> + char *target_arch);
>
> int symbol__annotate_init(struct map *map, struct symbol *sym);
> int symbol__annotate_printf(struct symbol *sym, struct map *map,
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 1d8f2bb..0fea724 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -2422,3 +2422,10 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
> err, strerror_r(err, sbuf, sizeof(sbuf)),
> perf_evsel__name(evsel));
> }
> +
> +char *perf_evsel__env_arch(struct perf_evsel *evsel)
> +{
> + if (evsel && evsel->evlist && evsel->evlist->env)
> + return evsel->evlist->env->arch;
> + return NULL;
> +}
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 828ddd1..86fed7a 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -435,4 +435,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
> int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
> attr__fprintf_f attr__fprintf, void *priv);
>
> +char *perf_evsel__env_arch(struct perf_evsel *evsel);
> +
> #endif /* __PERF_EVSEL_H */
> --
> 2.5.5