Re: [PATCH] perf tools: Add OCaml demangling

From: Arnaldo Carvalho de Melo
Date: Wed Feb 17 2021 - 13:15:35 EST


Em Tue, Feb 16, 2021 at 04:47:30PM +0900, Namhyung Kim escreveu:
> Hello,
>
> (+ Cc: LKML)
>
> On Thu, Feb 4, 2021 at 6:22 AM Fabian Hemmer <copy@xxxxxxx> wrote:
> >
> > Detect symbols generated by the OCaml compiler based on their prefix.
> >
> > Demangle OCaml symbols, returning a newly allocated string (like the
> > existing Java demangling functionality).
> >
> > Move a helper function (hex) from tests/code-reading.c to util/string.c.
> >
> > To test:
> >
> > echo 'Printf.printf "%d\n" (Random.int 42)' > test.ml
> > perf record ocamlopt.opt test.ml
> > perf report -d ocamlopt.opt
> >
> > Signed-off-by: Fabian Hemmer <copy@xxxxxxx>
>
> Acked-by: Namhyung Kim <namhyung@xxxxxxxxxx>

This should have been a series of patches; anyway, applying :-\

Thanks.

- Arnaldo

> Thanks,
> Namhyung
>
>
> > ---
> > tools/perf/tests/Build | 1 +
> > tools/perf/tests/builtin-test.c | 4 ++
> > tools/perf/tests/code-reading.c | 10 +---
> > tools/perf/tests/demangle-ocaml-test.c | 43 ++++++++++++++
> > tools/perf/tests/tests.h | 1 +
> > tools/perf/util/Build | 1 +
> > tools/perf/util/demangle-ocaml.c | 80 ++++++++++++++++++++++++++
> > tools/perf/util/demangle-ocaml.h | 7 +++
> > tools/perf/util/string.c | 9 +++
> > tools/perf/util/string2.h | 2 +
> > tools/perf/util/symbol-elf.c | 9 ++-
> > 11 files changed, 156 insertions(+), 11 deletions(-)
> > create mode 100644 tools/perf/tests/demangle-ocaml-test.c
> > create mode 100644 tools/perf/util/demangle-ocaml.c
> > create mode 100644 tools/perf/util/demangle-ocaml.h
> >
> > diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
> > index aa4dc4f5abde..650aec19d490 100644
> > --- a/tools/perf/tests/Build
> > +++ b/tools/perf/tests/Build
> > @@ -58,6 +58,7 @@ perf-y += time-utils-test.o
> > perf-y += genelf.o
> > perf-y += api-io.o
> > perf-y += demangle-java-test.o
> > +perf-y += demangle-ocaml-test.o
> > perf-y += pfm.o
> > perf-y += parse-metric.o
> > perf-y += pe-file-parsing.o
> > diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
> > index 7273823d0d02..c4b888f18e9c 100644
> > --- a/tools/perf/tests/builtin-test.c
> > +++ b/tools/perf/tests/builtin-test.c
> > @@ -338,6 +338,10 @@ static struct test generic_tests[] = {
> > .desc = "Demangle Java",
> > .func = test__demangle_java,
> > },
> > + {
> > + .desc = "Demangle OCaml",
> > + .func = test__demangle_ocaml,
> > + },
> > {
> > .desc = "Parse and process metrics",
> > .func = test__parse_metric,
> > diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
> > index 7c098d49c77e..280f0348a09c 100644
> > --- a/tools/perf/tests/code-reading.c
> > +++ b/tools/perf/tests/code-reading.c
> > @@ -26,6 +26,7 @@
> > #include "event.h"
> > #include "record.h"
> > #include "util/mmap.h"
> > +#include "util/string2.h"
> > #include "util/synthetic-events.h"
> > #include "thread.h"
> >
> > @@ -41,15 +42,6 @@ struct state {
> > size_t done_cnt;
> > };
> >
> > -static unsigned int hex(char c)
> > -{
> > - if (c >= '0' && c <= '9')
> > - return c - '0';
> > - if (c >= 'a' && c <= 'f')
> > - return c - 'a' + 10;
> > - return c - 'A' + 10;
> > -}
> > -
> > static size_t read_objdump_chunk(const char **line, unsigned char **buf,
> > size_t *buf_len)
> > {
> > diff --git a/tools/perf/tests/demangle-ocaml-test.c b/tools/perf/tests/demangle-ocaml-test.c
> > new file mode 100644
> > index 000000000000..a273ed5163d7
> > --- /dev/null
> > +++ b/tools/perf/tests/demangle-ocaml-test.c
> > @@ -0,0 +1,43 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <string.h>
> > +#include <stdlib.h>
> > +#include <stdio.h>
> > +#include "tests.h"
> > +#include "session.h"
> > +#include "debug.h"
> > +#include "demangle-ocaml.h"
> > +
> > +int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_unused)
> > +{
> > + int ret = TEST_OK;
> > + char *buf = NULL;
> > + size_t i;
> > +
> > + struct {
> > + const char *mangled, *demangled;
> > + } test_cases[] = {
> > + { "main",
> > + NULL },
> > + { "camlStdlib__array__map_154",
> > + "Stdlib.array.map" },
> > + { "camlStdlib__anon_fn$5bstdlib$2eml$3a334$2c0$2d$2d54$5d_1453",
> > + "Stdlib.anon_fn[stdlib.ml:334,0--54]" },
> > + { "camlStdlib__bytes__$2b$2b_2205",
> > + "Stdlib.bytes.++" },
> > + };
> > +
> > + for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
> > + buf = ocaml_demangle_sym(test_cases[i].mangled);
> > + if ((buf == NULL && test_cases[i].demangled != NULL)
> > + || (buf != NULL && test_cases[i].demangled == NULL)
> > + || (buf != NULL && strcmp(buf, test_cases[i].demangled))) {
> > + pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled,
> > + buf == NULL ? "(null)" : buf,
> > + test_cases[i].demangled == NULL ? "(null)" : test_cases[i].demangled);
> > + ret = TEST_FAIL;
> > + }
> > + free(buf);
> > + }
> > +
> > + return ret;
> > +}
> > diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
> > index 8e24a61fe4c2..b85f005308a3 100644
> > --- a/tools/perf/tests/tests.h
> > +++ b/tools/perf/tests/tests.h
> > @@ -119,6 +119,7 @@ int test__time_utils(struct test *t, int subtest);
> > int test__jit_write_elf(struct test *test, int subtest);
> > int test__api_io(struct test *test, int subtest);
> > int test__demangle_java(struct test *test, int subtest);
> > +int test__demangle_ocaml(struct test *test, int subtest);
> > int test__pfm(struct test *test, int subtest);
> > const char *test__pfm_subtest_get_desc(int subtest);
> > int test__pfm_subtest_get_nr(void);
> > diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> > index e2563d0154eb..34995e1fef8f 100644
> > --- a/tools/perf/util/Build
> > +++ b/tools/perf/util/Build
> > @@ -172,6 +172,7 @@ perf-$(CONFIG_ZSTD) += zstd.o
> >
> > perf-$(CONFIG_LIBCAP) += cap.o
> >
> > +perf-y += demangle-ocaml.o
> > perf-y += demangle-java.o
> > perf-y += demangle-rust.o
> >
> > diff --git a/tools/perf/util/demangle-ocaml.c b/tools/perf/util/demangle-ocaml.c
> > new file mode 100644
> > index 000000000000..3df14e67c622
> > --- /dev/null
> > +++ b/tools/perf/util/demangle-ocaml.c
> > @@ -0,0 +1,80 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <string.h>
> > +#include <stdlib.h>
> > +#include "util/string2.h"
> > +
> > +#include "demangle-ocaml.h"
> > +
> > +#include <linux/ctype.h>
> > +
> > +static const char *caml_prefix = "caml";
> > +static const size_t caml_prefix_len = 4;
> > +
> > +/* mangled OCaml symbols start with "caml" followed by an upper-case letter */
> > +static bool
> > +ocaml_is_mangled(const char *sym)
> > +{
> > + return 0 == strncmp(sym, caml_prefix, caml_prefix_len)
> > + && isupper(sym[caml_prefix_len]);
> > +}
> > +
> > +/*
> > + * input:
> > + * sym: a symbol which may have been mangled by the OCaml compiler
> > + * return:
> > + * if the input doesn't look like a mangled OCaml symbol, NULL is returned
> > + * otherwise, a newly allocated string containing the demangled symbol is returned
> > + */
> > +char *
> > +ocaml_demangle_sym(const char *sym)
> > +{
> > + char *result;
> > + int j = 0;
> > + int i;
> > + int len;
> > +
> > + if (!ocaml_is_mangled(sym)) {
> > + return NULL;
> > + }
> > +
> > + len = strlen(sym);
> > +
> > + /* the demangled symbol is always smaller than the mangled symbol */
> > + result = malloc(len + 1);
> > + if (!result)
> > + return NULL;
> > +
> > + /* skip "caml" prefix */
> > + i = caml_prefix_len;
> > +
> > + while (i < len) {
> > + if (sym[i] == '_' && sym[i + 1] == '_') {
> > + /* "__" -> "." */
> > + result[j++] = '.';
> > + i += 2;
> > + }
> > + else if (sym[i] == '$' && isxdigit(sym[i + 1]) && isxdigit(sym[i + 2])) {
> > + /* "$xx" is a hex-encoded character */
> > + result[j++] = (hex(sym[i + 1]) << 4) | hex(sym[i + 2]);
> > + i += 3;
> > + }
> > + else {
> > + result[j++] = sym[i++];
> > + }
> > + }
> > + result[j] = '\0';
> > +
> > + /* scan backwards to remove an "_" followed by decimal digits */
> > + if (j != 0 && isdigit(result[j - 1])) {
> > + while (--j) {
> > + if (!isdigit(result[j])) {
> > + break;
> > + }
> > + }
> > + if (result[j] == '_') {
> > + result[j] = '\0';
> > + }
> > + }
> > +
> > + return result;
> > +}
> > diff --git a/tools/perf/util/demangle-ocaml.h b/tools/perf/util/demangle-ocaml.h
> > new file mode 100644
> > index 000000000000..843cc4fa10a6
> > --- /dev/null
> > +++ b/tools/perf/util/demangle-ocaml.h
> > @@ -0,0 +1,7 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef __PERF_DEMANGLE_OCAML
> > +#define __PERF_DEMANGLE_OCAML 1
> > +
> > +char * ocaml_demangle_sym(const char *str);
> > +
> > +#endif /* __PERF_DEMANGLE_OCAML */
> > diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
> > index 52603876c548..f6d90cdd9225 100644
> > --- a/tools/perf/util/string.c
> > +++ b/tools/perf/util/string.c
> > @@ -293,3 +293,12 @@ char *strdup_esc(const char *str)
> >
> > return ret;
> > }
> > +
> > +unsigned int hex(char c)
> > +{
> > + if (c >= '0' && c <= '9')
> > + return c - '0';
> > + if (c >= 'a' && c <= 'f')
> > + return c - 'a' + 10;
> > + return c - 'A' + 10;
> > +}
> > diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h
> > index 73df616ced43..56c30fef9682 100644
> > --- a/tools/perf/util/string2.h
> > +++ b/tools/perf/util/string2.h
> > @@ -38,4 +38,6 @@ char *asprintf__tp_filter_pids(size_t npids, pid_t *pids);
> > char *strpbrk_esc(char *str, const char *stopset);
> > char *strdup_esc(const char *str);
> >
> > +unsigned int hex(char c);
> > +
> > #endif /* PERF_STRING_H */
> > diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
> > index f3577f7d72fe..1e9114592f2e 100644
> > --- a/tools/perf/util/symbol-elf.c
> > +++ b/tools/perf/util/symbol-elf.c
> > @@ -12,6 +12,7 @@
> > #include "maps.h"
> > #include "symbol.h"
> > #include "symsrc.h"
> > +#include "demangle-ocaml.h"
> > #include "demangle-java.h"
> > #include "demangle-rust.h"
> > #include "machine.h"
> > @@ -251,8 +252,12 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
> > return demangled;
> >
> > demangled = bfd_demangle(NULL, elf_name, demangle_flags);
> > - if (demangled == NULL)
> > - demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
> > + if (demangled == NULL) {
> > + demangled = ocaml_demangle_sym(elf_name);
> > + if (demangled == NULL) {
> > + demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
> > + }
> > + }
> > else if (rust_is_mangled(demangled))
> > /*
> > * Input to Rust demangling is the BFD-demangled
> > --
> > 2.30.0
> >

--

- Arnaldo