Re: [PATCH] perf tools: Add OCaml demangling

From: Namhyung Kim
Date: Tue Feb 16 2021 - 02:49:39 EST


Hello,

(+ Cc: LKML)

On Thu, Feb 4, 2021 at 6:22 AM Fabian Hemmer <copy@xxxxxxx> wrote:
>
> Detect symbols generated by the OCaml compiler based on their prefix.
>
> Demangle OCaml symbols, returning a newly allocated string (like the
> existing Java demangling functionality).
>
> Move a helper function (hex) from tests/code-reading.c to util/string.c so that the new demangler can reuse it.
>
> To test:
>
> echo 'Printf.printf "%d\n" (Random.int 42)' > test.ml
> perf record ocamlopt.opt test.ml
> perf report -d ocamlopt.opt
>
> Signed-off-by: Fabian Hemmer <copy@xxxxxxx>

Acked-by: Namhyung Kim <namhyung@xxxxxxxxxx>

Thanks,
Namhyung


> ---
> tools/perf/tests/Build | 1 +
> tools/perf/tests/builtin-test.c | 4 ++
> tools/perf/tests/code-reading.c | 10 +---
> tools/perf/tests/demangle-ocaml-test.c | 43 ++++++++++++++
> tools/perf/tests/tests.h | 1 +
> tools/perf/util/Build | 1 +
> tools/perf/util/demangle-ocaml.c | 80 ++++++++++++++++++++++++++
> tools/perf/util/demangle-ocaml.h | 7 +++
> tools/perf/util/string.c | 9 +++
> tools/perf/util/string2.h | 2 +
> tools/perf/util/symbol-elf.c | 9 ++-
> 11 files changed, 156 insertions(+), 11 deletions(-)
> create mode 100644 tools/perf/tests/demangle-ocaml-test.c
> create mode 100644 tools/perf/util/demangle-ocaml.c
> create mode 100644 tools/perf/util/demangle-ocaml.h
>
> diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
> index aa4dc4f5abde..650aec19d490 100644
> --- a/tools/perf/tests/Build
> +++ b/tools/perf/tests/Build
> @@ -58,6 +58,7 @@ perf-y += time-utils-test.o
> perf-y += genelf.o
> perf-y += api-io.o
> perf-y += demangle-java-test.o
> +perf-y += demangle-ocaml-test.o
> perf-y += pfm.o
> perf-y += parse-metric.o
> perf-y += pe-file-parsing.o
> diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
> index 7273823d0d02..c4b888f18e9c 100644
> --- a/tools/perf/tests/builtin-test.c
> +++ b/tools/perf/tests/builtin-test.c
> @@ -338,6 +338,10 @@ static struct test generic_tests[] = {
> .desc = "Demangle Java",
> .func = test__demangle_java,
> },
> + {
> + .desc = "Demangle OCaml",
> + .func = test__demangle_ocaml,
> + },
> {
> .desc = "Parse and process metrics",
> .func = test__parse_metric,
> diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
> index 7c098d49c77e..280f0348a09c 100644
> --- a/tools/perf/tests/code-reading.c
> +++ b/tools/perf/tests/code-reading.c
> @@ -26,6 +26,7 @@
> #include "event.h"
> #include "record.h"
> #include "util/mmap.h"
> +#include "util/string2.h"
> #include "util/synthetic-events.h"
> #include "thread.h"
>
> @@ -41,15 +42,6 @@ struct state {
> size_t done_cnt;
> };
>
> -static unsigned int hex(char c)
> -{
> - if (c >= '0' && c <= '9')
> - return c - '0';
> - if (c >= 'a' && c <= 'f')
> - return c - 'a' + 10;
> - return c - 'A' + 10;
> -}
> -
> static size_t read_objdump_chunk(const char **line, unsigned char **buf,
> size_t *buf_len)
> {
> diff --git a/tools/perf/tests/demangle-ocaml-test.c b/tools/perf/tests/demangle-ocaml-test.c
> new file mode 100644
> index 000000000000..a273ed5163d7
> --- /dev/null
> +++ b/tools/perf/tests/demangle-ocaml-test.c
> @@ -0,0 +1,43 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <string.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include "tests.h"
> +#include "session.h"
> +#include "debug.h"
> +#include "demangle-ocaml.h"
> +/* perf test entry: checks ocaml_demangle_sym() against a fixed table; returns TEST_OK or TEST_FAIL. */
> +int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_unused)
> +{
> + int ret = TEST_OK;
> + char *buf = NULL;
> + size_t i;
> + /* Each case pairs a mangled input with its expected output; NULL means "must not be demangled". */
> + struct {
> + const char *mangled, *demangled;
> + } test_cases[] = {
> + { "main", /* lacks the "caml" prefix */
> + NULL },
> + { "camlStdlib__array__map_154", /* "__" separators plus a trailing "_<digits>" suffix */
> + "Stdlib.array.map" },
> + { "camlStdlib__anon_fn$5bstdlib$2eml$3a334$2c0$2d$2d54$5d_1453", /* "$xx" hex escapes */
> + "Stdlib.anon_fn[stdlib.ml:334,0--54]" },
> + { "camlStdlib__bytes__$2b$2b_2205", /* hex escapes directly before the numeric suffix */
> + "Stdlib.bytes.++" },
> + };
> + /* Run every case; a mismatch is recorded in ret but does not stop the loop. */
> + for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
> + buf = ocaml_demangle_sym(test_cases[i].mangled);
> + if ((buf == NULL && test_cases[i].demangled != NULL) /* fail when only one side is NULL ... */
> + || (buf != NULL && test_cases[i].demangled == NULL)
> + || (buf != NULL && strcmp(buf, test_cases[i].demangled))) { /* ... or when the strings differ */
> + pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled,
> + buf == NULL ? "(null)" : buf,
> + test_cases[i].demangled == NULL ? "(null)" : test_cases[i].demangled);
> + ret = TEST_FAIL;
> + }
> + free(buf); /* buf is heap-allocated or NULL; free(NULL) is a no-op */
> + }
> +
> + return ret;
> +}
> diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
> index 8e24a61fe4c2..b85f005308a3 100644
> --- a/tools/perf/tests/tests.h
> +++ b/tools/perf/tests/tests.h
> @@ -119,6 +119,7 @@ int test__time_utils(struct test *t, int subtest);
> int test__jit_write_elf(struct test *test, int subtest);
> int test__api_io(struct test *test, int subtest);
> int test__demangle_java(struct test *test, int subtest);
> +int test__demangle_ocaml(struct test *test, int subtest);
> int test__pfm(struct test *test, int subtest);
> const char *test__pfm_subtest_get_desc(int subtest);
> int test__pfm_subtest_get_nr(void);
> diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> index e2563d0154eb..34995e1fef8f 100644
> --- a/tools/perf/util/Build
> +++ b/tools/perf/util/Build
> @@ -172,6 +172,7 @@ perf-$(CONFIG_ZSTD) += zstd.o
>
> perf-$(CONFIG_LIBCAP) += cap.o
>
> +perf-y += demangle-ocaml.o
> perf-y += demangle-java.o
> perf-y += demangle-rust.o
>
> diff --git a/tools/perf/util/demangle-ocaml.c b/tools/perf/util/demangle-ocaml.c
> new file mode 100644
> index 000000000000..3df14e67c622
> --- /dev/null
> +++ b/tools/perf/util/demangle-ocaml.c
> @@ -0,0 +1,80 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <string.h>
> +#include <stdlib.h>
> +#include "util/string2.h"
> +
> +#include "demangle-ocaml.h"
> +
> +#include <linux/ctype.h>
> +
> +static const char *caml_prefix = "caml";
> +static const size_t caml_prefix_len = 4;
> +
> +/* A symbol is treated as OCaml-mangled when it starts with "caml" followed by an upper-case letter. */
> +static bool
> +ocaml_is_mangled(const char *sym)
> +{
> + return 0 == strncmp(sym, caml_prefix, caml_prefix_len)
> + && isupper(sym[caml_prefix_len]); /* only read once the prefix matched, so at worst this is the NUL terminator */
> +}
> +
> +/*
> + * Demangle a symbol name that may have been produced by the OCaml compiler.
> + * sym: NUL-terminated symbol name
> + * return:
> + * NULL if sym doesn't look like a mangled OCaml symbol, or if the allocation fails;
> + * otherwise a newly malloc'ed string with the demangled name (the caller frees it)
> + */
> +char *
> +ocaml_demangle_sym(const char *sym)
> +{
> + char *result;
> + int j = 0; /* write index into result */
> + int i; /* read index into sym */
> + int len;
> +
> + if (!ocaml_is_mangled(sym)) {
> + return NULL;
> + }
> +
> + len = strlen(sym);
> +
> + /* the prefix is dropped and no rewrite below grows the text, so len + 1 bytes always suffice */
> + result = malloc(len + 1);
> + if (!result)
> + return NULL;
> +
> + /* skip "caml" prefix */
> + i = caml_prefix_len;
> +
> + while (i < len) {
> + if (sym[i] == '_' && sym[i + 1] == '_') { /* i < len, so sym[i + 1] is at worst the NUL terminator */
> + /* "__" -> "." */
> + result[j++] = '.';
> + i += 2;
> + }
> + else if (sym[i] == '$' && isxdigit(sym[i + 1]) && isxdigit(sym[i + 2])) { /* sym[i + 2] is read only if sym[i + 1] is a hex digit, i.e. not NUL */
> + /* "$xx" is a hex-encoded character */
> + result[j++] = (hex(sym[i + 1]) << 4) | hex(sym[i + 2]);
> + i += 3;
> + }
> + else {
> + result[j++] = sym[i++]; /* anything else, including a lone '_' or '$', is copied verbatim */
> + }
> + }
> + result[j] = '\0';
> +
> + /* scan backwards to remove a trailing "_" followed by decimal digits, e.g. "map_154" -> "map" */
> + if (j != 0 && isdigit(result[j - 1])) {
> + while (--j) { /* stops before testing index 0, which holds the upper-case letter required by ocaml_is_mangled() */
> + if (!isdigit(result[j])) {
> + break;
> + }
> + }
> + if (result[j] == '_') { /* trailing digits not preceded by '_' belong to the name and are kept */
> + result[j] = '\0';
> + }
> + }
> +
> + return result;
> +}
> diff --git a/tools/perf/util/demangle-ocaml.h b/tools/perf/util/demangle-ocaml.h
> new file mode 100644
> index 000000000000..843cc4fa10a6
> --- /dev/null
> +++ b/tools/perf/util/demangle-ocaml.h
> @@ -0,0 +1,7 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __PERF_DEMANGLE_OCAML
> +#define __PERF_DEMANGLE_OCAML 1
> +/* Returns a malloc'ed demangled copy of str (caller frees), or NULL if str is not an OCaml symbol or allocation fails. */
> +char * ocaml_demangle_sym(const char *str);
> +
> +#endif /* __PERF_DEMANGLE_OCAML */
> diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
> index 52603876c548..f6d90cdd9225 100644
> --- a/tools/perf/util/string.c
> +++ b/tools/perf/util/string.c
> @@ -293,3 +293,12 @@ char *strdup_esc(const char *str)
>
> return ret;
> }
> +/* Convert one hexadecimal digit character to its numeric value (0-15). */
> +unsigned int hex(char c)
> +{
> + if (c >= '0' && c <= '9')
> + return c - '0';
> + if (c >= 'a' && c <= 'f')
> + return c - 'a' + 10;
> + return c - 'A' + 10; /* no validation: any other character is treated as 'A'-'F', so callers should check isxdigit() first */
> +}
> diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h
> index 73df616ced43..56c30fef9682 100644
> --- a/tools/perf/util/string2.h
> +++ b/tools/perf/util/string2.h
> @@ -38,4 +38,6 @@ char *asprintf__tp_filter_pids(size_t npids, pid_t *pids);
> char *strpbrk_esc(char *str, const char *stopset);
> char *strdup_esc(const char *str);
>
> +unsigned int hex(char c);
> +
> #endif /* PERF_STRING_H */
> diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
> index f3577f7d72fe..1e9114592f2e 100644
> --- a/tools/perf/util/symbol-elf.c
> +++ b/tools/perf/util/symbol-elf.c
> @@ -12,6 +12,7 @@
> #include "maps.h"
> #include "symbol.h"
> #include "symsrc.h"
> +#include "demangle-ocaml.h"
> #include "demangle-java.h"
> #include "demangle-rust.h"
> #include "machine.h"
> @@ -251,8 +252,12 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
> return demangled;
>
> demangled = bfd_demangle(NULL, elf_name, demangle_flags);
> - if (demangled == NULL)
> - demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
> + if (demangled == NULL) {
> + demangled = ocaml_demangle_sym(elf_name);
> + if (demangled == NULL) {
> + demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
> + }
> + }
> else if (rust_is_mangled(demangled))
> /*
> * Input to Rust demangling is the BFD-demangled
> --
> 2.30.0
>