Re: [PATCH v1 00/20] Reference count checking for thread

From: Ian Rogers
Date: Wed Jun 07 2023 - 03:07:40 EST


On Tue, Jun 6, 2023 at 6:44 PM Ian Rogers <irogers@xxxxxxxxxx> wrote:
>
> Add reference count checking to thread after first refactoring bits of
> the code, such as making the thread red-black tree non-invasive (so
> the thread it references is easier to reference count, rather than
> having 3 potential references). Part of this refactoring also removes
> the dead thread list because if we held a reference here the threads
> would never die and anything else has questionable
> correctness. addr_location is made into its own C/header file to
> capture the init, exit and copy code.
>
> Fix additional outstanding memory leak and reference count issues to
> the point that "perf test" compiled with address sanitizer but without
> libtraceevent passes all but one test - libtraceevent reports leaks
> within its own code, most likely as it isn't compiled with
> sanitizers. The remaining failing test is "68: Test dwarf unwind" and
> that has address sanitizer issues as it uses memcpy to access the
> stack within the process - we likely want to skip parts of the test
> with sanitizers enabled.

So I tried a bit harder to break things and ran into crashes/leaks
around callchains fixed by the patch below - I used 'perf top -g' and
I'll merge the patch into a v2 for the series. The largest remaining
leak is caused by map/maps retaining dsos retaining symbols. The map
and maps are being retained by the TLS call chain cursors [1]
allocated in callchain_cursor_append. For perf report there are
similar retention issues via "struct hists", but as this maybe merged
with evsels it is messy to cleanup.

Thanks,
Ian

```
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 437325d19ad3..909f62b3b266 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -590,6 +590,7 @@ fill_node(struct callchain_node *node, struct
callchain_cursor *cursor)
call->ip = cursor_node->ip;
call->ms = cursor_node->ms;
call->ms.map = map__get(call->ms.map);
+ call->ms.maps = maps__get(call->ms.maps);
call->srcline = cursor_node->srcline;

if (cursor_node->branch) {
@@ -649,6 +650,7 @@ add_child(struct callchain_node *parent,
list_for_each_entry_safe(call, tmp, &new->val, list) {
list_del_init(&call->list);
map__zput(call->ms.map);
+ maps__zput(call->ms.maps);
free(call);
}
free(new);
@@ -1010,10 +1012,16 @@ merge_chain_branch(struct callchain_cursor *cursor,
int err = 0;

list_for_each_entry_safe(list, next_list, &src->val, list) {
- callchain_cursor_append(cursor, list->ip, &list->ms,
- false, NULL, 0, 0, 0, list->srcline);
+ struct map_symbol ms = {
+ .maps = maps__get(list->ms.maps),
+ .map = map__get(list->ms.map),
+ };
+ callchain_cursor_append(cursor, list->ip, &ms, false,
NULL, 0, 0, 0, list->srcline);
list_del_init(&list->list);
+ map__zput(ms.map);
+ maps__zput(ms.maps);
map__zput(list->ms.map);
+ maps__zput(list->ms.maps);
free(list);
}

@@ -1068,8 +1076,8 @@ int callchain_cursor_append(struct
callchain_cursor *cursor,
maps__zput(node->ms.maps);
map__zput(node->ms.map);
node->ms = *ms;
- node->ms.maps = maps__get(node->ms.maps);
- node->ms.map = map__get(node->ms.map);
+ node->ms.maps = maps__get(ms->maps);
+ node->ms.map = map__get(ms->map);
node->branch = branch;
node->nr_loop_iter = nr_loop_iter;
node->iter_cycles = iter_cycles;
@@ -1463,12 +1471,14 @@ static void free_callchain_node(struct
callchain_node *node)
list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
list_del_init(&list->list);
map__zput(list->ms.map);
+ maps__zput(list->ms.maps);
free(list);
}

list_for_each_entry_safe(list, tmp, &node->val, list) {
list_del_init(&list->list);
map__zput(list->ms.map);
+ maps__zput(list->ms.maps);
free(list);
}

@@ -1554,6 +1564,7 @@ int callchain_node__make_parent_list(struct
callchain_node *node)
list_for_each_entry_safe(chain, new, &head, list) {
list_del_init(&chain->list);
map__zput(chain->ms.map);
+ maps__zput(chain->ms.maps);
free(chain);
}
return -ENOMEM;
@@ -1599,8 +1610,10 @@ void callchain_cursor_reset(struct
callchain_cursor *cursor)
cursor->nr = 0;
cursor->last = &cursor->first;

- for (node = cursor->first; node != NULL; node = node->next)
+ for (node = cursor->first; node != NULL; node = node->next) {
map__zput(node->ms.map);
+ maps__zput(node->ms.maps);
+ }
}

void callchain_param_setup(u64 sample_type, const char *arch)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 16dc49876e87..bdad4b8bf77d 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2321,7 +2321,7 @@ static int add_callchain_ip(struct thread *thread,
struct iterations *iter,
u64 branch_from)
{
- struct map_symbol ms;
+ struct map_symbol ms = {};
struct addr_location al;
int nr_loop_iter = 0, err = 0;
u64 iter_cycles = 0;
@@ -3091,6 +3091,7 @@ static int append_inlines(struct
callchain_cursor *cursor, struct map_symbol *
ms
struct dso *dso;
u64 addr;
int ret = 1;
+ struct map_symbol ilist_ms;

if (!symbol_conf.inline_name || !map || !sym)
return ret;
@@ -3107,18 +3108,20 @@ static int append_inlines(struct
callchain_cursor *cursor, struct map_symbol
*ms
inlines__tree_insert(&dso->inlined_nodes, inline_node);
}

+ ilist_ms = (struct map_symbol) {
+ .maps = maps__get(ms->maps),
+ .map = map__get(map),
+ };
list_for_each_entry(ilist, &inline_node->val, list) {
- struct map_symbol ilist_ms = {
- .maps = ms->maps,
- .map = map,
- .sym = ilist->symbol,
- };
+ ilist_ms.sym = ilist->symbol;
ret = callchain_cursor_append(cursor, ip, &ilist_ms, false,
NULL, 0, 0, 0, ilist->srcline);

if (ret != 0)
return ret;
}
+ map__put(ilist_ms.map);
+ maps__put(ilist_ms.maps);

return ret;
}
```

[1] http://lkml.kernel.org/r/1338443007-24857-1-git-send-email-namhyung.kim@xxxxxxx

> Ian Rogers (20):
> perf thread: Remove notion of dead threads
> perf thread: Make threads rbtree non-invasive
> perf thread: Add accessor functions for thread
> perf maps: Make delete static, always use put
> perf addr_location: Move to its own header
> perf addr_location: Add init/exit/copy functions
> perf thread: Add reference count checking
> perf machine: Make delete_threads part of machine__exit
> perf report: Avoid thread leak
> perf header: Ensure bitmaps are freed
> perf stat: Avoid evlist leak
> perf intel-pt: Fix missed put and leak
> perf evlist: Free stats in all evlist destruction
> perf python: Avoid 2 leak sanitizer issues
> perf jit: Fix two thread leaks
> perf symbol-elf: Correct holding a reference
> perf maps: Fix overlapping memory leak
> perf machine: Fix leak of kernel dso
> perf machine: Don't leak module maps
> perf map/maps/thread: Changes to reference counting
>
> tools/perf/arch/arm/tests/dwarf-unwind.c | 2 +-
> tools/perf/arch/arm64/tests/dwarf-unwind.c | 2 +-
> tools/perf/arch/powerpc/tests/dwarf-unwind.c | 2 +-
> tools/perf/arch/x86/tests/dwarf-unwind.c | 2 +-
> tools/perf/builtin-annotate.c | 28 +-
> tools/perf/builtin-c2c.c | 18 +-
> tools/perf/builtin-diff.c | 16 +-
> tools/perf/builtin-inject.c | 4 +-
> tools/perf/builtin-kmem.c | 13 +-
> tools/perf/builtin-kwork.c | 15 +-
> tools/perf/builtin-mem.c | 4 +-
> tools/perf/builtin-report.c | 21 +-
> tools/perf/builtin-sched.c | 80 +++--
> tools/perf/builtin-script.c | 97 +++---
> tools/perf/builtin-stat.c | 1 +
> tools/perf/builtin-timechart.c | 11 +-
> tools/perf/builtin-top.c | 8 +-
> tools/perf/builtin-trace.c | 38 ++-
> .../scripts/python/Perf-Trace-Util/Context.c | 4 +-
> tools/perf/tests/code-reading.c | 6 +-
> tools/perf/tests/dwarf-unwind.c | 1 -
> tools/perf/tests/hists_common.c | 2 +-
> tools/perf/tests/hists_cumulate.c | 18 +-
> tools/perf/tests/hists_filter.c | 11 +-
> tools/perf/tests/hists_link.c | 20 +-
> tools/perf/tests/hists_output.c | 12 +-
> tools/perf/tests/maps.c | 2 +-
> tools/perf/tests/mmap-thread-lookup.c | 5 +-
> tools/perf/tests/perf-targz-src-pkg | 5 +-
> tools/perf/tests/symbols.c | 1 -
> tools/perf/tests/thread-maps-share.c | 13 +-
> tools/perf/trace/beauty/pid.c | 4 +-
> tools/perf/ui/browsers/hists.c | 19 +-
> tools/perf/ui/hist.c | 5 +-
> tools/perf/ui/stdio/hist.c | 2 +-
> tools/perf/util/Build | 1 +
> tools/perf/util/addr_location.c | 44 +++
> tools/perf/util/addr_location.h | 31 ++
> tools/perf/util/arm-spe.c | 4 +-
> tools/perf/util/build-id.c | 2 +
> tools/perf/util/callchain.c | 7 +-
> tools/perf/util/cs-etm.c | 28 +-
> tools/perf/util/data-convert-json.c | 16 +-
> tools/perf/util/db-export.c | 20 +-
> tools/perf/util/dlfilter.c | 17 +-
> tools/perf/util/event.c | 37 +--
> tools/perf/util/evlist.c | 2 +
> tools/perf/util/evsel_fprintf.c | 8 +-
> tools/perf/util/header.c | 12 +-
> tools/perf/util/hist.c | 22 +-
> tools/perf/util/intel-bts.c | 2 +-
> tools/perf/util/intel-pt.c | 88 +++---
> tools/perf/util/jitdump.c | 12 +-
> tools/perf/util/machine.c | 277 +++++++++---------
> tools/perf/util/map.c | 2 +-
> tools/perf/util/maps.c | 5 +-
> tools/perf/util/maps.h | 9 +-
> tools/perf/util/python.c | 4 +
> .../scripting-engines/trace-event-python.c | 28 +-
> tools/perf/util/session.c | 8 +-
> tools/perf/util/sort.c | 12 +-
> tools/perf/util/symbol-elf.c | 4 +-
> tools/perf/util/symbol.h | 17 +-
> tools/perf/util/thread-stack.c | 25 +-
> tools/perf/util/thread.c | 218 +++++++-------
> tools/perf/util/thread.h | 210 +++++++++++--
> tools/perf/util/unwind-libdw.c | 27 +-
> tools/perf/util/unwind-libunwind-local.c | 19 +-
> tools/perf/util/unwind-libunwind.c | 2 +-
> tools/perf/util/vdso.c | 2 +-
> 70 files changed, 1059 insertions(+), 655 deletions(-)
> create mode 100644 tools/perf/util/addr_location.c
> create mode 100644 tools/perf/util/addr_location.h
>
> --
> 2.41.0.rc0.172.g3f132b7071-goog
>