Re: [PATCH bpf-next v9] selftests/bpf: trace_helpers.c: optimize kallsyms cache

From: Jiri Olsa
Date: Thu Aug 31 2023 - 09:52:11 EST


On Tue, Aug 29, 2023 at 10:14:01PM +0800, Rong Tao wrote:
> From: Rong Tao <rongtao@xxxxxxxx>
>
> Static ksyms often have problems because the number of symbols exceeds the
> MAX_SYMS limit. Like changing the MAX_SYMS from 300000 to 400000 in
> commit e76a014334a6("selftests/bpf: Bump and validate MAX_SYMS") solves
> the problem somewhat, but it's not the perfect way.
>
> This commit uses dynamic memory allocation, which completely solves the
> problem caused by the limitation of the number of kallsyms.
>
> Acked-by: Stanislav Fomichev <sdf@xxxxxxxxxx>
> Signed-off-by: Rong Tao <rongtao@xxxxxxxx>
> ---
> v9: Add load_kallsyms_local,ksym_search_local,ksym_get_addr_local functions.
> v8: https://lore.kernel.org/lkml/tencent_6D23FE187408D965E95DFAA858BC7E8C760A@xxxxxx/
> Resolves inter-thread contention for ksyms global variables.
> v7: https://lore.kernel.org/lkml/tencent_BD6E19C00BF565CD5C36A9A0BD828CFA210A@xxxxxx/
> Fix __must_check macro.
> v6: https://lore.kernel.org/lkml/tencent_4A09A36F883A06EA428A593497642AF8AF08@xxxxxx/
> Apply libbpf_ensure_mem()
> v5: https://lore.kernel.org/lkml/tencent_0E9E1A1C0981678D5E7EA9E4BDBA8EE2200A@xxxxxx/
> Release the allocated memory once the load_kallsyms_refresh() upon error
> given it's dynamically allocated.
> v4: https://lore.kernel.org/lkml/tencent_59C74613113F0C728524B2A82FE5540A5E09@xxxxxx/
> Make sure most cases we don't need the realloc() path to begin with,
> and check strdup() return value.
> v3: https://lore.kernel.org/lkml/tencent_50B4B2622FE7546A5FF9464310650C008509@xxxxxx/
> Do not use structs and judge ksyms__add_symbol function return value.
> v2: https://lore.kernel.org/lkml/tencent_B655EE5E5D463110D70CD2846AB3262EED09@xxxxxx/
> Do the usual len/capacity scheme here to amortize the cost of realloc, and
> don't free symbols.
> v1: https://lore.kernel.org/lkml/tencent_AB461510B10CD484E0B2F62E3754165F2909@xxxxxx/
> ---
> samples/bpf/Makefile | 4 +
> .../selftests/bpf/prog_tests/bpf_cookie.c | 9 +-
> .../selftests/bpf/prog_tests/fill_link_info.c | 9 +-
> .../bpf/prog_tests/get_stack_raw_tp.c | 10 +-
> .../bpf/prog_tests/kprobe_multi_test.c | 15 +-
> .../prog_tests/kprobe_multi_testmod_test.c | 19 ++-
> tools/testing/selftests/bpf/trace_helpers.c | 142 +++++++++++++-----
> tools/testing/selftests/bpf/trace_helpers.h | 8 +
> 8 files changed, 159 insertions(+), 57 deletions(-)
>
> diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> index 4ccf4236031c..6c707ebcebb9 100644
> --- a/samples/bpf/Makefile
> +++ b/samples/bpf/Makefile
> @@ -175,6 +175,7 @@ TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
> TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE)
> TPROGS_CFLAGS += -I$(srctree)/tools/include
> TPROGS_CFLAGS += -I$(srctree)/tools/perf
> +TPROGS_CFLAGS += -I$(srctree)/tools/lib
> TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0
>
> ifdef SYSROOT
> @@ -314,6 +315,9 @@ XDP_SAMPLE_CFLAGS += -Wall -O2 \
>
> $(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS)
> $(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h
> +# Override includes for trace_helpers.o because __must_check won't be defined
> +# in our include path.
> +$(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check=
>
> -include $(BPF_SAMPLES_PATH)/Makefile.target
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> index 1454cebc262b..4ed47dc75669 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
> @@ -104,8 +104,10 @@ static void kprobe_multi_link_api_subtest(void)
> LIBBPF_OPTS(bpf_link_create_opts, opts);
> unsigned long long addrs[8];
> __u64 cookies[8];
> + struct ksyms *ksyms;
>
> - if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
> + ksyms = load_kallsyms_local();
> + if (!ASSERT_OK(ksyms != NULL, "load_kallsyms_local"))
> goto cleanup;

hum, this won't work, right? ASSERT_OK expects 0 on success, but
'ksyms != NULL' evaluates to 1 when the load succeeds. I think you should
use ASSERT_OK_PTR here and in the other similar places below

>
> skel = kprobe_multi__open_and_load();
> @@ -116,8 +118,8 @@ static void kprobe_multi_link_api_subtest(void)
> skel->bss->test_cookie = true;
>
> #define GET_ADDR(__sym, __addr) ({ \
> - __addr = ksym_get_addr(__sym); \
> - if (!ASSERT_NEQ(__addr, 0, "ksym_get_addr " #__sym)) \
> + __addr = ksym_get_addr_local(ksyms, __sym); \
> + if (!ASSERT_NEQ(__addr, 0, "ksym_get_addr_local " #__sym)) \
> goto cleanup; \
> })
>
> @@ -171,6 +173,7 @@ static void kprobe_multi_link_api_subtest(void)
> cleanup:
> close(link1_fd);
> close(link2_fd);
> + free_kallsyms_local(ksyms);

I think we don't need to change any test that's calling load_kallsyms;
those tests should keep using load_kallsyms and the global ksyms data

the load_kallsyms_local would be used in tests that use load_kallsyms_refresh
and need updated ksyms data for bpf_testmod symbols

we just need to make sure that global ksyms initialization won't race, like with:

+static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER;
+
int load_kallsyms(void)
{
- /*
- * This is called/used from multiplace places,
- * load symbols just once.
- */
- if (sym_cnt)
- return 0;
- return load_kallsyms_refresh();
+ pthread_mutex_lock(&ksyms_mutex);
+ if (!ksyms)
+ ksyms = load_kallsyms_local();
+ pthread_mutex_unlock(&ksyms_mutex);
+ return ksyms ? 0 : 1;
}

this could perhaps go in a separate patch, because the same race already exists in the current code

> kprobe_multi__destroy(skel);
> }
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
> index 9d768e083714..154aaa08761f 100644
> --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
> +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
> @@ -302,16 +302,18 @@ void test_fill_link_info(void)
> {
> struct test_fill_link_info *skel;
> int i;
> + struct ksyms *ksyms;
>
> skel = test_fill_link_info__open_and_load();
> if (!ASSERT_OK_PTR(skel, "skel_open"))
> return;
>
> /* load kallsyms to compare the addr */
> - if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh"))
> + ksyms = load_kallsyms_refresh_local(NULL);
> + if (!ASSERT_OK(ksyms != NULL, "load_kallsyms_refresh_local"))

ASSERT_OK_PTR

SNIP

> -int load_kallsyms_refresh(void)
> +struct ksyms *load_kallsyms_refresh_local(struct ksyms *ksyms)
> {
> FILE *f;
> char func[256], buf[256];
> char symbol;
> void *addr;
> - int i = 0;
> + int ret;
>
> - sym_cnt = 0;
> + /* flush kallsyms, free the previously allocated dynamic memory */
> + free_kallsyms_local(ksyms);
>
> f = fopen("/proc/kallsyms", "r");
> if (!f)
> - return -ENOENT;
> + return NULL;
> +
> + ksyms = calloc(1, sizeof(struct ksyms));
> + if (!ksyms)
> + return NULL;
>
> while (fgets(buf, sizeof(buf), f)) {
> if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
> break;
> if (!addr)
> continue;
> - if (i >= MAX_SYMS)
> - return -EFBIG;
>
> - syms[i].addr = (long) addr;
> - syms[i].name = strdup(func);
> - i++;
> + ret = libbpf_ensure_mem((void **) &ksyms->syms, &ksyms->sym_cap,
> + sizeof(struct ksym), ksyms->sym_cnt + 1);
> + if (ret)
> + goto error;
> + ret = ksyms__add_symbol(ksyms, func, (unsigned long)addr);
> + if (ret)
> + goto error;
> }
> fclose(f);
> - sym_cnt = i;
> - qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
> - return 0;
> + qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), ksym_cmp);
> + return ksyms;
> +
> +error:
> + free_kallsyms_local(ksyms);
> + return NULL;
> +}
> +
> +int load_kallsyms_refresh(void)
> +{
> + ksyms = load_kallsyms_refresh_local(NULL);
> + return ksyms ? 0 : 1;
> +}
> +
> +struct ksyms *load_kallsyms_local(void)
> +{
> + return load_kallsyms_refresh_local(NULL);
> }

do we need to have load_kallsyms_refresh_local?

we could have ksyms arg passed directly to load_kallsyms_local

struct ksyms *load_kallsyms_local(struct ksyms *old);

it would return fresh ksyms and release old ksyms if it's passed as an argument

basically I mean just to drop load_kallsyms_local above and rename
load_kallsyms_refresh_local to load_kallsyms_local


jirka