Re: [PATCH v1 2/4] perf trace: Migrate BPF augmentation to use a skeleton

From: Arnaldo Carvalho de Melo
Date: Fri Aug 11 2023 - 14:46:51 EST


Em Thu, Aug 10, 2023 at 11:48:51AM -0700, Ian Rogers escreveu:
> Previously a BPF event of augmented_raw_syscalls.c could be used to
> enable augmentation of syscalls by perf trace. As BPF events are no
> longer supported, switch to using a BPF skeleton which when attached
> explicitly opens the sysenter and sysexit tracepoints.
>
> The dump map is removed as debugging wasn't supported by the
> augmentation and bpf_printk can be used when necessary.
>
> Remove tools/perf/examples/bpf/augmented_raw_syscalls.c so that the
> rename/migration to a BPF skeleton captures that this was the source.

So, there is a problem where the augmented_raw_syscalls connect/sendto
handlers are being rejected by the verifier, the way you did it makes it
to print the verifier output and then continue without augmentation,
unsure if this is a good default, opinions?

[root@quaco ~]# perf trace -e open*
libbpf: prog 'sys_enter_connect': BPF program load failed: Permission denied
libbpf: prog 'sys_enter_connect': -- BEGIN PROG LOAD LOG --
reg type unsupported for arg#0 function sys_enter_connect#59
0: R1=ctx(off=0,imm=0) R10=fp0
; int sys_enter_connect(struct syscall_enter_args *args)
0: (bf) r6 = r1 ; R1=ctx(off=0,imm=0) R6_w=ctx(off=0,imm=0)
1: (b7) r1 = 0 ; R1_w=0
; int key = 0;
2: (63) *(u32 *)(r10 -4) = r1 ; R1_w=0 R10=fp0 fp-8=0000????
3: (bf) r2 = r10 ; R2_w=fp0 R10=fp0
;
4: (07) r2 += -4 ; R2_w=fp-4
; return bpf_map_lookup_elem(&augmented_args_tmp, &key);
5: (18) r1 = 0xffff8de5ae1d4600 ; R1_w=map_ptr(off=0,ks=4,vs=8272,imm=0)
7: (85) call bpf_map_lookup_elem#1 ; R0_w=map_value_or_null(id=1,off=0,ks=4,vs=8272,imm=0)
8: (bf) r7 = r0 ; R0_w=map_value_or_null(id=1,off=0,ks=4,vs=8272,imm=0) R7_w=map_value_or_null(id=1,off=0,ks=4,vs=8272,imm=0)
9: (b7) r0 = 1 ; R0_w=1
; if (augmented_args == NULL)
10: (15) if r7 == 0x0 goto pc+25 ; R7_w=map_value(off=0,ks=4,vs=8272,imm=0)
; unsigned int socklen = args->args[2];
11: (79) r1 = *(u64 *)(r6 +32) ; R1_w=scalar() R6_w=ctx(off=0,imm=0)
;
12: (bf) r2 = r1 ; R1_w=scalar(id=2) R2_w=scalar(id=2)
13: (67) r2 <<= 32 ; R2_w=scalar(smax=9223372032559808512,umax=18446744069414584320,var_off=(0x0; 0xffffffff00000000),s32_min=0,s32_max=0,u32_max=0)
14: (77) r2 >>= 32 ; R2_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff))
15: (b7) r8 = 128 ; R8=128
; if (socklen > sizeof(augmented_args->saddr))
16: (25) if r2 > 0x80 goto pc+1 ; R2=scalar(umax=128,var_off=(0x0; 0xff))
17: (bf) r8 = r1 ; R1=scalar(id=2) R8_w=scalar(id=2)
; const void *sockaddr_arg = (const void *)args->args[1];
18: (79) r3 = *(u64 *)(r6 +24) ; R3_w=scalar() R6=ctx(off=0,imm=0)
; bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
19: (bf) r1 = r7 ; R1_w=map_value(off=0,ks=4,vs=8272,imm=0) R7=map_value(off=0,ks=4,vs=8272,imm=0)
20: (07) r1 += 64 ; R1_w=map_value(off=64,ks=4,vs=8272,imm=0)
; bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
21: (bf) r2 = r8 ; R2_w=scalar(id=2) R8_w=scalar(id=2)
22: (85) call bpf_probe_read#4
R2 min value is negative, either use unsigned or 'var &= const'
processed 22 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1
-- END PROG LOAD LOG --
libbpf: prog 'sys_enter_connect': failed to load: -13
libbpf: failed to load object 'augmented_raw_syscalls_bpf'
libbpf: failed to load BPF skeleton 'augmented_raw_syscalls_bpf': -13
0.000 systemd-oomd/959 openat(dfd: CWD, filename: 0xc0a2a2bd, flags: RDONLY|CLOEXEC) = 12
86.339 thermald/1234 openat(dfd: CWD, filename: 0xac000ba0) = 13
87.008 thermald/1234 openat(dfd: CWD, filename: 0xac000eb0) = 13
87.270 thermald/1234 openat(dfd: CWD, filename: 0xac000b70) = 13
89.657 thermald/1234 openat(dfd: CWD, filename: 0xac000eb0) = 13
^C

If I comment out the connect and sendto it doesn't build anymore,
whereas before it would continue with the other handlers:

CLANG /tmp/build/perf-tools-next/util/bpf_skel/.tmp/augmented_raw_syscalls.bpf.o
GENSKEL /tmp/build/perf-tools-next/util/bpf_skel/augmented_raw_syscalls.skel.h
CC /tmp/build/perf-tools-next/builtin-trace.o
builtin-trace.c: In function ‘cmd_trace’:
builtin-trace.c:4873:63: error: ‘struct <anonymous>’ has no member named ‘sys_enter_connect’; did you mean ‘sys_enter_openat’?
4873 | bpf_program__set_autoattach(trace.skel->progs.sys_enter_connect,
| ^~~~~~~~~~~~~~~~~
| sys_enter_openat
builtin-trace.c:4875:63: error: ‘struct <anonymous>’ has no member named ‘sys_enter_sendto’; did you mean ‘sys_enter_openat’?
4875 | bpf_program__set_autoattach(trace.skel->progs.sys_enter_sendto,
| ^~~~~~~~~~~~~~~~
| sys_enter_openat
make[3]: *** [/home/acme/git/perf-tools-next/tools/build/Makefile.build:97: /tmp/build/perf-tools-next/builtin-trace.o] Error 1
make[2]: *** [Makefile.perf:662: /tmp/build/perf-tools-next/perf-in.o] Error 2
make[1]: *** [Makefile.perf:238: sub-make] Error 2
make: *** [Makefile:113: install-bin] Error 2
make: Leaving directory '/home/acme/git/perf-tools-next/tools/perf'
[acme@quaco perf-tools-next]$



I.e. no need for explicitely referring to those, I think in the past it
was just looking if it was there and if so, attaching, I'll try to fix
this.

If I remove the explicit references in builtin-trace.c:

[root@quaco ~]# perf trace -e open* --max-events=10
0.000 thermald/1234 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:2/energy_uj") = 13
0.236 thermald/1234 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj") = 13
0.334 thermald/1234 openat(dfd: CWD, filename: "/sys/class/thermal/thermal_zone2/temp") = 13
9.092 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12
259.212 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12
497.464 gpm/1049 openat(dfd: CWD, filename: "/dev/tty0") = 4
509.044 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12
509.559 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/session.slice/memory.pressure", flags: RDONLY|CLOEXEC) = 12
509.917 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/session.slice/memory.current", flags: RDONLY|CLOEXEC) = 12
510.111 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/session.slice/memory.min", flags: RDONLY|CLOEXEC) = 12
[root@quaco ~]#

Cool!

Some inception:

[root@quaco ~]# perf trace -e perf_event_open perf stat -e cycles,instructions,cache-misses sleep 1
0.000 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0 (PERF_COUNT_HW_CPU_CYCLES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 232297 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3
0.063 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x1 (PERF_COUNT_HW_INSTRUCTIONS), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 232297 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
0.070 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x3 (PERF_COUNT_HW_CACHE_MISSES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 232297 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5

Performance counter stats for 'sleep 1':

2,669,464 cycles
1,842,319 instructions # 0.69 insn per cycle
27,716 cache-misses

1.001948592 seconds time elapsed

0.000000000 seconds user
0.001657000 seconds sys


[root@quaco ~]#

I'm putting what I have in the tmp.perf-tools-next branch, will continue
later today.

- Arnaldo