Re: [RFC PATCH] tracing: change syscall number type in struct syscall_trace_*

From: Andrii Nakryiko
Date: Tue Oct 03 2023 - 18:11:33 EST


On Mon, Oct 2, 2023 at 6:53 AM Artem Savkov <asavkov@xxxxxxxxxx> wrote:
>
> linux-rt-devel tree contains a patch that adds an extra member to struct

Can you please point to the patch itself that makes that change?

> trace_entry. This causes the offset of the args field in struct
> trace_event_raw_sys_enter to be different from the one in struct
> syscall_trace_enter:
>
> struct trace_event_raw_sys_enter {
> struct trace_entry ent; /* 0 12 */
>
> /* XXX last struct has 3 bytes of padding */
> /* XXX 4 bytes hole, try to pack */
>
> long int id; /* 16 8 */
> long unsigned int args[6]; /* 24 48 */
> /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
> char __data[]; /* 72 0 */
>
> /* size: 72, cachelines: 2, members: 4 */
> /* sum members: 68, holes: 1, sum holes: 4 */
> /* paddings: 1, sum paddings: 3 */
> /* last cacheline: 8 bytes */
> };
>
> struct syscall_trace_enter {
> struct trace_entry ent; /* 0 12 */
>
> /* XXX last struct has 3 bytes of padding */
>
> int nr; /* 12 4 */
> long unsigned int args[]; /* 16 0 */
>
> /* size: 16, cachelines: 1, members: 3 */
> /* paddings: 1, sum paddings: 3 */
> /* last cacheline: 16 bytes */
> };
>
> This, in turn, causes perf_event_set_bpf_prog() to fail while running the
> bpf test_profiler testcase because max_ctx_offset is calculated based on
> the former struct, while off is calculated based on the latter:
>
> 10488 if (is_tracepoint || is_syscall_tp) {
> 10489 int off = trace_event_get_offsets(event->tp_event);
> 10490
> 10491 if (prog->aux->max_ctx_offset > off)
> 10492 return -EACCES;
> 10493 }
>
> This patch changes the type of the nr member in the syscall_trace_* structs
> to long so that the offset of "args" is equal to that in struct
> trace_event_raw_sys_enter.
>
> Signed-off-by: Artem Savkov <asavkov@xxxxxxxxxx>
> ---
> kernel/trace/trace.h | 4 ++--
> kernel/trace/trace_syscalls.c | 7 ++++---
> 2 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 77debe53f07cf..cd1d24df85364 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -135,13 +135,13 @@ enum trace_type {
> */
> struct syscall_trace_enter {
> struct trace_entry ent;
> - int nr;
> + long nr;
> unsigned long args[];
> };
>
> struct syscall_trace_exit {
> struct trace_entry ent;
> - int nr;
> + long nr;
> long ret;
> };
>
> diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
> index de753403cdafb..c26939119f2e4 100644
> --- a/kernel/trace/trace_syscalls.c
> +++ b/kernel/trace/trace_syscalls.c
> @@ -101,7 +101,7 @@ find_syscall_meta(unsigned long syscall)
> return NULL;
> }
>
> -static struct syscall_metadata *syscall_nr_to_meta(int nr)
> +static struct syscall_metadata *syscall_nr_to_meta(long nr)
> {
> if (IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR))
> return xa_load(&syscalls_metadata_sparse, (unsigned long)nr);
> @@ -132,7 +132,8 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
> struct trace_entry *ent = iter->ent;
> struct syscall_trace_enter *trace;
> struct syscall_metadata *entry;
> - int i, syscall;
> + int i;
> + long syscall;
>
> trace = (typeof(trace))ent;
> syscall = trace->nr;
> @@ -177,7 +178,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
> struct trace_seq *s = &iter->seq;
> struct trace_entry *ent = iter->ent;
> struct syscall_trace_exit *trace;
> - int syscall;
> + long syscall;
> struct syscall_metadata *entry;
>
> trace = (typeof(trace))ent;
> --
> 2.41.0
>
>