Re: [RESEND PATCH v5 1/4] perf/bpf: Call bpf handler directly, not through overflow machinery

From: Andrii Nakryiko
Date: Thu Feb 15 2024 - 19:12:05 EST


On Wed, Feb 14, 2024 at 9:40 AM Kyle Huey <me@xxxxxxxxxxxx> wrote:
>
> To ultimately allow bpf programs attached to perf events to completely
> suppress all of the effects of a perf event overflow (rather than just the
> sample output, as they do today), call bpf_overflow_handler() from
> __perf_event_overflow() directly rather than modifying struct perf_event's
> overflow_handler. Return the bpf program's return value from
> bpf_overflow_handler() so that __perf_event_overflow() knows how to
> proceed. Remove the now unnecessary orig_overflow_handler from struct
> perf_event.
>
> This patch is solely a refactoring and results in no behavior change.
>
> Signed-off-by: Kyle Huey <khuey@xxxxxxxxxxxx>
> Suggested-by: Namhyung Kim <namhyung@xxxxxxxxxx>
> Acked-by: Song Liu <song@xxxxxxxxxx>
> Acked-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> ---
> include/linux/perf_event.h | 6 +-----
> kernel/events/core.c | 28 +++++++++++++++-------------
> 2 files changed, 16 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index d2a15c0c6f8a..c7f54fd74d89 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -810,7 +810,6 @@ struct perf_event {
> perf_overflow_handler_t overflow_handler;
> void *overflow_handler_context;
> #ifdef CONFIG_BPF_SYSCALL
> - perf_overflow_handler_t orig_overflow_handler;
> struct bpf_prog *prog;
> u64 bpf_cookie;
> #endif
> @@ -1357,10 +1356,7 @@ __is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
> #ifdef CONFIG_BPF_SYSCALL
> static inline bool uses_default_overflow_handler(struct perf_event *event)
> {
> - if (likely(is_default_overflow_handler(event)))
> - return true;
> -
> - return __is_default_overflow_handler(event->orig_overflow_handler);
> + return is_default_overflow_handler(event);
> }
> #else
> #define uses_default_overflow_handler(event) \

So in both cases (with and without CONFIG_BPF_SYSCALL),
uses_default_overflow_handler() is now just is_default_overflow_handler(),
right? If so, we can clean all this up quite a bit.

> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index f0f0f71213a1..24a718e7eb98 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -9548,6 +9548,12 @@ static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *r
> return true;
> }
>
> +#ifdef CONFIG_BPF_SYSCALL
> +static int bpf_overflow_handler(struct perf_event *event,
> + struct perf_sample_data *data,
> + struct pt_regs *regs);
> +#endif
> +
> /*
> * Generic event overflow handling, sampling.
> */
> @@ -9617,7 +9623,10 @@ static int __perf_event_overflow(struct perf_event *event,
> irq_work_queue(&event->pending_irq);
> }
>
> - READ_ONCE(event->overflow_handler)(event, data, regs);
> +#ifdef CONFIG_BPF_SYSCALL
> + if (!(event->prog && !bpf_overflow_handler(event, data, regs)))
> +#endif
> + READ_ONCE(event->overflow_handler)(event, data, regs);

This is quite hard to follow... And that CONFIG_BPF_SYSCALL check
breaking apart that if statement is not great. Maybe something like:


bool skip_def_handler = false;

#ifdef CONFIG_BPF_SYSCALL
if (event->prog)
skip_def_handler = bpf_overflow_handler(event, data, regs) == 0;
#endif
if (!skip_def_handler)
READ_ONCE(event->overflow_handler)(event, data, regs);

We can of course invert "skip" to be "run" and invert the conditions, if
that's easier to follow.

>
> if (*perf_event_fasync(event) && event->pending_kill) {
> event->pending_wakeup = 1;
> @@ -10427,9 +10436,9 @@ static void perf_event_free_filter(struct perf_event *event)
> }
>
> #ifdef CONFIG_BPF_SYSCALL
> -static void bpf_overflow_handler(struct perf_event *event,
> - struct perf_sample_data *data,
> - struct pt_regs *regs)
> +static int bpf_overflow_handler(struct perf_event *event,
> + struct perf_sample_data *data,
> + struct pt_regs *regs)
> {
> struct bpf_perf_event_data_kern ctx = {
> .data = data,

[...]