Re: [PATCH bpf] bpf: Add LBR data to BPF_PROG_TYPE_PERF_EVENT prog context

From: Alexei Starovoitov
Date: Fri Dec 06 2019 - 01:39:59 EST


On Thu, Dec 05, 2019 at 04:12:26PM -0800, Daniel Xu wrote:
> Last-branch-record (LBR) is an Intel CPU feature that can be configured to
> record certain branches that are taken during code execution. This data
> is particularly interesting for profile guided optimizations. perf has
> had LBR support for a while but the data collection can be a bit coarse
> grained.
>
> We (Facebook) have recently run a lot of experiments with feeding
> filtered LBR data to various PGO pipelines. We've seen really good
> results (+2.5% throughput with lower cpu util and lower latency) by
> feeding high request latency LBR branches to the compiler on a
> request-oriented service. We used bpf to read a special request context
> ID (which is how we associate branches with latency) from a fixed
> userspace address. Reading from the fixed address is why bpf support is
> useful.
>
> Aside from this particular use case, having LBR data available to bpf
> progs can be useful to get stack traces out of userspace applications
> that omit frame pointers.
>
> This patch adds support for LBR data to bpf perf progs.
>
> Some notes:
> * We use `__u64 entries[BPF_MAX_LBR_ENTRIES * 3]` instead of
> `struct perf_branch_entry[BPF_MAX_LBR_ENTRIES]` because checkpatch.pl
> warns about including a uapi header from another uapi header
>
> * We define BPF_MAX_LBR_ENTRIES as 32 (instead of using the value from
> arch/x86/events/perf_event.h) because including arch specific headers
> seems wrong and could introduce circular header includes.
>
> Signed-off-by: Daniel Xu <dxu@xxxxxxxxx>
> ---
> include/uapi/linux/bpf_perf_event.h | 5 ++++
> kernel/trace/bpf_trace.c | 39 +++++++++++++++++++++++++++++
> 2 files changed, 44 insertions(+)
>
> diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
> index eb1b9d21250c..dc87e3d50390 100644
> --- a/include/uapi/linux/bpf_perf_event.h
> +++ b/include/uapi/linux/bpf_perf_event.h
> @@ -10,10 +10,15 @@
>
> #include <asm/bpf_perf_event.h>
>
> +#define BPF_MAX_LBR_ENTRIES 32
> +
> struct bpf_perf_event_data {
> bpf_user_pt_regs_t regs;
> __u64 sample_period;
> __u64 addr;
> + __u64 nr_lbr;
> + /* Cast to struct perf_branch_entry* before using */
> + __u64 entries[BPF_MAX_LBR_ENTRIES * 3];
> };
>
> #endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index ffc91d4935ac..96ba7995b3d7 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1259,6 +1259,14 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
> if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
> return false;
> break;
> + case bpf_ctx_range(struct bpf_perf_event_data, nr_lbr):
> + bpf_ctx_record_field_size(info, size_u64);
> + if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
> + return false;
> + break;
> + case bpf_ctx_range(struct bpf_perf_event_data, entries):
> + /* No narrow loads */
> + break;
> default:
> if (size != sizeof(long))
> return false;
> @@ -1273,6 +1281,7 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
> struct bpf_prog *prog, u32 *target_size)
> {
> struct bpf_insn *insn = insn_buf;
> + int off;
>
> switch (si->off) {
> case offsetof(struct bpf_perf_event_data, sample_period):
> @@ -1291,6 +1300,36 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
> bpf_target_off(struct perf_sample_data, addr, 8,
> target_size));
> break;
> + case offsetof(struct bpf_perf_event_data, nr_lbr):
> + /* Load struct perf_sample_data* */
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
> + data), si->dst_reg, si->src_reg,
> + offsetof(struct bpf_perf_event_data_kern, data));
> + /* Load struct perf_branch_stack* */
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct perf_sample_data, br_stack),
> + si->dst_reg, si->dst_reg,
> + offsetof(struct perf_sample_data, br_stack));

br_stack can be NULL, so the generated instructions must also emit a
NULL check before dereferencing it.

Otherwise looks good.
Please add a selftest and resubmit when bpf-next reopens next week.