[RFC][PATCH 02/10] eBPF: Add BPF_PROG_TYPE_TRACE_EVENT prog type

From: Tom Zanussi
Date: Fri Feb 12 2016 - 11:12:06 EST


Add a new BPF_PROG_TYPE_TRACE_EVENT prog type that allows the eBPF
program to access the fields defined for a trace event. Trace event
fields are defined and available on both static and kprobes-based
trace events, and so TRACE_EVENT eBPF progs can be used on both types.

The reason a new prog type is needed is that BPF_PROG_TYPE_KPROBE
progs expect a pt_regs * ctx, while a TRACE_EVENT prog needs a trace
record * ctx. It would have been nice to have a single prog type that
could do both, but existing KPROBE progs expect a pt_regs * ctx, and we
can't change that to some more self-descriptive ctx without breaking
existing eBPF programs.

In any case, mixing the two different types of access in a given
program probably isn't that common a thing to want to do - if you're
grabbing probe params and chasing pointers in your probe, you're
probably not typically interested in accessing event fields too, and
vice versa.

Signed-off-by: Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx>
---
include/linux/trace_events.h | 7 +++++
include/uapi/linux/bpf.h | 1 +
kernel/events/core.c | 12 +++++----
kernel/trace/bpf_trace.c | 62 ++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 77 insertions(+), 5 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 70f8fc4..f7f12f3 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -15,6 +15,13 @@ struct tracer;
struct dentry;
struct bpf_prog;

+/*
+ * Context for BPF_PROG_TYPE_TRACE_EVENT programs.
+ * NOTE(review): presumably @call is the event definition and @record the
+ * raw trace record being processed - confirm against the code that fills
+ * this structure in (not part of this patch).
+ */
+struct trace_event_context {
+ struct trace_event_call *call;
+ void *record;
+};
+
+/* Number of bytes of the context that precede the record pointer. */
+#define TRACE_EVENT_CTX_HDR_SIZE offsetof(struct trace_event_context, record)
+
struct trace_print_flags {
unsigned long mask;
const char *name;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9ea2d22..df6a7ff 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -89,6 +89,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_KPROBE,
BPF_PROG_TYPE_SCHED_CLS,
BPF_PROG_TYPE_SCHED_ACT,
+ BPF_PROG_TYPE_TRACE_EVENT,
};

#define BPF_PSEUDO_MAP_FD 1
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cfc227c..c366e6e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7081,15 +7081,17 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
if (event->tp_event->prog)
return -EEXIST;

- if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
- /* bpf programs can only be attached to u/kprobes */
- return -EINVAL;
-
prog = bpf_prog_get(prog_fd);
if (IS_ERR(prog))
return PTR_ERR(prog);

- if (prog->type != BPF_PROG_TYPE_KPROBE) {
+	/*
+	 * KPROBE progs attach only to u/kprobes, TRACE_EVENT progs to any
+	 * trace event; anything else must drop the prog ref and fail.
+	 */
+	if (!(prog->type == BPF_PROG_TYPE_KPROBE &&
+	      (event->tp_event->flags & TRACE_EVENT_FL_UKPROBE)) &&
+	    prog->type != BPF_PROG_TYPE_TRACE_EVENT) {
/* valid fd, but invalid bpf program type */
bpf_prog_put(prog);
return -EINVAL;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4228fd3..78dbac0 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -326,9 +326,71 @@ static struct bpf_prog_type_list kprobe_tl = {
.type = BPF_PROG_TYPE_KPROBE,
};

+/*
+ * Map a helper id to its prototype for BPF_PROG_TYPE_TRACE_EVENT programs.
+ * Any id that is not listed below yields NULL.
+ */
+static const struct bpf_func_proto *
+trace_event_prog_func_proto(enum bpf_func_id func_id)
+{
+	const struct bpf_func_proto *proto = NULL;
+
+	switch (func_id) {
+	/* map access */
+	case BPF_FUNC_map_lookup_elem:
+		proto = &bpf_map_lookup_elem_proto;
+		break;
+	case BPF_FUNC_map_update_elem:
+		proto = &bpf_map_update_elem_proto;
+		break;
+	case BPF_FUNC_map_delete_elem:
+		proto = &bpf_map_delete_elem_proto;
+		break;
+
+	/* information about the current task and cpu */
+	case BPF_FUNC_get_current_pid_tgid:
+		proto = &bpf_get_current_pid_tgid_proto;
+		break;
+	case BPF_FUNC_get_current_uid_gid:
+		proto = &bpf_get_current_uid_gid_proto;
+		break;
+	case BPF_FUNC_get_current_comm:
+		proto = &bpf_get_current_comm_proto;
+		break;
+	case BPF_FUNC_get_smp_processor_id:
+		proto = &bpf_get_smp_processor_id_proto;
+		break;
+
+	/* tracing and perf output */
+	case BPF_FUNC_trace_printk:
+		/* this one is obtained through its accessor, not by address */
+		proto = bpf_get_trace_printk_proto();
+		break;
+	case BPF_FUNC_perf_event_read:
+		proto = &bpf_perf_event_read_proto;
+		break;
+	case BPF_FUNC_perf_event_output:
+		proto = &bpf_perf_event_output_proto;
+		break;
+
+	/* miscellaneous */
+	case BPF_FUNC_probe_read:
+		proto = &bpf_probe_read_proto;
+		break;
+	case BPF_FUNC_ktime_get_ns:
+		proto = &bpf_ktime_get_ns_proto;
+		break;
+	case BPF_FUNC_tail_call:
+		proto = &bpf_tail_call_proto;
+		break;
+
+	default:
+		break;
+	}
+
+	return proto;
+}
+
+/*
+ * Decide whether a TRACE_EVENT program may access its context at
+ * [off, off + size).
+ *
+ * @off:  byte offset of the access into the context
+ * @size: width of the access (assumed to be in bytes, like @off)
+ * @type: access direction; anything other than BPF_READ is rejected
+ *
+ * Returns true only for a read that lies entirely within the first
+ * TRACE_EVENT_CTX_HDR_SIZE + BUF_MAX_DATA_SIZE bytes, false otherwise.
+ */
+static bool trace_event_prog_is_valid_access(int off, int size,
+					     enum bpf_access_type type)
+{
+	const size_t limit = TRACE_EVENT_CTX_HDR_SIZE + BUF_MAX_DATA_SIZE;
+
+	/* only read is allowed */
+	if (type != BPF_READ)
+		return false;
+
+	/* reject negative offsets and non-positive widths up front */
+	if (off < 0 || size <= 0)
+		return false;
+
+	/*
+	 * check bounds: the last byte touched, not just the first, has to
+	 * be inside the window. Written as a subtraction so that a large
+	 * off/size pair cannot overflow the comparison.
+	 */
+	if ((size_t)size > limit || (size_t)off > limit - (size_t)size)
+		return false;
+
+	return true;
+}
+
+/* Verifier callbacks used for BPF_PROG_TYPE_TRACE_EVENT programs */
+static const struct bpf_verifier_ops trace_event_prog_ops = {
+	.is_valid_access	= trace_event_prog_is_valid_access,
+	.get_func_proto		= trace_event_prog_func_proto,
+};
+
+/* Pairs the TRACE_EVENT prog type id with its verifier ops for registration */
+static struct bpf_prog_type_list trace_event_tl = {
+	.type	= BPF_PROG_TYPE_TRACE_EVENT,
+	.ops	= &trace_event_prog_ops,
+};
+
static int __init register_kprobe_prog_ops(void)
{
bpf_register_prog_type(&kprobe_tl);
+ /* the TRACE_EVENT prog type is registered from this same initcall */
+ bpf_register_prog_type(&trace_event_tl);
+
return 0;
}
late_initcall(register_kprobe_prog_ops);
--
1.9.3