Re: [PATCH bpf-next v3 1/5] bpf: add writable context for raw tracepoints

From: Yonghong Song
Date: Mon Apr 22 2019 - 17:17:56 EST




On 4/22/19 12:23 PM, Matt Mullins wrote:
> On Mon, 2019-04-22 at 18:12 +0000, Yonghong Song wrote:
>>
>> On 4/19/19 2:04 PM, Matt Mullins wrote:
>>> This is an opt-in interface that allows a tracepoint to provide a safe
>>> buffer that can be written from a BPF_PROG_TYPE_RAW_TRACEPOINT program.
>>> The size of the buffer must be a compile-time constant, and is checked
>>> before allowing a BPF program to attach to a tracepoint that uses this
>>> feature.
>>>
>>> The pointer to this buffer will be the first argument of tracepoints
>>> that opt in; the buffer is readable by both BPF_PROG_TYPE_RAW_TRACEPOINT
>>> and BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that attach to such a
>>> tracepoint, but the buffer to which it points may only be written by the
>>> latter.
>>>
>>> Signed-off-by: Matt Mullins <mmullins@xxxxxx>
>>> ---
>>> include/linux/bpf.h | 2 ++
>>> include/linux/bpf_types.h | 1 +
>>> include/linux/tracepoint-defs.h | 1 +
>>> include/trace/bpf_probe.h | 27 +++++++++++++++++++++++++--
>>> include/uapi/linux/bpf.h | 1 +
>>> kernel/bpf/syscall.c | 8 ++++++--
>>> kernel/bpf/verifier.c | 31 +++++++++++++++++++++++++++++++
>>> kernel/trace/bpf_trace.c | 21 +++++++++++++++++++++
>>> 8 files changed, 88 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>>> index a2132e09dc1c..d3c71fd67476 100644
>>> --- a/include/linux/bpf.h
>>> +++ b/include/linux/bpf.h
>>> @@ -263,6 +263,7 @@ enum bpf_reg_type {
>>> PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
>>> PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
>>> PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
>>> + PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
>>> };
>>>
>>
>> [...]
>>> /* truncate register to smaller size (in bytes)
>>> * must be called with size < BPF_REG_SIZE
>>> */
>>> @@ -2100,6 +2127,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
>>> err = check_sock_access(env, insn_idx, regno, off, size, t);
>>> if (!err && value_regno >= 0)
>>> mark_reg_unknown(env, regs, value_regno);
>>> + } else if (reg->type == PTR_TO_TP_BUFFER) {
>>> + err = check_tp_buffer_access(env, reg, regno, off, size);
>>> + if (!err && t == BPF_READ && value_regno >= 0)
>>> + mark_reg_unknown(env, regs, value_regno);
>>> } else {
>>> verbose(env, "R%d invalid mem access '%s'\n", regno,
>>> reg_type_str[reg->type]);
>>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>>> index d64c00afceb5..a2dd79dc6871 100644
>>> --- a/kernel/trace/bpf_trace.c
>>> +++ b/kernel/trace/bpf_trace.c
>>> @@ -909,6 +909,24 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
>>> const struct bpf_prog_ops raw_tracepoint_prog_ops = {
>>> };
>>>
>>> +static bool raw_tp_writable_prog_is_valid_access(int off, int size,
>>> + enum bpf_access_type type,
>>> + const struct bpf_prog *prog,
>>> + struct bpf_insn_access_aux *info)
>>> +{
>>> + if (off == 0 && size == sizeof(u64))
>>> + info->reg_type = PTR_TO_TP_BUFFER;
>>
>> on 32bit system, the first argument pointer size could be sizeof(u32)?
>
> As far as I can tell, pointers are always 64 bits wide from the
> perspective of the eBPF instruction set. I think the proper fixup is
> in include/trace/events/nbd.h ... I should use a u64 instead of a
> pointer type.

u64 is okay. You may want to double check tracepoint definition to
ensure the assign to the first argument converting to u64 as well to
avoid potential garbage. It would be good if this is enforced during
compilation time.

>
>> Should the first argument for raw_tp_writable_prog be always
>> PTR_TO_TP_BUFFER?
>
> That is the purpose of this patch series, yes. My initial attempt at
> this tried to add it to the end of the context structure instead, and
> that ended up being quite complex to track.

So `size == sizeof(u64)` can be removed, off == 0 just implies
reg_type PTR_TO_TP_BUFFER?

>
>>
>>> + return raw_tp_prog_is_valid_access(off, size, type, prog, info);
>>> +}
>>> +
>>> +const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
>>> + .get_func_proto = raw_tp_prog_func_proto,
>>> + .is_valid_access = raw_tp_writable_prog_is_valid_access,
>>> +};
>>> +
>>> +const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
>>> +};
>>> +
>>> static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
>>> const struct bpf_prog *prog,
>>> struct bpf_insn_access_aux *info)
>>> @@ -1198,6 +1216,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
>>> if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
>>> return -EINVAL;
>>>
>>> + if (prog->aux->max_tp_access > btp->writable_size)
>>> + return -EINVAL;
>>> +
>>> return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
>>> }
>>>
>>>