Re: [RFC PATCH v3 28/37] WIP: bpf: Add fuse_ops struct_op programs

From: Andrii Nakryiko
Date: Thu Apr 27 2023 - 00:18:32 EST


On Mon, Apr 17, 2023 at 6:42 PM Daniel Rosenberg <drosen@xxxxxxxxxx> wrote:
>
> This introduces a new struct_op type: fuse_ops. This program set
> provides pre and post filters to run around fuse-bpf calls that act
> directly on the lower filesystem.
>
> The inputs are either fixed structures, or struct fuse_buffer's.
>
> These programs are not permitted to make any changes to these fuse_buffers
> unless they create a dynptr wrapper using the supplied kfunc helpers.
>
> Fuse_buffers maintain additional state information that FUSE uses to
> manage memory and determine if additional set up or checks are needed.
>
> Signed-off-by: Daniel Rosenberg <drosen@xxxxxxxxxx>
> ---
> include/linux/bpf_fuse.h | 189 +++++++++++++++++++++++
> kernel/bpf/Makefile | 4 +
> kernel/bpf/bpf_fuse.c | 241 ++++++++++++++++++++++++++++++
> kernel/bpf/bpf_struct_ops_types.h | 4 +
> kernel/bpf/btf.c | 1 +
> kernel/bpf/verifier.c | 9 ++
> 6 files changed, 448 insertions(+)
> create mode 100644 kernel/bpf/bpf_fuse.c
>
> diff --git a/include/linux/bpf_fuse.h b/include/linux/bpf_fuse.h
> index ce8b1b347496..780a7889aea2 100644
> --- a/include/linux/bpf_fuse.h
> +++ b/include/linux/bpf_fuse.h
> @@ -30,6 +30,8 @@ struct fuse_buffer {
> #define BPF_FUSE_MODIFIED (1 << 3) // The helper function allowed writes to the buffer
> #define BPF_FUSE_ALLOCATED (1 << 4) // The helper function allocated the buffer
>
> +extern void *bpf_fuse_get_writeable(struct fuse_buffer *arg, u64 size, bool copy);
> +
> /*
> * BPF Fuse Args
> *
> @@ -81,4 +83,191 @@ static inline unsigned bpf_fuse_arg_size(const struct bpf_fuse_arg *arg)
> return arg->is_buffer ? arg->buffer->size : arg->size;
> }
>
> +struct fuse_ops {
> + uint32_t (*open_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_open_in *in);
> + uint32_t (*open_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_open_in *in,
> + struct fuse_open_out *out);
> +
> + uint32_t (*opendir_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_open_in *in);
> + uint32_t (*opendir_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_open_in *in,
> + struct fuse_open_out *out);
> +
> + uint32_t (*create_open_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_create_in *in, struct fuse_buffer *name);
> + uint32_t (*create_open_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_create_in *in, const struct fuse_buffer *name,
> + struct fuse_entry_out *entry_out, struct fuse_open_out *out);
> +
> + uint32_t (*release_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_release_in *in);
> + uint32_t (*release_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_release_in *in);
> +
> + uint32_t (*releasedir_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_release_in *in);
> + uint32_t (*releasedir_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_release_in *in);
> +
> + uint32_t (*flush_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_flush_in *in);
> + uint32_t (*flush_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_flush_in *in);
> +
> + uint32_t (*lseek_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_lseek_in *in);
> + uint32_t (*lseek_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_lseek_in *in,
> + struct fuse_lseek_out *out);
> +
> + uint32_t (*copy_file_range_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_copy_file_range_in *in);
> + uint32_t (*copy_file_range_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_copy_file_range_in *in,
> + struct fuse_write_out *out);
> +
> + uint32_t (*fsync_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_fsync_in *in);
> + uint32_t (*fsync_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_fsync_in *in);
> +
> + uint32_t (*dir_fsync_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_fsync_in *in);
> + uint32_t (*dir_fsync_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_fsync_in *in);
> +
> + uint32_t (*getxattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_getxattr_in *in, struct fuse_buffer *name);
> + // if in->size > 0, use value. If in->size == 0, use out.
> + uint32_t (*getxattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_getxattr_in *in, const struct fuse_buffer *name,
> + struct fuse_buffer *value, struct fuse_getxattr_out *out);
> +
> + uint32_t (*listxattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_getxattr_in *in);
> + // if in->size > 0, use value. If in->size == 0, use out.
> + uint32_t (*listxattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_getxattr_in *in,
> + struct fuse_buffer *value, struct fuse_getxattr_out *out);
> +
> + uint32_t (*setxattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_setxattr_in *in, struct fuse_buffer *name,
> + struct fuse_buffer *value);
> + uint32_t (*setxattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_setxattr_in *in, const struct fuse_buffer *name,
> + const struct fuse_buffer *value);
> +
> + uint32_t (*removexattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_buffer *name);
> + uint32_t (*removexattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_buffer *name);
> +
> + /* Read and Write iter will likely undergo some sort of change/addition to handle changing
> + * the data buffer passed in/out. */
> + uint32_t (*read_iter_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_read_in *in);
> + uint32_t (*read_iter_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_read_in *in,
> + struct fuse_read_iter_out *out);
> +
> + uint32_t (*write_iter_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_write_in *in);
> + uint32_t (*write_iter_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_write_in *in,
> + struct fuse_write_iter_out *out);
> +
> + uint32_t (*file_fallocate_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_fallocate_in *in);
> + uint32_t (*file_fallocate_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_fallocate_in *in);
> +
> + uint32_t (*lookup_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_buffer *name);
> + uint32_t (*lookup_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_buffer *name,
> + struct fuse_entry_out *out, struct fuse_buffer *entries);
> +
> + uint32_t (*mknod_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_mknod_in *in, struct fuse_buffer *name);
> + uint32_t (*mknod_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_mknod_in *in, const struct fuse_buffer *name);
> +
> + uint32_t (*mkdir_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_mkdir_in *in, struct fuse_buffer *name);
> + uint32_t (*mkdir_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_mkdir_in *in, const struct fuse_buffer *name);
> +
> + uint32_t (*rmdir_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_buffer *name);
> + uint32_t (*rmdir_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_buffer *name);
> +
> + uint32_t (*rename2_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_rename2_in *in, struct fuse_buffer *old_name,
> + struct fuse_buffer *new_name);
> + uint32_t (*rename2_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_rename2_in *in, const struct fuse_buffer *old_name,
> + const struct fuse_buffer *new_name);
> +
> + uint32_t (*rename_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_rename_in *in, struct fuse_buffer *old_name,
> + struct fuse_buffer *new_name);
> + uint32_t (*rename_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_rename_in *in, const struct fuse_buffer *old_name,
> + const struct fuse_buffer *new_name);
> +
> + uint32_t (*unlink_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_buffer *name);
> + uint32_t (*unlink_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_buffer *name);
> +
> + uint32_t (*link_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_link_in *in, struct fuse_buffer *name);
> + uint32_t (*link_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_link_in *in, const struct fuse_buffer *name);
> +
> + uint32_t (*getattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_getattr_in *in);
> + uint32_t (*getattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_getattr_in *in,
> + struct fuse_attr_out *out);
> +
> + uint32_t (*setattr_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_setattr_in *in);
> + uint32_t (*setattr_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_setattr_in *in,
> + struct fuse_attr_out *out);
> +
> + uint32_t (*statfs_prefilter)(const struct bpf_fuse_meta_info *meta);
> + uint32_t (*statfs_postfilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_statfs_out *out);
> +
> + //TODO: This does not allow doing anything with path
> + uint32_t (*get_link_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_buffer *name);
> + uint32_t (*get_link_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_buffer *name);
> +
> + uint32_t (*symlink_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_buffer *name, struct fuse_buffer *path);
> + uint32_t (*symlink_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_buffer *name, const struct fuse_buffer *path);
> +
> + uint32_t (*readdir_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_read_in *in);
> + uint32_t (*readdir_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_read_in *in,
> + struct fuse_read_out *out, struct fuse_buffer *buffer);
> +
> + uint32_t (*access_prefilter)(const struct bpf_fuse_meta_info *meta,
> + struct fuse_access_in *in);
> + uint32_t (*access_postfilter)(const struct bpf_fuse_meta_info *meta,
> + const struct fuse_access_in *in);
> +
> + char name[BPF_FUSE_NAME_MAX];
> +};

Have you considered grouping this huge amount of callbacks into a
smaller set of more generic callbacks where each callback would get
enum argument specifying what sort of operation it is called for? This
has many advantages, starting from not having to deal with struct_ops
limits, ending with not needing to instantiate dozens of individual
BPF programs.

E.g., for a lot of operations the difference between pre- and
post-filter is in having in argument as read-only and maybe having
extra out argument for post-filter. One way to unify such post/pre
filters into one callback would be to record whether in has to be
read-only or read-write and not allow to create r/w dynptr for the
former case. Pass bool or enum specifying if it's post or pre filter.
For that optional out argument, you can simulate effectively the same
by always supplying it, but making sure that out parameter is
read-only and zero-sized, for example.

That would cut the number of callbacks in two, which I'd say still is
not great :) I think it would be better still to have even larger
groups of callbacks for whole families of operations with the same (or
"unifiable") interface (domain experts like you would need to do an
analysis here to see what makes sense to group, of course).

We'll probably touch on that tomorrow at BPF office hours, but I
wanted to point this out beforehand, so that you have time to think
about it.

> +
> #endif /* _BPF_FUSE_H */
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index 1d3892168d32..26a2e741ef61 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile

[...]

> +__diag_push();
> +__diag_ignore_all("-Wmissing-prototypes",
> + "Global kfuncs as their definitions will be in BTF");
> +void bpf_fuse_get_rw_dynptr(struct fuse_buffer *buffer, struct bpf_dynptr_kern *dynptr__uninit, u64 size, bool copy)

not clear why size is passed from outside instead of instantiating
dynptr with buffer->size? See [0] for bpf_dynptr_adjust and
bpf_dynptr_clone that allow you to adjust buffer as necessary.

As for the copy parameter, can you elaborate on the idea behind it?

[0] https://patchwork.kernel.org/project/netdevbpf/list/?series=741584&state=*

> +{
> + buffer->data = bpf_fuse_get_writeable(buffer, size, copy);
> + bpf_dynptr_init(dynptr__uninit, buffer->data, BPF_DYNPTR_TYPE_LOCAL, 0, buffer->size);
> +}
> +
> +void bpf_fuse_get_ro_dynptr(const struct fuse_buffer *buffer, struct bpf_dynptr_kern *dynptr__uninit)

these kfuncs probably should be more consistently named as
bpf_dynptr_from_fuse_buffer_{ro,rw}() ?

> +{
> + bpf_dynptr_init(dynptr__uninit, buffer->data, BPF_DYNPTR_TYPE_LOCAL, 0, buffer->size);
> + bpf_dynptr_set_rdonly(dynptr__uninit);
> +}
> +
> +uint32_t bpf_fuse_return_len(struct fuse_buffer *buffer)
> +{
> + return buffer->size;

you should be able to get this with bpf_dynptr_size() (once you create
it from fuse_buffer).

> +}
> +__diag_pop();
> +BTF_SET8_START(fuse_kfunc_set)
> +BTF_ID_FLAGS(func, bpf_fuse_get_rw_dynptr)
> +BTF_ID_FLAGS(func, bpf_fuse_get_ro_dynptr)
> +BTF_ID_FLAGS(func, bpf_fuse_return_len)
> +BTF_SET8_END(fuse_kfunc_set)
> +
> +static const struct btf_kfunc_id_set bpf_fuse_kfunc_set = {
> + .owner = THIS_MODULE,
> + .set = &fuse_kfunc_set,
> +};
> +
> +static int __init bpf_fuse_kfuncs_init(void)
> +{
> + return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
> + &bpf_fuse_kfunc_set);
> +}
> +
> +late_initcall(bpf_fuse_kfuncs_init);
> +
> +static const struct bpf_func_proto *bpf_fuse_get_func_proto(enum bpf_func_id func_id,
> + const struct bpf_prog *prog)
> +{
> + switch (func_id) {
> + default:
> + return bpf_base_func_proto(func_id);
> + }
> +}
> +
> +static bool bpf_fuse_is_valid_access(int off, int size,
> + enum bpf_access_type type,
> + const struct bpf_prog *prog,
> + struct bpf_insn_access_aux *info)
> +{
> + return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
> +}
> +
> +const struct btf_type *fuse_buffer_struct_type;
> +
> +static int bpf_fuse_btf_struct_access(struct bpf_verifier_log *log,
> + const struct bpf_reg_state *reg,
> + int off, int size)
> +{
> + const struct btf_type *t;
> +
> + t = btf_type_by_id(reg->btf, reg->btf_id);
> + if (t == fuse_buffer_struct_type) {
> + bpf_log(log,
> + "direct access to fuse_buffer is disallowed\n");
> + return -EACCES;
> + }
> +
> + return 0;
> +}
> +
> +static const struct bpf_verifier_ops bpf_fuse_verifier_ops = {
> + .get_func_proto = bpf_fuse_get_func_proto,

you probably should be fine with just using bpf_tracing_func_proto as is

> + .is_valid_access = bpf_fuse_is_valid_access,

similarly, why custom no-op callback?

> + .btf_struct_access = bpf_fuse_btf_struct_access,
> +};
> +
> +static int bpf_fuse_check_member(const struct btf_type *t,
> + const struct btf_member *member,
> + const struct bpf_prog *prog)
> +{
> + //if (is_unsupported(__btf_member_bit_offset(t, member) / 8))
> + // return -ENOTSUPP;
> + return 0;
> +}
> +
> +static int bpf_fuse_init_member(const struct btf_type *t,
> + const struct btf_member *member,
> + void *kdata, const void *udata)
> +{
> + const struct fuse_ops *uf_ops;
> + struct fuse_ops *f_ops;
> + u32 moff;
> +
> + uf_ops = (const struct fuse_ops *)udata;
> + f_ops = (struct fuse_ops *)kdata;
> +
> + moff = __btf_member_bit_offset(t, member) / 8;
> + switch (moff) {
> + case offsetof(struct fuse_ops, name):
> + if (bpf_obj_name_cpy(f_ops->name, uf_ops->name,
> + sizeof(f_ops->name)) <= 0)
> + return -EINVAL;
> + //if (tcp_ca_find(utcp_ca->name))
> + // return -EEXIST;
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +static int bpf_fuse_init(struct btf *btf)
> +{
> + s32 type_id;
> +
> + type_id = btf_find_by_name_kind(btf, "fuse_buffer", BTF_KIND_STRUCT);
> + if (type_id < 0)
> + return -EINVAL;
> + fuse_buffer_struct_type = btf_type_by_id(btf, type_id);
> +

see BTF_ID and BTF_ID_LIST uses for how to get ID for your custom
well-known type

> + return 0;
> +}
> +
> +static struct bpf_fuse_ops_attach *fuse_reg = NULL;
> +

[...]