Re: [PATCH v2 2/5] seccomp, bpf: Introduce SECCOMP_LOAD_FILTER operation

From: Hengqi Chen
Date: Mon Oct 23 2023 - 00:19:36 EST


On Mon, Oct 16, 2023 at 8:44 PM Daniel Borkmann <daniel@xxxxxxxxxxxxx> wrote:
>
> On 10/16/23 1:29 AM, Hengqi Chen wrote:
> > This patch adds a new operation named SECCOMP_LOAD_FILTER.
> > It accepts a sock_fprog the same as SECCOMP_SET_MODE_FILTER
> > but only performs the loading process. If succeed, return a
> > new fd associated with the JITed BPF program (the filter).
> > The filter can then be pinned to bpffs using the returned
> > fd and reused for different processes. To distinguish the
> > filter from other BPF progs, BPF_PROG_TYPE_SECCOMP is added.
> >
> > Signed-off-by: Hengqi Chen <hengqi.chen@xxxxxxxxx>
> > ---
> > include/uapi/linux/bpf.h | 1 +
> > include/uapi/linux/seccomp.h | 1 +
> > kernel/seccomp.c | 43 ++++++++++++++++++++++++++++++++++
> > tools/include/uapi/linux/bpf.h | 1 +
> > 4 files changed, 46 insertions(+)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 7ba61b75bc0e..61c80ffb1724 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -995,6 +995,7 @@ enum bpf_prog_type {
> > BPF_PROG_TYPE_SK_LOOKUP,
> > BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
> > BPF_PROG_TYPE_NETFILTER,
> > + BPF_PROG_TYPE_SECCOMP,
>
> Please don't extend UAPI surface if this is not reachable/usable from user
> space anyway.
>
> > enum bpf_attach_type {
> > diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> > index dbfc9b37fcae..ee2c83697810 100644
> > --- a/include/uapi/linux/seccomp.h
> > +++ b/include/uapi/linux/seccomp.h
> > @@ -16,6 +16,7 @@
> > #define SECCOMP_SET_MODE_FILTER 1
> > #define SECCOMP_GET_ACTION_AVAIL 2
> > #define SECCOMP_GET_NOTIF_SIZES 3
> > +#define SECCOMP_LOAD_FILTER 4
> >
> > /* Valid flags for SECCOMP_SET_MODE_FILTER */
> > #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
> > diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> > index faf84fc892eb..c9f6a19f7a4e 100644
> > --- a/kernel/seccomp.c
> > +++ b/kernel/seccomp.c
> > @@ -17,6 +17,7 @@
> >
> > #include <linux/refcount.h>
> > #include <linux/audit.h>
> > +#include <linux/bpf.h>
> > #include <linux/compat.h>
> > #include <linux/coredump.h>
> > #include <linux/kmemleak.h>
> > @@ -25,6 +26,7 @@
> > #include <linux/sched.h>
> > #include <linux/sched/task_stack.h>
> > #include <linux/seccomp.h>
> > +#include <linux/security.h>
> > #include <linux/slab.h>
> > #include <linux/syscalls.h>
> > #include <linux/sysctl.h>
> > @@ -2032,12 +2034,48 @@ static long seccomp_set_mode_filter(unsigned int flags,
> > seccomp_filter_free(prepared);
> > return ret;
> > }
> > +
> > +static long seccomp_load_filter(const char __user *filter)
> > +{
> > + struct sock_fprog fprog;
> > + struct bpf_prog *prog;
> > + int ret;
> > +
> > + ret = seccomp_copy_user_filter(filter, &fprog);
> > + if (ret)
> > + return ret;
> > +
> > + ret = seccomp_prepare_prog(&prog, &fprog);
> > + if (ret)
> > + return ret;
> > +
> > + ret = security_bpf_prog_alloc(prog->aux);
> > + if (ret) {
> > + bpf_prog_free(prog);
> > + return ret;
> > + }
> > +
> > + prog->aux->user = get_current_user();
> > + atomic64_set(&prog->aux->refcnt, 1);
> > + prog->type = BPF_PROG_TYPE_SECCOMP;
> > +
> > + ret = bpf_prog_new_fd(prog);
> > + if (ret < 0)
> > + bpf_prog_put(prog);
>
> My bigger concern here is that bpf_prog_new_fd() is only used by eBPF (not cBPF).
>
> Then you get an 'eBPF'-like fd back to user space which you can pass to various
> other bpf(2) commands like BPF_OBJ_GET_INFO_BY_FD etc which all have the assumption
> that this is a proper looking eBPF prog fd.
>
> There may be breakage/undefined behavior in subtle ways.
>
> I would suggest two potential alternatives :
>
> 1) Build a seccomp-specific fd via anon_inode_getfd() so that BPF side does not
> confuse it with bpf_prog_fops and therefore does not recognize it in bpf(2)
> as a prog fd.
>
> 2) Extend seccomp where proper eBPF could be supported.
>
> If option 2) is not realistic (where you would get this out of the box), then I
> think 1) could be however.
>

The intention is to use bpffs, so we need a bpf prog fd.
I prefer option 2, though it requires a bit of work.
That way, we could also write seccomp filter in eBPF language.

Kees, could you share your opinions ? If you have no objection,
I will continue this work.

> > + return ret;
> > +}
> > #else
> > static inline long seccomp_set_mode_filter(unsigned int flags,
> > const char __user *filter)
> > {
> > return -EINVAL;
> > }
> > +
> > +static inline long seccomp_load_filter(const char __user *filter)
> > +{
> > + return -EINVAL;
> > +}
> > #endif
> >
> > static long seccomp_get_action_avail(const char __user *uaction)
> > @@ -2099,6 +2137,11 @@ static long do_seccomp(unsigned int op, unsigned int flags,
> > return -EINVAL;
> >
> > return seccomp_get_notif_sizes(uargs);
> > + case SECCOMP_LOAD_FILTER:
> > + if (flags != 0)
> > + return -EINVAL;
> > +
> > + return seccomp_load_filter(uargs);
> > default:
> > return -EINVAL;
> > }
> > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> > index 7ba61b75bc0e..61c80ffb1724 100644
> > --- a/tools/include/uapi/linux/bpf.h
> > +++ b/tools/include/uapi/linux/bpf.h
> > @@ -995,6 +995,7 @@ enum bpf_prog_type {
> > BPF_PROG_TYPE_SK_LOOKUP,
> > BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
> > BPF_PROG_TYPE_NETFILTER,
> > + BPF_PROG_TYPE_SECCOMP,
> > };
> >
> > enum bpf_attach_type {
> >
>