[PATCH 4.19 18/49] bpf: Add kconfig knob for disabling unpriv bpf by default

From: Greg Kroah-Hartman
Date: Mon Feb 14 2022 - 04:39:01 EST


From: Daniel Borkmann <daniel@xxxxxxxxxxxxx>

commit 08389d888287c3823f80b0216766b71e17f0aba5 upstream.

Add a kconfig knob which allows for unprivileged bpf to be disabled by default.
If set, the knob sets /proc/sys/kernel/unprivileged_bpf_disabled to value of 2.

This still allows a transition of 2 -> {0,1} through an admin. Similarly,
this also still keeps 1 -> {1} behavior intact, so that once set to permanently
disabled, it cannot be undone aside from a reboot.

We've also added extra2 with max of 2 for the procfs handler, so that an admin
still has a chance to toggle between 0 <-> 2.

Either way, as an additional alternative, applications can make use of CAP_BPF
that we added a while ago.

Signed-off-by: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
Link: https://lore.kernel.org/bpf/74ec548079189e4e4dffaeb42b8987bb3c852eee.1620765074.git.daniel@xxxxxxxxxxxxx
[fllinden@xxxxxxxxxx: backported to 4.19]
Signed-off-by: Frank van der Linden <fllinden@xxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
Documentation/sysctl/kernel.txt | 21 +++++++++++++++++++++
init/Kconfig | 10 ++++++++++
kernel/bpf/syscall.c | 3 ++-
kernel/sysctl.c | 29 +++++++++++++++++++++++++----
4 files changed, 58 insertions(+), 5 deletions(-)

--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -94,6 +94,7 @@ show up in /proc/sys/kernel:
- sysctl_writes_strict
- tainted
- threads-max
+- unprivileged_bpf_disabled
- unknown_nmi_panic
- watchdog
- watchdog_thresh
@@ -1041,6 +1042,26 @@ available RAM pages threads-max is reduc

==============================================================

+unprivileged_bpf_disabled:
+
+Writing 1 to this entry will disable unprivileged calls to bpf();
+once disabled, calling bpf() without CAP_SYS_ADMIN will return
+-EPERM. Once set to 1, this can't be cleared from the running kernel
+anymore.
+
+Writing 2 to this entry will also disable unprivileged calls to bpf(),
+however, an admin can still change this setting later on, if needed, by
+writing 0 or 1 to this entry.
+
+If BPF_UNPRIV_DEFAULT_OFF is enabled in the kernel config, then this
+entry will default to 2 instead of 0.
+
+ 0 - Unprivileged calls to bpf() are enabled
+ 1 - Unprivileged calls to bpf() are disabled without recovery
+ 2 - Unprivileged calls to bpf() are disabled
+
+==============================================================
+
unknown_nmi_panic:

The value in this file affects behavior of handling NMI. When the
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1474,6 +1474,16 @@ config BPF_JIT_ALWAYS_ON
Enables BPF JIT and removes BPF interpreter to avoid
speculative execution of BPF instructions by the interpreter

+config BPF_UNPRIV_DEFAULT_OFF
+ bool "Disable unprivileged BPF by default"
+ depends on BPF_SYSCALL
+ help
+ Disables unprivileged BPF by default by setting the corresponding
+ /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can
+ still reenable it by setting it to 0 later on, or permanently
+ disable it by setting it to 1 (from which no other transition to
+ 0 is possible anymore).
+
config USERFAULTFD
bool "Enable userfaultfd() system call"
select ANON_INODES
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -48,7 +48,8 @@ static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);

-int sysctl_unprivileged_bpf_disabled __read_mostly;
+int sysctl_unprivileged_bpf_disabled __read_mostly =
+ IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -250,6 +250,28 @@ static int sysrq_sysctl_handler(struct c

#endif

+#ifdef CONFIG_BPF_SYSCALL
+static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret, unpriv_enable = *(int *)table->data;
+ bool locked_state = unpriv_enable == 1;
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &unpriv_enable;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ if (locked_state && unpriv_enable != 1)
+ return -EPERM;
+ *(int *)table->data = unpriv_enable;
+ }
+ return ret;
+}
+#endif
+
static struct ctl_table kern_table[];
static struct ctl_table vm_table[];
static struct ctl_table fs_table[];
@@ -1220,10 +1242,9 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_unprivileged_bpf_disabled,
.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
.mode = 0644,
- /* only handle a transition from default "0" to "1" */
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &one,
+ .proc_handler = bpf_unpriv_handler,
+ .extra1 = &zero,
+ .extra2 = &two,
},
#endif
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)