[PATCH net-next v3 2/4] net: socket: add sockopts blacklist for BPF cgroup hook

From: Alexander Mikhalitsyn
Date: Tue Apr 11 2023 - 06:43:55 EST


While working on SO_PEERPIDFD, it was discovered (thanks to Christian)
that the BPF cgroup hook can cause FD leaks when used with sockopts that
install FDs into the process fdtable.

After some off-list discussion, it was proposed to add a blacklist of
socket options that can cause trouble when the BPF cgroup hook is enabled.

Cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
Cc: Eric Dumazet <edumazet@xxxxxxxxxx>
Cc: Jakub Kicinski <kuba@xxxxxxxxxx>
Cc: Paolo Abeni <pabeni@xxxxxxxxxx>
Cc: Leon Romanovsky <leon@xxxxxxxxxx>
Cc: David Ahern <dsahern@xxxxxxxxxx>
Cc: Arnd Bergmann <arnd@xxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Christian Brauner <brauner@xxxxxxxxxx>
Cc: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx>
Cc: Lennart Poettering <mzxreary@xxxxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
Cc: netdev@xxxxxxxxxxxxxxx
Cc: linux-arch@xxxxxxxxxxxxxxx
Suggested-by: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Suggested-by: Christian Brauner <brauner@xxxxxxxxxx>
Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@xxxxxxxxxxxxx>
---
net/socket.c | 38 +++++++++++++++++++++++++++++++++++---
1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 73e493da4589..9c1ef11de23f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -108,6 +108,8 @@
#include <linux/ptp_clock_kernel.h>
#include <trace/events/sock.h>

+#include <linux/sctp.h>
+
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
@@ -2227,6 +2229,36 @@ static bool sock_use_custom_sol_socket(const struct socket *sock)
return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
}

+#ifdef CONFIG_CGROUP_BPF
+static bool sockopt_installs_fd(int level, int optname)
+{
+ /*
+ * Return true for socket options whose handler does fd_install().
+ * If the BPF_CGROUP_RUN_PROG_GETSOCKOPT hook returns an error after
+ * the original handler sctp_getsockopt(...) has succeeded, userspace
+ * gets an error from the getsockopt syscall and will not be aware
+ * that an fd was successfully installed into its fdtable.
+ * Callers use this to keep the bpf cgroup hook from running on them.
+ */
+ if (level == SOL_SCTP) {
+ switch (optname) {
+ case SCTP_SOCKOPT_PEELOFF:
+ case SCTP_SOCKOPT_PEELOFF_FLAGS:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
+#else /* !CONFIG_CGROUP_BPF */
+static inline bool sockopt_installs_fd(int level, int optname)
+{
+ return false;
+}
+#endif /* CONFIG_CGROUP_BPF */
+
/*
* Set a socket option. Because we don't know the option lengths we have
* to pass the user mode parameter for the protocols to sort out.
@@ -2250,7 +2282,7 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
if (err)
goto out_put;

- if (!in_compat_syscall())
+ if (!in_compat_syscall() && !sockopt_installs_fd(level, optname))
err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
user_optval, &optlen,
&kernel_optval);
@@ -2304,7 +2336,7 @@ int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
if (err)
goto out_put;

- if (!in_compat_syscall())
+ if (!in_compat_syscall() && !sockopt_installs_fd(level, optname))
max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);

if (level == SOL_SOCKET)
@@ -2315,7 +2347,7 @@ int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
err = sock->ops->getsockopt(sock, level, optname, optval,
optlen);

- if (!in_compat_syscall())
+ if (!in_compat_syscall() && !sockopt_installs_fd(level, optname))
err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
optval, optlen, max_optlen,
err);
--
2.34.1