Re: [PATCH] net: Fix suspicious RCU usage in bpf_sk_reuseport_detach()

From: Hawkins Jiawei
Date: Tue Aug 16 2022 - 07:07:15 EST


On Tue, 16 Aug 2022 at 17:34, David Howells <dhowells@xxxxxxxxxx> wrote:
>
> Fix this by adding a new helper, __locked_read_sk_user_data_with_flags()
> that checks to see if sk->sk_callback_lock() is held and use that here
> instead.
Hi, I wonder if we could make this more generic, because I think future
code that uses __rcu_dereference_sk_user_data_with_flags() may
also hit this bug.

To be more specific, maybe we can refactor
__rcu_dereference_sk_user_data_with_flags() into
__rcu_dereference_sk_user_data_with_flags_check(), in the same way that
rcu_dereference() is built on rcu_dereference_check(). For example:

diff --git a/include/net/sock.h b/include/net/sock.h
index 05a1bbdf5805..cf123954eab9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -578,18 +578,27 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk)
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))

/**
- * __rcu_dereference_sk_user_data_with_flags - return the pointer
- * only if argument flags all has been set in sk_user_data. Otherwise
- * return NULL
+ * __rcu_dereference_sk_user_data_with_flags_check - return the pointer
+ * only if argument flags all has been set in sk_user_data, with debug
+ * checking. Otherwise return NULL
*
- * @sk: socket
- * @flags: flag bits
+ * Do __rcu_dereference_sk_user_data_with_flags(), but check that the
+ * conditions under which the rcu dereference will take place are correct,
+ * which is a bit like rcu_dereference() and rcu_dereference_check().
+ *
+ * @sk : socket
+ * @flags : flag bits
+ * @condition : the conditions under which the rcu dereference will
+ * take place
*/
static inline void *
-__rcu_dereference_sk_user_data_with_flags(const struct sock *sk,
- uintptr_t flags)
+__rcu_dereference_sk_user_data_with_flags_check(const struct sock *sk,
+ uintptr_t flags, bool condition)
{
- uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk));
+ uintptr_t sk_user_data;
+
+ sk_user_data = (uintptr_t)rcu_dereference_check(__sk_user_data(sk),
+ condition);

WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK);

@@ -598,6 +607,8 @@ __rcu_dereference_sk_user_data_with_flags(const struct sock *sk,
return NULL;
}

+#define __rcu_dereference_sk_user_data_with_flags(sk, flags) \
+ __rcu_dereference_sk_user_data_with_flags_check(sk, flags, 0)
#define rcu_dereference_sk_user_data(sk) \
__rcu_dereference_sk_user_data_with_flags(sk, 0)
#define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \

> +/**
> + * __locked_read_sk_user_data_with_flags - return the pointer
> + * only if argument flags all has been set in sk_user_data. Otherwise
> + * return NULL
> + *
> + (uintptr_t)rcu_dereference_check(__sk_user_data(sk),
> + lockdep_is_held(&sk->sk_callback_lock));

> diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
> index 85fa9dbfa8bf..82c61612f382 100644
> --- a/kernel/bpf/reuseport_array.c
> +++ b/kernel/bpf/reuseport_array.c
> @@ -24,7 +24,7 @@ void bpf_sk_reuseport_detach(struct sock *sk)
> struct sock __rcu **socks;
>
> write_lock_bh(&sk->sk_callback_lock);
> - socks = __rcu_dereference_sk_user_data_with_flags(sk, SK_USER_DATA_BPF);
> + socks = __locked_read_sk_user_data_with_flags(sk, SK_USER_DATA_BPF);
> if (socks) {
> WRITE_ONCE(sk->sk_user_data, NULL);
> /*
Then, as you point out, we can pass the condition
lockdep_is_held(&sk->sk_callback_lock) to
__rcu_dereference_sk_user_data_with_flags_check() in order to
keep lockdep happy, as below:

diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 85fa9dbfa8bf..a772610987c5 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -24,7 +24,10 @@ void bpf_sk_reuseport_detach(struct sock *sk)
struct sock __rcu **socks;

write_lock_bh(&sk->sk_callback_lock);
- socks = __rcu_dereference_sk_user_data_with_flags(sk, SK_USER_DATA_BPF);
+ socks = __rcu_dereference_sk_user_data_with_flags_check(
+ sk, SK_USER_DATA_BPF,
+ lockdep_is_held(&sk->sk_callback_lock));
+
if (socks) {
WRITE_ONCE(sk->sk_user_data, NULL);
/*