[PATCH 4.2.y-ckt 265/305] xfrm: add rcu protection to sk->sk_policy[]

From: Kamal Mostafa
Date: Fri Jan 15 2016 - 19:20:22 EST


4.2.8-ckt2 -stable review patch. If anyone has any objections, please let me know.

---8<------------------------------------------------------------

From: Eric Dumazet <edumazet@xxxxxxxxxx>

commit d188ba86dd07a72ebebfa22fe9cb0b0572e57740 upstream.

XFRM can deal with SYNACK messages, sent while listener socket
is not locked. We add proper rcu protection to __xfrm_sk_clone_policy()
and xfrm_sk_policy_lookup()

This might serve as the first step to remove xfrm.xfrm_policy_lock
use in fast path.

Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer")
Signed-off-by: Eric Dumazet <edumazet@xxxxxxxxxx>
Acked-by: Steffen Klassert <steffen.klassert@xxxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Signed-off-by: Kamal Mostafa <kamal@xxxxxxxxxxxxx>
---
include/net/sock.h | 2 +-
include/net/xfrm.h | 24 +++++++++++++++---------
net/core/sock.c | 2 +-
net/xfrm/xfrm_policy.c | 37 +++++++++++++++++++++++++------------
4 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 208c874..8de0690 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -371,7 +371,7 @@ struct sock {
struct socket_wq __rcu *sk_wq;

#ifdef CONFIG_XFRM
- struct xfrm_policy *sk_policy[2];
+ struct xfrm_policy __rcu *sk_policy[2];
#endif
unsigned long sk_flags;
struct dst_entry *sk_rx_dst;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index f0ee97e..02a1d20 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1140,12 +1140,14 @@ static inline int xfrm6_route_forward(struct sk_buff *skb)
return xfrm_route_forward(skb, AF_INET6);
}

-int __xfrm_sk_clone_policy(struct sock *sk);
+int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk);

-static inline int xfrm_sk_clone_policy(struct sock *sk)
+static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
{
- if (unlikely(sk->sk_policy[0] || sk->sk_policy[1]))
- return __xfrm_sk_clone_policy(sk);
+ sk->sk_policy[0] = NULL;
+ sk->sk_policy[1] = NULL;
+ if (unlikely(osk->sk_policy[0] || osk->sk_policy[1]))
+ return __xfrm_sk_clone_policy(sk, osk);
return 0;
}

@@ -1153,12 +1155,16 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir);

static inline void xfrm_sk_free_policy(struct sock *sk)
{
- if (unlikely(sk->sk_policy[0] != NULL)) {
- xfrm_policy_delete(sk->sk_policy[0], XFRM_POLICY_MAX);
+ struct xfrm_policy *pol;
+
+ pol = rcu_dereference_protected(sk->sk_policy[0], 1);
+ if (unlikely(pol != NULL)) {
+ xfrm_policy_delete(pol, XFRM_POLICY_MAX);
sk->sk_policy[0] = NULL;
}
- if (unlikely(sk->sk_policy[1] != NULL)) {
- xfrm_policy_delete(sk->sk_policy[1], XFRM_POLICY_MAX+1);
+ pol = rcu_dereference_protected(sk->sk_policy[1], 1);
+ if (unlikely(pol != NULL)) {
+ xfrm_policy_delete(pol, XFRM_POLICY_MAX+1);
sk->sk_policy[1] = NULL;
}
}
@@ -1168,7 +1174,7 @@ void xfrm_garbage_collect(struct net *net);
#else

static inline void xfrm_sk_free_policy(struct sock *sk) {}
-static inline int xfrm_sk_clone_policy(struct sock *sk) { return 0; }
+static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
diff --git a/net/core/sock.c b/net/core/sock.c
index 623224a..3148f24 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1536,7 +1536,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
is_charged = sk_filter_charge(newsk, filter);

- if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) {
+ if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free() */
newsk->sk_destruct = NULL;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 18cead7..2718184 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1212,8 +1212,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
struct xfrm_policy *pol;
struct net *net = sock_net(sk);

+ rcu_read_lock();
read_lock_bh(&net->xfrm.xfrm_policy_lock);
- if ((pol = sk->sk_policy[dir]) != NULL) {
+ pol = rcu_dereference(sk->sk_policy[dir]);
+ if (pol != NULL) {
bool match = xfrm_selector_match(&pol->selector, fl,
sk->sk_family);
int err = 0;
@@ -1237,6 +1239,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
}
out:
read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ rcu_read_unlock();
return pol;
}

@@ -1305,13 +1308,14 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
#endif

write_lock_bh(&net->xfrm.xfrm_policy_lock);
- old_pol = sk->sk_policy[dir];
- sk->sk_policy[dir] = pol;
+ old_pol = rcu_dereference_protected(sk->sk_policy[dir],
+ lockdep_is_held(&net->xfrm.xfrm_policy_lock));
if (pol) {
pol->curlft.add_time = get_seconds();
pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
xfrm_sk_policy_link(pol, dir);
}
+ rcu_assign_pointer(sk->sk_policy[dir], pol);
if (old_pol) {
if (pol)
xfrm_policy_requeue(old_pol, pol);
@@ -1359,17 +1363,26 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
return newp;
}

-int __xfrm_sk_clone_policy(struct sock *sk)
+int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
{
- struct xfrm_policy *p0 = sk->sk_policy[0],
- *p1 = sk->sk_policy[1];
+ const struct xfrm_policy *p;
+ struct xfrm_policy *np;
+ int i, ret = 0;

- sk->sk_policy[0] = sk->sk_policy[1] = NULL;
- if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
- return -ENOMEM;
- if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
- return -ENOMEM;
- return 0;
+ rcu_read_lock();
+ for (i = 0; i < 2; i++) {
+ p = rcu_dereference(osk->sk_policy[i]);
+ if (p) {
+ np = clone_policy(p, i);
+ if (unlikely(!np)) {
+ ret = -ENOMEM;
+ break;
+ }
+ rcu_assign_pointer(sk->sk_policy[i], np);
+ }
+ }
+ rcu_read_unlock();
+ return ret;
}

static int
--
1.9.1