[PATCH net] ipv4, ipv6: Fix handling of transhdrlen in __ip{,6}_append_data()

From: David Howells
Date: Tue Sep 19 2023 - 12:13:54 EST



Including the transhdrlen in length is a problem when the packet is
partially filled (e.g. something like send(MSG_MORE) happened previously)
when appending to an IPv4 or IPv6 packet as we don't want to repeat the
transport header or account for it twice. This can happen under some
circumstances, such as splicing into an L2TP socket.

The symptom observed is a warning in __ip6_append_data():

WARNING: CPU: 1 PID: 5042 at net/ipv6/ip6_output.c:1800 __ip6_append_data.isra.0+0x1be8/0x47f0 net/ipv6/ip6_output.c:1800

that occurs when MSG_SPLICE_PAGES is used to append more data to an already
partially occupied skbuff. The warning occurs when 'copy' is larger than
the amount of data in the message iterator. This is because the requested
length includes the transport header length when it shouldn't. This can be
triggered by, for example:

sfd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_L2TP);
bind(sfd, ...); // ::1
connect(sfd, ...); // ::1 port 7
send(sfd, buffer, 4100, MSG_MORE);
sendfile(sfd, dfd, NULL, 1024);

Fix this by pushing the addition of transhdrlen into length down into
__ip_append_data() and __ip6_append_data(), making it conditional on the
write queue being empty (otherwise we just clear transhdrlen).

Notes:

(1) The length supplied by userspace in ping_*_sendmsg() includes the
transport header len in the size, so we need to subtract it as the
iterator has been advanced to copy out the header - however we still
have to return the fact that we use it from sys_sendmsg(), so we can't
just preemptively decrease len at the beginning of the function.

(2) Raw sockets include the transport header in the payload and pass down
a zero transhdrlen, which I just leave as is.

(3) A smaller fix could be to make ip{,6}_append_data() subtract
transhdrlen from length if it is going to set transhdrlen to 0 - but
it would seem better just not to include it in the first place.

Reported-by: syzbot+62cbf263225ae13ff153@xxxxxxxxxxxxxxxxxxxxxxxxx
Link: https://lore.kernel.org/r/0000000000001c12b30605378ce8@xxxxxxxxxx/
Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Eric Dumazet <edumazet@xxxxxxxxxx>
cc: Willem de Bruijn <willemdebruijn.kernel@xxxxxxxxx>
cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
cc: David Ahern <dsahern@xxxxxxxxxx>
cc: Paolo Abeni <pabeni@xxxxxxxxxx>
cc: Jakub Kicinski <kuba@xxxxxxxxxx>
cc: netdev@xxxxxxxxxxxxxxx
cc: bpf@xxxxxxxxxxxxxxx
cc: syzkaller-bugs@xxxxxxxxxxxxxxxx
---
net/ipv4/icmp.c | 3 +--
net/ipv4/ip_output.c | 7 +++++--
net/ipv4/ping.c | 3 ++-
net/ipv4/udp.c | 8 +++-----
net/ipv6/icmp.c | 6 ++----
net/ipv6/ip6_output.c | 16 +++++++++-------
net/ipv6/ping.c | 3 ++-
net/ipv6/udp.c | 8 +++-----
net/l2tp/l2tp_ip6.c | 4 +---
9 files changed, 28 insertions(+), 30 deletions(-)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index b8607763d113..e1140597e971 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -368,8 +368,7 @@ static void icmp_push_reply(struct sock *sk,
struct sk_buff *skb;

if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
- icmp_param->data_len+icmp_param->head_len,
- icmp_param->head_len,
+ icmp_param->data_len, icmp_param->head_len,
ipc, rt, MSG_DONTWAIT) < 0) {
__ICMP_INC_STATS(sock_net(sk), ICMP_MIB_OUTERRORS);
ip_flush_pending_frames(sk);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4ab877cf6d35..80735bb34a73 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -974,6 +974,11 @@ static int __ip_append_data(struct sock *sk,
bool paged, extra_uref = false;
u32 tskey = 0;

+ if (skb_queue_empty(&sk->sk_write_queue))
+ length += transhdrlen;
+ else
+ transhdrlen = 0;
+
skb = skb_peek_tail(queue);

exthdrlen = !skb ? rt->dst.header_len : 0;
@@ -1353,8 +1358,6 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4,
err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp);
if (err)
return err;
- } else {
- transhdrlen = 0;
}

return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base,
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 75e0aee35eb7..237f25a015f7 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -819,7 +819,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
pfh.wcheck = 0;
pfh.family = AF_INET;

- err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
+ err = ip_append_data(sk, &fl4, ping_getfrag, &pfh,
+ len - sizeof(struct icmphdr),
sizeof(struct icmphdr), &ipc, &rt,
msg->msg_flags);
if (err)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f39b9c844580..b16da49a8478 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1042,7 +1042,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
struct flowi4 fl4_stack;
struct flowi4 *fl4;
- int ulen = len;
struct ipcm_cookie ipc;
struct rtable *rt = NULL;
int free = 0;
@@ -1084,7 +1083,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
release_sock(sk);
}
- ulen += sizeof(struct udphdr);

/*
* Get and verify the address.
@@ -1238,7 +1236,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!corkreq) {
struct inet_cork cork;

- skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
+ skb = ip_make_skb(sk, fl4, getfrag, msg, len,
sizeof(struct udphdr), &ipc, &rt,
&cork, msg->msg_flags);
err = PTR_ERR(skb);
@@ -1268,8 +1266,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
up->pending = AF_INET;

do_append_data:
- up->len += ulen;
- err = ip_append_data(sk, fl4, getfrag, msg, ulen,
+ up->len += sizeof(struct udphdr) + len;
+ err = ip_append_data(sk, fl4, getfrag, msg, len,
sizeof(struct udphdr), &ipc, &rt,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 93a594a901d1..55ae1966fa75 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -614,8 +614,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
idev = __in6_dev_get(skb->dev);

if (ip6_append_data(sk, icmpv6_getfrag, &msg,
- len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr),
+ len, sizeof(struct icmp6hdr),
&ipc6, &fl6, (struct rt6_info *)dst,
MSG_DONTWAIT)) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
@@ -801,8 +800,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
goto out_dst_release;

if (ip6_append_data(sk, icmpv6_getfrag, &msg,
- skb->len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), &ipc6, &fl6,
+ skb->len, sizeof(struct icmp6hdr), &ipc6, &fl6,
(struct rt6_info *)dst, MSG_DONTWAIT)) {
__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 54fc4c711f2c..21bae77489c9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1490,6 +1490,11 @@ static int __ip6_append_data(struct sock *sk,
unsigned int wmem_alloc_delta = 0;
bool paged, extra_uref = false;

+ if (skb_queue_empty(&sk->sk_write_queue))
+ length += transhdrlen;
+ else
+ transhdrlen = 0;
+
skb = skb_peek_tail(queue);
if (!skb) {
exthdrlen = opt ? opt->opt_flen : 0;
@@ -1868,7 +1873,6 @@ int ip6_append_data(struct sock *sk,
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- int exthdrlen;
int err;

if (flags&MSG_PROBE)
@@ -1884,13 +1888,11 @@ int ip6_append_data(struct sock *sk,
return err;

inet->cork.fl.u.ip6 = *fl6;
- exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
- length += exthdrlen;
- transhdrlen += exthdrlen;
- } else {
- transhdrlen = 0;
}

+ /* Add space for extensions */
+ transhdrlen += (ipc6->opt ? ipc6->opt->opt_flen : 0);
+
return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
&np->cork, sk_page_frag(sk), getfrag,
from, length, transhdrlen, flags, ipc6);
@@ -2095,7 +2097,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,

err = __ip6_append_data(sk, &queue, cork, &v6_cork,
&current->task_frag, getfrag, from,
- length + exthdrlen, transhdrlen + exthdrlen,
+ length, transhdrlen + exthdrlen,
flags, ipc6);
if (err) {
__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 5831aaa53d75..fa2701241724 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -174,7 +174,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

lock_sock(sk);
- err = ip6_append_data(sk, ping_getfrag, &pfh, len,
+ err = ip6_append_data(sk, ping_getfrag, &pfh,
+ len - sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), &ipc6, &fl6, rt,
MSG_DONTWAIT);

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 86b5d509a468..9dd71f542c9f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1331,7 +1331,6 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
bool connected = false;
- int ulen = len;
int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
int err;
int is_udplite = IS_UDPLITE(sk);
@@ -1416,7 +1415,6 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
release_sock(sk);
}
- ulen += sizeof(struct udphdr);

memset(fl6, 0, sizeof(*fl6));

@@ -1567,7 +1565,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!corkreq) {
struct sk_buff *skb;

- skb = ip6_make_skb(sk, getfrag, msg, ulen,
+ skb = ip6_make_skb(sk, getfrag, msg, len,
sizeof(struct udphdr), &ipc6,
(struct rt6_info *)dst,
msg->msg_flags, &cork);
@@ -1594,8 +1592,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
do_append_data:
if (ipc6.dontfrag < 0)
ipc6.dontfrag = np->dontfrag;
- up->len += ulen;
- err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
+ up->len += sizeof(struct udphdr) + len;
+ err = ip6_append_data(sk, getfrag, msg, len, sizeof(struct udphdr),
&ipc6, fl6, (struct rt6_info *)dst,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index ed8ebb6f5909..d5d1c3b68200 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -499,7 +499,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
int transhdrlen = 4; /* zero session-id */
- int ulen;
int err;

/* Rough check on arithmetic overflow,
@@ -507,7 +506,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
*/
if (len > INT_MAX - transhdrlen)
return -EMSGSIZE;
- ulen = len + transhdrlen;

/* Mirror BSD error message compatibility */
if (msg->msg_flags & MSG_OOB)
@@ -629,7 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
back_from_confirm:
lock_sock(sk);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
- ulen, transhdrlen, &ipc6,
+ len, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
msg->msg_flags);
if (err)