Re: [PATCH net v2] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

From: Alexander Lobakin
Date: Fri Jan 15 2021 - 12:13:56 EST


From: Dongseok Yi <dseok.yi@xxxxxxxxxxx>
Date: Fri, 15 Jan 2021 22:20:35 +0900

> UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> forwarding. Only the header of head_skb from ip_finish_output_gso ->
> skb_gso_segment is updated but following frag_skbs are not updated.
>
> A call path skb_mac_gso_segment -> inet_gso_segment ->
> udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> does not try to update UDP/IP header of the segment list but copy
> only the MAC header.
>
> Update dport, daddr and checksums of each skb of the segment list
> in __udp_gso_segment_list. It covers both SNAT and DNAT.
>
> Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> Signed-off-by: Dongseok Yi <dseok.yi@xxxxxxxxxxx>
> ---
> v1:
> Steffen Klassert said, there could be 2 options.
> https://lore.kernel.org/patchwork/patch/1362257/
> I was trying to write a quick fix, but it was not easy to forward
> segmented list. Currently, assuming DNAT only.
>
> v2:
> Per Steffen Klassert request, move the procedure from
> udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
>
> To Alexander Lobakin, I've checked your email late. Just use this
> patch as a reference. It support SNAT too, but does not support IPv6
> yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> to the file is in IPv4 directory.

I used another approach, tried to make fraglist GRO closer to plain
in terms of checksummming, as it is confusing to me why GSO packet
should have CHECKSUM_UNNECESSARY. Just let Netfilter do its mangling,
and then use classic UDP GSO magic at the end of segmentation.
I also see the idea of explicit comparing and editing of IP and UDP
headers right in __udp_gso_segment_list() rather unacceptable.

Dongseok, Steffen, please test this WIP diff and tell if this one
works for you, so I could clean up the code and make a patch.
For me, it works now in any configurations, with and without
checksum/GSO/fraglist offload.

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c1a6f262636a..646a42e88e83 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3674,6 +3674,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
unsigned int offset)
{
struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
+ unsigned int doffset = skb->data - skb_mac_header(skb);
unsigned int tnl_hlen = skb_tnl_header_len(skb);
unsigned int delta_truesize = 0;
unsigned int delta_len = 0;
@@ -3681,7 +3682,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
struct sk_buff *nskb, *tmp;
int err;

- skb_push(skb, -skb_network_offset(skb) + offset);
+ skb_push(skb, doffset);

skb_shinfo(skb)->frag_list = NULL;

@@ -3716,12 +3717,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
delta_len += nskb->len;
delta_truesize += nskb->truesize;

- skb_push(nskb, -skb_network_offset(nskb) + offset);
+ skb_push(nskb, skb_headroom(nskb) - skb_headroom(skb));

skb_release_head_state(nskb);
- __copy_skb_header(nskb, skb);
+ __copy_skb_header(nskb, skb);

- skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
skb_copy_from_linear_data_offset(skb, -tnl_hlen,
nskb->data - tnl_hlen,
offset + tnl_hlen);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ff39e94781bf..61665fcd8c85 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -190,13 +190,58 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
netdev_features_t features)
{
- unsigned int mss = skb_shinfo(skb)->gso_size;
+ struct sk_buff *seg;
+ struct udphdr *uh;
+ unsigned int mss;
+ __be16 newlen;
+ __sum16 check;
+
+ mss = skb_shinfo(skb)->gso_size;
+ if (skb->len <= sizeof(*uh) + mss)
+ return ERR_PTR(-EINVAL);

- skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ skb_pull(skb, sizeof(*uh));
+
+ skb = skb_segment_list(skb, features, skb->data - skb_mac_header(skb));
if (IS_ERR(skb))
return skb;

- udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
+ seg = skb;
+ uh = udp_hdr(seg);
+
+ /* compute checksum adjustment based on old length versus new */
+ newlen = htons(sizeof(*uh) + mss);
+ check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
+
+ for (;;) {
+ if (!seg->next)
+ break;
+
+ uh->len = newlen;
+ uh->check = check;
+
+ if (seg->ip_summed == CHECKSUM_PARTIAL)
+ gso_reset_checksum(seg, ~check);
+ else
+ uh->check = gso_make_checksum(seg, ~check) ? :
+ CSUM_MANGLED_0;
+
+ seg = seg->next;
+ uh = udp_hdr(seg);
+ }
+
+ /* last packet can be partial gso_size, account for that in checksum */
+ newlen = htons(skb_tail_pointer(seg) - skb_transport_header(seg) +
+ seg->data_len);
+ check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
+
+ uh->len = newlen;
+ uh->check = check;
+
+ if (seg->ip_summed == CHECKSUM_PARTIAL)
+ gso_reset_checksum(seg, ~check);
+ else
+ uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0;

return skb;
}
@@ -602,27 +647,13 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
const struct iphdr *iph = ip_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

- if (NAPI_GRO_CB(skb)->is_flist) {
- uh->len = htons(skb->len - nhoff);
-
- skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
- skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
-
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
-
- return 0;
- }
-
if (uh->check)
uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
iph->daddr, 0);

+ if (NAPI_GRO_CB(skb)->is_flist)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST;
+
return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
}