[PATCH v1] tcp: enhancing timestamps random algo to address issues arising from NAT mapping

From: George Guo
Date: Sun Sep 17 2023 - 21:47:52 EST


Tsval=tsoffset+local_clock, here tsoffset is randomized with saddr and daddr parameters in func
secure_tcp_ts_off. Most of time it is OK except for NAT mapping to the same port and daddr.
Consider the following scenario:
ns1: ns2:
+-----------+ +-----------+
| | | |
| | | |
| | | |
| veth1 | | vethb |
|192.168.1.1| |192.168.1.2|
+----+------+ +-----+-----+
| |
| |
| br0:192.168.1.254 |
+----------+----------+
veth0 | vetha
192.168.1.3 | 192.168.1.4
|
nat(192.168.1.x -->172.30.60.199)
|
V
eth0
172.30.60.199
|
|
+----> ... ... ---->server: 172.30.60.191

Let's say ns1 (192.168.1.1) generates a timestamp ts1, and ns2 (192.168.1.2) generates a timestamp
ts2, with ts1 > ts2.

If ns1 initiates a connection to a server, and then the server actively closes the connection,
entering the TIME_WAIT state, and ns2 attempts to connect to the server while port reuse is in
progress, due to the presence of NAT, the server sees both connections as originating from the
same IP address (e.g., 172.30.60.199) and port. However, since ts2 is smaller than ts1, the server
will respond with the acknowledgment (ACK) for the fourth handshake.

SERVER CLIENT

1. ESTABLISHED ESTABLISHED

(Close)
2. FIN-WAIT-1 --> <SEQ=100><ACK=300><TSval=20><CTL=FIN,ACK> --> CLOSE-WAIT

3. FIN-WAIT-2 <-- <SEQ=300><ACK=101><TSval=40><CTL=ACK> <-- CLOSE-WAIT

(Close)
4. TIME-WAIT <-- <SEQ=300><ACK=101><TSval=41><CTL=FIN,ACK> <-- LAST-ACK

5. TIME-WAIT --> <SEQ=101><ACK=301><TSval=25><CTL=ACK> --> CLOSED

- - - - - - - - - - - - - port reused - - - - - - - - - - - - - - -

5.1. TIME-WAIT <-- <SEQ=255><TSval=30><CTL=SYN> <-- SYN-SENT

5.2. TIME-WAIT --> <SEQ=101><ACK=301><TSval=35><CTL=ACK> --> SYN-SENT

5.3. CLOSED <-- <SEQ=301><CTL=RST> <-- SYN-SENT

6. SYN-RECV <-- <SEQ=255><TSval=34><CTL=SYN> <-- SYN-SENT

7. SYN-RECV --> <SEQ=400><ACK=301><TSval=40><CTL=SYN,ACK> --> ESTABLISHED

1. ESTABLISH <-- <SEQ=301><ACK=401><TSval=55><CTL=ACK> <-- ESTABLISHED

This enhancement uses sport and daddr rather than saddr and daddr, which keep the timestamp
monotonically increasing in the situation described above. Then the port reuse is like this:

SERVER CLIENT

1. ESTABLISHED ESTABLISHED

(Close)
2. FIN-WAIT-1 --> <SEQ=100><ACK=300><TSval=20><CTL=FIN,ACK> --> CLOSE-WAIT

3. FIN-WAIT-2 <-- <SEQ=300><ACK=101><TSval=40><CTL=ACK> <-- CLOSE-WAIT

(Close)
4. TIME-WAIT <-- <SEQ=300><ACK=101><TSval=41><CTL=FIN,ACK> <-- LAST-ACK

5. TIME-WAIT --> <SEQ=101><ACK=301><TSval=25><CTL=ACK> --> CLOSED

- - - - - - - - - - - - - port reused - - - - - - - - - - - - - - -

5.1. TIME-WAIT <-- <SEQ=300><TSval=50><CTL=SYN> <-- SYN-SENT

6. SYN-RECV --> <SEQ=400><ACK=301><TSval=40><CTL=SYN,ACK> --> ESTABLISHED

1. ESTABLISH <-- <SEQ=301><ACK=401><TSval=55><CTL=ACK> <-- ESTABLISHED

The enhancement lets port reused more efficiently.

Signed-off-by: George Guo <guodongtai@xxxxxxxxxx>
---
include/net/secure_seq.h | 2 +-
net/core/secure_seq.c | 4 ++--
net/ipv4/syncookies.c | 4 ++--
net/ipv4/tcp_ipv4.c | 6 ++++--
4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index 21e7fa2a1813..40fb53520aa4 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -11,7 +11,7 @@ u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport);
u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport);
-u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr);
+u32 secure_tcp_ts_off(const struct net *net, __be16 sport, __be32 daddr);
u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport);
u32 secure_tcpv6_ts_off(const struct net *net,
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index b0ff6153be62..575b6afe39a4 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -118,13 +118,13 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif

#ifdef CONFIG_INET
-u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
+u32 secure_tcp_ts_off(const struct net *net, __be16 sport, __be32 daddr)
{
if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;

ts_secret_init();
- return siphash_2u32((__force u32)saddr, (__force u32)daddr,
+ return siphash_2u32((__force u32)sport, (__force u32)daddr,
&ts_secret);
}

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index dc478a0574cb..df1757ff5956 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -360,8 +360,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)

if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
tsoff = secure_tcp_ts_off(sock_net(sk),
- ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr);
+ th->source,
+ ip_hdr(skb)->daddr);
tcp_opt.rcv_tsecr -= tsoff;
}

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 27140e5cdc06..acad4b14ecf7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -104,7 +104,9 @@ static u32 tcp_v4_init_seq(const struct sk_buff *skb)

static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
- return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ return secure_tcp_ts_off(net, th->source, ip_hdr(skb)->daddr);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -309,7 +311,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_sport,
usin->sin_port));
WRITE_ONCE(tp->tsoffset,
- secure_tcp_ts_off(net, inet->inet_saddr,
+ secure_tcp_ts_off(net, inet->inet_sport,
inet->inet_daddr));
}

--
2.34.1