Re: [PATCH v9 net-next 16/23] net/tcp: Ignore specific ICMPs for TCP-AO connections

From: Eric Dumazet
Date: Tue Aug 08 2023 - 11:45:01 EST


On Wed, Aug 2, 2023 at 7:27 PM Dmitry Safonov <dima@xxxxxxxxxx> wrote:
>
> Similarly to IPsec, RFC5925 prescribes:
> ">> A TCP-AO implementation MUST default to ignore incoming ICMPv4
> messages of Type 3 (destination unreachable), Codes 2-4 (protocol
> unreachable, port unreachable, and fragmentation needed -- ’hard
> errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
> (administratively prohibited) and Code 4 (port unreachable) intended
> for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
> WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs."
>
> A selftest (later in patch series) verifies that this attack is not
> possible in this TCP-AO implementation.
>
> Co-developed-by: Francesco Ruggeri <fruggeri@xxxxxxxxxx>
> Signed-off-by: Francesco Ruggeri <fruggeri@xxxxxxxxxx>
> Co-developed-by: Salam Noureddine <noureddine@xxxxxxxxxx>
> Signed-off-by: Salam Noureddine <noureddine@xxxxxxxxxx>
> Signed-off-by: Dmitry Safonov <dima@xxxxxxxxxx>
> Acked-by: David Ahern <dsahern@xxxxxxxxxx>
> ---
> include/net/tcp_ao.h | 10 ++++++-
> include/uapi/linux/snmp.h | 1 +
> include/uapi/linux/tcp.h | 4 ++-
> net/ipv4/proc.c | 1 +
> net/ipv4/tcp_ao.c | 61 +++++++++++++++++++++++++++++++++++++++
> net/ipv4/tcp_ipv4.c | 5 ++++
> net/ipv6/tcp_ipv6.c | 4 +++
> 7 files changed, 84 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
> index 986e8dcbb150..94fde002b638 100644
> --- a/include/net/tcp_ao.h
> +++ b/include/net/tcp_ao.h
> @@ -24,6 +24,7 @@ struct tcp_ao_counters {
> atomic64_t pkt_bad;
> atomic64_t key_not_found;
> atomic64_t ao_required;
> + atomic64_t dropped_icmp;
> };
>
> struct tcp_ao_key {
> @@ -92,7 +93,8 @@ struct tcp_ao_info {
> struct tcp_ao_key *rnext_key;
> struct tcp_ao_counters counters;
> u32 ao_required :1,
> - __unused :31;
> + accept_icmps :1,
> + __unused :30;
> __be32 lisn;
> __be32 risn;
> /* Sequence Number Extension (SNE) are upper 4 bytes for SEQ,
> @@ -189,6 +191,7 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
> unsigned int len, struct tcp_sigpool *hp);
> void tcp_ao_destroy_sock(struct sock *sk, bool twsk);
> void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp);
> +bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code);
> enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
> const struct sk_buff *skb, unsigned short int family,
> const struct request_sock *req,
> @@ -264,6 +267,11 @@ static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
> {
> }
>
> +static inline bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code)
> +{
> + return false;
> +}
> +
> static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
> const struct sk_buff *skb, unsigned short int family,
> const struct request_sock *req, const struct tcp_ao_hdr *aoh)
> diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
> index 06ddf4cd295c..47a6b47da66f 100644
> --- a/include/uapi/linux/snmp.h
> +++ b/include/uapi/linux/snmp.h
> @@ -300,6 +300,7 @@ enum
> LINUX_MIB_TCPAOBAD, /* TCPAOBad */
> LINUX_MIB_TCPAOKEYNOTFOUND, /* TCPAOKeyNotFound */
> LINUX_MIB_TCPAOGOOD, /* TCPAOGood */
> + LINUX_MIB_TCPAODROPPEDICMPS, /* TCPAODroppedIcmps */
> __LINUX_MIB_MAX
> };
>
> diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
> index 3fe0612ec59a..ca7ed18ce67b 100644
> --- a/include/uapi/linux/tcp.h
> +++ b/include/uapi/linux/tcp.h
> @@ -392,7 +392,8 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */
> set_rnext :1, /* corresponding ::rnext */
> ao_required :1, /* don't accept non-AO connects */
> set_counters :1, /* set/clear ::pkt_* counters */
> - reserved :28; /* must be 0 */
> + accept_icmps :1, /* accept incoming ICMPs */
> + reserved :27; /* must be 0 */
> __u16 reserved2; /* padding, must be 0 */
> __u8 current_key; /* KeyID to set as Current_key */
> __u8 rnext; /* KeyID to set as Rnext_key */
> @@ -400,6 +401,7 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */
> __u64 pkt_bad; /* failed verification */
> __u64 pkt_key_not_found; /* could not find a key to verify */
> __u64 pkt_ao_required; /* segments missing TCP-AO sign */
> + __u64 pkt_dropped_icmp; /* ICMPs that were ignored */
> } __attribute__((aligned(8)));
>
> /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
> diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
> index 3f643cd29cfe..5d3c9c96773e 100644
> --- a/net/ipv4/proc.c
> +++ b/net/ipv4/proc.c
> @@ -302,6 +302,7 @@ static const struct snmp_mib snmp4_net_list[] = {
> SNMP_MIB_ITEM("TCPAOBad", LINUX_MIB_TCPAOBAD),
> SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND),
> SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD),
> + SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS),
> SNMP_MIB_SENTINEL
> };
>
> diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
> index 226dcefb426a..236c8cd1a0c7 100644
> --- a/net/ipv4/tcp_ao.c
> +++ b/net/ipv4/tcp_ao.c
> @@ -15,6 +15,7 @@
>
> #include <net/tcp.h>
> #include <net/ipv6.h>
> +#include <net/icmp.h>
>
> int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
> unsigned int len, struct tcp_sigpool *hp)
> @@ -44,6 +45,63 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
> return 1;
> }
>
> +bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code)

const struct sock *sk ?

> +{
> + bool ignore_icmp = false;
> + struct tcp_ao_info *ao;
> +
> + /* RFC5925, 7.8:
> + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4
> + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol
> + * unreachable, port unreachable, and fragmentation needed -- ’hard
> + * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
> + * (administratively prohibited) and Code 4 (port unreachable) intended
> + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
> + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs.
> + */

I know this sounds silly, but you should read sk->sk_family only once.

Otherwise you risk another KCSAN report, because IPV6_ADDRFORM can change sk->sk_family concurrently.

if (sk->sk_family == AF_INET) {
...
} else {
/* AF_INET6 case */
}


> + if (sk->sk_family == AF_INET) {
> + if (type != ICMP_DEST_UNREACH)
> + return false;
> + if (code < ICMP_PROT_UNREACH || code > ICMP_FRAG_NEEDED)
> + return false;
> + } else if (sk->sk_family == AF_INET6) {
> + if (type != ICMPV6_DEST_UNREACH)
> + return false;
> + if (code != ICMPV6_ADM_PROHIBITED && code != ICMPV6_PORT_UNREACH)
> + return false;
> + } else {


No WARN_ON_ONCE(1) here please.

> + WARN_ON_ONCE(1);
> + return false;
> + }
> +
> + rcu_read_lock();
> + switch (sk->sk_state) {
> + case TCP_TIME_WAIT:
> + ao = rcu_dereference(tcp_twsk(sk)->ao_info);
> + break;
> + case TCP_SYN_SENT:
> + case TCP_SYN_RECV:
> + case TCP_LISTEN:
> + case TCP_NEW_SYN_RECV:
> + /* RFC5925 specifies to ignore ICMPs *only* on connections
> + * in synchronized states.
> + */
> + rcu_read_unlock();
> + return false;
> + default:
> + ao = rcu_dereference(tcp_sk(sk)->ao_info);
> + }
> +
> + if (ao && !ao->accept_icmps) {
> + ignore_icmp = true;
> + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAODROPPEDICMPS);
> + atomic64_inc(&ao->counters.dropped_icmp);
> + }
> + rcu_read_unlock();
> +
> + return ignore_icmp;
> +}
> +
> /* Optimized version of tcp_ao_do_lookup(): only for sockets for which
> * it's known that the keys in ao_info are matching peer's
> * family/address/VRF/etc.
> @@ -1036,6 +1094,7 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk,
> new_ao->lisn = htonl(tcp_rsk(req)->snt_isn);
> new_ao->risn = htonl(tcp_rsk(req)->rcv_isn);
> new_ao->ao_required = ao->ao_required;
> + new_ao->accept_icmps = ao->accept_icmps;
>
> if (family == AF_INET) {
> addr = (union tcp_ao_addr *)&newsk->sk_daddr;
> @@ -1742,9 +1801,11 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family,
> atomic64_set(&ao_info->counters.pkt_bad, cmd.pkt_bad);
> atomic64_set(&ao_info->counters.key_not_found, cmd.pkt_key_not_found);
> atomic64_set(&ao_info->counters.ao_required, cmd.pkt_ao_required);
> + atomic64_set(&ao_info->counters.dropped_icmp, cmd.pkt_dropped_icmp);
> }
>
> ao_info->ao_required = cmd.ao_required;
> + ao_info->accept_icmps = cmd.accept_icmps;
> if (new_current)
> WRITE_ONCE(ao_info->current_key, new_current);
> if (new_rnext)
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 42a3be6c25a4..ae910181693d 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -494,6 +494,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
> return -ENOENT;
> }
> if (sk->sk_state == TCP_TIME_WAIT) {
> + /* To increase the counter of ignored icmps for TCP-AO */
> + tcp_ao_ignore_icmp(sk, type, code);
> inet_twsk_put(inet_twsk(sk));
> return 0;
> }
> @@ -508,6 +510,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
> }
>
> bh_lock_sock(sk);

Do we need to hold the spinlock before calling tcp_ao_ignore_icmp() ?

> + if (tcp_ao_ignore_icmp(sk, type, code))
> + goto out;
> +
> /* If too many ICMPs get dropped on busy
> * servers this needs to be solved differently.
> * We do take care of PMTU discovery (RFC1191) special case :
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index f1804ec3bb1d..07126d9eeda9 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -395,6 +395,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
> }
>
> if (sk->sk_state == TCP_TIME_WAIT) {
> + /* To increase the counter of ignored icmps for TCP-AO */
> + tcp_ao_ignore_icmp(sk, type, code);
> inet_twsk_put(inet_twsk(sk));
> return 0;
> }
> @@ -406,6 +408,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
> }
>
> bh_lock_sock(sk);
> + if (tcp_ao_ignore_icmp(sk, type, code))
> + goto out;
> if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
> __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
>
> --
> 2.41.0
>