Re: [PATCH v8.1 net-next 03/23] net/tcp: Introduce TCP_AO setsockopt()s

From: Simon Horman
Date: Mon Jul 24 2023 - 15:31:19 EST


On Fri, Jul 21, 2023 at 05:18:54PM +0100, Dmitry Safonov wrote:

...

Hi Dimitry,

> diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
> index bae5e2369b4f..307961b41541 100644
> --- a/include/linux/sockptr.h
> +++ b/include/linux/sockptr.h
> @@ -55,6 +55,29 @@ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
> return copy_from_sockptr_offset(dst, src, 0, size);
> }
>
> +static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
> + sockptr_t src, size_t usize)

The indentation of the two lines above is not correct,
they should be aligned to the inside of the opening '('
on the preceding line.

In order to stop things being too far to the left,
which is perhaps the intent of the current indention scheme,
the return type of the function can be moved to it's own line.

static inline int
copy_struct_from_sockptr(void *dst, size_t ksize, sockptr_t src, size_t usize)

...

> diff --git a/include/net/tcp.h b/include/net/tcp.h

...

> +static inline int ipv4_prefix_cmp(const struct in_addr *addr1,
> + const struct in_addr *addr2,
> + unsigned int prefixlen)
> +{
> + __be32 mask = inet_make_mask(prefixlen);
> +
> + if ((addr1->s_addr & mask) == (addr2->s_addr & mask))
> + return 0;
> + return ((addr1->s_addr & mask) > (addr2->s_addr & mask)) ? 1 : -1;
> +}

Above, '>' is operating on two big endian values.
But typically such maths operates on host byte order values.

Flagged by Sparse.

...

> +static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk,
> + const union tcp_ao_addr *addr, int family, u8 prefix,
> + int sndid, int rcvid, u16 port)

Same comment about indentation as above.

static struct tcp_ao_key *
__tcp_ao_do_lookup(const struct sock *sk, const union tcp_ao_addr *addr,
int family, u8 prefix, int sndid, int rcvid, u16 port)

...

> +struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk,
> + const union tcp_ao_addr *addr,
> + int family, int sndid, int rcvid, u16 port)

Should tcp_ao_do_lookup be static?
It seems to only be used in this file.

...

> +static int tcp_ao_verify_port(struct sock *sk, u16 port)
> +{
> + struct inet_sock *inet = inet_sk(sk);
> +
> + if (port != 0) /* FIXME */

I guess this should be fixed :)

> + return -EINVAL;
> +
> + /* Check that MKT port is consistent with socket */
> + if (port != 0 && inet->inet_dport != 0 && port != inet->inet_dport)

port is host byte order, but inet->inet_dport is big endian.
This does not seem correct.

> + return -EINVAL;
> +
> + return 0;
> +}

...

> +static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key)
> +{
> + unsigned int syn_tcp_option_space;
> + bool is_kdf_aes_128_cmac = false;
> + struct crypto_ahash *tfm;
> + struct tcp_sigpool hp;
> + void *tmp_key = NULL;
> + int err;
> +
> + /* RFC5926, 3.1.1.2. KDF_AES_128_CMAC */
> + if (!strcmp("cmac(aes128)", cmd->alg_name)) {
> + strscpy(cmd->alg_name, "cmac(aes)", sizeof(cmd->alg_name));
> + is_kdf_aes_128_cmac = (cmd->keylen != 16);
> + tmp_key = kmalloc(cmd->keylen, GFP_KERNEL);
> + if (!tmp_key)
> + return -ENOMEM;
> + }
> +
> + key->maclen = cmd->maclen ?: 12; /* 12 is the default in RFC5925 */
> +
> + /* Check: maclen + tcp-ao header <= (MAX_TCP_OPTION_SPACE - mss
> + * - tstamp - wscale - sackperm),
> + * see tcp_syn_options(), tcp_synack_options(), commit 33ad798c924b.
> + *
> + * In order to allow D-SACK with TCP-AO, the header size should be:
> + * (MAX_TCP_OPTION_SPACE - TCPOLEN_TSTAMP_ALIGNED
> + * - TCPOLEN_SACK_BASE_ALIGNED
> + * - 2 * TCPOLEN_SACK_PERBLOCK) = 8 (maclen = 4),
> + * see tcp_established_options().
> + *
> + * RFC5925, 2.2:
> + * Typical MACs are 96-128 bits (12-16 bytes), but any length
> + * that fits in the header of the segment being authenticated
> + * is allowed.
> + *
> + * RFC5925, 7.6:
> + * TCP-AO continues to consume 16 bytes in non-SYN segments,
> + * leaving a total of 24 bytes for other options, of which
> + * the timestamp consumes 10. This leaves 14 bytes, of which 10
> + * are used for a single SACK block. When two SACK blocks are used,
> + * such as to handle D-SACK, a smaller TCP-AO MAC would be required
> + * to make room for the additional SACK block (i.e., to leave 18
> + * bytes for the D-SACK variant of the SACK option) [RFC2883].
> + * Note that D-SACK is not supportable in TCP MD5 in the presence
> + * of timestamps, because TCP MD5’s MAC length is fixed and too
> + * large to leave sufficient option space.
> + */
> + syn_tcp_option_space = MAX_TCP_OPTION_SPACE;
> + syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED;
> + syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED;
> + syn_tcp_option_space -= TCPOLEN_SACKPERM_ALIGNED;
> + if (tcp_ao_len(key) > syn_tcp_option_space) {
> + err = -EMSGSIZE;
> + goto err_kfree;
> + }
> +
> + key->keylen = cmd->keylen;
> + memcpy(key->key, cmd->key, cmd->keylen);
> +
> + err = tcp_sigpool_start(key->tcp_sigpool_id, &hp);
> + if (err)
> + goto err_kfree;
> +
> + tfm = crypto_ahash_reqtfm(hp.req);
> + if (is_kdf_aes_128_cmac) {
> + void *scratch = hp.scratch;
> + struct scatterlist sg;
> +
> + memcpy(tmp_key, cmd->key, cmd->keylen);
> + sg_init_one(&sg, tmp_key, cmd->keylen);
> +
> + /* Using zero-key of 16 bytes as described in RFC5926 */
> + memset(scratch, 0, 16);
> + err = crypto_ahash_setkey(tfm, scratch, 16);
> + if (err)
> + goto err_pool_end;
> +
> + err = crypto_ahash_init(hp.req);
> + if (err)
> + goto err_pool_end;
> +
> + ahash_request_set_crypt(hp.req, &sg, key->key, cmd->keylen);
> + err = crypto_ahash_update(hp.req);
> + if (err)
> + goto err_pool_end;
> +
> + err |= crypto_ahash_final(hp.req);
> + if (err)
> + goto err_pool_end;
> + key->keylen = 16;
> + }
> +
> + err = crypto_ahash_setkey(tfm, key->key, key->keylen);
> + if (err)
> + goto err_pool_end;
> +
> + tcp_sigpool_end(&hp);
> +
> + if (tcp_ao_maclen(key) > key->digest_size)
> + return -EINVAL;

tmp_key appears to be leaked here.

> +
> + return 0;

And here.

This is flagged by Smatch.

> +
> +err_pool_end:
> + tcp_sigpool_end(&hp);
> +err_kfree:
> + kfree(tmp_key);
> + return err;
> +}

...

> +static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family,
> + sockptr_t optval, int optlen)
> +{
> + struct tcp_ao_info *ao_info;
> + union tcp_ao_addr *addr;
> + struct tcp_ao_key *key;
> + struct tcp_ao_add cmd;
> + int ret;
> + bool first = false;
> + u16 port;

Please use reverse xmas tree - longest line to shortest - for
local variable declarations in new Networking code.

...

> +static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family,
> + sockptr_t optval, int optlen)
> +{
> + struct tcp_ao_key *key, *new_current = NULL, *new_rnext = NULL;
> + struct tcp_ao_info *ao_info;
> + union tcp_ao_addr *addr;
> + struct tcp_ao_del cmd;
> + int err;
> + __u8 prefix;
> + __be16 port;
> + int addr_len;
> +
> + if (optlen < sizeof(cmd))
> + return -EINVAL;
> +
> + err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen);
> + if (err)
> + return err;
> +
> + if (cmd.reserved != 0 || cmd.reserved2 != 0)
> + return -EINVAL;
> +
> + if (cmd.set_current || cmd.set_rnext) {
> + if (!tcp_ao_can_set_current_rnext(sk))
> + return -EINVAL;
> + }
> +
> + ao_info = setsockopt_ao_info(sk);
> + if (IS_ERR(ao_info))
> + return PTR_ERR(ao_info);
> + if (!ao_info)
> + return -ENOENT;
> +
> + /* For sockets in TCP_CLOSED it's possible set keys that aren't
> + * matching the future peer (address/port/VRF/etc),
> + * tcp_ao_connect_init() will choose a correct matching MKT
> + * if there's any.
> + */
> + if (cmd.set_current) {
> + new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1);
> + if (!new_current)
> + return -ENOENT;
> + }
> + if (cmd.set_rnext) {
> + new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext);
> + if (!new_rnext)
> + return -ENOENT;
> + }
> +
> + if (family == AF_INET) {
> + struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.addr;
> +
> + addr = (union tcp_ao_addr *)&sin->sin_addr;
> + addr_len = sizeof(struct in_addr);
> + port = ntohs(sin->sin_port);

port is big endian, but here it is assigned a host-byte order value.
It looks like port should be u16 rather than __bbe16.

As flagged by Smatch.

> + } else {
> + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.addr;
> + struct in6_addr *addr6 = &sin6->sin6_addr;
> +
> + if (ipv6_addr_v4mapped(addr6)) {
> + addr = (union tcp_ao_addr *)&addr6->s6_addr32[3];
> + addr_len = sizeof(struct in_addr);
> + family = AF_INET;
> + } else {
> + addr = (union tcp_ao_addr *)addr6;
> + addr_len = sizeof(struct in6_addr);
> + }
> + port = ntohs(sin6->sin6_port);

Ditto.

> + }
> + prefix = cmd.prefix;
> +
> + /* We could choose random present key here for current/rnext
> + * but that's less predictable. Let's be strict and don't
> + * allow removing a key that's in use. RFC5925 doesn't
> + * specify how-to coordinate key removal, but says:
> + * "It is presumed that an MKT affecting a particular
> + * connection cannot be destroyed during an active connection"
> + */
> + hlist_for_each_entry_rcu(key, &ao_info->head, node) {
> + if (cmd.sndid != key->sndid ||
> + cmd.rcvid != key->rcvid)
> + continue;
> +
> + if (family != key->family ||
> + prefix != key->prefixlen ||
o
> + port != key->port ||

There is a similar problem here too.
port is host byte order but key->port is big endian.

> + memcmp(addr, &key->addr, addr_len))
> + continue;
> +
> + if (key == new_current || key == new_rnext)
> + continue;
> +
> + return tcp_ao_delete_key(sk, ao_info, key,
> + new_current, new_rnext);
> + }
> + return -ENOENT;
> +}

...