Re: [RFC][PATCH] fix csum_and_copy_..._user() idiocy. Re: AW: [PATCH] amd64: Fix csum_partial_copy_generic()

From: Thomas Gleixner
Date: Mon Oct 23 2023 - 06:38:09 EST


On Sun, Oct 22 2023 at 20:46, Al Viro wrote:
> @@ -113,14 +113,14 @@ csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst,
> *dst = word | tmp;
> checksum += carry;
> }
> - return checksum;
> + return from64to16 (checksum);

from64to16(checksum); no space before the parenthesis, please. Same all
over the place.

>
> #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
> #define _HAVE_ARCH_CSUM_AND_COPY
> static inline
> -__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len)
> +__wsum_fault csum_and_copy_from_user(const void __user *src, void *dst, int len)
> {
> if (!access_ok(src, len))
> - return 0;
> + return -1;

return CSUM_FAULT;
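
CSUM_FAULT being the all-ones fault marker which the patch introduces,
presumably something like

	#define CSUM_FAULT ((__force __wsum_fault)-1)

which cannot collide with a valid checksum, as a valid one only ever
occupies 32 of the 64 bits.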

> /*
> diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
> index 0fd5c10e90a7..5db935eaf165 100644
> --- a/arch/arm/lib/csumpartialcopygeneric.S
> +++ b/arch/arm/lib/csumpartialcopygeneric.S
> @@ -86,7 +86,7 @@ sum .req r3
>
> FN_ENTRY
> save_regs
> - mov sum, #-1
> + mov sum, #0
>
> cmp len, #8 @ Ensure that we have at least
> blo .Lless8 @ 8 bytes to copy.
> @@ -160,6 +160,7 @@ FN_ENTRY
> ldr sum, [sp, #0] @ dst
> tst sum, #1
> movne r0, r0, ror #8
> + mov r1, #0
> load_regs
>
> .Lsrc_not_aligned:
> diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
> index 6928781e6bee..f273c9667914 100644
> --- a/arch/arm/lib/csumpartialcopyuser.S
> +++ b/arch/arm/lib/csumpartialcopyuser.S
> @@ -73,11 +73,11 @@
> #include "csumpartialcopygeneric.S"
>
> /*
> - * We report fault by returning 0 csum - impossible in normal case, since
> - * we start with 0xffffffff for initial sum.
> + * We report fault by returning ~0ULL csum
> */

There is also a stale comment a few lines further up.

> .pushsection .text.fixup,"ax"
> .align 4
> -9001: mov r0, #0
> +9001: mov r0, #-1
> + mov r1, #-1
> load_regs
> .popsection
> #include <linux/errno.h>
> -#include <asm/types.h>
> +#include <linux/bitops.h>
> #include <asm/byteorder.h>
> +
> +typedef u64 __bitwise __wsum_fault;

newline please.

> +static inline __wsum_fault to_wsum_fault(__wsum v)
> +{
> +#if defined(CONFIG_64BIT) || defined(__LITTLE_ENDIAN__)
> + return (__force __wsum_fault)v;
> +#else
> + return (__force __wsum_fault)((__force u64)v << 32);
> +#endif
> +}
> +
> +static inline __wsum from_wsum_fault(__wsum_fault v)
> +{
> +#if defined(CONFIG_64BIT) || defined(__LITTLE_ENDIAN__)
> + return (__force __wsum)v;
> +#else
> + return (__force __wsum)((__force u64)v >> 32);
> +#endif
> +}
> +
> +static inline bool wsum_fault_check(__wsum_fault v)
> +{
> +#if defined(CONFIG_64BIT) || defined(__LITTLE_ENDIAN__)
> + return (__force s64)v < 0;
> +#else
> + return (int)(__force u32)v < 0;

Why not __force s32 right away?
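
I.e.:

	return (__force s32)v < 0;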

> #include <asm/checksum.h>
> #if !defined(_HAVE_ARCH_COPY_AND_CSUM_FROM_USER) || !defined(HAVE_CSUM_COPY_USER)
> #include <linux/uaccess.h>
> @@ -25,24 +57,24 @@
>
> #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
> static __always_inline
> -__wsum csum_and_copy_from_user (const void __user *src, void *dst,
> +__wsum_fault csum_and_copy_from_user (const void __user *src, void *dst,
> int len)
> {
> if (copy_from_user(dst, src, len))
> - return 0;
> - return csum_partial(dst, len, ~0U);
> + return CSUM_FAULT;
> + return to_wsum_fault(csum_partial(dst, len, 0));
> }
> #endif
> #ifndef HAVE_CSUM_COPY_USER
> -static __always_inline __wsum csum_and_copy_to_user
> +static __always_inline __wsum_fault csum_and_copy_to_user
> (const void *src, void __user *dst, int len)
> {
> - __wsum sum = csum_partial(src, len, ~0U);
> + __wsum sum = csum_partial(src, len, 0);
>
> if (copy_to_user(dst, src, len) == 0)
> - return sum;
> - return 0;
> + return to_wsum_fault(sum);
> + return CSUM_FAULT;

	if (copy_to_user(dst, src, len))
		return CSUM_FAULT;
	return to_wsum_fault(sum);

So it follows the pattern of csum_and_copy_from_user() above?
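
I.e. the whole helper would then read (untested):

	static __always_inline __wsum_fault csum_and_copy_to_user(const void *src,
								  void __user *dst,
								  int len)
	{
		__wsum sum = csum_partial(src, len, 0);

		if (copy_to_user(dst, src, len))
			return CSUM_FAULT;
		return to_wsum_fault(sum);
	}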

> size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
> struct iov_iter *i)
> {
> - __wsum sum, next;
> + __wsum sum;
> + __wsum_fault next;
> sum = *csum;
> if (WARN_ON_ONCE(!i->data_source))
> return 0;
>
> iterate_and_advance(i, bytes, base, len, off, ({
> next = csum_and_copy_from_user(base, addr + off, len);
> - sum = csum_block_add(sum, next, off);
> - next ? 0 : len;
> + sum = csum_block_add(sum, from_wsum_fault(next), off);
> + likely(!wsum_fault_check(next)) ? 0 : len;

This macro maze is confusing as hell.

Looking at iterate_buf(), which is the least convoluted, that resolves to
the following:

	len = bytes;
	...
	next = csum_and_copy_from_user(...);
	...
	len -= !wsum_fault_check(next) ? 0 : len;
	...
	bytes = len;
	...
	return bytes;

So it correctly returns 'bytes' for the success case and 0 for the fault
case.

Now decrypting iterate_iovec():

	off = 0;

	do {
		....
		len -= !wsum_fault_check(next) ? 0 : len;
		off += len;
		skip += len;
		bytes -= len;
		if (skip < __p->iov_len)	<- breaks on fault
			break;
		...
	} while (bytes);

	bytes = off;
	...
	return bytes;

Which means that if the first vector is copied successfully, 'off' is
greater than 0. A fault on the second one will correctly break out of the
loop, but the function will incorrectly return a value > 0, i.e. the
length of the first iteration.

As the callers just check for != 0, such a partial copy is considered a
success, no?
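
Concretely, with made-up sizes, take two 64-byte iovecs where the copy of
the first one succeeds and the copy of the second one faults:

	iteration 1: step -> 0,  len = 64, off = 64
	iteration 2: step -> 64, len = 0,  off = 64, loop breaks

	bytes = off = 64;	/* != 0, so the caller sees success */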

So instead of

	likely(!wsum_fault_check(next)) ? 0 : len;

shouldn't this just do:

	if (unlikely(wsum_fault_check(next)))
		return 0;
	0;

for the sake of simplicity and mental sanity?
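
I.e. the complete user-space step would then read (untested, with the
csum_block_add() moved after the check so that a fault never feeds
CSUM_FAULT into the sum):

	next = csum_and_copy_from_user(base, addr + off, len);
	if (unlikely(wsum_fault_check(next)))
		return 0;
	sum = csum_block_add(sum, from_wsum_fault(next), off);
	0;

Note that the early return also leaves *csum untouched on a fault.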

Thanks,

tglx