Re: "test_ip_fast_csum: ASSERTION FAILED at lib/checksum_kunit.c:589" at boot with CONFIG_CHECKSUM_KUNIT=y enabled on a Talos II, kernel 6.8-rc5

From: Christophe Leroy
Date: Fri Feb 23 2024 - 04:07:34 EST




Le 23/02/2024 à 08:00, Charlie Jenkins a écrit :
> On Fri, Feb 23, 2024 at 06:58:14AM +0000, Christophe Leroy wrote:
>>
>>
>> Le 23/02/2024 à 07:12, Charlie Jenkins a écrit :
>>> On Fri, Feb 23, 2024 at 05:59:07AM +0000, Christophe Leroy wrote:
>>>> Hi Erhard, hi Charlie,
>>>>
>>>> Le 23/02/2024 à 02:26, Erhard Furtner a écrit :
>>>>> Greetings!
>>>>>
>>>>> Looks like my Talos II (running a BE kernel+system) fails some of the kernels internal unit tests. One of the failing tests is checksum_kunit, enabled via CONFIG_CHECKSUM_KUNIT=y:
>>>>>
>>>>> [...]
>>>>> KTAP version 1
>>>>> # Subtest: checksum
>>>>> # module: checksum_kunit
>>>>> 1..5
>>>>> entry-flush: disabled on command line.
>>>>> ok 1 test_csum_fixed_random_inputs
>>>>> ok 2 test_csum_all_carry_inputs
>>>>> ok 3 test_csum_no_carry_inputs
>>>>> # test_ip_fast_csum: ASSERTION FAILED at lib/checksum_kunit.c:589
>>>>> Expected ( u64)expected == ( u64)csum_result, but
>>>>> ( u64)expected == 55939 (0xda83)
>>>>> ( u64)csum_result == 33754 (0x83da)
>>>>> not ok 4 test_ip_fast_csum
>>>>> # test_csum_ipv6_magic: ASSERTION FAILED at lib/checksum_kunit.c:617
>>>>> Expected ( u64)expected_csum_ipv6_magic[i] == ( u64)csum_ipv6_magic(saddr, daddr, len, proto, csum), but
>>>>> ( u64)expected_csum_ipv6_magic[i] == 6356 (0x18d4)
>>>>> ( u64)csum_ipv6_magic(saddr, daddr, len, proto, csum) == 43586 (0xaa42)
>>>>> not ok 5 test_csum_ipv6_magic
>>>>> # checksum: pass:3 fail:2 skip:0 total:5
>>>>> # Totals: pass:3 fail:2 skip:0 total:5
>>>>> not ok 4 checksum
>>>>> [...]
>>>>>
>>>>> Full dmesg + kernel .config attached.
>>>>
>>>> Looks like the same problem as the one I fixed with commit b38460bc463c
>>>> ("kunit: Fix checksum tests on big endian CPUs")
>>>>
>>>> The new tests implemented through commit 6f4c45cbcb00 ("kunit: Add tests
>>>> for csum_ipv6_magic and ip_fast_csum") create a lot of type issues as
>>>> reported by sparse when built with C=2 (see below).
>>>>
>>>> Once those issues are fixed, it should work.
>>>>
>>>> Charlie, can you provide a fix ?
>>>>
>>>> Thanks,
>>>> Christophe
>>>
>>> The "lib: checksum: Fix issues with checksum tests" patch should fix all of these issues [1].
>>>
>>> [1] https://lore.kernel.org/all/20240221-fix_sparse_errors_checksum_tests-v9-1-bff4d73ab9d1@xxxxxxxxxxxx/T/#m189783a9b2a7d12e3c34c4a412e65408658db2c9
>>
>> It doesn't fix the issues, I still get the following with your patch 1/2
>> applied:
>>
>> [ 6.893141] KTAP version 1
>> [ 6.896118] 1..1
>> [ 6.897764] KTAP version 1
>> [ 6.900800] # Subtest: checksum
>> [ 6.904518] # module: checksum_kunit
>> [ 6.904601] 1..5
>> [ 7.139784] ok 1 test_csum_fixed_random_inputs
>> [ 7.590056] ok 2 test_csum_all_carry_inputs
>> [ 8.064415] ok 3 test_csum_no_carry_inputs
>> [ 8.070065] # test_ip_fast_csum: ASSERTION FAILED at
>> lib/checksum_kunit.c:589
>> [ 8.070065] Expected ( u64)expected == ( u64)csum_result, but
>> [ 8.070065] ( u64)expected == 55939 (0xda83)
>> [ 8.070065] ( u64)csum_result == 33754 (0x83da)
>> [ 8.075836] not ok 4 test_ip_fast_csum
>> [ 8.101039] # test_csum_ipv6_magic: ASSERTION FAILED at
>> lib/checksum_kunit.c:617
>> [ 8.101039] Expected ( u64)( __sum16)expected_csum_ipv6_magic[i]
>> == ( u64)csum_ipv6_magic(saddr, daddr, len, proto, ( __wsum)csum), but
>> [ 8.101039] ( u64)( __sum16)expected_csum_ipv6_magic[i] ==
>> 6356 (0x18d4)
>> [ 8.101039] ( u64)csum_ipv6_magic(saddr, daddr, len, proto, (
>> __wsum)csum) == 43586 (0xaa42)
>> [ 8.106446] not ok 5 test_csum_ipv6_magic
>> [ 8.143829] # checksum: pass:3 fail:2 skip:0 total:5
>> [ 8.148334] # Totals: pass:3 fail:2 skip:0 total:5
>> [ 8.153173] not ok 1 checksum
>>
>> All your patch does is to hide the sparse warnings. But forcing a cast
>> doesn't fix byte orders.
>>
>> Please have a look at commit b38460bc463c ("kunit: Fix checksum tests on
>> big endian CPUs"), there are helpers to put checksums in the correct
>> byte order.
>>
>> Christophe
>
> Well that's what the second patch is for. Is it failing with the second
> patch applied?
>

Yes, with the second patch it magically works, meaning the patch description
is not correct, because the problem on powerpc is not at all related to
memory alignment but to endianness. And endianness should have been
fixed by patch 1, but instead patch 1 just hides the problem by
forcing casts.

The real fix for endianness, which should be your patch 1, is the
following change. With that change it works perfectly well without any
forced cast:

diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c
index 225bb7701460..bf70850035c7 100644
--- a/lib/checksum_kunit.c
+++ b/lib/checksum_kunit.c
@@ -215,7 +215,7 @@ static const u32 init_sums_no_overflow[] = {
0xffff0000, 0xfffffffb,
};

-static const __sum16 expected_csum_ipv6_magic[] = {
+static const u16 expected_csum_ipv6_magic[] = {
0x18d4, 0x3085, 0x2e4b, 0xd9f4, 0xbdc8, 0x78f, 0x1034, 0x8422, 0x6fc0,
0xd2f6, 0xbeb5, 0x9d3, 0x7e2a, 0x312e, 0x778e, 0xc1bb, 0x7cf2, 0x9d1e,
0xca21, 0xf3ff, 0x7569, 0xb02e, 0xca86, 0x7e76, 0x4539, 0x45e3, 0xf28d,
@@ -241,7 +241,7 @@ static const __sum16 expected_csum_ipv6_magic[] = {
0x3845, 0x1014
};

-static const __sum16 expected_fast_csum[] = {
+static const u16 expected_fast_csum[] = {
0xda83, 0x45da, 0x4f46, 0x4e4f, 0x34e, 0xe902, 0xa5e9, 0x87a5, 0x7187,
0x5671, 0xf556, 0x6df5, 0x816d, 0x8f81, 0xbb8f, 0xfbba, 0x5afb, 0xbe5a,
0xedbe, 0xabee, 0x6aac, 0xe6b, 0xea0d, 0x67ea, 0x7e68, 0x8a7e, 0x6f8a,
@@ -577,7 +577,8 @@ static void test_csum_no_carry_inputs(struct kunit
*test)

static void test_ip_fast_csum(struct kunit *test)
{
- __sum16 csum_result, expected;
+ __sum16 csum_result;
+ u16 expected;

for (int len = IPv4_MIN_WORDS; len < IPv4_MAX_WORDS; len++) {
for (int index = 0; index < NUM_IP_FAST_CSUM_TESTS; index++) {
@@ -586,7 +587,7 @@ static void test_ip_fast_csum(struct kunit *test)
expected_fast_csum[(len - IPv4_MIN_WORDS) *
NUM_IP_FAST_CSUM_TESTS +
index];
- CHECK_EQ(expected, csum_result);
+ CHECK_EQ(to_sum16(expected), csum_result);
}
}
}
@@ -598,7 +599,7 @@ static void test_csum_ipv6_magic(struct kunit *test)
const struct in6_addr *daddr;
unsigned int len;
unsigned char proto;
- unsigned int csum;
+ __wsum csum;

const int daddr_offset = sizeof(struct in6_addr);
const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr);
@@ -611,10 +612,10 @@ static void test_csum_ipv6_magic(struct kunit *test)
saddr = (const struct in6_addr *)(random_buf + i);
daddr = (const struct in6_addr *)(random_buf + i +
daddr_offset);
- len = *(unsigned int *)(random_buf + i + len_offset);
+ len = le32_to_cpu(*(__le32 *)(random_buf + i + len_offset));
proto = *(random_buf + i + proto_offset);
- csum = *(unsigned int *)(random_buf + i + csum_offset);
- CHECK_EQ(expected_csum_ipv6_magic[i],
+ csum = *(__wsum *)(random_buf + i + csum_offset);
+ CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]),
csum_ipv6_magic(saddr, daddr, len, proto, csum));
}
#endif /* !CONFIG_NET */
---

Christophe