[PATCH v2 08/19] crypto: x86/ghash - limit FPU preemption

From: Robert Elliott
Date: Wed Oct 12 2022 - 18:01:19 EST


As done by the ECB and CBC helpers in arch/x86/crypto/ecb_cbc_helpers.h,
limit the number of bytes processed between kernel_fpu_begin() and
kernel_fpu_end() calls.

kernel_fpu_begin() calls preempt_disable() and kernel_fpu_end() calls
preempt_enable(), so the CPU core is unavailable for scheduling for the
entire time between the two calls. With large inputs this can lead to:
rcu: INFO: rcu_preempt detected expedited stalls on CPUs/tasks: ...

Fixes: 0e1227d356e9 ("crypto: ghash - Add PCLMULQDQ accelerated implementation")
Suggested-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Robert Elliott <elliott@xxxxxxx>
---
arch/x86/crypto/ghash-clmulni-intel_glue.c | 26 ++++++++++++++++------
1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 53aa286ec27f..a39fc405c7cf 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -23,6 +23,8 @@
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16

+#define FPU_BYTES 4096U /* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+
void clmul_ghash_mul(char *dst, const u128 *shash);

void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
@@ -82,7 +84,7 @@ static int ghash_update(struct shash_desc *desc,

if (dctx->bytes) {
int n = min(srclen, dctx->bytes);
- u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
+ u8 *pos = dst + GHASH_BLOCK_SIZE - dctx->bytes;

dctx->bytes -= n;
srclen -= n;
@@ -97,13 +99,23 @@ static int ghash_update(struct shash_desc *desc,
}
}

- kernel_fpu_begin();
- clmul_ghash_update(dst, src, srclen, &ctx->shash);
- kernel_fpu_end();
+ while (srclen >= GHASH_BLOCK_SIZE) {
+ unsigned int fpulen = min(srclen, FPU_BYTES);
+
+ kernel_fpu_begin();
+ while (fpulen >= GHASH_BLOCK_SIZE) {
+ int n = min_t(unsigned int, fpulen, GHASH_BLOCK_SIZE);
+
+ clmul_ghash_update(dst, src, n, &ctx->shash);
+
+ srclen -= n;
+ fpulen -= n;
+ src += n;
+ }
+ kernel_fpu_end();
+ }

- if (srclen & 0xf) {
- src += srclen - (srclen & 0xf);
- srclen &= 0xf;
+ if (srclen) {
dctx->bytes = GHASH_BLOCK_SIZE - srclen;
while (srclen--)
*dst++ ^= *src++;
--
2.37.3