[PATCH v2 14/19] crypto: x86 - load based on CPU features

From: Robert Elliott
Date: Wed Oct 12 2022 - 18:01:51 EST


x86-optimized crypto implementations that are built as loadable modules
rather than built into the kernel end up as .ko files in the filesystem,
e.g., in /usr/lib/modules. If the filesystem driver itself is a module,
these might not be available when the crypto API is initialized, resulting
in the generic implementation being used (e.g., sha512_transform rather
than sha512_transform_avx2).

In one test case, CPU utilization in the sha512 function dropped
from 15.34% to 7.18% after forcing loading of the optimized module.

Set module aliases for x86-optimized crypto modules based on CPU
feature bits so udev gets a chance to load them later in the boot
process, once the filesystems are all available.

For example, with sha256, sha512, aesni_intel, and blake2s configured
as built-in and the rest configured as modules:

[ 13.749145] sha256_ssse3: CPU-optimized crypto module loaded (SSSE3=no, AVX=no, AVX2=yes, SHA-NI=no)
[ 13.758502] sha512_ssse3: CPU-optimized crypto module loaded (SSSE3=no, AVX=no, AVX2=yes)
[ 13.766939] libblake2s_x86_64: CPU-optimized crypto module loaded (SSSE3=yes, AVX512=yes)
[ 16.794502] aesni_intel: CPU-optimized crypto module loaded (GCM SSE=no, AVX=yes, AVX2=yes)(CTR AVX=yes)
...
[ 18.160648] Run /init as init process
...
[ 20.073484] twofish_x86_64: CPU-optimized crypto module loaded
[ 23.974029] serpent_sse2_x86_64: CPU-optimized crypto module loaded
[ 24.080749] serpent_avx_x86_64: CPU-optimized crypto module loaded
[ 24.187148] serpent_avx2: CPU-optimized crypto module loaded
[ 24.358980] des3_ede_x86_64: CPU-optimized crypto module loaded
[ 24.459257] camellia_x86_64: CPU-optimized crypto module loaded
[ 24.548487] camellia_aesni_avx_x86_64: CPU-optimized crypto module loaded
[ 24.630777] camellia_aesni_avx2: CPU-optimized crypto module loaded
[ 24.957134] blowfish_x86_64: CPU-optimized crypto module loaded
[ 25.063537] aegis128_aesni: CPU-optimized crypto module loaded
[ 25.174560] chacha_x86_64: CPU-optimized crypto module loaded (AVX2=yes, AVX512=yes)
[ 25.270084] sha1_ssse3: CPU-optimized crypto module loaded (SSSE3=no, AVX=no, AVX2=yes, SHA-NI=no)
[ 25.531724] ghash_clmulni_intel: CPU-optimized crypto module loaded
[ 25.596316] crc32c_intel: CPU-optimized crypto module loaded (PCLMULQDQ=yes)
[ 25.661693] crc32_pclmul: CPU-optimized crypto module loaded
[ 25.696388] crct10dif_pclmul: CPU-optimized crypto module loaded
[ 25.742040] poly1305_x86_64: CPU-optimized crypto module loaded (AVX=yes, AVX2=yes, AVX512=no)
[ 25.841364] nhpoly1305_avx2: CPU-optimized crypto module loaded
[ 25.856401] curve25519_x86_64: CPU-optimized crypto module loaded (ADX=yes)
[ 25.866615] sm3_avx_x86_64: CPU-optimized crypto module loaded

This commit covers modules that did not create RCU stall issues
due to kernel_fpu_begin()/kernel_fpu_end() calls.

Signed-off-by: Robert Elliott <elliott@xxxxxxx>
---
arch/x86/crypto/aegis128-aesni-glue.c | 9 +++++++++
arch/x86/crypto/aesni-intel_glue.c | 7 +++----
arch/x86/crypto/blake2s-glue.c | 11 ++++++++++-
arch/x86/crypto/blowfish_glue.c | 10 ++++++++++
arch/x86/crypto/camellia_aesni_avx2_glue.c | 12 ++++++++++++
arch/x86/crypto/camellia_aesni_avx_glue.c | 11 +++++++++++
arch/x86/crypto/camellia_glue.c | 9 +++++++++
arch/x86/crypto/cast5_avx_glue.c | 10 ++++++++++
arch/x86/crypto/cast6_avx_glue.c | 10 ++++++++++
arch/x86/crypto/chacha_glue.c | 12 ++++++++++--
arch/x86/crypto/curve25519-x86_64.c | 12 +++++++++++-
arch/x86/crypto/des3_ede_glue.c | 10 ++++++++++
arch/x86/crypto/nhpoly1305-avx2-glue.c | 10 ++++++++++
arch/x86/crypto/nhpoly1305-sse2-glue.c | 10 ++++++++++
arch/x86/crypto/poly1305_glue.c | 12 ++++++++++++
arch/x86/crypto/serpent_avx2_glue.c | 10 ++++++++++
arch/x86/crypto/serpent_avx_glue.c | 10 ++++++++++
arch/x86/crypto/serpent_sse2_glue.c | 10 ++++++++++
arch/x86/crypto/sm4_aesni_avx2_glue.c | 12 ++++++++++++
arch/x86/crypto/sm4_aesni_avx_glue.c | 11 +++++++++++
arch/x86/crypto/twofish_avx_glue.c | 10 ++++++++++
arch/x86/crypto/twofish_glue.c | 10 ++++++++++
arch/x86/crypto/twofish_glue_3way.c | 10 ++++++++++
23 files changed, 230 insertions(+), 8 deletions(-)

diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 4623189000d8..9e4ba031704d 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -263,10 +263,19 @@ static struct aead_alg crypto_aegis128_aesni_alg = {
}
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static struct simd_aead_alg *simd_alg;

static int __init crypto_aegis128_aesni_module_init(void)
{
+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index a5b0cb3efeba..4a530a558436 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -36,7 +36,6 @@
#include <linux/spinlock.h>
#include <linux/static_call.h>

-
#define AESNI_ALIGN 16
#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN)))
#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE - 1))
@@ -1228,17 +1227,17 @@ static struct aead_alg aesni_aeads[0];

static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)];

-static const struct x86_cpu_id aesni_cpu_id[] = {
+static const struct x86_cpu_id module_cpu_ids[] = {
X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
{}
};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);

static int __init aesni_init(void)
{
int err;

- if (!x86_match_cpu(aesni_cpu_id))
+ if (!x86_match_cpu(module_cpu_ids))
return -ENODEV;
#ifdef CONFIG_X86_64
if (boot_cpu_has(X86_FEATURE_AVX2)) {
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
index 3054ee7fa219..5153bb423dbe 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -10,7 +10,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sizes.h>
-
+#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <asm/fpu/api.h>
#include <asm/processor.h>
@@ -56,8 +56,17 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
}
EXPORT_SYMBOL(blake2s_compress);

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static int __init blake2s_mod_init(void)
{
+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (boot_cpu_has(X86_FEATURE_SSSE3))
static_branch_enable(&blake2s_use_ssse3);

diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 019c64c1340a..4c0ead71b198 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -15,6 +15,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
+#include <asm/cpu_device_id.h>

/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
@@ -303,10 +304,19 @@ static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static int __init blowfish_init(void)
{
int err;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!force && is_blacklisted_cpu()) {
printk(KERN_INFO
"blowfish-x86_64: performance on this CPU "
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index e7e4d64e9577..8e3ac5be7cf6 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -11,6 +11,7 @@
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>
+#include <asm/cpu_device_id.h>

#include "camellia.h"
#include "ecb_cbc_helpers.h"
@@ -98,12 +99,23 @@ static struct skcipher_alg camellia_algs[] = {
},
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+ X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];

static int __init camellia_aesni_init(void)
{
const char *feature_name;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!boot_cpu_has(X86_FEATURE_AVX) ||
!boot_cpu_has(X86_FEATURE_AVX2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index c7ccf63e741e..54fcd86160ff 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -11,6 +11,7 @@
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>
+#include <asm/cpu_device_id.h>

#include "camellia.h"
#include "ecb_cbc_helpers.h"
@@ -98,12 +99,22 @@ static struct skcipher_alg camellia_algs[] = {
}
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+ X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];

static int __init camellia_aesni_init(void)
{
const char *feature_name;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!boot_cpu_has(X86_FEATURE_AVX) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!boot_cpu_has(X86_FEATURE_OSXSAVE)) {
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index d45e9c0c42ac..e21d2d5b68f9 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -1377,10 +1377,19 @@ static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static int __init camellia_init(void)
{
int err;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!force && is_blacklisted_cpu()) {
printk(KERN_INFO
"camellia-x86_64: performance on this CPU "
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 3976a87f92ad..bdc3c763334c 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -13,6 +13,7 @@
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>
+#include <asm/cpu_device_id.h>

#include "ecb_cbc_helpers.h"

@@ -93,12 +94,21 @@ static struct skcipher_alg cast5_algs[] = {
}
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];

static int __init cast5_init(void)
{
const char *feature_name;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 7e2aea372349..addca34b3511 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -15,6 +15,7 @@
#include <crypto/algapi.h>
#include <crypto/cast6.h>
#include <crypto/internal/simd.h>
+#include <asm/cpu_device_id.h>

#include "ecb_cbc_helpers.h"

@@ -93,12 +94,21 @@ static struct skcipher_alg cast6_algs[] = {
},
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)];

static int __init cast6_init(void)
{
const char *feature_name;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 0d7e172862db..7275cae3380d 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sizes.h>
+#include <asm/cpu_device_id.h>
#include <asm/simd.h>

#define FPU_BYTES 4096U /* avoid kernel_fpu_begin/end scheduler/rcu stalls */
@@ -278,10 +279,17 @@ static struct skcipher_alg algs[] = {
},
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static int __init chacha_simd_mod_init(void)
{
- if (!boot_cpu_has(X86_FEATURE_SSSE3))
- return 0;
+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+

static_branch_enable(&chacha_use_simd);

diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index d55fa9e9b9e6..7fe395dfa79d 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -12,7 +12,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
-
+#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>

@@ -1697,9 +1697,19 @@ static struct kpp_alg curve25519_alg = {
.max_size = curve25519_max_size,
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_ADX, NULL),
+ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+ X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);

static int __init curve25519_mod_init(void)
{
+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX))
static_branch_enable(&curve25519_use_bmi2_adx);
else
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index abb8b1fe123b..168cac5c6ca6 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -15,6 +15,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
+#include <asm/cpu_device_id.h>

struct des3_ede_x86_ctx {
struct des3_ede_ctx enc;
@@ -354,10 +355,19 @@ static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static int __init des3_ede_x86_init(void)
{
int err;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!force && is_blacklisted_cpu()) {
pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
return -ENODEV;
diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c
index 59615ae95e86..a8046334ddca 100644
--- a/arch/x86/crypto/nhpoly1305-avx2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c
@@ -11,6 +11,7 @@
#include <crypto/nhpoly1305.h>
#include <linux/module.h>
#include <linux/sizes.h>
+#include <asm/cpu_device_id.h>
#include <asm/simd.h>

#define FPU_BYTES 4096U /* avoid kernel_fpu_begin/end scheduler/rcu stalls */
@@ -57,8 +58,17 @@ static struct shash_alg nhpoly1305_alg = {
.descsize = sizeof(struct nhpoly1305_state),
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static int __init nhpoly1305_mod_init(void)
{
+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!boot_cpu_has(X86_FEATURE_AVX2) ||
!boot_cpu_has(X86_FEATURE_OSXSAVE))
return -ENODEV;
diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c
index bf91c375821a..cdbe5df00927 100644
--- a/arch/x86/crypto/nhpoly1305-sse2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c
@@ -11,6 +11,7 @@
#include <crypto/nhpoly1305.h>
#include <linux/module.h>
#include <linux/sizes.h>
+#include <asm/cpu_device_id.h>
#include <asm/simd.h>

#define FPU_BYTES 4096U /* avoid kernel_fpu_begin/end scheduler/rcu stalls */
@@ -57,8 +58,17 @@ static struct shash_alg nhpoly1305_alg = {
.descsize = sizeof(struct nhpoly1305_state),
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_XMM2, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static int __init nhpoly1305_mod_init(void)
{
+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!boot_cpu_has(X86_FEATURE_XMM2))
return -ENODEV;

diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 3764301bdf1b..3e6ff505cd26 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sizes.h>
+#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/simd.h>

@@ -260,8 +261,19 @@ static struct shash_alg alg = {
},
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+ X86_MATCH_FEATURE(X86_FEATURE_AVX512F, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static int __init poly1305_simd_mod_init(void)
{
+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (boot_cpu_has(X86_FEATURE_AVX) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
static_branch_enable(&poly1305_use_avx);
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 347e97f4b713..24741d33edaf 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -12,6 +12,7 @@
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
+#include <asm/cpu_device_id.h>

#include "serpent-avx.h"
#include "ecb_cbc_helpers.h"
@@ -94,12 +95,21 @@ static struct skcipher_alg serpent_algs[] = {
},
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];

static int __init serpent_avx2_init(void)
{
const char *feature_name;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX2 instructions are not detected.\n");
return -ENODEV;
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 6c248e1ea4ef..0db18d99da50 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -15,6 +15,7 @@
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
+#include <asm/cpu_device_id.h>

#include "serpent-avx.h"
#include "ecb_cbc_helpers.h"
@@ -100,12 +101,21 @@ static struct skcipher_alg serpent_algs[] = {
},
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];

static int __init serpent_init(void)
{
const char *feature_name;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index d78f37e9b2cf..5288441cc223 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -20,6 +20,7 @@
#include <crypto/b128ops.h>
#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
+#include <asm/cpu_device_id.h>

#include "serpent-sse2.h"
#include "ecb_cbc_helpers.h"
@@ -103,10 +104,19 @@ static struct skcipher_alg serpent_algs[] = {
},
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_XMM2, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];

static int __init serpent_sse2_init(void)
{
+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!boot_cpu_has(X86_FEATURE_XMM2)) {
printk(KERN_INFO "SSE2 instructions are not detected.\n");
return -ENODEV;
diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c
index 84bc718f49a3..2e9fe76056b8 100644
--- a/arch/x86/crypto/sm4_aesni_avx2_glue.c
+++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
+#include <asm/cpu_device_id.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
@@ -126,6 +127,14 @@ static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
}
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+ X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static struct simd_skcipher_alg *
simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)];

@@ -133,6 +142,9 @@ static int __init sm4_init(void)
{
const char *feature_name;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!boot_cpu_has(X86_FEATURE_AVX) ||
!boot_cpu_has(X86_FEATURE_AVX2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c
index 7800f77d68ad..f730822f203a 100644
--- a/arch/x86/crypto/sm4_aesni_avx_glue.c
+++ b/arch/x86/crypto/sm4_aesni_avx_glue.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
+#include <asm/cpu_device_id.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
@@ -445,6 +446,13 @@ static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
}
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+ X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static struct simd_skcipher_alg *
simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];

@@ -452,6 +460,9 @@ static int __init sm4_init(void)
{
const char *feature_name;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!boot_cpu_has(X86_FEATURE_AVX) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!boot_cpu_has(X86_FEATURE_OSXSAVE)) {
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 3eb3440b477a..4657e6efc35d 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -15,6 +15,7 @@
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/twofish.h>
+#include <asm/cpu_device_id.h>

#include "twofish.h"
#include "ecb_cbc_helpers.h"
@@ -103,12 +104,21 @@ static struct skcipher_alg twofish_algs[] = {
},
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)];

static int __init twofish_init(void)
{
const char *feature_name;

+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index f9c4adc27404..ade98aef3402 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -43,6 +43,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
+#include <asm/cpu_device_id.h>

asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
@@ -81,8 +82,17 @@ static struct crypto_alg alg = {
}
};

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static int __init twofish_glue_init(void)
{
+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
return crypto_register_alg(&alg);
}

diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 90454cf18e0d..790e5a59a9a7 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
+#include <asm/cpu_device_id.h>

#include "twofish.h"
#include "ecb_cbc_helpers.h"
@@ -140,8 +141,17 @@ static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");

+static const struct x86_cpu_id module_cpu_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
static int __init twofish_3way_init(void)
{
+ if (!x86_match_cpu(module_cpu_ids))
+ return -ENODEV;
+
if (!force && is_blacklisted_cpu()) {
printk(KERN_INFO
"twofish-x86_64-3way: performance on this CPU "
--
2.37.3