[PATCH 4/6] x86: KVM: Enable AVX-VNNI-INT8 CPUID and expose it to guest

From: Jiaxi Chen
Date: Wed Oct 19 2022 - 04:56:46 EST


AVX-VNNI-INT8 is a new set of instructions in the latest Intel platform
Sierra Forest. It multiplies the individual bytes of two unsigned or
unsigned source operands, then add and accumulate the results into the
destination dword element size operand. This instruction allows for the
platform to have superior AI capabilities.

The bit definition:
CPUID.(EAX=7,ECX=1):EDX[bit 4]

This patch enables this CPUID in the kernel feature bits and expose it to
guest OS. Since the CPUID involves a bit of EDX (EAX=7,ECX=1) which has not
been enumerated yet, this patch adds CPUID_7_1_EDX to CPUID subleaves. At
the same time, word 20 is newly-defined in CPU features for CPUID level
0x00000007:1 (EDX).

Signed-off-by: Jiaxi Chen <jiaxi.chen@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/cpufeature.h | 7 +++++--
arch/x86/include/asm/cpufeatures.h | 5 ++++-
arch/x86/include/asm/disabled-features.h | 3 ++-
arch/x86/include/asm/required-features.h | 3 ++-
arch/x86/kernel/cpu/common.c | 1 +
arch/x86/kvm/cpuid.c | 5 ++++-
arch/x86/kvm/reverse_cpuid.h | 1 +
7 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1a85e1fb0922..d46b802930b0 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -32,6 +32,7 @@ enum cpuid_leafs
CPUID_8000_0007_EBX,
CPUID_7_EDX,
CPUID_8000_001F_EAX,
+ CPUID_7_1_EDX,
};

#define X86_CAP_FMT_NUM "%d:%d"
@@ -94,8 +95,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 20))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 21))

#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -118,8 +120,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 20))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 21))

#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a682f646243f..b2aa761ea110 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 20 /* N 32-bit words worth of info */
+#define NCAPINTS 21 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */

/*
@@ -423,6 +423,9 @@
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */

+/* Intel-defined CPU features, CPUID level 0x00000007:1 (EDX), word 20 */
+#define X86_FEATURE_AVX_VNNI_INT8 (20*32+ 4) /* Support for VPDPB[SU,UU,SS]D[,S] */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 33d2cd04d254..2db6929ca868 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -111,6 +111,7 @@
#define DISABLED_MASK17 0
#define DISABLED_MASK18 0
#define DISABLED_MASK19 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
+#define DISABLED_MASK20 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)

#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index aff774775c67..3b5522b3f051 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -98,6 +98,7 @@
#define REQUIRED_MASK17 0
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
+#define REQUIRED_MASK20 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)

#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3e508f239098..7c659127ed89 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1031,6 +1031,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
if (eax >= 1) {
cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_7_1_EAX] = eax;
+ c->x86_capability[CPUID_7_1_EDX] = edx;
}
}

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 837bcd1373e5..b1b53a5c788a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -660,6 +660,9 @@ void kvm_set_cpu_caps(void)
F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16) |
F(AVX_IFMA));

+ kvm_cpu_cap_mask(CPUID_7_1_EDX,
+ F(AVX_VNNI_INT8));
+
kvm_cpu_cap_mask(CPUID_D_1_EAX,
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
);
@@ -913,9 +916,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
goto out;

cpuid_entry_override(entry, CPUID_7_1_EAX);
+ cpuid_entry_override(entry, CPUID_7_1_EDX);
entry->ebx = 0;
entry->ecx = 0;
- entry->edx = 0;
}
break;
case 0xa: { /* Architectural Performance Monitoring */
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index a19d473d0184..fb4f4bffad5c 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -48,6 +48,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_7_1_EAX] = { 7, 1, CPUID_EAX},
[CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
+ [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX},
};

/*
--
2.27.0