[PATCH] x86/cpu/amd: Work-around for family 17h models 00h-0fh erratum 1019

From: Joshua Scott
Date: Tue Feb 07 2023 - 19:47:22 EST


Erratum 1019 can cause a false-positive micro-op queue parity error to be
reported, which could result in a system reset.

Set the micro-op queue bit (bit 3) in MSR_AMD64_DE_MC_CTRL_MASK on affected
parts to disable detection of the affected error.

Signed-off-by: Joshua Scott <joshua.scott@xxxxxxxxxxxxxxxxxxx>
---
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/kernel/cpu/amd.c | 12 ++++++++++++
2 files changed, 14 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d3fe82c5d6b6..3c8b95829838 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -527,6 +527,8 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
+#define MSR_AMD64_DE_MC_CTRL_MASK 0xc0010403
+#define MSR_AMD64_DE_MC_CTRL_MASK_UOPQ_BIT 3
#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f769d6d08b43..c329be80930f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -29,6 +29,7 @@

static const int amd_erratum_383[];
static const int amd_erratum_400[];
+static const int amd_erratum_1019[];
static const int amd_erratum_1054[];
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);

@@ -995,6 +996,13 @@ static void init_amd(struct cpuinfo_x86 *c)
!cpu_has_amd_erratum(c, amd_erratum_1054))
msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);

+ /*
+ * Turn off detection of micro-op queue parity errors on
+ * machines susceptible to erratum #1019.
+ */
+ if (cpu_has_amd_erratum(c, amd_erratum_1019))
+ msr_set_bit(MSR_AMD64_DE_MC_CTRL_MASK, MSR_AMD64_DE_MC_CTRL_MASK_UOPQ_BIT);
+
check_null_seg_clears_base(c);
}

@@ -1123,6 +1131,10 @@ static const int amd_erratum_400[] =
static const int amd_erratum_383[] =
AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));

+/* #1019: A Thread May Spuriously Report a Micro-Op Queue Parity Error */
+static const int amd_erratum_1019[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x0f, 0xf));
+
/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
static const int amd_erratum_1054[] =
AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
--
2.39.1