[PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

From: Smita Koralahalli
Date: Mon Feb 14 2022 - 18:37:11 EST


According to Section 2.1.16.3, under HWCR[McStatusWrEn], in "PPR for AMD
Family 19h, Model 01h, Revision B1 Processors - 55898 Rev 0.35 - Feb 5,
2021", the status register may sometimes enforce write-ignored behavior
independent of the value of HWCR[McStatusWrEn], depending on the platform
settings.

Hence, check whether writes to MCA_STATUS are ignored before doing error
injection. If they are, return the appropriate error code to userspace.

Export mca_msr_reg() so that it is accessible from the mce-inject
module.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@xxxxxxx>
Reviewed-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
---
Link:
https://lkml.kernel.org/r/20211104215846.254012-1-Smita.KoralahalliChannabasappa@xxxxxxx

v2:
msr_ops -> mca_msr_reg().
simulation -> injection.
pr_info() -> pr_err().
Aligned on ",".
v3:
Removed "x86/mce: Use mca_msr_reg() in prepare_msrs()" patch
and made changes to the existing MCx_{STATUS, ADDR, MISC} macros.
v4:
Simplified the code by just checking for write-ignored behavior in
the MCA_STATUS register.
Introduced prepare_mca_status() and performed the write-ignored check
inside the function.
Rephrased error message.
---
arch/x86/kernel/cpu/mce/core.c | 1 +
arch/x86/kernel/cpu/mce/inject.c | 37 +++++++++++++++++++++++++++++---
2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 4f1e825033ce..4ddad8082989 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -188,6 +188,7 @@ u32 mca_msr_reg(int bank, enum mca_msr reg)

return 0;
}
+EXPORT_SYMBOL_GPL(mca_msr_reg);

static void __print_mce(struct mce *m)
{
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 5fbd7ffb3233..43ba63b7dc73 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -470,11 +470,36 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
__func__, PCI_FUNC(F3->devfn), NBCFG);
}

+struct mce_err_handler { /* argument bundle passed to prepare_msrs() */
+ struct mce *mce; /* MCE record to inject; do_inject() points it at i_mce */
+ int err; /* 0, or -EINVAL set by prepare_msrs() when injection is refused */
+};
+
+static struct mce_err_handler mce_err; /* do_inject() resets it before each injection */
+
+/*
+ * Write m->status to the MCA_STATUS MSR of bank m->bank, then read the
+ * register back.
+ *
+ * Returns true when the value read back is non-zero and false when it is
+ * zero; the caller treats false as the platform refusing the injection.
+ */
+static bool prepare_mca_status(struct mce *m)
+{
+ u32 msr = mca_msr_reg(m->bank, MCA_STATUS);
+ u64 readback = m->status;
+
+ wrmsrl(msr, readback);
+ /* rdmsrl() overwrites readback with the register's current contents. */
+ rdmsrl(msr, readback);
+
+ return readback != 0;
+}
+
static void prepare_msrs(void *info)
{
- struct mce m = *(struct mce *)info;
+ struct mce_err_handler *i_mce_err = ((struct mce_err_handler *)info);
+ struct mce m = *i_mce_err->mce;
u8 b = m.bank;

+ if (!prepare_mca_status(&m)) {
+ pr_err("Platform does not allow error injection, try using APEI EINJ instead.\n");
+ i_mce_err->err = -EINVAL;
+ return;
+ }
+
wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);

if (boot_cpu_has(X86_FEATURE_SMCA)) {
@@ -501,6 +526,9 @@ static void do_inject(void)
unsigned int cpu = i_mce.extcpu;
u8 b = i_mce.bank;

+ mce_err.mce = &i_mce;
+ mce_err.err = 0;
+
i_mce.tsc = rdtsc_ordered();

i_mce.status |= MCI_STATUS_VAL;
@@ -552,10 +580,13 @@ static void do_inject(void)

i_mce.mcgstatus = mcg_status;
i_mce.inject_flags = inj_type;
- smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
+ smp_call_function_single(cpu, prepare_msrs, &mce_err, 0);

toggle_hw_mce_inject(cpu, false);

+ if (mce_err.err)
+ goto err;
+
switch (inj_type) {
case DFR_INT_INJ:
smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
@@ -624,7 +655,7 @@ static int inj_bank_set(void *data, u64 val)
/* Reset injection struct */
setup_inj_struct(&i_mce);

- return 0;
+ return mce_err.err;
}

MCE_INJECT_GET(bank);
--
2.17.1