Re: [PATCH v2] x86/mce: work around an erratum on fast string copy instructions.

From: Borislav Petkov
Date: Fri Feb 18 2022 - 10:07:16 EST


On Thu, Feb 17, 2022 at 05:32:09PM -0800, Jue Wang wrote:
> +static noinstr bool quirk_skylake_repmov(void)
> +{
> + u64 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
> + u64 misc_enable = __rdmsr(MSR_IA32_MISC_ENABLE);
> +
> + /*
> + * Apply the quirk only to local machine checks, i.e., no broadcast
> + * sync is needed.
> + */
> + if ((mcgstatus & MCG_STATUS_LMCES) &&
> + unlikely(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
> + u64 mc1_status = mce_rdmsrl(MSR_IA32_MCx_STATUS(1));
> +
> + /* Check for a software-recoverable data fetch error. */
> + if ((mc1_status &
> + (MCI_STATUS_VAL | MCI_STATUS_OVER | MCI_STATUS_UC | MCI_STATUS_EN |
> + MCI_STATUS_ADDRV | MCI_STATUS_MISCV | MCI_STATUS_PCC |
> + MCI_STATUS_AR | MCI_STATUS_S)) ==
> + (MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
> + MCI_STATUS_ADDRV | MCI_STATUS_MISCV |
> + MCI_STATUS_AR | MCI_STATUS_S)) {
> + misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING;
> + __wrmsr(MSR_IA32_MISC_ENABLE,
> + (u32)misc_enable, (u32)(misc_enable >> 32));

"You're going to have to use the mce_{rd,wr}msrl() routines."

I actually really meant that.

And I went and simplified this a bit more so that it is more readable;
the diff on top of your patch is below.

Also, Tony, I think the clearing of MCG_STATUS should happen last.

---
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index c1a41da99975..1741be9b9464 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -831,34 +831,35 @@ quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
*/
static noinstr bool quirk_skylake_repmov(void)
{
- u64 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
- u64 misc_enable = __rdmsr(MSR_IA32_MISC_ENABLE);
+ u64 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ u64 misc_enable = mce_rdmsrl(MSR_IA32_MISC_ENABLE);
+ u64 mc1_status;

/*
* Apply the quirk only to local machine checks, i.e., no broadcast
* sync is needed.
*/
- if ((mcgstatus & MCG_STATUS_LMCES) &&
- (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
- u64 mc1_status = mce_rdmsrl(MSR_IA32_MCx_STATUS(1));
-
- /* Check for a software-recoverable data fetch error. */
- if ((mc1_status &
- (MCI_STATUS_VAL | MCI_STATUS_OVER | MCI_STATUS_UC | MCI_STATUS_EN |
- MCI_STATUS_ADDRV | MCI_STATUS_MISCV | MCI_STATUS_PCC |
- MCI_STATUS_AR | MCI_STATUS_S)) ==
- (MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
- MCI_STATUS_ADDRV | MCI_STATUS_MISCV |
- MCI_STATUS_AR | MCI_STATUS_S)) {
- misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING;
- __wrmsr(MSR_IA32_MISC_ENABLE,
- (u32)misc_enable, (u32)(misc_enable >> 32));
- mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
- mce_wrmsrl(MSR_IA32_MCx_STATUS(1), 0);
- pr_err_once("Errata detected, disable fast string copy instructions.\n");
- return true;
- }
+ if (!(mcgstatus & MCG_STATUS_LMCES) ||
+ !(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING))
+ return false;
+
+ mc1_status = mce_rdmsrl(MSR_IA32_MCx_STATUS(1));
+
+ /* Check for a software-recoverable data fetch error. */
+ if ((mc1_status &
+ (MCI_STATUS_VAL | MCI_STATUS_OVER | MCI_STATUS_UC | MCI_STATUS_EN |
+ MCI_STATUS_ADDRV | MCI_STATUS_MISCV | MCI_STATUS_PCC |
+ MCI_STATUS_AR | MCI_STATUS_S)) ==
+ (MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
+ MCI_STATUS_ADDRV | MCI_STATUS_MISCV |
+ MCI_STATUS_AR | MCI_STATUS_S)) {
+ misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING;
+ mce_wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+ mce_wrmsrl(MSR_IA32_MCx_STATUS(1), 0);
+ pr_err_once("Erratum detected, disable fast string copy instructions.\n");
+ return true;
}
+
return false;
}

@@ -1432,7 +1433,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
return unexpected_machine_check(regs);

if (mce_flags.skx_repmov_quirk && quirk_skylake_repmov())
- return;
+ goto clear;

/*
* Establish sequential order between the CPUs entering the machine
@@ -1576,6 +1577,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
out:
instrumentation_end();

+clear:
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
}
EXPORT_SYMBOL_GPL(do_machine_check);

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette