[PATCH v3 3/4] x86/mce, EDAC/mce_amd: Cache MCA_CONFIG[McaX] in struct mce_bank

From: Smita Koralahalli
Date: Fri Feb 11 2022 - 17:35:40 EST


Cache the value of MCA_CONFIG[McaX] in the existing mce_bank struct
similar to MCA_CONFIG[McaLsbInStatusSupported].

This simplifies and eliminates the need to read MCA_CONFIG register each
time to check McaX.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@xxxxxxx>
Reviewed-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
---
arch/x86/include/asm/mce.h | 10 ++--------
arch/x86/kernel/cpu/mce/amd.c | 24 ++++++++++++++++++++----
arch/x86/kernel/cpu/mce/internal.h | 13 ++++++++++++-
arch/x86/kernel/cpu/mce/severity.c | 6 +-----
drivers/edac/mce_amd.c | 5 +----
5 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index cc67c74e8b46..bb2d45b0bb89 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -50,14 +50,6 @@
#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */
#define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */

-/*
- * McaX field if set indicates a given bank supports MCA extensions:
- * - Deferred error interrupt type is specifiable by bank.
- * - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers,
- * But should not be used to determine MSR numbers.
- * - TCC bit is present in MCx_STATUS.
- */
-#define MCI_CONFIG_MCAX 0x1
#define MCI_IPID_MCATYPE 0xFFFF0000
#define MCI_IPID_HWID 0xFFF

@@ -339,6 +331,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank);
void smca_extract_err_addr(struct mce *m);
void smca_feature_init(void);
+bool smca_config_get_mcax(struct mce *m);
#else

static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
@@ -347,6 +340,7 @@ static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
static inline void smca_extract_err_addr(struct mce *m) { }
static inline void smca_feature_init(void) { }
+static inline bool smca_config_get_mcax(struct mce *m) { return false; };
#endif

static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index ed75d4bd2aff..4ec644611f33 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -257,10 +257,7 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
* For SMCA enabled processors, BLKPTR field of the first MISC register
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
*/
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return;
-
- if (!(low & MCI_CONFIG_MCAX))
+ if (!(this_cpu_ptr(mce_banks_array)[bank].mcax))
return;

if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
@@ -735,6 +732,24 @@ void smca_extract_err_addr(struct mce *m)
}
}

+bool smca_config_get_mcax(struct mce *m)
+{
+ struct mce_bank *mce_banks;
+
+ /*
+ * Check if the bank number is valid. It could be possible for the
+ * user to input unavailable bank number when doing SW error injection
+ * or to get an invalid bank number like with APEI memory handling.
+ */
+ if (m->bank >= per_cpu(mce_num_banks, m->extcpu))
+ return false;
+
+ mce_banks = per_cpu_ptr(mce_banks_array, m->extcpu);
+
+ return mce_banks[m->bank].mcax;
+}
+EXPORT_SYMBOL_GPL(smca_config_get_mcax);
+
void smca_feature_init(void)
{
unsigned int bank;
@@ -743,6 +758,7 @@ void smca_feature_init(void)
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(bank), mca_cfg);
this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(mca_cfg & BIT(8));
+ this_cpu_ptr(mce_banks_array)[bank].mcax = !!(mca_cfg & BIT(0));
}
}

diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 39dc37052cb9..422387f8699d 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -184,7 +184,18 @@ struct mce_bank {
* the LSB field is found in MCA_STATUS, when set.
*/
__u64 lsb_in_status : 1,
- __reserved_1 : 63;
+
+ /*
+ * (AMD) MCA_CONFIG[McaX]: This bit when set indicates a given
+ * bank supports MCA extensions:
+ * - Deferred error interrupt type is specifiable by bank.
+ * - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers,
+ * But should not be used to determine MSR numbers.
+ * - TCC bit is present in MCx_STATUS.
+ */
+ mcax : 1,
+
+ __reserved_1 : 62;
};

DECLARE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 7aa2bda93cbb..95030a162f61 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -303,8 +303,6 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)

static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
{
- u64 mcx_cfg;
-
/*
* We need to look at the following bits:
* - "succor" bit (data poisoning support), and
@@ -314,10 +312,8 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
if (!mce_flags.succor)
return MCE_PANIC_SEVERITY;

- mcx_cfg = mce_rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(m->bank));
-
/* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
- if ((mcx_cfg & MCI_CONFIG_MCAX) &&
+ if ((smca_config_get_mcax(m)) &&
(m->status & MCI_STATUS_TCC) &&
(err_ctx == IN_KERNEL))
return MCE_PANIC_SEVERITY;
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index cc5c63feb26a..415201ce35c7 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1254,11 +1254,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
((m->status & MCI_STATUS_PCC) ? "PCC" : "-"));

if (boot_cpu_has(X86_FEATURE_SMCA)) {
- u32 low, high;
- u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);

- if (!rdmsr_safe(addr, &low, &high) &&
- (low & MCI_CONFIG_MCAX))
+ if (smca_config_get_mcax(m))
pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));

pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
--
2.17.1