[PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes

From: Smita Koralahalli
Date: Thu Jun 24 2021 - 21:34:26 EST


Newer AMD processors such as AMD 'Milan' support more physical address
bits.

That is, the MCA_ADDR registers on Scalable MCA (SMCA) systems contain the
ErrorAddr in bits [56:0] instead of [55:0]. Hence the existing LSB field
in bits [61:56] of MCA_ADDR must be relocated to accommodate the larger
ErrorAddr size.

MCA_CONFIG[McaLsbInStatusSupported] indicates this change. If set, the
LSB field will be found in MCA_STATUS rather than MCA_ADDR.

Each logical CPU has its own MCA banks in hardware, which are not shared
with other logical CPUs. Additionally, on SMCA systems, each feature bit
may differ from bank to bank within the same logical CPU.

Therefore, check MCA_CONFIG[McaLsbInStatusSupported] for each MCA bank on
each CPU.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@xxxxxxx>
---
v2:
Declared lsb_in_status in the existing struct mce_bank.
Moved the struct mce_bank declaration from core.c to internal.h and
exported the per-CPU mce_banks_array via DECLARE_PER_CPU_READ_MOSTLY().
---
arch/x86/include/asm/mce.h | 2 ++
arch/x86/kernel/cpu/mce/amd.c | 25 +++++++++++++++++++------
arch/x86/kernel/cpu/mce/core.c | 12 +++---------
arch/x86/kernel/cpu/mce/internal.h | 14 ++++++++++++++
4 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0a1c7224a582..33c5e77cf924 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -358,6 +358,7 @@ extern int mce_threshold_remove_device(unsigned int cpu);
void mce_amd_feature_init(struct cpuinfo_x86 *c);
int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
void smca_extract_err_addr(struct mce *m);
+void smca_feature_init(void);

#else

@@ -368,6 +369,7 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
static inline int
umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
static inline void smca_extract_err_addr(struct mce *m) { }
+static inline void smca_feature_init(void) { }
#endif

static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index f71435e53cdb..5e0819de641f 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -901,9 +901,26 @@ bool amd_mce_is_memory_error(struct mce *m)

void smca_extract_err_addr(struct mce *m)
{
- u8 lsb = (m->addr >> 56) & 0x3f;
+ if (this_cpu_ptr(mce_banks_array)[m->bank].lsb_in_status) {
+ u8 lsb = (m->status >> 24) & 0x3f;

- m->addr &= GENMASK_ULL(55, lsb);
+ m->addr &= GENMASK_ULL(56, lsb);
+ } else {
+ u8 lsb = (m->addr >> 56) & 0x3f;
+
+ m->addr &= GENMASK_ULL(55, lsb);
+ }
+}
+
+void smca_feature_init(void)
+{
+ unsigned int bank;
+ u64 mca_cfg;
+
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
+ rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(bank), mca_cfg);
+ this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(mca_cfg & BIT(8));
+ }
}

static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
@@ -920,10 +937,6 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
if (m.status & MCI_STATUS_ADDRV) {
m.addr = addr;

- /*
- * Extract [55:<lsb>] where lsb is the least significant
- * *valid* bit of the address bits.
- */
if (mce_flags.smca)
smca_extract_err_addr(&m);
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2c09c1eec50a..f3be82acce67 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -67,11 +67,7 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);

DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);

-struct mce_bank {
- u64 ctl; /* subevents to enable */
- bool init; /* initialise bank? */
-};
-static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);

#define ATTR_LEN 16
/* One object for each MCE bank, shared by all CPUs */
@@ -699,10 +695,6 @@ static void mce_read_aux(struct mce *m, int i)
m->addr <<= shift;
}

- /*
- * Extract [55:<lsb>] where lsb is the least significant
- * *valid* bit of the address bits.
- */
if (mce_flags.smca)
smca_extract_err_addr(m);
}
@@ -1839,6 +1831,8 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
msr_ops.status = smca_status_reg;
msr_ops.addr = smca_addr_reg;
msr_ops.misc = smca_misc_reg;
+
+ smca_feature_init();
}
}
}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 88dcc79cfb07..37b76a726c29 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -168,6 +168,20 @@ struct mce_vendor_flags {

extern struct mce_vendor_flags mce_flags;

+struct mce_bank {
+ u64 ctl; /* subevents to enable */
+ bool init; /* initialise bank? */
+
+ /*
+ * (AMD) MCA_CONFIG[McaLsbInStatusSupported]: This bit indicates
+ * the LSB field is found in MCA_STATUS, when set.
+ */
+ __u64 lsb_in_status : 1,
+ __reserved_1 : 63;
+};
+
+DECLARE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+
struct mca_msr_regs {
u32 (*ctl) (int bank);
u32 (*status) (int bank);
--
2.17.1