Re: [PATCH 1/4] EDAC, MCE, AMD: Enable error decoding of Scalable MCA errors

From: Borislav Petkov
Date: Tue Feb 23 2016 - 07:38:01 EST


On Tue, Feb 16, 2016 at 03:45:08PM -0600, Aravind Gopalakrishnan wrote:
> For Scalable MCA enabled processors, errors are listed
> per IP block. And since it is not required for an IP to
> map to a particular bank, we need to use HWID and McaType
> values from the MCx_IPID register to figure out which IP
> a given bank represents.
>
> We also have a new bit (TCC) in the MCx_STATUS register
> to indicate Task context is corrupt.
>
> Add logic here to decode errors from all known IP
> blocks for Fam17h Model 00-0fh and to print TCC errors.
>
> Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@xxxxxxx>
> ---
> arch/x86/include/asm/mce.h | 50 ++++++
> arch/x86/include/asm/msr-index.h | 2 +
> arch/x86/kernel/cpu/mcheck/mce_amd.c | 11 ++
> drivers/edac/mce_amd.c | 327 ++++++++++++++++++++++++++++++++++-
> 4 files changed, 389 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 2ea4527..2ec67ac 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -42,6 +42,17 @@
> /* AMD-specific bits */
> #define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */
> #define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
> +#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */

Put a \n here, please.

> +/*
> + * McaX field if set indicates a given bank supports MCA extensions:
> + * - Deferred error interrupt type is specifiable by bank
> + * - BlkPtr field indicates prescence of extended MISC registers.
^^^^^^^^^

Btw, that's MCi_MISC[BlkPtr] ?

Also, please integrate a spell checker into your patch creation
workflow.
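
A recent checkpatch with --codespell (needs the codespell package
installed) should catch that one, for example:

	./scripts/checkpatch.pl --codespell your.patch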


> + * But should not be used to determine MSR numbers
> + * - TCC bit is present in MCx_STATUS

All sentences end with a "."

> + */
> +#define MCI_CONFIG_MCAX 0x1
> +#define MCI_IPID_MCATYPE 0xFFFF0000
> +#define MCI_IPID_HWID 0xFFF
>
> /*
> * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
> @@ -287,4 +298,43 @@ struct cper_sec_mem_err;
> extern void apei_mce_report_mem_error(int corrected,
> struct cper_sec_mem_err *mem_err);
>
> +/*
> + * Enumerating new IP types and HWID values

Please stop using gerunds, i.e., -ing forms, in comments and commit messages.

"Enumerate new IP ..." is just fine.

> + * in ScalableMCA enabled AMD processors
> + */
> +#ifdef CONFIG_X86_MCE_AMD
> +enum ip_types {

AMD-specific so "amd_ip_types"

> + F17H_CORE = 0, /* Core errors */
> + DF, /* Data Fabric */
> + UMC, /* Unified Memory Controller */
> + FUSE, /* FUSE subsystem */

What's the FUSE subsystem?

In any case, this could use a different name in order not to confuse it
with Linux's FUSE (Filesystem in Userspace).

> + PSP, /* Platform Security Processor */
> + SMU, /* System Management Unit */
> + N_IP_TYPES
> +};
> +
> +struct hwid {

amd_hwid and so on. All of the below should have the "amd_" prefix so that
it is obvious they are AMD-specific.

> + const char *ipname;
> + unsigned int hwid_value;
> +};
> +
> +extern struct hwid hwid_mappings[N_IP_TYPES];


> +
> +enum core_mcatypes {
> + LS = 0, /* Load Store */
> + IF, /* Instruction Fetch */
> + L2_CACHE, /* L2 cache */
> + DE, /* Decoder unit */
> + RES, /* Reserved */
> + EX, /* Execution unit */
> + FP, /* Floating Point */
> + L3_CACHE /* L3 cache */
> +};
> +
> +enum df_mcatypes {
> + CS = 0, /* Coherent Slave */
> + PIE /* Power management, Interrupts, etc */
> +};
> +#endif

So all those are defined here, but we have a header for exactly that:
drivers/edac/mce_amd.h. And then you define and export hwid_mappings in
arch/x86/kernel/cpu/mcheck/mce_amd.c without even using it there.

Why isn't all this in drivers/edac/mce_amd.[ch] ?

And if it is there, then you obviously don't need the "amd_" prefix.

> +
> #endif /* _ASM_X86_MCE_H */
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 5523465..93bccbc 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -266,7 +266,9 @@
>
> /* 'SMCA': AMD64 Scalable MCA */
> #define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
> +#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
> #define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
> +#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))

Are those MSRs used in multiple files? If not, -> mce.h.

> #define MSR_P6_PERFCTR0 0x000000c1
> #define MSR_P6_PERFCTR1 0x000000c2
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> index 88de27b..8169103 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> @@ -71,6 +71,17 @@ static const char * const th_names[] = {
> "execution_unit",
> };
>
> +/* Defining HWID to IP type mappings for Scalable MCA */

" Define ..."

> +struct hwid hwid_mappings[] = {
> + [F17H_CORE] = { "f17h_core", 0xB0 },
> + [DF] = { "df", 0x2E },
> + [UMC] = { "umc", 0x96 },
> + [FUSE] = { "fuse", 0x5 },
> + [PSP] = { "psp", 0xFF },
> + [SMU] = { "smu", 0x1 },
> +};
> +EXPORT_SYMBOL_GPL(hwid_mappings);
> +
> static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
> static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
>
> diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
> index e3a945c..6e6b327 100644
> --- a/drivers/edac/mce_amd.c
> +++ b/drivers/edac/mce_amd.c
> @@ -147,6 +147,136 @@ static const char * const mc6_mce_desc[] = {
> "Status Register File",

...

> +static void decode_f17hcore_errors(u8 xec, unsigned int mca_type)
> +{
> + switch (mca_type) {
> + case LS:
> + if (xec == 0x4 || xec > (ARRAY_SIZE(f17h_ls_mce_desc) - 1))
> + goto wrong_f17hcore_error;
> +
> + pr_cont("%s.\n", f17h_ls_mce_desc[xec]);
> + break;
> +
> + case IF:
> + if (xec > (ARRAY_SIZE(f17h_if_mce_desc) - 1))
> + goto wrong_f17hcore_error;
> +
> + pr_cont("%s.\n", f17h_if_mce_desc[xec]);
> + break;
> +
> + case L2_CACHE:
> + if (xec > (ARRAY_SIZE(f17h_l2_mce_desc) - 1))
> + goto wrong_f17hcore_error;
> +
> + pr_cont("%s.\n", f17h_l2_mce_desc[xec]);
> + break;
> +
> + case DE:
> + if (xec > (ARRAY_SIZE(f17h_de_mce_desc) - 1))
> + goto wrong_f17hcore_error;
> +
> + pr_cont("%s.\n", f17h_de_mce_desc[xec]);
> + break;
> +
> + case EX:
> + if (xec > (ARRAY_SIZE(f17h_ex_mce_desc) - 1))
> + goto wrong_f17hcore_error;
> +
> + pr_cont("%s.\n", f17h_ex_mce_desc[xec]);
> + break;
> +
> + case FP:
> + if (xec > (ARRAY_SIZE(f17h_fp_mce_desc) - 1))
> + goto wrong_f17hcore_error;
> +
> + pr_cont("%s.\n", f17h_fp_mce_desc[xec]);
> + break;
> +
> + case L3_CACHE:
> + if (xec > (ARRAY_SIZE(f17h_l3_mce_desc) - 1))
> + goto wrong_f17hcore_error;
> +
> + pr_cont("%s.\n", f17h_l3_mce_desc[xec]);
> + break;
> +
> + default:
> + goto wrong_f17hcore_error;

That's a lot of repeated code. You can assign the desc array to a temp
variable depending on mca_type and do the if and pr_cont only once using
that temp variable.
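
Something like this, for example (completely untested, just to show the
idea; it reuses your f17h_*_mce_desc arrays and label, the local names
are made up):

static void decode_f17hcore_errors(u8 xec, unsigned int mca_type)
{
	const char * const *error_desc_array;
	size_t len;

	switch (mca_type) {
	case LS:
		error_desc_array = f17h_ls_mce_desc;
		len = ARRAY_SIZE(f17h_ls_mce_desc);
		break;
	case IF:
		error_desc_array = f17h_if_mce_desc;
		len = ARRAY_SIZE(f17h_if_mce_desc);
		break;
	case L2_CACHE:
		error_desc_array = f17h_l2_mce_desc;
		len = ARRAY_SIZE(f17h_l2_mce_desc);
		break;
	case DE:
		error_desc_array = f17h_de_mce_desc;
		len = ARRAY_SIZE(f17h_de_mce_desc);
		break;
	case EX:
		error_desc_array = f17h_ex_mce_desc;
		len = ARRAY_SIZE(f17h_ex_mce_desc);
		break;
	case FP:
		error_desc_array = f17h_fp_mce_desc;
		len = ARRAY_SIZE(f17h_fp_mce_desc);
		break;
	case L3_CACHE:
		error_desc_array = f17h_l3_mce_desc;
		len = ARRAY_SIZE(f17h_l3_mce_desc);
		break;
	default:
		goto wrong_f17hcore_error;
	}

	/* xec == 0x4 is rejected for LS, as in your patch. */
	if (xec >= len || (mca_type == LS && xec == 0x4))
		goto wrong_f17hcore_error;

	pr_cont("%s.\n", error_desc_array[xec]);
	return;

wrong_f17hcore_error:
	pr_cont("Unrecognized error code from %s MCA bank\n",
		(mca_type == L3_CACHE) ? "L3 Cache" : "F17h Core");
}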

> + }
> +
> + return;
> +
> +wrong_f17hcore_error:
> + pr_cont("Unrecognized error code from %s MCA bank\n",
> + (mca_type == L3_CACHE) ? "L3 Cache" : "F17h Core");
> +}
> +
> +static void decode_df_errors(u8 xec, unsigned int mca_type)
> +{
> + switch (mca_type) {
> + case CS:
> + if (xec > (ARRAY_SIZE(f17h_cs_mce_desc) - 1))
> + goto wrong_df_error;
> +
> + pr_cont("%s.\n", f17h_cs_mce_desc[xec]);
> + break;
> +
> + case PIE:
> + if (xec > (ARRAY_SIZE(f17h_pie_mce_desc) - 1))
> + goto wrong_df_error;
> +
> + pr_cont("%s.\n", f17h_pie_mce_desc[xec]);
> + break;

Ditto.

> +
> + default:
> + goto wrong_df_error;
> + }
> +
> + return;
> +
> +wrong_df_error:
> + pr_cont("Unrecognized error code from DF MCA bank\n");
> +}
> +
> +/* Decode errors according to Scalable MCA specification */
> +static void decode_smca_errors(struct mce *m)
> +{
> + u32 low, high;
> + u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
> + unsigned int hwid, mca_type, i;
> + u8 xec = XEC(m->status, xec_mask);
> +
> + if (rdmsr_safe(addr, &low, &high)) {
> + pr_emerg("Unable to decode errors from banks\n");

That statement is not very useful in its current form.
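
At least say which bank's IPID read failed, something like (untested):

	pr_emerg(HW_ERR "Error reading IPID MSR of bank %d\n", m->bank);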

> + return;
> + }
> +
> + hwid = high & MCI_IPID_HWID;
> + mca_type = (high & MCI_IPID_MCATYPE) >> 16;
> +
> + pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n",
> + m->bank, high, low);
> +
> + /*
> + * Based on hwid and mca_type values,
> + * decode errors from respective IPs.
> + * Note: mca_type values make sense only
> + * in the context of an hwid
> + */
> + for (i = 0; i < ARRAY_SIZE(hwid_mappings); i++)

So you use those hwid_mappings here. Why aren't they defined here too?

> + if (hwid_mappings[i].hwid_value == hwid)
> + break;
> +
> + switch (i) {
> + case F17H_CORE:
> + pr_emerg(HW_ERR "%s Error: ",
> + (mca_type == L3_CACHE) ? "L3 Cache" : "F17h Core");
> + decode_f17hcore_errors(xec, mca_type);
> + break;
> +
> + case DF:
> + pr_emerg(HW_ERR "DF Error: ");
> + decode_df_errors(xec, mca_type);
> + break;
> +
> + case UMC:
> + pr_emerg(HW_ERR "UMC Error: ");
> + if (xec > (ARRAY_SIZE(f17h_umc_mce_desc) - 1)) {
> + pr_cont("Unrecognized error code from UMC MCA bank\n");
> + return;
> + }
> + pr_cont("%s.\n", f17h_umc_mce_desc[xec]);
> + break;
> +
> + case FUSE:
> + pr_emerg(HW_ERR "FUSE Error: ");
> + if (xec > (ARRAY_SIZE(f17h_fuse_mce_desc) - 1)) {
> + pr_cont("Unrecognized error code from FUSE MCA bank\n");
> + return;
> + }
> + pr_cont("%s.\n", f17h_fuse_mce_desc[xec]);
> + break;
> +
> + case PSP:
> + pr_emerg(HW_ERR "PSP Error: ");
> + if (xec > (ARRAY_SIZE(f17h_psp_mce_desc) - 1)) {
> + pr_cont("Unrecognized error code from PSP MCA bank\n");
> + return;
> + }
> + pr_cont("%s.\n", f17h_psp_mce_desc[xec]);
> + break;
> +
> + case SMU:
> + pr_emerg(HW_ERR "SMU Error: ");
> + if (xec > (ARRAY_SIZE(f17h_smu_mce_desc) - 1)) {
> + pr_cont("Unrecognized error code from SMU MCA bank\n");
> + return;
> + }
> + pr_cont("%s.\n", f17h_smu_mce_desc[xec]);
> + break;

Also a lot of repeated code which could be simplified.
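
The UMC/FUSE/PSP/SMU cases differ only in the name and the desc array, so
you could keep those in a small table indexed by the IP type (untested
sketch, names are made up, reusing your f17h_*_mce_desc arrays; the
F17H_CORE and DF cases keep calling their sub-decoders):

static const struct smca_desc {
	const char *name;
	const char * const *descs;
	unsigned int num_descs;
} smca_descs[] = {
	[UMC]	= { "UMC",  f17h_umc_mce_desc,  ARRAY_SIZE(f17h_umc_mce_desc)  },
	[FUSE]	= { "FUSE", f17h_fuse_mce_desc, ARRAY_SIZE(f17h_fuse_mce_desc) },
	[PSP]	= { "PSP",  f17h_psp_mce_desc,  ARRAY_SIZE(f17h_psp_mce_desc)  },
	[SMU]	= { "SMU",  f17h_smu_mce_desc,  ARRAY_SIZE(f17h_smu_mce_desc)  },
};

and then in decode_smca_errors():

	case UMC:
	case FUSE:
	case PSP:
	case SMU:
		pr_emerg(HW_ERR "%s Error: ", smca_descs[i].name);
		if (xec >= smca_descs[i].num_descs)
			pr_cont("Unrecognized error code from %s MCA bank\n",
				smca_descs[i].name);
		else
			pr_cont("%s.\n", smca_descs[i].descs[xec]);
		break;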

> +
> + default:
> + pr_emerg(HW_ERR "HWID:%d does not match any existing IPs\n", hwid);
> + }
> +}
> +
> static const char *decode_error_status(struct mce *m)
> {
> if (m->status & MCI_STATUS_UC) {
> @@ -769,11 +1071,21 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
> ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
> ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
>
> - if (c->x86 == 0x15 || c->x86 == 0x16)
> + if (c->x86 >= 0x15)
> pr_cont("|%s|%s",
> ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
> ((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
>
> + if (mce_flags.smca) {

ERROR: "mce_flags" [drivers/edac/edac_mce_amd.ko] undefined!
make[1]: *** [__modpost] Error 1
make: *** [modules] Error 2
make: *** Waiting for unfinished jobs....

Just read CPUID again here instead of exporting mce_flags.
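
I.e., something like this in the decoder (untested; assuming the SMCA bit
is CPUID Fn8000_0007_EBX[3], which is what mce_flags.smca is derived
from):

static bool smca_supported(void)
{
	/* Fn8000_0007_EBX[3]: ScalableMca */
	return !!(cpuid_ebx(0x80000007) & BIT(3));
}

and use that instead of mce_flags.smca.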

> + u32 smca_low, smca_high;


> + u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);

s/smca_//

Also, all those MSRs should be defined in drivers/edac/mce_amd.h if not
used outside.

> +
> + if (!rdmsr_safe(smca_addr, &smca_low, &smca_high) &&
> + (smca_low & MCI_CONFIG_MCAX))
> + pr_cont("|%s",

No need for that line break here.

> + ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
> + }
> +
> /* do the two bits[14:13] together */
> ecc = (m->status >> 45) & 0x3;
> if (ecc)
> @@ -784,6 +1096,11 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
> if (m->status & MCI_STATUS_ADDRV)
> pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
>
> + if (mce_flags.smca) {
> + decode_smca_errors(m);
> + goto err_code;
> + }
> +
> if (!fam_ops)
> goto err_code;
>
> @@ -888,6 +1205,14 @@ static int __init mce_amd_init(void)
> fam_ops->mc2_mce = f16h_mc2_mce;
> break;
>
> + case 0x17:
> + xec_mask = 0x3f;
> + if (!mce_flags.smca) {
> + printk(KERN_WARNING "Decoding supported only on Scalable MCA enabled processors\n");

What is that supposed to do? I thought all new ones will have SMCA...

> + return 0;
> + }
> + break;
> +
> default:
> printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
> kfree(fam_ops);
> --
> 2.7.0
>
>

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.