[PATCH] EDAC/amd64: Include MCA error codes in EDAC message

From: Yazen Ghannam
Date: Wed Jun 22 2022 - 12:08:46 EST


The AMD64 EDAC module does not include MCA information in its output.
Users and tooling that gather memory error information only from EDAC
therefore never see the MCA information.

Print the ErrorCode and ErrorCodeExt fields from MCA_STATUS as part of
the EDAC message, in the form "err_code:<ErrorCodeExt>:<ErrorCode>"
(both in hexadecimal), so that relevant memory error information is
available from a single source.

Signed-off-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
---
drivers/edac/amd64_edac.c | 11 ++++++++++-
drivers/edac/amd64_edac.h | 2 ++
2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 2f854feeeb23..7905cfd34cd0 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -3168,11 +3168,15 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
}

+#define MSG_SIZE 1024
+static char msg[MSG_SIZE];
+
static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
u8 ecc_type)
{
enum hw_event_mc_err_type err_type;
const char *string;
+ int len;

if (ecc_type == 2)
err_type = HW_EVENT_ERR_CORRECTED;
@@ -3209,10 +3213,12 @@ static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
break;
}

+ len = snprintf(msg, MSG_SIZE, "err_code:0x%04x:0x%04x", err->xec, err->ec);
+
edac_mc_handle_error(err_type, mci, 1,
err->page, err->offset, err->syndrome,
err->csrow, err->channel, -1,
- string, "");
+ string, msg);
}

static inline void decode_bus_error(int node_id, struct mce *m)
@@ -3281,6 +3287,9 @@ static void decode_umc_error(int node_id, struct mce *m)

memset(&err, 0, sizeof(err));

+ err.ec = EC(m->status);
+ err.xec = XEC(m->status, 0x3f);
+
if (m->status & MCI_STATUS_DEFERRED)
ecc_type = 3;

diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 38e5ad95d010..a49d797b7322 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -422,6 +422,8 @@ struct err_info {
struct mem_ctl_info *src_mci;
int csrow;
int channel;
+ u16 ec;
+ u16 xec;
u16 syndrome;
u32 page;
u32 offset;
--
2.25.1