Re: [PATCH v4 1/1] EDAC/altera: Check previous DDR DBE during driver probe

From: Dinh Nguyen
Date: Thu Jun 15 2023 - 14:54:11 EST




On 6/14/23 21:25, niravkumar.l.rabara@xxxxxxxxx wrote:
From: Niravkumar L Rabara <niravkumar.l.rabara@xxxxxxxxx>

Add a DDR DBE check during driver probe to notify the user if the
previous reboot was caused by a DDR DBE, and print the related DBE
error information.

Signed-off-by: Niravkumar L Rabara <niravkumar.l.rabara@xxxxxxxxx>
---
drivers/edac/altera_edac.c | 29 ++++++++++++++++----
include/linux/firmware/intel/stratix10-smc.h | 20 ++++++++++++++
2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 8b31cd54bdb6..04c0675adc8c 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -2159,6 +2159,7 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
#ifdef CONFIG_64BIT
{
int dberror, err_addr;
+ struct arm_smccc_res result;
edac->panic_notifier.notifier_call = s10_edac_dberr_handler;
atomic_notifier_chain_register(&panic_notifier_list,
@@ -2168,11 +2169,29 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST,
&dberror);
if (dberror) {
- regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST,
- &err_addr);
- edac_printk(KERN_ERR, EDAC_DEVICE,
- "Previous Boot UE detected[0x%X] @ 0x%X\n",
- dberror, err_addr);
+ /* Bit-31 is set if previous DDR UE happened */
+ if (dberror & (1 << 31)) {
+ /* Read previous DDR UE info */
+ arm_smccc_smc(INTEL_SIP_SMC_READ_SEU_ERR, 0,
+ 0, 0, 0, 0, 0, 0, &result);
+
+ if (!result.a0) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Previous DDR UE:Count=0x%X,Address=0x%X,ErrorData=0x%X\n"
+ , (unsigned int)result.a1
+ , (unsigned int)result.a2
+ , (unsigned int)result.a3);
+ } else {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "INTEL_SIP_SMC_SEU_ERR_STATUS failed\n");
+ }
+ } else {
+ regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST,
+ &err_addr);
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Previous Boot UE detected[0x%X] @ 0x%X\n",
+ dberror, err_addr);
+ }
/* Reset the sticky registers */
regmap_write(edac->ecc_mgr_map,
S10_SYSMGR_UE_VAL_OFST, 0);
diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h
index a718f853d457..48810c39f612 100644
--- a/include/linux/firmware/intel/stratix10-smc.h
+++ b/include/linux/firmware/intel/stratix10-smc.h
@@ -595,4 +595,24 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE)
#define INTEL_SIP_SMC_FCS_GET_PROVISION_DATA \
INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FCS_GET_PROVISION_DATA)
+/**
+ * Request INTEL_SIP_SMC_READ_SEU_ERR
+ * Sync call to get Single Event Upset Error information
+ * SEU detects both corrected and uncorrected errors
+ *
+ * Call register usage:
+ * a0 INTEL_SIP_SMC_READ_SEU_ERR
+ * a1-7 not used
+ *
+ * Return status:
+ * a0 INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_STATUS_NOT_SUPPORTED or
+ * INTEL_SIP_SMC_STATUS_ERROR
+ * a1 error count of response data
+ * a2 sector address of response data
+ * a3 error data
+ */
+#define INTEL_SIP_SMC_FUNCID_SEU_ERR_STATUS 153
+#define INTEL_SIP_SMC_READ_SEU_ERR \
+ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_SEU_ERR_STATUS)
+
#endif


Acked-by: Dinh Nguyen <dinguyen@xxxxxxxxxx>