[PATCH v2 2/2] hwmon (occ): Retry for checksum failure

From: Eddie James
Date: Tue Apr 26 2022 - 11:50:30 EST


Due to the OCC communication design with a shared SRAM area,
checkum errors are expected due to corrupted buffer from OCC
communications with other system components. Therefore, retry
the command twice in the event of a checksum failure.

Signed-off-by: Eddie James <eajames@xxxxxxxxxxxxx>
Acked-by: Guenter Roeck <linux@xxxxxxxxxxxx>
---
drivers/hwmon/occ/p9_sbe.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c
index 49b13cc01073..e6ccef2af659 100644
--- a/drivers/hwmon/occ/p9_sbe.c
+++ b/drivers/hwmon/occ/p9_sbe.c
@@ -14,6 +14,8 @@

#include "common.h"

+#define OCC_CHECKSUM_RETRIES 3
+
struct p9_sbe_occ {
struct occ occ;
bool sbe_error;
@@ -83,17 +85,22 @@ static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len)
struct occ_response *resp = &occ->resp;
struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ);
size_t resp_len = sizeof(*resp);
+ int i;
int rc;

- rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len);
- if (rc < 0) {
+ for (i = 0; i < OCC_CHECKSUM_RETRIES; ++i) {
+ rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len);
+ if (rc >= 0)
+ break;
if (resp_len) {
if (p9_sbe_occ_save_ffdc(ctx, resp, resp_len))
sysfs_notify(&occ->bus_dev->kobj, NULL,
bin_attr_ffdc.attr.name);
- }

- return rc;
+ return rc;
+ }
+ if (rc != -EBADE)
+ return rc;
}

switch (resp->return_status) {
--
2.27.0