[PATCH V11 7/8] cxl/port: Retry reading CDAT on failure

From: ira . weiny
Date: Fri Jun 10 2022 - 16:23:50 EST


From: Ira Weiny <ira.weiny@xxxxxxxxx>

The CDAT read may fail for a number of reasons but mainly it is possible
to get different parts of a valid state. The checksum in the CDAT table
protects against this.

Now that the CDAT data is validated, issue a retry if the CDAT read
fails. For now 5 retries are implemented.

Reviewed-by: Ben Widawsky <bwidawsk@xxxxxxxxxx>
Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>

---
Changes from V10
Pick up review tag and fix commit message

Changes from V9
Alison Schofield/Davidlohr Bueso
Print debug on each iteration and error only after failure

Changes from V8
Move code to cxl/core/pci.c

Changes from V6
Move to pci.c
Fix retries count
Change to 5 retries

Changes from V5:
New patch -- easy to push off or drop.
---
drivers/cxl/core/pci.c | 40 +++++++++++++++++++++++++++++++---------
1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index cb70287e2984..fd02bc7c0d97 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -617,19 +617,13 @@ static int cxl_cdat_read_table(struct device *dev,
return rc;
}

-/**
- * read_cdat_data - Read the CDAT data on this port
- * @port: Port to read data from
- *
- * This call will sleep waiting for responses from the DOE mailbox.
- */
-void read_cdat_data(struct cxl_port *port)
+static int __read_cdat_data(struct cxl_port *port)
{
static struct pci_doe_mb *cdat_mb;
struct device *dev = &port->dev;
struct device *uport = port->uport;
size_t cdat_length;
- int ret;
+ int ret = 0;

/*
* Ensure a reference on the underlying uport device which has the
@@ -640,17 +634,21 @@ void read_cdat_data(struct cxl_port *port)
cdat_mb = find_cdat_mb(uport);
if (!cdat_mb) {
dev_dbg(dev, "No CDAT mailbox\n");
+ ret = -EIO;
goto out;
}

if (cxl_cdat_get_length(dev, cdat_mb, &cdat_length)) {
dev_dbg(dev, "No CDAT length\n");
+ ret = -EIO;
goto out;
}

port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
- if (!port->cdat.table)
+ if (!port->cdat.table) {
+ ret = -ENOMEM;
goto out;
+ }

port->cdat.length = cdat_length;
ret = cxl_cdat_read_table(dev, cdat_mb, &port->cdat);
@@ -664,5 +662,29 @@ void read_cdat_data(struct cxl_port *port)

out:
put_device(uport);
+ return ret;
+}
+
+/**
+ * read_cdat_data - Read the CDAT data on this port, retrying on failure
+ * @port: Port to read data from
+ *
+ * Makes up to 5 attempts. May sleep waiting for DOE mailbox responses.
+ */
+void read_cdat_data(struct cxl_port *port)
+{
+	const int max_tries = 5;	/* loop counter is kept separate so the limit can be reported */
+	int tries, rc;
+
+	for (tries = 1; tries <= max_tries; tries++) {
+		rc = __read_cdat_data(port);
+		if (!rc)
+			return;
+		dev_dbg(&port->dev,
+			"CDAT data read error rc=%d (retries %d)\n",
+			rc, max_tries - tries);
+	}
+	dev_err(&port->dev, "CDAT data read failed after %d retries\n",
+		max_tries);
+}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
--
2.35.1