[PATCH v1] scsi: ses: Avoid reporting errors for simple subenclosures

From: Aidan MacDonald
Date: Tue May 09 2023 - 07:55:08 EST


Currently the SES driver reports bogus errors when plugging
in a drive that exposes only a single, simple subenclosure:

[ 432.713731] scsi 3:0:0:0: Direct-Access WD Elements 2667 2007 PQ: 0 ANSI: 6
[ 432.714127] scsi 3:0:0:1: Enclosure WD SES Device 2007 PQ: 0 ANSI: 6
...
[ 432.716508] scsi 3:0:0:1: Attached scsi generic sg2 type 13
...
[ 439.897020] scsi 3:0:0:1: Wrong diagnostic page; asked for 1 got 8
[ 439.897023] scsi 3:0:0:1: Failed to get diagnostic page 0x1
[ 439.897025] scsi 3:0:0:1: Failed to bind enclosure -19

According to the SES specification, simple subenclosures always
return diagnostic page 8 no matter what page was requested, so
the device is permitted to page 8 here and nothing is wrong.

To avoid polluting the kernel logs with bogus errors, the first
diagnostic page read bypasses the usual page code check. If it
returns page 8 the device is assumed to be a simple subenclosure
and no enclosure device is created. Simple subenclosures only
return a vendor specific status byte in page 8 and don't support
any other pages, so they can't support the enclosure interface.

Signed-off-by: Aidan MacDonald <aidanmacdonald.0x0@xxxxxxxxx>
---
drivers/scsi/ses.c | 53 +++++++++++++++++++++++++++++++++++++++-------
1 file changed, 45 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c
index d7d0c35c58b8..3457d8bc1ddf 100644
--- a/drivers/scsi/ses.c
+++ b/drivers/scsi/ses.c
@@ -73,9 +73,12 @@ static void init_device_slot_control(unsigned char *dest_desc,
dest_desc[3] &= 0x3c;
}

-
-static int ses_recv_diag(struct scsi_device *sdev, int page_code,
- void *buf, int bufflen)
+/*
+ * Raw RECEIVE_DIAGNOSTIC page command. Does not check the returned
+ * page code, which may differ from the requested page code!
+ */
+static int __ses_recv_diag(struct scsi_device *sdev, int page_code,
+ void *buf, int bufflen)
{
int ret;
unsigned char cmd[] = {
@@ -86,7 +89,6 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code,
bufflen & 0xff,
0
};
- unsigned char recv_page_code;
unsigned int retries = SES_RETRIES;
struct scsi_sense_hdr sshdr;
const struct scsi_exec_args exec_args = {
@@ -100,6 +102,15 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code,
(sshdr.sense_key == NOT_READY ||
(sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29)));

+ return ret;
+}
+
+static int ses_recv_diag(struct scsi_device *sdev, int page_code,
+ void *buf, int bufflen)
+{
+ unsigned char recv_page_code;
+ int ret = __ses_recv_diag(sdev, page_code, buf, bufflen);
+
if (unlikely(ret))
return ret;

@@ -108,8 +119,11 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code,
if (likely(recv_page_code == page_code))
return ret;

- /* successful diagnostic but wrong page code. This happens to some
- * USB devices, just print a message and pretend there was an error */
+ /*
+ * Successful diagnostic but wrong page code. Shouldn't happen
+ * except for simple subenclosures, which should already have
+ * been detected by this point.
+ */

sdev_printk(KERN_ERR, sdev,
"Wrong diagnostic page; asked for %d got %u\n",
@@ -695,11 +709,33 @@ static int ses_intf_add(struct device *cdev)
if (!hdr_buf || !ses_dev)
goto err_init_free;

+ /*
+ * Read without checking page code, getting a different page
+ * is not necessarily an error for devices with only a simple
+ * subenclosure (eg. some USB drives).
+ */
page = 1;
- result = ses_recv_diag(sdev, page, hdr_buf, INIT_ALLOC_SIZE);
+ result = __ses_recv_diag(sdev, page, hdr_buf, INIT_ALLOC_SIZE);
if (result)
goto recv_failed;

+ /*
+ * A simple subenclosure only supports page 8 and will return
+ * it after any diagnostic page request. Simple subenclosures
+ * are not supported by this driver -- there is simply no data
+ * to report besides a vendor specific byte -- but they will
+ * not be treated as an error.
+ */
+ if (hdr_buf[0] == 8) {
+ err = 0;
+ goto err_init_free;
+ }
+
+ /*
+ * All diagnostic pages will include a length field so even
+ * if the page code is incorrect at this point, that'll get
+ * detected when re-reading the page.
+ */
len = (hdr_buf[2] << 8) + hdr_buf[3] + 4;
buf = kzalloc(len, GFP_KERNEL);
if (!buf)
@@ -817,7 +853,8 @@ static int ses_intf_add(struct device *cdev)
err_init_free:
kfree(ses_dev);
kfree(hdr_buf);
- sdev_printk(KERN_ERR, sdev, "Failed to bind enclosure %d\n", err);
+ if (err)
+ sdev_printk(KERN_ERR, sdev, "Failed to bind enclosure %d\n", err);
return err;
}

--
2.39.2