Re: [PATCH v2] scsi: libsas: Fix disk not being scanned in after being removed

From: John Garry
Date: Wed Mar 06 2024 - 13:43:52 EST


On 27/02/2024 09:01, Xingui Yang wrote:
As of commit d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to
update PHY info"), do discovery will send a new SMP_DISCOVER and update
phy->phy_change_count. We found that if the disk is reconnected and phy
change_count changes at this time, the disk scanning process will not be
triggered.

So update the PHY info with the last query results.

Please mention that sas_unregister_devs_sas_addr() should be called before sas_set_ex_phy() for this case.


Fixes: d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info")
Signed-off-by: Xingui Yang <yangxingui@xxxxxxxxxx>
---
v1 -> v2:
Use sas_get_phy_discover() instead of sas_get_phy_attached_dev() in
sas_rediscover_dev() and use disc_resp to update phy info.
---
drivers/scsi/libsas/sas_expander.c | 37 ++++++++++++++++++++++--------
1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index a2204674b680..a75dcce7a9ba 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1940,6 +1940,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
struct expander_device *ex = &dev->ex_dev;
struct ex_phy *phy = &ex->ex_phy[phy_id];
enum sas_device_type type = SAS_PHY_UNUSED;
+ struct smp_disc_resp *disc_resp;
u8 sas_addr[SAS_ADDR_SIZE];
char msg[80] = "";
int res;
@@ -1951,33 +1952,47 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
SAS_ADDR(dev->sas_addr), phy_id, msg);
memset(sas_addr, 0, SAS_ADDR_SIZE);
- res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type);
+ disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+ if (!disc_resp)
+ return -ENOMEM;
+
+ res = sas_get_phy_discover(dev, phy_id, disc_resp);
switch (res) {
case SMP_RESP_NO_PHY:
phy->phy_state = PHY_NOT_PRESENT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out;
case SMP_RESP_PHY_VACANT:
phy->phy_state = PHY_VACANT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out;
case SMP_RESP_FUNC_ACC:
break;
case -ECOMM:
break;
default:
- return res;
+ goto out;
+ }
+
+ if (res == 0) {
+ struct discover_resp *dr = &disc_resp->disc;
+
+ memcpy(sas_addr, dr->attached_sas_addr, SAS_ADDR_SIZE);
+ type = to_dev_type(dr);
+ if (type == 0)

Please use SAS_PHY_UNUSED


+ memset(sas_addr, 0, SAS_ADDR_SIZE);
}

Any chance we can factor out some of this code with sas_get_phy_attached_dev() (where it has been copied from)?

if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) {
phy->phy_state = PHY_EMPTY;
sas_unregister_devs_sas_addr(dev, phy_id, last);
/*
- * Even though the PHY is empty, for convenience we discover
- * the PHY to update the PHY info, like negotiated linkrate.
+ * Even though the PHY is empty, for convenience we update
+ * the PHY info, like negotiated linkrate.
*/
- sas_ex_phy_discover(dev, phy_id);
- return res;
+ if (res == 0)
+ sas_set_ex_phy(dev, phy_id, disc_resp);
+ goto out;
} else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) &&
dev_type_flutter(type, phy->attached_dev_type)) {
struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id);
@@ -1989,7 +2004,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
action = ", needs recovery";
pr_debug("ex %016llx phy%02d broadcast flutter%s\n",
SAS_ADDR(dev->sas_addr), phy_id, action);
- return res;
+ goto out;

is there any way to get rid of all the goto's? Ideally we would have just one location to call kfree(disc_resp)

}
/* we always have to delete the old device when we went here */
@@ -1998,7 +2013,11 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
SAS_ADDR(phy->attached_sas_addr));
sas_unregister_devs_sas_addr(dev, phy_id, last);
+ kfree(disc_resp);
return sas_discover_new(dev, phy_id);
+out:

Calling this simply "out" is odd, as we exit in many places

+ kfree(disc_resp);
+ return res;
}
/**


As an aside, could libsas - and your changes here - be simpler if we changed smp_execute_task() like this:

static int smp_execute_task(struct domain_device *dev, void *req /* can be on the stack */, int req_size,
void *resp /* can be on the stack */, int resp_size)
{
struct scatterlist req_sg;
struct scatterlist resp_sg;
int ret;
void *_req = kmemdup(req, req_size, GFP_KERNEL);
void *_resp = alloc_smp_resp(resp_size);
if (!_req || !resp)
return -ENOMEM;

sg_init_one(&req_sg, _req, req_size);
sg_init_one(&resp_sg, _resp, resp_size);
ret = smp_execute_task_sg(dev, &req_sg, &resp_sg);
memcpy(resp, _resp, resp_size);
kfree(_req);
kfree(_resp);
return ret;
}

We need to use alloc_smp_resp() and alloc_smp_req() as we can't allocate these memories on the stack for calling sg_init_one(). But if we changed smp_execute_task() to memcpy from/to data on the stack, it might make callers simpler. I'm not sure.

Thanks,
John