[PATCH 5.18 71/88] scsi: ufs: core: Fix a race condition related to device management

From: Greg Kroah-Hartman
Date: Mon Aug 01 2022 - 08:17:21 EST


From: Bart Van Assche <bvanassche@xxxxxxx>

[ Upstream commit f5c2976e0cb0f6236013bfb479868531b04f61d4 ]

If a device management command completion happens after
wait_for_completion_timeout() times out and before ufshcd_clear_cmds() is
called, then the completion code may crash on the complete() call in
__ufshcd_transfer_req_compl().

Fix the following crash:

Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
Call trace:
complete+0x64/0x178
__ufshcd_transfer_req_compl+0x30c/0x9c0
ufshcd_poll+0xf0/0x208
ufshcd_sl_intr+0xb8/0xf0
ufshcd_intr+0x168/0x2f4
__handle_irq_event_percpu+0xa0/0x30c
handle_irq_event+0x84/0x178
handle_fasteoi_irq+0x150/0x2e8
__handle_domain_irq+0x114/0x1e4
gic_handle_irq.31846+0x58/0x300
el1_irq+0xe4/0x1c0
efi_header_end+0x110/0x680
__irq_exit_rcu+0x108/0x124
__handle_domain_irq+0x118/0x1e4
gic_handle_irq.31846+0x58/0x300
el1_irq+0xe4/0x1c0
cpuidle_enter_state+0x3ac/0x8c4
do_idle+0x2fc/0x55c
cpu_startup_entry+0x84/0x90
kernel_init+0x0/0x310
start_kernel+0x0/0x608
start_kernel+0x4ec/0x608

Link: https://lore.kernel.org/r/20220720170228.1598842-1-bvanassche@xxxxxxx
Fixes: 5a0b0cb9bee7 ("[SCSI] ufs: Add support for sending NOP OUT UPIU")
Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Cc: Avri Altman <avri.altman@xxxxxxx>
Cc: Bean Huo <beanhuo@xxxxxxxxxx>
Cc: Stanley Chu <stanley.chu@xxxxxxxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
Signed-off-by: Martin K. Petersen <martin.petersen@xxxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
drivers/scsi/ufs/ufshcd.c | 58 +++++++++++++++++++++++++++------------
1 file changed, 40 insertions(+), 18 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index a34c1fab0246..874490f7f5e7 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2947,37 +2947,59 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
struct ufshcd_lrb *lrbp, int max_timeout)
{
- int err = 0;
- unsigned long time_left;
+ unsigned long time_left = msecs_to_jiffies(max_timeout);
unsigned long flags;
+ bool pending;
+ int err;

+retry:
time_left = wait_for_completion_timeout(hba->dev_cmd.complete,
- msecs_to_jiffies(max_timeout));
+ time_left);

- spin_lock_irqsave(hba->host->host_lock, flags);
- hba->dev_cmd.complete = NULL;
if (likely(time_left)) {
+ /*
+ * The completion handler called complete() and the caller of
+ * this function still owns the @lrbp tag so the code below does
+ * not trigger any race conditions.
+ */
+ hba->dev_cmd.complete = NULL;
err = ufshcd_get_tr_ocs(lrbp);
if (!err)
err = ufshcd_dev_cmd_completion(hba, lrbp);
- }
- spin_unlock_irqrestore(hba->host->host_lock, flags);
-
- if (!time_left) {
+ } else {
err = -ETIMEDOUT;
dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
__func__, lrbp->task_tag);
- if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag))
+ if (ufshcd_clear_cmds(hba, 1U << lrbp->task_tag) == 0) {
/* successfully cleared the command, retry if needed */
err = -EAGAIN;
- /*
- * in case of an error, after clearing the doorbell,
- * we also need to clear the outstanding_request
- * field in hba
- */
- spin_lock_irqsave(&hba->outstanding_lock, flags);
- __clear_bit(lrbp->task_tag, &hba->outstanding_reqs);
- spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+ /*
+ * Since clearing the command succeeded we also need to
+ * clear the task tag bit from the outstanding_reqs
+ * variable.
+ */
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
+ pending = test_bit(lrbp->task_tag,
+ &hba->outstanding_reqs);
+ if (pending) {
+ hba->dev_cmd.complete = NULL;
+ __clear_bit(lrbp->task_tag,
+ &hba->outstanding_reqs);
+ }
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+ if (!pending) {
+ /*
+ * The completion handler ran while we tried to
+ * clear the command.
+ */
+ time_left = 1;
+ goto retry;
+ }
+ } else {
+ dev_err(hba->dev, "%s: failed to clear tag %d\n",
+ __func__, lrbp->task_tag);
+ }
}

return err;
--
2.35.1