[PATCH] scsi error update 2/3 (enh)

From: Mike Anderson (andmike@us.ibm.com)
Date: Fri Oct 04 2002 - 14:56:44 EST


This series of patches is an update to scsi error handling.

00_scsi-error-base-1.diff:
        - Fix bug on incorrect check of scsi_eh_tur return value.
        - Fix debug printk format problems.
        - Removed ref to arch specific semaphore value in debug printk

01_scsi-error-enh-1.diff:
        - Forward port of Russell King's retry scsi cmd restore.
        - Adjustment of BUS_RESET_SETTLE_TIME from 5 seconds to 10 seconds
          to provide increase time post bus_reset to allow door lock
          command to succeed. This should be exported to driverfs so that
          it can be adjusted if needed.
        - Error Policy change: Error recovery command retry is now not
          based on failed command retry value.
        - Error Policy change: Failed command is not retried if retry
          count is expired.

02_scsi-error-dr-lck-1.diff:
        - Forward port of Russell King's door lock changes.

Testing:
        - Current patches where tested on a SPI interconnect using both in
          kernel and new versions of the aic driver. A Plextor SCSI cd-rom
          was used for a door lock device. Cables where done during dd's
          to generates errors and verify recover / door re-lock.

The full patch is available at:
http://www-124.ibm.com/storageio/patches/2.5/scsi-error

-andmike

--
Michael Anderson
andmike@us.ibm.com

scsi.c | 12 ++---- scsi.h | 1 scsi_error.c | 117 +++++++++++++++++++++++------------------------------------ scsi_lib.c | 26 ++++++++++++- 4 files changed, 76 insertions(+), 80 deletions(-) ----- diff -Nru a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c --- a/drivers/scsi/scsi.c Fri Oct 4 08:37:58 2002 +++ b/drivers/scsi/scsi.c Fri Oct 4 08:37:58 2002 @@ -1345,14 +1345,10 @@ */ int scsi_retry_command(Scsi_Cmnd * SCpnt) { - memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd, - sizeof(SCpnt->data_cmnd)); - SCpnt->request_buffer = SCpnt->buffer; - SCpnt->request_bufflen = SCpnt->bufflen; - SCpnt->use_sg = SCpnt->old_use_sg; - SCpnt->cmd_len = SCpnt->old_cmd_len; - SCpnt->sc_data_direction = SCpnt->sc_old_data_direction; - SCpnt->underflow = SCpnt->old_underflow; + /* + * Restore the SCSI command state. + */ + scsi_setup_cmd_retry(SCpnt); /* * Zero the sense information from the last time we tried diff -Nru a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h --- a/drivers/scsi/scsi.h Fri Oct 4 08:37:58 2002 +++ b/drivers/scsi/scsi.h Fri Oct 4 08:37:58 2002 @@ -467,6 +467,7 @@ int sectors); extern struct Scsi_Device_Template *scsi_get_request_dev(struct request *); extern int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt); +extern void scsi_setup_cmd_retry(Scsi_Cmnd *SCpnt); extern int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int); extern void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, int block_sectors); diff -Nru a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c --- a/drivers/scsi/scsi_error.c Fri Oct 4 08:37:58 2002 +++ b/drivers/scsi/scsi_error.c Fri Oct 4 08:37:58 2002 @@ -8,6 +8,10 @@ * * Restructured scsi_unjam_host and associated functions. * September 04, 2002 Mike Anderson (andmike@us.ibm.com) + * + * Forward port of Russell King's (rmk@arm.linux.org.uk) changes and + * minor cleanups. + * September 30, 2002 Mike Anderson (andmike@us.ibm.com) */ #include <linux/module.h> @@ -59,7 +63,7 @@ * These should *probably* be handled by the host itself. * Since it is allowed to sleep, it probably should. */ -#define BUS_RESET_SETTLE_TIME 5*HZ +#define BUS_RESET_SETTLE_TIME 10*HZ #define HOST_RESET_SETTLE_TIME 10*HZ /** @@ -279,12 +283,17 @@ SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(*sc_list, shost)); - BUG_ON(shost->host_failed != found); + if (shost->host_failed != found) + printk(KERN_ERR "%s: host_failed: %d != found: %d\n", + __FUNCTION__, shost->host_failed, found); } /** * scsi_check_sense - Examine scsi cmd sense * @scmd: Cmd to have sense checked. + * + * Return value: + * SUCCESS or FAILED or NEEDS_RETRY **/ static int scsi_check_sense(Scsi_Cmnd *scmd) { @@ -354,7 +363,6 @@ **/ static int scsi_eh_completed_normally(Scsi_Cmnd *scmd) { - int rtn; /* * first check the host byte, to see if there is anything in there @@ -370,7 +378,7 @@ * SUCCESS. */ scmd->flags &= ~IS_RESETTING; - goto maybe_retry; + return NEEDS_RETRY; } /* * rats. we are already in the error handler, so we now @@ -378,10 +386,7 @@ * is valid, we have a pretty good idea of what to do. * if not, we mark it as FAILED. */ - rtn = scsi_check_sense(scmd); - if (rtn == NEEDS_RETRY) - goto maybe_retry; - return rtn; + return scsi_check_sense(scmd); } if (host_byte(scmd->result) != DID_OK) { return FAILED; @@ -401,10 +406,7 @@ case COMMAND_TERMINATED: return SUCCESS; case CHECK_CONDITION: - rtn = scsi_check_sense(scmd); - if (rtn == NEEDS_RETRY) - goto maybe_retry; - return rtn; + return scsi_check_sense(scmd); case CONDITION_GOOD: case INTERMEDIATE_GOOD: case INTERMEDIATE_C_GOOD: @@ -419,14 +421,6 @@ return FAILED; } return FAILED; - - maybe_retry: - if ((++scmd->retries) < scmd->allowed) { - return NEEDS_RETRY; - } else { - /* no more retries - report this one back to upper level */ - return SUCCESS; - } } /** @@ -490,7 +484,7 @@ * this case, and furthermore, there is a different completion handler * vs scsi_dispatch_cmd. * Return value: - * SUCCESS/FAILED + * SUCCESS or FAILED or NEEDS_RETRY **/ static int scsi_send_eh_cmnd(Scsi_Cmnd *scmd, int timeout) { @@ -500,7 +494,6 @@ ASSERT_LOCK(host->host_lock, 0); -retry: /* * we will use a queued command if possible, otherwise we will * emulate the queuing and calling of completion function ourselves. @@ -577,16 +570,15 @@ * actually did complete normally. */ if (rtn == SUCCESS) { - int ret = scsi_eh_completed_normally(scmd); + int rtn = scsi_eh_completed_normally(scmd); SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scsi_eh_completed_normally %x\n", - __FUNCTION__, ret)); - switch (ret) { + __FUNCTION__, rtn)); + switch (rtn) { case SUCCESS: - break; case NEEDS_RETRY: - goto retry; case FAILED: + break; default: rtn = FAILED; break; @@ -658,15 +650,8 @@ * when we eventually call scsi_finish, we really wish to complete * the original request, so let's restore the original data. (db) */ - memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd, - sizeof(scmd->data_cmnd)); + scsi_setup_cmd_retry(scmd); scmd->result = saved_result; - scmd->request_buffer = scmd->buffer; - scmd->request_bufflen = scmd->bufflen; - scmd->use_sg = scmd->old_use_sg; - scmd->cmd_len = scmd->old_cmd_len; - scmd->sc_data_direction = scmd->sc_old_data_direction; - scmd->underflow = scmd->old_underflow; /* * hey, we are done. let's look to see what happened. @@ -684,16 +669,16 @@ **/ static int scsi_eh_retry_cmd(Scsi_Cmnd *scmd) { - memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd, - sizeof(scmd->data_cmnd)); - scmd->request_buffer = scmd->buffer; - scmd->request_bufflen = scmd->bufflen; - scmd->use_sg = scmd->old_use_sg; - scmd->cmd_len = scmd->old_cmd_len; - scmd->sc_data_direction = scmd->sc_old_data_direction; - scmd->underflow = scmd->old_underflow; + int rtn = SUCCESS; + + for (; scmd->retries < scmd->allowed; scmd->retries++) { + scsi_setup_cmd_retry(scmd); + rtn = scsi_send_eh_cmnd(scmd, scmd->timeout_per_command); + if (rtn != NEEDS_RETRY) + break; + } - return scsi_send_eh_cmnd(scmd, scmd->timeout_per_command); + return rtn; } /** @@ -718,9 +703,7 @@ * set this back so that the upper level can correctly free up * things. */ - scmd->use_sg = scmd->old_use_sg; - scmd->sc_data_direction = scmd->sc_old_data_direction; - scmd->underflow = scmd->old_underflow; + scsi_setup_cmd_retry(scmd); } /** @@ -848,7 +831,9 @@ static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0}; int rtn; + int retry_cnt = 1; +retry_tur: memcpy((void *) scmd->cmnd, (void *) tur_command, sizeof(tur_command)); @@ -874,32 +859,18 @@ * when we eventually call scsi_finish, we really wish to complete * the original request, so let's restore the original data. (db) */ - memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd, - sizeof(scmd->data_cmnd)); - scmd->request_buffer = scmd->buffer; - scmd->request_bufflen = scmd->bufflen; - scmd->use_sg = scmd->old_use_sg; - scmd->cmd_len = scmd->old_cmd_len; - scmd->sc_data_direction = scmd->sc_old_data_direction; - scmd->underflow = scmd->old_underflow; + scsi_setup_cmd_retry(scmd); /* * hey, we are done. let's look to see what happened. */ - SCSI_LOG_ERROR_RECOVERY(3, - printk("%s: scmd %p rtn %x\n", + SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n", __FUNCTION__, scmd, rtn)); - if ((rtn == SUCCESS) && scmd->result) { - if (((driver_byte(scmd->result) & DRIVER_SENSE) || - (status_byte(scmd->result) & CHECK_CONDITION)) && - (SCSI_SENSE_VALID(scmd))) { - if (((scmd->sense_buffer[2] & 0xf) != NOT_READY) && - ((scmd->sense_buffer[2] & 0xf) != UNIT_ATTENTION) && - ((scmd->sense_buffer[2] & 0xf) != ILLEGAL_REQUEST)) { - return 0; - } - } - } + if (rtn == SUCCESS) + return 0; + else if (rtn == NEEDS_RETRY) + if (retry_cnt--) + goto retry_tur; return 1; } @@ -964,6 +935,11 @@ rtn = scmd->host->hostt->eh_device_reset_handler(scmd); spin_unlock_irqrestore(scmd->host->host_lock, flags); + if (rtn == SUCCESS) { + scmd->device->was_reset = 1; + scmd->device->expecting_cc_ua = 1; + } + return rtn; } @@ -1422,8 +1398,7 @@ if ((shost->can_queue > 0 && (shost->host_busy >= shost->can_queue)) || (shost->host_blocked) - || (shost->host_self_blocked) - || (sdev->device_blocked)) { + || (shost->host_self_blocked)) { break; } @@ -1471,7 +1446,7 @@ if (scsi_eh_get_sense(sc_todo, shost)) if (scsi_eh_abort_cmd(sc_todo, shost)) if (scsi_eh_bus_device_reset(sc_todo, shost)) - if(scsi_eh_bus_host_reset(sc_todo, shost)) + if (scsi_eh_bus_host_reset(sc_todo, shost)) scsi_eh_offline_sdevs(sc_todo, shost); BUG_ON(shost->host_failed); diff -Nru a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c --- a/drivers/scsi/scsi_lib.c Fri Oct 4 08:37:58 2002 +++ b/drivers/scsi/scsi_lib.c Fri Oct 4 08:37:58 2002 @@ -160,6 +160,30 @@ } /* + * Function: scsi_setup_cmd_retry() + * + * Purpose: Restore the command state for a retry + * + * Arguments: SCpnt - command to be restored + * + * Returns: Nothing + * + * Notes: Immediately prior to retrying a command, we need + * to restore certain fields that we saved above. + */ +void scsi_setup_cmd_retry(Scsi_Cmnd *SCpnt) +{ + memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd, + sizeof(SCpnt->data_cmnd)); + SCpnt->request_buffer = SCpnt->buffer; + SCpnt->request_bufflen = SCpnt->bufflen; + SCpnt->use_sg = SCpnt->old_use_sg; + SCpnt->cmd_len = SCpnt->old_cmd_len; + SCpnt->sc_data_direction = SCpnt->sc_old_data_direction; + SCpnt->underflow = SCpnt->old_underflow; +} + +/* * Function: scsi_queue_next_request() * * Purpose: Handle post-processing of completed commands. @@ -614,7 +638,7 @@ printk("scsi%d: ERROR on channel %d, id %d, lun %d, CDB: ", SCpnt->host->host_no, (int) SCpnt->channel, (int) SCpnt->target, (int) SCpnt->lun); - print_command(SCpnt->cmnd); + print_command(SCpnt->data_cmnd); print_sense("sd", SCpnt); SCpnt = scsi_end_request(SCpnt, 0, block_sectors); return; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Mon Oct 07 2002 - 22:00:47 EST