From 8d453ee5a037af528b2f4b5b1268af76ad5950ed Mon Sep 17 00:00:00 2001 From: Kenneth R Westerback Date: Wed, 28 Dec 2005 02:43:55 +0000 Subject: Strip out fancy timeout code that attempts to mimic FreeBSD's thread based timeout handling. Use the simple timeout == bus reset model instead. Also move verbose debug output inside #ifdef/#endif. Fixes immediate crashes on encountering a timeout. Noted by per englebrecht when trying 'smartctl -d scsi -a /dev/rsd0c'. Thanks to per englebrecht and luiz gustavo for testing. Should only impact timeout handling. No change to normal processing. ok marco@ --- sys/dev/ic/aic79xx.c | 323 ++++--------------------------------------- sys/dev/ic/aic79xx.h | 9 +- sys/dev/ic/aic79xx_openbsd.c | 58 +------- 3 files changed, 33 insertions(+), 357 deletions(-) (limited to 'sys/dev') diff --git a/sys/dev/ic/aic79xx.c b/sys/dev/ic/aic79xx.c index 877f99ea699..18f10022b7e 100644 --- a/sys/dev/ic/aic79xx.c +++ b/sys/dev/ic/aic79xx.c @@ -1,4 +1,4 @@ -/* $OpenBSD: aic79xx.c,v 1.31 2005/11/29 03:12:11 krw Exp $ */ +/* $OpenBSD: aic79xx.c,v 1.32 2005/12/28 02:43:54 krw Exp $ */ /* * Copyright (c) 2004 Milos Urbanek, Kenneth R. Westerback & Marco Peereboom @@ -253,9 +253,6 @@ u_int ahd_resolve_seqaddr(struct ahd_softc *ahd, void ahd_download_instr(struct ahd_softc *ahd, u_int instrptr, uint8_t *dconsts); int ahd_probe_stack_size(struct ahd_softc *ahd); -int ahd_other_scb_timeout(struct ahd_softc *ahd, - struct scb *scb, - struct scb *other_scb); int ahd_scb_active_in_fifo(struct ahd_softc *ahd, struct scb *scb); void ahd_run_data_fifo(struct ahd_softc *ahd, @@ -1420,8 +1417,7 @@ ahd_handle_seqint(struct ahd_softc *ahd, u_int intstat) devinfo.lun); scbid = ahd_get_scbptr(ahd); scb = ahd_lookup_scb(ahd, scbid); - if (scb != NULL - && (scb->flags & SCB_RECOVERY_SCB) != 0) + if (scb != NULL) /* * Ensure that we didn't put a second instance of this * SCB into the QINFIFO. @@ -8235,9 +8231,7 @@ ahd_handle_scsi_status(struct ahd_softc *ahd, struct scb *scb) * this is a recovery SCB that is allowed * to have an active timer. */ - if (ahd->scb_data.recovery_scbs == 0 - || (scb->flags & SCB_RECOVERY_SCB) != 0) - aic_scb_timer_reset(scb, 5 * 1000); + aic_scb_timer_reset(scb, 5 * 1000); break; } case SCSI_STATUS_OK: @@ -9146,309 +9140,52 @@ ahd_dump_scbs(struct ahd_softc *ahd) void ahd_timeout(void *arg) { - struct scb *scb = (struct scb *)arg; + struct scb *scb, *list_scb; struct ahd_softc *ahd; + char channel; + long s; + int found; +#ifdef AHD_DEBUG + int was_paused; +#endif + scb = (struct scb *)arg; ahd = scb->ahd_softc; - if ((scb->flags & SCB_ACTIVE) != 0) { - if ((scb->flags & SCB_TIMEDOUT) == 0) { - LIST_INSERT_HEAD(&ahd->timedout_scbs, scb, - timedout_links); - scb->flags |= SCB_TIMEDOUT; - } - ahd_recover_commands(ahd); - } -} - -/* - * ahd_recover_commands determines if any of the commands that have currently - * timedout are the root cause for this timeout. Innocent commands are given - * a new timeout while we wait for the command executing on the bus to timeout. - * This routine is invoked from a thread context so we are allowed to sleep. - * Our lock is not held on entry. - */ -void -ahd_recover_commands(struct ahd_softc *ahd) -{ - struct scb *scb; - struct scb *active_scb; - long s; - int found; - int was_paused; - u_int active_scbptr; - u_int last_phase; ahd_lock(ahd, &s); - /* - * Pause the controller and manually flush any - * commands that have just completed but that our - * interrupt handler has yet to see. - */ +#ifdef AHD_DEBUG was_paused = ahd_is_paused(ahd); - - printf("%s: Recovery Initiated - Card was %spaused\n", ahd_name(ahd), - was_paused ? "" : "not "); + printf("%s: SCB %d timed out - Card was %spaused\n", ahd_name(ahd), + SCB_GET_TAG(scb), was_paused ? "" : "not "); ahd_dump_card_state(ahd); +#endif - ahd_pause_and_flushwork(ahd); + ahd_pause(ahd); - if (LIST_EMPTY(&ahd->timedout_scbs) != 0) { + if (scb->flags & SCB_ACTIVE) { + aic_set_transaction_status(scb, CAM_CMD_TIMEOUT); /* - * The timedout commands have already - * completed. This typically means - * that either the timeout value was on - * the hairy edge of what the device - * requires or - more likely - interrupts - * are not happening. + * Go through all of our pending SCBs and remove any scheduled + * timeouts for them. They're about to be aborted so no need + * for them to timeout. */ - printf("%s: Timedout SCBs already complete. " - "Interrupts may not be functioning.\n", ahd_name(ahd)); - ahd_unpause(ahd); - ahd_unlock(ahd, &s); - return; - } - - /* - * Determine identity of SCB acting on the bus. - * This test only catches non-packetized transactions. - * Due to the fleeting nature of packetized operations, - * we can't easily determine that a packetized operation - * is on the bus. - */ - ahd_set_modes(ahd, AHD_MODE_SCSI, AHD_MODE_SCSI); - last_phase = ahd_inb(ahd, LASTPHASE); - active_scbptr = ahd_get_scbptr(ahd); - active_scb = NULL; - if (last_phase != P_BUSFREE - || (ahd_inb(ahd, SEQ_FLAGS) & NOT_IDENTIFIED) == 0) - active_scb = ahd_lookup_scb(ahd, active_scbptr); - - while ((scb = LIST_FIRST(&ahd->timedout_scbs)) != NULL) { - int target; - int lun; - char channel; - - target = SCB_GET_TARGET(ahd, scb); - channel = SCB_GET_CHANNEL(ahd, scb); - lun = SCB_GET_LUN(scb); - - ahd_print_path(ahd, scb); - printf("SCB %d - timed out\n", SCB_GET_TAG(scb)); - - if (scb->flags & (SCB_DEVICE_RESET|SCB_ABORT)) { - /* - * Been down this road before. - * Do a full bus reset. - */ - aic_set_transaction_status(scb, CAM_CMD_TIMEOUT); -bus_reset: - found = ahd_reset_channel(ahd, channel, - /*Initiate Reset*/TRUE); - printf("%s: Issued Channel %c Bus Reset. " - "%d SCBs aborted\n", ahd_name(ahd), channel, - found); - continue; + LIST_FOREACH(list_scb, &ahd->pending_scbs, pending_links) { + if (list_scb->xs) + timeout_del(&list_scb->xs->stimeout); } - - /* - * Remove the command from the timedout list in - * preparation for requeing it. - */ - LIST_REMOVE(scb, timedout_links); - scb->flags &= ~SCB_TIMEDOUT; - - if (active_scb != NULL) { - - if (active_scb != scb) { - - /* - * If the active SCB is not us, assume that - * the active SCB has a longer timeout than - * the timedout SCB, and wait for the active - * SCB to timeout. As a safeguard, only - * allow this deferral to continue if some - * untimed-out command is outstanding. - */ - if (ahd_other_scb_timeout(ahd, scb, - active_scb) == 0) - goto bus_reset; - continue; - } - - /* - * We're active on the bus, so assert ATN - * and hope that the target responds. - */ - ahd_set_recoveryscb(ahd, active_scb); - active_scb->flags |= SCB_RECOVERY_SCB|SCB_DEVICE_RESET; - ahd_outb(ahd, MSG_OUT, HOST_MSG); - ahd_outb(ahd, SCSISIGO, last_phase|ATNO); - ahd_print_path(ahd, active_scb); - printf("BDR message in message buffer\n"); - aic_scb_timer_reset(scb, 2 * 1000); - break; - } else if (last_phase != P_BUSFREE - && ahd_inb(ahd, SCSIPHASE) == 0) { - /* - * SCB is not identified, there - * is no pending REQ, and the sequencer - * has not seen a busfree. Looks like - * a stuck connection waiting to - * go busfree. Reset the bus. - */ - printf("%s: Connection stuck awaiting busfree or " - "Identify Msg.\n", ahd_name(ahd)); - goto bus_reset; - } else if (ahd_search_qinfifo(ahd, target, channel, lun, - SCB_GET_TAG(scb), ROLE_INITIATOR, - /*status*/0, SEARCH_COUNT) > 0) { - - /* - * We haven't even gone out on the bus - * yet, so the timeout must be due to - * some other command. Reset the timer - * and go on. - */ - if (ahd_other_scb_timeout(ahd, scb, NULL) == 0) - goto bus_reset; - } else { - /* - * This SCB is for a disconnected transaction - * and we haven't found a better candidate on - * the bus to explain this timeout. - */ - ahd_set_recoveryscb(ahd, scb); - - /* - * Actually re-queue this SCB in an attempt - * to select the device before it reconnects. - * In either case (selection or reselection), - * we will now issue a target reset to the - * timed-out device. - */ - scb->flags |= SCB_DEVICE_RESET; - scb->hscb->cdb_len = 0; - scb->hscb->task_attribute = 0; - scb->hscb->task_management = SIU_TASKMGMT_ABORT_TASK; - - ahd_set_scbptr(ahd, SCB_GET_TAG(scb)); - if ((scb->flags & SCB_PACKETIZED) != 0) { - /* - * Mark the SCB has having an outstanding - * task management function. Should the command - * complete normally before the task management - * function can be sent, the host will be - * notified to abort our requeued SCB. - */ - ahd_outb(ahd, SCB_TASK_MANAGEMENT, - scb->hscb->task_management); - } else { - /* - * If non-packetized, set the MK_MESSAGE control - * bit indicating that we desire to send a - * message. We also set the disconnected flag - * since there is no guarantee that our SCB - * control byte matches the version on the - * card. We don't want the sequencer to abort - * the command thinking an unsolicited - * reselection occurred. - */ - scb->hscb->control |= MK_MESSAGE|DISCONNECTED; - - /* - * The sequencer will never re-reference the - * in-core SCB. To make sure we are notified - * during reslection, set the MK_MESSAGE flag in - * the card's copy of the SCB. - */ - ahd_outb(ahd, SCB_CONTROL, - ahd_inb(ahd, SCB_CONTROL)|MK_MESSAGE); - } - - /* - * Clear out any entries in the QINFIFO first - * so we are the next SCB for this target - * to run. - */ - ahd_search_qinfifo(ahd, target, channel, lun, - SCB_LIST_NULL, ROLE_INITIATOR, - CAM_REQUEUE_REQ, SEARCH_COMPLETE); - ahd_qinfifo_requeue_tail(ahd, scb); - ahd_set_scbptr(ahd, active_scbptr); - ahd_print_path(ahd, scb); - printf("Queuing a BDR SCB\n"); - aic_scb_timer_reset(scb, 2 * 1000); - break; - } - } - - /* - * Any remaining SCBs were not the "culprit", so remove - * them from the timeout list. The timer for these commands - * will be reset once the recovery SCB completes. - */ - while ((scb = LIST_FIRST(&ahd->timedout_scbs)) != NULL) { - - LIST_REMOVE(scb, timedout_links); - scb->flags &= ~SCB_TIMEDOUT; + channel = SCB_GET_CHANNEL(ahd, scb); + found = ahd_reset_channel(ahd, channel, /*Initiate Reset*/TRUE); +#ifdef AHD_DEBUG + printf("%s: Issued Channel %c Bus Reset. %d SCBs aborted\n", + ahd_name(ahd), channel, found); +#endif } ahd_unpause(ahd); ahd_unlock(ahd, &s); } -/* - * Re-schedule a timeout for the passed in SCB if we determine that some - * other SCB is in the process of recovery or an SCB with a longer - * timeout is still pending. Limit our search to just "other_scb" - * if it is non-NULL. - */ -int -ahd_other_scb_timeout(struct ahd_softc *ahd, struct scb *scb, - struct scb *other_scb) -{ - u_int newtimeout; - int found; - - ahd_print_path(ahd, scb); - printf("Other SCB Timeout%s", - (scb->flags & SCB_OTHERTCL_TIMEOUT) != 0 - ? " again\n" : "\n"); - - newtimeout = aic_get_timeout(scb); - scb->flags |= SCB_OTHERTCL_TIMEOUT; - found = 0; - if (other_scb != NULL) { - if ((other_scb->flags - & (SCB_OTHERTCL_TIMEOUT|SCB_TIMEDOUT)) == 0 - || (other_scb->flags & SCB_RECOVERY_SCB) != 0) { - found++; - newtimeout = MAX(aic_get_timeout(other_scb), - newtimeout); - } - } else { - LIST_FOREACH(other_scb, &ahd->pending_scbs, pending_links) { - if ((other_scb->flags - & (SCB_OTHERTCL_TIMEOUT|SCB_TIMEDOUT)) == 0 - || (other_scb->flags & SCB_RECOVERY_SCB) != 0) { - found++; - newtimeout = MAX(aic_get_timeout(other_scb), - newtimeout); - } - } - } - - if (found != 0) - aic_scb_timer_reset(scb, newtimeout); - else { - ahd_print_path(ahd, scb); - printf("No other SCB worth waiting for...\n"); - } - - return (found != 0); -} - /**************************** Flexport Logic **********************************/ /* * Read count 16bit words from 16bit word address start_addr from the diff --git a/sys/dev/ic/aic79xx.h b/sys/dev/ic/aic79xx.h index 5b4f3404285..16121fe926f 100644 --- a/sys/dev/ic/aic79xx.h +++ b/sys/dev/ic/aic79xx.h @@ -1,4 +1,4 @@ -/* $OpenBSD: aic79xx.h,v 1.19 2005/11/29 03:12:11 krw Exp $ */ +/* $OpenBSD: aic79xx.h,v 1.20 2005/12/28 02:43:54 krw Exp $ */ /* * Copyright (c) 2004 Milos Urbanek, Kenneth R. Westerback & Marco Peereboom @@ -618,7 +618,6 @@ typedef enum { SCB_DEVICE_RESET = 0x00004, SCB_SENSE = 0x00008, SCB_CDB32_PTR = 0x00010, - SCB_RECOVERY_SCB = 0x00020, SCB_AUTO_NEGOTIATE = 0x00040,/* Negotiate to achieve goal. */ SCB_NEGOTIATE = 0x00080,/* Negotiation forced for command. */ SCB_ABORT = 0x00100, @@ -635,10 +634,6 @@ typedef enum { * don't want to upset the user. This * flag is typically used during DV. */ - SCB_TIMEDOUT = 0x20000/* - * SCB has timed out and is on the - * timedout list. - */ } scb_flag; struct scb { @@ -1486,7 +1481,6 @@ void ahd_handle_scsi_status(struct ahd_softc *ahd, void ahd_calc_residual(struct ahd_softc *ahd, struct scb *scb); void ahd_timeout(void *); -void ahd_recover_commands(struct ahd_softc *ahd); /*************************** Utility Functions ********************************/ struct ahd_phase_table_entry* ahd_lookup_phase_entry(int phase); @@ -1592,6 +1586,5 @@ int ahd_print_register(ahd_reg_parse_entry_t *table, u_int *cur_column, u_int wrap_point); void ahd_dump_scbs(struct ahd_softc *ahd); -void ahd_set_recoveryscb(struct ahd_softc *, struct scb *); #endif /* _AIC79XX_H_ */ diff --git a/sys/dev/ic/aic79xx_openbsd.c b/sys/dev/ic/aic79xx_openbsd.c index f16aaceab30..d973286d26b 100644 --- a/sys/dev/ic/aic79xx_openbsd.c +++ b/sys/dev/ic/aic79xx_openbsd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: aic79xx_openbsd.c,v 1.22 2005/11/02 03:27:39 krw Exp $ */ +/* $OpenBSD: aic79xx_openbsd.c,v 1.23 2005/12/28 02:43:54 krw Exp $ */ /* * Copyright (c) 2004 Milos Urbanek, Kenneth R. Westerback & Marco Peereboom @@ -170,7 +170,6 @@ void ahd_done(struct ahd_softc *ahd, struct scb *scb) { struct scsi_xfer *xs = scb->xs; - struct scb *list_scb; int s; /* XXX in ahc there is some bus_dmamap_sync(PREREAD|PREWRITE); */ @@ -191,36 +190,6 @@ ahd_done(struct ahd_softc *ahd, struct scb *scb) bus_dmamap_unload(ahd->parent_dmat, scb->dmamap); } - /* - * If the recovery SCB completes, we have to be - * out of our timeout. - */ - if ((scb->flags & SCB_RECOVERY_SCB) != 0) { - ahd->scb_data.recovery_scbs--; - - if (aic_get_transaction_status(scb) == CAM_BDR_SENT - || aic_get_transaction_status(scb) == CAM_REQ_ABORTED) - aic_set_transaction_status(scb, CAM_CMD_TIMEOUT); - - if (ahd->scb_data.recovery_scbs == 0) { - /* - * All recovery actions have completed successfully, - * so reinstate the timeouts for all other pending - * commands. - */ - LIST_FOREACH(list_scb, &ahd->pending_scbs, - pending_links) { - if (!(list_scb->xs->flags & SCSI_POLL)) - aic_scb_timer_reset(list_scb, - aic_get_timeout(list_scb)); - } - - ahd_print_path(ahd, scb); - printf("%s: no longer in timeout, status = %x\n", - ahd_name(ahd), aic_get_transaction_status(scb)); - } - } - /* Translate the CAM status code to a SCSI error code. */ switch (xs->error) { case CAM_SCSI_STATUS_ERROR: @@ -790,8 +759,7 @@ aic_platform_scb_free(struct ahd_softc *ahd, struct scb *scb) ahd_lock(ahd, &s); - if ((ahd->flags & AHD_RESOURCE_SHORTAGE) != 0 || - (scb->flags & SCB_RECOVERY_SCB) != 0) { + if ((ahd->flags & AHD_RESOURCE_SHORTAGE) != 0) { ahd->flags &= ~AHD_RESOURCE_SHORTAGE; } @@ -821,25 +789,3 @@ void ahd_platform_flushwork(struct ahd_softc *ahd) { } - -void -ahd_set_recoveryscb(struct ahd_softc *ahd, struct scb *scb) -{ - - if ((scb->flags & SCB_RECOVERY_SCB) == 0) { - struct scb *list_scb; - - scb->flags |= SCB_RECOVERY_SCB; - - AIC_SCB_DATA(ahd)->recovery_scbs++; - - /* - * Go through all of our pending SCBs and remove - * any scheduled timeouts for them. We will reschedule - * them after we've successfully fixed this problem. - */ - LIST_FOREACH(list_scb, &ahd->pending_scbs, pending_links) { - timeout_del(&list_scb->xs->stimeout); - } - } -} -- cgit v1.2.3