diff options
Diffstat (limited to 'sys')
154 files changed, 25747 insertions, 39365 deletions
diff --git a/sys/dev/raidframe/rf_acctrace.c b/sys/dev/raidframe/rf_acctrace.c index 8e3c7a9b26a..65e9365ba23 100644 --- a/sys/dev/raidframe/rf_acctrace.c +++ b/sys/dev/raidframe/rf_acctrace.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_acctrace.c,v 1.1 1999/01/11 14:28:58 niklas Exp $ */ -/* $NetBSD: rf_acctrace.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* $OpenBSD: rf_acctrace.c,v 1.2 1999/02/16 00:02:21 niklas Exp $ */ +/* $NetBSD: rf_acctrace.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,100 +28,17 @@ */ /***************************************************************************** - * + * * acctrace.c -- code to support collecting information about each access * *****************************************************************************/ -/* : - * Log: rf_acctrace.c,v - * Revision 1.29 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.28 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.27 1996/06/14 14:35:24 jimz - * clean up dfstrace protection - * - * Revision 1.26 1996/06/13 19:09:04 jimz - * remove trace.dat file before beginning - * - * Revision 1.25 1996/06/12 04:41:26 jimz - * tweaks to make genplot work with user-level driver - * (mainly change stat collection) - * - * Revision 1.24 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.23 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.22 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. 
Removed dead code. - * - * Revision 1.21 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.20 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.19 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.18 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.17 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.16 1996/05/20 16:15:49 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.15 1996/05/18 20:10:00 jimz - * bit of cleanup to compile cleanly in kernel, once again - * - * Revision 1.14 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.13 1995/11/30 16:26:43 wvcii - * added copyright info - * - */ - -#ifdef _KERNEL -#define KERNEL -#endif -#include "rf_threadstuff.h" -#include "rf_types.h" #include <sys/stat.h> #include <sys/types.h> -#ifdef KERNEL -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#include <dfstrace.h> -#endif /* !__NetBSD__ && !__OpenBSD__ */ -#if DFSTRACE > 0 -#include <sys/dfs_log.h> -#include <sys/dfstracebuf.h> -#endif /* DFSTRACE > 0 */ -#endif /* KERNEL */ - +#include "rf_threadstuff.h" +#include "rf_types.h" #include "rf_debugMem.h" #include "rf_acctrace.h" #include "rf_general.h" @@ -136,104 +53,69 @@ static int accessTraceBufCount = 0; static RF_AccTraceEntry_t *access_tracebuf; static long traceCount; -int rf_stopCollectingTraces; +int rf_stopCollectingTraces; RF_DECLARE_MUTEX(rf_tracing_mutex) -int rf_trace_fd; + int rf_trace_fd; -static void rf_ShutdownAccessTrace(void *); + static void rf_ShutdownAccessTrace(void 
*); -static void rf_ShutdownAccessTrace(ignored) - void *ignored; + static void rf_ShutdownAccessTrace(ignored) + void *ignored; { - if (rf_accessTraceBufSize) { - if (accessTraceBufCount) rf_FlushAccessTraceBuf(); -#ifndef KERNEL - close(rf_trace_fd); -#endif /* !KERNEL */ - RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t)); - } - rf_mutex_destroy(&rf_tracing_mutex); -#if defined(KERNEL) && DFSTRACE > 0 - printf("RAIDFRAME: %d trace entries were sent to dfstrace\n",traceCount); -#endif /* KERNEL && DFSTRACE > 0 */ + if (rf_accessTraceBufSize) { + if (accessTraceBufCount) + rf_FlushAccessTraceBuf(); + RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t)); + } + rf_mutex_destroy(&rf_tracing_mutex); } -int rf_ConfigureAccessTrace(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureAccessTrace(listp) + RF_ShutdownList_t **listp; { - int rc; + int rc; - numTracesSoFar = accessTraceBufCount = rf_stopCollectingTraces = 0; - if (rf_accessTraceBufSize) { - RF_Malloc(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - accessTraceBufCount = 0; -#ifndef KERNEL - rc = unlink("trace.dat"); - if (rc && (errno != ENOENT)) { - perror("unlink"); - RF_ERRORMSG("Unable to remove existing trace.dat\n"); - return(errno); - } - if ((rf_trace_fd = open("trace.dat",O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0 ) { - perror("Unable to open trace.dat for output"); - return(errno); - } -#endif /* !KERNEL */ - } - traceCount = 0; - numTracesSoFar = 0; - rc = rf_mutex_init(&rf_tracing_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownAccessTrace, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - if (rf_accessTraceBufSize) { - RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t)); 
-#ifndef KERNEL - close(rf_trace_fd); -#endif /* !KERNEL */ - rf_mutex_destroy(&rf_tracing_mutex); - } - } - return(rc); + numTracesSoFar = accessTraceBufCount = rf_stopCollectingTraces = 0; + if (rf_accessTraceBufSize) { + RF_Malloc(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + accessTraceBufCount = 0; + } + traceCount = 0; + numTracesSoFar = 0; + rc = rf_mutex_init(&rf_tracing_mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + } + rc = rf_ShutdownCreate(listp, rf_ShutdownAccessTrace, NULL); + if (rc) { + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + if (rf_accessTraceBufSize) { + RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t)); + rf_mutex_destroy(&rf_tracing_mutex); + } + } + return (rc); } - /* install a trace record. cause a flush to disk or to the trace collector daemon * if the trace buffer is at least 1/2 full. 
*/ -void rf_LogTraceRec(raid, rec) - RF_Raid_t *raid; - RF_AccTraceEntry_t *rec; +void +rf_LogTraceRec(raid, rec) + RF_Raid_t *raid; + RF_AccTraceEntry_t *rec; { RF_AccTotals_t *acc = &raid->acc_totals; #if 0 RF_Etimer_t timer; - int i, n; + int i, n; #endif - if (rf_stopCollectingTraces || ((rf_maxNumTraces >= 0) && (numTracesSoFar >= rf_maxNumTraces))) - return; + if (rf_stopCollectingTraces || ((rf_maxNumTraces >= 0) && (numTracesSoFar >= rf_maxNumTraces))) + return; -#ifndef KERNEL - if (rf_accessTraceBufSize) { - RF_LOCK_MUTEX(rf_tracing_mutex); - numTracesSoFar++; - bcopy((char *)rec, (char *)&access_tracebuf[ accessTraceBufCount++ ], sizeof(RF_AccTraceEntry_t)); - if (accessTraceBufCount == rf_accessTraceBufSize) - rf_FlushAccessTraceBuf(); - RF_UNLOCK_MUTEX(rf_tracing_mutex); - } -#endif /* !KERNEL */ -#if defined(KERNEL) && DFSTRACE > 0 - rec->index = traceCount++; - if (traceon & DFS_TRACE_RAIDFRAME) { - dfs_log(DFS_NOTE, (char *) rec, (int) sizeof(*rec), 0); - } -#endif /* KERNEL && DFSTRACE > 0 */ /* update AccTotals for this device */ if (!raid->keep_acc_totals) return; @@ -246,11 +128,11 @@ void rf_LogTraceRec(raid, rec) acc->recon_phys_io_us += rec->phys_io_us; acc->recon_diskwait_us += rec->diskwait_us; acc->recon_reccount++; - } - else { + } else { RF_HIST_ADD(acc->tot_hist, rec->total_us); RF_HIST_ADD(acc->dw_hist, rec->diskwait_us); - /* count of physical ios which are too big. often due to thermal recalibration */ + /* count of physical ios which are too big. often due to + * thermal recalibration */ /* if bigvals > 0, you should probably ignore this data set */ if (rec->diskwait_us > 100000) acc->bigvals++; @@ -279,17 +161,8 @@ void rf_LogTraceRec(raid, rec) * from interrupt context, we don't do any copyouts here, but rather just wake trace * buffer collector thread. 
*/ -void rf_FlushAccessTraceBuf() +void +rf_FlushAccessTraceBuf() { -#ifndef KERNEL - int size = accessTraceBufCount * sizeof(RF_AccTraceEntry_t); - - if (write(rf_trace_fd, (char *) access_tracebuf, size) < size ) { - fprintf(stderr, "Unable to write traces to file. tracing disabled\n"); - RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t)); - rf_accessTraceBufSize = 0; - close(rf_trace_fd); - } -#endif /* !KERNEL */ - accessTraceBufCount = 0; + accessTraceBufCount = 0; } diff --git a/sys/dev/raidframe/rf_acctrace.h b/sys/dev/raidframe/rf_acctrace.h index 0b3441e3e49..f7ca09eb173 100644 --- a/sys/dev/raidframe/rf_acctrace.h +++ b/sys/dev/raidframe/rf_acctrace.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_acctrace.h,v 1.1 1999/01/11 14:28:58 niklas Exp $ */ -/* $NetBSD: rf_acctrace.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* $OpenBSD: rf_acctrace.h,v 1.2 1999/02/16 00:02:22 niklas Exp $ */ +/* $NetBSD: rf_acctrace.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,81 +28,11 @@ */ /***************************************************************************** - * + * * acctrace.h -- header file for acctrace.c * *****************************************************************************/ -/* : - * - * Log: rf_acctrace.h,v - * Revision 1.32 1996/08/02 15:12:38 jimz - * remove dead code - * - * Revision 1.31 1996/07/27 14:34:39 jimz - * remove bogus semicolon - * - * Revision 1.30 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.29 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.28 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.27 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * / - * - * Revision 1.26 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.25 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.24 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.23 1996/05/28 12:34:30 jimz - * nail down size of reconacc - * - * Revision 1.22 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.21 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.20 1996/05/02 14:57:24 jimz - * change to boolean_t - * - * Revision 1.19 1995/12/14 18:37:06 jimz - * convert to rf_types.h types - * - * Revision 1.18 1995/11/30 16:26:49 wvcii - * added copyright info - * - * Revision 1.17 1995/09/30 19:49:23 jimz - * add AccTotals structure, for capturing totals in kernel - * - * Revision 1.16 1995/09/12 00:20:55 wvcii - * added support for tracing disk queue time - * - * Revision 1.15 95/09/06 19:23:12 wvcii - * increased MAX_IOS_PER_TRACE_ENTRY from 1 to 4 - * - */ #ifndef _RF__RF_ACCTRACE_H_ #define _RF__RF_ACCTRACE_H_ @@ -112,85 +42,93 @@ #include "rf_etimer.h" typedef struct RF_user_acc_stats_s { - RF_uint64 suspend_ovhd_us; /* us spent mucking in the access-suspension code */ - RF_uint64 map_us; /* us spent mapping the access */ - RF_uint64 lock_us; /* us spent locking & unlocking stripes, including 
time spent blocked */ - RF_uint64 dag_create_us; /* us spent creating the DAGs */ - RF_uint64 dag_retry_us; /* _total_ us spent retrying the op -- not broken down into components */ - RF_uint64 exec_us; /* us spent in DispatchDAG */ - RF_uint64 exec_engine_us; /* us spent in engine, not including blocking time */ - RF_uint64 cleanup_us; /* us spent tearing down the dag & maps, and generally cleaning up */ -} RF_user_acc_stats_t; + RF_uint64 suspend_ovhd_us; /* us spent mucking in the + * access-suspension code */ + RF_uint64 map_us; /* us spent mapping the access */ + RF_uint64 lock_us; /* us spent locking & unlocking stripes, + * including time spent blocked */ + RF_uint64 dag_create_us;/* us spent creating the DAGs */ + RF_uint64 dag_retry_us; /* _total_ us spent retrying the op -- not + * broken down into components */ + RF_uint64 exec_us; /* us spent in DispatchDAG */ + RF_uint64 exec_engine_us; /* us spent in engine, not including + * blocking time */ + RF_uint64 cleanup_us; /* us spent tearing down the dag & maps, and + * generally cleaning up */ +} RF_user_acc_stats_t; typedef struct RF_recon_acc_stats_s { - RF_uint32 recon_start_to_fetch_us; - RF_uint32 recon_fetch_to_return_us; - RF_uint32 recon_return_to_submit_us; -} RF_recon_acc_stats_t; + RF_uint32 recon_start_to_fetch_us; + RF_uint32 recon_fetch_to_return_us; + RF_uint32 recon_return_to_submit_us; +} RF_recon_acc_stats_t; typedef struct RF_acctrace_entry_s { - union { - RF_user_acc_stats_t user; - RF_recon_acc_stats_t recon; - } specific; - RF_uint8 reconacc; /* whether this is a tracerec for a user acc or a recon acc */ - RF_uint64 xor_us; /* us spent doing XORs */ - RF_uint64 q_us; /* us spent doing XORs */ - RF_uint64 plog_us; /* us spent waiting to stuff parity into log */ - RF_uint64 diskqueue_us; /* _total_ us spent in disk queue(s), incl concurrent ops */ - RF_uint64 diskwait_us; /* _total_ us spent waiting actually waiting on the disk, incl concurrent ops */ - RF_uint64 total_us; /* total us 
spent on this access */ - RF_uint64 num_phys_ios; /* number of physical I/Os invoked */ - RF_uint64 phys_io_us; /* time of physical I/O */ - RF_Etimer_t tot_timer; /* a timer used to compute total access time */ - RF_Etimer_t timer; /* a generic timer val for timing events that live across procedure boundaries */ - RF_Etimer_t recon_timer; /* generic timer for recon stuff */ - RF_uint64 index; -} RF_AccTraceEntry_t; + union { + RF_user_acc_stats_t user; + RF_recon_acc_stats_t recon; + } specific; + RF_uint8 reconacc; /* whether this is a tracerec for a user acc + * or a recon acc */ + RF_uint64 xor_us; /* us spent doing XORs */ + RF_uint64 q_us; /* us spent doing XORs */ + RF_uint64 plog_us; /* us spent waiting to stuff parity into log */ + RF_uint64 diskqueue_us; /* _total_ us spent in disk queue(s), incl + * concurrent ops */ + RF_uint64 diskwait_us; /* _total_ us spent waiting actually waiting + * on the disk, incl concurrent ops */ + RF_uint64 total_us; /* total us spent on this access */ + RF_uint64 num_phys_ios; /* number of physical I/Os invoked */ + RF_uint64 phys_io_us; /* time of physical I/O */ + RF_Etimer_t tot_timer; /* a timer used to compute total access time */ + RF_Etimer_t timer; /* a generic timer val for timing events that + * live across procedure boundaries */ + RF_Etimer_t recon_timer;/* generic timer for recon stuff */ + RF_uint64 index; +} RF_AccTraceEntry_t; typedef struct RF_AccTotals_s { /* user acc stats */ - RF_uint64 suspend_ovhd_us; - RF_uint64 map_us; - RF_uint64 lock_us; - RF_uint64 dag_create_us; - RF_uint64 dag_retry_us; - RF_uint64 exec_us; - RF_uint64 exec_engine_us; - RF_uint64 cleanup_us; - RF_uint64 user_reccount; + RF_uint64 suspend_ovhd_us; + RF_uint64 map_us; + RF_uint64 lock_us; + RF_uint64 dag_create_us; + RF_uint64 dag_retry_us; + RF_uint64 exec_us; + RF_uint64 exec_engine_us; + RF_uint64 cleanup_us; + RF_uint64 user_reccount; /* recon acc stats */ - RF_uint64 recon_start_to_fetch_us; - RF_uint64 
recon_fetch_to_return_us; - RF_uint64 recon_return_to_submit_us; - RF_uint64 recon_io_overflow_count; - RF_uint64 recon_phys_io_us; - RF_uint64 recon_num_phys_ios; - RF_uint64 recon_diskwait_us; - RF_uint64 recon_reccount; + RF_uint64 recon_start_to_fetch_us; + RF_uint64 recon_fetch_to_return_us; + RF_uint64 recon_return_to_submit_us; + RF_uint64 recon_io_overflow_count; + RF_uint64 recon_phys_io_us; + RF_uint64 recon_num_phys_ios; + RF_uint64 recon_diskwait_us; + RF_uint64 recon_reccount; /* trace entry stats */ - RF_uint64 xor_us; - RF_uint64 q_us; - RF_uint64 plog_us; - RF_uint64 diskqueue_us; - RF_uint64 diskwait_us; - RF_uint64 total_us; - RF_uint64 num_log_ents; - RF_uint64 phys_io_overflow_count; - RF_uint64 num_phys_ios; - RF_uint64 phys_io_us; - RF_uint64 bigvals; + RF_uint64 xor_us; + RF_uint64 q_us; + RF_uint64 plog_us; + RF_uint64 diskqueue_us; + RF_uint64 diskwait_us; + RF_uint64 total_us; + RF_uint64 num_log_ents; + RF_uint64 phys_io_overflow_count; + RF_uint64 num_phys_ios; + RF_uint64 phys_io_us; + RF_uint64 bigvals; /* histograms */ RF_Hist_t dw_hist[RF_HIST_NUM_BUCKETS]; RF_Hist_t tot_hist[RF_HIST_NUM_BUCKETS]; -} RF_AccTotals_t; - +} RF_AccTotals_t; #if RF_UTILITY == 0 RF_DECLARE_EXTERN_MUTEX(rf_tracing_mutex) -#endif /* RF_UTILITY == 0 */ +#endif /* RF_UTILITY == 0 */ -int rf_ConfigureAccessTrace(RF_ShutdownList_t **listp); -void rf_LogTraceRec(RF_Raid_t *raid, RF_AccTraceEntry_t *rec); -void rf_FlushAccessTraceBuf(void); + int rf_ConfigureAccessTrace(RF_ShutdownList_t ** listp); + void rf_LogTraceRec(RF_Raid_t * raid, RF_AccTraceEntry_t * rec); + void rf_FlushAccessTraceBuf(void); -#endif /* !_RF__RF_ACCTRACE_H_ */ +#endif /* !_RF__RF_ACCTRACE_H_ */ diff --git a/sys/dev/raidframe/rf_alloclist.c b/sys/dev/raidframe/rf_alloclist.c index 5f0de4a4070..26b4e135634 100644 --- a/sys/dev/raidframe/rf_alloclist.c +++ b/sys/dev/raidframe/rf_alloclist.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_alloclist.c,v 1.1 1999/01/11 14:28:58 niklas Exp $ */ -/* $NetBSD: 
rf_alloclist.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* $OpenBSD: rf_alloclist.c,v 1.2 1999/02/16 00:02:23 niklas Exp $ */ +/* $NetBSD: rf_alloclist.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,106 +27,6 @@ * rights to redistribute these changes. */ -/* - * Log: rf_alloclist.c,v - * Revision 1.28 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.27 1996/06/12 03:29:54 jimz - * don't barf just because we can't create an alloclist - * - * Revision 1.26 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.25 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.24 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.23 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.22 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.21 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.20 1996/05/20 16:15:59 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.19 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.18 1996/05/16 22:27:45 jimz - * get rid of surreal_MakeAllocList (what was that, anyway?) 
- * - * Revision 1.17 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.16 1995/11/30 16:27:07 wvcii - * added copyright info - * - * Revision 1.15 1995/10/05 20:37:56 jimz - * assert non-NULLness of pointer to FREE in FreeAllocList() - * - * Revision 1.14 1995/06/11 20:11:24 holland - * changed fl_hist,miss_count from long to int to get around weird kernel bug - * - * Revision 1.13 1995/05/01 13:28:00 holland - * parity range locks, locking disk requests, recon+parityscan in kernel, etc. - * - * Revision 1.12 1995/04/21 19:13:04 holland - * minor change to avoid a syntax error on DO_FREE - * - * Revision 1.11 1995/02/17 19:39:56 holland - * added size param to all calls to Free(). - * this is ignored at user level, but necessary in the kernel. - * - * Revision 1.10 1995/02/10 18:08:07 holland - * added DO_FREE macro to fix what I broke during kernelization - * - * Revision 1.9 1995/02/10 17:34:10 holland - * kernelization changes - * - * Revision 1.8 1995/02/03 22:31:36 holland - * many changes related to kernelization - * - * Revision 1.7 1995/02/01 15:13:05 holland - * moved #include of general.h out of raid.h and into each file - * - * Revision 1.6 1995/01/11 19:27:02 holland - * many changes related to performance tuning - * - * Revision 1.5 1994/11/29 20:53:10 danner - * Marks mods - * - * Revision 1.3 1994/11/19 21:01:07 danner - * First merge with mark - * - * Revision 1.1.1.1 1994/11/19 20:23:38 danner - * First PQ checkin - * - * Revision 1.2 1994/11/16 15:45:35 danner - * fixed free bug in FreeAllocList - * - * - */ - /**************************************************************************** * * Alloclist.c -- code to manipulate allocation lists @@ -147,60 +47,57 @@ #include "rf_sys.h" RF_DECLARE_STATIC_MUTEX(alist_mutex) -static unsigned int fl_hit_count, fl_miss_count; + static unsigned int fl_hit_count, fl_miss_count; -static RF_AllocListElem_t 
*al_free_list=NULL; -static int al_free_list_count; + static RF_AllocListElem_t *al_free_list = NULL; + static int al_free_list_count; #define RF_AL_FREELIST_MAX 256 -#ifndef KERNEL -#define DO_FREE(_p,_sz) free((_p)) -#else /* !KERNEL */ #define DO_FREE(_p,_sz) RF_Free((_p),(_sz)) -#endif /* !KERNEL */ -static void rf_ShutdownAllocList(void *); + static void rf_ShutdownAllocList(void *); -static void rf_ShutdownAllocList(ignored) - void *ignored; + static void rf_ShutdownAllocList(ignored) + void *ignored; { - RF_AllocListElem_t *p, *pt; - - for (p = al_free_list; p; ) { - pt = p; - p = p->next; - DO_FREE(pt, sizeof(*pt)); - } - rf_mutex_destroy(&alist_mutex); - /* - printf("Alloclist: Free list hit count %lu (%lu %%) miss count %lu (%lu %%)\n", - fl_hit_count, (100*fl_hit_count)/(fl_hit_count+fl_miss_count), - fl_miss_count, (100*fl_miss_count)/(fl_hit_count+fl_miss_count)); - */ + RF_AllocListElem_t *p, *pt; + + for (p = al_free_list; p;) { + pt = p; + p = p->next; + DO_FREE(pt, sizeof(*pt)); + } + rf_mutex_destroy(&alist_mutex); + /* + printf("Alloclist: Free list hit count %lu (%lu %%) miss count %lu (%lu %%)\n", + fl_hit_count, (100*fl_hit_count)/(fl_hit_count+fl_miss_count), + fl_miss_count, (100*fl_miss_count)/(fl_hit_count+fl_miss_count)); + */ } -int rf_ConfigureAllocList(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureAllocList(listp) + RF_ShutdownList_t **listp; { - int rc; - - rc = rf_mutex_init(&alist_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); - } - al_free_list = NULL; - fl_hit_count = fl_miss_count = al_free_list_count = 0; - rc = rf_ShutdownCreate(listp, rf_ShutdownAllocList, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_mutex_destroy(&alist_mutex); - return(rc); - } - return(0); + int rc; + + rc = rf_mutex_init(&alist_mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s 
line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (rc); + } + al_free_list = NULL; + fl_hit_count = fl_miss_count = al_free_list_count = 0; + rc = rf_ShutdownCreate(listp, rf_ShutdownAllocList, NULL); + if (rc) { + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); + rf_mutex_destroy(&alist_mutex); + return (rc); + } + return (0); } @@ -208,26 +105,27 @@ int rf_ConfigureAllocList(listp) * to search for the end. If you ever observe the lists growing longer, * increase POINTERS_PER_ALLOC_LIST_ELEMENT. */ -void rf_real_AddToAllocList(l, p, size, lockflag) - RF_AllocListElem_t *l; - void *p; - int size; - int lockflag; +void +rf_real_AddToAllocList(l, p, size, lockflag) + RF_AllocListElem_t *l; + void *p; + int size; + int lockflag; { - RF_AllocListElem_t *newelem; + RF_AllocListElem_t *newelem; + + for (; l->next; l = l->next) + RF_ASSERT(l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT); /* find end of list */ - for ( ; l->next; l=l->next) - RF_ASSERT(l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT); /* find end of list */ - - RF_ASSERT(l->numPointers >= 0 && l->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); - if (l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT) { - newelem = rf_real_MakeAllocList(lockflag); - l->next = newelem; - l = newelem; - } - l->pointers[ l->numPointers ] = p; - l->sizes [ l->numPointers ] = size; - l->numPointers++; + RF_ASSERT(l->numPointers >= 0 && l->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); + if (l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT) { + newelem = rf_real_MakeAllocList(lockflag); + l->next = newelem; + l = newelem; + } + l->pointers[l->numPointers] = p; + l->sizes[l->numPointers] = size; + l->numPointers++; } @@ -239,56 +137,53 @@ void rf_real_AddToAllocList(l, p, size, lockflag) * as the lock around the al_free_list. Note that we can't call Free with the * debug_mem_mutex locked. 
*/ -void rf_FreeAllocList(l) - RF_AllocListElem_t *l; +void +rf_FreeAllocList(l) + RF_AllocListElem_t *l; { - int i; - RF_AllocListElem_t *temp, *p; - - for (p=l; p; p=p->next) { - RF_ASSERT(p->numPointers >= 0 && p->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); - for (i=0; i<p->numPointers; i++) { - RF_ASSERT(p->pointers[i]); - RF_Free(p->pointers[i], p->sizes[i]); - } - } -#ifndef KERNEL - RF_LOCK_MUTEX(rf_debug_mem_mutex); -#endif /* !KERNEL */ - while (l) { - temp = l; - l = l->next; - if (al_free_list_count > RF_AL_FREELIST_MAX) {DO_FREE(temp, sizeof(*temp));} - else {temp->next = al_free_list; al_free_list = temp; al_free_list_count++;} - } -#ifndef KERNEL - RF_UNLOCK_MUTEX(rf_debug_mem_mutex); -#endif /* !KERNEL */ + int i; + RF_AllocListElem_t *temp, *p; + + for (p = l; p; p = p->next) { + RF_ASSERT(p->numPointers >= 0 && p->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); + for (i = 0; i < p->numPointers; i++) { + RF_ASSERT(p->pointers[i]); + RF_Free(p->pointers[i], p->sizes[i]); + } + } + while (l) { + temp = l; + l = l->next; + if (al_free_list_count > RF_AL_FREELIST_MAX) { + DO_FREE(temp, sizeof(*temp)); + } else { + temp->next = al_free_list; + al_free_list = temp; + al_free_list_count++; + } + } } -RF_AllocListElem_t *rf_real_MakeAllocList(lockflag) - int lockflag; +RF_AllocListElem_t * +rf_real_MakeAllocList(lockflag) + int lockflag; { - RF_AllocListElem_t *p; - -#ifndef KERNEL - if (lockflag) { RF_LOCK_MUTEX(rf_debug_mem_mutex); } -#endif /* !KERNEL */ - if (al_free_list) {fl_hit_count++; p = al_free_list; al_free_list = p->next; al_free_list_count--;} - else { - fl_miss_count++; -#ifndef KERNEL - p = (RF_AllocListElem_t *) malloc(sizeof(RF_AllocListElem_t)); /* can't use Malloc at user level b/c we already locked the mutex */ -#else /* !KERNEL */ - RF_Malloc(p, sizeof(RF_AllocListElem_t), (RF_AllocListElem_t *)); /* no allocation locking in kernel, so this is fine */ -#endif /* !KERNEL */ - } -#ifndef KERNEL - if (lockflag) { 
RF_UNLOCK_MUTEX(rf_debug_mem_mutex); } -#endif /* !KERNEL */ - if (p == NULL) { - return(NULL); - } - bzero((char *)p, sizeof(RF_AllocListElem_t)); - return(p); + RF_AllocListElem_t *p; + + if (al_free_list) { + fl_hit_count++; + p = al_free_list; + al_free_list = p->next; + al_free_list_count--; + } else { + fl_miss_count++; + RF_Malloc(p, sizeof(RF_AllocListElem_t), (RF_AllocListElem_t *)); /* no allocation locking + * in kernel, so this is + * fine */ + } + if (p == NULL) { + return (NULL); + } + bzero((char *) p, sizeof(RF_AllocListElem_t)); + return (p); } diff --git a/sys/dev/raidframe/rf_alloclist.h b/sys/dev/raidframe/rf_alloclist.h index b33f7a46e8b..8426b1cd7fd 100644 --- a/sys/dev/raidframe/rf_alloclist.h +++ b/sys/dev/raidframe/rf_alloclist.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_alloclist.h,v 1.1 1999/01/11 14:28:59 niklas Exp $ */ -/* $NetBSD: rf_alloclist.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* $OpenBSD: rf_alloclist.h,v 1.2 1999/02/16 00:02:23 niklas Exp $ */ +/* $NetBSD: rf_alloclist.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,29 +33,6 @@ * ***************************************************************************/ -/* : - * Log: rf_alloclist.h,v - * Revision 1.11 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.10 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.9 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.8 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.7 1995/11/30 16:27:13 wvcii - * added copyright info - * - */ - #ifndef _RF__RF_ALLOCLIST_H_ #define _RF__RF_ALLOCLIST_H_ @@ -64,21 +41,20 @@ #define RF_POINTERS_PER_ALLOC_LIST_ELEMENT 20 struct RF_AllocListElem_s { - void *pointers[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; - int sizes[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; - int numPointers; - RF_AllocListElem_t *next; + void *pointers[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; + int sizes[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; + int numPointers; + RF_AllocListElem_t *next; }; - #define rf_MakeAllocList(_ptr_) _ptr_ = rf_real_MakeAllocList(1); #define rf_AddToAllocList(_l_,_ptr_,_sz_) rf_real_AddToAllocList((_l_), (_ptr_), (_sz_), 1) -int rf_ConfigureAllocList(RF_ShutdownList_t **listp); +int rf_ConfigureAllocList(RF_ShutdownList_t ** listp); #if RF_UTILITY == 0 -void rf_real_AddToAllocList(RF_AllocListElem_t *l, void *p, int size, int lockflag); -void rf_FreeAllocList(RF_AllocListElem_t *l); +void rf_real_AddToAllocList(RF_AllocListElem_t * l, void *p, int size, int lockflag); +void rf_FreeAllocList(RF_AllocListElem_t * l); RF_AllocListElem_t *rf_real_MakeAllocList(int lockflag); -#endif /* RF_UTILITY == 0 */ +#endif /* RF_UTILITY == 0 */ -#endif /* !_RF__RF_ALLOCLIST_H_ */ +#endif /* !_RF__RF_ALLOCLIST_H_ */ diff --git a/sys/dev/raidframe/rf_archs.h b/sys/dev/raidframe/rf_archs.h index 6a4850829ce..7eb07bf39e1 100644 --- a/sys/dev/raidframe/rf_archs.h +++ b/sys/dev/raidframe/rf_archs.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_archs.h,v 1.1 1999/01/11 14:28:59 niklas Exp $ */ -/* $NetBSD: rf_archs.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* $OpenBSD: rf_archs.h,v 1.2 1999/02/16 00:02:23 niklas Exp $ */ +/* $NetBSD: 
rf_archs.h,v 1.4 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -44,112 +44,6 @@ * the list below includes all the modules that can be compiled * out. * - * : - * Log: rf_archs.h,v - * Revision 1.32 1996/08/20 23:05:40 jimz - * define RF_KEEP_DISKSTATS to 1 - * - * Revision 1.31 1996/07/31 15:34:04 jimz - * include evenodd - * - * Revision 1.30 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.29 1996/07/26 20:11:46 jimz - * only define RF_DEMO for CMU_PDL - * - * Revision 1.28 1996/07/26 20:10:57 jimz - * define RF_CMU_PDL only if it isn't already defined - * - * Revision 1.27 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.26 1996/06/17 14:38:33 jimz - * properly #if out RF_DEMO code - * fix bug in MakeConfig that was causing weird behavior - * in configuration routines (config was not zeroed at start) - * clean up genplot handling of stacks - * - * Revision 1.25 1996/06/14 21:24:59 jimz - * turn on RF_CMU_PDL by default - * - * Revision 1.24 1996/06/13 20:41:57 jimz - * add RF_INCLUDE_QUEUE_RANDOM (0) - * - * Revision 1.23 1996/06/11 18:12:36 jimz - * get rid of JOIN operations - * use ThreadGroup stuff instead - * fix some allocation/deallocation and sync bugs - * - * Revision 1.22 1996/06/10 22:24:55 wvcii - * added symbols for enabling forward or backward error - * recovery experiments - * - * Revision 1.21 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.20 1996/05/30 11:29:41 jimz - * Numerous bug fixes. 
Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.19 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.18 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.17 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.16 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.15 1996/05/15 22:32:59 jimz - * remove cache and vs stuff - * - * Revision 1.14 1995/11/30 16:27:34 wvcii - * added copyright info - * - * Revision 1.13 1995/11/28 21:23:44 amiri - * added the interleaved declustering architecture - * ('I'), with distributed sparing. - * - * Revision 1.12 1995/11/17 16:59:45 amiri - * don't INCLUDE_CHAINDECLUSTER in the kernel - * source. 
- * - * Revision 1.11 1995/11/16 16:15:21 amiri - * don't include RAID5 with rotated sparing (INCLUDE_RAID5_RS) in kernel - * - * Revision 1.10 1995/10/12 17:40:47 jimz - * define INCLUDE_LS - * - * Revision 1.9 1995/10/11 06:56:47 jimz - * define INCLUDE_VS (sanity check for compilation) - * - * Revision 1.8 1995/10/05 18:56:24 jimz - * don't INCLUDE_VS - * - * Revision 1.7 1995/10/04 03:51:20 wvcii - * added raid 1 - * - * Revision 1.6 1995/09/07 09:59:29 wvcii - * unstable archs conditionally defined for !KERNEL makes - * - * */ #ifndef _RF__RF_ARCHS_H_ @@ -161,17 +55,12 @@ */ #ifndef RF_CMU_PDL #define RF_CMU_PDL 0 -#endif /* !RF_CMU_PDL */ +#endif /* !RF_CMU_PDL */ /* * Khalil's performance-displaying demo stuff. * Relies on CMU meter tools. */ -#ifndef KERNEL -#if RF_CMU_PDL > 0 -#define RF_DEMO 1 -#endif /* RF_CMU_PDL > 0 */ -#endif /* !KERNEL */ #define RF_INCLUDE_EVENODD 1 @@ -208,4 +97,4 @@ #include "rf_options.h" -#endif /* !_RF__RF_ARCHS_H_ */ +#endif /* !_RF__RF_ARCHS_H_ */ diff --git a/sys/dev/raidframe/rf_aselect.c b/sys/dev/raidframe/rf_aselect.c index f6a1918b7a5..b50be26171b 100644 --- a/sys/dev/raidframe/rf_aselect.c +++ b/sys/dev/raidframe/rf_aselect.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_aselect.c,v 1.1 1999/01/11 14:28:59 niklas Exp $ */ -/* $NetBSD: rf_aselect.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* $OpenBSD: rf_aselect.c,v 1.2 1999/02/16 00:02:23 niklas Exp $ */ +/* $NetBSD: rf_aselect.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -30,110 +30,9 @@ /***************************************************************************** * * aselect.c -- algorithm selection code - * - *****************************************************************************/ -/* - * : - * Log: rf_aselect.c,v - * Revision 1.35 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.34 1996/07/27 18:39:39 jimz - * cleanup sweep - * - * Revision 1.33 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.32 1996/06/12 03:29:40 jimz - * Note: things that call InitHdrNode should check - * for successful return. - * - * Revision 1.31 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.30 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.29 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.28 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.27 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.26 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.25 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.24 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.23 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.22 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera * - * Revision 1.21 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.20 1996/05/03 19:45:35 wvcii - * removed includes of old deg creation files - * updated SelectAlgorithm comments - * - * Revision 1.19 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.18 1995/11/30 16:27:48 wvcii - * added copyright info - * - * Revision 1.17 1995/11/19 16:25:55 wvcii - * SelectAlgorithm now creates an array, returned in desc->dagArray - * return value is now int (1 = FAIL) - * - * Revision 1.16 1995/11/17 15:09:58 wvcii - * fixed bug in SelectAlgorithm in which multiple graphs per stripe are required - * - * Revision 1.15 1995/11/07 17:12:42 wvcii - * changed SelectAlgorithm as follows: - * - * dag creation funcs now create term nodes - * dag selection funcs no longer return numHdrSucc, numTermAnt - * there is now one dag hdr for each dag in a request, implying - * that SelectAlgorithm now returns a linked list of dag hdrs - * - 
*/ + *****************************************************************************/ + #include "rf_archs.h" #include "rf_types.h" @@ -153,7 +52,7 @@ static void TransferDagMemory(RF_DagHeader_t *, RF_DagHeader_t *); static int InitHdrNode(RF_DagHeader_t **, RF_Raid_t *, int); static void UpdateNodeHdrPtr(RF_DagHeader_t *, RF_DagNode_t *); -int rf_SelectAlgorithm(RF_RaidAccessDesc_t *, RF_RaidAccessFlags_t ); +int rf_SelectAlgorithm(RF_RaidAccessDesc_t *, RF_RaidAccessFlags_t); /****************************************************************************** @@ -161,25 +60,25 @@ int rf_SelectAlgorithm(RF_RaidAccessDesc_t *, RF_RaidAccessFlags_t ); * Create and Initialiaze a dag header and termination node * *****************************************************************************/ -static int InitHdrNode(hdr, raidPtr, memChunkEnable) - RF_DagHeader_t **hdr; - RF_Raid_t *raidPtr; - int memChunkEnable; +static int +InitHdrNode(hdr, raidPtr, memChunkEnable) + RF_DagHeader_t **hdr; + RF_Raid_t *raidPtr; + int memChunkEnable; { - /* create and initialize dag hdr */ - *hdr = rf_AllocDAGHeader(); - rf_MakeAllocList((*hdr)->allocList); - if ((*hdr)->allocList == NULL) { - rf_FreeDAGHeader(*hdr); - return(ENOMEM); - } - (*hdr)->status = rf_enable; - (*hdr)->numSuccedents = 0; - (*hdr)->raidPtr = raidPtr; - (*hdr)->next = NULL; - return(0); + /* create and initialize dag hdr */ + *hdr = rf_AllocDAGHeader(); + rf_MakeAllocList((*hdr)->allocList); + if ((*hdr)->allocList == NULL) { + rf_FreeDAGHeader(*hdr); + return (ENOMEM); + } + (*hdr)->status = rf_enable; + (*hdr)->numSuccedents = 0; + (*hdr)->raidPtr = raidPtr; + (*hdr)->next = NULL; + return (0); } - /****************************************************************************** * * Transfer allocation list and mem chunks from one dag to another @@ -188,76 +87,65 @@ static int InitHdrNode(hdr, raidPtr, memChunkEnable) #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) /* the function below is not 
used... so don't define it! */ #else -static void TransferDagMemory(daga, dagb) - RF_DagHeader_t *daga; - RF_DagHeader_t *dagb; +static void +TransferDagMemory(daga, dagb) + RF_DagHeader_t *daga; + RF_DagHeader_t *dagb; { - RF_AccessStripeMapHeader_t *end; - RF_AllocListElem_t *p; - int i, memChunksXfrd = 0, xtraChunksXfrd = 0; - - /* transfer allocList from dagb to daga */ - for (p = dagb->allocList; p ; p = p->next) - { - for (i = 0; i < p->numPointers; i++) - { - rf_AddToAllocList(daga->allocList, p->pointers[i], p->sizes[i]); - p->pointers[i] = NULL; - p->sizes[i] = 0; + RF_AccessStripeMapHeader_t *end; + RF_AllocListElem_t *p; + int i, memChunksXfrd = 0, xtraChunksXfrd = 0; + + /* transfer allocList from dagb to daga */ + for (p = dagb->allocList; p; p = p->next) { + for (i = 0; i < p->numPointers; i++) { + rf_AddToAllocList(daga->allocList, p->pointers[i], p->sizes[i]); + p->pointers[i] = NULL; + p->sizes[i] = 0; + } + p->numPointers = 0; } - p->numPointers = 0; - } - - /* transfer chunks from dagb to daga */ - while ((memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) && (daga->chunkIndex < RF_MAXCHUNKS)) - { - /* stuff chunks into daga's memChunk array */ - if (memChunksXfrd < dagb->chunkIndex) - { - daga->memChunk[daga->chunkIndex++] = dagb->memChunk[memChunksXfrd]; - dagb->memChunk[memChunksXfrd++] = NULL; + + /* transfer chunks from dagb to daga */ + while ((memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) && (daga->chunkIndex < RF_MAXCHUNKS)) { + /* stuff chunks into daga's memChunk array */ + if (memChunksXfrd < dagb->chunkIndex) { + daga->memChunk[daga->chunkIndex++] = dagb->memChunk[memChunksXfrd]; + dagb->memChunk[memChunksXfrd++] = NULL; + } else { + daga->memChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd]; + dagb->xtraMemChunk[xtraChunksXfrd++] = NULL; + } } - else - { - daga->memChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd]; - 
dagb->xtraMemChunk[xtraChunksXfrd++] = NULL; + /* use escape hatch to hold excess chunks */ + while (memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) { + if (memChunksXfrd < dagb->chunkIndex) { + daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->memChunk[memChunksXfrd]; + dagb->memChunk[memChunksXfrd++] = NULL; + } else { + daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd]; + dagb->xtraMemChunk[xtraChunksXfrd++] = NULL; + } } - } - /* use escape hatch to hold excess chunks */ - while (memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) { - if (memChunksXfrd < dagb->chunkIndex) - { - daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->memChunk[memChunksXfrd]; - dagb->memChunk[memChunksXfrd++] = NULL; - } - else - { - daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd]; - dagb->xtraMemChunk[xtraChunksXfrd++] = NULL; - } - } - RF_ASSERT((memChunksXfrd == dagb->chunkIndex) && (xtraChunksXfrd == dagb->xtraChunkIndex)); - RF_ASSERT(daga->chunkIndex <= RF_MAXCHUNKS); - RF_ASSERT(daga->xtraChunkIndex <= daga->xtraChunkCnt); - dagb->chunkIndex = 0; - dagb->xtraChunkIndex = 0; - - /* transfer asmList from dagb to daga */ - if (dagb->asmList) - { - if (daga->asmList) - { - end = daga->asmList; - while (end->next) - end = end->next; - end->next = dagb->asmList; + RF_ASSERT((memChunksXfrd == dagb->chunkIndex) && (xtraChunksXfrd == dagb->xtraChunkIndex)); + RF_ASSERT(daga->chunkIndex <= RF_MAXCHUNKS); + RF_ASSERT(daga->xtraChunkIndex <= daga->xtraChunkCnt); + dagb->chunkIndex = 0; + dagb->xtraChunkIndex = 0; + + /* transfer asmList from dagb to daga */ + if (dagb->asmList) { + if (daga->asmList) { + end = daga->asmList; + while (end->next) + end = end->next; + end->next = dagb->asmList; + } else + daga->asmList = dagb->asmList; + dagb->asmList = NULL; } - else - daga->asmList = dagb->asmList; - dagb->asmList = NULL; - } } -#endif /* __NetBSD__ || __OpenBSD__ */ +#endif /* 
__NetBSD__ || __OpenBSD__ */ /***************************************************************************************** * @@ -268,18 +156,18 @@ static void TransferDagMemory(daga, dagb) * succedents WILL NOT BE EXAMINED. * ****************************************************************************************/ -static void UpdateNodeHdrPtr(hdr, node) - RF_DagHeader_t *hdr; - RF_DagNode_t *node; +static void +UpdateNodeHdrPtr(hdr, node) + RF_DagHeader_t *hdr; + RF_DagNode_t *node; { - int i; - RF_ASSERT(hdr != NULL && node != NULL); - for (i = 0; i < node->numSuccedents; i++) - if (node->succedents[i]->dagHdr != hdr) - UpdateNodeHdrPtr(hdr, node->succedents[i]); - node->dagHdr = hdr; + int i; + RF_ASSERT(hdr != NULL && node != NULL); + for (i = 0; i < node->numSuccedents; i++) + if (node->succedents[i]->dagHdr != hdr) + UpdateNodeHdrPtr(hdr, node->succedents[i]); + node->dagHdr = hdr; } - /****************************************************************************** * * Create a DAG to do a read or write operation. @@ -292,7 +180,7 @@ static void UpdateNodeHdrPtr(hdr, node) * unit or one per block (sector). When this occurs, these dags are returned * as a linked list (dagList) which is executed sequentially (to preserve * atomic parity updates in the stripe). - * + * * dags which operate on independent parity goups (stripes) are returned in * independent dagLists (distinct elements in desc->dagArray) and may be * executed concurrently. 
@@ -314,305 +202,294 @@ static void UpdateNodeHdrPtr(hdr, node) #define MAXNSTRIPES 50 -int rf_SelectAlgorithm(desc, flags) - RF_RaidAccessDesc_t *desc; - RF_RaidAccessFlags_t flags; +int +rf_SelectAlgorithm(desc, flags) + RF_RaidAccessDesc_t *desc; + RF_RaidAccessFlags_t flags; { - RF_AccessStripeMapHeader_t *asm_h = desc->asmap; - RF_IoType_t type = desc->type; - RF_Raid_t *raidPtr = desc->raidPtr; - void *bp = desc->bp; - - RF_AccessStripeMap_t *asmap = asm_h->stripeMap; - RF_AccessStripeMap_t *asm_p; - RF_DagHeader_t *dag_h = NULL, *tempdag_h, *lastdag_h; - int i, j, k; - RF_VoidFuncPtr *stripeFuncs, normalStripeFuncs[MAXNSTRIPES]; - RF_AccessStripeMap_t *asm_up, *asm_bp; - RF_AccessStripeMapHeader_t ***asmh_u, *endASMList; - RF_AccessStripeMapHeader_t ***asmh_b; - RF_VoidFuncPtr **stripeUnitFuncs, uFunc; - RF_VoidFuncPtr **blockFuncs, bFunc; - int numStripesBailed = 0, cantCreateDAGs = RF_FALSE; - int numStripeUnitsBailed = 0; - int stripeNum, numUnitDags = 0, stripeUnitNum, numBlockDags = 0; - RF_StripeNum_t numStripeUnits; - RF_SectorNum_t numBlocks; - RF_RaidAddr_t address; - int length; - RF_PhysDiskAddr_t *physPtr; - caddr_t buffer; - - lastdag_h = NULL; - asmh_u = asmh_b = NULL; - stripeUnitFuncs = NULL; - blockFuncs = NULL; - - /* get an array of dag-function creation pointers, try to avoid calling malloc */ - if (asm_h->numStripes <= MAXNSTRIPES) stripeFuncs = normalStripeFuncs; - else RF_Calloc(stripeFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - - /* walk through the asm list once collecting information */ - /* attempt to find a single creation function for each stripe */ - desc->numStripes = 0; - for (i=0,asm_p = asmap; asm_p; asm_p=asm_p->next,i++) { - desc->numStripes++; - (raidPtr->Layout.map->SelectionFunc)(raidPtr, type, asm_p, &stripeFuncs[i]); - /* check to see if we found a creation func for this stripe */ - if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) - { - /* could not find creation function for entire stripe - 
so, let's see if we can find one for each stripe unit in the stripe */ - - if (numStripesBailed == 0) - { - /* one stripe map header for each stripe we bail on */ - RF_Malloc(asmh_u, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes, (RF_AccessStripeMapHeader_t ***)); - /* create an array of ptrs to arrays of stripeFuncs */ - RF_Calloc(stripeUnitFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **)); - } - - /* create an array of creation funcs (called stripeFuncs) for this stripe */ - numStripeUnits = asm_p->numStripeUnitsAccessed; - RF_Calloc(stripeUnitFuncs[numStripesBailed], numStripeUnits, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - RF_Malloc(asmh_u[numStripesBailed], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **)); - - /* lookup array of stripeUnitFuncs for this stripe */ - for (j=0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) - { - /* remap for series of single stripe-unit accesses */ - address = physPtr->raidAddress; - length = physPtr->numSector; - buffer = physPtr->bufPtr; - - asmh_u[numStripesBailed][j] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP); - asm_up = asmh_u[numStripesBailed][j]->stripeMap; - - /* get the creation func for this stripe unit */ - (raidPtr->Layout.map-> SelectionFunc)(raidPtr, type, asm_up, &(stripeUnitFuncs[numStripesBailed][j])); - - /* check to see if we found a creation func for this stripe unit */ - if (stripeUnitFuncs[numStripesBailed][j] == (RF_VoidFuncPtr) NULL) - { - /* could not find creation function for stripe unit so, - let's see if we can find one for each block in the stripe unit */ - if (numStripeUnitsBailed == 0) - { - /* one stripe map header for each stripe unit we bail on */ - RF_Malloc(asmh_b, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes * raidPtr->Layout.numDataCol, (RF_AccessStripeMapHeader_t ***)); - /* create an array of ptrs to arrays of blockFuncs */ - RF_Calloc(blockFuncs, 
asm_h->numStripes * raidPtr->Layout.numDataCol, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **)); - } - - /* create an array of creation funcs (called blockFuncs) for this stripe unit */ - numBlocks = physPtr->numSector; - numBlockDags += numBlocks; - RF_Calloc(blockFuncs[numStripeUnitsBailed], numBlocks, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - RF_Malloc(asmh_b[numStripeUnitsBailed], numBlocks * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **)); - - /* lookup array of blockFuncs for this stripe unit */ - for (k=0; k < numBlocks; k++) - { - /* remap for series of single stripe-unit accesses */ - address = physPtr->raidAddress + k; - length = 1; - buffer = physPtr->bufPtr + (k * (1<<raidPtr->logBytesPerSector)); - - asmh_b[numStripeUnitsBailed][k] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP); - asm_bp = asmh_b[numStripeUnitsBailed][k]->stripeMap; - - /* get the creation func for this stripe unit */ - (raidPtr->Layout.map-> SelectionFunc)(raidPtr, type, asm_bp, &(blockFuncs[numStripeUnitsBailed][k])); - - /* check to see if we found a creation func for this stripe unit */ - if (blockFuncs[numStripeUnitsBailed][k] == NULL) - cantCreateDAGs = RF_TRUE; - } - numStripeUnitsBailed++; - } - else - { - numUnitDags++; - } - } - RF_ASSERT(j == numStripeUnits); - numStripesBailed++; - } - } - - if (cantCreateDAGs) - { - /* free memory and punt */ - if (asm_h->numStripes > MAXNSTRIPES) - RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - if (numStripesBailed > 0) - { - stripeNum = 0; - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) - if (stripeFuncs[i] == NULL) - { - numStripeUnits = asm_p->numStripeUnitsAccessed; - for (j = 0; j < numStripeUnits; j++) - rf_FreeAccessStripeMap(asmh_u[stripeNum][j]); - RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr)); - stripeNum++; - } - RF_ASSERT(stripeNum == 
numStripesBailed); - RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); - } - return(1); - } - else - { - /* begin dag creation */ - stripeNum = 0; - stripeUnitNum = 0; - - /* create an array of dagLists and fill them in */ - RF_CallocAndAdd(desc->dagArray, desc->numStripes, sizeof(RF_DagList_t), (RF_DagList_t *), desc->cleanupList); - - for (i=0, asm_p = asmap; asm_p; asm_p=asm_p->next,i++) { - /* grab dag header for this stripe */ - dag_h = NULL; - desc->dagArray[i].desc = desc; - - if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) - { - /* use bailout functions for this stripe */ - for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr=physPtr->next, j++) - { - uFunc = stripeUnitFuncs[stripeNum][j]; - if (uFunc == (RF_VoidFuncPtr) NULL) - { - /* use bailout functions for this stripe unit */ - for (k = 0; k < physPtr->numSector; k++) - { - /* create a dag for this block */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); - desc->dagArray[i].numDags++; - if (dag_h == NULL) { - dag_h = tempdag_h; + RF_AccessStripeMapHeader_t *asm_h = desc->asmap; + RF_IoType_t type = desc->type; + RF_Raid_t *raidPtr = desc->raidPtr; + void *bp = desc->bp; + + RF_AccessStripeMap_t *asmap = asm_h->stripeMap; + RF_AccessStripeMap_t *asm_p; + RF_DagHeader_t *dag_h = NULL, *tempdag_h, *lastdag_h; + int i, j, k; + RF_VoidFuncPtr *stripeFuncs, normalStripeFuncs[MAXNSTRIPES]; + RF_AccessStripeMap_t *asm_up, *asm_bp; + RF_AccessStripeMapHeader_t ***asmh_u, *endASMList; + RF_AccessStripeMapHeader_t ***asmh_b; + RF_VoidFuncPtr **stripeUnitFuncs, uFunc; + RF_VoidFuncPtr **blockFuncs, bFunc; + int numStripesBailed = 0, cantCreateDAGs = RF_FALSE; + int numStripeUnitsBailed = 0; + int stripeNum, numUnitDags = 0, stripeUnitNum, numBlockDags = 0; + RF_StripeNum_t numStripeUnits; + RF_SectorNum_t numBlocks; + RF_RaidAddr_t address; + int length; + RF_PhysDiskAddr_t *physPtr; + caddr_t buffer; + + 
lastdag_h = NULL; + asmh_u = asmh_b = NULL; + stripeUnitFuncs = NULL; + blockFuncs = NULL; + + /* get an array of dag-function creation pointers, try to avoid + * calling malloc */ + if (asm_h->numStripes <= MAXNSTRIPES) + stripeFuncs = normalStripeFuncs; + else + RF_Calloc(stripeFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); + + /* walk through the asm list once collecting information */ + /* attempt to find a single creation function for each stripe */ + desc->numStripes = 0; + for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) { + desc->numStripes++; + (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_p, &stripeFuncs[i]); + /* check to see if we found a creation func for this stripe */ + if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) { + /* could not find creation function for entire stripe + * so, let's see if we can find one for each stripe + * unit in the stripe */ + + if (numStripesBailed == 0) { + /* one stripe map header for each stripe we + * bail on */ + RF_Malloc(asmh_u, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes, (RF_AccessStripeMapHeader_t ***)); + /* create an array of ptrs to arrays of + * stripeFuncs */ + RF_Calloc(stripeUnitFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **)); } - else { - lastdag_h->next = tempdag_h; + /* create an array of creation funcs (called + * stripeFuncs) for this stripe */ + numStripeUnits = asm_p->numStripeUnitsAccessed; + RF_Calloc(stripeUnitFuncs[numStripesBailed], numStripeUnits, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); + RF_Malloc(asmh_u[numStripesBailed], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **)); + + /* lookup array of stripeUnitFuncs for this stripe */ + for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { + /* remap for series of single stripe-unit + * accesses */ + address = physPtr->raidAddress; + length = physPtr->numSector; + buffer = physPtr->bufPtr; + + 
asmh_u[numStripesBailed][j] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP); + asm_up = asmh_u[numStripesBailed][j]->stripeMap; + + /* get the creation func for this stripe unit */ + (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_up, &(stripeUnitFuncs[numStripesBailed][j])); + + /* check to see if we found a creation func + * for this stripe unit */ + if (stripeUnitFuncs[numStripesBailed][j] == (RF_VoidFuncPtr) NULL) { + /* could not find creation function + * for stripe unit so, let's see if we + * can find one for each block in the + * stripe unit */ + if (numStripeUnitsBailed == 0) { + /* one stripe map header for + * each stripe unit we bail on */ + RF_Malloc(asmh_b, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes * raidPtr->Layout.numDataCol, (RF_AccessStripeMapHeader_t ***)); + /* create an array of ptrs to + * arrays of blockFuncs */ + RF_Calloc(blockFuncs, asm_h->numStripes * raidPtr->Layout.numDataCol, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **)); + } + /* create an array of creation funcs + * (called blockFuncs) for this stripe + * unit */ + numBlocks = physPtr->numSector; + numBlockDags += numBlocks; + RF_Calloc(blockFuncs[numStripeUnitsBailed], numBlocks, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); + RF_Malloc(asmh_b[numStripeUnitsBailed], numBlocks * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **)); + + /* lookup array of blockFuncs for this + * stripe unit */ + for (k = 0; k < numBlocks; k++) { + /* remap for series of single + * stripe-unit accesses */ + address = physPtr->raidAddress + k; + length = 1; + buffer = physPtr->bufPtr + (k * (1 << raidPtr->logBytesPerSector)); + + asmh_b[numStripeUnitsBailed][k] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP); + asm_bp = asmh_b[numStripeUnitsBailed][k]->stripeMap; + + /* get the creation func for + * this stripe unit */ + (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_bp, &(blockFuncs[numStripeUnitsBailed][k])); + + /* 
check to see if we found a + * creation func for this + * stripe unit */ + if (blockFuncs[numStripeUnitsBailed][k] == NULL) + cantCreateDAGs = RF_TRUE; + } + numStripeUnitsBailed++; + } else { + numUnitDags++; + } } - lastdag_h = tempdag_h; - - bFunc = blockFuncs[stripeUnitNum][k]; - RF_ASSERT(bFunc); - asm_bp = asmh_b[stripeUnitNum][k]->stripeMap; - (*bFunc)(raidPtr, asm_bp, tempdag_h, bp, flags, tempdag_h->allocList); - } - stripeUnitNum++; - } - else - { - /* create a dag for this unit */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); - desc->dagArray[i].numDags++; - if (dag_h == NULL) { - dag_h = tempdag_h; - } - else { - lastdag_h->next = tempdag_h; - } - lastdag_h = tempdag_h; - - asm_up = asmh_u[stripeNum][j]->stripeMap; - (*uFunc)(raidPtr, asm_up, tempdag_h, bp, flags, tempdag_h->allocList); - } - } - RF_ASSERT(j == asm_p->numStripeUnitsAccessed); - /* merge linked bailout dag to existing dag collection */ - stripeNum++; - } - else { - /* Create a dag for this parity stripe */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); - desc->dagArray[i].numDags++; - if (dag_h == NULL) { - dag_h = tempdag_h; - } - else { - lastdag_h->next = tempdag_h; - } - lastdag_h = tempdag_h; - - (stripeFuncs[i])(raidPtr, asm_p, tempdag_h, bp, flags, tempdag_h->allocList); + RF_ASSERT(j == numStripeUnits); + numStripesBailed++; + } } - desc->dagArray[i].dags = dag_h; - } - RF_ASSERT(i == desc->numStripes); - - /* free memory */ - if (asm_h->numStripes > MAXNSTRIPES) - RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - if ((numStripesBailed > 0) || (numStripeUnitsBailed > 0)) - { - stripeNum = 0; - stripeUnitNum = 0; - if (dag_h->asmList) - { - endASMList = dag_h->asmList; - while (endASMList->next) - endASMList = endASMList->next; - } - else - endASMList = NULL; - /* walk through io, stripe by stripe */ - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) - if (stripeFuncs[i] == NULL) - { - numStripeUnits = asm_p->numStripeUnitsAccessed; - /* 
walk through stripe, stripe unit by stripe unit */ - for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) - { - if (stripeUnitFuncs[stripeNum][j] == NULL) - { - numBlocks = physPtr->numSector; - /* walk through stripe unit, block by block */ - for (k = 0; k < numBlocks; k++) - if (dag_h->asmList == NULL) - { - dag_h->asmList = asmh_b[stripeUnitNum][k]; - endASMList = dag_h->asmList; - } - else - { - endASMList->next = asmh_b[stripeUnitNum][k]; - endASMList = endASMList->next; - } - RF_Free(asmh_b[stripeUnitNum], numBlocks * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(blockFuncs[stripeUnitNum], numBlocks * sizeof(RF_VoidFuncPtr)); - stripeUnitNum++; - } - if (dag_h->asmList == NULL) - { - dag_h->asmList = asmh_u[stripeNum][j]; - endASMList = dag_h->asmList; - } - else - { - endASMList->next = asmh_u[stripeNum][j]; - endASMList = endASMList->next; - } - } - RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr)); - stripeNum++; - } - RF_ASSERT(stripeNum == numStripesBailed); - RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); - if (numStripeUnitsBailed > 0) - { - RF_ASSERT(stripeUnitNum == numStripeUnitsBailed); - RF_Free(blockFuncs, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_b, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); - } + + if (cantCreateDAGs) { + /* free memory and punt */ + if (asm_h->numStripes > MAXNSTRIPES) + RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); + if (numStripesBailed > 0) { + stripeNum = 0; + for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) + if (stripeFuncs[i] == NULL) { + numStripeUnits = asm_p->numStripeUnitsAccessed; + for (j = 0; j < numStripeUnits; j++) + 
rf_FreeAccessStripeMap(asmh_u[stripeNum][j]); + RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *)); + RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr)); + stripeNum++; + } + RF_ASSERT(stripeNum == numStripesBailed); + RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); + RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); + } + return (1); + } else { + /* begin dag creation */ + stripeNum = 0; + stripeUnitNum = 0; + + /* create an array of dagLists and fill them in */ + RF_CallocAndAdd(desc->dagArray, desc->numStripes, sizeof(RF_DagList_t), (RF_DagList_t *), desc->cleanupList); + + for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) { + /* grab dag header for this stripe */ + dag_h = NULL; + desc->dagArray[i].desc = desc; + + if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) { + /* use bailout functions for this stripe */ + for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { + uFunc = stripeUnitFuncs[stripeNum][j]; + if (uFunc == (RF_VoidFuncPtr) NULL) { + /* use bailout functions for + * this stripe unit */ + for (k = 0; k < physPtr->numSector; k++) { + /* create a dag for + * this block */ + InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); + desc->dagArray[i].numDags++; + if (dag_h == NULL) { + dag_h = tempdag_h; + } else { + lastdag_h->next = tempdag_h; + } + lastdag_h = tempdag_h; + + bFunc = blockFuncs[stripeUnitNum][k]; + RF_ASSERT(bFunc); + asm_bp = asmh_b[stripeUnitNum][k]->stripeMap; + (*bFunc) (raidPtr, asm_bp, tempdag_h, bp, flags, tempdag_h->allocList); + } + stripeUnitNum++; + } else { + /* create a dag for this unit */ + InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); + desc->dagArray[i].numDags++; + if (dag_h == NULL) { + dag_h = tempdag_h; + } else { + lastdag_h->next = tempdag_h; + } + lastdag_h = tempdag_h; + + asm_up = asmh_u[stripeNum][j]->stripeMap; + (*uFunc) (raidPtr, asm_up, tempdag_h, bp, flags, 
tempdag_h->allocList); + } + } + RF_ASSERT(j == asm_p->numStripeUnitsAccessed); + /* merge linked bailout dag to existing dag + * collection */ + stripeNum++; + } else { + /* Create a dag for this parity stripe */ + InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); + desc->dagArray[i].numDags++; + if (dag_h == NULL) { + dag_h = tempdag_h; + } else { + lastdag_h->next = tempdag_h; + } + lastdag_h = tempdag_h; + + (stripeFuncs[i]) (raidPtr, asm_p, tempdag_h, bp, flags, tempdag_h->allocList); + } + desc->dagArray[i].dags = dag_h; + } + RF_ASSERT(i == desc->numStripes); + + /* free memory */ + if (asm_h->numStripes > MAXNSTRIPES) + RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); + if ((numStripesBailed > 0) || (numStripeUnitsBailed > 0)) { + stripeNum = 0; + stripeUnitNum = 0; + if (dag_h->asmList) { + endASMList = dag_h->asmList; + while (endASMList->next) + endASMList = endASMList->next; + } else + endASMList = NULL; + /* walk through io, stripe by stripe */ + for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) + if (stripeFuncs[i] == NULL) { + numStripeUnits = asm_p->numStripeUnitsAccessed; + /* walk through stripe, stripe unit by + * stripe unit */ + for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { + if (stripeUnitFuncs[stripeNum][j] == NULL) { + numBlocks = physPtr->numSector; + /* walk through stripe + * unit, block by + * block */ + for (k = 0; k < numBlocks; k++) + if (dag_h->asmList == NULL) { + dag_h->asmList = asmh_b[stripeUnitNum][k]; + endASMList = dag_h->asmList; + } else { + endASMList->next = asmh_b[stripeUnitNum][k]; + endASMList = endASMList->next; + } + RF_Free(asmh_b[stripeUnitNum], numBlocks * sizeof(RF_AccessStripeMapHeader_t *)); + RF_Free(blockFuncs[stripeUnitNum], numBlocks * sizeof(RF_VoidFuncPtr)); + stripeUnitNum++; + } + if (dag_h->asmList == NULL) { + dag_h->asmList = asmh_u[stripeNum][j]; + endASMList = dag_h->asmList; + } else { + endASMList->next = asmh_u[stripeNum][j]; + 
endASMList = endASMList->next; + } + } + RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *)); + RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr)); + stripeNum++; + } + RF_ASSERT(stripeNum == numStripesBailed); + RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); + RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); + if (numStripeUnitsBailed > 0) { + RF_ASSERT(stripeUnitNum == numStripeUnitsBailed); + RF_Free(blockFuncs, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_VoidFuncPtr)); + RF_Free(asmh_b, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); + } + } + return (0); } - return(0); - } } diff --git a/sys/dev/raidframe/rf_aselect.h b/sys/dev/raidframe/rf_aselect.h index 1b1d3e51795..565f042ab53 100644 --- a/sys/dev/raidframe/rf_aselect.h +++ b/sys/dev/raidframe/rf_aselect.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_aselect.h,v 1.1 1999/01/11 14:29:00 niklas Exp $ */ -/* $NetBSD: rf_aselect.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* $OpenBSD: rf_aselect.h,v 1.2 1999/02/16 00:02:24 niklas Exp $ */ +/* $NetBSD: rf_aselect.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -32,29 +32,12 @@ * aselect.h -- header file for algorithm selection code * *****************************************************************************/ -/* : - * Log: rf_aselect.h,v - * Revision 1.5 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.4 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.3 1995/11/30 16:28:00 wvcii - * added copyright info - * - * Revision 1.2 1995/11/19 16:20:46 wvcii - * changed SelectAlgorithm prototype - * - */ #ifndef _RF__RF_ASELECT_H_ #define _RF__RF_ASELECT_H_ - + #include "rf_desc.h" -int rf_SelectAlgorithm(RF_RaidAccessDesc_t *desc, RF_RaidAccessFlags_t flags); +int rf_SelectAlgorithm(RF_RaidAccessDesc_t * desc, RF_RaidAccessFlags_t flags); -#endif /* !_RF__RF_ASELECT_H_ */ +#endif /* !_RF__RF_ASELECT_H_ */ diff --git a/sys/dev/raidframe/rf_callback.c b/sys/dev/raidframe/rf_callback.c index dffd52fc7a6..ba7e3869c10 100644 --- a/sys/dev/raidframe/rf_callback.c +++ b/sys/dev/raidframe/rf_callback.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_callback.c,v 1.1 1999/01/11 14:29:00 niklas Exp $ */ -/* $NetBSD: rf_callback.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* $OpenBSD: rf_callback.c,v 1.2 1999/02/16 00:02:24 niklas Exp $ */ +/* $NetBSD: rf_callback.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,37 +33,6 @@ * ****************************************************************************************/ -/* : - * Log: rf_callback.c,v - * Revision 1.11 1996/06/17 03:18:04 jimz - * include shutdown.h for macroized ShutdownCreate - * - * Revision 1.10 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.9 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.8 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.7 1996/05/17 16:30:41 jimz - * convert to RF_FREELIST stuff - * - * Revision 1.6 1995/12/01 15:16:04 root - * added copyright info - * - */ - -#ifndef _KERNEL -#ifdef __NetBSD__ -#include <unistd.h> -#endif /* __NetBSD__ */ -#endif #include "rf_types.h" #include "rf_threadstuff.h" @@ -79,43 +48,47 @@ static RF_FreeList_t *rf_callback_freelist; #define RF_CALLBACK_INITIAL 4 static void rf_ShutdownCallback(void *); -static void rf_ShutdownCallback(ignored) - void *ignored; +static void +rf_ShutdownCallback(ignored) + void *ignored; { - RF_FREELIST_DESTROY(rf_callback_freelist,next,(RF_CallbackDesc_t *)); + RF_FREELIST_DESTROY(rf_callback_freelist, next, (RF_CallbackDesc_t *)); } -int rf_ConfigureCallback(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureCallback(listp) + RF_ShutdownList_t **listp; { - int rc; + int rc; RF_FREELIST_CREATE(rf_callback_freelist, RF_MAX_FREE_CALLBACK, - RF_CALLBACK_INC, sizeof(RF_CallbackDesc_t)); + RF_CALLBACK_INC, sizeof(RF_CallbackDesc_t)); if (rf_callback_freelist == NULL) - return(ENOMEM); + return (ENOMEM); rc = rf_ShutdownCreate(listp, rf_ShutdownCallback, NULL); if (rc) { RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + __LINE__, rc); rf_ShutdownCallback(NULL); - return(rc); + return (rc); } - RF_FREELIST_PRIME(rf_callback_freelist, RF_CALLBACK_INITIAL,next, - (RF_CallbackDesc_t *)); - return(0); + RF_FREELIST_PRIME(rf_callback_freelist, RF_CALLBACK_INITIAL, next, + (RF_CallbackDesc_t *)); + return (0); } -RF_CallbackDesc_t *rf_AllocCallbackDesc() +RF_CallbackDesc_t * +rf_AllocCallbackDesc() { RF_CallbackDesc_t *p; - 
RF_FREELIST_GET(rf_callback_freelist,p,next,(RF_CallbackDesc_t *)); - return(p); + RF_FREELIST_GET(rf_callback_freelist, p, next, (RF_CallbackDesc_t *)); + return (p); } -void rf_FreeCallbackDesc(p) - RF_CallbackDesc_t *p; +void +rf_FreeCallbackDesc(p) + RF_CallbackDesc_t *p; { - RF_FREELIST_FREE(rf_callback_freelist,p,next); + RF_FREELIST_FREE(rf_callback_freelist, p, next); } diff --git a/sys/dev/raidframe/rf_callback.h b/sys/dev/raidframe/rf_callback.h index cb3db8ebbbd..528eed625b9 100644 --- a/sys/dev/raidframe/rf_callback.h +++ b/sys/dev/raidframe/rf_callback.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_callback.h,v 1.1 1999/01/11 14:29:00 niklas Exp $ */ -/* $NetBSD: rf_callback.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* $OpenBSD: rf_callback.h,v 1.2 1999/02/16 00:02:24 niklas Exp $ */ +/* $NetBSD: rf_callback.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -42,51 +42,24 @@ * ****************************************************************************************/ -/* : - * Log: rf_callback.h,v - * Revision 1.8 1996/08/01 15:57:28 jimz - * minor cleanup - * - * Revision 1.7 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.6 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.5 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.4 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.3 1996/05/17 16:30:46 jimz - * add prototypes - * - * Revision 1.2 1995/12/01 15:15:55 root - * added copyright info - * - */ - #ifndef _RF__RF_CALLBACK_H_ #define _RF__RF_CALLBACK_H_ #include "rf_types.h" struct RF_CallbackDesc_s { - void (*callbackFunc)(RF_CBParam_t); /* function to call */ - RF_CBParam_t callbackArg; /* args to give to function, or just info about this callback */ - RF_CBParam_t callbackArg2; - RF_RowCol_t row; /* disk row and column IDs to give to the callback func */ - RF_RowCol_t col; - RF_CallbackDesc_t *next; /* next entry in list */ + void (*callbackFunc) (RF_CBParam_t); /* function to call */ + RF_CBParam_t callbackArg; /* args to give to function, or just + * info about this callback */ + RF_CBParam_t callbackArg2; + RF_RowCol_t row; /* disk row and column IDs to give to the + * callback func */ + RF_RowCol_t col; + RF_CallbackDesc_t *next;/* next entry in list */ }; -int rf_ConfigureCallback(RF_ShutdownList_t **listp); -RF_CallbackDesc_t *rf_AllocCallbackDesc(void); -void rf_FreeCallbackDesc(RF_CallbackDesc_t *p); +int rf_ConfigureCallback(RF_ShutdownList_t ** listp); +RF_CallbackDesc_t *rf_AllocCallbackDesc(void); +void rf_FreeCallbackDesc(RF_CallbackDesc_t * p); -#endif /* !_RF__RF_CALLBACK_H_ */ +#endif /* !_RF__RF_CALLBACK_H_ */ diff --git a/sys/dev/raidframe/rf_chaindecluster.c b/sys/dev/raidframe/rf_chaindecluster.c index bbb7caa92ec..29a0bbf40d3 100644 --- a/sys/dev/raidframe/rf_chaindecluster.c +++ b/sys/dev/raidframe/rf_chaindecluster.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_chaindecluster.c,v 1.1 1999/01/11 14:29:01 niklas Exp $ */ -/* $NetBSD: rf_chaindecluster.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* 
$OpenBSD: rf_chaindecluster.c,v 1.2 1999/02/16 00:02:24 niklas Exp $ */ +/* $NetBSD: rf_chaindecluster.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,107 +33,6 @@ * *****************************************************************************/ -/* : - * Log: rf_chaindecluster.c,v - * Revision 1.33 1996/08/02 13:20:34 jimz - * get rid of bogus (long) casts - * - * Revision 1.32 1996/07/31 16:56:18 jimz - * dataBytesPerStripe, sectorsPerDisk init arch-indep. - * - * Revision 1.31 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.30 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.29 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.28 1996/06/19 17:53:48 jimz - * move GetNumSparePUs, InstallSpareTable ops into layout switch - * - * Revision 1.27 1996/06/11 15:19:57 wvcii - * added include of rf_chaindecluster.h - * fixed parameter list of rf_ConfigureChainDecluster - * - * Revision 1.26 1996/06/11 08:55:15 jimz - * improved error-checking at configuration time - * - * Revision 1.25 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.24 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.23 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.22 1996/06/06 17:31:30 jimz - * use CreateMirrorPartitionReadDAG for mirrored reads - * - * Revision 1.21 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.20 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.19 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.18 1996/05/31 16:13:28 amiri - * removed/added some commnets. - * - * Revision 1.17 1996/05/31 05:01:52 amiri - * fixed a bug related to sparing layout. 
- * - * Revision 1.16 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.15 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.14 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.13 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.12 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.11 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.10 1996/05/03 19:53:56 wvcii - * removed include of rf_redstripe.h - * moved dag creation routines to new dag library - * - */ - #include "rf_archs.h" #include "rf_types.h" #include "rf_raid.h" @@ -149,197 +48,200 @@ #include "rf_utils.h" typedef struct RF_ChaindeclusterConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time - * and used by IdentifyStripe */ - RF_StripeCount_t numSparingRegions; - RF_StripeCount_t stripeUnitsPerSparingRegion; - RF_SectorNum_t mirrorStripeOffset; -} RF_ChaindeclusterConfigInfo_t; - -int rf_ConfigureChainDecluster( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) + RF_RowCol_t **stripeIdentifier; /* filled in at config time and used + * by IdentifyStripe */ + RF_StripeCount_t numSparingRegions; + RF_StripeCount_t stripeUnitsPerSparingRegion; + RF_SectorNum_t mirrorStripeOffset; +} RF_ChaindeclusterConfigInfo_t; + +int +rf_ConfigureChainDecluster( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_StripeCount_t num_used_stripeUnitsPerDisk; - RF_ChaindeclusterConfigInfo_t *info; 
- RF_RowCol_t i; - - /* create a Chained Declustering configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return(ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* fill in the config structure. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2 , raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return(ENOMEM); - for (i=0; i< raidPtr->numCol; i++) { - info->stripeIdentifier[i][0] = i % raidPtr->numCol; - info->stripeIdentifier[i][1] = (i+1) % raidPtr->numCol; - } - - RF_ASSERT(raidPtr->numRow == 1); - - /* fill in the remaining layout parameters */ - num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % - (2*raidPtr->numCol-2) ); - info->numSparingRegions = num_used_stripeUnitsPerDisk / (2*raidPtr->numCol-2); - info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); - info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol-1); - layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - - layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; - - raidPtr->sectorsPerDisk = - num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = - (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; - - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; - - return(0); + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_StripeCount_t num_used_stripeUnitsPerDisk; + RF_ChaindeclusterConfigInfo_t *info; + RF_RowCol_t i; + + /* create a Chained Declustering configuration structure */ + 
RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList); + if (info == NULL) + return (ENOMEM); + layoutPtr->layoutSpecificInfo = (void *) info; + + /* fill in the config structure. */ + info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2, raidPtr->cleanupList); + if (info->stripeIdentifier == NULL) + return (ENOMEM); + for (i = 0; i < raidPtr->numCol; i++) { + info->stripeIdentifier[i][0] = i % raidPtr->numCol; + info->stripeIdentifier[i][1] = (i + 1) % raidPtr->numCol; + } + + RF_ASSERT(raidPtr->numRow == 1); + + /* fill in the remaining layout parameters */ + num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % + (2 * raidPtr->numCol - 2)); + info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol - 2); + info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); + info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol - 1); + layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->numDataCol = 1; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->numParityCol = 1; + + layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; + + raidPtr->sectorsPerDisk = + num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + + raidPtr->totalSectors = + (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; + + layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; + + return (0); } -RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(raidPtr) - RF_Raid_t *raidPtr; +RF_ReconUnitCount_t +rf_GetNumSpareRUsChainDecluster(raidPtr) + RF_Raid_t *raidPtr; { - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + 
RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - /* - * The layout uses two stripe units per disk as spare within each - * sparing region. - */ - return (2*info->numSparingRegions); + /* + * The layout uses two stripe units per disk as spare within each + * sparing region. + */ + return (2 * info->numSparingRegions); } /* Maps to the primary copy of the data, i.e. the first mirror pair */ -void rf_MapSectorChainDecluster( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapSectorChainDecluster( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_SectorNum_t index_within_region, index_within_disk; - RF_StripeNum_t sparing_region_id; - int col_before_remap; - - *row = 0; - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - index_within_disk = index_within_region / raidPtr->numCol; - col_before_remap = SUID % raidPtr->numCol; - - if (!remap) { - *col = col_before_remap; - *diskSector = ( index_within_disk + ( (raidPtr->numCol-1) * sparing_region_id) ) * - raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } - else { - /* remap sector to spare space...*/ - *diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol-1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - index_within_disk = index_within_region / raidPtr->numCol; - if (index_within_disk < col_before_remap ) - *col = 
index_within_disk; - else if (index_within_disk == raidPtr->numCol-2 ) { - *col = (col_before_remap+raidPtr->numCol-1) % raidPtr->numCol; - *diskSector += raidPtr->Layout.sectorsPerStripeUnit; - } - else - *col = (index_within_disk + 2) % raidPtr->numCol; - } + RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + RF_SectorNum_t index_within_region, index_within_disk; + RF_StripeNum_t sparing_region_id; + int col_before_remap; + + *row = 0; + sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; + index_within_region = SUID % info->stripeUnitsPerSparingRegion; + index_within_disk = index_within_region / raidPtr->numCol; + col_before_remap = SUID % raidPtr->numCol; + + if (!remap) { + *col = col_before_remap; + *diskSector = (index_within_disk + ((raidPtr->numCol - 1) * sparing_region_id)) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + } else { + /* remap sector to spare space... */ + *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + index_within_disk = index_within_region / raidPtr->numCol; + if (index_within_disk < col_before_remap) + *col = index_within_disk; + else + if (index_within_disk == raidPtr->numCol - 2) { + *col = (col_before_remap + raidPtr->numCol - 1) % raidPtr->numCol; + *diskSector += raidPtr->Layout.sectorsPerStripeUnit; + } else + *col = (index_within_disk + 2) % raidPtr->numCol; + } } /* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained - in the next disk (mod numCol) after the disk containing the primary copy. + in the next disk (mod numCol) after the disk containing the primary copy. 
The offset into the disk is one-half disk down */ -void rf_MapParityChainDecluster( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapParityChainDecluster( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_SectorNum_t index_within_region, index_within_disk; - RF_StripeNum_t sparing_region_id; - int col_before_remap; - - *row = 0; - if (!remap) { - *col = SUID % raidPtr->numCol; - *col = (*col + 1) % raidPtr->numCol; - *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += ( SUID / raidPtr->numCol ) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } - else { - /* remap parity to spare space ... 
*/ - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - index_within_disk = index_within_region / raidPtr->numCol; - *diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - col_before_remap = SUID % raidPtr->numCol; - if (index_within_disk < col_before_remap) - *col = index_within_disk; - else if (index_within_disk == raidPtr->numCol-2 ) { - *col = (col_before_remap+2) % raidPtr->numCol; - *diskSector -= raidPtr->Layout.sectorsPerStripeUnit; - } - else - *col = (index_within_disk + 2) % raidPtr->numCol; - } + RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + RF_SectorNum_t index_within_region, index_within_disk; + RF_StripeNum_t sparing_region_id; + int col_before_remap; + + *row = 0; + if (!remap) { + *col = SUID % raidPtr->numCol; + *col = (*col + 1) % raidPtr->numCol; + *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + } else { + /* remap parity to spare space ... 
*/ + sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; + index_within_region = SUID % info->stripeUnitsPerSparingRegion; + index_within_disk = index_within_region / raidPtr->numCol; + *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + col_before_remap = SUID % raidPtr->numCol; + if (index_within_disk < col_before_remap) + *col = index_within_disk; + else + if (index_within_disk == raidPtr->numCol - 2) { + *col = (col_before_remap + 2) % raidPtr->numCol; + *diskSector -= raidPtr->Layout.sectorsPerStripeUnit; + } else + *col = (index_within_disk + 2) % raidPtr->numCol; + } } -void rf_IdentifyStripeChainDecluster( - RF_Raid_t *raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t **diskids, - RF_RowCol_t *outRow) +void +rf_IdentifyStripeChainDecluster( + RF_Raid_t * raidPtr, + RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, + RF_RowCol_t * outRow) { - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID; - RF_RowCol_t col; - - SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; - col = SUID % raidPtr->numCol; - *outRow = 0; - *diskids = info->stripeIdentifier[ col ]; + RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t SUID; + RF_RowCol_t col; + + SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; + col = SUID % raidPtr->numCol; + *outRow = 0; + *diskids = info->stripeIdentifier[col]; } -void rf_MapSIDToPSIDChainDecluster( - RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru) +void +rf_MapSIDToPSIDChainDecluster( + RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, + RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru) { - *which_ru = 0; - *psID = stripeID; + 
*which_ru = 0; + *psID = stripeID; } - /****************************************************************************** * select a graph to perform a single-stripe access * @@ -349,34 +251,38 @@ void rf_MapSIDToPSIDChainDecluster( * createFunc - function to use to create the graph (return value) *****************************************************************************/ -void rf_RAIDCDagSelect( - RF_Raid_t *raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t *asmap, - RF_VoidFuncPtr *createFunc) +void +rf_RAIDCDagSelect( + RF_Raid_t * raidPtr, + RF_IoType_t type, + RF_AccessStripeMap_t * asmap, + RF_VoidFuncPtr * createFunc) #if 0 - void (**createFunc)(RF_Raid_t *, RF_AccessStripeMap_t *, - RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, - RF_AllocListElem_t *)) + void (**createFunc) (RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, +/**INDENT** Warning@258: Extra ) */ + RF_AllocListElem_t *)) #endif { - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - RF_ASSERT(raidPtr->numRow == 1); - - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - *createFunc = NULL; - return; - } - - *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG :(RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; - - if (type == RF_IO_TYPE_READ) { - if ( ( raidPtr->status[0] == rf_rs_degraded ) || ( raidPtr->status[0] == rf_rs_reconstructing) ) - *createFunc = (RF_VoidFuncPtr)rf_CreateRaidCDegradedReadDAG; /* array status is degraded, implement workload shifting */ - else - *createFunc = (RF_VoidFuncPtr)rf_CreateMirrorPartitionReadDAG; /* array status not degraded, so use mirror partition dag */ - } - else - *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG; + RF_ASSERT(RF_IO_IS_R_OR_W(type)); + RF_ASSERT(raidPtr->numRow == 1); + + if (asmap->numDataFailed + asmap->numParityFailed > 1) { + RF_ERRORMSG("Multiple disks failed in a single group! 
Aborting I/O operation.\n"); + *createFunc = NULL; + return; + } + *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; + + if (type == RF_IO_TYPE_READ) { + if ((raidPtr->status[0] == rf_rs_degraded) || (raidPtr->status[0] == rf_rs_reconstructing)) + *createFunc = (RF_VoidFuncPtr) rf_CreateRaidCDegradedReadDAG; /* array status is + * degraded, implement + * workload shifting */ + else + *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; /* array status not + * degraded, so use + * mirror partition dag */ + } else + *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; } diff --git a/sys/dev/raidframe/rf_chaindecluster.h b/sys/dev/raidframe/rf_chaindecluster.h index 52a94deac2f..f8105d177d8 100644 --- a/sys/dev/raidframe/rf_chaindecluster.h +++ b/sys/dev/raidframe/rf_chaindecluster.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_chaindecluster.h,v 1.1 1999/01/11 14:29:01 niklas Exp $ */ -/* $NetBSD: rf_chaindecluster.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* $OpenBSD: rf_chaindecluster.h,v 1.2 1999/02/16 00:02:26 niklas Exp $ */ +/* $NetBSD: rf_chaindecluster.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -31,93 +31,40 @@ * header file for Chained Declustering */ -/* - * : - * Log: rf_chaindecluster.h,v - * Revision 1.14 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.13 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.12 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. 
Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.11 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.10 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.9 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.8 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.7 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.6 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.5 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.4 1996/02/22 16:45:59 amiri - * added declaration of dag selection function - * - * Revision 1.3 1995/12/01 15:16:56 root - * added copyright info - * - * Revision 1.2 1995/11/17 19:55:21 amiri - * prototyped MapParityChainDecluster - */ #ifndef _RF__RF_CHAINDECLUSTER_H_ #define _RF__RF_CHAINDECLUSTER_H_ -int rf_ConfigureChainDecluster(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(RF_Raid_t *raidPtr); -void rf_MapSectorChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_MapParityChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t 
*diskSector, int remap); -void rf_IdentifyStripeChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, RF_RowCol_t *outRow); -void rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru); -void rf_RAIDCDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t *asmap, - RF_VoidFuncPtr *); +int +rf_ConfigureChainDecluster(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(RF_Raid_t * raidPtr); +void +rf_MapSectorChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_MapParityChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_IdentifyStripeChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outRow); +void +rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru); +void +rf_RAIDCDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t * asmap, + RF_VoidFuncPtr *); #if 0 - void (**createFunc)(RF_Raid_t *, - RF_AccessStripeMap_t *, - RF_DagHeader_t *, - void *, - RF_RaidAccessFlags_t, - RF_AllocListElem_t *) +void (**createFunc) (RF_Raid_t *, + RF_AccessStripeMap_t *, + RF_DagHeader_t *, + void *, + RF_RaidAccessFlags_t, + RF_AllocListElem_t *) +/**INDENT** Warning@59: Extra ) */ ); #endif -#endif /* !_RF__RF_CHAINDECLUSTER_H_ */ +#endif /* !_RF__RF_CHAINDECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_configure.h b/sys/dev/raidframe/rf_configure.h index aee456c52a2..81048bc43ba 100644 --- a/sys/dev/raidframe/rf_configure.h +++ b/sys/dev/raidframe/rf_configure.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_configure.h,v 1.1 1999/01/11 14:29:02 niklas 
Exp $ */ -/* $NetBSD: rf_configure.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */ +/* $OpenBSD: rf_configure.h,v 1.2 1999/02/16 00:02:26 niklas Exp $ */ +/* $NetBSD: rf_configure.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,61 +29,13 @@ /******************************** * - * rf_configure.h + * rf_configure.h * * header file for raidframe configuration in the kernel version only. * configuration is invoked via ioctl rather than at boot time * *******************************/ -/* : - * Log: rf_configure.h,v - * Revision 1.16 1996/06/19 14:57:53 jimz - * move layout-specific config parsing hooks into RF_LayoutSW_t - * table in rf_layout.c - * - * Revision 1.15 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.14 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.13 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.12 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.11 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.10 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.9 1996/05/18 20:09:51 jimz - * bit of cleanup to compile cleanly in kernel, once again - * - * Revision 1.8 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.7 1995/12/01 15:16:26 root - * added copyright info - * - */ #ifndef _RF__RF_CONFIGURE_H_ #define _RF__RF_CONFIGURE_H_ @@ -96,32 +48,40 @@ #include <sys/ioctl.h> -/* the raidframe configuration, passed down through an ioctl. +/* the raidframe configuration, passed down through an ioctl. * the driver can be reconfigured (with total loss of data) at any time, * but it must be shut down first. */ struct RF_Config_s { - RF_RowCol_t numRow, numCol, numSpare; /* number of rows, columns, and spare disks */ - dev_t devs[RF_MAXROW][RF_MAXCOL]; /* device numbers for disks comprising array */ - char devnames[RF_MAXROW][RF_MAXCOL][50]; /* device names */ - dev_t spare_devs[RF_MAXSPARE]; /* device numbers for spare disks */ - char spare_names[RF_MAXSPARE][50]; /* device names */ - RF_SectorNum_t sectPerSU; /* sectors per stripe unit */ - RF_StripeNum_t SUsPerPU; /* stripe units per parity unit */ - RF_StripeNum_t SUsPerRU; /* stripe units per reconstruction unit */ - RF_ParityConfig_t parityConfig; /* identifies the RAID architecture to be used */ - RF_DiskQueueType_t diskQueueType; /* 'f' = fifo, 'c' = cvscan, not used in kernel */ - char maxOutstandingDiskReqs; /* # concurrent reqs to be sent to a disk. not used in kernel. 
*/ - char debugVars[RF_MAXDBGV][50]; /* space for specifying debug variables & their values */ - unsigned int layoutSpecificSize; /* size in bytes of layout-specific info */ - void *layoutSpecific; /* a pointer to a layout-specific structure to be copied in */ + RF_RowCol_t numRow, numCol, numSpare; /* number of rows, columns, + * and spare disks */ + dev_t devs[RF_MAXROW][RF_MAXCOL]; /* device numbers for disks + * comprising array */ + char devnames[RF_MAXROW][RF_MAXCOL][50]; /* device names */ + dev_t spare_devs[RF_MAXSPARE]; /* device numbers for spare + * disks */ + char spare_names[RF_MAXSPARE][50]; /* device names */ + RF_SectorNum_t sectPerSU; /* sectors per stripe unit */ + RF_StripeNum_t SUsPerPU;/* stripe units per parity unit */ + RF_StripeNum_t SUsPerRU;/* stripe units per reconstruction unit */ + RF_ParityConfig_t parityConfig; /* identifies the RAID architecture to + * be used */ + RF_DiskQueueType_t diskQueueType; /* 'f' = fifo, 'c' = cvscan, + * not used in kernel */ + char maxOutstandingDiskReqs; /* # concurrent reqs to be sent to a + * disk. not used in kernel. 
*/ + char debugVars[RF_MAXDBGV][50]; /* space for specifying debug + * variables & their values */ + unsigned int layoutSpecificSize; /* size in bytes of + * layout-specific info */ + void *layoutSpecific; /* a pointer to a layout-specific structure to + * be copied in */ }; +#ifndef _KERNEL +int rf_MakeConfig(char *configname, RF_Config_t * cfgPtr); +int rf_MakeLayoutSpecificNULL(FILE * fp, RF_Config_t * cfgPtr, void *arg); +int rf_MakeLayoutSpecificDeclustered(FILE * configfp, RF_Config_t * cfgPtr, void *arg); +void *rf_ReadSpareTable(RF_SparetWait_t * req, char *fname); +#endif /* !_KERNEL */ -#ifndef KERNEL -int rf_MakeConfig(char *configname, RF_Config_t *cfgPtr); -int rf_MakeLayoutSpecificNULL(FILE *fp, RF_Config_t *cfgPtr, void *arg); -int rf_MakeLayoutSpecificDeclustered(FILE *configfp, RF_Config_t *cfgPtr, void *arg); -void *rf_ReadSpareTable(RF_SparetWait_t *req, char *fname); -#endif /* !KERNEL */ - -#endif /* !_RF__RF_CONFIGURE_H_ */ +#endif /* !_RF__RF_CONFIGURE_H_ */ diff --git a/sys/dev/raidframe/rf_copyback.c b/sys/dev/raidframe/rf_copyback.c index b2fe641fded..ba06d882559 100644 --- a/sys/dev/raidframe/rf_copyback.c +++ b/sys/dev/raidframe/rf_copyback.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_copyback.c,v 1.1 1999/01/11 14:29:02 niklas Exp $ */ -/* $NetBSD: rf_copyback.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_copyback.c,v 1.2 1999/02/16 00:02:27 niklas Exp $ */ +/* $NetBSD: rf_copyback.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -32,123 +32,16 @@ * copyback.c -- code to copy reconstructed data back from spare space to * the replaced disk. * - * the code operates using callbacks on the I/Os to continue with the next + * the code operates using callbacks on the I/Os to continue with the next * unit to be copied back. We do this because a simple loop containing blocking I/Os * will not work in the simulator. 
* ****************************************************************************************/ -/* - * : - * Log: rf_copyback.c,v - * Revision 1.26 1996/08/06 22:26:00 jimz - * don't include sys/buf.h on linux - * - * Revision 1.25 1996/07/30 03:30:40 jimz - * include rf_types.h first - * - * Revision 1.24 1996/07/27 18:39:52 jimz - * cleanup sweep - * - * Revision 1.23 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.22 1996/07/11 19:08:00 jimz - * generalize reconstruction mechanism - * allow raid1 reconstructs via copyback (done with array - * quiesced, not online, therefore not disk-directed) - * - * Revision 1.21 1996/07/11 16:03:47 jimz - * fixed hanging bug in rf_CopybackWriteDoneProc() - * - * Revision 1.20 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.19 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.18 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.17 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.16 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.15 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. 
- * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.14 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.13 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.12 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.11 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.10 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.9 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.8 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.7 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.6 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.5 1995/12/01 15:15:31 root - * added copyright info - * - * Revision 1.4 1995/06/23 13:41:36 robby - * 
updeated to prototypes in rf_layout.h - * - */ - #include "rf_types.h" + #include <sys/time.h> -#ifndef LINUX #include <sys/buf.h> -#endif /* !LINUX */ #include "rf_raid.h" #include "rf_threadid.h" #include "rf_mcpair.h" @@ -157,9 +50,6 @@ #include "rf_general.h" #include "rf_utils.h" #include "rf_copyback.h" -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#include "rf_camlayer.h" -#endif #include "rf_decluster.h" #include "rf_driver.h" #include "rf_shutdown.h" @@ -168,23 +58,23 @@ #define RF_COPYBACK_DATA 0 #define RF_COPYBACK_PARITY 1 -int rf_copyback_in_progress; +int rf_copyback_in_progress; -static int rf_CopybackReadDoneProc(RF_CopybackDesc_t *desc, int status); -static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t *desc, int status); -static void rf_CopybackOne(RF_CopybackDesc_t *desc, int typ, - RF_RaidAddr_t addr, RF_RowCol_t testRow, RF_RowCol_t testCol, - RF_SectorNum_t testOffs); -static void rf_CopybackComplete(RF_CopybackDesc_t *desc, int status); +static int rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc, int status); +static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc, int status); +static void +rf_CopybackOne(RF_CopybackDesc_t * desc, int typ, + RF_RaidAddr_t addr, RF_RowCol_t testRow, RF_RowCol_t testCol, + RF_SectorNum_t testOffs); +static void rf_CopybackComplete(RF_CopybackDesc_t * desc, int status); -int rf_ConfigureCopyback(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureCopyback(listp) + RF_ShutdownList_t **listp; { - rf_copyback_in_progress = 0; - return(0); + rf_copyback_in_progress = 0; + return (0); } - -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) #include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> @@ -196,171 +86,148 @@ int rf_ConfigureCopyback(listp) #endif int raidlookup __P((char *, struct proc *, struct vnode **)); -#endif /* do a complete copyback */ -void rf_CopybackReconstructedData(raidPtr) - RF_Raid_t *raidPtr; +void +rf_CopybackReconstructedData(raidPtr) + 
RF_Raid_t *raidPtr; { -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - int done,retcode; - RF_CopybackDesc_t *desc; - RF_RowCol_t frow, fcol; - RF_RaidDisk_t *badDisk; - char *databuf; - - struct partinfo dpart; - struct vnode *vp; - struct vattr va; - struct proc *proc; - -#else - int bus, targ, lun, done, retcode; - RF_CopybackDesc_t *desc; - RF_RowCol_t frow, fcol; - RF_RaidDisk_t *badDisk; - RF_DiskOp_t *tur_op; - char *databuf; -#endif - - done = 0; - fcol = 0; - for (frow=0; frow<raidPtr->numRow; frow++) { - for (fcol=0; fcol<raidPtr->numCol; fcol++) { - if (raidPtr->Disks[frow][fcol].status == rf_ds_dist_spared - || raidPtr->Disks[frow][fcol].status == rf_ds_spared) - { - done = 1; - break; - } - } - if (done) - break; - } - - if (frow == raidPtr->numRow) { - printf("COPYBACK: no disks need copyback\n"); - return; - } - - badDisk = &raidPtr->Disks[frow][fcol]; -#ifndef SIMULATE -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - - proc = raidPtr->proc; /* XXX Yes, this is not nice.. */ - + int done, retcode; + RF_CopybackDesc_t *desc; + RF_RowCol_t frow, fcol; + RF_RaidDisk_t *badDisk; + char *databuf; + + struct partinfo dpart; + struct vnode *vp; + struct vattr va; + struct proc *proc; + + done = 0; + fcol = 0; + for (frow = 0; frow < raidPtr->numRow; frow++) { + for (fcol = 0; fcol < raidPtr->numCol; fcol++) { + if (raidPtr->Disks[frow][fcol].status == rf_ds_dist_spared + || raidPtr->Disks[frow][fcol].status == rf_ds_spared) { + done = 1; + break; + } + } + if (done) + break; + } + + if (frow == raidPtr->numRow) { + printf("COPYBACK: no disks need copyback\n"); + return; + } + badDisk = &raidPtr->Disks[frow][fcol]; + + proc = raidPtr->proc; /* XXX Yes, this is not nice.. */ + + /* This device may have been opened successfully the first time. Close + * it before trying to open it again.. 
*/ + + if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) { + printf("Closed the open device: %s\n", + raidPtr->Disks[frow][fcol].devname); + (void) vn_close(raidPtr->raid_cinfo[frow][fcol].ci_vp, + FREAD | FWRITE, proc->p_ucred, proc); + } + printf("About to (re-)open the device: %s\n", + raidPtr->Disks[frow][fcol].devname); + + retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp); + + if (retcode) { + printf("COPYBACK: raidlookup on device: %s failed: %d!\n", + raidPtr->Disks[frow][fcol].devname, retcode); + + /* XXX the component isn't responding properly... must be + * still dead :-( */ + return; + + } else { + + /* Ok, so we can at least do a lookup... How about actually + * getting a vp for it? */ + + if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { + return; + } + retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, + FREAD, proc->p_ucred, proc); + if (retcode) { + return; + } + raidPtr->Disks[frow][fcol].blockSize = dpart.disklab->d_secsize; + + raidPtr->Disks[frow][fcol].numBlocks = dpart.part->p_size - + rf_protectedSectors; + + raidPtr->raid_cinfo[frow][fcol].ci_vp = vp; + raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev; + + raidPtr->Disks[frow][fcol].dev = va.va_rdev; /* XXX or the above? */ + + /* we allow the user to specify that only a fraction of the + * disks should be used this is just for debug: it speeds up + * the parity scan */ + raidPtr->Disks[frow][fcol].numBlocks = + raidPtr->Disks[frow][fcol].numBlocks * + rf_sizePercentage / 100; + } #if 0 - printf("Pretending the disk is happy...\n"); - retcode = 0; /* XXX this should be set to something more realistic. */ -#endif - - /* This device may have been opened successfully the first time. - Close it before trying to open it again.. 
*/ - - if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) { - printf("Closed the open device: %s\n", - raidPtr->Disks[frow][fcol].devname); - (void)vn_close(raidPtr->raid_cinfo[frow][fcol].ci_vp, - FREAD|FWRITE, proc->p_ucred, proc); - } - - printf("About to (re-)open the device: %s\n", - raidPtr->Disks[frow][fcol].devname); - - retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp); - - if (retcode) { - printf("COPYBACK: raidlookup on device: %s failed: %d!\n", - raidPtr->Disks[frow][fcol].devname, retcode); - - /* XXX the component isn't responding properly... - must be still dead :-( */ - return; - - } else { - - /* Ok, so we can at least do a lookup... How about actually - getting a vp for it? */ - - if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { - return; - } - - retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, - FREAD, proc->p_ucred, proc); - if (retcode) { - return; - } - raidPtr->Disks[frow][fcol].blockSize = dpart.disklab->d_secsize; - - raidPtr->Disks[frow][fcol].numBlocks = dpart.part->p_size - - rf_protectedSectors; - - raidPtr->raid_cinfo[frow][fcol].ci_vp = vp; - raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev; - - raidPtr->Disks[frow][fcol].dev = va.va_rdev; /* XXX or the above? 
*/ - - /* we allow the user to specify that only a fraction of the - * disks should be used this is just for debug: it speeds up - * the parity scan - */ - raidPtr->Disks[frow][fcol].numBlocks = - raidPtr->Disks[frow][fcol].numBlocks * - rf_sizePercentage / 100; - } -#else - if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) { - printf("COPYBACK: unable to extract bus, target, lun from devname %s\n", - badDisk->devname); - return; - } - - /* TUR the disk that's marked as bad to be sure that it's actually alive */ - rf_SCSI_AllocTUR(&tur_op); - retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev); - rf_SCSI_FreeDiskOp(tur_op, 0); + /* This is the way it was done before the CAM stuff was removed */ + + if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) { + printf("COPYBACK: unable to extract bus, target, lun from devname %s\n", + badDisk->devname); + return; + } + /* TUR the disk that's marked as bad to be sure that it's actually + * alive */ + rf_SCSI_AllocTUR(&tur_op); + retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev); + rf_SCSI_FreeDiskOp(tur_op, 0); #endif - if (retcode) { - printf("COPYBACK: target disk failed TUR\n"); - return; - } -#endif /* !SIMULATE */ - - /* get a buffer to hold one SU */ - RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *)); - - /* create a descriptor */ - RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *)); - desc->raidPtr = raidPtr; - desc->status = 0; - desc->frow = frow; - desc->fcol = fcol; - desc->spRow = badDisk->spareRow; - desc->spCol = badDisk->spareCol; - desc->stripeAddr = 0; - desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; - desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol; - desc->databuf = databuf; -#ifndef SIMULATE - desc->mcpair = rf_AllocMCPair(); -#endif /* !SIMULATE */ - - printf("COPYBACK: Quiescing the array\n"); - /* quiesce the array, since we don't want to code support for user accs 
here */ - rf_SuspendNewRequestsAndWait(raidPtr); - - /* adjust state of the array and of the disks */ - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal; - raidPtr->status[desc->frow] = rf_rs_optimal; - rf_copyback_in_progress = 1; /* debug only */ - RF_UNLOCK_MUTEX(raidPtr->mutex); - - printf("COPYBACK: Beginning\n"); - RF_GETTIME(desc->starttime); - rf_ContinueCopyback(desc); + if (retcode) { + printf("COPYBACK: target disk failed TUR\n"); + return; + } + /* get a buffer to hold one SU */ + RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *)); + + /* create a descriptor */ + RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *)); + desc->raidPtr = raidPtr; + desc->status = 0; + desc->frow = frow; + desc->fcol = fcol; + desc->spRow = badDisk->spareRow; + desc->spCol = badDisk->spareCol; + desc->stripeAddr = 0; + desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; + desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol; + desc->databuf = databuf; + desc->mcpair = rf_AllocMCPair(); + + printf("COPYBACK: Quiescing the array\n"); + /* quiesce the array, since we don't want to code support for user + * accs here */ + rf_SuspendNewRequestsAndWait(raidPtr); + + /* adjust state of the array and of the disks */ + RF_LOCK_MUTEX(raidPtr->mutex); + raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal; + raidPtr->status[desc->frow] = rf_rs_optimal; + rf_copyback_in_progress = 1; /* debug only */ + RF_UNLOCK_MUTEX(raidPtr->mutex); + + printf("COPYBACK: Beginning\n"); + RF_GETTIME(desc->starttime); + rf_ContinueCopyback(desc); } @@ -368,210 +235,191 @@ void rf_CopybackReconstructedData(raidPtr) * invoked via callback after a copyback I/O has completed to * continue on with the next one */ -void rf_ContinueCopyback(desc) - RF_CopybackDesc_t *desc; +void +rf_ContinueCopyback(desc) + RF_CopybackDesc_t *desc; { - RF_SectorNum_t testOffs, 
stripeAddr; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_RaidAddr_t addr; - RF_RowCol_t testRow, testCol; - int old_pctg, new_pctg, done; - struct timeval t, diff; - - old_pctg = (-1); - while (1) { - stripeAddr = desc->stripeAddr; - if (rf_prReconSched) { - old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; - } - desc->stripeAddr += desc->sectPerStripe; - if (rf_prReconSched) { - new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; - if (new_pctg != old_pctg) { - RF_GETTIME(t); - RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); - printf("%d %d.%06d\n",new_pctg, (int)diff.tv_sec, (int)diff.tv_usec); - } - } - - if (stripeAddr >= raidPtr->totalSectors) { - rf_CopybackComplete(desc, 0); - return; - } - - /* walk through the current stripe, su-by-su */ - for (done=0, addr = stripeAddr; addr < stripeAddr+desc->sectPerStripe; addr += desc->sectPerSU) { - - /* map the SU, disallowing remap to spare space */ - (raidPtr->Layout.map->MapSector)(raidPtr, addr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); - - if (testRow == desc->frow && testCol == desc->fcol) { - rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testRow, testCol, testOffs); -#ifdef SIMULATE - return; -#else /* SIMULATE */ - done = 1; - break; -#endif /* SIMULATE */ - } - } - - if (!done) { - /* we didn't find the failed disk in the data part. check parity. */ - - /* map the parity for this stripe, disallowing remap to spare space */ - (raidPtr->Layout.map->MapParity)(raidPtr, stripeAddr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); - - if (testRow == desc->frow && testCol == desc->fcol) { - rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testRow, testCol, testOffs); -#ifdef SIMULATE - return; -#endif /* SIMULATE */ - } - } - - /* check to see if the last read/write pair failed */ - if (desc->status) { - rf_CopybackComplete(desc, 1); - return; - } - - /* we didn't find any units to copy back in this stripe. 
Continue with the next one */ - } + RF_SectorNum_t testOffs, stripeAddr; + RF_Raid_t *raidPtr = desc->raidPtr; + RF_RaidAddr_t addr; + RF_RowCol_t testRow, testCol; + int old_pctg, new_pctg, done; + struct timeval t, diff; + + old_pctg = (-1); + while (1) { + stripeAddr = desc->stripeAddr; + if (rf_prReconSched) { + old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; + } + desc->stripeAddr += desc->sectPerStripe; + if (rf_prReconSched) { + new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; + if (new_pctg != old_pctg) { + RF_GETTIME(t); + RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); + printf("%d %d.%06d\n", new_pctg, (int) diff.tv_sec, (int) diff.tv_usec); + } + } + if (stripeAddr >= raidPtr->totalSectors) { + rf_CopybackComplete(desc, 0); + return; + } + /* walk through the current stripe, su-by-su */ + for (done = 0, addr = stripeAddr; addr < stripeAddr + desc->sectPerStripe; addr += desc->sectPerSU) { + + /* map the SU, disallowing remap to spare space */ + (raidPtr->Layout.map->MapSector) (raidPtr, addr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); + + if (testRow == desc->frow && testCol == desc->fcol) { + rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testRow, testCol, testOffs); + done = 1; + break; + } + } + + if (!done) { + /* we didn't find the failed disk in the data part. + * check parity. */ + + /* map the parity for this stripe, disallowing remap + * to spare space */ + (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); + + if (testRow == desc->frow && testCol == desc->fcol) { + rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testRow, testCol, testOffs); + } + } + /* check to see if the last read/write pair failed */ + if (desc->status) { + rf_CopybackComplete(desc, 1); + return; + } + /* we didn't find any units to copy back in this stripe. 
+ * Continue with the next one */ + } } /* copyback one unit */ -static void rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs) - RF_CopybackDesc_t *desc; - int typ; - RF_RaidAddr_t addr; - RF_RowCol_t testRow; - RF_RowCol_t testCol; - RF_SectorNum_t testOffs; +static void +rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs) + RF_CopybackDesc_t *desc; + int typ; + RF_RaidAddr_t addr; + RF_RowCol_t testRow; + RF_RowCol_t testCol; + RF_SectorNum_t testOffs; { - RF_SectorCount_t sectPerSU = desc->sectPerSU; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_RowCol_t spRow = desc->spRow; - RF_RowCol_t spCol = desc->spCol; - RF_SectorNum_t spOffs; - - /* find the spare spare location for this SU */ - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - if (typ == RF_COPYBACK_DATA) - raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); - else - raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); - } else { - spOffs = testOffs; - } - - /* create reqs to read the old location & write the new */ - desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs, - sectPerSU, desc->databuf, 0L, 0, - (int (*)(void *,int)) rf_CopybackReadDoneProc, desc, - NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); - desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs, - sectPerSU, desc->databuf, 0L, 0, - (int (*)(void *,int)) rf_CopybackWriteDoneProc, desc, - NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); - desc->frow = testRow; - desc->fcol = testCol; - - /* enqueue the read. the write will go out as part of the callback on the read. - * at user-level & in the kernel, wait for the read-write pair to complete. 
- * in the simulator, just return, since everything will happen as callbacks - */ -#ifndef SIMULATE - RF_LOCK_MUTEX(desc->mcpair->mutex); - desc->mcpair->flag = 0; -#endif /* !SIMULATE */ - - rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, RF_IO_NORMAL_PRIORITY); - -#ifndef SIMULATE - while (!desc->mcpair->flag) { - RF_WAIT_MCPAIR(desc->mcpair); - } - RF_UNLOCK_MUTEX(desc->mcpair->mutex); - rf_FreeDiskQueueData(desc->readreq); - rf_FreeDiskQueueData(desc->writereq); -#endif /* !SIMULATE */ + RF_SectorCount_t sectPerSU = desc->sectPerSU; + RF_Raid_t *raidPtr = desc->raidPtr; + RF_RowCol_t spRow = desc->spRow; + RF_RowCol_t spCol = desc->spCol; + RF_SectorNum_t spOffs; + + /* find the spare spare location for this SU */ + if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { + if (typ == RF_COPYBACK_DATA) + raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); + else + raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); + } else { + spOffs = testOffs; + } + + /* create reqs to read the old location & write the new */ + desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs, + sectPerSU, desc->databuf, 0L, 0, + (int (*) (void *, int)) rf_CopybackReadDoneProc, desc, + NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); + desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs, + sectPerSU, desc->databuf, 0L, 0, + (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc, + NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); + desc->frow = testRow; + desc->fcol = testCol; + + /* enqueue the read. the write will go out as part of the callback on + * the read. at user-level & in the kernel, wait for the read-write + * pair to complete. 
in the simulator, just return, since everything + * will happen as callbacks */ + + RF_LOCK_MUTEX(desc->mcpair->mutex); + desc->mcpair->flag = 0; + + rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, RF_IO_NORMAL_PRIORITY); + + while (!desc->mcpair->flag) { + RF_WAIT_MCPAIR(desc->mcpair); + } + RF_UNLOCK_MUTEX(desc->mcpair->mutex); + rf_FreeDiskQueueData(desc->readreq); + rf_FreeDiskQueueData(desc->writereq); + } /* called at interrupt context when the read has completed. just send out the write */ -static int rf_CopybackReadDoneProc(desc, status) - RF_CopybackDesc_t *desc; - int status; +static int +rf_CopybackReadDoneProc(desc, status) + RF_CopybackDesc_t *desc; + int status; { - if (status) { /* invoke the callback with bad status */ - printf("COPYBACK: copyback read failed. Aborting.\n"); - (desc->writereq->CompleteFunc)(desc, -100); - } - else { - rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->frow][desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY); - } - return(0); + if (status) { /* invoke the callback with bad status */ + printf("COPYBACK: copyback read failed. Aborting.\n"); + (desc->writereq->CompleteFunc) (desc, -100); + } else { + rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->frow][desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY); + } + return (0); } - -/* called at interrupt context when the write has completed. +/* called at interrupt context when the write has completed. * at user level & in the kernel, wake up the copyback thread. * in the simulator, invoke the next copyback directly. * can't free diskqueuedata structs in the kernel b/c we're at interrupt context. */ -static int rf_CopybackWriteDoneProc(desc, status) - RF_CopybackDesc_t *desc; - int status; +static int +rf_CopybackWriteDoneProc(desc, status) + RF_CopybackDesc_t *desc; + int status; { - if (status && status != -100) { - printf("COPYBACK: copyback write failed. 
Aborting.\n"); - } - -#ifdef SIMULATE - rf_FreeDiskQueueData(desc->readreq); - rf_FreeDiskQueueData(desc->writereq); - if (!status) - rf_ContinueCopyback(desc); - else - rf_CopybackComplete(desc, 1); -#else /* SIMULATE */ - desc->status = status; - rf_MCPairWakeupFunc(desc->mcpair); -#endif /* SIMULATE */ - return(0); -} - + if (status && status != -100) { + printf("COPYBACK: copyback write failed. Aborting.\n"); + } + desc->status = status; + rf_MCPairWakeupFunc(desc->mcpair); + return (0); +} /* invoked when the copyback has completed */ -static void rf_CopybackComplete(desc, status) - RF_CopybackDesc_t *desc; - int status; +static void +rf_CopybackComplete(desc, status) + RF_CopybackDesc_t *desc; + int status; { - RF_Raid_t *raidPtr = desc->raidPtr; - struct timeval t, diff; - - if (!status) { - RF_LOCK_MUTEX(raidPtr->mutex); - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D'); - rf_FreeSpareTable(raidPtr); - } else { - raidPtr->Disks[desc->spRow][desc->spCol].status = rf_ds_spare; - } - RF_UNLOCK_MUTEX(raidPtr->mutex); - - RF_GETTIME(t); - RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); - printf("Copyback time was %d.%06d seconds\n", - (int)diff.tv_sec, (int)diff.tv_usec); - } else printf("COPYBACK: Failure.\n"); - - RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU)); -#ifndef SIMULATE - rf_FreeMCPair(desc->mcpair); -#endif /* !SIMULATE */ - RF_Free(desc, sizeof(*desc)); - - rf_copyback_in_progress = 0; - rf_ResumeNewRequests(raidPtr); + RF_Raid_t *raidPtr = desc->raidPtr; + struct timeval t, diff; + + if (!status) { + RF_LOCK_MUTEX(raidPtr->mutex); + if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { + RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D'); + rf_FreeSpareTable(raidPtr); + } else { + raidPtr->Disks[desc->spRow][desc->spCol].status = rf_ds_spare; + } + RF_UNLOCK_MUTEX(raidPtr->mutex); + + RF_GETTIME(t); + RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); + 
printf("Copyback time was %d.%06d seconds\n", + (int) diff.tv_sec, (int) diff.tv_usec); + } else + printf("COPYBACK: Failure.\n"); + + RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU)); + rf_FreeMCPair(desc->mcpair); + RF_Free(desc, sizeof(*desc)); + + rf_copyback_in_progress = 0; + rf_ResumeNewRequests(raidPtr); } diff --git a/sys/dev/raidframe/rf_copyback.h b/sys/dev/raidframe/rf_copyback.h index 59ef0630447..d04066e291f 100644 --- a/sys/dev/raidframe/rf_copyback.h +++ b/sys/dev/raidframe/rf_copyback.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_copyback.h,v 1.1 1999/01/11 14:29:03 niklas Exp $ */ -/* $NetBSD: rf_copyback.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_copyback.h,v 1.2 1999/02/16 00:02:27 niklas Exp $ */ +/* $NetBSD: rf_copyback.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ /* * rf_copyback.h */ @@ -29,31 +29,6 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ -/* - * : - * Log: rf_copyback.h,v - * Revision 1.5 1996/07/11 19:08:00 jimz - * generalize reconstruction mechanism - * allow raid1 reconstructs via copyback (done with array - * quiesced, not online, therefore not disk-directed) - * - * Revision 1.4 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.3 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.2 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.1 1996/05/18 19:55:02 jimz - * Initial revision - * - */ #ifndef _RF__RF_COPYBACK_H_ #define _RF__RF_COPYBACK_H_ @@ -61,28 +36,26 @@ #include "rf_types.h" typedef struct RF_CopybackDesc_s { - RF_Raid_t *raidPtr; - RF_RowCol_t frow; - RF_RowCol_t fcol; - RF_RowCol_t spRow; - RF_RowCol_t spCol; - int status; - RF_StripeNum_t stripeAddr; - RF_SectorCount_t sectPerSU; - RF_SectorCount_t sectPerStripe; - char *databuf; - RF_DiskQueueData_t *readreq; - RF_DiskQueueData_t *writereq; - struct timeval starttime; -#ifndef SIMULATE - RF_MCPair_t *mcpair; -#endif /* !SIMULATE */ -} RF_CopybackDesc_t; + RF_Raid_t *raidPtr; + RF_RowCol_t frow; + RF_RowCol_t fcol; + RF_RowCol_t spRow; + RF_RowCol_t spCol; + int status; + RF_StripeNum_t stripeAddr; + RF_SectorCount_t sectPerSU; + RF_SectorCount_t sectPerStripe; + char *databuf; + RF_DiskQueueData_t *readreq; + RF_DiskQueueData_t *writereq; + struct timeval starttime; + RF_MCPair_t *mcpair; +} RF_CopybackDesc_t; extern int rf_copyback_in_progress; -int rf_ConfigureCopyback(RF_ShutdownList_t **listp); -void rf_CopybackReconstructedData(RF_Raid_t *raidPtr); -void rf_ContinueCopyback(RF_CopybackDesc_t *desc); +int rf_ConfigureCopyback(RF_ShutdownList_t ** listp); +void rf_CopybackReconstructedData(RF_Raid_t * raidPtr); +void rf_ContinueCopyback(RF_CopybackDesc_t * desc); -#endif /* !_RF__RF_COPYBACK_H_ */ +#endif /* !_RF__RF_COPYBACK_H_ */ diff --git a/sys/dev/raidframe/rf_cpuutil.c b/sys/dev/raidframe/rf_cpuutil.c index 1816740bfc3..d9b7ebb9802 100644 --- a/sys/dev/raidframe/rf_cpuutil.c +++ b/sys/dev/raidframe/rf_cpuutil.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_cpuutil.c,v 1.1 1999/01/11 14:29:03 niklas Exp $ */ -/* $NetBSD: rf_cpuutil.c,v 1.1 1998/11/13 
04:20:27 oster Exp $ */ +/* $OpenBSD: rf_cpuutil.c,v 1.2 1999/02/16 00:02:27 niklas Exp $ */ +/* $NetBSD: rf_cpuutil.c,v 1.3 1999/02/05 00:06:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -32,164 +32,90 @@ * track cpu utilization */ -#ifdef _KERNEL -#define KERNEL -#endif - #include "rf_cpuutil.h" -#ifndef KERNEL -#include <errno.h> -#endif /* !KERNEL */ #include "rf_types.h" #include "rf_general.h" #include "rf_shutdown.h" #include "rf_sys.h" -#ifdef __osf__ -#include <sys/table.h> -#endif /* __osf__ */ -#ifdef AIX -#include <nlist.h> -#include <sys/sysinfo.h> -#endif /* AIX */ -#ifdef KERNEL -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#include <sys/dk.h> -#endif /* __NetBSD__ && !__OpenBSD__ */ -#else /* KERNEL */ -extern int table(int id, int index, void *addr, int nel, u_int lel); -#endif /* KERNEL */ -#ifdef __osf__ -static struct tbl_sysinfo start, stop; -#endif /* __osf__ */ -#ifdef AIX -static int kmem_fd; -static off_t sysinfo_offset; -static struct sysinfo sysinfo_start, sysinfo_stop; -static struct nlist namelist[] = { - {{"sysinfo"}}, - {{""}}, -}; -#endif /* AIX */ - -#ifdef AIX -static void rf_ShutdownCpuMonitor(ignored) - void *ignored; -{ - close(kmem_fd); -} -#endif /* AIX */ - -int rf_ConfigureCpuMonitor(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureCpuMonitor(listp) + RF_ShutdownList_t **listp; { #ifdef AIX - int rc; + int rc; - rc = knlist(namelist, 1, sizeof(struct nlist)); - if (rc) { - RF_ERRORMSG("Could not knlist() to config CPU monitor\n"); - return(errno); - } - if (namelist[0].n_value == 0) { - RF_ERRORMSG("Got bogus results from knlist() for CPU monitor\n"); - return(EIO); - } - sysinfo_offset = namelist[0].n_value; - kmem_fd = open("/dev/kmem", O_RDONLY); - if (kmem_fd < 0) { - perror("/dev/kmem"); - return(errno); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownCpuMonitor, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", 
__FILE__, - __LINE__, rc); - rf_ShutdownCpuMonitor(NULL); - return(rc); - } -#endif /* AIX */ - return(0); + rc = knlist(namelist, 1, sizeof(struct nlist)); + if (rc) { + RF_ERRORMSG("Could not knlist() to config CPU monitor\n"); + return (errno); + } + if (namelist[0].n_value == 0) { + RF_ERRORMSG("Got bogus results from knlist() for CPU monitor\n"); + return (EIO); + } + sysinfo_offset = namelist[0].n_value; + kmem_fd = open("/dev/kmem", O_RDONLY); + if (kmem_fd < 0) { + perror("/dev/kmem"); + return (errno); + } + rc = rf_ShutdownCreate(listp, rf_ShutdownCpuMonitor, NULL); + if (rc) { + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_ShutdownCpuMonitor(NULL); + return (rc); + } +#endif /* AIX */ + return (0); } -void rf_start_cpu_monitor() +void +rf_start_cpu_monitor() { -#ifdef __osf__ -#ifndef KERNEL - if (table(TBL_SYSINFO, 0, &start, 1, sizeof(start)) != 1) { - printf("Unable to get sysinfo for cpu utilization monitor\n"); - perror("start_cpu_monitor"); - } -#else /* !KERNEL */ - /* start.si_user = cp_time[CP_USER]; - start.si_nice = cp_time[CP_NICE]; - start.si_sys = cp_time[CP_SYS]; - start.si_idle = cp_time[CP_IDLE]; - start.wait = cp_time[CP_WAIT]; */ -#endif /* !KERNEL */ -#endif /* __osf__ */ #ifdef AIX - off_t off; - int rc; + off_t off; + int rc; - off = lseek(kmem_fd, sysinfo_offset, SEEK_SET); - RF_ASSERT(off == sysinfo_offset); - rc = read(kmem_fd, &sysinfo_start, sizeof(struct sysinfo)); - if (rc != sizeof(struct sysinfo)) { - RF_ERRORMSG2("Starting CPU monitor: rc=%d != %d\n", rc, - sizeof(struct sysinfo)); - } -#endif /* AIX */ + off = lseek(kmem_fd, sysinfo_offset, SEEK_SET); + RF_ASSERT(off == sysinfo_offset); + rc = read(kmem_fd, &sysinfo_start, sizeof(struct sysinfo)); + if (rc != sizeof(struct sysinfo)) { + RF_ERRORMSG2("Starting CPU monitor: rc=%d != %d\n", rc, + sizeof(struct sysinfo)); + } +#endif /* AIX */ } -void rf_stop_cpu_monitor() +void +rf_stop_cpu_monitor() { -#ifdef __osf__ 
-#ifndef KERNEL - if (table(TBL_SYSINFO, 0, &stop, 1, sizeof(stop)) != 1) { - printf("Unable to get sysinfo for cpu utilization monitor\n"); - perror("stop_cpu_monitor"); - } -#else /* !KERNEL */ - /* stop.si_user = cp_time[CP_USER]; - stop.si_nice = cp_time[CP_NICE]; - stop.si_sys = cp_time[CP_SYS]; - stop.si_idle = cp_time[CP_IDLE]; - stop.wait = cp_time[CP_WAIT]; */ -#endif /* !KERNEL */ -#endif /* __osf__ */ #ifdef AIX - off_t off; - int rc; + off_t off; + int rc; - off = lseek(kmem_fd, sysinfo_offset, SEEK_SET); - RF_ASSERT(off == sysinfo_offset); - rc = read(kmem_fd, &sysinfo_stop, sizeof(struct sysinfo)); - if (rc != sizeof(struct sysinfo)) { - RF_ERRORMSG2("Stopping CPU monitor: rc=%d != %d\n", rc, - sizeof(struct sysinfo)); - } -#endif /* AIX */ + off = lseek(kmem_fd, sysinfo_offset, SEEK_SET); + RF_ASSERT(off == sysinfo_offset); + rc = read(kmem_fd, &sysinfo_stop, sizeof(struct sysinfo)); + if (rc != sizeof(struct sysinfo)) { + RF_ERRORMSG2("Stopping CPU monitor: rc=%d != %d\n", rc, + sizeof(struct sysinfo)); + } +#endif /* AIX */ } -void rf_print_cpu_util(s) - char *s; +void +rf_print_cpu_util(s) + char *s; { -#ifdef __osf__ - long totalticks, idleticks; - - idleticks = stop.si_idle - start.si_idle + stop.wait - start.wait; - totalticks = stop.si_user - start.si_user + stop.si_nice - start.si_nice + - stop.si_sys - start.si_sys + idleticks; - printf("CPU utilization during %s was %d %%\n", s, 100 - 100*idleticks/totalticks); -#endif /* __osf__ */ #ifdef AIX - long idle; + long idle; - /* XXX compute a percentage here */ - idle = (long)(sysinfo_stop.cpu[CPU_IDLE] - sysinfo_start.cpu[CPU_IDLE]); - printf("%ld idle ticks during %s.\n", idle, s); -#endif /* AIX */ + /* XXX compute a percentage here */ + idle = (long) (sysinfo_stop.cpu[CPU_IDLE] - sysinfo_start.cpu[CPU_IDLE]); + printf("%ld idle ticks during %s.\n", idle, s); +#endif /* AIX */ } diff --git a/sys/dev/raidframe/rf_cpuutil.h b/sys/dev/raidframe/rf_cpuutil.h index 72603d9aae6..b1bdac4b8a2 100644 
--- a/sys/dev/raidframe/rf_cpuutil.h +++ b/sys/dev/raidframe/rf_cpuutil.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_cpuutil.h,v 1.1 1999/01/11 14:29:03 niklas Exp $ */ -/* $NetBSD: rf_cpuutil.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_cpuutil.h,v 1.2 1999/02/16 00:02:27 niklas Exp $ */ +/* $NetBSD: rf_cpuutil.h,v 1.2 1999/02/05 00:06:07 oster Exp $ */ /* * rf_cpuutil.h */ @@ -30,8 +30,8 @@ * rights to redistribute these changes. */ /* - * : - * Log: rf_cpuutil.h,v + * : + * Log: rf_cpuutil.h,v * Revision 1.3 1996/07/18 22:57:14 jimz * port simulator to AIX * @@ -49,9 +49,9 @@ #include "rf_types.h" -int rf_ConfigureCpuMonitor(RF_ShutdownList_t **listp); -void rf_start_cpu_monitor(void); -void rf_stop_cpu_monitor(void); -void rf_print_cpu_util(char *s); +int rf_ConfigureCpuMonitor(RF_ShutdownList_t ** listp); +void rf_start_cpu_monitor(void); +void rf_stop_cpu_monitor(void); +void rf_print_cpu_util(char *s); -#endif /* !_RF__RF_CPUUTIL_H_ */ +#endif /* !_RF__RF_CPUUTIL_H_ */ diff --git a/sys/dev/raidframe/rf_cvscan.c b/sys/dev/raidframe/rf_cvscan.c index 73a6e64d001..4076883cfa0 100644 --- a/sys/dev/raidframe/rf_cvscan.c +++ b/sys/dev/raidframe/rf_cvscan.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_cvscan.c,v 1.1 1999/01/11 14:29:05 niklas Exp $ */ -/* $NetBSD: rf_cvscan.c,v 1.2 1998/11/18 15:13:51 oster Exp $ */ +/* $OpenBSD: rf_cvscan.c,v 1.2 1999/02/16 00:02:28 niklas Exp $ */ +/* $NetBSD: rf_cvscan.c,v 1.4 1999/02/05 00:06:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,39 +29,12 @@ /******************************************************************************* * - * cvscan.c -- prioritized cvscan disk queueing code. + * cvscan.c -- prioritized cvscan disk queueing code. 
* * Nov 9, 1994, adapted from raidSim version (MCH) * ******************************************************************************/ -/* - * : - * Log: rf_cvscan.c,v - * Revision 1.6 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.5 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.4 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.3 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.2 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.1 1996/06/05 19:17:40 jimz - * Initial revision - * - */ - #include "rf_types.h" #include "rf_alloclist.h" #include "rf_stripelocks.h" @@ -76,34 +49,33 @@ #define pri_ok(p) ( ((p) == RF_IO_NORMAL_PRIORITY) || ((p) == RF_IO_LOW_PRIORITY)) -static void CheckCvscanState(RF_CvscanHeader_t *hdr, char *file, int line) +static void +CheckCvscanState(RF_CvscanHeader_t * hdr, char *file, int line) { - long i, key; + long i, key; RF_DiskQueueData_t *tmp; - if( hdr->left != (RF_DiskQueueData_t *) NULL ) - RF_ASSERT( hdr->left->sectorOffset < hdr->cur_block ); - for( key=hdr->cur_block, i=0, tmp=hdr->left; - tmp != (RF_DiskQueueData_t *) NULL; - key=tmp->sectorOffset, i++, tmp=tmp->next ) - RF_ASSERT( tmp->sectorOffset <= key - && tmp->priority == hdr->nxt_priority && pri_ok(tmp->priority) ); - RF_ASSERT( i == hdr->left_cnt ); - - for( key=hdr->cur_block, i=0, tmp=hdr->right; - tmp != (RF_DiskQueueData_t *) NULL; - key=tmp->sectorOffset, i++, tmp=tmp->next ) - { + if (hdr->left != (RF_DiskQueueData_t *) NULL) + RF_ASSERT(hdr->left->sectorOffset < hdr->cur_block); + for (key = hdr->cur_block, i = 0, tmp = hdr->left; + tmp != (RF_DiskQueueData_t *) NULL; + key = tmp->sectorOffset, i++, tmp = tmp->next) 
+ RF_ASSERT(tmp->sectorOffset <= key + && tmp->priority == hdr->nxt_priority && pri_ok(tmp->priority)); + RF_ASSERT(i == hdr->left_cnt); + + for (key = hdr->cur_block, i = 0, tmp = hdr->right; + tmp != (RF_DiskQueueData_t *) NULL; + key = tmp->sectorOffset, i++, tmp = tmp->next) { RF_ASSERT(key <= tmp->sectorOffset); RF_ASSERT(tmp->priority == hdr->nxt_priority); RF_ASSERT(pri_ok(tmp->priority)); } - RF_ASSERT( i == hdr->right_cnt ); + RF_ASSERT(i == hdr->right_cnt); - for( key=hdr->nxt_priority-1, tmp=hdr->burner; - tmp != (RF_DiskQueueData_t *) NULL; - key=tmp->priority, tmp=tmp->next ) - { + for (key = hdr->nxt_priority - 1, tmp = hdr->burner; + tmp != (RF_DiskQueueData_t *) NULL; + key = tmp->priority, tmp = tmp->next) { RF_ASSERT(tmp); RF_ASSERT(hdr); RF_ASSERT(pri_ok(tmp->priority)); @@ -114,73 +86,76 @@ static void CheckCvscanState(RF_CvscanHeader_t *hdr, char *file, int line) -static void PriorityInsert(RF_DiskQueueData_t **list_ptr, RF_DiskQueueData_t *req ) +static void +PriorityInsert(RF_DiskQueueData_t ** list_ptr, RF_DiskQueueData_t * req) { - /* - ** insert block pointed to by req in to list whose first - ** entry is pointed to by the pointer that list_ptr points to - ** ie., list_ptr is a grandparent of the first entry - */ - - for( ; (*list_ptr)!=(RF_DiskQueueData_t *)NULL && - (*list_ptr)->priority > req->priority; - list_ptr = &((*list_ptr)->next) ) {} + /* * insert block pointed to by req in to list whose first * entry is + * pointed to by the pointer that list_ptr points to * ie., list_ptr + * is a grandparent of the first entry */ + + for (; (*list_ptr) != (RF_DiskQueueData_t *) NULL && + (*list_ptr)->priority > req->priority; + list_ptr = &((*list_ptr)->next)) { + } req->next = (*list_ptr); (*list_ptr) = req; } -static void ReqInsert(RF_DiskQueueData_t **list_ptr, RF_DiskQueueData_t *req, RF_CvscanArmDir_t order) +static void +ReqInsert(RF_DiskQueueData_t ** list_ptr, RF_DiskQueueData_t * req, RF_CvscanArmDir_t order) { - /* - ** insert block 
pointed to by req in to list whose first - ** entry is pointed to by the pointer that list_ptr points to - ** ie., list_ptr is a grandparent of the first entry - */ + /* * insert block pointed to by req in to list whose first * entry is + * pointed to by the pointer that list_ptr points to * ie., list_ptr + * is a grandparent of the first entry */ - for( ; (*list_ptr)!=(RF_DiskQueueData_t *)NULL && + for (; (*list_ptr) != (RF_DiskQueueData_t *) NULL && - ( (order==rf_cvscan_RIGHT && (*list_ptr)->sectorOffset <= req->sectorOffset) - || (order==rf_cvscan_LEFT && (*list_ptr)->sectorOffset > req->sectorOffset) ); - list_ptr = &((*list_ptr)->next) ) {} + ((order == rf_cvscan_RIGHT && (*list_ptr)->sectorOffset <= req->sectorOffset) + || (order == rf_cvscan_LEFT && (*list_ptr)->sectorOffset > req->sectorOffset)); + list_ptr = &((*list_ptr)->next)) { + } req->next = (*list_ptr); (*list_ptr) = req; } -static RF_DiskQueueData_t *ReqDequeue(RF_DiskQueueData_t **list_ptr) +static RF_DiskQueueData_t * +ReqDequeue(RF_DiskQueueData_t ** list_ptr) { - RF_DiskQueueData_t * ret = (*list_ptr); - if( (*list_ptr) != (RF_DiskQueueData_t *) NULL ) { + RF_DiskQueueData_t *ret = (*list_ptr); + if ((*list_ptr) != (RF_DiskQueueData_t *) NULL) { (*list_ptr) = (*list_ptr)->next; } - return( ret ); + return (ret); } -static void ReBalance(RF_CvscanHeader_t *hdr) +static void +ReBalance(RF_CvscanHeader_t * hdr) { /* DO_CHECK_STATE(hdr); */ - while( hdr->right != (RF_DiskQueueData_t *) NULL - && hdr->right->sectorOffset < hdr->cur_block ) { + while (hdr->right != (RF_DiskQueueData_t *) NULL + && hdr->right->sectorOffset < hdr->cur_block) { hdr->right_cnt--; hdr->left_cnt++; - ReqInsert( &hdr->left, ReqDequeue( &hdr->right ), rf_cvscan_LEFT ); + ReqInsert(&hdr->left, ReqDequeue(&hdr->right), rf_cvscan_LEFT); } /* DO_CHECK_STATE(hdr); */ } -static void Transfer(RF_DiskQueueData_t **to_list_ptr, RF_DiskQueueData_t **from_list_ptr ) +static void +Transfer(RF_DiskQueueData_t ** to_list_ptr, 
RF_DiskQueueData_t ** from_list_ptr) { RF_DiskQueueData_t *gp; - for( gp=(*from_list_ptr); gp != (RF_DiskQueueData_t *) NULL; ) { + for (gp = (*from_list_ptr); gp != (RF_DiskQueueData_t *) NULL;) { RF_DiskQueueData_t *p = gp->next; - PriorityInsert( to_list_ptr, gp ); + PriorityInsert(to_list_ptr, gp); gp = p; } (*from_list_ptr) = (RF_DiskQueueData_t *) NULL; @@ -188,37 +163,38 @@ static void Transfer(RF_DiskQueueData_t **to_list_ptr, RF_DiskQueueData_t **from -static void RealEnqueue(RF_CvscanHeader_t *hdr, RF_DiskQueueData_t *req) +static void +RealEnqueue(RF_CvscanHeader_t * hdr, RF_DiskQueueData_t * req) { RF_ASSERT(req->priority == RF_IO_NORMAL_PRIORITY || req->priority == RF_IO_LOW_PRIORITY); - + DO_CHECK_STATE(hdr); - if( hdr->left_cnt == 0 && hdr->right_cnt == 0 ) { + if (hdr->left_cnt == 0 && hdr->right_cnt == 0) { hdr->nxt_priority = req->priority; } - if( req->priority > hdr->nxt_priority ) { + if (req->priority > hdr->nxt_priority) { /* ** dump all other outstanding requests on the back burner */ - Transfer( &hdr->burner, &hdr->left ); - Transfer( &hdr->burner, &hdr->right ); + Transfer(&hdr->burner, &hdr->left); + Transfer(&hdr->burner, &hdr->right); hdr->left_cnt = 0; hdr->right_cnt = 0; hdr->nxt_priority = req->priority; } - if( req->priority < hdr->nxt_priority ) { + if (req->priority < hdr->nxt_priority) { /* ** yet another low priority task! 
*/ - PriorityInsert( &hdr->burner, req ); + PriorityInsert(&hdr->burner, req); } else { - if( req->sectorOffset < hdr->cur_block ) { + if (req->sectorOffset < hdr->cur_block) { /* this request is to the left of the current arms */ - ReqInsert( &hdr->left, req, rf_cvscan_LEFT ); + ReqInsert(&hdr->left, req, rf_cvscan_LEFT); hdr->left_cnt++; } else { /* this request is to the right of the current arms */ - ReqInsert( &hdr->right, req, rf_cvscan_RIGHT ); + ReqInsert(&hdr->right, req, rf_cvscan_RIGHT); hdr->right_cnt++; } } @@ -227,106 +203,110 @@ static void RealEnqueue(RF_CvscanHeader_t *hdr, RF_DiskQueueData_t *req) -void rf_CvscanEnqueue(void *q_in, RF_DiskQueueData_t *elem, int priority) +void +rf_CvscanEnqueue(void *q_in, RF_DiskQueueData_t * elem, int priority) { - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - RealEnqueue( hdr, elem /*req*/ ); + RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; + RealEnqueue(hdr, elem /* req */ ); } -RF_DiskQueueData_t *rf_CvscanDequeue(void *q_in) +RF_DiskQueueData_t * +rf_CvscanDequeue(void *q_in) { - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - long range, i, sum_dist_left, sum_dist_right; + RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; + long range, i, sum_dist_left, sum_dist_right; RF_DiskQueueData_t *ret; RF_DiskQueueData_t *tmp; DO_CHECK_STATE(hdr); - - if( hdr->left_cnt == 0 && hdr->right_cnt == 0 ) return( (RF_DiskQueueData_t *) NULL ); - - range = RF_MIN( hdr->range_for_avg, RF_MIN(hdr->left_cnt,hdr->right_cnt)); - for( i=0, tmp=hdr->left, sum_dist_left= - ((hdr->direction==rf_cvscan_RIGHT)?range*hdr->change_penalty:0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++ ) { + + if (hdr->left_cnt == 0 && hdr->right_cnt == 0) + return ((RF_DiskQueueData_t *) NULL); + + range = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, hdr->right_cnt)); + for (i = 0, tmp = hdr->left, sum_dist_left = + ((hdr->direction == rf_cvscan_RIGHT) ? 
range * hdr->change_penalty : 0); + tmp != (RF_DiskQueueData_t *) NULL && i < range; + tmp = tmp->next, i++) { sum_dist_left += hdr->cur_block - tmp->sectorOffset; } - for( i=0, tmp=hdr->right, sum_dist_right= - ((hdr->direction==rf_cvscan_LEFT)?range*hdr->change_penalty:0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++ ) { + for (i = 0, tmp = hdr->right, sum_dist_right = + ((hdr->direction == rf_cvscan_LEFT) ? range * hdr->change_penalty : 0); + tmp != (RF_DiskQueueData_t *) NULL && i < range; + tmp = tmp->next, i++) { sum_dist_right += tmp->sectorOffset - hdr->cur_block; } - if( hdr->right_cnt == 0 || sum_dist_left < sum_dist_right ) { + if (hdr->right_cnt == 0 || sum_dist_left < sum_dist_right) { hdr->direction = rf_cvscan_LEFT; hdr->cur_block = hdr->left->sectorOffset + hdr->left->numSector; - hdr->left_cnt = RF_MAX(hdr->left_cnt-1,0); + hdr->left_cnt = RF_MAX(hdr->left_cnt - 1, 0); tmp = hdr->left; - ret = (ReqDequeue(&hdr->left))/*->parent*/; + ret = (ReqDequeue(&hdr->left)) /*->parent*/ ; } else { hdr->direction = rf_cvscan_RIGHT; hdr->cur_block = hdr->right->sectorOffset + hdr->right->numSector; - hdr->right_cnt = RF_MAX(hdr->right_cnt-1,0); + hdr->right_cnt = RF_MAX(hdr->right_cnt - 1, 0); tmp = hdr->right; - ret = (ReqDequeue(&hdr->right))/*->parent*/; + ret = (ReqDequeue(&hdr->right)) /*->parent*/ ; } - ReBalance( hdr ); + ReBalance(hdr); - if( hdr->left_cnt == 0 && hdr->right_cnt == 0 - && hdr->burner != (RF_DiskQueueData_t *) NULL ) { + if (hdr->left_cnt == 0 && hdr->right_cnt == 0 + && hdr->burner != (RF_DiskQueueData_t *) NULL) { /* ** restore low priority requests for next dequeue */ RF_DiskQueueData_t *burner = hdr->burner; hdr->nxt_priority = burner->priority; - while( burner != (RF_DiskQueueData_t *) NULL - && burner->priority == hdr->nxt_priority ) { + while (burner != (RF_DiskQueueData_t *) NULL + && burner->priority == hdr->nxt_priority) { RF_DiskQueueData_t *next = burner->next; - RealEnqueue( hdr, burner ); + 
RealEnqueue(hdr, burner); burner = next; } hdr->burner = burner; } DO_CHECK_STATE(hdr); - return( ret ); + return (ret); } -RF_DiskQueueData_t *rf_CvscanPeek(void *q_in) +RF_DiskQueueData_t * +rf_CvscanPeek(void *q_in) { - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - long range, i, sum_dist_left, sum_dist_right; - RF_DiskQueueData_t *tmp, *headElement; + RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; + long range, i, sum_dist_left, sum_dist_right; + RF_DiskQueueData_t *tmp, *headElement; - DO_CHECK_STATE(hdr); - - if( hdr->left_cnt == 0 && hdr->right_cnt == 0 ) - headElement = NULL; - else { - range = RF_MIN( hdr->range_for_avg, RF_MIN(hdr->left_cnt,hdr->right_cnt)); - for( i=0, tmp=hdr->left, sum_dist_left= - ((hdr->direction==rf_cvscan_RIGHT)?range*hdr->change_penalty:0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++ ) { - sum_dist_left += hdr->cur_block - tmp->sectorOffset; - } - for( i=0, tmp=hdr->right, sum_dist_right= - ((hdr->direction==rf_cvscan_LEFT)?range*hdr->change_penalty:0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++ ) { - sum_dist_right += tmp->sectorOffset - hdr->cur_block; - } - - if( hdr->right_cnt == 0 || sum_dist_left < sum_dist_right ) - headElement = hdr->left; - else - headElement = hdr->right; - } - return(headElement); + DO_CHECK_STATE(hdr); + + if (hdr->left_cnt == 0 && hdr->right_cnt == 0) + headElement = NULL; + else { + range = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, hdr->right_cnt)); + for (i = 0, tmp = hdr->left, sum_dist_left = + ((hdr->direction == rf_cvscan_RIGHT) ? range * hdr->change_penalty : 0); + tmp != (RF_DiskQueueData_t *) NULL && i < range; + tmp = tmp->next, i++) { + sum_dist_left += hdr->cur_block - tmp->sectorOffset; + } + for (i = 0, tmp = hdr->right, sum_dist_right = + ((hdr->direction == rf_cvscan_LEFT) ? 
range * hdr->change_penalty : 0); + tmp != (RF_DiskQueueData_t *) NULL && i < range; + tmp = tmp->next, i++) { + sum_dist_right += tmp->sectorOffset - hdr->cur_block; + } + + if (hdr->right_cnt == 0 || sum_dist_left < sum_dist_right) + headElement = hdr->left; + else + headElement = hdr->right; + } + return (headElement); } @@ -339,25 +319,27 @@ RF_DiskQueueData_t *rf_CvscanPeek(void *q_in) */ -int rf_CvscanConfigure() +int +rf_CvscanConfigure() { - return(0); + return (0); } -void *rf_CvscanCreate(RF_SectorCount_t sectPerDisk, - RF_AllocListElem_t *clList, - RF_ShutdownList_t **listp) +void * +rf_CvscanCreate(RF_SectorCount_t sectPerDisk, + RF_AllocListElem_t * clList, + RF_ShutdownList_t ** listp) { RF_CvscanHeader_t *hdr; - long range = 2; /* Currently no mechanism to change these */ - long penalty = sectPerDisk / 5; + long range = 2; /* Currently no mechanism to change these */ + long penalty = sectPerDisk / 5; RF_MallocAndAdd(hdr, sizeof(RF_CvscanHeader_t), (RF_CvscanHeader_t *), clList); - bzero((char *)hdr, sizeof(RF_CvscanHeader_t)); - hdr->range_for_avg = RF_MAX( range, 1 ); - hdr->change_penalty = RF_MAX( penalty, 0 ); + bzero((char *) hdr, sizeof(RF_CvscanHeader_t)); + hdr->range_for_avg = RF_MAX(range, 1); + hdr->change_penalty = RF_MAX(penalty, 0); hdr->direction = rf_cvscan_RIGHT; hdr->cur_block = 0; hdr->left_cnt = hdr->right_cnt = 0; @@ -365,43 +347,44 @@ void *rf_CvscanCreate(RF_SectorCount_t sectPerDisk, hdr->burner = (RF_DiskQueueData_t *) NULL; DO_CHECK_STATE(hdr); - return( (void *) hdr ); + return ((void *) hdr); } #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) /* PrintCvscanQueue is not used, so we ignore it... 
*/ #else -static void PrintCvscanQueue(RF_CvscanHeader_t *hdr) -{ - RF_DiskQueueData_t *tmp; - - printf( "CVSCAN(%d,%d) at %d going %s\n", - (int)hdr->range_for_avg, - (int)hdr->change_penalty, - (int)hdr->cur_block, - (hdr->direction==rf_cvscan_LEFT)?"LEFT":"RIGHT" ); - printf( "\tLeft(%d): ", hdr->left_cnt ); - for( tmp = hdr->left; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) - printf( "(%d,%ld,%d) ", - (int) tmp->sectorOffset, - (long) (tmp->sectorOffset + tmp->numSector), - tmp->priority ); - printf( "\n" ); - printf( "\tRight(%d): ", hdr->right_cnt ); - for( tmp = hdr->right; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) - printf( "(%d,%ld,%d) ", - (int) tmp->sectorOffset, - (long) (tmp->sectorOffset + tmp->numSector), - tmp->priority ); - printf( "\n" ); - printf( "\tBurner: " ); - for( tmp = hdr->burner; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) - printf( "(%d,%ld,%d) ", - (int) tmp->sectorOffset, - (long) (tmp->sectorOffset + tmp->numSector), - tmp->priority ); - printf( "\n" ); ++ static void ++ PrintCvscanQueue(RF_CvscanHeader_t * hdr) + { + RF_DiskQueueData_t *tmp; + + printf("CVSCAN(%d,%d) at %d going %s\n", + (int) hdr->range_for_avg, + (int) hdr->change_penalty, + (int) hdr->cur_block, + (hdr->direction == rf_cvscan_LEFT) ? 
"LEFT" : "RIGHT"); + printf("\tLeft(%d): ", hdr->left_cnt); + for (tmp = hdr->left; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) + printf("(%d,%ld,%d) ", + (int) tmp->sectorOffset, + (long) (tmp->sectorOffset + tmp->numSector), + tmp->priority); + printf("\n"); + printf("\tRight(%d): ", hdr->right_cnt); + for (tmp = hdr->right; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) + printf("(%d,%ld,%d) ", + (int) tmp->sectorOffset, + (long) (tmp->sectorOffset + tmp->numSector), + tmp->priority); + printf("\n"); + printf("\tBurner: "); + for (tmp = hdr->burner; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) + printf("(%d,%ld,%d) ", + (int) tmp->sectorOffset, + (long) (tmp->sectorOffset + tmp->numSector), + tmp->priority); + printf("\n"); } #endif @@ -411,40 +394,47 @@ static void PrintCvscanQueue(RF_CvscanHeader_t *hdr) * only have one or zero entries in the burner queue, so execution time should * be short. */ -int rf_CvscanPromote(void *q_in, RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru) +int +rf_CvscanPromote(void *q_in, RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru) { - RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - RF_DiskQueueData_t *trailer = NULL, *tmp = hdr->burner, *tlist = NULL; - int retval=0; + RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; + RF_DiskQueueData_t *trailer = NULL, *tmp = hdr->burner, *tlist = NULL; + int retval = 0; DO_CHECK_STATE(hdr); - while (tmp) { /* handle entries at the front of the list */ - if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) { - hdr->burner = tmp->next; - tmp->priority = RF_IO_NORMAL_PRIORITY; - tmp->next = tlist; tlist=tmp; - tmp = hdr->burner; - } else break; - } - if (tmp) {trailer=tmp; tmp=tmp->next;} - while (tmp) { /* handle entries on the rest of the list */ - if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) { - trailer->next = tmp->next; - tmp->priority = RF_IO_NORMAL_PRIORITY; - tmp->next = tlist; tlist=tmp; /* 
insert on a temp queue */ - tmp = trailer->next; - } else { - trailer=tmp; tmp=tmp->next; + while (tmp) { /* handle entries at the front of the list */ + if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) { + hdr->burner = tmp->next; + tmp->priority = RF_IO_NORMAL_PRIORITY; + tmp->next = tlist; + tlist = tmp; + tmp = hdr->burner; + } else + break; + } + if (tmp) { + trailer = tmp; + tmp = tmp->next; + } + while (tmp) { /* handle entries on the rest of the list */ + if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) { + trailer->next = tmp->next; + tmp->priority = RF_IO_NORMAL_PRIORITY; + tmp->next = tlist; + tlist = tmp; /* insert on a temp queue */ + tmp = trailer->next; + } else { + trailer = tmp; + tmp = tmp->next; + } } - } - while (tlist) { - retval++; - tmp = tlist->next; - RealEnqueue(hdr, tlist); - tlist = tmp; - } - RF_ASSERT(retval==0 || retval==1); - DO_CHECK_STATE((RF_CvscanHeader_t *)q_in); - return(retval); + while (tlist) { + retval++; + tmp = tlist->next; + RealEnqueue(hdr, tlist); + tlist = tmp; + } + RF_ASSERT(retval == 0 || retval == 1); + DO_CHECK_STATE((RF_CvscanHeader_t *) q_in); + return (retval); } - diff --git a/sys/dev/raidframe/rf_cvscan.h b/sys/dev/raidframe/rf_cvscan.h index 4347fb06a63..4175865e6d6 100644 --- a/sys/dev/raidframe/rf_cvscan.h +++ b/sys/dev/raidframe/rf_cvscan.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_cvscan.h,v 1.1 1999/01/11 14:29:06 niklas Exp $ */ -/* $NetBSD: rf_cvscan.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_cvscan.h,v 1.2 1999/02/16 00:02:28 niklas Exp $ */ +/* $NetBSD: rf_cvscan.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -48,50 +48,38 @@ ** share the same, highest priority level. 
*/ -/* : - * Log: rf_cvscan.h,v - * Revision 1.3 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.2 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.1 1996/06/05 19:17:40 jimz - * Initial revision - * - */ - #ifndef _RF__RF_CVSCAN_H_ #define _RF__RF_CVSCAN_H_ #include "rf_diskqueue.h" typedef enum RF_CvscanArmDir_e { - rf_cvscan_LEFT, - rf_cvscan_RIGHT -} RF_CvscanArmDir_t; + rf_cvscan_LEFT, + rf_cvscan_RIGHT +} RF_CvscanArmDir_t; typedef struct RF_CvscanHeader_s { - long range_for_avg; /* CVSCAN param N */ - long change_penalty; /* CVSCAN param R */ - RF_CvscanArmDir_t direction; - RF_SectorNum_t cur_block; - int nxt_priority; - RF_DiskQueueData_t *left; - int left_cnt; - RF_DiskQueueData_t *right; - int right_cnt; - RF_DiskQueueData_t *burner; -} RF_CvscanHeader_t; + long range_for_avg; /* CVSCAN param N */ + long change_penalty; /* CVSCAN param R */ + RF_CvscanArmDir_t direction; + RF_SectorNum_t cur_block; + int nxt_priority; + RF_DiskQueueData_t *left; + int left_cnt; + RF_DiskQueueData_t *right; + int right_cnt; + RF_DiskQueueData_t *burner; +} RF_CvscanHeader_t; -int rf_CvscanConfigure(void); -void *rf_CvscanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t *cl_list, RF_ShutdownList_t **listp); -void rf_CvscanEnqueue(void *qptr, RF_DiskQueueData_t *req, int priority); +int rf_CvscanConfigure(void); +void * +rf_CvscanCreate(RF_SectorCount_t sect_per_disk, + RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); +void rf_CvscanEnqueue(void *qptr, RF_DiskQueueData_t * req, int priority); RF_DiskQueueData_t *rf_CvscanDequeue(void *qptr); RF_DiskQueueData_t *rf_CvscanPeek(void *qptr); -int rf_CvscanPromote(void *qptr, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); +int +rf_CvscanPromote(void *qptr, RF_StripeNum_t parityStripeID, + RF_ReconUnitNum_t which_ru); -#endif /* !_RF__RF_CVSCAN_H_ */ 
+#endif /* !_RF__RF_CVSCAN_H_ */ diff --git a/sys/dev/raidframe/rf_dag.h b/sys/dev/raidframe/rf_dag.h index f13fc3f76c3..4200e3bb193 100644 --- a/sys/dev/raidframe/rf_dag.h +++ b/sys/dev/raidframe/rf_dag.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dag.h,v 1.1 1999/01/11 14:29:06 niklas Exp $ */ -/* $NetBSD: rf_dag.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_dag.h,v 1.2 1999/02/16 00:02:28 niklas Exp $ */ +/* $NetBSD: rf_dag.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -32,106 +32,6 @@ * dag.h -- header file for DAG-related data structures * * * ****************************************************************************/ -/* - * - * : - * Log: rf_dag.h,v - * Revision 1.35 1996/11/05 18:38:37 jimz - * add patch from galvarez@cs.ucsd.edu (Guillermo Alvarez) - * to fix dag_params memory-sizing problem (should be an array - * of the type, not an array of pointers to the type) - * - * Revision 1.34 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.33 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.32 1996/06/10 22:22:13 wvcii - * added two node status types for use in backward error - * recovery experiments. 
- * - * Revision 1.31 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.30 1996/06/07 22:49:18 jimz - * fix up raidPtr typing - * - * Revision 1.29 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.28 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.27 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.26 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.25 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.24 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.23 1996/05/16 23:05:20 jimz - * Added dag_ptrs field, RF_DAG_PTRCACHESIZE - * - * The dag_ptrs field of the node is basically some scribble - * space to be used here. We could get rid of it, and always - * allocate the range of pointers, but that's expensive. So, - * we pick a "common case" size for the pointer cache. 
Hopefully, - * we'll find that: - * (1) Generally, nptrs doesn't exceed RF_DAG_PTRCACHESIZE by - * only a little bit (least efficient case) - * (2) Generally, ntprs isn't a lot less than RF_DAG_PTRCACHESIZE - * (wasted memory) - * - * Revision 1.22 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.21 1996/05/08 15:23:47 wvcii - * added new node states: undone, recover, panic - * - * Revision 1.20 1995/12/01 14:59:19 root - * increased MAX_ANTECEDENTS from 10 to 20 - * should consider getting rid of this (eliminate static array) - * - * Revision 1.19 1995/11/30 15:58:59 wvcii - * added copyright info - * - * Revision 1.18 1995/11/19 16:27:03 wvcii - * created struct dagList - * - * Revision 1.17 1995/11/07 15:43:01 wvcii - * added static array to DAGnode: antType - * added commitNode type - * added commit node counts to dag header - * added ptr (firstDag) to support multi-dag requests - * added succedent done/fired counts to nodes to support rollback - * added node status type "skipped" - * added hdr status types "rollForward, rollBackward" - * deleted hdr status type "disable" - * updated ResetNode & ResetDAGHeader to zero new fields - * - */ #ifndef _RF__RF_DAG_H_ #define _RF__RF_DAG_H_ @@ -145,143 +45,168 @@ #include "rf_acctrace.h" #include "rf_memchunk.h" -#define RF_THREAD_CONTEXT 0 /* we were invoked from thread context */ -#define RF_INTR_CONTEXT 1 /* we were invoked from interrupt context */ -#define RF_MAX_ANTECEDENTS 20 /* max num of antecedents a node may posses */ +#define RF_THREAD_CONTEXT 0 /* we were invoked from thread context */ +#define RF_INTR_CONTEXT 1 /* we were invoked from interrupt context */ +#define RF_MAX_ANTECEDENTS 20 /* max num of antecedents a node may posses */ -#ifdef KERNEL #include <sys/buf.h> -#endif /* KERNEL */ -struct RF_PropHeader_s { /* 
structure for propagation of results */ - int resultNum; /* bind result # resultNum */ - int paramNum; /* to parameter # paramNum */ - RF_PropHeader_t *next; /* linked list for multiple results/params */ +struct RF_PropHeader_s { /* structure for propagation of results */ + int resultNum; /* bind result # resultNum */ + int paramNum; /* to parameter # paramNum */ + RF_PropHeader_t *next; /* linked list for multiple results/params */ }; typedef enum RF_NodeStatus_e { - rf_bwd1, /* node is ready for undo logging (backward error recovery only) */ - rf_bwd2, /* node has completed undo logging (backward error recovery only) */ - rf_wait, /* node is waiting to be executed */ - rf_fired, /* node is currently executing its do function */ - rf_good, /* node successfully completed execution of its do function */ - rf_bad, /* node failed to successfully execute its do function */ - rf_skipped, /* not used anymore, used to imply a node was not executed */ - rf_recover, /* node is currently executing its undo function */ - rf_panic, /* node failed to successfully execute its undo function */ - rf_undone /* node successfully executed its undo function */ -} RF_NodeStatus_t; - + rf_bwd1, /* node is ready for undo logging (backward + * error recovery only) */ + rf_bwd2, /* node has completed undo logging (backward + * error recovery only) */ + rf_wait, /* node is waiting to be executed */ + rf_fired, /* node is currently executing its do function */ + rf_good, /* node successfully completed execution of + * its do function */ + rf_bad, /* node failed to successfully execute its do + * function */ + rf_skipped, /* not used anymore, used to imply a node was + * not executed */ + rf_recover, /* node is currently executing its undo + * function */ + rf_panic, /* node failed to successfully execute its + * undo function */ + rf_undone /* node successfully executed its undo + * function */ +} RF_NodeStatus_t; /* * These were used to control skipping a node. 
* Now, these are only used as comments. */ typedef enum RF_AntecedentType_e { - rf_trueData, - rf_antiData, - rf_outputData, - rf_control -} RF_AntecedentType_t; - + rf_trueData, + rf_antiData, + rf_outputData, + rf_control +} RF_AntecedentType_t; #define RF_DAG_PTRCACHESIZE 40 #define RF_DAG_PARAMCACHESIZE 12 typedef RF_uint8 RF_DagNodeFlags_t; struct RF_DagNode_s { - RF_NodeStatus_t status; /* current status of this node */ - int (*doFunc)(RF_DagNode_t *); /* normal function */ - int (*undoFunc)(RF_DagNode_t *); /* func to remove effect of doFunc */ - int (*wakeFunc)(RF_DagNode_t *, int status); /* func called when the node completes an I/O */ - int numParams; /* number of parameters required by *funcPtr */ - int numResults; /* number of results produced by *funcPtr */ - int numAntecedents; /* number of antecedents */ - int numAntDone; /* number of antecedents which have finished */ - int numSuccedents; /* number of succedents */ - int numSuccFired; /* incremented when a succedent is fired during forward execution */ - int numSuccDone; /* incremented when a succedent finishes during rollBackward */ - int commitNode; /* boolean flag - if true, this is a commit node */ - RF_DagNode_t **succedents; /* succedents, array size numSuccedents */ - RF_DagNode_t **antecedents; /* antecedents, array size numAntecedents */ - RF_AntecedentType_t antType[RF_MAX_ANTECEDENTS]; /* type of each antecedent */ - void **results; /* array of results produced by *funcPtr */ - RF_DagParam_t *params; /* array of parameters required by *funcPtr */ - RF_PropHeader_t **propList; /* propagation list, size numSuccedents */ - RF_DagHeader_t *dagHdr; /* ptr to head of dag containing this node */ - void *dagFuncData; /* dag execution func uses this for whatever it wants */ - RF_DagNode_t *next; - int nodeNum; /* used by PrintDAG for debug only */ - int visited; /* used to avoid re-visiting nodes on DAG walks */ - /* ANY CODE THAT USES THIS FIELD MUST MAINTAIN THE PROPERTY - * THAT AFTER IT 
FINISHES, ALL VISITED FLAGS IN THE DAG ARE IDENTICAL */ - char *name; /* debug only */ - RF_DagNodeFlags_t flags; /* see below */ - RF_DagNode_t *dag_ptrs[RF_DAG_PTRCACHESIZE]; /* cache for performance */ - RF_DagParam_t dag_params[RF_DAG_PARAMCACHESIZE]; /* cache for performance */ + RF_NodeStatus_t status; /* current status of this node */ + int (*doFunc) (RF_DagNode_t *); /* normal function */ + int (*undoFunc) (RF_DagNode_t *); /* func to remove effect of + * doFunc */ + int (*wakeFunc) (RF_DagNode_t *, int status); /* func called when the + * node completes an I/O */ + int numParams; /* number of parameters required by *funcPtr */ + int numResults; /* number of results produced by *funcPtr */ + int numAntecedents; /* number of antecedents */ + int numAntDone; /* number of antecedents which have finished */ + int numSuccedents; /* number of succedents */ + int numSuccFired; /* incremented when a succedent is fired + * during forward execution */ + int numSuccDone; /* incremented when a succedent finishes + * during rollBackward */ + int commitNode; /* boolean flag - if true, this is a commit + * node */ + RF_DagNode_t **succedents; /* succedents, array size + * numSuccedents */ + RF_DagNode_t **antecedents; /* antecedents, array size + * numAntecedents */ + RF_AntecedentType_t antType[RF_MAX_ANTECEDENTS]; /* type of each + * antecedent */ + void **results; /* array of results produced by *funcPtr */ + RF_DagParam_t *params; /* array of parameters required by *funcPtr */ + RF_PropHeader_t **propList; /* propagation list, size + * numSuccedents */ + RF_DagHeader_t *dagHdr; /* ptr to head of dag containing this node */ + void *dagFuncData; /* dag execution func uses this for whatever + * it wants */ + RF_DagNode_t *next; + int nodeNum; /* used by PrintDAG for debug only */ + int visited; /* used to avoid re-visiting nodes on DAG + * walks */ + /* ANY CODE THAT USES THIS FIELD MUST MAINTAIN THE PROPERTY THAT AFTER + * IT FINISHES, ALL VISITED FLAGS IN THE DAG ARE 
IDENTICAL */ + char *name; /* debug only */ + RF_DagNodeFlags_t flags;/* see below */ + RF_DagNode_t *dag_ptrs[RF_DAG_PTRCACHESIZE]; /* cache for performance */ + RF_DagParam_t dag_params[RF_DAG_PARAMCACHESIZE]; /* cache for performance */ }; - /* * Bit values for flags field of RF_DagNode_t */ #define RF_DAGNODE_FLAG_NONE 0x00 -#define RF_DAGNODE_FLAG_YIELD 0x01 /* in the kernel, yield the processor before firing this node */ +#define RF_DAGNODE_FLAG_YIELD 0x01 /* in the kernel, yield the processor + * before firing this node */ /* enable - DAG ready for normal execution, no errors encountered * rollForward - DAG encountered an error after commit point, rolling forward * rollBackward - DAG encountered an error prior to commit point, rolling backward */ typedef enum RF_DagStatus_e { - rf_enable, - rf_rollForward, - rf_rollBackward -} RF_DagStatus_t; - + rf_enable, + rf_rollForward, + rf_rollBackward +} RF_DagStatus_t; #define RF_MAX_HDR_SUCC 1 #define RF_MAXCHUNKS 10 struct RF_DagHeader_s { - RF_DagStatus_t status; /* status of this DAG */ - int numSuccedents; /* DAG may be a tree, i.e. may have > 1 root */ - int numCommitNodes; /* number of commit nodes in graph */ - int numCommits; /* number of commit nodes which have been fired */ - RF_DagNode_t *succedents[RF_MAX_HDR_SUCC]; /* array of succedents, size numSuccedents */ - RF_DagHeader_t *next; /* ptr to allow a list of dags */ - RF_AllocListElem_t *allocList; /* ptr to list of ptrs to be freed prior to freeing DAG */ - RF_AccessStripeMapHeader_t *asmList; /* list of access stripe maps to be freed */ - int nodeNum; /* used by PrintDAG for debug only */ - int numNodesCompleted; - RF_AccTraceEntry_t *tracerec; /* perf mon only */ - - void (*cbFunc)(void *); /* function to call when the dag completes */ - void *cbArg; /* argument for cbFunc */ - char *creator; /* name of function used to create this dag */ + RF_DagStatus_t status; /* status of this DAG */ + int numSuccedents; /* DAG may be a tree, i.e. 
may have > 1 root */ + int numCommitNodes; /* number of commit nodes in graph */ + int numCommits; /* number of commit nodes which have been + * fired */ + RF_DagNode_t *succedents[RF_MAX_HDR_SUCC]; /* array of succedents, + * size numSuccedents */ + RF_DagHeader_t *next; /* ptr to allow a list of dags */ + RF_AllocListElem_t *allocList; /* ptr to list of ptrs to be freed + * prior to freeing DAG */ + RF_AccessStripeMapHeader_t *asmList; /* list of access stripe maps + * to be freed */ + int nodeNum; /* used by PrintDAG for debug only */ + int numNodesCompleted; + RF_AccTraceEntry_t *tracerec; /* perf mon only */ + + void (*cbFunc) (void *); /* function to call when the dag + * completes */ + void *cbArg; /* argument for cbFunc */ + char *creator; /* name of function used to create this dag */ + + RF_Raid_t *raidPtr; /* the descriptor for the RAID device this DAG + * is for */ + void *bp; /* the bp for this I/O passed down from the + * file system. ignored outside kernel */ + + RF_ChunkDesc_t *memChunk[RF_MAXCHUNKS]; /* experimental- Chunks of + * memory to be retained upon + * DAG free for re-use */ + int chunkIndex; /* the idea is to avoid calls to alloc and + * free */ + + RF_ChunkDesc_t **xtraMemChunk; /* escape hatch which allows + * SelectAlgorithm to merge memChunks + * from several dags */ + int xtraChunkIndex; /* number of ptrs to valid chunks */ + int xtraChunkCnt; /* number of ptrs to chunks allocated */ - RF_Raid_t *raidPtr; /* the descriptor for the RAID device this DAG is for */ - void *bp; /* the bp for this I/O passed down from the file system. 
ignored outside kernel */ - - RF_ChunkDesc_t *memChunk[RF_MAXCHUNKS]; /* experimental- Chunks of memory to be retained upon DAG free for re-use */ - int chunkIndex; /* the idea is to avoid calls to alloc and free */ - - RF_ChunkDesc_t **xtraMemChunk; /* escape hatch which allows SelectAlgorithm to merge memChunks from several dags */ - int xtraChunkIndex; /* number of ptrs to valid chunks */ - int xtraChunkCnt; /* number of ptrs to chunks allocated */ - -#ifdef SIMULATE - int done; /* Tag to tell if termination node has been fired */ -#endif /* SIMULATE */ }; struct RF_DagList_s { - /* common info for a list of dags which will be fired sequentially */ - int numDags; /* number of dags in the list */ - int numDagsFired; /* number of dags in list which have initiated execution */ - int numDagsDone; /* number of dags in list which have completed execution */ - RF_DagHeader_t *dags; /* list of dags */ - RF_RaidAccessDesc_t *desc; /* ptr to descriptor for this access */ - RF_AccTraceEntry_t tracerec; /* perf mon info for dags (not user info) */ + /* common info for a list of dags which will be fired sequentially */ + int numDags; /* number of dags in the list */ + int numDagsFired; /* number of dags in list which have initiated + * execution */ + int numDagsDone; /* number of dags in list which have completed + * execution */ + RF_DagHeader_t *dags; /* list of dags */ + RF_RaidAccessDesc_t *desc; /* ptr to descriptor for this access */ + RF_AccTraceEntry_t tracerec; /* perf mon info for dags (not user + * info) */ }; - /* resets a node so that it can be fired again */ #define RF_ResetNode(_n_) { \ (_n_)->status = rf_wait; \ @@ -291,20 +216,11 @@ struct RF_DagList_s { (_n_)->next = NULL; \ } -#ifdef SIMULATE -#define RF_ResetDagHeader(_h_) { \ - (_h_)->done = RF_FALSE; \ - (_h_)->numNodesCompleted = 0; \ - (_h_)->numCommits = 0; \ - (_h_)->status = rf_enable; \ -} -#else /* SIMULATE */ #define RF_ResetDagHeader(_h_) { \ (_h_)->numNodesCompleted = 0; \ (_h_)->numCommits = 
0; \ (_h_)->status = rf_enable; \ } -#endif /* SIMULATE */ /* convience macro for declaring a create dag function */ @@ -317,4 +233,4 @@ void _name_ ( \ RF_RaidAccessFlags_t flags, \ RF_AllocListElem_t *allocList) -#endif /* !_RF__RF_DAG_H_ */ +#endif /* !_RF__RF_DAG_H_ */ diff --git a/sys/dev/raidframe/rf_dagdegrd.c b/sys/dev/raidframe/rf_dagdegrd.c index 06390061306..652c9600012 100644 --- a/sys/dev/raidframe/rf_dagdegrd.c +++ b/sys/dev/raidframe/rf_dagdegrd.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagdegrd.c,v 1.1 1999/01/11 14:29:06 niklas Exp $ */ -/* $NetBSD: rf_dagdegrd.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_dagdegrd.c,v 1.2 1999/02/16 00:02:29 niklas Exp $ */ +/* $NetBSD: rf_dagdegrd.c,v 1.3 1999/02/05 00:06:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -31,96 +31,6 @@ * rf_dagdegrd.c * * code for creating degraded read DAGs - * - * : - * Log: rf_dagdegrd.c,v - * Revision 1.20 1996/11/05 21:10:40 jimz - * failed pda generalization - * - * Revision 1.19 1996/08/19 23:30:36 jimz - * fix chained declustered accesses in degraded mode when mirror copy is failed - * (workload shifting not allowed when there are no duplicate copies extant) - * - * Revision 1.18 1996/07/31 16:29:01 jimz - * asm/asmap re-fix (EO merge) - * - * Revision 1.17 1996/07/31 15:34:34 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.16 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.15 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.14 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.13 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup 
whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.12 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.11 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.10 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.9 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.8 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.7 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.6 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.5 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.4 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.1 1996/05/03 19:22:23 wvcii - * Initial revision - * */ #include "rf_types.h" @@ -139,7 +49,7 @@ /****************************************************************************** * * General comments on DAG creation: - * + * * All DAGs in this file use roll-away error recovery. Each DAG has a single * commit node, usually called "Cmt." If an error occurs before the Cmt node * is reached, the execution engine will halt forward execution and work @@ -163,16 +73,17 @@ * the DAG creation routines to be replaced at this single point. 
*/ -void rf_CreateRaidFiveDegradedReadDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList) +void +rf_CreateRaidFiveDegradedReadDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList) { - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - &rf_xorRecoveryFuncs); + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + &rf_xorRecoveryFuncs); } @@ -194,111 +105,115 @@ void rf_CreateRaidFiveDegradedReadDAG( * Parameters: raidPtr - description of the physical array * asmap - logical & physical addresses for this access * bp - buffer ptr (for holding write data) - * flags - general flags (e.g. disk locking) + * flags - general flags (e.g. disk locking) * allocList - list of memory allocated in DAG creation *****************************************************************************/ -void rf_CreateRaidOneDegradedReadDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList) +void +rf_CreateRaidOneDegradedReadDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList) { - RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda; - int useMirror, i; - - useMirror = 0; - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - if (rf_dagDebug) { - printf("[Creating RAID level 1 degraded read DAG]\n"); - } - dag_h->creator = "RaidOneDegradedReadDAG"; - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 0) - useMirror = RF_FALSE; - else - useMirror = RF_TRUE; - - /* 
total number of nodes = 1 + (block + commit + terminator) */ - RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - rdNode = &nodes[i]; i++; - blockNode = &nodes[i]; i++; - commitNode = &nodes[i]; i++; - termNode = &nodes[i]; i++; - - /* this dag can not commit until the commit node is reached. errors prior - * to the commit point imply the dag has failed and must be retried - */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the block, commit, and terminator nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - pda = asmap->physInfo; - RF_ASSERT(pda != NULL); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - - /* initialize the data node */ - if (!useMirror) { - /* read primary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); - rdNode->params[0].p = pda; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - else { - /* read secondary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); - rdNode->params[0].p = asmap->parityInfo; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* connect header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - 
RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to rdnode */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(rdNode->numAntecedents == 1); - blockNode->succedents[0] = rdNode; - rdNode->antecedents[0] = blockNode; - rdNode->antType[0] = rf_control; - - /* connect rdnode to commit node */ - RF_ASSERT(rdNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - rdNode->succedents[0] = commitNode; - commitNode->antecedents[0] = rdNode; - commitNode->antType[0] = rf_control; - - /* connect commit node to terminator */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = rf_control; + RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; + RF_StripeNum_t parityStripeID; + RF_ReconUnitNum_t which_ru; + RF_PhysDiskAddr_t *pda; + int useMirror, i; + + useMirror = 0; + parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), + asmap->raidAddress, &which_ru); + if (rf_dagDebug) { + printf("[Creating RAID level 1 degraded read DAG]\n"); + } + dag_h->creator = "RaidOneDegradedReadDAG"; + /* alloc the Wnd nodes and the Wmir node */ + if (asmap->numDataFailed == 0) + useMirror = RF_FALSE; + else + useMirror = RF_TRUE; + + /* total number of nodes = 1 + (block + commit + terminator) */ + RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + i = 0; + rdNode = &nodes[i]; + i++; + blockNode = &nodes[i]; + i++; + commitNode = &nodes[i]; + i++; + termNode = &nodes[i]; + i++; + + /* this dag can not commit until the commit node is reached. 
errors + * prior to the commit point imply the dag has failed and must be + * retried */ + dag_h->numCommitNodes = 1; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + /* initialize the block, commit, and terminator nodes */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, + NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + + pda = asmap->physInfo; + RF_ASSERT(pda != NULL); + /* parityInfo must describe entire parity unit */ + RF_ASSERT(asmap->parityInfo->next == NULL); + + /* initialize the data node */ + if (!useMirror) { + /* read primary copy of data */ + rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); + rdNode->params[0].p = pda; + rdNode->params[1].p = pda->bufPtr; + rdNode->params[2].v = parityStripeID; + rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } else { + /* read secondary copy of data */ + rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); + rdNode->params[0].p = asmap->parityInfo; + rdNode->params[1].p = pda->bufPtr; + rdNode->params[2].v = parityStripeID; + rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } + + /* connect header to block node */ + RF_ASSERT(dag_h->numSuccedents == 1); + RF_ASSERT(blockNode->numAntecedents == 0); + dag_h->succedents[0] = blockNode; + + /* connect block node to rdnode */ + RF_ASSERT(blockNode->numSuccedents == 1); + RF_ASSERT(rdNode->numAntecedents == 1); + blockNode->succedents[0] = rdNode; + rdNode->antecedents[0] = blockNode; + rdNode->antType[0] = rf_control; + + /* connect rdnode to 
commit node */ + RF_ASSERT(rdNode->numSuccedents == 1); + RF_ASSERT(commitNode->numAntecedents == 1); + rdNode->succedents[0] = commitNode; + commitNode->antecedents[0] = rdNode; + commitNode->antType[0] = rf_control; + + /* connect commit node to terminator */ + RF_ASSERT(commitNode->numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == 1); + RF_ASSERT(termNode->numSuccedents == 0); + commitNode->succedents[0] = termNode; + termNode->antecedents[0] = commitNode; + termNode->antType[0] = rf_control; } @@ -329,259 +244,268 @@ void rf_CreateRaidOneDegradedReadDAG( * * The recfunc argument at the end specifies the name and function used for * the redundancy - * recovery function. + * recovery function. * *****************************************************************************/ -void rf_CreateDegradedReadDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - RF_RedFuncs_t *recFunc) +void +rf_CreateDegradedReadDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + RF_RedFuncs_t * recFunc) { - RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode; - RF_DagNode_t *commitNode, *rpNode, *termNode; - int nNodes, nRrdNodes, nRudNodes, nXorBufs, i; - int j, paramNum; - RF_SectorCount_t sectorsPerSU; - RF_ReconUnitNum_t which_ru; - char *overlappingPDAs; /* a temporary array of flags */ - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_PhysDiskAddr_t *pda, *parityPDA; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *failedPDA; - RF_RaidLayout_t *layoutPtr; - char *rpBuf; - - layoutPtr = &(raidPtr->Layout); - /* failedPDA points to the pda within the asm that targets the failed disk */ - failedPDA = asmap->failedPDAs[0]; - parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, - asmap->raidAddress, &which_ru); - sectorsPerSU = 
layoutPtr->sectorsPerStripeUnit; - - if (rf_dagDebug) { - printf("[Creating degraded read DAG]\n"); - } - - RF_ASSERT( asmap->numDataFailed == 1 ); - dag_h->creator = "DegradedReadDAG"; - - /* - * generate two ASMs identifying the surviving data we need - * in order to recover the lost data - */ - - /* overlappingPDAs array must be zero'd */ - RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *)); - rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs, - &rpBuf, overlappingPDAs, allocList); - - /* - * create all the nodes at once - * - * -1 because no access is generated for the failed pda - */ - nRudNodes = asmap->numStripeUnitsAccessed-1; - nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + - ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); - nNodes = 5 + nRudNodes + nRrdNodes; /* lock, unlock, xor, Rp, Rud, Rrd */ - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), - allocList); - i = 0; - blockNode = &nodes[i]; i++; - commitNode = &nodes[i]; i++; - xorNode = &nodes[i]; i++; - rpNode = &nodes[i]; i++; - termNode = &nodes[i]; i++; - rudNodes = &nodes[i]; i += nRudNodes; - rrdNodes = &nodes[i]; i += nRrdNodes; - RF_ASSERT(i == nNodes); - - /* initialize nodes */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - /* this dag can not commit until the commit node is reached - * errors prior to the commit point imply the dag has failed - */ - dag_h->numSuccedents = 1; - - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRudNodes+nRrdNodes+1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, 
rf_NullNodeUndoFunc, - NULL, 1, nRudNodes+nRrdNodes+1, 2*nXorBufs+2, 1, dag_h, - recFunc->SimpleName, allocList); - - /* fill in the Rud nodes */ - for (pda=asmap->physInfo, i=0; i<nRudNodes; i++, pda=pda->next) { - if (pda == failedPDA) {i--; continue;} - rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Rud", allocList); - RF_ASSERT(pda); - rudNodes[i].params[0].p = pda; - rudNodes[i].params[1].p = pda->bufPtr; - rudNodes[i].params[2].v = parityStripeID; - rudNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* fill in the Rrd nodes */ - i = 0; - if (new_asm_h[0]) { - for (pda=new_asm_h[0]->stripeMap->physInfo; - i<new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - i++, pda=pda->next) - { - rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, - dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i].params[0].p = pda; - rrdNodes[i].params[1].p = pda->bufPtr; - rrdNodes[i].params[2].v = parityStripeID; - rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - if (new_asm_h[1]) { - for (j=0,pda=new_asm_h[1]->stripeMap->physInfo; - j<new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - j++, pda=pda->next) - { - rf_InitNode(&rrdNodes[i+j], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, - dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i+j].params[0].p = pda; - rrdNodes[i+j].params[1].p = pda->bufPtr; - rrdNodes[i+j].params[2].v = parityStripeID; - rrdNodes[i+j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - - /* make a PDA for the parity unit */ - RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - parityPDA->row = asmap->parityInfo->row; - parityPDA->col = asmap->parityInfo->col; - parityPDA->startSector = 
((asmap->parityInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); - parityPDA->numSector = failedPDA->numSector; - - /* initialize the Rp node */ - rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList); - rpNode->params[0].p = parityPDA; - rpNode->params[1].p = rpBuf; - rpNode->params[2].v = parityStripeID; - rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - /* - * the last and nastiest step is to assign all - * the parameters of the Xor node - */ - paramNum=0; - for (i=0; i<nRrdNodes; i++) { - /* all the Rrd nodes need to be xored together */ - xorNode->params[paramNum++] = rrdNodes[i].params[0]; - xorNode->params[paramNum++] = rrdNodes[i].params[1]; - } - for (i=0; i<nRudNodes; i++) { - /* any Rud nodes that overlap the failed access need to be xored in */ - if (overlappingPDAs[i]) { - RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - bcopy((char *)rudNodes[i].params[0].p, (char *)pda, sizeof(RF_PhysDiskAddr_t)); - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); - xorNode->params[paramNum++].p = pda; - xorNode->params[paramNum++].p = pda->bufPtr; - } - } - RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); - - /* install parity pda as last set of params to be xor'd */ - xorNode->params[paramNum++].p = parityPDA; - xorNode->params[paramNum++].p = rpBuf; - - /* - * the last 2 params to the recovery xor node are - * the failed PDA and the raidPtr - */ - xorNode->params[paramNum++].p = failedPDA; - xorNode->params[paramNum++].p = raidPtr; - RF_ASSERT( paramNum == 2*nXorBufs+2 ); - - /* - * The xor node uses results[0] as the target buffer. - * Set pointer and zero the buffer. In the kernel, this - * may be a user buffer in which case we have to remap it. 
- */ - xorNode->results[0] = failedPDA->bufPtr; - RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr, - failedPDA->numSector)); - - /* connect nodes to form graph */ - /* connect the header to the block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect the block node to the read nodes */ - RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes)); - RF_ASSERT(rpNode->numAntecedents == 1); - blockNode->succedents[0] = rpNode; - rpNode->antecedents[0] = blockNode; - rpNode->antType[0] = rf_control; - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numSuccedents == 1); - blockNode->succedents[1 + i] = &rrdNodes[i]; - rrdNodes[i].antecedents[0] = blockNode; - rrdNodes[i].antType[0] = rf_control; - } - for (i = 0; i < nRudNodes; i++) { - RF_ASSERT(rudNodes[i].numSuccedents == 1); - blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i]; - rudNodes[i].antecedents[0] = blockNode; - rudNodes[i].antType[0] = rf_control; - } - - /* connect the read nodes to the xor node */ - RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes)); - RF_ASSERT(rpNode->numSuccedents == 1); - rpNode->succedents[0] = xorNode; - xorNode->antecedents[0] = rpNode; - xorNode->antType[0] = rf_trueData; - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numSuccedents == 1); - rrdNodes[i].succedents[0] = xorNode; - xorNode->antecedents[1 + i] = &rrdNodes[i]; - xorNode->antType[1 + i] = rf_trueData; - } - for (i = 0; i < nRudNodes; i++) { - RF_ASSERT(rudNodes[i].numSuccedents == 1); - rudNodes[i].succedents[0] = xorNode; - xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i]; - xorNode->antType[1 + nRrdNodes + i] = rf_trueData; - } - - /* connect the xor node to the commit node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - xorNode->succedents[0] = commitNode; - commitNode->antecedents[0] = xorNode; - 
commitNode->antType[0] = rf_control; - - /* connect the termNode to the commit node */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antType[0] = rf_control; - termNode->antecedents[0] = commitNode; + RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode; + RF_DagNode_t *commitNode, *rpNode, *termNode; + int nNodes, nRrdNodes, nRudNodes, nXorBufs, i; + int j, paramNum; + RF_SectorCount_t sectorsPerSU; + RF_ReconUnitNum_t which_ru; + char *overlappingPDAs;/* a temporary array of flags */ + RF_AccessStripeMapHeader_t *new_asm_h[2]; + RF_PhysDiskAddr_t *pda, *parityPDA; + RF_StripeNum_t parityStripeID; + RF_PhysDiskAddr_t *failedPDA; + RF_RaidLayout_t *layoutPtr; + char *rpBuf; + + layoutPtr = &(raidPtr->Layout); + /* failedPDA points to the pda within the asm that targets the failed + * disk */ + failedPDA = asmap->failedPDAs[0]; + parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, + asmap->raidAddress, &which_ru); + sectorsPerSU = layoutPtr->sectorsPerStripeUnit; + + if (rf_dagDebug) { + printf("[Creating degraded read DAG]\n"); + } + RF_ASSERT(asmap->numDataFailed == 1); + dag_h->creator = "DegradedReadDAG"; + + /* + * generate two ASMs identifying the surviving data we need + * in order to recover the lost data + */ + + /* overlappingPDAs array must be zero'd */ + RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *)); + rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs, + &rpBuf, overlappingPDAs, allocList); + + /* + * create all the nodes at once + * + * -1 because no access is generated for the failed pda + */ + nRudNodes = asmap->numStripeUnitsAccessed - 1; + nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + + ((new_asm_h[1]) ? 
new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); + nNodes = 5 + nRudNodes + nRrdNodes; /* lock, unlock, xor, Rp, Rud, + * Rrd */ + RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), + allocList); + i = 0; + blockNode = &nodes[i]; + i++; + commitNode = &nodes[i]; + i++; + xorNode = &nodes[i]; + i++; + rpNode = &nodes[i]; + i++; + termNode = &nodes[i]; + i++; + rudNodes = &nodes[i]; + i += nRudNodes; + rrdNodes = &nodes[i]; + i += nRrdNodes; + RF_ASSERT(i == nNodes); + + /* initialize nodes */ + dag_h->numCommitNodes = 1; + dag_h->numCommits = 0; + /* this dag can not commit until the commit node is reached errors + * prior to the commit point imply the dag has failed */ + dag_h->numSuccedents = 1; + + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, + NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc, + NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h, + recFunc->SimpleName, allocList); + + /* fill in the Rud nodes */ + for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) { + if (pda == failedPDA) { + i--; + continue; + } + rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Rud", allocList); + RF_ASSERT(pda); + rudNodes[i].params[0].p = pda; + rudNodes[i].params[1].p = pda->bufPtr; + rudNodes[i].params[2].v = parityStripeID; + rudNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } + + /* fill in the Rrd nodes */ + i = 0; + if (new_asm_h[0]) { + for (pda = new_asm_h[0]->stripeMap->physInfo; + i < 
new_asm_h[0]->stripeMap->numStripeUnitsAccessed; + i++, pda = pda->next) { + rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Rrd", allocList); + RF_ASSERT(pda); + rrdNodes[i].params[0].p = pda; + rrdNodes[i].params[1].p = pda->bufPtr; + rrdNodes[i].params[2].v = parityStripeID; + rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } + } + if (new_asm_h[1]) { + for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; + j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; + j++, pda = pda->next) { + rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Rrd", allocList); + RF_ASSERT(pda); + rrdNodes[i + j].params[0].p = pda; + rrdNodes[i + j].params[1].p = pda->bufPtr; + rrdNodes[i + j].params[2].v = parityStripeID; + rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } + } + /* make a PDA for the parity unit */ + RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + parityPDA->row = asmap->parityInfo->row; + parityPDA->col = asmap->parityInfo->col; + parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) + * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); + parityPDA->numSector = failedPDA->numSector; + + /* initialize the Rp node */ + rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList); + rpNode->params[0].p = parityPDA; + rpNode->params[1].p = rpBuf; + rpNode->params[2].v = parityStripeID; + rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + + /* + * the last and nastiest step is to assign all + * the parameters of the Xor node + */ + paramNum = 0; + for (i = 0; i < nRrdNodes; i++) { + /* all the Rrd nodes need to be xored together */ + xorNode->params[paramNum++] 
= rrdNodes[i].params[0]; + xorNode->params[paramNum++] = rrdNodes[i].params[1]; + } + for (i = 0; i < nRudNodes; i++) { + /* any Rud nodes that overlap the failed access need to be + * xored in */ + if (overlappingPDAs[i]) { + RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + bcopy((char *) rudNodes[i].params[0].p, (char *) pda, sizeof(RF_PhysDiskAddr_t)); + rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); + xorNode->params[paramNum++].p = pda; + xorNode->params[paramNum++].p = pda->bufPtr; + } + } + RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); + + /* install parity pda as last set of params to be xor'd */ + xorNode->params[paramNum++].p = parityPDA; + xorNode->params[paramNum++].p = rpBuf; + + /* + * the last 2 params to the recovery xor node are + * the failed PDA and the raidPtr + */ + xorNode->params[paramNum++].p = failedPDA; + xorNode->params[paramNum++].p = raidPtr; + RF_ASSERT(paramNum == 2 * nXorBufs + 2); + + /* + * The xor node uses results[0] as the target buffer. + * Set pointer and zero the buffer. In the kernel, this + * may be a user buffer in which case we have to remap it. 
+ */ + xorNode->results[0] = failedPDA->bufPtr; + RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr, + failedPDA->numSector)); + + /* connect nodes to form graph */ + /* connect the header to the block node */ + RF_ASSERT(dag_h->numSuccedents == 1); + RF_ASSERT(blockNode->numAntecedents == 0); + dag_h->succedents[0] = blockNode; + + /* connect the block node to the read nodes */ + RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes)); + RF_ASSERT(rpNode->numAntecedents == 1); + blockNode->succedents[0] = rpNode; + rpNode->antecedents[0] = blockNode; + rpNode->antType[0] = rf_control; + for (i = 0; i < nRrdNodes; i++) { + RF_ASSERT(rrdNodes[i].numSuccedents == 1); + blockNode->succedents[1 + i] = &rrdNodes[i]; + rrdNodes[i].antecedents[0] = blockNode; + rrdNodes[i].antType[0] = rf_control; + } + for (i = 0; i < nRudNodes; i++) { + RF_ASSERT(rudNodes[i].numSuccedents == 1); + blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i]; + rudNodes[i].antecedents[0] = blockNode; + rudNodes[i].antType[0] = rf_control; + } + + /* connect the read nodes to the xor node */ + RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes)); + RF_ASSERT(rpNode->numSuccedents == 1); + rpNode->succedents[0] = xorNode; + xorNode->antecedents[0] = rpNode; + xorNode->antType[0] = rf_trueData; + for (i = 0; i < nRrdNodes; i++) { + RF_ASSERT(rrdNodes[i].numSuccedents == 1); + rrdNodes[i].succedents[0] = xorNode; + xorNode->antecedents[1 + i] = &rrdNodes[i]; + xorNode->antType[1 + i] = rf_trueData; + } + for (i = 0; i < nRudNodes; i++) { + RF_ASSERT(rudNodes[i].numSuccedents == 1); + rudNodes[i].succedents[0] = xorNode; + xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i]; + xorNode->antType[1 + nRrdNodes + i] = rf_trueData; + } + + /* connect the xor node to the commit node */ + RF_ASSERT(xorNode->numSuccedents == 1); + RF_ASSERT(commitNode->numAntecedents == 1); + xorNode->succedents[0] = commitNode; + commitNode->antecedents[0] = xorNode; + 
commitNode->antType[0] = rf_control; + + /* connect the termNode to the commit node */ + RF_ASSERT(commitNode->numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == 1); + RF_ASSERT(termNode->numSuccedents == 0); + commitNode->succedents[0] = termNode; + termNode->antType[0] = rf_control; + termNode->antecedents[0] = commitNode; } @@ -597,162 +521,164 @@ void rf_CreateDegradedReadDAG( * Parameters: raidPtr - description of the physical array * asmap - logical & physical addresses for this access * bp - buffer ptr (for holding write data) - * flags - general flags (e.g. disk locking) + * flags - general flags (e.g. disk locking) * allocList - list of memory allocated in DAG creation *****************************************************************************/ -void rf_CreateRaidCDegradedReadDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList) +void +rf_CreateRaidCDegradedReadDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList) { - RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; - RF_StripeNum_t parityStripeID; - int useMirror, i, shiftable; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda; - - if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { - shiftable = RF_TRUE; - } - else { - shiftable = RF_FALSE; - } - useMirror = 0; - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - - if (rf_dagDebug) { - printf("[Creating RAID C degraded read DAG]\n"); - } - dag_h->creator = "RaidCDegradedReadDAG"; - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 0) - useMirror = RF_FALSE; - else - useMirror = RF_TRUE; - - /* total number of nodes = 1 + (block + commit + terminator) */ - RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), 
allocList); - i = 0; - rdNode = &nodes[i]; i++; - blockNode = &nodes[i]; i++; - commitNode = &nodes[i]; i++; - termNode = &nodes[i]; i++; - - /* - * This dag can not commit until the commit node is reached. - * Errors prior to the commit point imply the dag has failed - * and must be retried. - */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the block, commit, and terminator nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - pda = asmap->physInfo; - RF_ASSERT(pda != NULL); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - - /* initialize the data node */ - if (!useMirror) { - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); - if (shiftable && rf_compute_workload_shift(raidPtr, pda)) { - /* shift this read to the next disk in line */ - rdNode->params[0].p = asmap->parityInfo; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; + RF_StripeNum_t parityStripeID; + int useMirror, i, shiftable; + RF_ReconUnitNum_t which_ru; + RF_PhysDiskAddr_t *pda; + + if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { + shiftable = RF_TRUE; + } else { + shiftable = RF_FALSE; + } + useMirror = 0; + parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), + asmap->raidAddress, &which_ru); + + if (rf_dagDebug) { + printf("[Creating RAID C degraded read DAG]\n"); + } 
+ dag_h->creator = "RaidCDegradedReadDAG"; + /* alloc the Wnd nodes and the Wmir node */ + if (asmap->numDataFailed == 0) + useMirror = RF_FALSE; + else + useMirror = RF_TRUE; + + /* total number of nodes = 1 + (block + commit + terminator) */ + RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + i = 0; + rdNode = &nodes[i]; + i++; + blockNode = &nodes[i]; + i++; + commitNode = &nodes[i]; + i++; + termNode = &nodes[i]; + i++; + + /* + * This dag can not commit until the commit node is reached. + * Errors prior to the commit point imply the dag has failed + * and must be retried. + */ + dag_h->numCommitNodes = 1; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + /* initialize the block, commit, and terminator nodes */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, + NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + + pda = asmap->physInfo; + RF_ASSERT(pda != NULL); + /* parityInfo must describe entire parity unit */ + RF_ASSERT(asmap->parityInfo->next == NULL); + + /* initialize the data node */ + if (!useMirror) { + rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); + if (shiftable && rf_compute_workload_shift(raidPtr, pda)) { + /* shift this read to the next disk in line */ + rdNode->params[0].p = asmap->parityInfo; + rdNode->params[1].p = pda->bufPtr; + rdNode->params[2].v = parityStripeID; + rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } else { + /* read primary copy */ + rdNode->params[0].p = pda; + rdNode->params[1].p = pda->bufPtr; + rdNode->params[2].v = parityStripeID; + rdNode->params[3].v = 
RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } + } else { + /* read secondary copy of data */ + rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); + rdNode->params[0].p = asmap->parityInfo; + rdNode->params[1].p = pda->bufPtr; + rdNode->params[2].v = parityStripeID; + rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); } - else { - /* read primary copy */ - rdNode->params[0].p = pda; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - else { - /* read secondary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); - rdNode->params[0].p = asmap->parityInfo; - rdNode->params[1].p = pda->bufPtr; - rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* connect header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to rdnode */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(rdNode->numAntecedents == 1); - blockNode->succedents[0] = rdNode; - rdNode->antecedents[0] = blockNode; - rdNode->antType[0] = rf_control; - - /* connect rdnode to commit node */ - RF_ASSERT(rdNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - rdNode->succedents[0] = commitNode; - commitNode->antecedents[0] = rdNode; - commitNode->antType[0] = rf_control; - - /* connect commit node to terminator */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = 
rf_control; -} + /* connect header to block node */ + RF_ASSERT(dag_h->numSuccedents == 1); + RF_ASSERT(blockNode->numAntecedents == 0); + dag_h->succedents[0] = blockNode; + + /* connect block node to rdnode */ + RF_ASSERT(blockNode->numSuccedents == 1); + RF_ASSERT(rdNode->numAntecedents == 1); + blockNode->succedents[0] = rdNode; + rdNode->antecedents[0] = blockNode; + rdNode->antType[0] = rf_control; + + /* connect rdnode to commit node */ + RF_ASSERT(rdNode->numSuccedents == 1); + RF_ASSERT(commitNode->numAntecedents == 1); + rdNode->succedents[0] = commitNode; + commitNode->antecedents[0] = rdNode; + commitNode->antType[0] = rf_control; + + /* connect commit node to terminator */ + RF_ASSERT(commitNode->numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == 1); + RF_ASSERT(termNode->numSuccedents == 0); + commitNode->succedents[0] = termNode; + termNode->antecedents[0] = commitNode; + termNode->antType[0] = rf_control; +} /* * XXX move this elsewhere? */ -void rf_DD_GenerateFailedAccessASMs( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_PhysDiskAddr_t **pdap, - int *nNodep, - RF_PhysDiskAddr_t **pqpdap, - int *nPQNodep, - RF_AllocListElem_t *allocList) +void +rf_DD_GenerateFailedAccessASMs( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_PhysDiskAddr_t ** pdap, + int *nNodep, + RF_PhysDiskAddr_t ** pqpdap, + int *nPQNodep, + RF_AllocListElem_t * allocList) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int PDAPerDisk,i; - RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - int numDataCol = layoutPtr->numDataCol; - int state; - RF_SectorNum_t suoff, suend; - unsigned firstDataCol, napdas, count; - RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0; - RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; - RF_PhysDiskAddr_t *pda_p; - RF_PhysDiskAddr_t *phys_p; - RF_RaidAddr_t sosAddr; - - /* determine how many pda's we will have to generate per unaccess stripe. 
- If there is only one failed data unit, it is one; if two, possibly two, - depending wether they overlap. */ - - fone_start = rf_StripeUnitOffset(layoutPtr,fone->startSector); - fone_end = fone_start + fone->numSector; + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + int PDAPerDisk, i; + RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; + int numDataCol = layoutPtr->numDataCol; + int state; + RF_SectorNum_t suoff, suend; + unsigned firstDataCol, napdas, count; + RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0; + RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; + RF_PhysDiskAddr_t *pda_p; + RF_PhysDiskAddr_t *phys_p; + RF_RaidAddr_t sosAddr; + + /* determine how many pda's we will have to generate per unaccess + * stripe. If there is only one failed data unit, it is one; if two, + * possibly two, depending wether they overlap. */ + + fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); + fone_end = fone_start + fone->numSector; #define CONS_PDA(if,start,num) \ pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \ @@ -761,307 +687,288 @@ void rf_DD_GenerateFailedAccessASMs( pda_p->next = NULL; \ RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) - if (asmap->numDataFailed==1) - { - PDAPerDisk = 1; - state = 1; - RF_MallocAndAdd(*pqpdap,2*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - /* build p */ - CONS_PDA(parityInfo,fone_start,fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - /* build q */ - CONS_PDA(qInfo,fone_start,fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - } - else - { - ftwo_start = rf_StripeUnitOffset(layoutPtr,ftwo->startSector); - ftwo_end = ftwo_start + ftwo->numSector; - if (fone->numSector + ftwo->numSector > secPerSU) - { - PDAPerDisk = 1; - state = 2; - RF_MallocAndAdd(*pqpdap,2*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - 
CONS_PDA(parityInfo,0,secPerSU); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo,0,secPerSU); - pda_p->type = RF_PDA_TYPE_Q; - } - else - { - PDAPerDisk = 2; - state = 3; - /* four of them, fone, then ftwo */ - RF_MallocAndAdd(*pqpdap,4*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - CONS_PDA(parityInfo,fone_start,fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo,fone_start,fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - pda_p++; - CONS_PDA(parityInfo,ftwo_start,ftwo->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo,ftwo_start,ftwo->numSector); - pda_p->type = RF_PDA_TYPE_Q; - } - } - /* figure out number of nonaccessed pda */ - napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo==NULL ? 1 : 0)); - *nPQNodep = PDAPerDisk; - - /* sweep over the over accessed pda's, figuring out the number of - additional pda's to generate. Of course, skip the failed ones */ - - count = 0; - for ( pda_p=asmap->physInfo; pda_p; pda_p= pda_p->next) - { - if ((pda_p == fone) || (pda_p == ftwo)) - continue; - suoff = rf_StripeUnitOffset(layoutPtr,pda_p->startSector); - suend = suoff + pda_p->numSector; - switch (state) - { - case 1: /* one failed PDA to overlap */ - /* if a PDA doesn't contain the failed unit, it can - only miss the start or end, not both */ - if ((suoff > fone_start) || (suend <fone_end)) - count++; - break; - case 2: /* whole stripe */ - if (suoff) /* leak at begining */ - count++; - if (suend < numDataCol) /* leak at end */ - count++; - break; - case 3: /* two disjoint units */ - if ((suoff > fone_start) || (suend <fone_end)) - count++; - if ((suoff > ftwo_start) || (suend <ftwo_end)) - count++; - break; - default: - RF_PANIC(); - } - } - - napdas += count; - *nNodep = napdas; - if (napdas == 0) return; /* short circuit */ - - /* allocate up our list of pda's */ - - RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), 
(RF_PhysDiskAddr_t *), allocList); - *pdap = pda_p; - - /* linkem together */ - for (i=0; i < (napdas-1); i++) - pda_p[i].next = pda_p+(i+1); - - /* march through the one's up to the first accessed disk */ - firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),asmap->physInfo->raidAddress) % numDataCol; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i=0; i < firstDataCol; i++) - { - if ((pda_p - (*pdap)) == napdas) - continue; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) - continue; - switch (state) - { - case 1: /* fone */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - break; - case 2: /* full stripe */ - pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,secPerSU), (char *), allocList); - break; - case 3: /* two slabs */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - pda_p++; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - pda_p->numSector = ftwo->numSector; - pda_p->raidAddress += ftwo_start; - pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - break; - default: - RF_PANIC(); - } - pda_p++; - } - - /* march through the touched stripe units */ - for (phys_p 
= asmap->physInfo; phys_p; phys_p = phys_p->next, i++) - { - if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1])) - continue; - suoff = rf_StripeUnitOffset(layoutPtr,phys_p->startSector); - suend = suoff + phys_p->numSector; - switch(state) - { - case 1: /* single buffer */ - if (suoff > fone_start) - { - RF_ASSERT( suend >= fone_end ); - /* The data read starts after the mapped access, - snip off the begining */ - pda_p->numSector = suoff - fone_start; - pda_p->raidAddress = sosAddr + (i*secPerSU) + fone_start; - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - pda_p++; - } - if (suend < fone_end) - { - RF_ASSERT ( suoff <= fone_start); - /* The data read stops before the end of the failed access, extend */ - pda_p->numSector = fone_end - suend; - pda_p->raidAddress = sosAddr + (i*secPerSU) + suend; /* off by one? */ - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - pda_p++; - } - break; - case 2: /* whole stripe unit */ - RF_ASSERT( (suoff == 0) || (suend == secPerSU)); - if (suend < secPerSU) - { /* short read, snip from end on */ - pda_p->numSector = secPerSU - suend; - pda_p->raidAddress = sosAddr + (i*secPerSU) + suend; /* off by one? 
*/ - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - pda_p++; - } - else - if (suoff > 0) - { /* short at front */ - pda_p->numSector = suoff; - pda_p->raidAddress = sosAddr + (i*secPerSU); - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); + if (asmap->numDataFailed == 1) { + PDAPerDisk = 1; + state = 1; + RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + pda_p = *pqpdap; + /* build p */ + CONS_PDA(parityInfo, fone_start, fone->numSector); + pda_p->type = RF_PDA_TYPE_PARITY; pda_p++; - } - break; - case 3: /* two nonoverlapping failures */ - if ((suoff > fone_start) || (suend <fone_end)) - { - if (suoff > fone_start) - { - RF_ASSERT( suend >= fone_end ); - /* The data read starts after the mapped access, - snip off the begining */ - pda_p->numSector = suoff - fone_start; - pda_p->raidAddress = sosAddr + (i*secPerSU) + fone_start; - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - pda_p++; + /* build q */ + CONS_PDA(qInfo, fone_start, fone->numSector); + pda_p->type = RF_PDA_TYPE_Q; + } else { + ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector); + ftwo_end = ftwo_start + ftwo->numSector; + if (fone->numSector + ftwo->numSector > secPerSU) { + PDAPerDisk = 1; + state = 2; + RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + pda_p = *pqpdap; + CONS_PDA(parityInfo, 0, secPerSU); + pda_p->type = RF_PDA_TYPE_PARITY; + pda_p++; + CONS_PDA(qInfo, 0, secPerSU); + 
pda_p->type = RF_PDA_TYPE_Q; + } else { + PDAPerDisk = 2; + state = 3; + /* four of them, fone, then ftwo */ + RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + pda_p = *pqpdap; + CONS_PDA(parityInfo, fone_start, fone->numSector); + pda_p->type = RF_PDA_TYPE_PARITY; + pda_p++; + CONS_PDA(qInfo, fone_start, fone->numSector); + pda_p->type = RF_PDA_TYPE_Q; + pda_p++; + CONS_PDA(parityInfo, ftwo_start, ftwo->numSector); + pda_p->type = RF_PDA_TYPE_PARITY; + pda_p++; + CONS_PDA(qInfo, ftwo_start, ftwo->numSector); + pda_p->type = RF_PDA_TYPE_Q; } - if (suend < fone_end) - { - RF_ASSERT ( suoff <= fone_start); - /* The data read stops before the end of the failed access, extend */ - pda_p->numSector = fone_end - suend; - pda_p->raidAddress = sosAddr + (i*secPerSU) + suend; /* off by one? */ - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - pda_p++; + } + /* figure out number of nonaccessed pda */ + napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo == NULL ? 1 : 0)); + *nPQNodep = PDAPerDisk; + + /* sweep over the over accessed pda's, figuring out the number of + * additional pda's to generate. 
Of course, skip the failed ones */ + + count = 0; + for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) { + if ((pda_p == fone) || (pda_p == ftwo)) + continue; + suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector); + suend = suoff + pda_p->numSector; + switch (state) { + case 1: /* one failed PDA to overlap */ + /* if a PDA doesn't contain the failed unit, it can + * only miss the start or end, not both */ + if ((suoff > fone_start) || (suend < fone_end)) + count++; + break; + case 2: /* whole stripe */ + if (suoff) /* leak at begining */ + count++; + if (suend < numDataCol) /* leak at end */ + count++; + break; + case 3: /* two disjoint units */ + if ((suoff > fone_start) || (suend < fone_end)) + count++; + if ((suoff > ftwo_start) || (suend < ftwo_end)) + count++; + break; + default: + RF_PANIC(); } - } - if ((suoff > ftwo_start) || (suend <ftwo_end)) - { - if (suoff > ftwo_start) - { - RF_ASSERT( suend >= ftwo_end ); - /* The data read starts after the mapped access, - snip off the begining */ - pda_p->numSector = suoff - ftwo_start; - pda_p->raidAddress = sosAddr + (i*secPerSU) + ftwo_start; - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - pda_p++; + } + + napdas += count; + *nNodep = napdas; + if (napdas == 0) + return; /* short circuit */ + + /* allocate up our list of pda's */ + + RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + *pdap = pda_p; + + /* linkem together */ + for (i = 0; i < (napdas - 1); i++) + pda_p[i].next = pda_p + (i + 1); + + /* march through the one's up to the first accessed disk */ + firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), asmap->physInfo->raidAddress) % numDataCol; + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + for (i = 0; i < firstDataCol; i++) { + 
if ((pda_p - (*pdap)) == napdas) + continue; + pda_p->type = RF_PDA_TYPE_DATA; + pda_p->raidAddress = sosAddr + (i * secPerSU); + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + /* skip over dead disks */ + if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) + continue; + switch (state) { + case 1: /* fone */ + pda_p->numSector = fone->numSector; + pda_p->raidAddress += fone_start; + pda_p->startSector += fone_start; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + break; + case 2: /* full stripe */ + pda_p->numSector = secPerSU; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); + break; + case 3: /* two slabs */ + pda_p->numSector = fone->numSector; + pda_p->raidAddress += fone_start; + pda_p->startSector += fone_start; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p++; + pda_p->type = RF_PDA_TYPE_DATA; + pda_p->raidAddress = sosAddr + (i * secPerSU); + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + pda_p->numSector = ftwo->numSector; + pda_p->raidAddress += ftwo_start; + pda_p->startSector += ftwo_start; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + break; + default: + RF_PANIC(); } - if (suend < ftwo_end) - { - RF_ASSERT ( suoff <= ftwo_start); - /* The data read stops before the end of the failed access, extend */ - pda_p->numSector = ftwo_end - suend; - pda_p->raidAddress = sosAddr + (i*secPerSU) + suend; /* off by one? 
*/ - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - pda_p++; + pda_p++; + } + + /* march through the touched stripe units */ + for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) { + if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1])) + continue; + suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector); + suend = suoff + phys_p->numSector; + switch (state) { + case 1: /* single buffer */ + if (suoff > fone_start) { + RF_ASSERT(suend >= fone_end); + /* The data read starts after the mapped + * access, snip off the begining */ + pda_p->numSector = suoff - fone_start; + pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p++; + } + if (suend < fone_end) { + RF_ASSERT(suoff <= fone_start); + /* The data read stops before the end of the + * failed access, extend */ + pda_p->numSector = fone_end - suend; + pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p++; + } + break; + case 2: /* whole stripe unit */ + RF_ASSERT((suoff == 0) || (suend == secPerSU)); + if (suend < secPerSU) { /* short read, snip from end + * on */ + pda_p->numSector = secPerSU - suend; + pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p++; + } else + if (suoff > 0) { /* short at front */ + pda_p->numSector = suoff; + pda_p->raidAddress = sosAddr + (i * secPerSU); + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p++; + } + break; + case 3: /* two nonoverlapping failures */ + if ((suoff > fone_start) || (suend < fone_end)) { + if (suoff > fone_start) { + RF_ASSERT(suend >= fone_end); + /* The data read starts after the + * mapped access, snip off the + * begining */ + pda_p->numSector = suoff - fone_start; + pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p++; + } + if (suend < fone_end) { + RF_ASSERT(suoff <= fone_start); + /* The data read stops before the end + * of the failed access, extend */ + pda_p->numSector = fone_end - suend; + pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p++; + } + } + if ((suoff > ftwo_start) || (suend < ftwo_end)) { + if (suoff > ftwo_start) { + RF_ASSERT(suend >= ftwo_end); + /* The data read starts after the + * mapped access, snip off the + * begining */ + pda_p->numSector = suoff - ftwo_start; + pda_p->raidAddress = sosAddr + (i * secPerSU) + ftwo_start; + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p++; + } + if (suend < ftwo_end) { + RF_ASSERT(suoff <= ftwo_start); + /* The data read stops before the end + * of the failed access, extend */ + pda_p->numSector = ftwo_end - suend; + pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p++; + } + } + break; + default: + RF_PANIC(); } - } - break; - default: - RF_PANIC(); - } - } - - /* after the last accessed disk */ - for (; i < numDataCol; i++ ) - { - if ((pda_p - (*pdap)) == napdas) - continue; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) - continue; - switch (state) - { - case 1: /* fone */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - break; - case 2: /* full stripe */ - pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,secPerSU), (char *), allocList); - break; - case 3: /* two slabs */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - pda_p++; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - pda_p->numSector = ftwo->numSector; - pda_p->raidAddress += ftwo_start; - pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - break; - default: - RF_PANIC(); } - pda_p++; - } - RF_ASSERT (pda_p - *pdap == napdas); - return; -} + /* after the last accessed disk */ + 
for (; i < numDataCol; i++) { + if ((pda_p - (*pdap)) == napdas) + continue; + pda_p->type = RF_PDA_TYPE_DATA; + pda_p->raidAddress = sosAddr + (i * secPerSU); + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + /* skip over dead disks */ + if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) + continue; + switch (state) { + case 1: /* fone */ + pda_p->numSector = fone->numSector; + pda_p->raidAddress += fone_start; + pda_p->startSector += fone_start; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + break; + case 2: /* full stripe */ + pda_p->numSector = secPerSU; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); + break; + case 3: /* two slabs */ + pda_p->numSector = fone->numSector; + pda_p->raidAddress += fone_start; + pda_p->startSector += fone_start; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p++; + pda_p->type = RF_PDA_TYPE_DATA; + pda_p->raidAddress = sosAddr + (i * secPerSU); + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + pda_p->numSector = ftwo->numSector; + pda_p->raidAddress += ftwo_start; + pda_p->startSector += ftwo_start; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + break; + default: + RF_PANIC(); + } + pda_p++; + } + RF_ASSERT(pda_p - *pdap == napdas); + return; +} #define INIT_DISK_NODE(node,name) \ rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \ (node)->succedents[0] = unblockNode; \ @@ -1075,138 +982,145 @@ rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_Ge (_node_).params[2].v = parityStripeID; \ (_node_).params[3].v = 
RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) -void rf_DoubleDegRead( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - char *redundantReadNodeName, - char *recoveryNodeName, - int (*recovFunc)(RF_DagNode_t *)) +void +rf_DoubleDegRead( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + char *redundantReadNodeName, + char *recoveryNodeName, + int (*recovFunc) (RF_DagNode_t *)) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode, *unblockNode, *rpNodes, *rqNodes, *termNode; - RF_PhysDiskAddr_t *pda, *pqPDAs; - RF_PhysDiskAddr_t *npdas; - int nNodes, nRrdNodes, nRudNodes, i; - RF_ReconUnitNum_t which_ru; - int nReadNodes, nPQNodes; - RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; - RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1]; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); - - if (rf_dagDebug) printf("[Creating Double Degraded Read DAG]\n"); - rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes,allocList); - - nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); - nReadNodes = nRrdNodes + nRudNodes + 2*nPQNodes; - nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes; - - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; i += 1; - unblockNode = &nodes[i]; i += 1; - recoveryNode = &nodes[i]; i += 1; - termNode = &nodes[i]; i += 1; - rudNodes = &nodes[i]; i += nRudNodes; - rrdNodes = &nodes[i]; i += nRrdNodes; - rpNodes = &nodes[i]; i += nPQNodes; - rqNodes = &nodes[i]; i += nPQNodes; - RF_ASSERT(i == nNodes); - - dag_h->numSuccedents = 1; - dag_h->succedents[0] = blockNode; - 
dag_h->creator = "DoubleDegRead"; - dag_h->numCommits = 0; - dag_h->numCommitNodes = 1; /*unblock */ - - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList); - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - termNode->antecedents[1] = recoveryNode; - termNode->antType[1] = rf_control; - - /* init the block and unblock nodes */ - /* The block node has all nodes except itself, unblock and recovery as successors. Similarly for - predecessors of the unblock. */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList); - - for (i=0; i < nReadNodes; i++) - { - blockNode->succedents[i] = rudNodes+i; - unblockNode->antecedents[i] = rudNodes+i; - unblockNode->antType[i] = rf_control; - } - unblockNode->succedents[0] = termNode; - - /* The recovery node has all the reads as predecessors, and the term node as successors. It gets a pda as a param - from each of the read nodes plus the raidPtr. - For each failed unit is has a result pda. 
*/ - rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, - 1, /* succesors */ - nReadNodes, /* preds */ - nReadNodes+2, /* params */ - asmap->numDataFailed, /* results */ - dag_h, recoveryNodeName, allocList); - - recoveryNode->succedents[0] = termNode; - for (i=0; i < nReadNodes; i++) { - recoveryNode->antecedents[i] = rudNodes+i; - recoveryNode->antType[i] = rf_trueData; - } - - /* build the read nodes, then come back and fill in recovery params and results */ - pda = asmap->physInfo; - for (i=0; i < nRudNodes; pda = pda->next) - { - if ((pda == failedPDA) || (pda == failedPDAtwo)) - continue; - INIT_DISK_NODE(rudNodes+i,"Rud"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rudNodes[i],pda); - i++; - } - - pda = npdas; - for (i=0; i < nRrdNodes; i++, pda = pda->next) - { - INIT_DISK_NODE(rrdNodes+i,"Rrd"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rrdNodes[i],pda); - } - - /* redundancy pdas */ - pda = pqPDAs; - INIT_DISK_NODE(rpNodes,"Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[0],pda); - pda++; - INIT_DISK_NODE(rqNodes,redundantReadNodeName ); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[0],pda); - if (nPQNodes==2) - { - pda++; - INIT_DISK_NODE(rpNodes+1,"Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[1],pda); - pda++; - INIT_DISK_NODE( rqNodes+1,redundantReadNodeName ); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[1],pda); - } - - /* fill in recovery node params */ - for (i=0; i < nReadNodes; i++) - recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */ - recoveryNode->params[i++].p = (void *) raidPtr; - recoveryNode->params[i++].p = (void *) asmap; - recoveryNode->results[0] = failedPDA; - if (asmap->numDataFailed ==2 ) - recoveryNode->results[1] = failedPDAtwo; - - /* zero fill the target data buffers? 
*/ + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode, + *unblockNode, *rpNodes, *rqNodes, *termNode; + RF_PhysDiskAddr_t *pda, *pqPDAs; + RF_PhysDiskAddr_t *npdas; + int nNodes, nRrdNodes, nRudNodes, i; + RF_ReconUnitNum_t which_ru; + int nReadNodes, nPQNodes; + RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; + RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1]; + RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); + + if (rf_dagDebug) + printf("[Creating Double Degraded Read DAG]\n"); + rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList); + + nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); + nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes; + nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes; + + RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + i = 0; + blockNode = &nodes[i]; + i += 1; + unblockNode = &nodes[i]; + i += 1; + recoveryNode = &nodes[i]; + i += 1; + termNode = &nodes[i]; + i += 1; + rudNodes = &nodes[i]; + i += nRudNodes; + rrdNodes = &nodes[i]; + i += nRrdNodes; + rpNodes = &nodes[i]; + i += nPQNodes; + rqNodes = &nodes[i]; + i += nPQNodes; + RF_ASSERT(i == nNodes); + + dag_h->numSuccedents = 1; + dag_h->succedents[0] = blockNode; + dag_h->creator = "DoubleDegRead"; + dag_h->numCommits = 0; + dag_h->numCommitNodes = 1; /* unblock */ + + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList); + termNode->antecedents[0] = unblockNode; + termNode->antType[0] = rf_control; + termNode->antecedents[1] = recoveryNode; + termNode->antType[1] = rf_control; + + /* init the block and unblock nodes */ + /* The block node has all nodes except itself, unblock and recovery as + * successors. Similarly for predecessors of the unblock. 
*/ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList); + + for (i = 0; i < nReadNodes; i++) { + blockNode->succedents[i] = rudNodes + i; + unblockNode->antecedents[i] = rudNodes + i; + unblockNode->antType[i] = rf_control; + } + unblockNode->succedents[0] = termNode; + + /* The recovery node has all the reads as predecessors, and the term + * node as successors. It gets a pda as a param from each of the read + * nodes plus the raidPtr. For each failed unit is has a result pda. */ + rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, + 1, /* succesors */ + nReadNodes, /* preds */ + nReadNodes + 2, /* params */ + asmap->numDataFailed, /* results */ + dag_h, recoveryNodeName, allocList); + + recoveryNode->succedents[0] = termNode; + for (i = 0; i < nReadNodes; i++) { + recoveryNode->antecedents[i] = rudNodes + i; + recoveryNode->antType[i] = rf_trueData; + } + + /* build the read nodes, then come back and fill in recovery params + * and results */ + pda = asmap->physInfo; + for (i = 0; i < nRudNodes; pda = pda->next) { + if ((pda == failedPDA) || (pda == failedPDAtwo)) + continue; + INIT_DISK_NODE(rudNodes + i, "Rud"); + RF_ASSERT(pda); + DISK_NODE_PARAMS(rudNodes[i], pda); + i++; + } + + pda = npdas; + for (i = 0; i < nRrdNodes; i++, pda = pda->next) { + INIT_DISK_NODE(rrdNodes + i, "Rrd"); + RF_ASSERT(pda); + DISK_NODE_PARAMS(rrdNodes[i], pda); + } + + /* redundancy pdas */ + pda = pqPDAs; + INIT_DISK_NODE(rpNodes, "Rp"); + RF_ASSERT(pda); + DISK_NODE_PARAMS(rpNodes[0], pda); + pda++; + INIT_DISK_NODE(rqNodes, redundantReadNodeName); + RF_ASSERT(pda); + DISK_NODE_PARAMS(rqNodes[0], pda); + if (nPQNodes == 2) { + pda++; + INIT_DISK_NODE(rpNodes + 1, "Rp"); + RF_ASSERT(pda); + DISK_NODE_PARAMS(rpNodes[1], pda); + pda++; + 
INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName); + RF_ASSERT(pda); + DISK_NODE_PARAMS(rqNodes[1], pda); + } + /* fill in recovery node params */ + for (i = 0; i < nReadNodes; i++) + recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */ + recoveryNode->params[i++].p = (void *) raidPtr; + recoveryNode->params[i++].p = (void *) asmap; + recoveryNode->results[0] = failedPDA; + if (asmap->numDataFailed == 2) + recoveryNode->results[1] = failedPDAtwo; + + /* zero fill the target data buffers? */ } diff --git a/sys/dev/raidframe/rf_dagdegrd.h b/sys/dev/raidframe/rf_dagdegrd.h index 3e0bce1c7ff..8071bbb6bf5 100644 --- a/sys/dev/raidframe/rf_dagdegrd.h +++ b/sys/dev/raidframe/rf_dagdegrd.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagdegrd.h,v 1.1 1999/01/11 14:29:07 niklas Exp $ */ -/* $NetBSD: rf_dagdegrd.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_dagdegrd.h,v 1.2 1999/02/16 00:02:29 niklas Exp $ */ +/* $NetBSD: rf_dagdegrd.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,62 +27,38 @@ * rights to redistribute these changes. 
*/ -/* - * : - * Log: rf_dagdegrd.h,v - * Revision 1.6 1996/07/31 16:29:06 jimz - * asm/asmap re-fix (EO merge) - * - * Revision 1.5 1996/07/31 15:34:40 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.4 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.3 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.2 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.1 1996/05/03 19:22:06 wvcii - * Initial revision - * - */ - #ifndef _RF__RF_DAGDEGRD_H_ #define _RF__RF_DAGDEGRD_H_ #include "rf_types.h" /* degraded read DAG creation routines */ -void rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList); -void rf_CreateRaidOneDegradedReadDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList); -void rf_CreateDegradedReadDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, - RF_RedFuncs_t *recFunc); -void rf_CreateRaidCDegradedReadDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList); -void rf_DD_GenerateFailedAccessASMs(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_PhysDiskAddr_t **pdap, - int *nNodep, RF_PhysDiskAddr_t **pqpdap, int *nPQNodep, - RF_AllocListElem_t *allocList); -void rf_DoubleDegRead(RF_Raid_t 
*raidPtr, RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, char *redundantReadNodeName, - char *recoveryNodeName, int (*recovFunc)(RF_DagNode_t *)); +void +rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); +void +rf_CreateRaidOneDegradedReadDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); +void +rf_CreateDegradedReadDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, + RF_RedFuncs_t * recFunc); +void +rf_CreateRaidCDegradedReadDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); +void +rf_DD_GenerateFailedAccessASMs(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t ** pdap, + int *nNodep, RF_PhysDiskAddr_t ** pqpdap, int *nPQNodep, + RF_AllocListElem_t * allocList); +void +rf_DoubleDegRead(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, char *redundantReadNodeName, + char *recoveryNodeName, int (*recovFunc) (RF_DagNode_t *)); -#endif /* !_RF__RF_DAGDEGRD_H_ */ +#endif /* !_RF__RF_DAGDEGRD_H_ */ diff --git a/sys/dev/raidframe/rf_dagdegwr.c b/sys/dev/raidframe/rf_dagdegwr.c index a712dd1e83b..407e69cdac6 100644 --- a/sys/dev/raidframe/rf_dagdegwr.c +++ b/sys/dev/raidframe/rf_dagdegwr.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagdegwr.c,v 1.1 1999/01/11 14:29:07 niklas Exp $ */ -/* $NetBSD: rf_dagdegwr.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_dagdegwr.c,v 1.2 1999/02/16 00:02:29 niklas Exp $ */ +/* $NetBSD: rf_dagdegwr.c,v 1.3 
1999/02/05 00:06:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -32,108 +32,6 @@ * * code for creating degraded write DAGs * - * : - * Log: rf_dagdegwr.c,v - * Revision 1.23 1996/11/05 21:10:40 jimz - * failed pda generalization - * - * Revision 1.22 1996/08/23 14:49:48 jimz - * remove bogus assert from small write double deg DAG generator - * - * Revision 1.21 1996/08/21 05:09:44 jimz - * get rid of bogus fakery in DoubleDegSmallWrite - * - * Revision 1.20 1996/08/21 04:14:35 jimz - * cleanup doubledegsmallwrite - * NOTE: we need doubledeglargewrite - * - * Revision 1.19 1996/08/19 21:39:38 jimz - * CommonCreateSimpleDegradedWriteDAG() was unable to correctly create DAGs for - * complete stripe overwrite accesses- it assumed the necessity to read old - * data. Rather than do the "right" thing, and risk breaking a critical DAG so - * close to release, I made a no-op read node to stick in and link up in this - * case. Seems to work. - * - * Revision 1.18 1996/07/31 15:35:34 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.17 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.16 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.15 1996/07/27 16:30:19 jimz - * cleanup sweep - * - * Revision 1.14 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.13 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.12 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.11 1996/06/07 21:33:04 jimz - * begin using 
consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.10 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.9 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.8 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.7 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.6 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.5 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.4 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.1 1996/05/03 19:21:50 wvcii - * Initial revision - * */ #include "rf_types.h" @@ -152,7 +50,7 @@ 
/****************************************************************************** * * General comments on DAG creation: - * + * * All DAGs in this file use roll-away error recovery. Each DAG has a single * commit node, usually called "Cmt." If an error occurs before the Cmt node * is reached, the execution engine will halt forward execution and work @@ -176,37 +74,39 @@ * the DAG creation routines to be replaced at this single point. */ -static RF_CREATE_DAG_FUNC_DECL(rf_CreateSimpleDegradedWriteDAG) +static +RF_CREATE_DAG_FUNC_DECL(rf_CreateSimpleDegradedWriteDAG) { - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, - flags, allocList,1, rf_RecoveryXorFunc, RF_TRUE); + rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, + flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE); } -void rf_CreateDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - RF_DagHeader_t *dag_h; - void *bp; - RF_RaidAccessFlags_t flags; - RF_AllocListElem_t *allocList; +void +rf_CreateDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList) + RF_Raid_t *raidPtr; + RF_AccessStripeMap_t *asmap; + RF_DagHeader_t *dag_h; + void *bp; + RF_RaidAccessFlags_t flags; + RF_AllocListElem_t *allocList; { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; - - RF_ASSERT( asmap->numDataFailed == 1 ); - dag_h->creator = "DegradedWriteDAG"; - - /* if the access writes only a portion of the failed unit, and also writes - * some portion of at least one surviving unit, we create two DAGs, one for - * the failed component and one for the non-failed component, and do them - * sequentially. Note that the fact that we're accessing only a portion of - * the failed unit indicates that the access either starts or ends in the - * failed unit, and hence we need create only two dags. 
This is inefficient - * in that the same data or parity can get read and written twice using this - * structure. I need to fix this to do the access all at once. - */ - RF_ASSERT(!(asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)); - rf_CreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList); + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; + + RF_ASSERT(asmap->numDataFailed == 1); + dag_h->creator = "DegradedWriteDAG"; + + /* if the access writes only a portion of the failed unit, and also + * writes some portion of at least one surviving unit, we create two + * DAGs, one for the failed component and one for the non-failed + * component, and do them sequentially. Note that the fact that we're + * accessing only a portion of the failed unit indicates that the + * access either starts or ends in the failed unit, and hence we need + * create only two dags. This is inefficient in that the same data or + * parity can get read and written twice using this structure. I need + * to fix this to do the access all at once. */ + RF_ASSERT(!(asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)); + rf_CreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList); } @@ -250,363 +150,364 @@ void rf_CreateDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList) * is used. 
*****************************************************************************/ -void rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, nfaults, redFunc, allowBufferRecycle) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - RF_DagHeader_t *dag_h; - void *bp; - RF_RaidAccessFlags_t flags; - RF_AllocListElem_t *allocList; - int nfaults; - int (*redFunc)(RF_DagNode_t *); - int allowBufferRecycle; +void +rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, nfaults, redFunc, allowBufferRecycle) + RF_Raid_t *raidPtr; + RF_AccessStripeMap_t *asmap; + RF_DagHeader_t *dag_h; + void *bp; + RF_RaidAccessFlags_t flags; + RF_AllocListElem_t *allocList; + int nfaults; + int (*redFunc) (RF_DagNode_t *); + int allowBufferRecycle; { - int nNodes, nRrdNodes, nWndNodes, nXorBufs, i, j, paramNum, rdnodesFaked; - RF_DagNode_t *blockNode, *unblockNode, *wnpNode, *wnqNode, *termNode; - RF_DagNode_t *nodes, *wndNodes, *rrdNodes, *xorNode, *commitNode; - RF_SectorCount_t sectorsPerSU; - RF_ReconUnitNum_t which_ru; - char *xorTargetBuf = NULL; /* the target buffer for the XOR operation */ - char *overlappingPDAs; /* a temporary array of flags */ - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_PhysDiskAddr_t *pda, *parityPDA; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *failedPDA; - RF_RaidLayout_t *layoutPtr; - - layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, - &which_ru); - sectorsPerSU = layoutPtr->sectorsPerStripeUnit; - /* failedPDA points to the pda within the asm that targets the failed disk */ - failedPDA = asmap->failedPDAs[0]; - - if (rf_dagDebug) - printf("[Creating degraded-write DAG]\n"); - - RF_ASSERT( asmap->numDataFailed == 1 ); - dag_h->creator = "SimpleDegradedWriteDAG"; - - /* - * Generate two ASMs identifying the surviving data - * we need in order to recover the lost data. 
- */ - /* overlappingPDAs array must be zero'd */ - RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *)); - rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, - &nXorBufs, NULL, overlappingPDAs, allocList); - - /* create all the nodes at once */ - nWndNodes = asmap->numStripeUnitsAccessed - 1; /* no access is generated - * for the failed pda */ - - nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + - ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); - /* - * XXX - * - * There's a bug with a complete stripe overwrite- that means 0 reads - * of old data, and the rest of the DAG generation code doesn't like - * that. A release is coming, and I don't wanna risk breaking a critical - * DAG generator, so here's what I'm gonna do- if there's no read nodes, - * I'm gonna fake there being a read node, and I'm gonna swap in a - * no-op node in its place (to make all the link-up code happy). - * This should be fixed at some point. 
--jimz - */ - if (nRrdNodes == 0) { - nRrdNodes = 1; - rdnodesFaked = 1; - } - else { - rdnodesFaked = 0; - } - /* lock, unlock, xor, Wnd, Rrd, W(nfaults) */ - nNodes = 5 + nfaults + nWndNodes + nRrdNodes; - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; i += 1; - commitNode = &nodes[i]; i += 1; - unblockNode = &nodes[i]; i += 1; - termNode = &nodes[i]; i += 1; - xorNode = &nodes[i]; i += 1; - wnpNode = &nodes[i]; i += 1; - wndNodes = &nodes[i]; i += nWndNodes; - rrdNodes = &nodes[i]; i += nRrdNodes; - if (nfaults == 2) { - wnqNode = &nodes[i]; i += 1; - } - else { - wnqNode = NULL; - } - RF_ASSERT(i == nNodes); - - /* this dag can not commit until all rrd and xor Nodes have completed */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - RF_ASSERT( nRrdNodes > 0 ); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRrdNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, nWndNodes + nfaults, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - nRrdNodes, 2*nXorBufs+2, nfaults, dag_h, "Xrc", allocList); - - /* - * Fill in the Rrd nodes. If any of the rrd buffers are the same size as - * the failed buffer, save a pointer to it so we can use it as the target - * of the XOR. The pdas in the rrd nodes have been range-restricted, so if - * a buffer is the same size as the failed buffer, it must also be at the - * same alignment within the SU. 
- */ - i = 0; - if (new_asm_h[0]) { - for (i=0, pda=new_asm_h[0]->stripeMap->physInfo; - i<new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - i++, pda=pda->next) - { - rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i].params[0].p = pda; - rrdNodes[i].params[1].p = pda->bufPtr; - rrdNodes[i].params[2].v = parityStripeID; - rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - /* i now equals the number of stripe units accessed in new_asm_h[0] */ - if (new_asm_h[1]) { - for (j=0,pda=new_asm_h[1]->stripeMap->physInfo; - j<new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - j++, pda=pda->next) - { - rf_InitNode(&rrdNodes[i+j], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList); - RF_ASSERT(pda); - rrdNodes[i+j].params[0].p = pda; - rrdNodes[i+j].params[1].p = pda->bufPtr; - rrdNodes[i+j].params[2].v = parityStripeID; - rrdNodes[i+j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - if (allowBufferRecycle && (pda->numSector == failedPDA->numSector)) - xorTargetBuf = pda->bufPtr; - } - } - if (rdnodesFaked) { - /* - * This is where we'll init that fake noop read node - * (XXX should the wakeup func be different?) - */ - rf_InitNode(&rrdNodes[0], rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "RrN", allocList); - } - - /* - * Make a PDA for the parity unit. The parity PDA should start at - * the same offset into the SU as the failed PDA. - */ - /* - * Danner comment: - * I don't think this copy is really necessary. - * We are in one of two cases here. - * (1) The entire failed unit is written. Then asmap->parityInfo will - * describe the entire parity. - * (2) We are only writing a subset of the failed unit and nothing - * else. 
Then the asmap->parityInfo describes the failed unit and - * the copy can also be avoided. - */ - - RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - parityPDA->row = asmap->parityInfo->row; - parityPDA->col = asmap->parityInfo->col; - parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); - parityPDA->numSector = failedPDA->numSector; - - if (!xorTargetBuf) { - RF_CallocAndAdd(xorTargetBuf, 1, - rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList); - } - - /* init the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); - wnpNode->params[0].p = parityPDA; - wnpNode->params[1].p = xorTargetBuf; - wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - /* fill in the Wnq Node */ - if (nfaults == 2) { - { - RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), - (RF_PhysDiskAddr_t *), allocList); - parityPDA->row = asmap->qInfo->row; - parityPDA->col = asmap->qInfo->col; - parityPDA->startSector = ((asmap->qInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); - parityPDA->numSector = failedPDA->numSector; - - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); - wnqNode->params[0].p = parityPDA; - RF_CallocAndAdd(xorNode->results[1], 1, - rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList); - wnqNode->params[1].p = xorNode->results[1]; - wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - } - - /* fill in the Wnd nodes */ - for (pda=asmap->physInfo, i=0; i<nWndNodes; i++, pda=pda->next) { - if (pda == failedPDA) { - i--; - continue; - 
} - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - /* fill in the results of the xor node */ - xorNode->results[0] = xorTargetBuf; - - /* fill in the params of the xor node */ - - paramNum=0; - if (rdnodesFaked == 0) { - for (i=0; i<nRrdNodes; i++) { - /* all the Rrd nodes need to be xored together */ - xorNode->params[paramNum++] = rrdNodes[i].params[0]; - xorNode->params[paramNum++] = rrdNodes[i].params[1]; - } - } - for (i=0; i < nWndNodes; i++) { - /* any Wnd nodes that overlap the failed access need to be xored in */ - if (overlappingPDAs[i]) { - RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - bcopy((char *)wndNodes[i].params[0].p, (char *)pda, sizeof(RF_PhysDiskAddr_t)); - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); - xorNode->params[paramNum++].p = pda; - xorNode->params[paramNum++].p = pda->bufPtr; - } - } - RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); - - /* - * Install the failed PDA into the xor param list so that the - * new data gets xor'd in. - */ - xorNode->params[paramNum++].p = failedPDA; - xorNode->params[paramNum++].p = failedPDA->bufPtr; - - /* - * The last 2 params to the recovery xor node are always the failed - * PDA and the raidPtr. install the failedPDA even though we have just - * done so above. This allows us to use the same XOR function for both - * degraded reads and degraded writes. 
- */ - xorNode->params[paramNum++].p = failedPDA; - xorNode->params[paramNum++].p = raidPtr; - RF_ASSERT( paramNum == 2*nXorBufs+2 ); - - /* - * Code to link nodes begins here - */ - - /* link header to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* link block node to rd nodes */ - RF_ASSERT(blockNode->numSuccedents == nRrdNodes); - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rrdNodes[i]; - rrdNodes[i].antecedents[0] = blockNode; - rrdNodes[i].antType[0] = rf_control; - } - - /* link read nodes to xor node*/ - RF_ASSERT(xorNode->numAntecedents == nRrdNodes); - for (i = 0; i < nRrdNodes; i++) { - RF_ASSERT(rrdNodes[i].numSuccedents == 1); - rrdNodes[i].succedents[0] = xorNode; - xorNode->antecedents[i] = &rrdNodes[i]; - xorNode->antType[i] = rf_trueData; - } - - /* link xor node to commit node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - xorNode->succedents[0] = commitNode; - commitNode->antecedents[0] = xorNode; - commitNode->antType[0] = rf_control; - - /* link commit node to wnd nodes */ - RF_ASSERT(commitNode->numSuccedents == nfaults + nWndNodes); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes[i].numAntecedents == 1); - commitNode->succedents[i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = commitNode; - wndNodes[i].antType[0] = rf_control; - } - - /* link the commit node to wnp, wnq nodes */ - RF_ASSERT(wnpNode->numAntecedents == 1); - commitNode->succedents[nWndNodes] = wnpNode; - wnpNode->antecedents[0] = commitNode; - wnpNode->antType[0] = rf_control; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numAntecedents == 1); - commitNode->succedents[nWndNodes + 1] = wnqNode; - wnqNode->antecedents[0] = commitNode; - wnqNode->antType[0] = rf_control; - } - - /* link write new data nodes to unblock node */ - RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nfaults)); - for(i = 0; i < 
nWndNodes; i++) { - RF_ASSERT(wndNodes[i].numSuccedents == 1); - wndNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNodes[i]; - unblockNode->antType[i] = rf_control; - } - - /* link write new parity node to unblock node */ - RF_ASSERT(wnpNode->numSuccedents == 1); - wnpNode->succedents[0] = unblockNode; - unblockNode->antecedents[nWndNodes] = wnpNode; - unblockNode->antType[nWndNodes] = rf_control; - - /* link write new q node to unblock node */ - if (nfaults == 2) { - RF_ASSERT(wnqNode->numSuccedents == 1); - wnqNode->succedents[0] = unblockNode; - unblockNode->antecedents[nWndNodes+1] = wnqNode; - unblockNode->antType[nWndNodes+1] = rf_control; - } - - /* link unblock node to term node */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; -} + int nNodes, nRrdNodes, nWndNodes, nXorBufs, i, j, paramNum, + rdnodesFaked; + RF_DagNode_t *blockNode, *unblockNode, *wnpNode, *wnqNode, *termNode; + RF_DagNode_t *nodes, *wndNodes, *rrdNodes, *xorNode, *commitNode; + RF_SectorCount_t sectorsPerSU; + RF_ReconUnitNum_t which_ru; + char *xorTargetBuf = NULL; /* the target buffer for the XOR + * operation */ + char *overlappingPDAs;/* a temporary array of flags */ + RF_AccessStripeMapHeader_t *new_asm_h[2]; + RF_PhysDiskAddr_t *pda, *parityPDA; + RF_StripeNum_t parityStripeID; + RF_PhysDiskAddr_t *failedPDA; + RF_RaidLayout_t *layoutPtr; + + layoutPtr = &(raidPtr->Layout); + parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, + &which_ru); + sectorsPerSU = layoutPtr->sectorsPerStripeUnit; + /* failedPDA points to the pda within the asm that targets the failed + * disk */ + failedPDA = asmap->failedPDAs[0]; + + if (rf_dagDebug) + printf("[Creating degraded-write DAG]\n"); + + RF_ASSERT(asmap->numDataFailed == 1); + 
dag_h->creator = "SimpleDegradedWriteDAG"; + + /* + * Generate two ASMs identifying the surviving data + * we need in order to recover the lost data. + */ + /* overlappingPDAs array must be zero'd */ + RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *)); + rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, + &nXorBufs, NULL, overlappingPDAs, allocList); + + /* create all the nodes at once */ + nWndNodes = asmap->numStripeUnitsAccessed - 1; /* no access is + * generated for the + * failed pda */ + + nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + + ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); + /* + * XXX + * + * There's a bug with a complete stripe overwrite- that means 0 reads + * of old data, and the rest of the DAG generation code doesn't like + * that. A release is coming, and I don't wanna risk breaking a critical + * DAG generator, so here's what I'm gonna do- if there's no read nodes, + * I'm gonna fake there being a read node, and I'm gonna swap in a + * no-op node in its place (to make all the link-up code happy). + * This should be fixed at some point. 
--jimz + */ + if (nRrdNodes == 0) { + nRrdNodes = 1; + rdnodesFaked = 1; + } else { + rdnodesFaked = 0; + } + /* lock, unlock, xor, Wnd, Rrd, W(nfaults) */ + nNodes = 5 + nfaults + nWndNodes + nRrdNodes; + RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); + i = 0; + blockNode = &nodes[i]; + i += 1; + commitNode = &nodes[i]; + i += 1; + unblockNode = &nodes[i]; + i += 1; + termNode = &nodes[i]; + i += 1; + xorNode = &nodes[i]; + i += 1; + wnpNode = &nodes[i]; + i += 1; + wndNodes = &nodes[i]; + i += nWndNodes; + rrdNodes = &nodes[i]; + i += nRrdNodes; + if (nfaults == 2) { + wnqNode = &nodes[i]; + i += 1; + } else { + wnqNode = NULL; + } + RF_ASSERT(i == nNodes); + + /* this dag can not commit until all rrd and xor Nodes have completed */ + dag_h->numCommitNodes = 1; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + RF_ASSERT(nRrdNodes > 0); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, nRrdNodes, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, 1, nWndNodes + nfaults, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, + NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, + nRrdNodes, 2 * nXorBufs + 2, nfaults, dag_h, "Xrc", allocList); + + /* + * Fill in the Rrd nodes. If any of the rrd buffers are the same size as + * the failed buffer, save a pointer to it so we can use it as the target + * of the XOR. The pdas in the rrd nodes have been range-restricted, so if + * a buffer is the same size as the failed buffer, it must also be at the + * same alignment within the SU. 
+ */ + i = 0; + if (new_asm_h[0]) { + for (i = 0, pda = new_asm_h[0]->stripeMap->physInfo; + i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed; + i++, pda = pda->next) { + rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList); + RF_ASSERT(pda); + rrdNodes[i].params[0].p = pda; + rrdNodes[i].params[1].p = pda->bufPtr; + rrdNodes[i].params[2].v = parityStripeID; + rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } + } + /* i now equals the number of stripe units accessed in new_asm_h[0] */ + if (new_asm_h[1]) { + for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; + j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; + j++, pda = pda->next) { + rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList); + RF_ASSERT(pda); + rrdNodes[i + j].params[0].p = pda; + rrdNodes[i + j].params[1].p = pda->bufPtr; + rrdNodes[i + j].params[2].v = parityStripeID; + rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + if (allowBufferRecycle && (pda->numSector == failedPDA->numSector)) + xorTargetBuf = pda->bufPtr; + } + } + if (rdnodesFaked) { + /* + * This is where we'll init that fake noop read node + * (XXX should the wakeup func be different?) + */ + rf_InitNode(&rrdNodes[0], rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, 1, 1, 0, 0, dag_h, "RrN", allocList); + } + /* + * Make a PDA for the parity unit. The parity PDA should start at + * the same offset into the SU as the failed PDA. + */ + /* Danner comment: I don't think this copy is really necessary. We are + * in one of two cases here. (1) The entire failed unit is written. + * Then asmap->parityInfo will describe the entire parity. (2) We are + * only writing a subset of the failed unit and nothing else. 
Then the + * asmap->parityInfo describes the failed unit and the copy can also + * be avoided. */ + + RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + parityPDA->row = asmap->parityInfo->row; + parityPDA->col = asmap->parityInfo->col; + parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) + * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); + parityPDA->numSector = failedPDA->numSector; + + if (!xorTargetBuf) { + RF_CallocAndAdd(xorTargetBuf, 1, + rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList); + } + /* init the Wnp node */ + rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); + wnpNode->params[0].p = parityPDA; + wnpNode->params[1].p = xorTargetBuf; + wnpNode->params[2].v = parityStripeID; + wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + + /* fill in the Wnq Node */ + if (nfaults == 2) { + { + RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); + parityPDA->row = asmap->qInfo->row; + parityPDA->col = asmap->qInfo->col; + parityPDA->startSector = ((asmap->qInfo->startSector / sectorsPerSU) + * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); + parityPDA->numSector = failedPDA->numSector; + + rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); + wnqNode->params[0].p = parityPDA; + RF_CallocAndAdd(xorNode->results[1], 1, + rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList); + wnqNode->params[1].p = xorNode->results[1]; + wnqNode->params[2].v = parityStripeID; + wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } + } + /* fill in the Wnd nodes */ + for (pda = asmap->physInfo, i = 0; i < nWndNodes; i++, pda = pda->next) { + if (pda == failedPDA) { + i--; + 
continue; + } + rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); + RF_ASSERT(pda); + wndNodes[i].params[0].p = pda; + wndNodes[i].params[1].p = pda->bufPtr; + wndNodes[i].params[2].v = parityStripeID; + wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } + /* fill in the results of the xor node */ + xorNode->results[0] = xorTargetBuf; + + /* fill in the params of the xor node */ + + paramNum = 0; + if (rdnodesFaked == 0) { + for (i = 0; i < nRrdNodes; i++) { + /* all the Rrd nodes need to be xored together */ + xorNode->params[paramNum++] = rrdNodes[i].params[0]; + xorNode->params[paramNum++] = rrdNodes[i].params[1]; + } + } + for (i = 0; i < nWndNodes; i++) { + /* any Wnd nodes that overlap the failed access need to be + * xored in */ + if (overlappingPDAs[i]) { + RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + bcopy((char *) wndNodes[i].params[0].p, (char *) pda, sizeof(RF_PhysDiskAddr_t)); + rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); + xorNode->params[paramNum++].p = pda; + xorNode->params[paramNum++].p = pda->bufPtr; + } + } + RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); + + /* + * Install the failed PDA into the xor param list so that the + * new data gets xor'd in. + */ + xorNode->params[paramNum++].p = failedPDA; + xorNode->params[paramNum++].p = failedPDA->bufPtr; + + /* + * The last 2 params to the recovery xor node are always the failed + * PDA and the raidPtr. install the failedPDA even though we have just + * done so above. This allows us to use the same XOR function for both + * degraded reads and degraded writes. 
+ */ + xorNode->params[paramNum++].p = failedPDA; + xorNode->params[paramNum++].p = raidPtr; + RF_ASSERT(paramNum == 2 * nXorBufs + 2); + + /* + * Code to link nodes begins here + */ + + /* link header to block node */ + RF_ASSERT(blockNode->numAntecedents == 0); + dag_h->succedents[0] = blockNode; + + /* link block node to rd nodes */ + RF_ASSERT(blockNode->numSuccedents == nRrdNodes); + for (i = 0; i < nRrdNodes; i++) { + RF_ASSERT(rrdNodes[i].numAntecedents == 1); + blockNode->succedents[i] = &rrdNodes[i]; + rrdNodes[i].antecedents[0] = blockNode; + rrdNodes[i].antType[0] = rf_control; + } + + /* link read nodes to xor node */ + RF_ASSERT(xorNode->numAntecedents == nRrdNodes); + for (i = 0; i < nRrdNodes; i++) { + RF_ASSERT(rrdNodes[i].numSuccedents == 1); + rrdNodes[i].succedents[0] = xorNode; + xorNode->antecedents[i] = &rrdNodes[i]; + xorNode->antType[i] = rf_trueData; + } + + /* link xor node to commit node */ + RF_ASSERT(xorNode->numSuccedents == 1); + RF_ASSERT(commitNode->numAntecedents == 1); + xorNode->succedents[0] = commitNode; + commitNode->antecedents[0] = xorNode; + commitNode->antType[0] = rf_control; + + /* link commit node to wnd nodes */ + RF_ASSERT(commitNode->numSuccedents == nfaults + nWndNodes); + for (i = 0; i < nWndNodes; i++) { + RF_ASSERT(wndNodes[i].numAntecedents == 1); + commitNode->succedents[i] = &wndNodes[i]; + wndNodes[i].antecedents[0] = commitNode; + wndNodes[i].antType[0] = rf_control; + } + + /* link the commit node to wnp, wnq nodes */ + RF_ASSERT(wnpNode->numAntecedents == 1); + commitNode->succedents[nWndNodes] = wnpNode; + wnpNode->antecedents[0] = commitNode; + wnpNode->antType[0] = rf_control; + if (nfaults == 2) { + RF_ASSERT(wnqNode->numAntecedents == 1); + commitNode->succedents[nWndNodes + 1] = wnqNode; + wnqNode->antecedents[0] = commitNode; + wnqNode->antType[0] = rf_control; + } + /* link write new data nodes to unblock node */ + RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nfaults)); + for (i = 0; i < 
nWndNodes; i++) { + RF_ASSERT(wndNodes[i].numSuccedents == 1); + wndNodes[i].succedents[0] = unblockNode; + unblockNode->antecedents[i] = &wndNodes[i]; + unblockNode->antType[i] = rf_control; + } + + /* link write new parity node to unblock node */ + RF_ASSERT(wnpNode->numSuccedents == 1); + wnpNode->succedents[0] = unblockNode; + unblockNode->antecedents[nWndNodes] = wnpNode; + unblockNode->antType[nWndNodes] = rf_control; + + /* link write new q node to unblock node */ + if (nfaults == 2) { + RF_ASSERT(wnqNode->numSuccedents == 1); + wnqNode->succedents[0] = unblockNode; + unblockNode->antecedents[nWndNodes + 1] = wnqNode; + unblockNode->antType[nWndNodes + 1] = rf_control; + } + /* link unblock node to term node */ + RF_ASSERT(unblockNode->numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == 1); + RF_ASSERT(termNode->numSuccedents == 0); + unblockNode->succedents[0] = termNode; + termNode->antecedents[0] = unblockNode; + termNode->antType[0] = rf_control; +} #define CONS_PDA(if,start,num) \ pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \ pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ @@ -614,146 +515,139 @@ void rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, pda_p->next = NULL; \ RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) -void rf_WriteGenerateFailedAccessASMs( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_PhysDiskAddr_t **pdap, - int *nNodep, - RF_PhysDiskAddr_t **pqpdap, - int *nPQNodep, - RF_AllocListElem_t *allocList) +void +rf_WriteGenerateFailedAccessASMs( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_PhysDiskAddr_t ** pdap, + int *nNodep, + RF_PhysDiskAddr_t ** pqpdap, + int *nPQNodep, + RF_AllocListElem_t * allocList) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int PDAPerDisk,i; - RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - int numDataCol = layoutPtr->numDataCol; - int state; - 
unsigned napdas; - RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end; - RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; - RF_PhysDiskAddr_t *pda_p; - RF_RaidAddr_t sosAddr; - - /* determine how many pda's we will have to generate per unaccess stripe. - If there is only one failed data unit, it is one; if two, possibly two, - depending wether they overlap. */ - - fone_start = rf_StripeUnitOffset(layoutPtr,fone->startSector); - fone_end = fone_start + fone->numSector; - - if (asmap->numDataFailed==1) - { - PDAPerDisk = 1; - state = 1; - RF_MallocAndAdd(*pqpdap,2*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - /* build p */ - CONS_PDA(parityInfo,fone_start,fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - /* build q */ - CONS_PDA(qInfo,fone_start,fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - } - else - { - ftwo_start = rf_StripeUnitOffset(layoutPtr,ftwo->startSector); - ftwo_end = ftwo_start + ftwo->numSector; - if (fone->numSector + ftwo->numSector > secPerSU) - { - PDAPerDisk = 1; - state = 2; - RF_MallocAndAdd(*pqpdap,2*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - CONS_PDA(parityInfo,0,secPerSU); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo,0,secPerSU); - pda_p->type = RF_PDA_TYPE_Q; - } - else - { - PDAPerDisk = 2; - state = 3; - /* four of them, fone, then ftwo */ - RF_MallocAndAdd(*pqpdap,4*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList); - pda_p = *pqpdap; - CONS_PDA(parityInfo,fone_start,fone->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo,fone_start,fone->numSector); - pda_p->type = RF_PDA_TYPE_Q; - pda_p++; - CONS_PDA(parityInfo,ftwo_start,ftwo->numSector); - pda_p->type = RF_PDA_TYPE_PARITY; - pda_p++; - CONS_PDA(qInfo,ftwo_start,ftwo->numSector); - pda_p->type = RF_PDA_TYPE_Q; + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + int PDAPerDisk, i; + 
RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; + int numDataCol = layoutPtr->numDataCol; + int state; + unsigned napdas; + RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end; + RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; + RF_PhysDiskAddr_t *pda_p; + RF_RaidAddr_t sosAddr; + + /* determine how many pda's we will have to generate per unaccess + * stripe. If there is only one failed data unit, it is one; if two, + * possibly two, depending wether they overlap. */ + + fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); + fone_end = fone_start + fone->numSector; + + if (asmap->numDataFailed == 1) { + PDAPerDisk = 1; + state = 1; + RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + pda_p = *pqpdap; + /* build p */ + CONS_PDA(parityInfo, fone_start, fone->numSector); + pda_p->type = RF_PDA_TYPE_PARITY; + pda_p++; + /* build q */ + CONS_PDA(qInfo, fone_start, fone->numSector); + pda_p->type = RF_PDA_TYPE_Q; + } else { + ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector); + ftwo_end = ftwo_start + ftwo->numSector; + if (fone->numSector + ftwo->numSector > secPerSU) { + PDAPerDisk = 1; + state = 2; + RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + pda_p = *pqpdap; + CONS_PDA(parityInfo, 0, secPerSU); + pda_p->type = RF_PDA_TYPE_PARITY; + pda_p++; + CONS_PDA(qInfo, 0, secPerSU); + pda_p->type = RF_PDA_TYPE_Q; + } else { + PDAPerDisk = 2; + state = 3; + /* four of them, fone, then ftwo */ + RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + pda_p = *pqpdap; + CONS_PDA(parityInfo, fone_start, fone->numSector); + pda_p->type = RF_PDA_TYPE_PARITY; + pda_p++; + CONS_PDA(qInfo, fone_start, fone->numSector); + pda_p->type = RF_PDA_TYPE_Q; + pda_p++; + CONS_PDA(parityInfo, ftwo_start, ftwo->numSector); + pda_p->type = RF_PDA_TYPE_PARITY; + pda_p++; + CONS_PDA(qInfo, 
ftwo_start, ftwo->numSector); + pda_p->type = RF_PDA_TYPE_Q; + } } - } - /* figure out number of nonaccessed pda */ - napdas = PDAPerDisk * (numDataCol - 2); - *nPQNodep = PDAPerDisk; - - *nNodep = napdas; - if (napdas == 0) return; /* short circuit */ - - /* allocate up our list of pda's */ - - RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - *pdap = pda_p; - - /* linkem together */ - for (i=0; i < (napdas-1); i++) - pda_p[i].next = pda_p+(i+1); - - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i=0; i < numDataCol; i++) - { - if ((pda_p - (*pdap)) == napdas) - continue; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) - continue; - switch (state) - { - case 1: /* fone */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - break; - case 2: /* full stripe */ - pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,secPerSU), (char *), allocList); - break; - case 3: /* two slabs */ - pda_p->numSector = fone->numSector; - pda_p->raidAddress += fone_start; - pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - pda_p++; - pda_p->type = RF_PDA_TYPE_DATA; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - pda_p->numSector = ftwo->numSector; - pda_p->raidAddress += ftwo_start; - pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, 
rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList); - break; - default: - RF_PANIC(); + /* figure out number of nonaccessed pda */ + napdas = PDAPerDisk * (numDataCol - 2); + *nPQNodep = PDAPerDisk; + + *nNodep = napdas; + if (napdas == 0) + return; /* short circuit */ + + /* allocate up our list of pda's */ + + RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + *pdap = pda_p; + + /* linkem together */ + for (i = 0; i < (napdas - 1); i++) + pda_p[i].next = pda_p + (i + 1); + + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + for (i = 0; i < numDataCol; i++) { + if ((pda_p - (*pdap)) == napdas) + continue; + pda_p->type = RF_PDA_TYPE_DATA; + pda_p->raidAddress = sosAddr + (i * secPerSU); + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + /* skip over dead disks */ + if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) + continue; + switch (state) { + case 1: /* fone */ + pda_p->numSector = fone->numSector; + pda_p->raidAddress += fone_start; + pda_p->startSector += fone_start; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + break; + case 2: /* full stripe */ + pda_p->numSector = secPerSU; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); + break; + case 3: /* two slabs */ + pda_p->numSector = fone->numSector; + pda_p->raidAddress += fone_start; + pda_p->startSector += fone_start; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p++; + pda_p->type = RF_PDA_TYPE_DATA; + pda_p->raidAddress = sosAddr + (i * secPerSU); + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + pda_p->numSector = ftwo->numSector; + pda_p->raidAddress += ftwo_start; + pda_p->startSector += 
ftwo_start; + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + break; + default: + RF_PANIC(); + } + pda_p++; } - pda_p++; - } - RF_ASSERT (pda_p - *pdap == napdas); - return; + RF_ASSERT(pda_p - *pdap == napdas); + return; } - #define DISK_NODE_PDA(node) ((node)->params[0].p) #define DISK_NODE_PARAMS(_node_,_p_) \ @@ -762,208 +656,190 @@ void rf_WriteGenerateFailedAccessASMs( (_node_).params[2].v = parityStripeID; \ (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) -void rf_DoubleDegSmallWrite( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - char *redundantReadNodeName, - char *redundantWriteNodeName, - char *recoveryNodeName, - int (*recovFunc)(RF_DagNode_t *)) +void +rf_DoubleDegSmallWrite( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + char *redundantReadNodeName, + char *redundantWriteNodeName, + char *recoveryNodeName, + int (*recovFunc) (RF_DagNode_t *)) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DagNode_t *nodes, *wudNodes, *rrdNodes, *recoveryNode, *blockNode, *unblockNode, *rpNodes,*rqNodes, *wpNodes, *wqNodes, *termNode; - RF_PhysDiskAddr_t *pda, *pqPDAs; - RF_PhysDiskAddr_t *npdas; - int nWriteNodes, nNodes, nReadNodes, nRrdNodes, nWudNodes, i; - RF_ReconUnitNum_t which_ru; - int nPQNodes; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); - - /* simple small write case - - First part looks like a reconstruct-read of the failed data units. - Then a write of all data units not failed. */ - - - /* - Hdr - | - ------Block- - / / \ - Rrd Rrd ... 
Rrd Rp Rq - \ \ / - -------PQ----- - / \ \ - Wud Wp WQ - \ | / - --Unblock- - | - T - - Rrd = read recovery data (potentially none) - Wud = write user data (not incl. failed disks) - Wp = Write P (could be two) - Wq = Write Q (could be two) - - */ - - rf_WriteGenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes,allocList); - - RF_ASSERT(asmap->numDataFailed == 1); - - nWudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); - nReadNodes = nRrdNodes + 2*nPQNodes; - nWriteNodes = nWudNodes+ 2*nPQNodes; - nNodes = 4 + nReadNodes + nWriteNodes; - - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - blockNode = nodes; - unblockNode = blockNode+1; - termNode = unblockNode+1; - recoveryNode = termNode+1; - rrdNodes = recoveryNode+1; - rpNodes = rrdNodes + nRrdNodes; - rqNodes = rpNodes + nPQNodes; - wudNodes = rqNodes + nPQNodes; - wpNodes = wudNodes + nWudNodes; - wqNodes = wpNodes + nPQNodes; - - dag_h->creator = "PQ_DDSimpleSmallWrite"; - dag_h->numSuccedents = 1; - dag_h->succedents[0] = blockNode; - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - - /* init the block and unblock nodes */ - /* The block node has all the read nodes as successors */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); - for (i=0; i < nReadNodes; i++) - blockNode->succedents[i] = rrdNodes+i; - - /* The unblock node has all the writes as successors */ - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWriteNodes, 0, 0, dag_h, "Nil", allocList); - for (i=0; i < nWriteNodes; i++) { - unblockNode->antecedents[i] = wudNodes+i; - unblockNode->antType[i] = rf_control; - } - unblockNode->succedents[0] = termNode; + RF_RaidLayout_t *layoutPtr = 
&(raidPtr->Layout); + RF_DagNode_t *nodes, *wudNodes, *rrdNodes, *recoveryNode, *blockNode, + *unblockNode, *rpNodes, *rqNodes, *wpNodes, *wqNodes, *termNode; + RF_PhysDiskAddr_t *pda, *pqPDAs; + RF_PhysDiskAddr_t *npdas; + int nWriteNodes, nNodes, nReadNodes, nRrdNodes, nWudNodes, i; + RF_ReconUnitNum_t which_ru; + int nPQNodes; + RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); + + /* simple small write case - First part looks like a reconstruct-read + * of the failed data units. Then a write of all data units not + * failed. */ + + + /* Hdr | ------Block- / / \ Rrd Rrd ... Rrd Rp Rq \ \ + * / -------PQ----- / \ \ Wud Wp WQ \ | / + * --Unblock- | T + * + * Rrd = read recovery data (potentially none) Wud = write user data + * (not incl. failed disks) Wp = Write P (could be two) Wq = Write Q + * (could be two) + * + */ + + rf_WriteGenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList); + + RF_ASSERT(asmap->numDataFailed == 1); + + nWudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); + nReadNodes = nRrdNodes + 2 * nPQNodes; + nWriteNodes = nWudNodes + 2 * nPQNodes; + nNodes = 4 + nReadNodes + nWriteNodes; + + RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + blockNode = nodes; + unblockNode = blockNode + 1; + termNode = unblockNode + 1; + recoveryNode = termNode + 1; + rrdNodes = recoveryNode + 1; + rpNodes = rrdNodes + nRrdNodes; + rqNodes = rpNodes + nPQNodes; + wudNodes = rqNodes + nPQNodes; + wpNodes = wudNodes + nWudNodes; + wqNodes = wpNodes + nPQNodes; + + dag_h->creator = "PQ_DDSimpleSmallWrite"; + dag_h->numSuccedents = 1; + dag_h->succedents[0] = blockNode; + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + termNode->antecedents[0] = unblockNode; + termNode->antType[0] = rf_control; + + /* init the block and unblock nodes */ + /* The 
block node has all the read nodes as successors */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); + for (i = 0; i < nReadNodes; i++) + blockNode->succedents[i] = rrdNodes + i; + + /* The unblock node has all the writes as successors */ + rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWriteNodes, 0, 0, dag_h, "Nil", allocList); + for (i = 0; i < nWriteNodes; i++) { + unblockNode->antecedents[i] = wudNodes + i; + unblockNode->antType[i] = rf_control; + } + unblockNode->succedents[0] = termNode; #define INIT_READ_NODE(node,name) \ rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \ (node)->succedents[0] = recoveryNode; \ (node)->antecedents[0] = blockNode; \ (node)->antType[0] = rf_control; - - /* build the read nodes */ - pda = npdas; - for (i=0; i < nRrdNodes; i++, pda = pda->next) { - INIT_READ_NODE(rrdNodes+i,"rrd"); - DISK_NODE_PARAMS(rrdNodes[i],pda); - } - - /* read redundancy pdas */ - pda = pqPDAs; - INIT_READ_NODE(rpNodes,"Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[0],pda); - pda++; - INIT_READ_NODE(rqNodes, redundantReadNodeName ); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[0],pda); - if (nPQNodes==2) - { - pda++; - INIT_READ_NODE(rpNodes+1,"Rp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rpNodes[1],pda); - pda++; - INIT_READ_NODE(rqNodes+1,redundantReadNodeName ); - RF_ASSERT(pda); - DISK_NODE_PARAMS(rqNodes[1],pda); - } - - /* the recovery node has all reads as precedessors and all writes as successors. - It generates a result for every write P or write Q node. - As parameters, it takes a pda per read and a pda per stripe of user data written. - It also takes as the last params the raidPtr and asm. - For results, it takes PDA for P & Q. 
*/ - - - rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, - nWriteNodes, /* succesors */ - nReadNodes, /* preds */ - nReadNodes + nWudNodes + 3, /* params */ - 2 * nPQNodes, /* results */ - dag_h, recoveryNodeName, allocList); - - - - for (i=0; i < nReadNodes; i++ ) - { - recoveryNode->antecedents[i] = rrdNodes+i; - recoveryNode->antType[i] = rf_control; - recoveryNode->params[i].p = DISK_NODE_PDA(rrdNodes+i); - } - for (i=0; i < nWudNodes; i++) - { - recoveryNode->succedents[i] = wudNodes+i; - } - recoveryNode->params[nReadNodes+nWudNodes].p = asmap->failedPDAs[0]; - recoveryNode->params[nReadNodes+nWudNodes+1].p = raidPtr; - recoveryNode->params[nReadNodes+nWudNodes+2].p = asmap; - - for ( ; i < nWriteNodes; i++) - recoveryNode->succedents[i] = wudNodes+i; - - pda = pqPDAs; - recoveryNode->results[0] = pda; - pda++; - recoveryNode->results[1] = pda; - if ( nPQNodes == 2) - { - pda++; - recoveryNode->results[2] = pda; - pda++; - recoveryNode->results[3] = pda; - } - - /* fill writes */ + + /* build the read nodes */ + pda = npdas; + for (i = 0; i < nRrdNodes; i++, pda = pda->next) { + INIT_READ_NODE(rrdNodes + i, "rrd"); + DISK_NODE_PARAMS(rrdNodes[i], pda); + } + + /* read redundancy pdas */ + pda = pqPDAs; + INIT_READ_NODE(rpNodes, "Rp"); + RF_ASSERT(pda); + DISK_NODE_PARAMS(rpNodes[0], pda); + pda++; + INIT_READ_NODE(rqNodes, redundantReadNodeName); + RF_ASSERT(pda); + DISK_NODE_PARAMS(rqNodes[0], pda); + if (nPQNodes == 2) { + pda++; + INIT_READ_NODE(rpNodes + 1, "Rp"); + RF_ASSERT(pda); + DISK_NODE_PARAMS(rpNodes[1], pda); + pda++; + INIT_READ_NODE(rqNodes + 1, redundantReadNodeName); + RF_ASSERT(pda); + DISK_NODE_PARAMS(rqNodes[1], pda); + } + /* the recovery node has all reads as precedessors and all writes as + * successors. It generates a result for every write P or write Q + * node. As parameters, it takes a pda per read and a pda per stripe + * of user data written. 
It also takes as the last params the raidPtr + * and asm. For results, it takes PDA for P & Q. */ + + + rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, + nWriteNodes, /* succesors */ + nReadNodes, /* preds */ + nReadNodes + nWudNodes + 3, /* params */ + 2 * nPQNodes, /* results */ + dag_h, recoveryNodeName, allocList); + + + + for (i = 0; i < nReadNodes; i++) { + recoveryNode->antecedents[i] = rrdNodes + i; + recoveryNode->antType[i] = rf_control; + recoveryNode->params[i].p = DISK_NODE_PDA(rrdNodes + i); + } + for (i = 0; i < nWudNodes; i++) { + recoveryNode->succedents[i] = wudNodes + i; + } + recoveryNode->params[nReadNodes + nWudNodes].p = asmap->failedPDAs[0]; + recoveryNode->params[nReadNodes + nWudNodes + 1].p = raidPtr; + recoveryNode->params[nReadNodes + nWudNodes + 2].p = asmap; + + for (; i < nWriteNodes; i++) + recoveryNode->succedents[i] = wudNodes + i; + + pda = pqPDAs; + recoveryNode->results[0] = pda; + pda++; + recoveryNode->results[1] = pda; + if (nPQNodes == 2) { + pda++; + recoveryNode->results[2] = pda; + pda++; + recoveryNode->results[3] = pda; + } + /* fill writes */ #define INIT_WRITE_NODE(node,name) \ rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \ (node)->succedents[0] = unblockNode; \ (node)->antecedents[0] = recoveryNode; \ (node)->antType[0] = rf_control; - pda = asmap->physInfo; - for (i=0; i < nWudNodes; i++) - { - INIT_WRITE_NODE(wudNodes+i,"Wd"); - DISK_NODE_PARAMS(wudNodes[i],pda); - recoveryNode->params[nReadNodes+i].p = DISK_NODE_PDA(wudNodes+i); - pda = pda->next; - } - /* write redundancy pdas */ - pda = pqPDAs; - INIT_WRITE_NODE(wpNodes,"Wp"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(wpNodes[0],pda); - pda++; - INIT_WRITE_NODE(wqNodes,"Wq"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(wqNodes[0],pda); - if (nPQNodes==2) - { - pda++; - INIT_WRITE_NODE(wpNodes+1,"Wp"); - RF_ASSERT(pda); - 
DISK_NODE_PARAMS(wpNodes[1],pda); - pda++; - INIT_WRITE_NODE(wqNodes+1,"Wq"); - RF_ASSERT(pda); - DISK_NODE_PARAMS(wqNodes[1],pda); - } + pda = asmap->physInfo; + for (i = 0; i < nWudNodes; i++) { + INIT_WRITE_NODE(wudNodes + i, "Wd"); + DISK_NODE_PARAMS(wudNodes[i], pda); + recoveryNode->params[nReadNodes + i].p = DISK_NODE_PDA(wudNodes + i); + pda = pda->next; + } + /* write redundancy pdas */ + pda = pqPDAs; + INIT_WRITE_NODE(wpNodes, "Wp"); + RF_ASSERT(pda); + DISK_NODE_PARAMS(wpNodes[0], pda); + pda++; + INIT_WRITE_NODE(wqNodes, "Wq"); + RF_ASSERT(pda); + DISK_NODE_PARAMS(wqNodes[0], pda); + if (nPQNodes == 2) { + pda++; + INIT_WRITE_NODE(wpNodes + 1, "Wp"); + RF_ASSERT(pda); + DISK_NODE_PARAMS(wpNodes[1], pda); + pda++; + INIT_WRITE_NODE(wqNodes + 1, "Wq"); + RF_ASSERT(pda); + DISK_NODE_PARAMS(wqNodes[1], pda); + } } diff --git a/sys/dev/raidframe/rf_dagdegwr.h b/sys/dev/raidframe/rf_dagdegwr.h index 180c5f75668..5c58697f06e 100644 --- a/sys/dev/raidframe/rf_dagdegwr.h +++ b/sys/dev/raidframe/rf_dagdegwr.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagdegwr.h,v 1.1 1999/01/11 14:29:08 niklas Exp $ */ -/* $NetBSD: rf_dagdegwr.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_dagdegwr.h,v 1.2 1999/02/16 00:02:30 niklas Exp $ */ +/* $NetBSD: rf_dagdegwr.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,55 +27,28 @@ * rights to redistribute these changes. 
*/ -/* - * : - * Log: rf_dagdegwr.h,v - * Revision 1.6 1996/07/31 16:30:28 jimz - * asm/asmap fix (EO merge) - * - * Revision 1.5 1996/07/31 15:35:38 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.4 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.3 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.2 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.1 1996/05/03 19:21:28 wvcii - * Initial revision - * - */ #ifndef _RF__RF_DAGDEGWR_H_ #define _RF__RF_DAGDEGWR_H_ /* degraded write DAG creation routines */ -void rf_CreateDegradedWriteDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList); -void rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, - int nfaults, int (*redFunc)(RF_DagNode_t *), int allowBufferRecycle); -void rf_WriteGenerateFailedAccessASMs(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_PhysDiskAddr_t **pdap, - int *nNodep, RF_PhysDiskAddr_t **pqpdap, - int *nPQNodep, RF_AllocListElem_t *allocList); -void rf_DoubleDegSmallWrite(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, char *redundantReadNodeName, - char *redundantWriteNodeName, char *recoveryNodeName, - int (*recovFunc)(RF_DagNode_t *)); +void +rf_CreateDegradedWriteDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * 
asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); +void +rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, + int nfaults, int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); + void rf_WriteGenerateFailedAccessASMs(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t ** pdap, + int *nNodep, RF_PhysDiskAddr_t ** pqpdap, + int *nPQNodep, RF_AllocListElem_t * allocList); + void rf_DoubleDegSmallWrite(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, char *redundantReadNodeName, + char *redundantWriteNodeName, char *recoveryNodeName, + int (*recovFunc) (RF_DagNode_t *)); -#endif /* !_RF__RF_DAGDEGWR_H_ */ +#endif /* !_RF__RF_DAGDEGWR_H_ */ diff --git a/sys/dev/raidframe/rf_dagffrd.c b/sys/dev/raidframe/rf_dagffrd.c index b831980cb0e..09df06c22e6 100644 --- a/sys/dev/raidframe/rf_dagffrd.c +++ b/sys/dev/raidframe/rf_dagffrd.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagffrd.c,v 1.1 1999/01/11 14:29:08 niklas Exp $ */ -/* $NetBSD: rf_dagffrd.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_dagffrd.c,v 1.2 1999/02/16 00:02:30 niklas Exp $ */ +/* $NetBSD: rf_dagffrd.c,v 1.3 1999/02/05 00:06:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -32,78 +32,6 @@ * * code for creating fault-free read DAGs * - * : - * Log: rf_dagffrd.c,v - * Revision 1.14 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.13 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.12 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.11 1996/06/06 17:30:44 jimz - * turn old Raid1 mirror read creation into a more generic function - * parameterized by an addtional parameter: type of mirrored read - * this is now used by other dag creation routines so chained declustering - * and raid1 can share dag creation code, but have different mirroring - * policies - * - * Revision 1.10 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.9 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.8 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.7 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.6 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.5 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.4 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.1 1996/05/03 19:19:20 wvcii - * Initial revision - * */ #include "rf_types.h" @@ -120,7 +48,7 @@ /****************************************************************************** * * General comments on DAG creation: - * + * * All DAGs in this file use roll-away error recovery. Each DAG has a single * commit node, usually called "Cmt." If an error occurs before the Cmt node * is reached, the execution engine will halt forward execution and work @@ -144,16 +72,17 @@ * the DAG creation routines to be replaced at this single point. 
*/ -void rf_CreateFaultFreeReadDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList) +void +rf_CreateFaultFreeReadDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList) { - rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_READ); + rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + RF_IO_TYPE_READ); } @@ -168,13 +97,13 @@ void rf_CreateFaultFreeReadDAG( * stripe. * For reads, this DAG is as follows: * - * /---- read ----\ + * /---- read ----\ * Header -- Block ---- read ---- Commit -- Terminate * \---- read ----/ * * For writes, this DAG is as follows: * - * /---- write ----\ + * /---- write ----\ * Header -- Commit ---- write ---- Block -- Terminate * \---- write ----/ * @@ -196,163 +125,168 @@ void rf_CreateFaultFreeReadDAG( * *****************************************************************************/ -void rf_CreateNonredundantDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - RF_IoType_t type) +void +rf_CreateNonredundantDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + RF_IoType_t type) { - RF_DagNode_t *nodes, *diskNodes, *blockNode, *commitNode, *termNode; - RF_PhysDiskAddr_t *pda = asmap->physInfo; - int (*doFunc)(RF_DagNode_t *), (*undoFunc)(RF_DagNode_t *); - int i, n, totalNumNodes; - char *name; - - n = asmap->numStripeUnitsAccessed; - dag_h->creator = "NonredundantDAG"; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - switch (type) { - case RF_IO_TYPE_READ: - doFunc = rf_DiskReadFunc; - undoFunc = rf_DiskReadUndoFunc; - name = "R "; - if (rf_dagDebug) printf("[Creating 
non-redundant read DAG]\n"); - break; - case RF_IO_TYPE_WRITE: - doFunc = rf_DiskWriteFunc; - undoFunc = rf_DiskWriteUndoFunc; - name = "W "; - if (rf_dagDebug) printf("[Creating non-redundant write DAG]\n"); - break; - default: - RF_PANIC(); - } - - /* - * For reads, the dag can not commit until the block node is reached. - * for writes, the dag commits immediately. - */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* - * Node count: - * 1 block node - * n data reads (or writes) - * 1 commit node - * 1 terminator node - */ - RF_ASSERT(n > 0); - totalNumNodes = n + 3; - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - diskNodes = &nodes[i]; i += n; - blockNode = &nodes[i]; i += 1; - commitNode = &nodes[i]; i += 1; - termNode = &nodes[i]; i += 1; - RF_ASSERT(i == totalNumNodes); - - /* initialize nodes */ - switch (type) { - case RF_IO_TYPE_READ: - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, n, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, n, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - break; - case RF_IO_TYPE_WRITE: - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, n, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, n, 0, 0, dag_h, "Trm", allocList); - break; - default: - RF_PANIC(); - } - - for (i = 0; i < n; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&diskNodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, - 1, 1, 4, 0, dag_h, name, allocList); - 
diskNodes[i].params[0].p = pda; - diskNodes[i].params[1].p = pda->bufPtr; - /* parity stripe id is not necessary */ - diskNodes[i].params[2].v = 0; - diskNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - pda = pda->next; - } - - /* - * Connect nodes. - */ - - /* connect hdr to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - if (type == RF_IO_TYPE_READ) { - /* connecting a nonredundant read DAG */ - RF_ASSERT(blockNode->numSuccedents == n); - RF_ASSERT(commitNode->numAntecedents == n); - for (i=0; i < n; i++) { - /* connect block node to each read node */ - RF_ASSERT(diskNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &diskNodes[i]; - diskNodes[i].antecedents[0] = blockNode; - diskNodes[i].antType[0] = rf_control; - - /* connect each read node to the commit node */ - RF_ASSERT(diskNodes[i].numSuccedents == 1); - diskNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i] = &diskNodes[i]; - commitNode->antType[i] = rf_control; - } - /* connect the commit node to the term node */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = rf_control; - } - else { - /* connecting a nonredundant write DAG */ - /* connect the block node to the commit node */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - blockNode->succedents[0] = commitNode; - commitNode->antecedents[0] = blockNode; - commitNode->antType[0] = rf_control; - - RF_ASSERT(commitNode->numSuccedents == n); - RF_ASSERT(termNode->numAntecedents == n); - RF_ASSERT(termNode->numSuccedents == 0); - for (i=0; i < n; i++) { - /* connect the commit node to each write node */ - RF_ASSERT(diskNodes[i].numAntecedents == 1); - commitNode->succedents[i] = &diskNodes[i]; - diskNodes[i].antecedents[0] = 
commitNode; - diskNodes[i].antType[0] = rf_control; - - /* connect each write node to the term node */ - RF_ASSERT(diskNodes[i].numSuccedents == 1); - diskNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &diskNodes[i]; - termNode->antType[i] = rf_control; - } - } + RF_DagNode_t *nodes, *diskNodes, *blockNode, *commitNode, *termNode; + RF_PhysDiskAddr_t *pda = asmap->physInfo; + int (*doFunc) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); + int i, n, totalNumNodes; + char *name; + + n = asmap->numStripeUnitsAccessed; + dag_h->creator = "NonredundantDAG"; + + RF_ASSERT(RF_IO_IS_R_OR_W(type)); + switch (type) { + case RF_IO_TYPE_READ: + doFunc = rf_DiskReadFunc; + undoFunc = rf_DiskReadUndoFunc; + name = "R "; + if (rf_dagDebug) + printf("[Creating non-redundant read DAG]\n"); + break; + case RF_IO_TYPE_WRITE: + doFunc = rf_DiskWriteFunc; + undoFunc = rf_DiskWriteUndoFunc; + name = "W "; + if (rf_dagDebug) + printf("[Creating non-redundant write DAG]\n"); + break; + default: + RF_PANIC(); + } + + /* + * For reads, the dag can not commit until the block node is reached. + * for writes, the dag commits immediately. 
+ */ + dag_h->numCommitNodes = 1; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + /* + * Node count: + * 1 block node + * n data reads (or writes) + * 1 commit node + * 1 terminator node + */ + RF_ASSERT(n > 0); + totalNumNodes = n + 3; + RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); + i = 0; + diskNodes = &nodes[i]; + i += n; + blockNode = &nodes[i]; + i += 1; + commitNode = &nodes[i]; + i += 1; + termNode = &nodes[i]; + i += 1; + RF_ASSERT(i == totalNumNodes); + + /* initialize nodes */ + switch (type) { + case RF_IO_TYPE_READ: + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, n, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, 1, n, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, + NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + break; + case RF_IO_TYPE_WRITE: + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, n, 1, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, + NULL, 0, n, 0, 0, dag_h, "Trm", allocList); + break; + default: + RF_PANIC(); + } + + for (i = 0; i < n; i++) { + RF_ASSERT(pda != NULL); + rf_InitNode(&diskNodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, + 1, 1, 4, 0, dag_h, name, allocList); + diskNodes[i].params[0].p = pda; + diskNodes[i].params[1].p = pda->bufPtr; + /* parity stripe id is not necessary */ + diskNodes[i].params[2].v = 0; + diskNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + pda = pda->next; + } + + /* + * Connect nodes. 
+ */ + + /* connect hdr to block node */ + RF_ASSERT(blockNode->numAntecedents == 0); + dag_h->succedents[0] = blockNode; + + if (type == RF_IO_TYPE_READ) { + /* connecting a nonredundant read DAG */ + RF_ASSERT(blockNode->numSuccedents == n); + RF_ASSERT(commitNode->numAntecedents == n); + for (i = 0; i < n; i++) { + /* connect block node to each read node */ + RF_ASSERT(diskNodes[i].numAntecedents == 1); + blockNode->succedents[i] = &diskNodes[i]; + diskNodes[i].antecedents[0] = blockNode; + diskNodes[i].antType[0] = rf_control; + + /* connect each read node to the commit node */ + RF_ASSERT(diskNodes[i].numSuccedents == 1); + diskNodes[i].succedents[0] = commitNode; + commitNode->antecedents[i] = &diskNodes[i]; + commitNode->antType[i] = rf_control; + } + /* connect the commit node to the term node */ + RF_ASSERT(commitNode->numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == 1); + RF_ASSERT(termNode->numSuccedents == 0); + commitNode->succedents[0] = termNode; + termNode->antecedents[0] = commitNode; + termNode->antType[0] = rf_control; + } else { + /* connecting a nonredundant write DAG */ + /* connect the block node to the commit node */ + RF_ASSERT(blockNode->numSuccedents == 1); + RF_ASSERT(commitNode->numAntecedents == 1); + blockNode->succedents[0] = commitNode; + commitNode->antecedents[0] = blockNode; + commitNode->antType[0] = rf_control; + + RF_ASSERT(commitNode->numSuccedents == n); + RF_ASSERT(termNode->numAntecedents == n); + RF_ASSERT(termNode->numSuccedents == 0); + for (i = 0; i < n; i++) { + /* connect the commit node to each write node */ + RF_ASSERT(diskNodes[i].numAntecedents == 1); + commitNode->succedents[i] = &diskNodes[i]; + diskNodes[i].antecedents[0] = commitNode; + diskNodes[i].antType[0] = rf_control; + + /* connect each write node to the term node */ + RF_ASSERT(diskNodes[i].numSuccedents == 1); + diskNodes[i].succedents[0] = termNode; + termNode->antecedents[i] = &diskNodes[i]; + termNode->antType[i] = rf_control; + } + } 
} - /****************************************************************************** * Create a fault-free read DAG for RAID level 1 * @@ -366,135 +300,141 @@ void rf_CreateNonredundantDAG( * Parameters: raidPtr - description of the physical array * asmap - logical & physical addresses for this access * bp - buffer ptr (for holding read data) - * flags - general flags (e.g. disk locking) + * flags - general flags (e.g. disk locking) * allocList - list of memory allocated in DAG creation *****************************************************************************/ -static void CreateMirrorReadDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - int (*readfunc)(RF_DagNode_t *node)) +static void +CreateMirrorReadDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + int (*readfunc) (RF_DagNode_t * node)) { - RF_DagNode_t *readNodes, *nodes, *blockNode, *commitNode, *termNode; - RF_PhysDiskAddr_t *data_pda = asmap->physInfo; - RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo; - int i, n, totalNumNodes; - - n = asmap->numStripeUnitsAccessed; - dag_h->creator = "RaidOneReadDAG"; - if (rf_dagDebug) { - printf("[Creating RAID level 1 read DAG]\n"); - } - - /* - * This dag can not commit until the commit node is reached - * errors prior to the commit point imply the dag has failed. 
- */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* - * Node count: - * n data reads - * 1 block node - * 1 commit node - * 1 terminator node - */ - RF_ASSERT(n > 0); - totalNumNodes = n + 3; - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - readNodes = &nodes[i]; i += n; - blockNode = &nodes[i]; i += 1; - commitNode = &nodes[i]; i += 1; - termNode = &nodes[i]; i += 1; - RF_ASSERT(i == totalNumNodes); - - /* initialize nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, - rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, - rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, - rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - for (i = 0; i < n; i++) { - RF_ASSERT(data_pda != NULL); - RF_ASSERT(parity_pda != NULL); - rf_InitNode(&readNodes[i], rf_wait, RF_FALSE, readfunc, - rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5, 0, dag_h, - "Rmir", allocList); - readNodes[i].params[0].p = data_pda; - readNodes[i].params[1].p = data_pda->bufPtr; - /* parity stripe id is not necessary */ - readNodes[i].params[2].p = 0; - readNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - readNodes[i].params[4].p = parity_pda; - data_pda = data_pda->next; - parity_pda = parity_pda->next; - } - - /* - * Connect nodes - */ - - /* connect hdr to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to read nodes */ - RF_ASSERT(blockNode->numSuccedents == n); - for (i=0; i < n; i++) { - RF_ASSERT(readNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &readNodes[i]; - readNodes[i].antecedents[0] = blockNode; - readNodes[i].antType[0] = rf_control; - } - - /* connect read nodes to commit node */ - 
RF_ASSERT(commitNode->numAntecedents == n); - for (i=0; i < n; i++) { - RF_ASSERT(readNodes[i].numSuccedents == 1); - readNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i] = &readNodes[i]; - commitNode->antType[i] = rf_control; - } - - /* connect commit node to term node */ - RF_ASSERT(commitNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - commitNode->succedents[0] = termNode; - termNode->antecedents[0] = commitNode; - termNode->antType[0] = rf_control; + RF_DagNode_t *readNodes, *nodes, *blockNode, *commitNode, *termNode; + RF_PhysDiskAddr_t *data_pda = asmap->physInfo; + RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo; + int i, n, totalNumNodes; + + n = asmap->numStripeUnitsAccessed; + dag_h->creator = "RaidOneReadDAG"; + if (rf_dagDebug) { + printf("[Creating RAID level 1 read DAG]\n"); + } + /* + * This dag can not commit until the commit node is reached + * errors prior to the commit point imply the dag has failed. 
+ */ + dag_h->numCommitNodes = 1; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + /* + * Node count: + * n data reads + * 1 block node + * 1 commit node + * 1 terminator node + */ + RF_ASSERT(n > 0); + totalNumNodes = n + 3; + RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); + i = 0; + readNodes = &nodes[i]; + i += n; + blockNode = &nodes[i]; + i += 1; + commitNode = &nodes[i]; + i += 1; + termNode = &nodes[i]; + i += 1; + RF_ASSERT(i == totalNumNodes); + + /* initialize nodes */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + + for (i = 0; i < n; i++) { + RF_ASSERT(data_pda != NULL); + RF_ASSERT(parity_pda != NULL); + rf_InitNode(&readNodes[i], rf_wait, RF_FALSE, readfunc, + rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5, 0, dag_h, + "Rmir", allocList); + readNodes[i].params[0].p = data_pda; + readNodes[i].params[1].p = data_pda->bufPtr; + /* parity stripe id is not necessary */ + readNodes[i].params[2].p = 0; + readNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + readNodes[i].params[4].p = parity_pda; + data_pda = data_pda->next; + parity_pda = parity_pda->next; + } + + /* + * Connect nodes + */ + + /* connect hdr to block node */ + RF_ASSERT(blockNode->numAntecedents == 0); + dag_h->succedents[0] = blockNode; + + /* connect block node to read nodes */ + RF_ASSERT(blockNode->numSuccedents == n); + for (i = 0; i < n; i++) { + RF_ASSERT(readNodes[i].numAntecedents == 1); + blockNode->succedents[i] = &readNodes[i]; + readNodes[i].antecedents[0] = blockNode; + readNodes[i].antType[0] = rf_control; + } + + /* connect read nodes to commit node */ + 
RF_ASSERT(commitNode->numAntecedents == n); + for (i = 0; i < n; i++) { + RF_ASSERT(readNodes[i].numSuccedents == 1); + readNodes[i].succedents[0] = commitNode; + commitNode->antecedents[i] = &readNodes[i]; + commitNode->antType[i] = rf_control; + } + + /* connect commit node to term node */ + RF_ASSERT(commitNode->numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == 1); + RF_ASSERT(termNode->numSuccedents == 0); + commitNode->succedents[0] = termNode; + termNode->antecedents[0] = commitNode; + termNode->antType[0] = rf_control; } -void rf_CreateMirrorIdleReadDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList) +void +rf_CreateMirrorIdleReadDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList) { - CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - rf_DiskReadMirrorIdleFunc); + CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + rf_DiskReadMirrorIdleFunc); } -void rf_CreateMirrorPartitionReadDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList) +void +rf_CreateMirrorPartitionReadDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList) { - CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - rf_DiskReadMirrorPartitionFunc); + CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + rf_DiskReadMirrorPartitionFunc); } diff --git a/sys/dev/raidframe/rf_dagffrd.h b/sys/dev/raidframe/rf_dagffrd.h index 61e3ee86241..ae068900493 100644 --- a/sys/dev/raidframe/rf_dagffrd.h +++ b/sys/dev/raidframe/rf_dagffrd.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagffrd.h,v 1.1 1999/01/11 14:29:08 niklas Exp $ */ -/* 
$NetBSD: rf_dagffrd.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_dagffrd.h,v 1.2 1999/02/16 00:02:30 niklas Exp $ */ +/* $NetBSD: rf_dagffrd.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,49 +27,27 @@ * rights to redistribute these changes. */ -/* - * : - * Log: rf_dagffrd.h,v - * Revision 1.5 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.4 1996/06/06 17:31:13 jimz - * new mirror read creation dags - * - * Revision 1.3 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.2 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.1 1996/05/03 19:19:53 wvcii - * Initial revision - * - */ - #ifndef _RF__RF_DAGFFRD_H_ #define _RF__RF_DAGFFRD_H_ #include "rf_types.h" /* fault-free read DAG creation routines */ -void rf_CreateFaultFreeReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList); -void rf_CreateNonredundantDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, RF_IoType_t type); -void rf_CreateMirrorIdleReadDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList); -void rf_CreateMirrorPartitionReadDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList); +void +rf_CreateFaultFreeReadDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * 
asmap, + RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList); +void +rf_CreateNonredundantDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, RF_IoType_t type); +void +rf_CreateMirrorIdleReadDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); +void +rf_CreateMirrorPartitionReadDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -#endif /* !_RF__RF_DAGFFRD_H_ */ +#endif /* !_RF__RF_DAGFFRD_H_ */ diff --git a/sys/dev/raidframe/rf_dagffwr.c b/sys/dev/raidframe/rf_dagffwr.c index f502de1b293..49de3ccf554 100644 --- a/sys/dev/raidframe/rf_dagffwr.c +++ b/sys/dev/raidframe/rf_dagffwr.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagffwr.c,v 1.1 1999/01/11 14:29:09 niklas Exp $ */ -/* $NetBSD: rf_dagffwr.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_dagffwr.c,v 1.2 1999/02/16 00:02:31 niklas Exp $ */ +/* $NetBSD: rf_dagffwr.c,v 1.3 1999/02/05 00:06:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -32,100 +32,6 @@ * * code for creating fault-free DAGs * - * : - * Log: rf_dagffwr.c,v - * Revision 1.19 1996/07/31 15:35:24 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.18 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.17 1996/07/27 18:40:24 jimz - * cleanup sweep - * - * Revision 1.16 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.15 1996/06/11 01:27:50 jimz - * Fixed bug where diskthread shutdown would crash or hang. This - * turned out to be two distinct bugs: - * (1) [crash] The thread shutdown code wasn't properly waiting for - * all the diskthreads to complete. This caused diskthreads that were - * exiting+cleaning up to unlock a destroyed mutex. - * (2) [hang] TerminateDiskQueues wasn't locking, and DiskIODequeue - * only checked for termination _after_ a wakeup if the queues were - * empty. This was a race where the termination wakeup could be lost - * by the dequeueing thread, and the system would hang waiting for the - * thread to exit, while the thread waited for an I/O or a signal to - * check the termination flag. - * - * Revision 1.14 1996/06/10 22:24:01 wvcii - * added write dags which do not have a commit node and are - * used in forward and backward error recovery experiments. 
- * - * Revision 1.13 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.12 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.11 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.10 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.9 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.8 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.7 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.6 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.5 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.4 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.3 1996/05/15 23:23:12 wvcii - * fixed bug in small write read old q node succedent initialization - * - * Revision 1.2 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.1 1996/05/03 19:20:45 wvcii - * Initial revision - * */ #include "rf_types.h" @@ -143,7 +49,7 @@ /****************************************************************************** * * General comments on DAG creation: - * + * * All DAGs in this file use roll-away error recovery. Each DAG has a single * commit node, usually called "Cmt." 
If an error occurs before the Cmt node * is reached, the execution engine will halt forward execution and work @@ -168,76 +74,80 @@ */ -void rf_CreateNonRedundantWriteDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - RF_IoType_t type) +void +rf_CreateNonRedundantWriteDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + RF_IoType_t type) { - rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); + rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + RF_IO_TYPE_WRITE); } -void rf_CreateRAID0WriteDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - RF_IoType_t type) +void +rf_CreateRAID0WriteDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + RF_IoType_t type) { - rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); + rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + RF_IO_TYPE_WRITE); } -void rf_CreateSmallWriteDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList) +void +rf_CreateSmallWriteDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList) { #if RF_FORWARD > 0 - rf_CommonCreateSmallWriteDAGFwd(raidPtr, asmap, dag_h, bp, flags, allocList, - &rf_xorFuncs, NULL); -#else /* RF_FORWARD > 0 */ + rf_CommonCreateSmallWriteDAGFwd(raidPtr, asmap, dag_h, bp, flags, allocList, + &rf_xorFuncs, NULL); +#else /* RF_FORWARD > 0 
*/ #if RF_BACKWARD > 0 - rf_CommonCreateSmallWriteDAGFwd(raidPtr, asmap, dag_h, bp, flags, allocList, - &rf_xorFuncs, NULL); -#else /* RF_BACKWARD > 0 */ - /* "normal" rollaway */ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - &rf_xorFuncs, NULL); -#endif /* RF_BACKWARD > 0 */ -#endif /* RF_FORWARD > 0 */ + rf_CommonCreateSmallWriteDAGFwd(raidPtr, asmap, dag_h, bp, flags, allocList, + &rf_xorFuncs, NULL); +#else /* RF_BACKWARD > 0 */ + /* "normal" rollaway */ + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + &rf_xorFuncs, NULL); +#endif /* RF_BACKWARD > 0 */ +#endif /* RF_FORWARD > 0 */ } -void rf_CreateLargeWriteDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList) +void +rf_CreateLargeWriteDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList) { #if RF_FORWARD > 0 - rf_CommonCreateLargeWriteDAGFwd(raidPtr, asmap, dag_h, bp, flags, allocList, - 1, rf_RegularXorFunc, RF_TRUE); -#else /* RF_FORWARD > 0 */ + rf_CommonCreateLargeWriteDAGFwd(raidPtr, asmap, dag_h, bp, flags, allocList, + 1, rf_RegularXorFunc, RF_TRUE); +#else /* RF_FORWARD > 0 */ #if RF_BACKWARD > 0 - rf_CommonCreateLargeWriteDAGFwd(raidPtr, asmap, dag_h, bp, flags, allocList, - 1, rf_RegularXorFunc, RF_TRUE); -#else /* RF_BACKWARD > 0 */ - /* "normal" rollaway */ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - 1, rf_RegularXorFunc, RF_TRUE); -#endif /* RF_BACKWARD > 0 */ -#endif /* RF_FORWARD > 0 */ + rf_CommonCreateLargeWriteDAGFwd(raidPtr, asmap, dag_h, bp, flags, allocList, + 1, rf_RegularXorFunc, RF_TRUE); +#else /* RF_BACKWARD > 0 */ + /* "normal" rollaway */ + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + 1, rf_RegularXorFunc, RF_TRUE); +#endif /* RF_BACKWARD > 0 */ 
+#endif /* RF_FORWARD > 0 */ } @@ -270,7 +180,7 @@ void rf_CreateLargeWriteDAG( * Parameters: raidPtr - description of the physical array * asmap - logical & physical addresses for this access * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) + * flags - general flags (e.g. disk locking) * allocList - list of memory allocated in DAG creation * nfaults - number of faults array can tolerate * (equal to # redundancy units in stripe) @@ -278,268 +188,267 @@ void rf_CreateLargeWriteDAG( * *****************************************************************************/ -void rf_CommonCreateLargeWriteDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - int nfaults, - int (*redFunc)(RF_DagNode_t *), - int allowBufferRecycle) +void +rf_CommonCreateLargeWriteDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + int nfaults, + int (*redFunc) (RF_DagNode_t *), + int allowBufferRecycle) { - RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode; - RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode; - int nWndNodes, nRodNodes, i, nodeNum, asmNum; - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_StripeNum_t parityStripeID; - char *sosBuffer, *eosBuffer; - RF_ReconUnitNum_t which_ru; - RF_RaidLayout_t *layoutPtr; - RF_PhysDiskAddr_t *pda; - - layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, - &which_ru); - - if (rf_dagDebug) { - printf("[Creating large-write DAG]\n"); - } - dag_h->creator = "LargeWriteDAG"; - - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */ - nWndNodes = asmap->numStripeUnitsAccessed; - RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, 
sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - wndNodes = &nodes[i]; i += nWndNodes; - xorNode = &nodes[i]; i += 1; - wnpNode = &nodes[i]; i += 1; - blockNode = &nodes[i]; i += 1; - commitNode = &nodes[i]; i += 1; - termNode = &nodes[i]; i += 1; - if (nfaults == 2) { - wnqNode = &nodes[i]; i += 1; - } - else { - wnqNode = NULL; - } - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, - &nRodNodes, &sosBuffer, &eosBuffer, allocList); - if (nRodNodes > 0) { - RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - } - else { - rodNodes = NULL; - } - - /* begin node initialization */ - if (nRodNodes > 0) { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRodNodes, 0, 0, 0, dag_h, "Nil", allocList); - } - else { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - } - - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, - nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, - 0, nWndNodes + nfaults, 0, 0, dag_h, "Trm", allocList); - - /* initialize the Rod nodes */ - for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { - if (new_asm_h[asmNum]) { - pda = new_asm_h[asmNum]->stripeMap->physInfo; - while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc,rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Rod", allocList); - rodNodes[nodeNum].params[0].p = pda; - rodNodes[nodeNum].params[1].p = pda->bufPtr; - rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - nodeNum++; - pda = pda->next; - } - } - } - RF_ASSERT(nodeNum == nRodNodes); - - /* initialize the wnd nodes */ - pda = asmap->physInfo; - for (i=0; i < nWndNodes; 
i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda != NULL); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - - /* initialize the redundancy node */ - if (nRodNodes > 0) { - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - nRodNodes, 2 * (nWndNodes+nRodNodes) + 1, nfaults, dag_h, - "Xr ", allocList); - } - else { - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - 1, 2 * (nWndNodes+nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList); - } - xorNode->flags |= RF_DAGNODE_FLAG_YIELD; - for (i=0; i < nWndNodes; i++) { - xorNode->params[2*i+0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2*i+1] = wndNodes[i].params[1]; /* buf ptr */ - } - for (i=0; i < nRodNodes; i++) { - xorNode->params[2*(nWndNodes+i)+0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2*(nWndNodes+i)+1] = rodNodes[i].params[1]; /* buf ptr */ - } - /* xor node needs to get at RAID information */ - xorNode->params[2*(nWndNodes+nRodNodes)].p = raidPtr; - - /* - * Look for an Rod node that reads a complete SU. If none, alloc a buffer - * to receive the parity info. Note that we can't use a new data buffer - * because it will not have gotten written when the xor occurs. 
- */ - if (allowBufferRecycle) { - for (i = 0; i < nRodNodes; i++) { - if (((RF_PhysDiskAddr_t *)rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) - break; - } - } - if ((!allowBufferRecycle) || (i == nRodNodes)) { - RF_CallocAndAdd(xorNode->results[0], 1, - rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), - (void *), allocList); - } - else { - xorNode->results[0] = rodNodes[i].params[1].p; - } - - /* initialize the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); - wnpNode->params[0].p = asmap->parityInfo; - wnpNode->params[1].p = xorNode->results[0]; - wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - - if (nfaults == 2) { - /* - * We never try to recycle a buffer for the Q calcuation - * in addition to the parity. This would cause two buffers - * to get smashed during the P and Q calculation, guaranteeing - * one would be wrong. - */ - RF_CallocAndAdd(xorNode->results[1], 1, - rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), - (void *),allocList); - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); - wnqNode->params[0].p = asmap->qInfo; - wnqNode->params[1].p = xorNode->results[1]; - wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - /* parityInfo must describe entire parity unit */ - RF_ASSERT(asmap->parityInfo->next == NULL); - } - - /* - * Connect nodes to form graph. 
- */ - - /* connect dag header to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - if (nRodNodes > 0) { - /* connect the block node to the Rod nodes */ - RF_ASSERT(blockNode->numSuccedents == nRodNodes); - RF_ASSERT(xorNode->numAntecedents == nRodNodes); - for (i = 0; i < nRodNodes; i++) { - RF_ASSERT(rodNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rodNodes[i]; - rodNodes[i].antecedents[0] = blockNode; - rodNodes[i].antType[0] = rf_control; - - /* connect the Rod nodes to the Xor node */ - RF_ASSERT(rodNodes[i].numSuccedents == 1); - rodNodes[i].succedents[0] = xorNode; - xorNode->antecedents[i] = &rodNodes[i]; - xorNode->antType[i] = rf_trueData; - } - } - else { - /* connect the block node to the Xor node */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(xorNode->numAntecedents == 1); - blockNode->succedents[0] = xorNode; - xorNode->antecedents[0] = blockNode; - xorNode->antType[0] = rf_control; - } - - /* connect the xor node to the commit node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 1); - xorNode->succedents[0] = commitNode; - commitNode->antecedents[0] = xorNode; - commitNode->antType[0] = rf_control; - - /* connect the commit node to the write nodes */ - RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numAntecedents == 1); - commitNode->succedents[i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = commitNode; - wndNodes[i].antType[0] = rf_control; - } - RF_ASSERT(wnpNode->numAntecedents == 1); - commitNode->succedents[nWndNodes] = wnpNode; - wnpNode->antecedents[0]= commitNode; - wnpNode->antType[0] = rf_trueData; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numAntecedents == 1); - commitNode->succedents[nWndNodes + 1] = wnqNode; - wnqNode->antecedents[0] = commitNode; - wnqNode->antType[0] = rf_trueData; - } - - /* connect the write nodes to the term node */ - 
RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numSuccedents == 1); - wndNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &wndNodes[i]; - termNode->antType[i] = rf_control; - } - RF_ASSERT(wnpNode->numSuccedents == 1); - wnpNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes] = wnpNode; - termNode->antType[nWndNodes] = rf_control; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numSuccedents == 1); - wnqNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes + 1] = wnqNode; - termNode->antType[nWndNodes + 1] = rf_control; - } -} + RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode; + RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode; + int nWndNodes, nRodNodes, i, nodeNum, asmNum; + RF_AccessStripeMapHeader_t *new_asm_h[2]; + RF_StripeNum_t parityStripeID; + char *sosBuffer, *eosBuffer; + RF_ReconUnitNum_t which_ru; + RF_RaidLayout_t *layoutPtr; + RF_PhysDiskAddr_t *pda; + + layoutPtr = &(raidPtr->Layout); + parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, + &which_ru); + + if (rf_dagDebug) { + printf("[Creating large-write DAG]\n"); + } + dag_h->creator = "LargeWriteDAG"; + + dag_h->numCommitNodes = 1; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */ + nWndNodes = asmap->numStripeUnitsAccessed; + RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); + i = 0; + wndNodes = &nodes[i]; + i += nWndNodes; + xorNode = &nodes[i]; + i += 1; + wnpNode = &nodes[i]; + i += 1; + blockNode = &nodes[i]; + i += 1; + commitNode = &nodes[i]; + i += 1; + termNode = &nodes[i]; + i += 1; + if (nfaults == 2) { + wnqNode = &nodes[i]; + i += 1; + } else { + wnqNode = NULL; + } + rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, + &nRodNodes, 
&sosBuffer, &eosBuffer, allocList); + if (nRodNodes > 0) { + RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); + } else { + rodNodes = NULL; + } + + /* begin node initialization */ + if (nRodNodes > 0) { + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, nRodNodes, 0, 0, 0, dag_h, "Nil", allocList); + } else { + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); + } + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, + nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, + 0, nWndNodes + nfaults, 0, 0, dag_h, "Trm", allocList); + + /* initialize the Rod nodes */ + for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { + if (new_asm_h[asmNum]) { + pda = new_asm_h[asmNum]->stripeMap->physInfo; + while (pda) { + rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Rod", allocList); + rodNodes[nodeNum].params[0].p = pda; + rodNodes[nodeNum].params[1].p = pda->bufPtr; + rodNodes[nodeNum].params[2].v = parityStripeID; + rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); + nodeNum++; + pda = pda->next; + } + } + } + RF_ASSERT(nodeNum == nRodNodes); + + /* initialize the wnd nodes */ + pda = asmap->physInfo; + for (i = 0; i < nWndNodes; i++) { + rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); + RF_ASSERT(pda != NULL); + wndNodes[i].params[0].p = pda; + wndNodes[i].params[1].p = pda->bufPtr; + wndNodes[i].params[2].v = parityStripeID; + wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + pda = pda->next; + } + + /* initialize the redundancy node */ + 
if (nRodNodes > 0) { + rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, + nRodNodes, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, + "Xr ", allocList); + } else { + rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, + 1, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList); + } + xorNode->flags |= RF_DAGNODE_FLAG_YIELD; + for (i = 0; i < nWndNodes; i++) { + xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ + xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ + } + for (i = 0; i < nRodNodes; i++) { + xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ + xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ + } + /* xor node needs to get at RAID information */ + xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; + + /* + * Look for an Rod node that reads a complete SU. If none, alloc a buffer + * to receive the parity info. Note that we can't use a new data buffer + * because it will not have gotten written when the xor occurs. 
+ */ + if (allowBufferRecycle) { + for (i = 0; i < nRodNodes; i++) { + if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) + break; + } + } + if ((!allowBufferRecycle) || (i == nRodNodes)) { + RF_CallocAndAdd(xorNode->results[0], 1, + rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), + (void *), allocList); + } else { + xorNode->results[0] = rodNodes[i].params[1].p; + } + + /* initialize the Wnp node */ + rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); + wnpNode->params[0].p = asmap->parityInfo; + wnpNode->params[1].p = xorNode->results[0]; + wnpNode->params[2].v = parityStripeID; + wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + /* parityInfo must describe entire parity unit */ + RF_ASSERT(asmap->parityInfo->next == NULL); + + if (nfaults == 2) { + /* + * We never try to recycle a buffer for the Q calcuation + * in addition to the parity. This would cause two buffers + * to get smashed during the P and Q calculation, guaranteeing + * one would be wrong. + */ + RF_CallocAndAdd(xorNode->results[1], 1, + rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), + (void *), allocList); + rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); + wnqNode->params[0].p = asmap->qInfo; + wnqNode->params[1].p = xorNode->results[1]; + wnqNode->params[2].v = parityStripeID; + wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + /* parityInfo must describe entire parity unit */ + RF_ASSERT(asmap->parityInfo->next == NULL); + } + /* + * Connect nodes to form graph. 
+ */ + + /* connect dag header to block node */ + RF_ASSERT(blockNode->numAntecedents == 0); + dag_h->succedents[0] = blockNode; + + if (nRodNodes > 0) { + /* connect the block node to the Rod nodes */ + RF_ASSERT(blockNode->numSuccedents == nRodNodes); + RF_ASSERT(xorNode->numAntecedents == nRodNodes); + for (i = 0; i < nRodNodes; i++) { + RF_ASSERT(rodNodes[i].numAntecedents == 1); + blockNode->succedents[i] = &rodNodes[i]; + rodNodes[i].antecedents[0] = blockNode; + rodNodes[i].antType[0] = rf_control; + + /* connect the Rod nodes to the Xor node */ + RF_ASSERT(rodNodes[i].numSuccedents == 1); + rodNodes[i].succedents[0] = xorNode; + xorNode->antecedents[i] = &rodNodes[i]; + xorNode->antType[i] = rf_trueData; + } + } else { + /* connect the block node to the Xor node */ + RF_ASSERT(blockNode->numSuccedents == 1); + RF_ASSERT(xorNode->numAntecedents == 1); + blockNode->succedents[0] = xorNode; + xorNode->antecedents[0] = blockNode; + xorNode->antType[0] = rf_control; + } + + /* connect the xor node to the commit node */ + RF_ASSERT(xorNode->numSuccedents == 1); + RF_ASSERT(commitNode->numAntecedents == 1); + xorNode->succedents[0] = commitNode; + commitNode->antecedents[0] = xorNode; + commitNode->antType[0] = rf_control; + + /* connect the commit node to the write nodes */ + RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults); + for (i = 0; i < nWndNodes; i++) { + RF_ASSERT(wndNodes->numAntecedents == 1); + commitNode->succedents[i] = &wndNodes[i]; + wndNodes[i].antecedents[0] = commitNode; + wndNodes[i].antType[0] = rf_control; + } + RF_ASSERT(wnpNode->numAntecedents == 1); + commitNode->succedents[nWndNodes] = wnpNode; + wnpNode->antecedents[0] = commitNode; + wnpNode->antType[0] = rf_trueData; + if (nfaults == 2) { + RF_ASSERT(wnqNode->numAntecedents == 1); + commitNode->succedents[nWndNodes + 1] = wnqNode; + wnqNode->antecedents[0] = commitNode; + wnqNode->antType[0] = rf_trueData; + } + /* connect the write nodes to the term node */ + 
RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults); + RF_ASSERT(termNode->numSuccedents == 0); + for (i = 0; i < nWndNodes; i++) { + RF_ASSERT(wndNodes->numSuccedents == 1); + wndNodes[i].succedents[0] = termNode; + termNode->antecedents[i] = &wndNodes[i]; + termNode->antType[i] = rf_control; + } + RF_ASSERT(wnpNode->numSuccedents == 1); + wnpNode->succedents[0] = termNode; + termNode->antecedents[nWndNodes] = wnpNode; + termNode->antType[nWndNodes] = rf_control; + if (nfaults == 2) { + RF_ASSERT(wnqNode->numSuccedents == 1); + wnqNode->succedents[0] = termNode; + termNode->antecedents[nWndNodes + 1] = wnqNode; + termNode->antType[nWndNodes + 1] = rf_control; + } +} /****************************************************************************** * * creates a DAG to perform a small-write operation (either raid 5 or pq), @@ -565,7 +474,7 @@ void rf_CommonCreateLargeWriteDAG( * Parameters: raidPtr - description of the physical array * asmap - logical & physical addresses for this access * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) + * flags - general flags (e.g. 
disk locking) * allocList - list of memory allocated in DAG creation * pfuncs - list of parity generating functions * qfuncs - list of q generating functions @@ -573,584 +482,587 @@ void rf_CommonCreateLargeWriteDAG( * A null qfuncs indicates single fault tolerant *****************************************************************************/ -void rf_CommonCreateSmallWriteDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - RF_RedFuncs_t *pfuncs, - RF_RedFuncs_t *qfuncs) +void +rf_CommonCreateSmallWriteDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + RF_RedFuncs_t * pfuncs, + RF_RedFuncs_t * qfuncs) { - RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode; - RF_DagNode_t *unlockDataNodes, *unlockParityNodes, *unlockQNodes; - RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode, *nodes; - RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes; - int i, j, nNodes, totalNumNodes, lu_flag; - RF_ReconUnitNum_t which_ru; - int (*func)(RF_DagNode_t *), (*undoFunc)(RF_DagNode_t *); - int (*qfunc)(RF_DagNode_t *); - int numDataNodes, numParityNodes; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *pda; - char *name, *qname; - long nfaults; - - nfaults = qfuncs ? 2 : 1; - lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - pda = asmap->physInfo; - numDataNodes = asmap->numStripeUnitsAccessed; - numParityNodes = (asmap->parityInfo->next) ? 
2 : 1; - - if (rf_dagDebug) { - printf("[Creating small-write DAG]\n"); - } - RF_ASSERT(numDataNodes > 0); - dag_h->creator = "SmallWriteDAG"; - - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* - * DAG creation occurs in four steps: - * 1. count the number of nodes in the DAG - * 2. create the nodes - * 3. initialize the nodes - * 4. connect the nodes - */ - - /* - * Step 1. compute number of nodes in the graph - */ - - /* number of nodes: - * a read and write for each data unit - * a redundancy computation node for each parity node (nfaults * nparity) - * a read and write for each parity unit - * a block and commit node (2) - * a terminate node - * if atomic RMW - * an unlock node for each data unit, redundancy unit - */ - totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) - + (nfaults * 2 * numParityNodes) + 3; - if (lu_flag) { - totalNumNodes += (numDataNodes + (nfaults * numParityNodes)); - } - - /* - * Step 2. create the nodes - */ - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; i += 1; - commitNode = &nodes[i]; i += 1; - readDataNodes = &nodes[i]; i += numDataNodes; - readParityNodes = &nodes[i]; i += numParityNodes; - writeDataNodes = &nodes[i]; i += numDataNodes; - writeParityNodes = &nodes[i]; i += numParityNodes; - xorNodes = &nodes[i]; i += numParityNodes; - termNode = &nodes[i]; i += 1; - if (lu_flag) { - unlockDataNodes = &nodes[i]; i += numDataNodes; - unlockParityNodes = &nodes[i]; i += numParityNodes; - } - else { - unlockDataNodes = unlockParityNodes = NULL; - } - if (nfaults == 2) { - readQNodes = &nodes[i]; i += numParityNodes; - writeQNodes = &nodes[i]; i += numParityNodes; - qNodes = &nodes[i]; i += numParityNodes; - if (lu_flag) { - unlockQNodes = &nodes[i]; i += numParityNodes; - } - else { - unlockQNodes = NULL; - } - } - else { - readQNodes = writeQNodes = qNodes = unlockQNodes = NULL; - } - RF_ASSERT(i == 
totalNumNodes); - - /* - * Step 3. initialize the nodes - */ - /* initialize block node (Nil) */ - nNodes = numDataNodes + (nfaults * numParityNodes); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); - - /* initialize commit node (Cmt) */ - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nNodes, (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList); - - /* initialize terminate node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList); - - /* initialize nodes which read old data (Rod) */ - for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, (nfaults * numParityNodes), 1, 4, 0, dag_h, - "Rod", allocList); - RF_ASSERT(pda != NULL); - /* physical disk addr desc */ - readDataNodes[i].params[0].p = pda; - /* buffer to hold old data */ - readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, - dag_h, pda, allocList); - readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - lu_flag, 0, which_ru); - pda = pda->next; - for (j = 0; j < readDataNodes[i].numSuccedents; j++) { - readDataNodes[i].propList[j] = NULL; - } - } - - /* initialize nodes which read old parity (Rop) */ - pda = asmap->parityInfo; i = 0; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, - 0, dag_h, "Rop", allocList); - readParityNodes[i].params[0].p = pda; - /* buffer to hold old parity */ - readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, - dag_h, pda, allocList); - readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = 
RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - lu_flag, 0, which_ru); - pda = pda->next; - for (j = 0; j < readParityNodes[i].numSuccedents; j++) { - readParityNodes[i].propList[0] = NULL; - } - } - - /* initialize nodes which read old Q (Roq) */ - if (nfaults == 2) { - pda = asmap->qInfo; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList); - readQNodes[i].params[0].p = pda; - /* buffer to hold old Q */ - readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, - allocList); - readQNodes[i].params[2].v = parityStripeID; - readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - lu_flag, 0, which_ru); - pda = pda->next; - for (j = 0; j < readQNodes[i].numSuccedents; j++) { - readQNodes[i].propList[0] = NULL; - } - } - } - - /* initialize nodes which write new data (Wnd) */ - pda = asmap->physInfo; - for (i=0; i < numDataNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Wnd", allocList); - /* physical disk addr desc */ - writeDataNodes[i].params[0].p = pda; - /* buffer holding new data to be written */ - writeDataNodes[i].params[1].p = pda->bufPtr; - writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, - "Und", allocList); - /* physical disk addr desc */ - unlockDataNodes[i].params[0].p = pda; - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, lu_flag, which_ru); - } - pda = pda->next; - } - - /* - * Initialize nodes which compute new 
parity and Q. - */ - /* - * We use the simple XOR func in the double-XOR case, and when - * we're accessing only a portion of one stripe unit. The distinction - * between the two is that the regular XOR func assumes that the targbuf - * is a full SU in size, and examines the pda associated with the buffer - * to decide where within the buffer to XOR the data, whereas - * the simple XOR func just XORs the data into the start of the buffer. - */ - if ((numParityNodes==2) || ((numDataNodes == 1) - && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) - { - func = pfuncs->simple; undoFunc = rf_NullNodeUndoFunc; name = pfuncs->SimpleName; - if (qfuncs) { - qfunc = qfuncs->simple; - qname = qfuncs->SimpleName; - } - else { - qfunc = NULL; - qname = NULL; - } - } - else { - func = pfuncs->regular; - undoFunc = rf_NullNodeUndoFunc; - name = pfuncs->RegularName; - if (qfuncs) { - qfunc = qfuncs->regular; - qname = qfuncs->RegularName; - } - else { - qfunc = NULL; - qname = NULL; - } - } - /* - * Initialize the xor nodes: params are {pda,buf} - * from {Rod,Wnd,Rop} nodes, and raidPtr - */ - if (numParityNodes==2) { - /* double-xor case */ - for (i=0; i < numParityNodes; i++) { - /* note: no wakeup func for xor */ - rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL, - 1, (numDataNodes + numParityNodes), 7, 1, dag_h, name, allocList); - xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; - xorNodes[i].params[0] = readDataNodes[i].params[0]; - xorNodes[i].params[1] = readDataNodes[i].params[1]; - xorNodes[i].params[2] = readParityNodes[i].params[0]; - xorNodes[i].params[3] = readParityNodes[i].params[1]; - xorNodes[i].params[4] = writeDataNodes[i].params[0]; - xorNodes[i].params[5] = writeDataNodes[i].params[1]; - xorNodes[i].params[6].p = raidPtr; - /* use old parity buf as target buf */ - xorNodes[i].results[0] = readParityNodes[i].params[1].p; - if (nfaults == 2) { - /* note: no wakeup func for qor */ - rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, 
qfunc, undoFunc, NULL, 1, - (numDataNodes + numParityNodes), 7, 1, dag_h, qname, allocList); - qNodes[i].params[0] = readDataNodes[i].params[0]; - qNodes[i].params[1] = readDataNodes[i].params[1]; - qNodes[i].params[2] = readQNodes[i].params[0]; - qNodes[i].params[3] = readQNodes[i].params[1]; - qNodes[i].params[4] = writeDataNodes[i].params[0]; - qNodes[i].params[5] = writeDataNodes[i].params[1]; - qNodes[i].params[6].p = raidPtr; - /* use old Q buf as target buf */ - qNodes[i].results[0] = readQNodes[i].params[1].p; - } - } - } - else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, 1, - (numDataNodes + numParityNodes), - (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); - xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; - for (i=0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2*i+0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2*i+1] = readDataNodes[i].params[1]; /* buffer ptr */ - } - for (i=0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2*(numDataNodes+1+i)+0] = /* pda */ - writeDataNodes[i].params[0]; - xorNodes[0].params[2*(numDataNodes+1+i)+1] = /* buffer ptr */ - writeDataNodes[i].params[1]; - } - /* xor node needs to get at RAID information */ - xorNodes[0].params[2*(numDataNodes+numDataNodes+1)].p = raidPtr; - xorNodes[0].results[0] = readParityNodes[0].params[1].p; - if (nfaults == 2) { - rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, 1, - (numDataNodes + numParityNodes), - (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, - qname, allocList); - for (i=0; i<numDataNodes; i++) { - /* set up params related to Rod */ - qNodes[0].params[2*i+0] = readDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2*i+1] = readDataNodes[i].params[1]; /* buffer ptr */ - } - /* and read old q */ - qNodes[0].params[2*numDataNodes + 0] = /* 
pda */ - readQNodes[0].params[0]; - qNodes[0].params[2*numDataNodes + 1] = /* buffer ptr */ - readQNodes[0].params[1]; - for (i=0; i < numDataNodes; i++) { - /* set up params related to Wnd nodes */ - qNodes[0].params[2*(numDataNodes+1+i)+0] = /* pda */ - writeDataNodes[i].params[0]; - qNodes[0].params[2*(numDataNodes+1+i)+1] = /* buffer ptr */ - writeDataNodes[i].params[1]; - } - /* xor node needs to get at RAID information */ - qNodes[0].params[2*(numDataNodes+numDataNodes+1)].p = raidPtr; - qNodes[0].results[0] = readQNodes[0].params[1].p; - } - } - - /* initialize nodes which write new parity (Wnp) */ - pda = asmap->parityInfo; - for (i=0; i < numParityNodes; i++) { - rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Wnp", allocList); - RF_ASSERT(pda != NULL); - writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr) filled in by xor node */ - writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for parity write operation */ - writeParityNodes[i].params[2].v = parityStripeID; - writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, - "Unp", allocList); - unlockParityNodes[i].params[0].p = pda; /* physical disk addr desc */ - unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, lu_flag, which_ru); - } - pda = pda->next; - } - - /* initialize nodes which write new Q (Wnq) */ - if (nfaults == 2) { - pda = asmap->qInfo; - for (i=0; i < numParityNodes; i++) { - rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Wnq", allocList); - RF_ASSERT(pda != NULL); - writeQNodes[i].params[0].p = pda; /* param 1 
(bufPtr) filled in by xor node */ - writeQNodes[i].params[1].p = qNodes[i].results[0]; /* buffer pointer for parity write operation */ - writeQNodes[i].params[2].v = parityStripeID; - writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, - "Unq", allocList); - unlockQNodes[i].params[0].p = pda; /* physical disk addr desc */ - unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, lu_flag, which_ru); - } - pda = pda->next; - } - } - - /* - * Step 4. connect the nodes. - */ - - /* connect header to block node */ - dag_h->succedents[0] = blockNode; - - /* connect block node to read old data nodes */ - RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults))); - for (i = 0; i < numDataNodes; i++) { - blockNode->succedents[i] = &readDataNodes[i]; - RF_ASSERT(readDataNodes[i].numAntecedents == 1); - readDataNodes[i].antecedents[0]= blockNode; - readDataNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old parity nodes */ - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; - RF_ASSERT(readParityNodes[i].numAntecedents == 1); - readParityNodes[i].antecedents[0] = blockNode; - readParityNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old Q nodes */ - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i]; - RF_ASSERT(readQNodes[i].numAntecedents == 1); - readQNodes[i].antecedents[0] = blockNode; - readQNodes[i].antType[0] = rf_control; - } - } - - /* connect read old data nodes to xor nodes */ - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == (nfaults * numParityNodes)); - for (j = 0; 
j < numParityNodes; j++){ - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[j] = &xorNodes[j]; - xorNodes[j].antecedents[i] = &readDataNodes[i]; - xorNodes[j].antType[i] = rf_trueData; - } - } - - /* connect read old data nodes to q nodes */ - if (nfaults == 2) { - for (i = 0; i < numDataNodes; i++) { - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[numParityNodes + j] = &qNodes[j]; - qNodes[j].antecedents[i] = &readDataNodes[i]; - qNodes[j].antType[i] = rf_trueData; - } - } - } - - /* connect read old parity nodes to xor nodes */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - readParityNodes[i].succedents[j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - xorNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - - /* connect read old q nodes to q nodes */ - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - readQNodes[i].succedents[j] = &qNodes[j]; - qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i]; - qNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - } - - /* connect xor nodes to commit node */ - RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes)); - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(xorNodes[i].numSuccedents == 1); - xorNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i] = &xorNodes[i]; - commitNode->antType[i] = rf_control; - } - - /* connect q nodes to commit node */ - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(qNodes[i].numSuccedents == 1); - qNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i + numParityNodes] = &qNodes[i]; - commitNode->antType[i + 
numParityNodes] = rf_control; - } - } - - /* connect commit node to write nodes */ - RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes))); - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(writeDataNodes[i].numAntecedents == 1); - commitNode->succedents[i] = &writeDataNodes[i]; - writeDataNodes[i].antecedents[0] = commitNode; - writeDataNodes[i].antType[0] = rf_trueData; - } - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeParityNodes[i].numAntecedents == 1); - commitNode->succedents[i + numDataNodes] = &writeParityNodes[i]; - writeParityNodes[i].antecedents[0] = commitNode; - writeParityNodes[i].antType[0] = rf_trueData; - } - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeQNodes[i].numAntecedents == 1); - commitNode->succedents[i + numDataNodes + numParityNodes] = &writeQNodes[i]; - writeQNodes[i].antecedents[0] = commitNode; - writeQNodes[i].antType[0] = rf_trueData; - } - } - - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < numDataNodes; i++) { - if (lu_flag) { - /* connect write new data nodes to unlock nodes */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); - writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; - unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; - unlockDataNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); - unlockDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &unlockDataNodes[i]; - termNode->antType[i] = rf_control; - } - else { - /* connect write new data nodes to term node */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - writeDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &writeDataNodes[i]; - 
termNode->antType[i] = rf_control; - } - } - - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new parity nodes to unlock nodes */ - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - RF_ASSERT(unlockParityNodes[i].numAntecedents == 1); - writeParityNodes[i].succedents[0] = &unlockParityNodes[i]; - unlockParityNodes[i].antecedents[0] = &writeParityNodes[i]; - unlockParityNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - RF_ASSERT(unlockParityNodes[i].numSuccedents == 1); - unlockParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } - else { - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - writeParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &writeParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } - } - - if (nfaults == 2) { - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new Q nodes to unlock nodes */ - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - RF_ASSERT(unlockQNodes[i].numAntecedents == 1); - writeQNodes[i].succedents[0] = &unlockQNodes[i]; - unlockQNodes[i].antecedents[0] = &writeQNodes[i]; - unlockQNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to unblock node */ - RF_ASSERT(unlockQNodes[i].numSuccedents == 1); - unlockQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } - else { - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - writeQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } - } - } + RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode; + RF_DagNode_t *unlockDataNodes, *unlockParityNodes, 
*unlockQNodes; + RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode, *nodes; + RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes; + int i, j, nNodes, totalNumNodes, lu_flag; + RF_ReconUnitNum_t which_ru; + int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); + int (*qfunc) (RF_DagNode_t *); + int numDataNodes, numParityNodes; + RF_StripeNum_t parityStripeID; + RF_PhysDiskAddr_t *pda; + char *name, *qname; + long nfaults; + + nfaults = qfuncs ? 2 : 1; + lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ + + parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), + asmap->raidAddress, &which_ru); + pda = asmap->physInfo; + numDataNodes = asmap->numStripeUnitsAccessed; + numParityNodes = (asmap->parityInfo->next) ? 2 : 1; + + if (rf_dagDebug) { + printf("[Creating small-write DAG]\n"); + } + RF_ASSERT(numDataNodes > 0); + dag_h->creator = "SmallWriteDAG"; + + dag_h->numCommitNodes = 1; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + /* + * DAG creation occurs in four steps: + * 1. count the number of nodes in the DAG + * 2. create the nodes + * 3. initialize the nodes + * 4. connect the nodes + */ + + /* + * Step 1. compute number of nodes in the graph + */ + + /* number of nodes: a read and write for each data unit a redundancy + * computation node for each parity node (nfaults * nparity) a read + * and write for each parity unit a block and commit node (2) a + * terminate node if atomic RMW an unlock node for each data unit, + * redundancy unit */ + totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) + + (nfaults * 2 * numParityNodes) + 3; + if (lu_flag) { + totalNumNodes += (numDataNodes + (nfaults * numParityNodes)); + } + /* + * Step 2. 
create the nodes + */ + RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); + i = 0; + blockNode = &nodes[i]; + i += 1; + commitNode = &nodes[i]; + i += 1; + readDataNodes = &nodes[i]; + i += numDataNodes; + readParityNodes = &nodes[i]; + i += numParityNodes; + writeDataNodes = &nodes[i]; + i += numDataNodes; + writeParityNodes = &nodes[i]; + i += numParityNodes; + xorNodes = &nodes[i]; + i += numParityNodes; + termNode = &nodes[i]; + i += 1; + if (lu_flag) { + unlockDataNodes = &nodes[i]; + i += numDataNodes; + unlockParityNodes = &nodes[i]; + i += numParityNodes; + } else { + unlockDataNodes = unlockParityNodes = NULL; + } + if (nfaults == 2) { + readQNodes = &nodes[i]; + i += numParityNodes; + writeQNodes = &nodes[i]; + i += numParityNodes; + qNodes = &nodes[i]; + i += numParityNodes; + if (lu_flag) { + unlockQNodes = &nodes[i]; + i += numParityNodes; + } else { + unlockQNodes = NULL; + } + } else { + readQNodes = writeQNodes = qNodes = unlockQNodes = NULL; + } + RF_ASSERT(i == totalNumNodes); + + /* + * Step 3. 
initialize the nodes + */ + /* initialize block node (Nil) */ + nNodes = numDataNodes + (nfaults * numParityNodes); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); + + /* initialize commit node (Cmt) */ + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, nNodes, (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList); + + /* initialize terminate node (Trm) */ + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, + NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList); + + /* initialize nodes which read old data (Rod) */ + for (i = 0; i < numDataNodes; i++) { + rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, (nfaults * numParityNodes), 1, 4, 0, dag_h, + "Rod", allocList); + RF_ASSERT(pda != NULL); + /* physical disk addr desc */ + readDataNodes[i].params[0].p = pda; + /* buffer to hold old data */ + readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, + dag_h, pda, allocList); + readDataNodes[i].params[2].v = parityStripeID; + readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + lu_flag, 0, which_ru); + pda = pda->next; + for (j = 0; j < readDataNodes[i].numSuccedents; j++) { + readDataNodes[i].propList[j] = NULL; + } + } + + /* initialize nodes which read old parity (Rop) */ + pda = asmap->parityInfo; + i = 0; + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(pda != NULL); + rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, + 0, dag_h, "Rop", allocList); + readParityNodes[i].params[0].p = pda; + /* buffer to hold old parity */ + readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, + dag_h, pda, allocList); + readParityNodes[i].params[2].v = parityStripeID; + readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + lu_flag, 0, 
which_ru); + pda = pda->next; + for (j = 0; j < readParityNodes[i].numSuccedents; j++) { + readParityNodes[i].propList[0] = NULL; + } + } + + /* initialize nodes which read old Q (Roq) */ + if (nfaults == 2) { + pda = asmap->qInfo; + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(pda != NULL); + rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList); + readQNodes[i].params[0].p = pda; + /* buffer to hold old Q */ + readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, + allocList); + readQNodes[i].params[2].v = parityStripeID; + readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + lu_flag, 0, which_ru); + pda = pda->next; + for (j = 0; j < readQNodes[i].numSuccedents; j++) { + readQNodes[i].propList[0] = NULL; + } + } + } + /* initialize nodes which write new data (Wnd) */ + pda = asmap->physInfo; + for (i = 0; i < numDataNodes; i++) { + RF_ASSERT(pda != NULL); + rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Wnd", allocList); + /* physical disk addr desc */ + writeDataNodes[i].params[0].p = pda; + /* buffer holding new data to be written */ + writeDataNodes[i].params[1].p = pda->bufPtr; + writeDataNodes[i].params[2].v = parityStripeID; + writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); + if (lu_flag) { + /* initialize node to unlock the disk queue */ + rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, + rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, + "Und", allocList); + /* physical disk addr desc */ + unlockDataNodes[i].params[0].p = pda; + unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, lu_flag, which_ru); + } + pda = pda->next; + } + + /* + * Initialize nodes which compute new parity and Q. 
+ */ + /* + * We use the simple XOR func in the double-XOR case, and when + * we're accessing only a portion of one stripe unit. The distinction + * between the two is that the regular XOR func assumes that the targbuf + * is a full SU in size, and examines the pda associated with the buffer + * to decide where within the buffer to XOR the data, whereas + * the simple XOR func just XORs the data into the start of the buffer. + */ + if ((numParityNodes == 2) || ((numDataNodes == 1) + && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { + func = pfuncs->simple; + undoFunc = rf_NullNodeUndoFunc; + name = pfuncs->SimpleName; + if (qfuncs) { + qfunc = qfuncs->simple; + qname = qfuncs->SimpleName; + } else { + qfunc = NULL; + qname = NULL; + } + } else { + func = pfuncs->regular; + undoFunc = rf_NullNodeUndoFunc; + name = pfuncs->RegularName; + if (qfuncs) { + qfunc = qfuncs->regular; + qname = qfuncs->RegularName; + } else { + qfunc = NULL; + qname = NULL; + } + } + /* + * Initialize the xor nodes: params are {pda,buf} + * from {Rod,Wnd,Rop} nodes, and raidPtr + */ + if (numParityNodes == 2) { + /* double-xor case */ + for (i = 0; i < numParityNodes; i++) { + /* note: no wakeup func for xor */ + rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL, + 1, (numDataNodes + numParityNodes), 7, 1, dag_h, name, allocList); + xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; + xorNodes[i].params[0] = readDataNodes[i].params[0]; + xorNodes[i].params[1] = readDataNodes[i].params[1]; + xorNodes[i].params[2] = readParityNodes[i].params[0]; + xorNodes[i].params[3] = readParityNodes[i].params[1]; + xorNodes[i].params[4] = writeDataNodes[i].params[0]; + xorNodes[i].params[5] = writeDataNodes[i].params[1]; + xorNodes[i].params[6].p = raidPtr; + /* use old parity buf as target buf */ + xorNodes[i].results[0] = readParityNodes[i].params[1].p; + if (nfaults == 2) { + /* note: no wakeup func for qor */ + rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, qfunc, 
undoFunc, NULL, 1, + (numDataNodes + numParityNodes), 7, 1, dag_h, qname, allocList); + qNodes[i].params[0] = readDataNodes[i].params[0]; + qNodes[i].params[1] = readDataNodes[i].params[1]; + qNodes[i].params[2] = readQNodes[i].params[0]; + qNodes[i].params[3] = readQNodes[i].params[1]; + qNodes[i].params[4] = writeDataNodes[i].params[0]; + qNodes[i].params[5] = writeDataNodes[i].params[1]; + qNodes[i].params[6].p = raidPtr; + /* use old Q buf as target buf */ + qNodes[i].results[0] = readQNodes[i].params[1].p; + } + } + } else { + /* there is only one xor node in this case */ + rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, 1, + (numDataNodes + numParityNodes), + (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); + xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; + for (i = 0; i < numDataNodes + 1; i++) { + /* set up params related to Rod and Rop nodes */ + xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ + xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */ + } + for (i = 0; i < numDataNodes; i++) { + /* set up params related to Wnd and Wnp nodes */ + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */ + writeDataNodes[i].params[0]; + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */ + writeDataNodes[i].params[1]; + } + /* xor node needs to get at RAID information */ + xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; + xorNodes[0].results[0] = readParityNodes[0].params[1].p; + if (nfaults == 2) { + rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, 1, + (numDataNodes + numParityNodes), + (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, + qname, allocList); + for (i = 0; i < numDataNodes; i++) { + /* set up params related to Rod */ + qNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ + qNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */ + } + /* and read old q */ + 
qNodes[0].params[2 * numDataNodes + 0] = /* pda */ + readQNodes[0].params[0]; + qNodes[0].params[2 * numDataNodes + 1] = /* buffer ptr */ + readQNodes[0].params[1]; + for (i = 0; i < numDataNodes; i++) { + /* set up params related to Wnd nodes */ + qNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */ + writeDataNodes[i].params[0]; + qNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */ + writeDataNodes[i].params[1]; + } + /* xor node needs to get at RAID information */ + qNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; + qNodes[0].results[0] = readQNodes[0].params[1].p; + } + } + + /* initialize nodes which write new parity (Wnp) */ + pda = asmap->parityInfo; + for (i = 0; i < numParityNodes; i++) { + rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Wnp", allocList); + RF_ASSERT(pda != NULL); + writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr) + * filled in by xor node */ + writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for + * parity write + * operation */ + writeParityNodes[i].params[2].v = parityStripeID; + writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); + if (lu_flag) { + /* initialize node to unlock the disk queue */ + rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, + rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, + "Unp", allocList); + unlockParityNodes[i].params[0].p = pda; /* physical disk addr + * desc */ + unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, lu_flag, which_ru); + } + pda = pda->next; + } + + /* initialize nodes which write new Q (Wnq) */ + if (nfaults == 2) { + pda = asmap->qInfo; + for (i = 0; i < numParityNodes; i++) { + rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Wnq", 
allocList); + RF_ASSERT(pda != NULL); + writeQNodes[i].params[0].p = pda; /* param 1 (bufPtr) + * filled in by xor node */ + writeQNodes[i].params[1].p = qNodes[i].results[0]; /* buffer pointer for + * parity write + * operation */ + writeQNodes[i].params[2].v = parityStripeID; + writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); + if (lu_flag) { + /* initialize node to unlock the disk queue */ + rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, + rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, + "Unq", allocList); + unlockQNodes[i].params[0].p = pda; /* physical disk addr + * desc */ + unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, lu_flag, which_ru); + } + pda = pda->next; + } + } + /* + * Step 4. connect the nodes. + */ + + /* connect header to block node */ + dag_h->succedents[0] = blockNode; + + /* connect block node to read old data nodes */ + RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults))); + for (i = 0; i < numDataNodes; i++) { + blockNode->succedents[i] = &readDataNodes[i]; + RF_ASSERT(readDataNodes[i].numAntecedents == 1); + readDataNodes[i].antecedents[0] = blockNode; + readDataNodes[i].antType[0] = rf_control; + } + + /* connect block node to read old parity nodes */ + for (i = 0; i < numParityNodes; i++) { + blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; + RF_ASSERT(readParityNodes[i].numAntecedents == 1); + readParityNodes[i].antecedents[0] = blockNode; + readParityNodes[i].antType[0] = rf_control; + } + + /* connect block node to read old Q nodes */ + if (nfaults == 2) { + for (i = 0; i < numParityNodes; i++) { + blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i]; + RF_ASSERT(readQNodes[i].numAntecedents == 1); + readQNodes[i].antecedents[0] = blockNode; + readQNodes[i].antType[0] = rf_control; + } + } + /* connect read old data nodes to xor nodes */ + for (i = 0; i < 
numDataNodes; i++) { + RF_ASSERT(readDataNodes[i].numSuccedents == (nfaults * numParityNodes)); + for (j = 0; j < numParityNodes; j++) { + RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); + readDataNodes[i].succedents[j] = &xorNodes[j]; + xorNodes[j].antecedents[i] = &readDataNodes[i]; + xorNodes[j].antType[i] = rf_trueData; + } + } + + /* connect read old data nodes to q nodes */ + if (nfaults == 2) { + for (i = 0; i < numDataNodes; i++) { + for (j = 0; j < numParityNodes; j++) { + RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes); + readDataNodes[i].succedents[numParityNodes + j] = &qNodes[j]; + qNodes[j].antecedents[i] = &readDataNodes[i]; + qNodes[j].antType[i] = rf_trueData; + } + } + } + /* connect read old parity nodes to xor nodes */ + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); + for (j = 0; j < numParityNodes; j++) { + readParityNodes[i].succedents[j] = &xorNodes[j]; + xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; + xorNodes[j].antType[numDataNodes + i] = rf_trueData; + } + } + + /* connect read old q nodes to q nodes */ + if (nfaults == 2) { + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); + for (j = 0; j < numParityNodes; j++) { + readQNodes[i].succedents[j] = &qNodes[j]; + qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i]; + qNodes[j].antType[numDataNodes + i] = rf_trueData; + } + } + } + /* connect xor nodes to commit node */ + RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes)); + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(xorNodes[i].numSuccedents == 1); + xorNodes[i].succedents[0] = commitNode; + commitNode->antecedents[i] = &xorNodes[i]; + commitNode->antType[i] = rf_control; + } + + /* connect q nodes to commit node */ + if (nfaults == 2) { + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(qNodes[i].numSuccedents == 1); + 
qNodes[i].succedents[0] = commitNode; + commitNode->antecedents[i + numParityNodes] = &qNodes[i]; + commitNode->antType[i + numParityNodes] = rf_control; + } + } + /* connect commit node to write nodes */ + RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes))); + for (i = 0; i < numDataNodes; i++) { + RF_ASSERT(writeDataNodes[i].numAntecedents == 1); + commitNode->succedents[i] = &writeDataNodes[i]; + writeDataNodes[i].antecedents[0] = commitNode; + writeDataNodes[i].antType[0] = rf_trueData; + } + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(writeParityNodes[i].numAntecedents == 1); + commitNode->succedents[i + numDataNodes] = &writeParityNodes[i]; + writeParityNodes[i].antecedents[0] = commitNode; + writeParityNodes[i].antType[0] = rf_trueData; + } + if (nfaults == 2) { + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(writeQNodes[i].numAntecedents == 1); + commitNode->succedents[i + numDataNodes + numParityNodes] = &writeQNodes[i]; + writeQNodes[i].antecedents[0] = commitNode; + writeQNodes[i].antType[0] = rf_trueData; + } + } + RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); + RF_ASSERT(termNode->numSuccedents == 0); + for (i = 0; i < numDataNodes; i++) { + if (lu_flag) { + /* connect write new data nodes to unlock nodes */ + RF_ASSERT(writeDataNodes[i].numSuccedents == 1); + RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); + writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; + unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; + unlockDataNodes[i].antType[0] = rf_control; + + /* connect unlock nodes to term node */ + RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); + unlockDataNodes[i].succedents[0] = termNode; + termNode->antecedents[i] = &unlockDataNodes[i]; + termNode->antType[i] = rf_control; + } else { + /* connect write new data nodes to term node */ + RF_ASSERT(writeDataNodes[i].numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * 
numParityNodes))); + writeDataNodes[i].succedents[0] = termNode; + termNode->antecedents[i] = &writeDataNodes[i]; + termNode->antType[i] = rf_control; + } + } + + for (i = 0; i < numParityNodes; i++) { + if (lu_flag) { + /* connect write new parity nodes to unlock nodes */ + RF_ASSERT(writeParityNodes[i].numSuccedents == 1); + RF_ASSERT(unlockParityNodes[i].numAntecedents == 1); + writeParityNodes[i].succedents[0] = &unlockParityNodes[i]; + unlockParityNodes[i].antecedents[0] = &writeParityNodes[i]; + unlockParityNodes[i].antType[0] = rf_control; + + /* connect unlock nodes to term node */ + RF_ASSERT(unlockParityNodes[i].numSuccedents == 1); + unlockParityNodes[i].succedents[0] = termNode; + termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i]; + termNode->antType[numDataNodes + i] = rf_control; + } else { + RF_ASSERT(writeParityNodes[i].numSuccedents == 1); + writeParityNodes[i].succedents[0] = termNode; + termNode->antecedents[numDataNodes + i] = &writeParityNodes[i]; + termNode->antType[numDataNodes + i] = rf_control; + } + } + + if (nfaults == 2) { + for (i = 0; i < numParityNodes; i++) { + if (lu_flag) { + /* connect write new Q nodes to unlock nodes */ + RF_ASSERT(writeQNodes[i].numSuccedents == 1); + RF_ASSERT(unlockQNodes[i].numAntecedents == 1); + writeQNodes[i].succedents[0] = &unlockQNodes[i]; + unlockQNodes[i].antecedents[0] = &writeQNodes[i]; + unlockQNodes[i].antType[0] = rf_control; + + /* connect unlock nodes to unblock node */ + RF_ASSERT(unlockQNodes[i].numSuccedents == 1); + unlockQNodes[i].succedents[0] = termNode; + termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i]; + termNode->antType[numDataNodes + numParityNodes + i] = rf_control; + } else { + RF_ASSERT(writeQNodes[i].numSuccedents == 1); + writeQNodes[i].succedents[0] = termNode; + termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i]; + termNode->antType[numDataNodes + numParityNodes + i] = rf_control; + } + } + } } @@ -1166,143 
+1078,148 @@ void rf_CommonCreateSmallWriteDAG( * Parameters: raidPtr - description of the physical array * asmap - logical & physical addresses for this access * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) + * flags - general flags (e.g. disk locking) * allocList - list of memory allocated in DAG creation *****************************************************************************/ -void rf_CreateRaidOneWriteDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList) +void +rf_CreateRaidOneWriteDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList) { - RF_DagNode_t *unblockNode, *termNode, *commitNode; - RF_DagNode_t *nodes, *wndNode, *wmirNode; - int nWndNodes, nWmirNodes, i; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda, *pdaP; - RF_StripeNum_t parityStripeID; - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - if (rf_dagDebug) { - printf("[Creating RAID level 1 write DAG]\n"); - } - dag_h->creator = "RaidOneWriteDAG"; - - /* 2 implies access not SU aligned */ - nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; - nWndNodes = (asmap->physInfo->next) ? 
2 : 1; - - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 1) - nWndNodes--; - if (asmap->numParityFailed == 1) - nWmirNodes--; - - /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock + terminator) */ - RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, sizeof(RF_DagNode_t), - (RF_DagNode_t *), allocList); - i = 0; - wndNode = &nodes[i]; i += nWndNodes; - wmirNode = &nodes[i]; i += nWmirNodes; - commitNode = &nodes[i]; i += 1; - unblockNode = &nodes[i]; i += 1; - termNode = &nodes[i]; i += 1; - RF_ASSERT(i == (nWndNodes + nWmirNodes + 3)); - - /* this dag can commit immediately */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the commit, unblock, and term nodes */ - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, (nWndNodes + nWmirNodes), 0, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, (nWndNodes + nWmirNodes), 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the wnd nodes */ - if (nWndNodes > 0) { - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList); - RF_ASSERT(pda != NULL); - wndNode[i].params[0].p = pda; - wndNode[i].params[1].p = pda->bufPtr; - wndNode[i].params[2].v = parityStripeID; - wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - RF_ASSERT(pda == NULL); - } - - /* initialize the mirror nodes */ - if (nWmirNodes > 0) { - pda = asmap->physInfo; - pdaP = asmap->parityInfo; - for (i = 0; i < nWmirNodes; i++) { - rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - 
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wsd", allocList); - RF_ASSERT(pda != NULL); - wmirNode[i].params[0].p = pdaP; - wmirNode[i].params[1].p = pda->bufPtr; - wmirNode[i].params[2].v = parityStripeID; - wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - pdaP = pdaP->next; - } - RF_ASSERT(pda == NULL); - RF_ASSERT(pdaP == NULL); - } - - /* link the header node to the commit node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(commitNode->numAntecedents == 0); - dag_h->succedents[0] = commitNode; - - /* link the commit node to the write nodes */ - RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numAntecedents == 1); - commitNode->succedents[i] = &wndNode[i]; - wndNode[i].antecedents[0] = commitNode; - wndNode[i].antType[0] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numAntecedents == 1); - commitNode->succedents[i + nWndNodes] = &wmirNode[i]; - wmirNode[i].antecedents[0] = commitNode; - wmirNode[i].antType[0] = rf_control; - } - - /* link the write nodes to the unblock node */ - RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numSuccedents == 1); - wndNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNode[i]; - unblockNode->antType[i] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numSuccedents == 1); - wmirNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i + nWndNodes] = &wmirNode[i]; - unblockNode->antType[i + nWndNodes] = rf_control; - } - - /* link the unblock node to the term node */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; + 
RF_DagNode_t *unblockNode, *termNode, *commitNode; + RF_DagNode_t *nodes, *wndNode, *wmirNode; + int nWndNodes, nWmirNodes, i; + RF_ReconUnitNum_t which_ru; + RF_PhysDiskAddr_t *pda, *pdaP; + RF_StripeNum_t parityStripeID; + + parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), + asmap->raidAddress, &which_ru); + if (rf_dagDebug) { + printf("[Creating RAID level 1 write DAG]\n"); + } + dag_h->creator = "RaidOneWriteDAG"; + + /* 2 implies access not SU aligned */ + nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; + nWndNodes = (asmap->physInfo->next) ? 2 : 1; + + /* alloc the Wnd nodes and the Wmir node */ + if (asmap->numDataFailed == 1) + nWndNodes--; + if (asmap->numParityFailed == 1) + nWmirNodes--; + + /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock + * + terminator) */ + RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); + i = 0; + wndNode = &nodes[i]; + i += nWndNodes; + wmirNode = &nodes[i]; + i += nWmirNodes; + commitNode = &nodes[i]; + i += 1; + unblockNode = &nodes[i]; + i += 1; + termNode = &nodes[i]; + i += 1; + RF_ASSERT(i == (nWndNodes + nWmirNodes + 3)); + + /* this dag can commit immediately */ + dag_h->numCommitNodes = 1; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + /* initialize the commit, unblock, and term nodes */ + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, (nWndNodes + nWmirNodes), 0, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, + NULL, 1, (nWndNodes + nWmirNodes), 0, 0, dag_h, "Nil", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, + NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + + /* initialize the wnd nodes */ + if (nWndNodes > 0) { + pda = asmap->physInfo; + for (i = 0; i < nWndNodes; i++) { + rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + 
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList); + RF_ASSERT(pda != NULL); + wndNode[i].params[0].p = pda; + wndNode[i].params[1].p = pda->bufPtr; + wndNode[i].params[2].v = parityStripeID; + wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + pda = pda->next; + } + RF_ASSERT(pda == NULL); + } + /* initialize the mirror nodes */ + if (nWmirNodes > 0) { + pda = asmap->physInfo; + pdaP = asmap->parityInfo; + for (i = 0; i < nWmirNodes; i++) { + rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wsd", allocList); + RF_ASSERT(pda != NULL); + wmirNode[i].params[0].p = pdaP; + wmirNode[i].params[1].p = pda->bufPtr; + wmirNode[i].params[2].v = parityStripeID; + wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + pda = pda->next; + pdaP = pdaP->next; + } + RF_ASSERT(pda == NULL); + RF_ASSERT(pdaP == NULL); + } + /* link the header node to the commit node */ + RF_ASSERT(dag_h->numSuccedents == 1); + RF_ASSERT(commitNode->numAntecedents == 0); + dag_h->succedents[0] = commitNode; + + /* link the commit node to the write nodes */ + RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes)); + for (i = 0; i < nWndNodes; i++) { + RF_ASSERT(wndNode[i].numAntecedents == 1); + commitNode->succedents[i] = &wndNode[i]; + wndNode[i].antecedents[0] = commitNode; + wndNode[i].antType[0] = rf_control; + } + for (i = 0; i < nWmirNodes; i++) { + RF_ASSERT(wmirNode[i].numAntecedents == 1); + commitNode->succedents[i + nWndNodes] = &wmirNode[i]; + wmirNode[i].antecedents[0] = commitNode; + wmirNode[i].antType[0] = rf_control; + } + + /* link the write nodes to the unblock node */ + RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes)); + for (i = 0; i < nWndNodes; i++) { + RF_ASSERT(wndNode[i].numSuccedents == 1); + wndNode[i].succedents[0] = unblockNode; + unblockNode->antecedents[i] = &wndNode[i]; + 
unblockNode->antType[i] = rf_control; + } + for (i = 0; i < nWmirNodes; i++) { + RF_ASSERT(wmirNode[i].numSuccedents == 1); + wmirNode[i].succedents[0] = unblockNode; + unblockNode->antecedents[i + nWndNodes] = &wmirNode[i]; + unblockNode->antType[i + nWndNodes] = rf_control; + } + + /* link the unblock node to the term node */ + RF_ASSERT(unblockNode->numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == 1); + RF_ASSERT(termNode->numSuccedents == 0); + unblockNode->succedents[0] = termNode; + termNode->antecedents[0] = unblockNode; + termNode->antType[0] = rf_control; } @@ -1316,233 +1233,238 @@ void rf_CreateRaidOneWriteDAG( -void rf_CommonCreateLargeWriteDAGFwd( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - int nfaults, - int (*redFunc)(RF_DagNode_t *), - int allowBufferRecycle) +void +rf_CommonCreateLargeWriteDAGFwd( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + int nfaults, + int (*redFunc) (RF_DagNode_t *), + int allowBufferRecycle) { - RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode; - RF_DagNode_t *wnqNode, *blockNode, *syncNode, *termNode; - int nWndNodes, nRodNodes, i, nodeNum, asmNum; - RF_AccessStripeMapHeader_t *new_asm_h[2]; - RF_StripeNum_t parityStripeID; - char *sosBuffer, *eosBuffer; - RF_ReconUnitNum_t which_ru; - RF_RaidLayout_t *layoutPtr; - RF_PhysDiskAddr_t *pda; - - layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - - if (rf_dagDebug) - printf("[Creating large-write DAG]\n"); - dag_h->creator = "LargeWriteDAGFwd"; - - dag_h->numCommitNodes = 0; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */ - nWndNodes = asmap->numStripeUnitsAccessed; - 
RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - wndNodes = &nodes[i]; i += nWndNodes; - xorNode = &nodes[i]; i += 1; - wnpNode = &nodes[i]; i += 1; - blockNode = &nodes[i]; i += 1; - syncNode = &nodes[i]; i += 1; - termNode = &nodes[i]; i += 1; - if (nfaults == 2) { - wnqNode = &nodes[i]; i += 1; - } - else { - wnqNode = NULL; - } - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); - if (nRodNodes > 0) { - RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - } - else { - rodNodes = NULL; - } - - /* begin node initialization */ - if (nRodNodes > 0) { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes, 0, 0, dag_h, "Nil", allocList); - } - else { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, 1, 0, 0, dag_h, "Nil", allocList); - } - - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0, dag_h, "Trm", allocList); - - /* initialize the Rod nodes */ - for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { - if (new_asm_h[asmNum]) { - pda = new_asm_h[asmNum]->stripeMap->physInfo; - while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList); - rodNodes[nodeNum].params[0].p = pda; - rodNodes[nodeNum].params[1].p = pda->bufPtr; - rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - 
nodeNum++; - pda=pda->next; - } - } - } - RF_ASSERT(nodeNum == nRodNodes); - - /* initialize the wnd nodes */ - pda = asmap->physInfo; - for (i=0; i < nWndNodes; i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda != NULL); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - - /* initialize the redundancy node */ - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, nfaults, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList); - xorNode->flags |= RF_DAGNODE_FLAG_YIELD; - for (i=0; i < nWndNodes; i++) { - xorNode->params[2*i+0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2*i+1] = wndNodes[i].params[1]; /* buf ptr */ - } - for (i=0; i < nRodNodes; i++) { - xorNode->params[2*(nWndNodes+i)+0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2*(nWndNodes+i)+1] = rodNodes[i].params[1]; /* buf ptr */ - } - xorNode->params[2*(nWndNodes+nRodNodes)].p = raidPtr; /* xor node needs to get at RAID information */ - - /* look for an Rod node that reads a complete SU. If none, alloc a buffer to receive the parity info. - * Note that we can't use a new data buffer because it will not have gotten written when the xor occurs. 
- */ - if (allowBufferRecycle) { - for (i = 0; i < nRodNodes; i++) - if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) - break; - } - if ((!allowBufferRecycle) || (i == nRodNodes)) { - RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); - } - else - xorNode->results[0] = rodNodes[i].params[1].p; - - /* initialize the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); - wnpNode->params[0].p = asmap->parityInfo; - wnpNode->params[1].p = xorNode->results[0]; - wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must describe entire parity unit */ - - if (nfaults == 2) - { - /* we never try to recycle a buffer for the Q calcuation in addition to the parity. - This would cause two buffers to get smashed during the P and Q calculation, - guaranteeing one would be wrong. 
- */ - RF_CallocAndAdd(xorNode->results[1], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); - wnqNode->params[0].p = asmap->qInfo; - wnqNode->params[1].p = xorNode->results[1]; - wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must describe entire parity unit */ - } - - - /* connect nodes to form graph */ - - /* connect dag header to block node */ - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - if (nRodNodes > 0) { - /* connect the block node to the Rod nodes */ - RF_ASSERT(blockNode->numSuccedents == nRodNodes); - RF_ASSERT(syncNode->numAntecedents == nRodNodes); - for (i = 0; i < nRodNodes; i++) { - RF_ASSERT(rodNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rodNodes[i]; - rodNodes[i].antecedents[0] = blockNode; - rodNodes[i].antType[0] = rf_control; - - /* connect the Rod nodes to the Nil node */ - RF_ASSERT(rodNodes[i].numSuccedents == 1); - rodNodes[i].succedents[0] = syncNode; - syncNode->antecedents[i] = &rodNodes[i]; - syncNode->antType[i] = rf_trueData; - } - } - else { - /* connect the block node to the Nil node */ - RF_ASSERT(blockNode->numSuccedents == 1); - RF_ASSERT(syncNode->numAntecedents == 1); - blockNode->succedents[0] = syncNode; - syncNode->antecedents[0] = blockNode; - syncNode->antType[0] = rf_control; - } - - /* connect the sync node to the Wnd nodes */ - RF_ASSERT(syncNode->numSuccedents == (1 + nWndNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numAntecedents == 1); - syncNode->succedents[i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = syncNode; - wndNodes[i].antType[0] = rf_control; - } - - /* connect the sync node to the Xor node */ - 
RF_ASSERT(xorNode->numAntecedents == 1); - syncNode->succedents[nWndNodes] = xorNode; - xorNode->antecedents[0] = syncNode; - xorNode->antType[0] = rf_control; - - /* connect the xor node to the write parity node */ - RF_ASSERT(xorNode->numSuccedents == nfaults); - RF_ASSERT(wnpNode->numAntecedents == 1); - xorNode->succedents[0] = wnpNode; - wnpNode->antecedents[0]= xorNode; - wnpNode->antType[0] = rf_trueData; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numAntecedents == 1); - xorNode->succedents[1] = wnqNode; - wnqNode->antecedents[0] = xorNode; - wnqNode->antType[0] = rf_trueData; - } - - /* connect the write nodes to the term node */ - RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numSuccedents == 1); - wndNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &wndNodes[i]; - termNode->antType[i] = rf_control; - } - RF_ASSERT(wnpNode->numSuccedents == 1); - wnpNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes] = wnpNode; - termNode->antType[nWndNodes] = rf_control; - if (nfaults == 2) { - RF_ASSERT(wnqNode->numSuccedents == 1); - wnqNode->succedents[0] = termNode; - termNode->antecedents[nWndNodes + 1] = wnqNode; - termNode->antType[nWndNodes + 1] = rf_control; - } + RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode; + RF_DagNode_t *wnqNode, *blockNode, *syncNode, *termNode; + int nWndNodes, nRodNodes, i, nodeNum, asmNum; + RF_AccessStripeMapHeader_t *new_asm_h[2]; + RF_StripeNum_t parityStripeID; + char *sosBuffer, *eosBuffer; + RF_ReconUnitNum_t which_ru; + RF_RaidLayout_t *layoutPtr; + RF_PhysDiskAddr_t *pda; + + layoutPtr = &(raidPtr->Layout); + parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); + + if (rf_dagDebug) + printf("[Creating large-write DAG]\n"); + dag_h->creator = "LargeWriteDAGFwd"; + + dag_h->numCommitNodes = 0; + dag_h->numCommits = 0; + 
dag_h->numSuccedents = 1; + + /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */ + nWndNodes = asmap->numStripeUnitsAccessed; + RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + i = 0; + wndNodes = &nodes[i]; + i += nWndNodes; + xorNode = &nodes[i]; + i += 1; + wnpNode = &nodes[i]; + i += 1; + blockNode = &nodes[i]; + i += 1; + syncNode = &nodes[i]; + i += 1; + termNode = &nodes[i]; + i += 1; + if (nfaults == 2) { + wnqNode = &nodes[i]; + i += 1; + } else { + wnqNode = NULL; + } + rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); + if (nRodNodes > 0) { + RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + } else { + rodNodes = NULL; + } + + /* begin node initialization */ + if (nRodNodes > 0) { + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes, 0, 0, dag_h, "Nil", allocList); + } else { + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, 1, 0, 0, dag_h, "Nil", allocList); + } + + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0, dag_h, "Trm", allocList); + + /* initialize the Rod nodes */ + for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { + if (new_asm_h[asmNum]) { + pda = new_asm_h[asmNum]->stripeMap->physInfo; + while (pda) { + rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList); + rodNodes[nodeNum].params[0].p = pda; + rodNodes[nodeNum].params[1].p = 
pda->bufPtr; + rodNodes[nodeNum].params[2].v = parityStripeID; + rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + nodeNum++; + pda = pda->next; + } + } + } + RF_ASSERT(nodeNum == nRodNodes); + + /* initialize the wnd nodes */ + pda = asmap->physInfo; + for (i = 0; i < nWndNodes; i++) { + rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); + RF_ASSERT(pda != NULL); + wndNodes[i].params[0].p = pda; + wndNodes[i].params[1].p = pda->bufPtr; + wndNodes[i].params[2].v = parityStripeID; + wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + pda = pda->next; + } + + /* initialize the redundancy node */ + rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, nfaults, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList); + xorNode->flags |= RF_DAGNODE_FLAG_YIELD; + for (i = 0; i < nWndNodes; i++) { + xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ + xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ + } + for (i = 0; i < nRodNodes; i++) { + xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ + xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ + } + xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; /* xor node needs to get + * at RAID information */ + + /* look for an Rod node that reads a complete SU. If none, alloc a + * buffer to receive the parity info. Note that we can't use a new + * data buffer because it will not have gotten written when the xor + * occurs. 
*/ + if (allowBufferRecycle) { + for (i = 0; i < nRodNodes; i++) + if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) + break; + } + if ((!allowBufferRecycle) || (i == nRodNodes)) { + RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); + } else + xorNode->results[0] = rodNodes[i].params[1].p; + + /* initialize the Wnp node */ + rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); + wnpNode->params[0].p = asmap->parityInfo; + wnpNode->params[1].p = xorNode->results[0]; + wnpNode->params[2].v = parityStripeID; + wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must + * describe entire + * parity unit */ + + if (nfaults == 2) { + /* we never try to recycle a buffer for the Q calcuation in + * addition to the parity. This would cause two buffers to get + * smashed during the P and Q calculation, guaranteeing one + * would be wrong. 
*/ + RF_CallocAndAdd(xorNode->results[1], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); + rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); + wnqNode->params[0].p = asmap->qInfo; + wnqNode->params[1].p = xorNode->results[1]; + wnqNode->params[2].v = parityStripeID; + wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must + * describe entire + * parity unit */ + } + /* connect nodes to form graph */ + + /* connect dag header to block node */ + RF_ASSERT(blockNode->numAntecedents == 0); + dag_h->succedents[0] = blockNode; + + if (nRodNodes > 0) { + /* connect the block node to the Rod nodes */ + RF_ASSERT(blockNode->numSuccedents == nRodNodes); + RF_ASSERT(syncNode->numAntecedents == nRodNodes); + for (i = 0; i < nRodNodes; i++) { + RF_ASSERT(rodNodes[i].numAntecedents == 1); + blockNode->succedents[i] = &rodNodes[i]; + rodNodes[i].antecedents[0] = blockNode; + rodNodes[i].antType[0] = rf_control; + + /* connect the Rod nodes to the Nil node */ + RF_ASSERT(rodNodes[i].numSuccedents == 1); + rodNodes[i].succedents[0] = syncNode; + syncNode->antecedents[i] = &rodNodes[i]; + syncNode->antType[i] = rf_trueData; + } + } else { + /* connect the block node to the Nil node */ + RF_ASSERT(blockNode->numSuccedents == 1); + RF_ASSERT(syncNode->numAntecedents == 1); + blockNode->succedents[0] = syncNode; + syncNode->antecedents[0] = blockNode; + syncNode->antType[0] = rf_control; + } + + /* connect the sync node to the Wnd nodes */ + RF_ASSERT(syncNode->numSuccedents == (1 + nWndNodes)); + for (i = 0; i < nWndNodes; i++) { + RF_ASSERT(wndNodes->numAntecedents == 1); + syncNode->succedents[i] = &wndNodes[i]; + wndNodes[i].antecedents[0] = syncNode; + wndNodes[i].antType[0] = rf_control; + } + + /* connect the sync node to the Xor node */ + 
RF_ASSERT(xorNode->numAntecedents == 1); + syncNode->succedents[nWndNodes] = xorNode; + xorNode->antecedents[0] = syncNode; + xorNode->antType[0] = rf_control; + + /* connect the xor node to the write parity node */ + RF_ASSERT(xorNode->numSuccedents == nfaults); + RF_ASSERT(wnpNode->numAntecedents == 1); + xorNode->succedents[0] = wnpNode; + wnpNode->antecedents[0] = xorNode; + wnpNode->antType[0] = rf_trueData; + if (nfaults == 2) { + RF_ASSERT(wnqNode->numAntecedents == 1); + xorNode->succedents[1] = wnqNode; + wnqNode->antecedents[0] = xorNode; + wnqNode->antType[0] = rf_trueData; + } + /* connect the write nodes to the term node */ + RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults); + RF_ASSERT(termNode->numSuccedents == 0); + for (i = 0; i < nWndNodes; i++) { + RF_ASSERT(wndNodes->numSuccedents == 1); + wndNodes[i].succedents[0] = termNode; + termNode->antecedents[i] = &wndNodes[i]; + termNode->antType[i] = rf_control; + } + RF_ASSERT(wnpNode->numSuccedents == 1); + wnpNode->succedents[0] = termNode; + termNode->antecedents[nWndNodes] = wnpNode; + termNode->antType[nWndNodes] = rf_control; + if (nfaults == 2) { + RF_ASSERT(wnqNode->numSuccedents == 1); + wnqNode->succedents[0] = termNode; + termNode->antecedents[nWndNodes + 1] = wnqNode; + termNode->antType[nWndNodes + 1] = rf_control; + } } @@ -1571,7 +1493,7 @@ void rf_CommonCreateLargeWriteDAGFwd( * Parameters: raidPtr - description of the physical array * asmap - logical & physical addresses for this access * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) + * flags - general flags (e.g. 
disk locking) * allocList - list of memory allocated in DAG creation * pfuncs - list of parity generating functions * qfuncs - list of q generating functions @@ -1579,479 +1501,500 @@ void rf_CommonCreateLargeWriteDAGFwd( * A null qfuncs indicates single fault tolerant *****************************************************************************/ -void rf_CommonCreateSmallWriteDAGFwd( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - RF_RedFuncs_t *pfuncs, - RF_RedFuncs_t *qfuncs) +void +rf_CommonCreateSmallWriteDAGFwd( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + RF_RedFuncs_t * pfuncs, + RF_RedFuncs_t * qfuncs) { - RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode; - RF_DagNode_t *unlockDataNodes, *unlockParityNodes, *unlockQNodes; - RF_DagNode_t *xorNodes, *qNodes, *blockNode, *nodes; - RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes; - int i, j, nNodes, totalNumNodes, lu_flag; - RF_ReconUnitNum_t which_ru; - int (*func)(RF_DagNode_t *), (*undoFunc)(RF_DagNode_t *); - int (*qfunc)(RF_DagNode_t *); - int numDataNodes, numParityNodes; - RF_StripeNum_t parityStripeID; - RF_PhysDiskAddr_t *pda; - char *name, *qname; - long nfaults; - - nfaults = qfuncs ? 2 : 1; - lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - pda = asmap->physInfo; - numDataNodes = asmap->numStripeUnitsAccessed; - numParityNodes = (asmap->parityInfo->next) ? 
2 : 1; - - if (rf_dagDebug) printf("[Creating small-write DAG]\n"); - RF_ASSERT(numDataNodes > 0); - dag_h->creator = "SmallWriteDAGFwd"; - - dag_h->numCommitNodes = 0; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - qfunc = NULL; - qname = NULL; - - /* DAG creation occurs in four steps: - 1. count the number of nodes in the DAG - 2. create the nodes - 3. initialize the nodes - 4. connect the nodes - */ - - /* Step 1. compute number of nodes in the graph */ - - /* number of nodes: - a read and write for each data unit - a redundancy computation node for each parity node (nfaults * nparity) - a read and write for each parity unit - a block node - a terminate node - if atomic RMW - an unlock node for each data unit, redundancy unit - */ - totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) + (nfaults * 2 * numParityNodes) + 2; - if (lu_flag) - totalNumNodes += (numDataNodes + (nfaults * numParityNodes)); - - - /* Step 2. create the nodes */ - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; i += 1; - readDataNodes = &nodes[i]; i += numDataNodes; - readParityNodes = &nodes[i]; i += numParityNodes; - writeDataNodes = &nodes[i]; i += numDataNodes; - writeParityNodes = &nodes[i]; i += numParityNodes; - xorNodes = &nodes[i]; i += numParityNodes; - termNode = &nodes[i]; i += 1; - if (lu_flag) { - unlockDataNodes = &nodes[i]; i += numDataNodes; - unlockParityNodes = &nodes[i]; i += numParityNodes; - } - else { - unlockDataNodes = unlockParityNodes = NULL; - } - if (nfaults == 2) { - readQNodes = &nodes[i]; i += numParityNodes; - writeQNodes = &nodes[i]; i += numParityNodes; - qNodes = &nodes[i]; i += numParityNodes; - if (lu_flag) { - unlockQNodes = &nodes[i]; i += numParityNodes; - } - else { - unlockQNodes = NULL; - } - } - else { - readQNodes = writeQNodes = qNodes = unlockQNodes = NULL; - } - RF_ASSERT(i == totalNumNodes); - - /* Step 3. 
initialize the nodes */ - /* initialize block node (Nil) */ - nNodes = numDataNodes + (nfaults * numParityNodes); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); - - /* initialize terminate node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList); - - /* initialize nodes which read old data (Rod) */ - for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, (numParityNodes * nfaults) + 1, 1, 4, 0, dag_h, "Rod", allocList); - RF_ASSERT(pda != NULL); - readDataNodes[i].params[0].p = pda; /* physical disk addr desc */ - readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old data */ - readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - pda=pda->next; - for (j = 0; j < readDataNodes[i].numSuccedents; j++) - readDataNodes[i].propList[j] = NULL; - } - - /* initialize nodes which read old parity (Rop) */ - pda = asmap->parityInfo; i = 0; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Rop", allocList); - readParityNodes[i].params[0].p = pda; - readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old parity */ - readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - for (j = 0; j < readParityNodes[i].numSuccedents; j++) - readParityNodes[i].propList[0] = NULL; - pda=pda->next; - } - - /* initialize nodes which read old Q (Roq) */ - if (nfaults == 2) - { - pda = asmap->qInfo; - 
for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList); - readQNodes[i].params[0].p = pda; - readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old Q */ - readQNodes[i].params[2].v = parityStripeID; - readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - for (j = 0; j < readQNodes[i].numSuccedents; j++) - readQNodes[i].propList[0] = NULL; - pda=pda->next; - } - } - - /* initialize nodes which write new data (Wnd) */ - pda = asmap->physInfo; - for (i=0; i < numDataNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - writeDataNodes[i].params[0].p = pda; /* physical disk addr desc */ - writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new data to be written */ - writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList); - unlockDataNodes[i].params[0].p = pda; /* physical disk addr desc */ - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - - pda = pda->next; - } - - - /* initialize nodes which compute new parity and Q */ - /* we use the simple XOR func in the double-XOR case, and when we're accessing only a portion of one stripe unit. 
- * the distinction between the two is that the regular XOR func assumes that the targbuf is a full SU in size, - * and examines the pda associated with the buffer to decide where within the buffer to XOR the data, whereas - * the simple XOR func just XORs the data into the start of the buffer. - */ - if ((numParityNodes==2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { - func = pfuncs->simple; undoFunc = rf_NullNodeUndoFunc; name = pfuncs->SimpleName; - if (qfuncs) { - qfunc = qfuncs->simple; - qname = qfuncs->SimpleName; - } - } - else { - func = pfuncs->regular; undoFunc = rf_NullNodeUndoFunc; name = pfuncs->RegularName; - if (qfuncs) { qfunc = qfuncs->regular; qname = qfuncs->RegularName;} - } - /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} nodes, and raidPtr */ - if (numParityNodes==2) { /* double-xor case */ - for (i=0; i < numParityNodes; i++) { - rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for xor */ - xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; - xorNodes[i].params[0] = readDataNodes[i].params[0]; - xorNodes[i].params[1] = readDataNodes[i].params[1]; - xorNodes[i].params[2] = readParityNodes[i].params[0]; - xorNodes[i].params[3] = readParityNodes[i].params[1]; - xorNodes[i].params[4] = writeDataNodes[i].params[0]; - xorNodes[i].params[5] = writeDataNodes[i].params[1]; - xorNodes[i].params[6].p = raidPtr; - xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as target buf */ - if (nfaults==2) - { - rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, qname, allocList); /* no wakeup func for xor */ - qNodes[i].params[0] = readDataNodes[i].params[0]; - qNodes[i].params[1] = readDataNodes[i].params[1]; - qNodes[i].params[2] = readQNodes[i].params[0]; - 
qNodes[i].params[3] = readQNodes[i].params[1]; - qNodes[i].params[4] = writeDataNodes[i].params[0]; - qNodes[i].params[5] = writeDataNodes[i].params[1]; - qNodes[i].params[6].p = raidPtr; - qNodes[i].results[0] = readQNodes[i].params[1].p; /* use old Q buf as target buf */ - } - } - } - else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); - xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; - for (i=0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2*i+0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2*i+1] = readDataNodes[i].params[1]; /* buffer pointer */ - } - for (i=0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2*(numDataNodes+1+i)+0] = writeDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2*(numDataNodes+1+i)+1] = writeDataNodes[i].params[1]; /* buffer pointer */ - } - xorNodes[0].params[2*(numDataNodes+numDataNodes+1)].p = raidPtr; /* xor node needs to get at RAID information */ - xorNodes[0].results[0] = readParityNodes[0].params[1].p; - if (nfaults==2) - { - rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, qname, allocList); - for (i=0; i<numDataNodes; i++) { - /* set up params related to Rod */ - qNodes[0].params[2*i+0] = readDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2*i+1] = readDataNodes[i].params[1]; /* buffer pointer */ - } - /* and read old q */ - qNodes[0].params[2*numDataNodes + 0] = readQNodes[0].params[0]; /* pda */ - qNodes[0].params[2*numDataNodes + 1] = readQNodes[0].params[1]; /* buffer pointer */ - for (i=0; i < numDataNodes; i++) { - /* set up params related to Wnd nodes */ - 
qNodes[0].params[2*(numDataNodes+1+i)+0] = writeDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2*(numDataNodes+1+i)+1] = writeDataNodes[i].params[1]; /* buffer pointer */ - } - qNodes[0].params[2*(numDataNodes+numDataNodes+1)].p = raidPtr; /* xor node needs to get at RAID information */ - qNodes[0].results[0] = readQNodes[0].params[1].p; - } - } - - /* initialize nodes which write new parity (Wnp) */ - pda = asmap->parityInfo; - for (i=0; i < numParityNodes; i++) { - rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnp", allocList); - RF_ASSERT(pda != NULL); - writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr) filled in by xor node */ - writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for parity write operation */ - writeParityNodes[i].params[2].v = parityStripeID; - writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unp", allocList); - unlockParityNodes[i].params[0].p = pda; /* physical disk addr desc */ - unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - - pda = pda->next; - } - - /* initialize nodes which write new Q (Wnq) */ - if (nfaults == 2) - { - pda = asmap->qInfo; - for (i=0; i < numParityNodes; i++) { - rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnq", allocList); - RF_ASSERT(pda != NULL); - writeQNodes[i].params[0].p = pda; /* param 1 (bufPtr) filled in by xor node */ - writeQNodes[i].params[1].p = qNodes[i].results[0]; /* buffer pointer for parity write operation */ - writeQNodes[i].params[2].v = parityStripeID; - 
writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - + RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode; + RF_DagNode_t *unlockDataNodes, *unlockParityNodes, *unlockQNodes; + RF_DagNode_t *xorNodes, *qNodes, *blockNode, *nodes; + RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes; + int i, j, nNodes, totalNumNodes, lu_flag; + RF_ReconUnitNum_t which_ru; + int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); + int (*qfunc) (RF_DagNode_t *); + int numDataNodes, numParityNodes; + RF_StripeNum_t parityStripeID; + RF_PhysDiskAddr_t *pda; + char *name, *qname; + long nfaults; + + nfaults = qfuncs ? 2 : 1; + lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ + + parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); + pda = asmap->physInfo; + numDataNodes = asmap->numStripeUnitsAccessed; + numParityNodes = (asmap->parityInfo->next) ? 2 : 1; + + if (rf_dagDebug) + printf("[Creating small-write DAG]\n"); + RF_ASSERT(numDataNodes > 0); + dag_h->creator = "SmallWriteDAGFwd"; + + dag_h->numCommitNodes = 0; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + qfunc = NULL; + qname = NULL; + + /* DAG creation occurs in four steps: 1. count the number of nodes in + * the DAG 2. create the nodes 3. initialize the nodes 4. connect the + * nodes */ + + /* Step 1. compute number of nodes in the graph */ + + /* number of nodes: a read and write for each data unit a redundancy + * computation node for each parity node (nfaults * nparity) a read + * and write for each parity unit a block node a terminate node if + * atomic RMW an unlock node for each data unit, redundancy unit */ + totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) + (nfaults * 2 * numParityNodes) + 2; + if (lu_flag) + totalNumNodes += (numDataNodes + (nfaults * numParityNodes)); + + + /* Step 2. 
create the nodes */ + RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + i = 0; + blockNode = &nodes[i]; + i += 1; + readDataNodes = &nodes[i]; + i += numDataNodes; + readParityNodes = &nodes[i]; + i += numParityNodes; + writeDataNodes = &nodes[i]; + i += numDataNodes; + writeParityNodes = &nodes[i]; + i += numParityNodes; + xorNodes = &nodes[i]; + i += numParityNodes; + termNode = &nodes[i]; + i += 1; if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unq", allocList); - unlockQNodes[i].params[0].p = pda; /* physical disk addr desc */ - unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - - pda = pda->next; - } - } - - /* Step 4. connect the nodes */ - - /* connect header to block node */ - dag_h->succedents[0] = blockNode; - - /* connect block node to read old data nodes */ - RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults))); - for (i = 0; i < numDataNodes; i++) { - blockNode->succedents[i] = &readDataNodes[i]; - RF_ASSERT(readDataNodes[i].numAntecedents == 1); - readDataNodes[i].antecedents[0]= blockNode; - readDataNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old parity nodes */ - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; - RF_ASSERT(readParityNodes[i].numAntecedents == 1); - readParityNodes[i].antecedents[0] = blockNode; - readParityNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old Q nodes */ - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i]; - RF_ASSERT(readQNodes[i].numAntecedents == 1); - readQNodes[i].antecedents[0] = blockNode; - readQNodes[i].antType[0] = rf_control; - } - - /* connect read old 
data nodes to write new data nodes */ - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == ((nfaults * numParityNodes) + 1)); - RF_ASSERT(writeDataNodes[i].numAntecedents == 1); - readDataNodes[i].succedents[0] = &writeDataNodes[i]; - writeDataNodes[i].antecedents[0] = &readDataNodes[i]; - writeDataNodes[i].antType[0] = rf_antiData; - } - - /* connect read old data nodes to xor nodes */ - for (i = 0; i < numDataNodes; i++) { - for (j = 0; j < numParityNodes; j++){ - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[1 + j] = &xorNodes[j]; - xorNodes[j].antecedents[i] = &readDataNodes[i]; - xorNodes[j].antType[i] = rf_trueData; - } - } - - /* connect read old data nodes to q nodes */ - if (nfaults == 2) - for (i = 0; i < numDataNodes; i++) - for (j = 0; j < numParityNodes; j++){ - RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[1 + numParityNodes + j] = &qNodes[j]; - qNodes[j].antecedents[i] = &readDataNodes[i]; - qNodes[j].antType[i] = rf_trueData; - } - - /* connect read old parity nodes to xor nodes */ - for (i = 0; i < numParityNodes; i++) { - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); - readParityNodes[i].succedents[j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - xorNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - - /* connect read old q nodes to q nodes */ - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(readQNodes[i].numSuccedents == numParityNodes); - readQNodes[i].succedents[j] = &qNodes[j]; - qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i]; - qNodes[j].antType[numDataNodes + i] = rf_trueData; - } - } - - /* connect xor nodes to the write new parity nodes */ - for (i = 0; i < numParityNodes; i++) { - 
RF_ASSERT(writeParityNodes[i].numAntecedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numSuccedents == numParityNodes); - xorNodes[i].succedents[j] = &writeParityNodes[j]; - writeParityNodes[j].antecedents[i] = &xorNodes[i]; - writeParityNodes[j].antType[i] = rf_trueData; - } - } - - /* connect q nodes to the write new q nodes */ - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeQNodes[i].numAntecedents == numParityNodes); - for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(qNodes[j].numSuccedents == 1); - qNodes[i].succedents[j] = &writeQNodes[j]; - writeQNodes[j].antecedents[i] = &qNodes[i]; - writeQNodes[j].antType[i] = rf_trueData; - } - } - - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - RF_ASSERT(termNode->numSuccedents == 0); - for (i = 0; i < numDataNodes; i++) { - if (lu_flag) { - /* connect write new data nodes to unlock nodes */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); - writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; - unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; - unlockDataNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); - unlockDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &unlockDataNodes[i]; - termNode->antType[i] = rf_control; - } - else { - /* connect write new data nodes to term node */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - writeDataNodes[i].succedents[0] = termNode; - termNode->antecedents[i] = &writeDataNodes[i]; - termNode->antType[i] = rf_control; - } - } - - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new parity nodes to unlock nodes */ - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - 
RF_ASSERT(unlockParityNodes[i].numAntecedents == 1); - writeParityNodes[i].succedents[0] = &unlockParityNodes[i]; - unlockParityNodes[i].antecedents[0] = &writeParityNodes[i]; - unlockParityNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to term node */ - RF_ASSERT(unlockParityNodes[i].numSuccedents == 1); - unlockParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } - else { - RF_ASSERT(writeParityNodes[i].numSuccedents == 1); - writeParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &writeParityNodes[i]; - termNode->antType[numDataNodes + i] = rf_control; - } - } - - if (nfaults == 2) - for (i = 0; i < numParityNodes; i++) { - if (lu_flag) { - /* connect write new Q nodes to unlock nodes */ - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - RF_ASSERT(unlockQNodes[i].numAntecedents == 1); - writeQNodes[i].succedents[0] = &unlockQNodes[i]; - unlockQNodes[i].antecedents[0] = &writeQNodes[i]; - unlockQNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to unblock node */ - RF_ASSERT(unlockQNodes[i].numSuccedents == 1); - unlockQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } - else { - RF_ASSERT(writeQNodes[i].numSuccedents == 1); - writeQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; - } - } + unlockDataNodes = &nodes[i]; + i += numDataNodes; + unlockParityNodes = &nodes[i]; + i += numParityNodes; + } else { + unlockDataNodes = unlockParityNodes = NULL; + } + if (nfaults == 2) { + readQNodes = &nodes[i]; + i += numParityNodes; + writeQNodes = &nodes[i]; + i += numParityNodes; + qNodes = &nodes[i]; + i += numParityNodes; + if 
(lu_flag) { + unlockQNodes = &nodes[i]; + i += numParityNodes; + } else { + unlockQNodes = NULL; + } + } else { + readQNodes = writeQNodes = qNodes = unlockQNodes = NULL; + } + RF_ASSERT(i == totalNumNodes); + + /* Step 3. initialize the nodes */ + /* initialize block node (Nil) */ + nNodes = numDataNodes + (nfaults * numParityNodes); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); + + /* initialize terminate node (Trm) */ + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList); + + /* initialize nodes which read old data (Rod) */ + for (i = 0; i < numDataNodes; i++) { + rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, (numParityNodes * nfaults) + 1, 1, 4, 0, dag_h, "Rod", allocList); + RF_ASSERT(pda != NULL); + readDataNodes[i].params[0].p = pda; /* physical disk addr + * desc */ + readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old + * data */ + readDataNodes[i].params[2].v = parityStripeID; + readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); + pda = pda->next; + for (j = 0; j < readDataNodes[i].numSuccedents; j++) + readDataNodes[i].propList[j] = NULL; + } + + /* initialize nodes which read old parity (Rop) */ + pda = asmap->parityInfo; + i = 0; + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(pda != NULL); + rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Rop", allocList); + readParityNodes[i].params[0].p = pda; + readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old + * parity */ + readParityNodes[i].params[2].v = parityStripeID; + readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 
lu_flag, 0, which_ru); + for (j = 0; j < readParityNodes[i].numSuccedents; j++) + readParityNodes[i].propList[0] = NULL; + pda = pda->next; + } + + /* initialize nodes which read old Q (Roq) */ + if (nfaults == 2) { + pda = asmap->qInfo; + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(pda != NULL); + rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList); + readQNodes[i].params[0].p = pda; + readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old Q */ + readQNodes[i].params[2].v = parityStripeID; + readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); + for (j = 0; j < readQNodes[i].numSuccedents; j++) + readQNodes[i].propList[0] = NULL; + pda = pda->next; + } + } + /* initialize nodes which write new data (Wnd) */ + pda = asmap->physInfo; + for (i = 0; i < numDataNodes; i++) { + RF_ASSERT(pda != NULL); + rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); + writeDataNodes[i].params[0].p = pda; /* physical disk addr + * desc */ + writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new + * data to be written */ + writeDataNodes[i].params[2].v = parityStripeID; + writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + + if (lu_flag) { + /* initialize node to unlock the disk queue */ + rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList); + unlockDataNodes[i].params[0].p = pda; /* physical disk addr + * desc */ + unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); + } + pda = pda->next; + } + + + /* initialize nodes which compute new parity and Q */ + /* we use the simple XOR func in the double-XOR 
case, and when we're + * accessing only a portion of one stripe unit. the distinction + * between the two is that the regular XOR func assumes that the + * targbuf is a full SU in size, and examines the pda associated with + * the buffer to decide where within the buffer to XOR the data, + * whereas the simple XOR func just XORs the data into the start of + * the buffer. */ + if ((numParityNodes == 2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { + func = pfuncs->simple; + undoFunc = rf_NullNodeUndoFunc; + name = pfuncs->SimpleName; + if (qfuncs) { + qfunc = qfuncs->simple; + qname = qfuncs->SimpleName; + } + } else { + func = pfuncs->regular; + undoFunc = rf_NullNodeUndoFunc; + name = pfuncs->RegularName; + if (qfuncs) { + qfunc = qfuncs->regular; + qname = qfuncs->RegularName; + } + } + /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} + * nodes, and raidPtr */ + if (numParityNodes == 2) { /* double-xor case */ + for (i = 0; i < numParityNodes; i++) { + rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for + * xor */ + xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; + xorNodes[i].params[0] = readDataNodes[i].params[0]; + xorNodes[i].params[1] = readDataNodes[i].params[1]; + xorNodes[i].params[2] = readParityNodes[i].params[0]; + xorNodes[i].params[3] = readParityNodes[i].params[1]; + xorNodes[i].params[4] = writeDataNodes[i].params[0]; + xorNodes[i].params[5] = writeDataNodes[i].params[1]; + xorNodes[i].params[6].p = raidPtr; + xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as + * target buf */ + if (nfaults == 2) { + rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, qname, allocList); /* no wakeup func for + * xor */ + qNodes[i].params[0] = readDataNodes[i].params[0]; + 
qNodes[i].params[1] = readDataNodes[i].params[1]; + qNodes[i].params[2] = readQNodes[i].params[0]; + qNodes[i].params[3] = readQNodes[i].params[1]; + qNodes[i].params[4] = writeDataNodes[i].params[0]; + qNodes[i].params[5] = writeDataNodes[i].params[1]; + qNodes[i].params[6].p = raidPtr; + qNodes[i].results[0] = readQNodes[i].params[1].p; /* use old Q buf as + * target buf */ + } + } + } else { + /* there is only one xor node in this case */ + rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); + xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; + for (i = 0; i < numDataNodes + 1; i++) { + /* set up params related to Rod and Rop nodes */ + xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ + xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ + } + for (i = 0; i < numDataNodes; i++) { + /* set up params related to Wnd and Wnp nodes */ + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */ + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ + } + xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get + * at RAID information */ + xorNodes[0].results[0] = readParityNodes[0].params[1].p; + if (nfaults == 2) { + rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, qname, allocList); + for (i = 0; i < numDataNodes; i++) { + /* set up params related to Rod */ + qNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ + qNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ + } + /* and read old q */ + qNodes[0].params[2 * numDataNodes + 0] = readQNodes[0].params[0]; /* pda */ + qNodes[0].params[2 * numDataNodes + 1] = 
readQNodes[0].params[1]; /* buffer pointer */ + for (i = 0; i < numDataNodes; i++) { + /* set up params related to Wnd nodes */ + qNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */ + qNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ + } + qNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get + * at RAID information */ + qNodes[0].results[0] = readQNodes[0].params[1].p; + } + } + + /* initialize nodes which write new parity (Wnp) */ + pda = asmap->parityInfo; + for (i = 0; i < numParityNodes; i++) { + rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnp", allocList); + RF_ASSERT(pda != NULL); + writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr) + * filled in by xor node */ + writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for + * parity write + * operation */ + writeParityNodes[i].params[2].v = parityStripeID; + writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + + if (lu_flag) { + /* initialize node to unlock the disk queue */ + rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unp", allocList); + unlockParityNodes[i].params[0].p = pda; /* physical disk addr + * desc */ + unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); + } + pda = pda->next; + } + + /* initialize nodes which write new Q (Wnq) */ + if (nfaults == 2) { + pda = asmap->qInfo; + for (i = 0; i < numParityNodes; i++) { + rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnq", allocList); + RF_ASSERT(pda != NULL); + writeQNodes[i].params[0].p = pda; /* param 1 (bufPtr) + * filled 
in by xor node */ + writeQNodes[i].params[1].p = qNodes[i].results[0]; /* buffer pointer for + * parity write + * operation */ + writeQNodes[i].params[2].v = parityStripeID; + writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + + if (lu_flag) { + /* initialize node to unlock the disk queue */ + rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unq", allocList); + unlockQNodes[i].params[0].p = pda; /* physical disk addr + * desc */ + unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); + } + pda = pda->next; + } + } + /* Step 4. connect the nodes */ + + /* connect header to block node */ + dag_h->succedents[0] = blockNode; + + /* connect block node to read old data nodes */ + RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults))); + for (i = 0; i < numDataNodes; i++) { + blockNode->succedents[i] = &readDataNodes[i]; + RF_ASSERT(readDataNodes[i].numAntecedents == 1); + readDataNodes[i].antecedents[0] = blockNode; + readDataNodes[i].antType[0] = rf_control; + } + + /* connect block node to read old parity nodes */ + for (i = 0; i < numParityNodes; i++) { + blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; + RF_ASSERT(readParityNodes[i].numAntecedents == 1); + readParityNodes[i].antecedents[0] = blockNode; + readParityNodes[i].antType[0] = rf_control; + } + + /* connect block node to read old Q nodes */ + if (nfaults == 2) + for (i = 0; i < numParityNodes; i++) { + blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i]; + RF_ASSERT(readQNodes[i].numAntecedents == 1); + readQNodes[i].antecedents[0] = blockNode; + readQNodes[i].antType[0] = rf_control; + } + + /* connect read old data nodes to write new data nodes */ + for (i = 0; i < numDataNodes; i++) { + RF_ASSERT(readDataNodes[i].numSuccedents == ((nfaults * numParityNodes) + 1)); + 
RF_ASSERT(writeDataNodes[i].numAntecedents == 1); + readDataNodes[i].succedents[0] = &writeDataNodes[i]; + writeDataNodes[i].antecedents[0] = &readDataNodes[i]; + writeDataNodes[i].antType[0] = rf_antiData; + } + + /* connect read old data nodes to xor nodes */ + for (i = 0; i < numDataNodes; i++) { + for (j = 0; j < numParityNodes; j++) { + RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); + readDataNodes[i].succedents[1 + j] = &xorNodes[j]; + xorNodes[j].antecedents[i] = &readDataNodes[i]; + xorNodes[j].antType[i] = rf_trueData; + } + } + + /* connect read old data nodes to q nodes */ + if (nfaults == 2) + for (i = 0; i < numDataNodes; i++) + for (j = 0; j < numParityNodes; j++) { + RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes); + readDataNodes[i].succedents[1 + numParityNodes + j] = &qNodes[j]; + qNodes[j].antecedents[i] = &readDataNodes[i]; + qNodes[j].antType[i] = rf_trueData; + } + + /* connect read old parity nodes to xor nodes */ + for (i = 0; i < numParityNodes; i++) { + for (j = 0; j < numParityNodes; j++) { + RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); + readParityNodes[i].succedents[j] = &xorNodes[j]; + xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; + xorNodes[j].antType[numDataNodes + i] = rf_trueData; + } + } + + /* connect read old q nodes to q nodes */ + if (nfaults == 2) + for (i = 0; i < numParityNodes; i++) { + for (j = 0; j < numParityNodes; j++) { + RF_ASSERT(readQNodes[i].numSuccedents == numParityNodes); + readQNodes[i].succedents[j] = &qNodes[j]; + qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i]; + qNodes[j].antType[numDataNodes + i] = rf_trueData; + } + } + + /* connect xor nodes to the write new parity nodes */ + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(writeParityNodes[i].numAntecedents == numParityNodes); + for (j = 0; j < numParityNodes; j++) { + RF_ASSERT(xorNodes[j].numSuccedents == numParityNodes); + xorNodes[i].succedents[j] = 
&writeParityNodes[j]; + writeParityNodes[j].antecedents[i] = &xorNodes[i]; + writeParityNodes[j].antType[i] = rf_trueData; + } + } + + /* connect q nodes to the write new q nodes */ + if (nfaults == 2) + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(writeQNodes[i].numAntecedents == numParityNodes); + for (j = 0; j < numParityNodes; j++) { + RF_ASSERT(qNodes[j].numSuccedents == 1); + qNodes[i].succedents[j] = &writeQNodes[j]; + writeQNodes[j].antecedents[i] = &qNodes[i]; + writeQNodes[j].antType[i] = rf_trueData; + } + } + + RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); + RF_ASSERT(termNode->numSuccedents == 0); + for (i = 0; i < numDataNodes; i++) { + if (lu_flag) { + /* connect write new data nodes to unlock nodes */ + RF_ASSERT(writeDataNodes[i].numSuccedents == 1); + RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); + writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; + unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; + unlockDataNodes[i].antType[0] = rf_control; + + /* connect unlock nodes to term node */ + RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); + unlockDataNodes[i].succedents[0] = termNode; + termNode->antecedents[i] = &unlockDataNodes[i]; + termNode->antType[i] = rf_control; + } else { + /* connect write new data nodes to term node */ + RF_ASSERT(writeDataNodes[i].numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); + writeDataNodes[i].succedents[0] = termNode; + termNode->antecedents[i] = &writeDataNodes[i]; + termNode->antType[i] = rf_control; + } + } + + for (i = 0; i < numParityNodes; i++) { + if (lu_flag) { + /* connect write new parity nodes to unlock nodes */ + RF_ASSERT(writeParityNodes[i].numSuccedents == 1); + RF_ASSERT(unlockParityNodes[i].numAntecedents == 1); + writeParityNodes[i].succedents[0] = &unlockParityNodes[i]; + unlockParityNodes[i].antecedents[0] = &writeParityNodes[i]; + unlockParityNodes[i].antType[0] = rf_control; + 
+ /* connect unlock nodes to term node */ + RF_ASSERT(unlockParityNodes[i].numSuccedents == 1); + unlockParityNodes[i].succedents[0] = termNode; + termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i]; + termNode->antType[numDataNodes + i] = rf_control; + } else { + RF_ASSERT(writeParityNodes[i].numSuccedents == 1); + writeParityNodes[i].succedents[0] = termNode; + termNode->antecedents[numDataNodes + i] = &writeParityNodes[i]; + termNode->antType[numDataNodes + i] = rf_control; + } + } + + if (nfaults == 2) + for (i = 0; i < numParityNodes; i++) { + if (lu_flag) { + /* connect write new Q nodes to unlock nodes */ + RF_ASSERT(writeQNodes[i].numSuccedents == 1); + RF_ASSERT(unlockQNodes[i].numAntecedents == 1); + writeQNodes[i].succedents[0] = &unlockQNodes[i]; + unlockQNodes[i].antecedents[0] = &writeQNodes[i]; + unlockQNodes[i].antType[0] = rf_control; + + /* connect unlock nodes to unblock node */ + RF_ASSERT(unlockQNodes[i].numSuccedents == 1); + unlockQNodes[i].succedents[0] = termNode; + termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i]; + termNode->antType[numDataNodes + numParityNodes + i] = rf_control; + } else { + RF_ASSERT(writeQNodes[i].numSuccedents == 1); + writeQNodes[i].succedents[0] = termNode; + termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i]; + termNode->antType[numDataNodes + numParityNodes + i] = rf_control; + } + } } @@ -2068,135 +2011,140 @@ void rf_CommonCreateSmallWriteDAGFwd( * Parameters: raidPtr - description of the physical array * asmap - logical & physical addresses for this access * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) + * flags - general flags (e.g. 
disk locking) * allocList - list of memory allocated in DAG creation *****************************************************************************/ -void rf_CreateRaidOneWriteDAGFwd( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList) +void +rf_CreateRaidOneWriteDAGFwd( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList) { - RF_DagNode_t *blockNode, *unblockNode, *termNode; - RF_DagNode_t *nodes, *wndNode, *wmirNode; - int nWndNodes, nWmirNodes, i; - RF_ReconUnitNum_t which_ru; - RF_PhysDiskAddr_t *pda, *pdaP; - RF_StripeNum_t parityStripeID; - - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), - asmap->raidAddress, &which_ru); - if (rf_dagDebug) { - printf("[Creating RAID level 1 write DAG]\n"); - } - - nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; /* 2 implies access not SU aligned */ - nWndNodes = (asmap->physInfo->next) ? 
2 : 1; - - /* alloc the Wnd nodes and the Wmir node */ - if (asmap->numDataFailed == 1) - nWndNodes--; - if (asmap->numParityFailed == 1) - nWmirNodes--; - - /* total number of nodes = nWndNodes + nWmirNodes + (block + unblock + terminator) */ - RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - wndNode = &nodes[i]; i += nWndNodes; - wmirNode = &nodes[i]; i += nWmirNodes; - blockNode = &nodes[i]; i += 1; - unblockNode = &nodes[i]; i += 1; - termNode = &nodes[i]; i += 1; - RF_ASSERT(i == (nWndNodes + nWmirNodes + 3)); - - /* this dag can commit immediately */ - dag_h->numCommitNodes = 0; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* initialize the unblock and term nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes), 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes), 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the wnd nodes */ - if (nWndNodes > 0) { - pda = asmap->physInfo; - for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList); - RF_ASSERT(pda != NULL); - wndNode[i].params[0].p = pda; - wndNode[i].params[1].p = pda->bufPtr; - wndNode[i].params[2].v = parityStripeID; - wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - } - RF_ASSERT(pda == NULL); - } - - /* initialize the mirror nodes */ - if (nWmirNodes > 0) { - pda = asmap->physInfo; - pdaP = asmap->parityInfo; - for (i = 0; i < nWmirNodes; i++) { - rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 
0, dag_h, "Wsd", allocList); - RF_ASSERT(pda != NULL); - wmirNode[i].params[0].p = pdaP; - wmirNode[i].params[1].p = pda->bufPtr; - wmirNode[i].params[2].v = parityStripeID; - wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - pda = pda->next; - pdaP = pdaP->next; - } - RF_ASSERT(pda == NULL); - RF_ASSERT(pdaP == NULL); - } - - /* link the header node to the block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* link the block node to the write nodes */ - RF_ASSERT(blockNode->numSuccedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numAntecedents == 1); - blockNode->succedents[i] = &wndNode[i]; - wndNode[i].antecedents[0] = blockNode; - wndNode[i].antType[0] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numAntecedents == 1); - blockNode->succedents[i + nWndNodes] = &wmirNode[i]; - wmirNode[i].antecedents[0] = blockNode; - wmirNode[i].antType[0] = rf_control; - } - - /* link the write nodes to the unblock node */ - RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes)); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNode[i].numSuccedents == 1); - wndNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNode[i]; - unblockNode->antType[i] = rf_control; - } - for (i = 0; i < nWmirNodes; i++) { - RF_ASSERT(wmirNode[i].numSuccedents == 1); - wmirNode[i].succedents[0] = unblockNode; - unblockNode->antecedents[i + nWndNodes] = &wmirNode[i]; - unblockNode->antType[i + nWndNodes] = rf_control; - } - - /* link the unblock node to the term node */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - - return; + RF_DagNode_t *blockNode, 
*unblockNode, *termNode; + RF_DagNode_t *nodes, *wndNode, *wmirNode; + int nWndNodes, nWmirNodes, i; + RF_ReconUnitNum_t which_ru; + RF_PhysDiskAddr_t *pda, *pdaP; + RF_StripeNum_t parityStripeID; + + parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), + asmap->raidAddress, &which_ru); + if (rf_dagDebug) { + printf("[Creating RAID level 1 write DAG]\n"); + } + nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; /* 2 implies access not + * SU aligned */ + nWndNodes = (asmap->physInfo->next) ? 2 : 1; + + /* alloc the Wnd nodes and the Wmir node */ + if (asmap->numDataFailed == 1) + nWndNodes--; + if (asmap->numParityFailed == 1) + nWmirNodes--; + + /* total number of nodes = nWndNodes + nWmirNodes + (block + unblock + + * terminator) */ + RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + i = 0; + wndNode = &nodes[i]; + i += nWndNodes; + wmirNode = &nodes[i]; + i += nWmirNodes; + blockNode = &nodes[i]; + i += 1; + unblockNode = &nodes[i]; + i += 1; + termNode = &nodes[i]; + i += 1; + RF_ASSERT(i == (nWndNodes + nWmirNodes + 3)); + + /* this dag can commit immediately */ + dag_h->numCommitNodes = 0; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + /* initialize the unblock and term nodes */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes), 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes), 0, 0, dag_h, "Nil", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + + /* initialize the wnd nodes */ + if (nWndNodes > 0) { + pda = asmap->physInfo; + for (i = 0; i < nWndNodes; i++) { + rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList); + RF_ASSERT(pda != NULL); 
+ wndNode[i].params[0].p = pda; + wndNode[i].params[1].p = pda->bufPtr; + wndNode[i].params[2].v = parityStripeID; + wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + pda = pda->next; + } + RF_ASSERT(pda == NULL); + } + /* initialize the mirror nodes */ + if (nWmirNodes > 0) { + pda = asmap->physInfo; + pdaP = asmap->parityInfo; + for (i = 0; i < nWmirNodes; i++) { + rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wsd", allocList); + RF_ASSERT(pda != NULL); + wmirNode[i].params[0].p = pdaP; + wmirNode[i].params[1].p = pda->bufPtr; + wmirNode[i].params[2].v = parityStripeID; + wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + pda = pda->next; + pdaP = pdaP->next; + } + RF_ASSERT(pda == NULL); + RF_ASSERT(pdaP == NULL); + } + /* link the header node to the block node */ + RF_ASSERT(dag_h->numSuccedents == 1); + RF_ASSERT(blockNode->numAntecedents == 0); + dag_h->succedents[0] = blockNode; + + /* link the block node to the write nodes */ + RF_ASSERT(blockNode->numSuccedents == (nWndNodes + nWmirNodes)); + for (i = 0; i < nWndNodes; i++) { + RF_ASSERT(wndNode[i].numAntecedents == 1); + blockNode->succedents[i] = &wndNode[i]; + wndNode[i].antecedents[0] = blockNode; + wndNode[i].antType[0] = rf_control; + } + for (i = 0; i < nWmirNodes; i++) { + RF_ASSERT(wmirNode[i].numAntecedents == 1); + blockNode->succedents[i + nWndNodes] = &wmirNode[i]; + wmirNode[i].antecedents[0] = blockNode; + wmirNode[i].antType[0] = rf_control; + } + + /* link the write nodes to the unblock node */ + RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes)); + for (i = 0; i < nWndNodes; i++) { + RF_ASSERT(wndNode[i].numSuccedents == 1); + wndNode[i].succedents[0] = unblockNode; + unblockNode->antecedents[i] = &wndNode[i]; + unblockNode->antType[i] = rf_control; + } + for (i = 0; i < nWmirNodes; i++) { + RF_ASSERT(wmirNode[i].numSuccedents 
== 1); + wmirNode[i].succedents[0] = unblockNode; + unblockNode->antecedents[i + nWndNodes] = &wmirNode[i]; + unblockNode->antType[i + nWndNodes] = rf_control; + } + + /* link the unblock node to the term node */ + RF_ASSERT(unblockNode->numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == 1); + RF_ASSERT(termNode->numSuccedents == 0); + unblockNode->succedents[0] = termNode; + termNode->antecedents[0] = unblockNode; + termNode->antType[0] = rf_control; + + return; } diff --git a/sys/dev/raidframe/rf_dagffwr.h b/sys/dev/raidframe/rf_dagffwr.h index 69c7fdf4832..b7b50da1e70 100644 --- a/sys/dev/raidframe/rf_dagffwr.h +++ b/sys/dev/raidframe/rf_dagffwr.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagffwr.h,v 1.1 1999/01/11 14:29:10 niklas Exp $ */ -/* $NetBSD: rf_dagffwr.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_dagffwr.h,v 1.2 1999/02/16 00:02:31 niklas Exp $ */ +/* $NetBSD: rf_dagffwr.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,77 +27,51 @@ * rights to redistribute these changes. */ -/* - * : - * Log: rf_dagffwr.h,v - * Revision 1.6 1996/07/31 15:35:29 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.5 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.4 1996/06/10 22:25:28 wvcii - * added write dags which do not have a commit node and are - * used in forward and backward error recovery experiments. 
- * - * Revision 1.3 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.2 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.1 1996/05/03 19:20:18 wvcii - * Initial revision - * - */ - #ifndef _RF__RF_DAGFFWR_H_ #define _RF__RF_DAGFFWR_H_ #include "rf_types.h" /* fault-free write DAG creation routines */ -void rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, - RF_IoType_t type); -void rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, RF_IoType_t type); -void rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList); -void rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList); -void rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, int nfaults, - int (*redFunc)(RF_DagNode_t *), int allowBufferRecycle); -void rf_CommonCreateLargeWriteDAGFwd(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, int nfaults, - int (*redFunc)(RF_DagNode_t *), int allowBufferRecycle); -void rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, - RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs); -void rf_CommonCreateSmallWriteDAGFwd(RF_Raid_t *raidPtr, - 
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, - RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs); -void rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList); -void rf_CreateRaidOneWriteDAGFwd(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList); +void +rf_CreateNonRedundantWriteDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, + RF_IoType_t type); +void +rf_CreateRAID0WriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, RF_IoType_t type); +void +rf_CreateSmallWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList); +void +rf_CreateLargeWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList); +void +rf_CommonCreateLargeWriteDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, int nfaults, + int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); + void rf_CommonCreateLargeWriteDAGFwd(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, int nfaults, + int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); + void rf_CommonCreateSmallWriteDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, + 
RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); + void rf_CommonCreateSmallWriteDAGFwd(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, + RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); + void rf_CreateRaidOneWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList); + void rf_CreateRaidOneWriteDAGFwd(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -#endif /* !_RF__RF_DAGFFWR_H_ */ +#endif /* !_RF__RF_DAGFFWR_H_ */ diff --git a/sys/dev/raidframe/rf_dagflags.h b/sys/dev/raidframe/rf_dagflags.h index ac6f5ec5705..a978088ce9f 100644 --- a/sys/dev/raidframe/rf_dagflags.h +++ b/sys/dev/raidframe/rf_dagflags.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagflags.h,v 1.1 1999/01/11 14:29:10 niklas Exp $ */ -/* $NetBSD: rf_dagflags.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */ +/* $OpenBSD: rf_dagflags.h,v 1.2 1999/02/16 00:02:31 niklas Exp $ */ +/* $NetBSD: rf_dagflags.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -35,32 +35,6 @@ * **************************************************************************************/ -/* : - * Log: rf_dagflags.h,v - * Revision 1.10 1996/06/13 19:08:23 jimz - * remove unused BD flag - * - * Revision 1.9 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. 
Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.8 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.7 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.6 1995/12/01 15:59:40 root - * added copyright info - * - */ #ifndef _RF__RF_DAGFLAGS_H_ #define _RF__RF_DAGFLAGS_H_ @@ -74,13 +48,21 @@ * specify USE_DAG. */ -#define RF_DAG_FLAGS_NONE 0 /* no flags */ -#define RF_DAG_SUPPRESS_LOCKS (1<<0) /* supress all stripe locks in the DAG */ -#define RF_DAG_RETURN_ASM (1<<1) /* create an ASM and return it instead of freeing it */ -#define RF_DAG_RETURN_DAG (1<<2) /* create a DAG and return it instead of freeing it */ -#define RF_DAG_NONBLOCKING_IO (1<<3) /* cause DoAccess to be non-blocking */ -#define RF_DAG_ACCESS_COMPLETE (1<<4) /* the access is complete */ -#define RF_DAG_DISPATCH_RETURNED (1<<5) /* used to handle the case where the dag invokes no I/O */ -#define RF_DAG_TEST_ACCESS (1<<6) /* this access came through rf_ioctl instead of rf_strategy */ +#define RF_DAG_FLAGS_NONE 0 /* no flags */ +#define RF_DAG_SUPPRESS_LOCKS (1<<0) /* supress all stripe locks in + * the DAG */ +#define RF_DAG_RETURN_ASM (1<<1) /* create an ASM and return it + * instead of freeing it */ +#define RF_DAG_RETURN_DAG (1<<2) /* create a DAG and return it + * instead of freeing it */ +#define RF_DAG_NONBLOCKING_IO (1<<3) /* cause DoAccess to be + * non-blocking */ +#define RF_DAG_ACCESS_COMPLETE (1<<4) /* the access is complete */ +#define RF_DAG_DISPATCH_RETURNED (1<<5) /* used to handle the case + * where the dag invokes no + * I/O */ +#define RF_DAG_TEST_ACCESS (1<<6) /* this access came through + * rf_ioctl instead of + * rf_strategy */ -#endif /* !_RF__RF_DAGFLAGS_H_ */ +#endif /* !_RF__RF_DAGFLAGS_H_ */ diff --git a/sys/dev/raidframe/rf_dagfuncs.c 
b/sys/dev/raidframe/rf_dagfuncs.c index 78e23ed1d95..a4ea944ba05 100644 --- a/sys/dev/raidframe/rf_dagfuncs.c +++ b/sys/dev/raidframe/rf_dagfuncs.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagfuncs.c,v 1.1 1999/01/11 14:29:10 niklas Exp $ */ -/* $NetBSD: rf_dagfuncs.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_dagfuncs.c,v 1.2 1999/02/16 00:02:32 niklas Exp $ */ +/* $NetBSD: rf_dagfuncs.c,v 1.3 1999/02/05 00:06:08 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -48,172 +48,6 @@ * to check to find out whether or not the acquire was suppressed. */ -/* : - * Log: rf_dagfuncs.c,v - * Revision 1.64 1996/07/31 16:29:26 jimz - * LONGSHIFT -> RF_LONGSHIFT, defined in rf_types.h - * - * Revision 1.63 1996/07/30 04:00:20 jimz - * define LONGSHIFT for mips - * - * Revision 1.62 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.61 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.60 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.59 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.58 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.57 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.56 1996/06/11 01:27:50 jimz - * Fixed bug where diskthread shutdown would crash or hang. This - * turned out to be two distinct bugs: - * (1) [crash] The thread shutdown code wasn't properly waiting for - * all the diskthreads to complete. This caused diskthreads that were - * exiting+cleaning up to unlock a destroyed mutex. 
- * (2) [hang] TerminateDiskQueues wasn't locking, and DiskIODequeue - * only checked for termination _after_ a wakeup if the queues were - * empty. This was a race where the termination wakeup could be lost - * by the dequeueing thread, and the system would hang waiting for the - * thread to exit, while the thread waited for an I/O or a signal to - * check the termination flag. - * - * Revision 1.55 1996/06/10 22:23:18 wvcii - * disk and xor funcs now optionally support undo logging - * for backward error recovery experiments - * - * Revision 1.54 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.53 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.52 1996/06/06 17:28:44 jimz - * add new read mirror partition func, rename old read mirror - * to rf_DiskReadMirrorIdleFunc - * - * Revision 1.51 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.50 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.49 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.48 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.47 1996/05/30 11:29:41 jimz - * Numerous bug fixes. 
Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.46 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.45 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.44 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.43 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.42 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.41 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.40 1996/05/08 15:24:14 wvcii - * modified GenericWakeupFunc to use recover, undone, and panic node states - * - * Revision 1.39 1996/05/02 17:18:01 jimz - * fix up headers for user-land, following ccmn cleanup - * - * Revision 1.38 1996/05/01 16:26:51 jimz - * don't include rf_ccmn.h (get ready to phase out) - * - * Revision 1.37 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.36 1995/12/04 19:19:09 wvcii - * modified DiskReadMirrorFunc - * - added fifth 
parameter, physical disk address of mirror copy - * - SelectIdleDisk conditionally swaps parameters 0 & 4 - * - * Revision 1.35 1995/12/01 15:58:33 root - * added copyright info - * - * Revision 1.34 1995/11/17 18:12:17 amiri - * Changed DiskReadMirrorFunc to use the generic mapping routines - * to find the mirror of the data, function was assuming RAID level 1. - * - * Revision 1.33 1995/11/17 15:15:59 wvcii - * changes in DiskReadMirrorFunc - * - added ASSERTs - * - added call to MapParityRAID1 - * - * Revision 1.32 1995/11/07 16:25:50 wvcii - * added DiskUnlockFuncForThreads - * general debugging of undo functions (first time they were used) - * - * Revision 1.31 1995/09/06 19:23:36 wvcii - * fixed tracing for parity logging nodes - * - * Revision 1.30 95/07/07 00:13:01 wvcii - * added 4th parameter to ParityLogAppend - * - */ - -#ifdef _KERNEL -#define KERNEL -#endif - -#ifndef KERNEL -#include <errno.h> -#endif /* !KERNEL */ - #include <sys/ioctl.h> #include <sys/param.h> @@ -229,41 +63,40 @@ #include "rf_engine.h" #include "rf_dagutils.h" -#ifdef KERNEL #include "rf_kintf.h" -#endif /* KERNEL */ #if RF_INCLUDE_PARITYLOGGING > 0 #include "rf_paritylog.h" -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - -int (*rf_DiskReadFunc)(RF_DagNode_t *); -int (*rf_DiskWriteFunc)(RF_DagNode_t *); -int (*rf_DiskReadUndoFunc)(RF_DagNode_t *); -int (*rf_DiskWriteUndoFunc)(RF_DagNode_t *); -int (*rf_DiskUnlockFunc)(RF_DagNode_t *); -int (*rf_DiskUnlockUndoFunc)(RF_DagNode_t *); -int (*rf_RegularXorUndoFunc)(RF_DagNode_t *); -int (*rf_SimpleXorUndoFunc)(RF_DagNode_t *); -int (*rf_RecoveryXorUndoFunc)(RF_DagNode_t *); +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ + +int (*rf_DiskReadFunc) (RF_DagNode_t *); +int (*rf_DiskWriteFunc) (RF_DagNode_t *); +int (*rf_DiskReadUndoFunc) (RF_DagNode_t *); +int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *); +int (*rf_DiskUnlockFunc) (RF_DagNode_t *); +int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *); +int (*rf_RegularXorUndoFunc) (RF_DagNode_t *); +int 
(*rf_SimpleXorUndoFunc) (RF_DagNode_t *); +int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); /***************************************************************************************** * main (only) configuration routine for this module ****************************************************************************************/ -int rf_ConfigureDAGFuncs(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureDAGFuncs(listp) + RF_ShutdownList_t **listp; { - RF_ASSERT( ((sizeof(long)==8) && RF_LONGSHIFT==3) || ((sizeof(long)==4) && RF_LONGSHIFT==2) ); - rf_DiskReadFunc = rf_DiskReadFuncForThreads; - rf_DiskReadUndoFunc = rf_DiskUndoFunc; - rf_DiskWriteFunc = rf_DiskWriteFuncForThreads; - rf_DiskWriteUndoFunc = rf_DiskUndoFunc; - rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads; - rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc; - rf_RegularXorUndoFunc = rf_NullNodeUndoFunc; - rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc; - rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc; - return(0); + RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2)); + rf_DiskReadFunc = rf_DiskReadFuncForThreads; + rf_DiskReadUndoFunc = rf_DiskUndoFunc; + rf_DiskWriteFunc = rf_DiskWriteFuncForThreads; + rf_DiskWriteUndoFunc = rf_DiskUndoFunc; + rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads; + rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc; + rf_RegularXorUndoFunc = rf_NullNodeUndoFunc; + rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc; + rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc; + return (0); } @@ -271,18 +104,20 @@ int rf_ConfigureDAGFuncs(listp) /***************************************************************************************** * the execution function associated with a terminate node ****************************************************************************************/ -int rf_TerminateFunc(node) - RF_DagNode_t *node; +int +rf_TerminateFunc(node) + RF_DagNode_t *node; { - RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes); - node->status = 
rf_good; - return(rf_FinishNode(node, RF_THREAD_CONTEXT)); + RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes); + node->status = rf_good; + return (rf_FinishNode(node, RF_THREAD_CONTEXT)); } -int rf_TerminateUndoFunc(node) - RF_DagNode_t *node; +int +rf_TerminateUndoFunc(node) + RF_DagNode_t *node; { - return(0); + return (0); } @@ -299,30 +134,33 @@ int rf_TerminateUndoFunc(node) * ****************************************************************************************/ -int rf_DiskReadMirrorIdleFunc(node) - RF_DagNode_t *node; +int +rf_DiskReadMirrorIdleFunc(node) + RF_DagNode_t *node; { - /* select the mirror copy with the shortest queue and fill in node parameters - with physical disk address */ + /* select the mirror copy with the shortest queue and fill in node + * parameters with physical disk address */ - rf_SelectMirrorDiskIdle(node); - return(rf_DiskReadFunc(node)); + rf_SelectMirrorDiskIdle(node); + return (rf_DiskReadFunc(node)); } -int rf_DiskReadMirrorPartitionFunc(node) - RF_DagNode_t *node; +int +rf_DiskReadMirrorPartitionFunc(node) + RF_DagNode_t *node; { - /* select the mirror copy with the shortest queue and fill in node parameters - with physical disk address */ + /* select the mirror copy with the shortest queue and fill in node + * parameters with physical disk address */ - rf_SelectMirrorDiskPartition(node); - return(rf_DiskReadFunc(node)); + rf_SelectMirrorDiskPartition(node); + return (rf_DiskReadFunc(node)); } -int rf_DiskReadMirrorUndoFunc(node) - RF_DagNode_t *node; +int +rf_DiskReadMirrorUndoFunc(node) + RF_DagNode_t *node; { - return(0); + return (0); } @@ -331,346 +169,350 @@ int rf_DiskReadMirrorUndoFunc(node) /***************************************************************************************** * the execution function associated with a parity log update node ****************************************************************************************/ -int rf_ParityLogUpdateFunc(node) - RF_DagNode_t *node; +int 
+rf_ParityLogUpdateFunc(node) + RF_DagNode_t *node; { - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - caddr_t buf = (caddr_t) node->params[1].p; - RF_ParityLogData_t *logData; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - if (node->dagHdr->status == rf_enable) - { - RF_ETIMER_START(timer); - logData = rf_CreateParityLogData(RF_UPDATE, pda, buf, - (RF_Raid_t *) (node->dagHdr->raidPtr), - node->wakeFunc, (void *) node, - node->dagHdr->tracerec, timer); - if (logData) - rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); - else - { - RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->plog_us += RF_ETIMER_VAL_US(timer); - (node->wakeFunc)(node, ENOMEM); + RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; + caddr_t buf = (caddr_t) node->params[1].p; + RF_ParityLogData_t *logData; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + + if (node->dagHdr->status == rf_enable) { + RF_ETIMER_START(timer); + logData = rf_CreateParityLogData(RF_UPDATE, pda, buf, + (RF_Raid_t *) (node->dagHdr->raidPtr), + node->wakeFunc, (void *) node, + node->dagHdr->tracerec, timer); + if (logData) + rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); + else { + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->plog_us += RF_ETIMER_VAL_US(timer); + (node->wakeFunc) (node, ENOMEM); + } } - } - return(0); + return (0); } /***************************************************************************************** * the execution function associated with a parity log overwrite node ****************************************************************************************/ -int rf_ParityLogOverwriteFunc(node) - RF_DagNode_t *node; +int +rf_ParityLogOverwriteFunc(node) + RF_DagNode_t *node; { - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - caddr_t buf = (caddr_t) node->params[1].p; - RF_ParityLogData_t *logData; - RF_AccTraceEntry_t *tracerec = 
node->dagHdr->tracerec; - RF_Etimer_t timer; - - if (node->dagHdr->status == rf_enable) - { - RF_ETIMER_START(timer); - logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr), - node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer); - if (logData) - rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); - else - { - RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->plog_us += RF_ETIMER_VAL_US(timer); - (node->wakeFunc)(node, ENOMEM); + RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; + caddr_t buf = (caddr_t) node->params[1].p; + RF_ParityLogData_t *logData; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + + if (node->dagHdr->status == rf_enable) { + RF_ETIMER_START(timer); + logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr), + node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer); + if (logData) + rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); + else { + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->plog_us += RF_ETIMER_VAL_US(timer); + (node->wakeFunc) (node, ENOMEM); + } } - } - return(0); + return (0); } +#else /* RF_INCLUDE_PARITYLOGGING > 0 */ -#else /* RF_INCLUDE_PARITYLOGGING > 0 */ - -int rf_ParityLogUpdateFunc(node) - RF_DagNode_t *node; +int +rf_ParityLogUpdateFunc(node) + RF_DagNode_t *node; { - return(0); + return (0); } -int rf_ParityLogOverwriteFunc(node) - RF_DagNode_t *node; +int +rf_ParityLogOverwriteFunc(node) + RF_DagNode_t *node; { - return(0); + return (0); } +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - -int rf_ParityLogUpdateUndoFunc(node) - RF_DagNode_t *node; +int +rf_ParityLogUpdateUndoFunc(node) + RF_DagNode_t *node; { - return(0); + return (0); } -int rf_ParityLogOverwriteUndoFunc(node) - RF_DagNode_t *node; +int +rf_ParityLogOverwriteUndoFunc(node) + RF_DagNode_t *node; { - return(0); + return (0); } - 
/***************************************************************************************** * the execution function associated with a NOP node ****************************************************************************************/ -int rf_NullNodeFunc(node) - RF_DagNode_t *node; +int +rf_NullNodeFunc(node) + RF_DagNode_t *node; { - node->status = rf_good; - return(rf_FinishNode(node, RF_THREAD_CONTEXT)); + node->status = rf_good; + return (rf_FinishNode(node, RF_THREAD_CONTEXT)); } -int rf_NullNodeUndoFunc(node) - RF_DagNode_t *node; +int +rf_NullNodeUndoFunc(node) + RF_DagNode_t *node; { - node->status = rf_undone; - return(rf_FinishNode(node, RF_THREAD_CONTEXT)); + node->status = rf_undone; + return (rf_FinishNode(node, RF_THREAD_CONTEXT)); } /***************************************************************************************** * the execution function associated with a disk-read node ****************************************************************************************/ -int rf_DiskReadFuncForThreads(node) - RF_DagNode_t *node; +int +rf_DiskReadFuncForThreads(node) + RF_DagNode_t *node; { - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *)node->params[0].p; - caddr_t buf = (caddr_t)node->params[1].p; - RF_StripeNum_t parityStripeID = (RF_StripeNum_t)node->params[2].v; - unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); - unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); - RF_DiskQueueDataFlags_t flags = 0; - RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? 
RF_IO_TYPE_READ : RF_IO_TYPE_NOP; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - void *b_proc = NULL; + RF_DiskQueueData_t *req; + RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; + caddr_t buf = (caddr_t) node->params[1].p; + RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; + unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); + unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); + unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); + unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); + RF_DiskQueueDataFlags_t flags = 0; + RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP; + RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; + void *b_proc = NULL; #if RF_BACKWARD > 0 - caddr_t undoBuf; + caddr_t undoBuf; #endif -#ifdef KERNEL - if (node->dagHdr->bp) b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; -#endif /* KERNEL */ + if (node->dagHdr->bp) + b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; - RF_ASSERT( !(lock && unlock) ); - flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; - flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; + RF_ASSERT(!(lock && unlock)); + flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; + flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; #if RF_BACKWARD > 0 - /* allocate and zero the undo buffer. - * this is equivalent to copying the original buffer's contents to the undo buffer - * prior to performing the disk read. - * XXX hardcoded 512 bytes per sector! 
- */ - if (node->dagHdr->allocList == NULL) - rf_MakeAllocList(node->dagHdr->allocList); - RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList); -#endif /* RF_BACKWARD > 0 */ - req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, - buf, parityStripeID, which_ru, - (int (*)(void *,int)) node->wakeFunc, - node, NULL, node->dagHdr->tracerec, - (void *)(node->dagHdr->raidPtr), flags, b_proc); - if (!req) { - (node->wakeFunc)(node, ENOMEM); - } else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue( &(dqs[pda->row][pda->col]), req, priority ); - } - return(0); + /* allocate and zero the undo buffer. this is equivalent to copying + * the original buffer's contents to the undo buffer prior to + * performing the disk read. XXX hardcoded 512 bytes per sector! */ + if (node->dagHdr->allocList == NULL) + rf_MakeAllocList(node->dagHdr->allocList); + RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList); +#endif /* RF_BACKWARD > 0 */ + req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, + buf, parityStripeID, which_ru, + (int (*) (void *, int)) node->wakeFunc, + node, NULL, node->dagHdr->tracerec, + (void *) (node->dagHdr->raidPtr), flags, b_proc); + if (!req) { + (node->wakeFunc) (node, ENOMEM); + } else { + node->dagFuncData = (void *) req; + rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority); + } + return (0); } /***************************************************************************************** * the execution function associated with a disk-write node ****************************************************************************************/ -int rf_DiskWriteFuncForThreads(node) - RF_DagNode_t *node; +int +rf_DiskWriteFuncForThreads(node) + RF_DagNode_t *node; { - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *)node->params[0].p; - caddr_t buf = (caddr_t)node->params[1].p; - RF_StripeNum_t parityStripeID = 
(RF_StripeNum_t)node->params[2].v; - unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); - unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); - RF_DiskQueueDataFlags_t flags = 0; - RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - void *b_proc = NULL; + RF_DiskQueueData_t *req; + RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; + caddr_t buf = (caddr_t) node->params[1].p; + RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; + unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); + unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); + unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); + unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); + RF_DiskQueueDataFlags_t flags = 0; + RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP; + RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; + void *b_proc = NULL; #if RF_BACKWARD > 0 - caddr_t undoBuf; + caddr_t undoBuf; #endif -#ifdef KERNEL - if (node->dagHdr->bp) b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; -#endif /* KERNEL */ + if (node->dagHdr->bp) + b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; #if RF_BACKWARD > 0 - /* This area is used only for backward error recovery experiments - * First, schedule allocate a buffer and schedule a pre-read of the disk - * After the pre-read, proceed with the normal disk write - */ - if (node->status == rf_bwd2) { - /* just finished undo logging, now perform real function */ - node->status = rf_fired; - RF_ASSERT( !(lock && unlock) ); - flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; - flags |= (unlock) ? 
RF_UNLOCK_DISK_QUEUE : 0; - req = rf_CreateDiskQueueData(iotype, - pda->startSector, pda->numSector, buf, parityStripeID, which_ru, - node->wakeFunc, (void *) node, NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), flags, b_proc); - - if (!req) { - (node->wakeFunc)(node, ENOMEM); - } else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue( &(dqs[pda->row][pda->col]), req, priority ); - } - } - - else { - /* node status should be rf_fired */ - /* schedule a disk pre-read */ - node->status = rf_bwd1; - RF_ASSERT( !(lock && unlock) ); - flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; - flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; - if (node->dagHdr->allocList == NULL) - rf_MakeAllocList(node->dagHdr->allocList); - RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList); - req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, - pda->startSector, pda->numSector, undoBuf, parityStripeID, which_ru, - node->wakeFunc, (void *) node, NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), flags, b_proc); - - if (!req) { - (node->wakeFunc)(node, ENOMEM); - } else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue( &(dqs[pda->row][pda->col]), req, priority ); - } - } - return(0); -#endif /* RF_BACKWARD > 0 */ - - /* normal processing (rollaway or forward recovery) begins here */ - RF_ASSERT( !(lock && unlock) ); - flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; - flags |= (unlock) ? 
RF_UNLOCK_DISK_QUEUE : 0; - req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, - buf, parityStripeID, which_ru, - (int (*)(void *,int)) node->wakeFunc, - (void *) node, NULL, - node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), - flags, b_proc); - - if (!req) { - (node->wakeFunc)(node, ENOMEM); - } else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue( &(dqs[pda->row][pda->col]), req, priority ); - } - - return(0); -} + /* This area is used only for backward error recovery experiments + * First, schedule allocate a buffer and schedule a pre-read of the + * disk After the pre-read, proceed with the normal disk write */ + if (node->status == rf_bwd2) { + /* just finished undo logging, now perform real function */ + node->status = rf_fired; + RF_ASSERT(!(lock && unlock)); + flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; + flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; + req = rf_CreateDiskQueueData(iotype, + pda->startSector, pda->numSector, buf, parityStripeID, which_ru, + node->wakeFunc, (void *) node, NULL, node->dagHdr->tracerec, + (void *) (node->dagHdr->raidPtr), flags, b_proc); + + if (!req) { + (node->wakeFunc) (node, ENOMEM); + } else { + node->dagFuncData = (void *) req; + rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority); + } + } else { + /* node status should be rf_fired */ + /* schedule a disk pre-read */ + node->status = rf_bwd1; + RF_ASSERT(!(lock && unlock)); + flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; + flags |= (unlock) ? 
RF_UNLOCK_DISK_QUEUE : 0; + if (node->dagHdr->allocList == NULL) + rf_MakeAllocList(node->dagHdr->allocList); + RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList); + req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, + pda->startSector, pda->numSector, undoBuf, parityStripeID, which_ru, + node->wakeFunc, (void *) node, NULL, node->dagHdr->tracerec, + (void *) (node->dagHdr->raidPtr), flags, b_proc); + + if (!req) { + (node->wakeFunc) (node, ENOMEM); + } else { + node->dagFuncData = (void *) req; + rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority); + } + } + return (0); +#endif /* RF_BACKWARD > 0 */ + + /* normal processing (rollaway or forward recovery) begins here */ + RF_ASSERT(!(lock && unlock)); + flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; + flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; + req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, + buf, parityStripeID, which_ru, + (int (*) (void *, int)) node->wakeFunc, + (void *) node, NULL, + node->dagHdr->tracerec, + (void *) (node->dagHdr->raidPtr), + flags, b_proc); + + if (!req) { + (node->wakeFunc) (node, ENOMEM); + } else { + node->dagFuncData = (void *) req; + rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority); + } + return (0); +} /***************************************************************************************** * the undo function for disk nodes * Note: this is not a proper undo of a write node, only locks are released. * old data is not restored to disk! 
****************************************************************************************/ -int rf_DiskUndoFunc(node) - RF_DagNode_t *node; +int +rf_DiskUndoFunc(node) + RF_DagNode_t *node; { - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *)node->params[0].p; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - - req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, - 0L, 0, NULL, 0L, 0, - (int (*)(void *,int)) node->wakeFunc, - (void *) node, - NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), - RF_UNLOCK_DISK_QUEUE, NULL); - if (!req) - (node->wakeFunc)(node, ENOMEM); - else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue( &(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY ); - } - - return(0); -} + RF_DiskQueueData_t *req; + RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; + RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; + + req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, + 0L, 0, NULL, 0L, 0, + (int (*) (void *, int)) node->wakeFunc, + (void *) node, + NULL, node->dagHdr->tracerec, + (void *) (node->dagHdr->raidPtr), + RF_UNLOCK_DISK_QUEUE, NULL); + if (!req) + (node->wakeFunc) (node, ENOMEM); + else { + node->dagFuncData = (void *) req; + rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); + } + return (0); +} /***************************************************************************************** * the execution function associated with an "unlock disk queue" node ****************************************************************************************/ -int rf_DiskUnlockFuncForThreads(node) - RF_DagNode_t *node; +int +rf_DiskUnlockFuncForThreads(node) + RF_DagNode_t *node; { - RF_DiskQueueData_t *req; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *)node->params[0].p; - RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - - req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, - 0L, 0, NULL, 0L, 0, - (int (*)(void 
*,int)) node->wakeFunc, - (void *) node, - NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), - RF_UNLOCK_DISK_QUEUE, NULL); - if (!req) - (node->wakeFunc)(node, ENOMEM); - else { - node->dagFuncData = (void *) req; - rf_DiskIOEnqueue( &(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY ); - } - - return(0); -} + RF_DiskQueueData_t *req; + RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; + RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; + + req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, + 0L, 0, NULL, 0L, 0, + (int (*) (void *, int)) node->wakeFunc, + (void *) node, + NULL, node->dagHdr->tracerec, + (void *) (node->dagHdr->raidPtr), + RF_UNLOCK_DISK_QUEUE, NULL); + if (!req) + (node->wakeFunc) (node, ENOMEM); + else { + node->dagFuncData = (void *) req; + rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); + } + return (0); +} /***************************************************************************************** * Callback routine for DiskRead and DiskWrite nodes. When the disk op completes, * the routine is called to set the node status and inform the execution engine that * the node has fired. 
****************************************************************************************/ -int rf_GenericWakeupFunc(node, status) - RF_DagNode_t *node; - int status; +int +rf_GenericWakeupFunc(node, status) + RF_DagNode_t *node; + int status; { - switch (node->status) { - case rf_bwd1 : - node->status = rf_bwd2; - if (node->dagFuncData) - rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); - return(rf_DiskWriteFuncForThreads(node)); - break; - case rf_fired : - if (status) node->status = rf_bad; - else node->status = rf_good; - break; - case rf_recover : - /* probably should never reach this case */ - if (status) node->status = rf_panic; - else node->status = rf_undone; - break; - default : - RF_PANIC(); - break; - } - if (node->dagFuncData) - rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); - return(rf_FinishNode(node, RF_INTR_CONTEXT)); + switch (node->status) { + case rf_bwd1: + node->status = rf_bwd2; + if (node->dagFuncData) + rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); + return (rf_DiskWriteFuncForThreads(node)); + break; + case rf_fired: + if (status) + node->status = rf_bad; + else + node->status = rf_good; + break; + case rf_recover: + /* probably should never reach this case */ + if (status) + node->status = rf_panic; + else + node->status = rf_undone; + break; + default: + RF_PANIC(); + break; + } + if (node->dagFuncData) + rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); + return (rf_FinishNode(node, RF_INTR_CONTEXT)); } @@ -696,78 +538,87 @@ int rf_GenericWakeupFunc(node, status) * and use the pda params to determine where within the buffer to XOR * the input buffers. 
*/ -int rf_RegularXorFunc(node) - RF_DagNode_t *node; +int +rf_RegularXorFunc(node) + RF_DagNode_t *node; { - RF_Raid_t *raidPtr = (RF_Raid_t *)node->params[node->numParams-1].p; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - int i, retcode; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + int i, retcode; #if RF_BACKWARD > 0 - RF_PhysDiskAddr_t *pda; - caddr_t undoBuf; + RF_PhysDiskAddr_t *pda; + caddr_t undoBuf; #endif - retcode = 0; - if (node->dagHdr->status == rf_enable) { - /* don't do the XOR if the input is the same as the output */ - RF_ETIMER_START(timer); - for (i=0; i<node->numParams-1; i+=2) if (node->params[i+1].p != node->results[0]) { + retcode = 0; + if (node->dagHdr->status == rf_enable) { + /* don't do the XOR if the input is the same as the output */ + RF_ETIMER_START(timer); + for (i = 0; i < node->numParams - 1; i += 2) + if (node->params[i + 1].p != node->results[0]) { #if RF_BACKWARD > 0 - /* This section mimics undo logging for backward error recovery experiments b - * allocating and initializing a buffer - * XXX 512 byte sector size is hard coded! - */ - pda = node->params[i].p; - if (node->dagHdr->allocList == NULL) - rf_MakeAllocList(node->dagHdr->allocList); - RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList); -#endif /* RF_BACKWARD > 0 */ - retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p, - (char *)node->params[i+1].p, (char *) node->results[0], node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - return(rf_GenericWakeupFunc(node, retcode)); /* call wake func explicitly since no I/O in this node */ + /* This section mimics undo logging for + * backward error recovery experiments b + * allocating and initializing a buffer XXX + * 512 byte sector size is hard coded! 
*/ + pda = node->params[i].p; + if (node->dagHdr->allocList == NULL) + rf_MakeAllocList(node->dagHdr->allocList); + RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList); +#endif /* RF_BACKWARD > 0 */ + retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p, + (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp); + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->xor_us += RF_ETIMER_VAL_US(timer); + } + return (rf_GenericWakeupFunc(node, retcode)); /* call wake func + * explicitly since no + * I/O in this node */ } - /* xor the inputs into the result buffer, ignoring placement issues */ -int rf_SimpleXorFunc(node) - RF_DagNode_t *node; +int +rf_SimpleXorFunc(node) + RF_DagNode_t *node; { - RF_Raid_t *raidPtr = (RF_Raid_t *)node->params[node->numParams-1].p; - int i, retcode = 0; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; + int i, retcode = 0; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; #if RF_BACKWARD > 0 - RF_PhysDiskAddr_t *pda; - caddr_t undoBuf; + RF_PhysDiskAddr_t *pda; + caddr_t undoBuf; #endif - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - /* don't do the XOR if the input is the same as the output */ - for (i=0; i<node->numParams-1; i+=2) if (node->params[i+1].p != node->results[0]) { + if (node->dagHdr->status == rf_enable) { + RF_ETIMER_START(timer); + /* don't do the XOR if the input is the same as the output */ + for (i = 0; i < node->numParams - 1; i += 2) + if (node->params[i + 1].p != node->results[0]) { #if RF_BACKWARD > 0 - /* This section mimics undo logging for backward error recovery experiments b - * allocating and initializing a buffer - * XXX 512 byte sector size is hard coded! 
- */ - pda = node->params[i].p; - if (node->dagHdr->allocList == NULL) - rf_MakeAllocList(node->dagHdr->allocList); - RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList); -#endif /* RF_BACKWARD > 0 */ - retcode = rf_bxor((char *)node->params[i+1].p, (char *) node->results[0], - rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[i].p)->numSector), - (struct buf *) node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - - return(rf_GenericWakeupFunc(node, retcode)); /* call wake func explicitly since no I/O in this node */ + /* This section mimics undo logging for + * backward error recovery experiments b + * allocating and initializing a buffer XXX + * 512 byte sector size is hard coded! */ + pda = node->params[i].p; + if (node->dagHdr->allocList == NULL) + rf_MakeAllocList(node->dagHdr->allocList); + RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList); +#endif /* RF_BACKWARD > 0 */ + retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0], + rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector), + (struct buf *) node->dagHdr->bp); + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->xor_us += RF_ETIMER_VAL_US(timer); + } + return (rf_GenericWakeupFunc(node, retcode)); /* call wake func + * explicitly since no + * I/O in this node */ } - /* this xor is used by the degraded-mode dag functions to recover lost data. * the second-to-last parameter is the PDA for the failed portion of the access. * the code here looks at this PDA and assumes that the xor target buffer is @@ -775,45 +626,48 @@ int rf_SimpleXorFunc(node) * the other PDAs in the parameter list to determine where within the target * buffer the corresponding data should be xored. 
*/ -int rf_RecoveryXorFunc(node) - RF_DagNode_t *node; +int +rf_RecoveryXorFunc(node) + RF_DagNode_t *node; { - RF_Raid_t *raidPtr = (RF_Raid_t *)node->params[node->numParams-1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *)node->params[node->numParams-2].p; - int i, retcode = 0; - RF_PhysDiskAddr_t *pda; - int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr,failedPDA->startSector); - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; + RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; + int i, retcode = 0; + RF_PhysDiskAddr_t *pda; + int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); + char *srcbuf, *destbuf; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; #if RF_BACKWARD > 0 - caddr_t undoBuf; + caddr_t undoBuf; #endif - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - for (i=0; i<node->numParams-2; i+=2) if (node->params[i+1].p != node->results[0]) { - pda = (RF_PhysDiskAddr_t *)node->params[i].p; + if (node->dagHdr->status == rf_enable) { + RF_ETIMER_START(timer); + for (i = 0; i < node->numParams - 2; i += 2) + if (node->params[i + 1].p != node->results[0]) { + pda = (RF_PhysDiskAddr_t *) node->params[i].p; #if RF_BACKWARD > 0 - /* This section mimics undo logging for backward error recovery experiments b - * allocating and initializing a buffer - * XXX 512 byte sector size is hard coded! 
- */ - if (node->dagHdr->allocList == NULL) - rf_MakeAllocList(node->dagHdr->allocList); - RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList); -#endif /* RF_BACKWARD > 0 */ - srcbuf = (char *)node->params[i+1].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset-failedSUOffset); - retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - return (rf_GenericWakeupFunc(node, retcode)); + /* This section mimics undo logging for + * backward error recovery experiments b + * allocating and initializing a buffer XXX + * 512 byte sector size is hard coded! */ + if (node->dagHdr->allocList == NULL) + rf_MakeAllocList(node->dagHdr->allocList); + RF_CallocAndAdd(undoBuf, 1, 512 * pda->numSector, (caddr_t), node->dagHdr->allocList); +#endif /* RF_BACKWARD > 0 */ + srcbuf = (char *) node->params[i + 1].p; + suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); + destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); + retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp); + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->xor_us += RF_ETIMER_VAL_US(timer); + } + return (rf_GenericWakeupFunc(node, retcode)); } - /***************************************************************************************** * The next three functions are utilities used by the above xor-execution functions. ****************************************************************************************/ @@ -827,58 +681,49 @@ int rf_RecoveryXorFunc(node) * SU in size the XOR occurs on only the portion of targbuf identified in the pda. 
*/ -int rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - char *srcbuf; - char *targbuf; - void *bp; +int +rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp) + RF_Raid_t *raidPtr; + RF_PhysDiskAddr_t *pda; + char *srcbuf; + char *targbuf; + void *bp; { - char *targptr; - int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int SUOffset = pda->startSector % sectPerSU; - int length, retcode = 0; - - RF_ASSERT(pda->numSector <= sectPerSU); - - targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset); - length = rf_RaidAddressToByte(raidPtr, pda->numSector); - retcode = rf_bxor(srcbuf, targptr, length, bp); - return(retcode); -} + char *targptr; + int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; + int SUOffset = pda->startSector % sectPerSU; + int length, retcode = 0; + + RF_ASSERT(pda->numSector <= sectPerSU); + targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset); + length = rf_RaidAddressToByte(raidPtr, pda->numSector); + retcode = rf_bxor(srcbuf, targptr, length, bp); + return (retcode); +} /* it really should be the case that the buffer pointers (returned by malloc) * are aligned to the natural word size of the machine, so this is the only * case we optimize for. The length should always be a multiple of the sector * size, so there should be no problem with leftover bytes at the end. 
*/ -int rf_bxor(src, dest, len, bp) - char *src; - char *dest; - int len; - void *bp; +int +rf_bxor(src, dest, len, bp) + char *src; + char *dest; + int len; + void *bp; { - unsigned mask = sizeof(long) -1, retcode = 0; - - if ( !(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len&mask) ) { - retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len>>RF_LONGSHIFT, bp); - } else { - RF_ASSERT(0); - } - return(retcode); -} + unsigned mask = sizeof(long) - 1, retcode = 0; + if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) { + retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp); + } else { + RF_ASSERT(0); + } + return (retcode); +} /* map a user buffer into kernel space, if necessary */ -#ifdef KERNEL -#if defined(__NetBSD__) || defined(__OpenBSD__) -/* XXX Not a clue if this is even close.. */ #define REMAP_VA(_bp,x,y) (y) = (x) -#else -#define REMAP_VA(_bp,x,y) (y) = (unsigned long *) ((IS_SYS_VA(x)) ? (unsigned long *)(x) : (unsigned long *) rf_MapToKernelSpace((struct buf *) (_bp), (caddr_t)(x))) -#endif /* __NetBSD__ || __OpenBSD__ */ -#else /* KERNEL */ -#define REMAP_VA(_bp,x,y) (y) = (x) -#endif /* KERNEL */ /* When XORing in kernel mode, we need to map each user page to kernel space before we can access it. * We don't want to assume anything about which input buffers are in kernel/user @@ -886,61 +731,75 @@ int rf_bxor(src, dest, len, bp) * of bytes that we can xor without crossing any page boundaries, and do only this many * bytes before the next remap. 
*/ -int rf_longword_bxor(src, dest, len, bp) - register unsigned long *src; - register unsigned long *dest; - int len; /* longwords */ - void *bp; +int +rf_longword_bxor(src, dest, len, bp) + register unsigned long *src; + register unsigned long *dest; + int len; /* longwords */ + void *bp; { - register unsigned long *end = src+len; - register unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ - register unsigned long *pg_src, *pg_dest; /* per-page source/dest pointers */ - int longs_this_time; /* # longwords to xor in the current iteration */ - - REMAP_VA(bp, src, pg_src); - REMAP_VA(bp, dest, pg_dest); - if (!pg_src || !pg_dest) return(EFAULT); - - while (len >= 4 ) { - longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */ - src += longs_this_time; dest+= longs_this_time; len -= longs_this_time; - while (longs_this_time >= 4) { - d0 = pg_dest[0]; - d1 = pg_dest[1]; - d2 = pg_dest[2]; - d3 = pg_dest[3]; - s0 = pg_src[0]; - s1 = pg_src[1]; - s2 = pg_src[2]; - s3 = pg_src[3]; - pg_dest[0] = d0 ^ s0; - pg_dest[1] = d1 ^ s1; - pg_dest[2] = d2 ^ s2; - pg_dest[3] = d3 ^ s3; - pg_src += 4; - pg_dest += 4; - longs_this_time -= 4; - } - while (longs_this_time > 0) { /* cannot cross any page boundaries here */ - *pg_dest++ ^= *pg_src++; - longs_this_time--; - } - - /* either we're done, or we've reached a page boundary on one (or possibly both) of the pointers */ - if (len) { - if (RF_PAGE_ALIGNED(src)) REMAP_VA(bp, src, pg_src); - if (RF_PAGE_ALIGNED(dest)) REMAP_VA(bp, dest, pg_dest); - if (!pg_src || !pg_dest) return(EFAULT); - } - } - while (src < end) { - *pg_dest++ ^= *pg_src++; - src++; dest++; len--; - if (RF_PAGE_ALIGNED(src)) REMAP_VA(bp, src, pg_src); - if (RF_PAGE_ALIGNED(dest)) REMAP_VA(bp, dest, pg_dest); - } - RF_ASSERT(len == 0); - return(0); + register unsigned long *end = src + len; + register unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ + register unsigned long *pg_src, 
*pg_dest; /* per-page source/dest + * pointers */ + int longs_this_time;/* # longwords to xor in the current iteration */ + + REMAP_VA(bp, src, pg_src); + REMAP_VA(bp, dest, pg_dest); + if (!pg_src || !pg_dest) + return (EFAULT); + + while (len >= 4) { + longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */ + src += longs_this_time; + dest += longs_this_time; + len -= longs_this_time; + while (longs_this_time >= 4) { + d0 = pg_dest[0]; + d1 = pg_dest[1]; + d2 = pg_dest[2]; + d3 = pg_dest[3]; + s0 = pg_src[0]; + s1 = pg_src[1]; + s2 = pg_src[2]; + s3 = pg_src[3]; + pg_dest[0] = d0 ^ s0; + pg_dest[1] = d1 ^ s1; + pg_dest[2] = d2 ^ s2; + pg_dest[3] = d3 ^ s3; + pg_src += 4; + pg_dest += 4; + longs_this_time -= 4; + } + while (longs_this_time > 0) { /* cannot cross any page + * boundaries here */ + *pg_dest++ ^= *pg_src++; + longs_this_time--; + } + + /* either we're done, or we've reached a page boundary on one + * (or possibly both) of the pointers */ + if (len) { + if (RF_PAGE_ALIGNED(src)) + REMAP_VA(bp, src, pg_src); + if (RF_PAGE_ALIGNED(dest)) + REMAP_VA(bp, dest, pg_dest); + if (!pg_src || !pg_dest) + return (EFAULT); + } + } + while (src < end) { + *pg_dest++ ^= *pg_src++; + src++; + dest++; + len--; + if (RF_PAGE_ALIGNED(src)) + REMAP_VA(bp, src, pg_src); + if (RF_PAGE_ALIGNED(dest)) + REMAP_VA(bp, dest, pg_dest); + } + RF_ASSERT(len == 0); + return (0); } @@ -949,102 +808,182 @@ int rf_longword_bxor(src, dest, len, bp) a may equal dst see comment above longword_bxor */ -int rf_longword_bxor3(dst,a,b,c,len, bp) - register unsigned long *dst; - register unsigned long *a; - register unsigned long *b; - register unsigned long *c; - int len; /* length in longwords */ - void *bp; +int +rf_longword_bxor3(dst, a, b, c, len, bp) + register unsigned long *dst; + register unsigned long *a; + register unsigned long *b; + register unsigned long *c; + int len; /* length in longwords */ + void *bp; { - 
unsigned long a0,a1,a2,a3, b0,b1,b2,b3; - register unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest pointers */ - int longs_this_time; /* # longs to xor in the current iteration */ - char dst_is_a = 0; - - REMAP_VA(bp, a, pg_a); - REMAP_VA(bp, b, pg_b); - REMAP_VA(bp, c, pg_c); - if (a == dst) {pg_dst = pg_a; dst_is_a = 1;} else { REMAP_VA(bp, dst, pg_dst); } - - /* align dest to cache line. Can't cross a pg boundary on dst here. */ - while ((((unsigned long) pg_dst) & 0x1f)) { - *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; - dst++; a++; b++; c++; - if (RF_PAGE_ALIGNED(a)) {REMAP_VA(bp, a, pg_a); if (!pg_a) return(EFAULT);} - if (RF_PAGE_ALIGNED(b)) {REMAP_VA(bp, a, pg_b); if (!pg_b) return(EFAULT);} - if (RF_PAGE_ALIGNED(c)) {REMAP_VA(bp, a, pg_c); if (!pg_c) return(EFAULT);} - len--; - } - - while (len > 4 ) { - longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT); - a+= longs_this_time; b+= longs_this_time; c+= longs_this_time; dst+=longs_this_time; len-=longs_this_time; - while (longs_this_time >= 4) { - a0 = pg_a[0]; longs_this_time -= 4; - - a1 = pg_a[1]; - a2 = pg_a[2]; - - a3 = pg_a[3]; pg_a += 4; - - b0 = pg_b[0]; - b1 = pg_b[1]; - - b2 = pg_b[2]; - b3 = pg_b[3]; - /* start dual issue */ - a0 ^= b0; b0 = pg_c[0]; - - pg_b += 4; a1 ^= b1; - - a2 ^= b2; a3 ^= b3; - - b1 = pg_c[1]; a0 ^= b0; - - b2 = pg_c[2]; a1 ^= b1; - - b3 = pg_c[3]; a2 ^= b2; - - pg_dst[0] = a0; a3 ^= b3; - pg_dst[1] = a1; pg_c += 4; - pg_dst[2] = a2; - pg_dst[3] = a3; pg_dst += 4; - } - while (longs_this_time > 0) { /* cannot cross any page boundaries here */ - *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; - longs_this_time--; - } - - if (len) { - if (RF_PAGE_ALIGNED(a)) {REMAP_VA(bp, a, pg_a); if (!pg_a) return(EFAULT); if (dst_is_a) pg_dst = pg_a;} - if (RF_PAGE_ALIGNED(b)) {REMAP_VA(bp, b, pg_b); if (!pg_b) return(EFAULT);} - if (RF_PAGE_ALIGNED(c)) {REMAP_VA(bp, c, pg_c); if (!pg_c) return(EFAULT);} - if 
(!dst_is_a) if (RF_PAGE_ALIGNED(dst)) {REMAP_VA(bp, dst, pg_dst); if (!pg_dst) return(EFAULT);} - } - } - while (len) { - *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; - dst++; a++; b++; c++; - if (RF_PAGE_ALIGNED(a)) {REMAP_VA(bp, a, pg_a); if (!pg_a) return(EFAULT); if (dst_is_a) pg_dst = pg_a;} - if (RF_PAGE_ALIGNED(b)) {REMAP_VA(bp, b, pg_b); if (!pg_b) return(EFAULT);} - if (RF_PAGE_ALIGNED(c)) {REMAP_VA(bp, c, pg_c); if (!pg_c) return(EFAULT);} - if (!dst_is_a) if (RF_PAGE_ALIGNED(dst)) {REMAP_VA(bp, dst, pg_dst); if (!pg_dst) return(EFAULT);} - len--; - } - return(0); + unsigned long a0, a1, a2, a3, b0, b1, b2, b3; + register unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest + * pointers */ + int longs_this_time;/* # longs to xor in the current iteration */ + char dst_is_a = 0; + + REMAP_VA(bp, a, pg_a); + REMAP_VA(bp, b, pg_b); + REMAP_VA(bp, c, pg_c); + if (a == dst) { + pg_dst = pg_a; + dst_is_a = 1; + } else { + REMAP_VA(bp, dst, pg_dst); + } + + /* align dest to cache line. Can't cross a pg boundary on dst here. 
*/ + while ((((unsigned long) pg_dst) & 0x1f)) { + *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; + dst++; + a++; + b++; + c++; + if (RF_PAGE_ALIGNED(a)) { + REMAP_VA(bp, a, pg_a); + if (!pg_a) + return (EFAULT); + } + if (RF_PAGE_ALIGNED(b)) { + REMAP_VA(bp, a, pg_b); + if (!pg_b) + return (EFAULT); + } + if (RF_PAGE_ALIGNED(c)) { + REMAP_VA(bp, a, pg_c); + if (!pg_c) + return (EFAULT); + } + len--; + } + + while (len > 4) { + longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT); + a += longs_this_time; + b += longs_this_time; + c += longs_this_time; + dst += longs_this_time; + len -= longs_this_time; + while (longs_this_time >= 4) { + a0 = pg_a[0]; + longs_this_time -= 4; + + a1 = pg_a[1]; + a2 = pg_a[2]; + + a3 = pg_a[3]; + pg_a += 4; + + b0 = pg_b[0]; + b1 = pg_b[1]; + + b2 = pg_b[2]; + b3 = pg_b[3]; + /* start dual issue */ + a0 ^= b0; + b0 = pg_c[0]; + + pg_b += 4; + a1 ^= b1; + + a2 ^= b2; + a3 ^= b3; + + b1 = pg_c[1]; + a0 ^= b0; + + b2 = pg_c[2]; + a1 ^= b1; + + b3 = pg_c[3]; + a2 ^= b2; + + pg_dst[0] = a0; + a3 ^= b3; + pg_dst[1] = a1; + pg_c += 4; + pg_dst[2] = a2; + pg_dst[3] = a3; + pg_dst += 4; + } + while (longs_this_time > 0) { /* cannot cross any page + * boundaries here */ + *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; + longs_this_time--; + } + + if (len) { + if (RF_PAGE_ALIGNED(a)) { + REMAP_VA(bp, a, pg_a); + if (!pg_a) + return (EFAULT); + if (dst_is_a) + pg_dst = pg_a; + } + if (RF_PAGE_ALIGNED(b)) { + REMAP_VA(bp, b, pg_b); + if (!pg_b) + return (EFAULT); + } + if (RF_PAGE_ALIGNED(c)) { + REMAP_VA(bp, c, pg_c); + if (!pg_c) + return (EFAULT); + } + if (!dst_is_a) + if (RF_PAGE_ALIGNED(dst)) { + REMAP_VA(bp, dst, pg_dst); + if (!pg_dst) + return (EFAULT); + } + } + } + while (len) { + *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; + dst++; + a++; + b++; + c++; + if (RF_PAGE_ALIGNED(a)) { + REMAP_VA(bp, a, pg_a); + if (!pg_a) + return (EFAULT); + if (dst_is_a) + pg_dst = pg_a; + } + if 
(RF_PAGE_ALIGNED(b)) { + REMAP_VA(bp, b, pg_b); + if (!pg_b) + return (EFAULT); + } + if (RF_PAGE_ALIGNED(c)) { + REMAP_VA(bp, c, pg_c); + if (!pg_c) + return (EFAULT); + } + if (!dst_is_a) + if (RF_PAGE_ALIGNED(dst)) { + REMAP_VA(bp, dst, pg_dst); + if (!pg_dst) + return (EFAULT); + } + len--; + } + return (0); } -int rf_bxor3(dst,a,b,c,len, bp) - register unsigned char *dst; - register unsigned char *a; - register unsigned char *b; - register unsigned char *c; - unsigned long len; - void *bp; +int +rf_bxor3(dst, a, b, c, len, bp) + register unsigned char *dst; + register unsigned char *a; + register unsigned char *b; + register unsigned char *c; + unsigned long len; + void *bp; { - RF_ASSERT(((RF_UL(dst)|RF_UL(a)|RF_UL(b)|RF_UL(c)|len) & 0x7) == 0); + RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0); - return(rf_longword_bxor3((unsigned long *)dst, (unsigned long *)a, - (unsigned long *)b, (unsigned long *)c, len>>RF_LONGSHIFT, bp)); + return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a, + (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp)); } diff --git a/sys/dev/raidframe/rf_dagfuncs.h b/sys/dev/raidframe/rf_dagfuncs.h index ab19b712421..0382fab2455 100644 --- a/sys/dev/raidframe/rf_dagfuncs.h +++ b/sys/dev/raidframe/rf_dagfuncs.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagfuncs.h,v 1.1 1999/01/11 14:29:11 niklas Exp $ */ -/* $NetBSD: rf_dagfuncs.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_dagfuncs.h,v 1.2 1999/02/16 00:02:32 niklas Exp $ */ +/* $NetBSD: rf_dagfuncs.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -30,103 +30,56 @@ /***************************************************************************************** * * dagfuncs.h -- header file for DAG node execution routines - * - ****************************************************************************************/ - -/* - * : - * Log: rf_dagfuncs.h,v - * Revision 1.17 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.16 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.15 1996/06/06 17:27:20 jimz - * added another read mirror func (partitioning), changed names so dag - * creation routines can use the appropriate one - * - * Revision 1.14 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.13 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags * - * Revision 1.12 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.11 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.10 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.9 1995/12/01 15:56:46 root - * added copyright info - * - * Revision 1.8 1995/11/07 16:25:23 wvcii - * added DiskUnlockFuncForThreads - * - */ + ****************************************************************************************/ #ifndef _RF__RF_DAGFUNCS_H_ #define _RF__RF_DAGFUNCS_H_ -int rf_ConfigureDAGFuncs(RF_ShutdownList_t **listp); -int rf_TerminateFunc(RF_DagNode_t *node); -int rf_TerminateUndoFunc(RF_DagNode_t *node); -int rf_DiskReadMirrorIdleFunc(RF_DagNode_t *node); -int rf_DiskReadMirrorPartitionFunc(RF_DagNode_t *node); -int rf_DiskReadMirrorUndoFunc(RF_DagNode_t *node); -int rf_ParityLogUpdateFunc(RF_DagNode_t *node); -int rf_ParityLogOverwriteFunc(RF_DagNode_t *node); -int rf_ParityLogUpdateUndoFunc(RF_DagNode_t *node); -int rf_ParityLogOverwriteUndoFunc(RF_DagNode_t *node); -int rf_NullNodeFunc(RF_DagNode_t *node); -int rf_NullNodeUndoFunc(RF_DagNode_t *node); -int rf_DiskReadFuncForThreads(RF_DagNode_t *node); -int rf_DiskWriteFuncForThreads(RF_DagNode_t *node); -int rf_DiskUndoFunc(RF_DagNode_t *node); -int rf_DiskUnlockFuncForThreads(RF_DagNode_t *node); -int rf_GenericWakeupFunc(RF_DagNode_t *node, int status); -int rf_RegularXorFunc(RF_DagNode_t *node); -int rf_SimpleXorFunc(RF_DagNode_t *node); -int rf_RecoveryXorFunc(RF_DagNode_t *node); -int rf_XorIntoBuffer(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, char *srcbuf, - char *targbuf, void *bp); -int rf_bxor(char *src, char *dest, int len, void *bp); -int rf_longword_bxor(register unsigned 
long *src, register unsigned long *dest, - int len, void *bp); -int rf_longword_bxor3(register unsigned long *dest, register unsigned long *a, - register unsigned long *b, register unsigned long *c, int len, void *bp); -int rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b, - unsigned char *c, unsigned long len, void *bp); +int rf_ConfigureDAGFuncs(RF_ShutdownList_t ** listp); +int rf_TerminateFunc(RF_DagNode_t * node); +int rf_TerminateUndoFunc(RF_DagNode_t * node); +int rf_DiskReadMirrorIdleFunc(RF_DagNode_t * node); +int rf_DiskReadMirrorPartitionFunc(RF_DagNode_t * node); +int rf_DiskReadMirrorUndoFunc(RF_DagNode_t * node); +int rf_ParityLogUpdateFunc(RF_DagNode_t * node); +int rf_ParityLogOverwriteFunc(RF_DagNode_t * node); +int rf_ParityLogUpdateUndoFunc(RF_DagNode_t * node); +int rf_ParityLogOverwriteUndoFunc(RF_DagNode_t * node); +int rf_NullNodeFunc(RF_DagNode_t * node); +int rf_NullNodeUndoFunc(RF_DagNode_t * node); +int rf_DiskReadFuncForThreads(RF_DagNode_t * node); +int rf_DiskWriteFuncForThreads(RF_DagNode_t * node); +int rf_DiskUndoFunc(RF_DagNode_t * node); +int rf_DiskUnlockFuncForThreads(RF_DagNode_t * node); +int rf_GenericWakeupFunc(RF_DagNode_t * node, int status); +int rf_RegularXorFunc(RF_DagNode_t * node); +int rf_SimpleXorFunc(RF_DagNode_t * node); +int rf_RecoveryXorFunc(RF_DagNode_t * node); +int +rf_XorIntoBuffer(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, char *srcbuf, + char *targbuf, void *bp); +int rf_bxor(char *src, char *dest, int len, void *bp); +int +rf_longword_bxor(register unsigned long *src, register unsigned long *dest, + int len, void *bp); +int +rf_longword_bxor3(register unsigned long *dest, register unsigned long *a, + register unsigned long *b, register unsigned long *c, int len, void *bp); +int +rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b, + unsigned char *c, unsigned long len, void *bp); /* function ptrs defined in ConfigureDAGFuncs() */ -extern int (*rf_DiskReadFunc)(RF_DagNode_t 
*); -extern int (*rf_DiskWriteFunc)(RF_DagNode_t *); -extern int (*rf_DiskReadUndoFunc)(RF_DagNode_t *); -extern int (*rf_DiskWriteUndoFunc)(RF_DagNode_t *); -extern int (*rf_DiskUnlockFunc)(RF_DagNode_t *); -extern int (*rf_DiskUnlockUndoFunc)(RF_DagNode_t *); -extern int (*rf_SimpleXorUndoFunc)(RF_DagNode_t *); -extern int (*rf_RegularXorUndoFunc)(RF_DagNode_t *); -extern int (*rf_RecoveryXorUndoFunc)(RF_DagNode_t *); +extern int (*rf_DiskReadFunc) (RF_DagNode_t *); +extern int (*rf_DiskWriteFunc) (RF_DagNode_t *); +extern int (*rf_DiskReadUndoFunc) (RF_DagNode_t *); +extern int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *); +extern int (*rf_DiskUnlockFunc) (RF_DagNode_t *); +extern int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *); +extern int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *); +extern int (*rf_RegularXorUndoFunc) (RF_DagNode_t *); +extern int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); /* macros for manipulating the param[3] in a read or write node */ #define RF_CREATE_PARAM3(pri, lk, unlk, wru) (((RF_uint64)(((wru&0xFFFFFF)<<8)|((lk)?0x10:0)|((unlk)?0x20:0)|((pri)&0xF)) )) @@ -135,4 +88,4 @@ extern int (*rf_RecoveryXorUndoFunc)(RF_DagNode_t *); #define RF_EXTRACT_UNLOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 5) & 0x1) #define RF_EXTRACT_RU(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 8) & 0xFFFFFF) -#endif /* !_RF__RF_DAGFUNCS_H_ */ +#endif /* !_RF__RF_DAGFUNCS_H_ */ diff --git a/sys/dev/raidframe/rf_dagutils.c b/sys/dev/raidframe/rf_dagutils.c index b050b832af6..b0d41a7c106 100644 --- a/sys/dev/raidframe/rf_dagutils.c +++ b/sys/dev/raidframe/rf_dagutils.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagutils.c,v 1.1 1999/01/11 14:29:11 niklas Exp $ */ -/* $NetBSD: rf_dagutils.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_dagutils.c,v 1.2 1999/02/16 00:02:33 niklas Exp $ */ +/* $NetBSD: rf_dagutils.c,v 1.3 1999/02/05 00:06:08 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -33,157 +33,6 @@ * *****************************************************************************/ -/* - * : - * Log: rf_dagutils.c,v - * Revision 1.55 1996/08/22 14:39:47 jimz - * reduce v/k fraction (better load balancing) - * - * Revision 1.54 1996/08/21 04:14:12 jimz - * minor workload shift tweaking - * - * Revision 1.53 1996/08/20 23:41:16 jimz - * fix up workload shift computation - * - * Revision 1.52 1996/08/20 22:34:16 jimz - * first cut at fixing workload shift - * needs work - * - * Revision 1.51 1996/08/20 16:51:16 jimz - * comment more verbosely compute_workload_shift() - * - * Revision 1.50 1996/08/11 00:40:50 jimz - * fix up broken comment - * - * Revision 1.49 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.48 1996/07/27 18:40:01 jimz - * cleanup sweep - * - * Revision 1.47 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.46 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.45 1996/06/17 03:24:59 jimz - * include shutdown.h for define of now-macroized ShutdownCreate - * - * Revision 1.44 1996/06/10 12:50:57 jimz - * Add counters to freelists to track number of allocations, frees, - * grows, max size, etc. Adjust a couple sets of PRIME params based - * on the results. - * - * Revision 1.43 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.42 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.41 1996/06/06 17:28:58 jimz - * make PrintNodeInfoString aware of new mirroring funcs - * - * Revision 1.40 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.39 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.38 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.37 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.36 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.35 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.34 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.33 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.32 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.31 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.30 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.29 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.28 1996/05/16 23:05:52 jimz - * changed InitNode() to use dag_ptrs field of node when appropriate - * (see rf_dag.h or comments within InitNode() for details) - * - * Revision 1.27 1996/05/16 15:37:19 jimz - * convert to RF_FREELIST stuff for dag headers - * - * Revision 1.26 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.25 1996/05/03 19:56:15 wvcii - * added misc routines from old dag creation files - * - * Revision 1.24 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.23 1995/12/01 15:59:50 root - * added copyright info - * - * Revision 1.22 1995/11/17 15:14:12 wvcii - * PrintDAG now processes DiskReadMirrorFunc nodes - * - * Revision 1.21 1995/11/07 16:22:38 wvcii - * InitNode and InitNodeFromBuf now initialize commit fields - * beefed up ValidateDag - * prettied up PrintDAGList - * - */ - #include "rf_archs.h" #include "rf_types.h" #include "rf_threadstuff.h" @@ -201,16 +50,17 @@ 
RF_RedFuncs_t rf_xorFuncs = { rf_RegularXorFunc, "Reg Xr", - rf_SimpleXorFunc, "Simple Xr"}; +rf_SimpleXorFunc, "Simple Xr"}; RF_RedFuncs_t rf_xorRecoveryFuncs = { rf_RecoveryXorFunc, "Recovery Xr", - rf_RecoveryXorFunc, "Recovery Xr"}; +rf_RecoveryXorFunc, "Recovery Xr"}; static void rf_RecurPrintDAG(RF_DagNode_t *, int, int); static void rf_PrintDAG(RF_DagHeader_t *); -static int rf_ValidateBranch(RF_DagNode_t *, int *, int *, - RF_DagNode_t **, int ); +static int +rf_ValidateBranch(RF_DagNode_t *, int *, int *, + RF_DagNode_t **, int); static void rf_ValidateBranchVisitedBits(RF_DagNode_t *, int, int); static void rf_ValidateVisitedBits(RF_DagHeader_t *); @@ -222,77 +72,75 @@ static void rf_ValidateVisitedBits(RF_DagHeader_t *); * successors array. * *****************************************************************************/ -void rf_InitNode( - RF_DagNode_t *node, - RF_NodeStatus_t initstatus, - int commit, - int (*doFunc)(RF_DagNode_t *node), - int (*undoFunc)(RF_DagNode_t *node), - int (*wakeFunc)(RF_DagNode_t *node,int status), - int nSucc, - int nAnte, - int nParam, - int nResult, - RF_DagHeader_t *hdr, - char *name, - RF_AllocListElem_t *alist) +void +rf_InitNode( + RF_DagNode_t * node, + RF_NodeStatus_t initstatus, + int commit, + int (*doFunc) (RF_DagNode_t * node), + int (*undoFunc) (RF_DagNode_t * node), + int (*wakeFunc) (RF_DagNode_t * node, int status), + int nSucc, + int nAnte, + int nParam, + int nResult, + RF_DagHeader_t * hdr, + char *name, + RF_AllocListElem_t * alist) { - void **ptrs; - int nptrs; - - if (nAnte > RF_MAX_ANTECEDENTS) - RF_PANIC(); - node->status = initstatus; - node->commitNode = commit; - node->doFunc = doFunc; - node->undoFunc = undoFunc; - node->wakeFunc = wakeFunc; - node->numParams = nParam; - node->numResults = nResult; - node->numAntecedents = nAnte; - node->numAntDone = 0; - node->next = NULL; - node->numSuccedents = nSucc; - node->name = name; - node->dagHdr = hdr; - node->visited = 0; - - /* allocate all the 
pointers with one call to malloc */ - nptrs = nSucc+nAnte+nResult+nSucc; - - if (nptrs <= RF_DAG_PTRCACHESIZE) { - /* - * The dag_ptrs field of the node is basically some scribble - * space to be used here. We could get rid of it, and always - * allocate the range of pointers, but that's expensive. So, - * we pick a "common case" size for the pointer cache. Hopefully, - * we'll find that: - * (1) Generally, nptrs doesn't exceed RF_DAG_PTRCACHESIZE by - * only a little bit (least efficient case) - * (2) Generally, ntprs isn't a lot less than RF_DAG_PTRCACHESIZE - * (wasted memory) - */ - ptrs = (void **)node->dag_ptrs; - } - else { - RF_CallocAndAdd(ptrs, nptrs, sizeof(void *), (void **), alist); - } - node->succedents = (nSucc) ? (RF_DagNode_t **) ptrs : NULL; - node->antecedents = (nAnte) ? (RF_DagNode_t **) (ptrs+nSucc) : NULL; - node->results = (nResult) ? (void **) (ptrs+nSucc+nAnte) : NULL; - node->propList = (nSucc) ? (RF_PropHeader_t **) (ptrs+nSucc+nAnte+nResult) : NULL; - - if (nParam) { - if (nParam <= RF_DAG_PARAMCACHESIZE) { - node->params = (RF_DagParam_t *)node->dag_params; - } - else { - RF_CallocAndAdd(node->params, nParam, sizeof(RF_DagParam_t), (RF_DagParam_t *), alist); - } - } - else { - node->params = NULL; - } + void **ptrs; + int nptrs; + + if (nAnte > RF_MAX_ANTECEDENTS) + RF_PANIC(); + node->status = initstatus; + node->commitNode = commit; + node->doFunc = doFunc; + node->undoFunc = undoFunc; + node->wakeFunc = wakeFunc; + node->numParams = nParam; + node->numResults = nResult; + node->numAntecedents = nAnte; + node->numAntDone = 0; + node->next = NULL; + node->numSuccedents = nSucc; + node->name = name; + node->dagHdr = hdr; + node->visited = 0; + + /* allocate all the pointers with one call to malloc */ + nptrs = nSucc + nAnte + nResult + nSucc; + + if (nptrs <= RF_DAG_PTRCACHESIZE) { + /* + * The dag_ptrs field of the node is basically some scribble + * space to be used here. 
We could get rid of it, and always + * allocate the range of pointers, but that's expensive. So, + * we pick a "common case" size for the pointer cache. Hopefully, + * we'll find that: + * (1) Generally, nptrs doesn't exceed RF_DAG_PTRCACHESIZE by + * only a little bit (least efficient case) + * (2) Generally, ntprs isn't a lot less than RF_DAG_PTRCACHESIZE + * (wasted memory) + */ + ptrs = (void **) node->dag_ptrs; + } else { + RF_CallocAndAdd(ptrs, nptrs, sizeof(void *), (void **), alist); + } + node->succedents = (nSucc) ? (RF_DagNode_t **) ptrs : NULL; + node->antecedents = (nAnte) ? (RF_DagNode_t **) (ptrs + nSucc) : NULL; + node->results = (nResult) ? (void **) (ptrs + nSucc + nAnte) : NULL; + node->propList = (nSucc) ? (RF_PropHeader_t **) (ptrs + nSucc + nAnte + nResult) : NULL; + + if (nParam) { + if (nParam <= RF_DAG_PARAMCACHESIZE) { + node->params = (RF_DagParam_t *) node->dag_params; + } else { + RF_CallocAndAdd(node->params, nParam, sizeof(RF_DagParam_t), (RF_DagParam_t *), alist); + } + } else { + node->params = NULL; + } } @@ -303,58 +151,60 @@ void rf_InitNode( * *****************************************************************************/ -void rf_FreeDAG(dag_h) - RF_DagHeader_t *dag_h; +void +rf_FreeDAG(dag_h) + RF_DagHeader_t *dag_h; { - RF_AccessStripeMapHeader_t *asmap, *t_asmap; - RF_DagHeader_t *nextDag; - int i; - - while (dag_h) { - nextDag = dag_h->next; - for (i=0; dag_h->memChunk[i] && i < RF_MAXCHUNKS; i++) { - /* release mem chunks */ - rf_ReleaseMemChunk(dag_h->memChunk[i]); - dag_h->memChunk[i] = NULL; - } - - RF_ASSERT(i == dag_h->chunkIndex); - if (dag_h->xtraChunkCnt > 0) { - /* free xtraMemChunks */ - for (i=0; dag_h->xtraMemChunk[i] && i < dag_h->xtraChunkIndex; i++) { - rf_ReleaseMemChunk(dag_h->xtraMemChunk[i]); - dag_h->xtraMemChunk[i] = NULL; - } - RF_ASSERT(i == dag_h->xtraChunkIndex); - /* free ptrs to xtraMemChunks */ - RF_Free(dag_h->xtraMemChunk, dag_h->xtraChunkCnt * sizeof(RF_ChunkDesc_t *)); - } - 
rf_FreeAllocList(dag_h->allocList); - for (asmap = dag_h->asmList; asmap;) { - t_asmap = asmap; - asmap = asmap->next; - rf_FreeAccessStripeMap(t_asmap); - } - rf_FreeDAGHeader(dag_h); - dag_h = nextDag; - } + RF_AccessStripeMapHeader_t *asmap, *t_asmap; + RF_DagHeader_t *nextDag; + int i; + + while (dag_h) { + nextDag = dag_h->next; + for (i = 0; dag_h->memChunk[i] && i < RF_MAXCHUNKS; i++) { + /* release mem chunks */ + rf_ReleaseMemChunk(dag_h->memChunk[i]); + dag_h->memChunk[i] = NULL; + } + + RF_ASSERT(i == dag_h->chunkIndex); + if (dag_h->xtraChunkCnt > 0) { + /* free xtraMemChunks */ + for (i = 0; dag_h->xtraMemChunk[i] && i < dag_h->xtraChunkIndex; i++) { + rf_ReleaseMemChunk(dag_h->xtraMemChunk[i]); + dag_h->xtraMemChunk[i] = NULL; + } + RF_ASSERT(i == dag_h->xtraChunkIndex); + /* free ptrs to xtraMemChunks */ + RF_Free(dag_h->xtraMemChunk, dag_h->xtraChunkCnt * sizeof(RF_ChunkDesc_t *)); + } + rf_FreeAllocList(dag_h->allocList); + for (asmap = dag_h->asmList; asmap;) { + t_asmap = asmap; + asmap = asmap->next; + rf_FreeAccessStripeMap(t_asmap); + } + rf_FreeDAGHeader(dag_h); + dag_h = nextDag; + } } -RF_PropHeader_t *rf_MakePropListEntry( - RF_DagHeader_t *dag_h, - int resultNum, - int paramNum, - RF_PropHeader_t *next, - RF_AllocListElem_t *allocList) +RF_PropHeader_t * +rf_MakePropListEntry( + RF_DagHeader_t * dag_h, + int resultNum, + int paramNum, + RF_PropHeader_t * next, + RF_AllocListElem_t * allocList) { - RF_PropHeader_t *p; - - RF_CallocAndAdd(p, 1, sizeof(RF_PropHeader_t), - (RF_PropHeader_t *), allocList); - p->resultNum = resultNum; - p->paramNum = paramNum; - p->next = next; - return(p); + RF_PropHeader_t *p; + + RF_CallocAndAdd(p, 1, sizeof(RF_PropHeader_t), + (RF_PropHeader_t *), allocList); + p->resultNum = resultNum; + p->paramNum = paramNum; + p->next = next; + return (p); } static RF_FreeList_t *rf_dagh_freelist; @@ -364,401 +214,409 @@ static RF_FreeList_t *rf_dagh_freelist; #define RF_DAGH_INITIAL 32 static void rf_ShutdownDAGs(void 
*); -static void rf_ShutdownDAGs(ignored) - void *ignored; +static void +rf_ShutdownDAGs(ignored) + void *ignored; { - RF_FREELIST_DESTROY(rf_dagh_freelist,next,(RF_DagHeader_t *)); + RF_FREELIST_DESTROY(rf_dagh_freelist, next, (RF_DagHeader_t *)); } -int rf_ConfigureDAGs(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureDAGs(listp) + RF_ShutdownList_t **listp; { - int rc; + int rc; RF_FREELIST_CREATE(rf_dagh_freelist, RF_MAX_FREE_DAGH, - RF_DAGH_INC, sizeof(RF_DagHeader_t)); + RF_DAGH_INC, sizeof(RF_DagHeader_t)); if (rf_dagh_freelist == NULL) - return(ENOMEM); + return (ENOMEM); rc = rf_ShutdownCreate(listp, rf_ShutdownDAGs, NULL); if (rc) { RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + __FILE__, __LINE__, rc); rf_ShutdownDAGs(NULL); - return(rc); + return (rc); } - RF_FREELIST_PRIME(rf_dagh_freelist, RF_DAGH_INITIAL,next, - (RF_DagHeader_t *)); - return(0); + RF_FREELIST_PRIME(rf_dagh_freelist, RF_DAGH_INITIAL, next, + (RF_DagHeader_t *)); + return (0); } -RF_DagHeader_t *rf_AllocDAGHeader() +RF_DagHeader_t * +rf_AllocDAGHeader() { RF_DagHeader_t *dh; - RF_FREELIST_GET(rf_dagh_freelist,dh,next,(RF_DagHeader_t *)); + RF_FREELIST_GET(rf_dagh_freelist, dh, next, (RF_DagHeader_t *)); if (dh) { - bzero((char *)dh, sizeof(RF_DagHeader_t)); + bzero((char *) dh, sizeof(RF_DagHeader_t)); } - return(dh); + return (dh); } -void rf_FreeDAGHeader(RF_DagHeader_t *dh) +void +rf_FreeDAGHeader(RF_DagHeader_t * dh) { - RF_FREELIST_FREE(rf_dagh_freelist,dh,next); + RF_FREELIST_FREE(rf_dagh_freelist, dh, next); } - /* allocates a buffer big enough to hold the data described by pda */ -void *rf_AllocBuffer( - RF_Raid_t *raidPtr, - RF_DagHeader_t *dag_h, - RF_PhysDiskAddr_t *pda, - RF_AllocListElem_t *allocList) +void * +rf_AllocBuffer( + RF_Raid_t * raidPtr, + RF_DagHeader_t * dag_h, + RF_PhysDiskAddr_t * pda, + RF_AllocListElem_t * allocList) { - char *p; + char *p; - RF_MallocAndAdd(p, pda->numSector << 
raidPtr->logBytesPerSector, - (char *), allocList); - return((void *)p); + RF_MallocAndAdd(p, pda->numSector << raidPtr->logBytesPerSector, + (char *), allocList); + return ((void *) p); } - /****************************************************************************** * * debug routines * *****************************************************************************/ -char *rf_NodeStatusString(RF_DagNode_t *node) +char * +rf_NodeStatusString(RF_DagNode_t * node) { - switch (node->status) { - case rf_wait: return("wait"); - case rf_fired: return("fired"); - case rf_good: return("good"); - case rf_bad: return("bad"); - default: return("?"); - } + switch (node->status) { + case rf_wait:return ("wait"); + case rf_fired: + return ("fired"); + case rf_good: + return ("good"); + case rf_bad: + return ("bad"); + default: + return ("?"); + } } -void rf_PrintNodeInfoString(RF_DagNode_t *node) +void +rf_PrintNodeInfoString(RF_DagNode_t * node) { - RF_PhysDiskAddr_t *pda; - int (*df)(RF_DagNode_t *) = node->doFunc; - int i, lk, unlk; - void *bufPtr; - - if ((df==rf_DiskReadFunc) || (df==rf_DiskWriteFunc) - || (df==rf_DiskReadMirrorIdleFunc) - || (df == rf_DiskReadMirrorPartitionFunc)) - { - pda = (RF_PhysDiskAddr_t *)node->params[0].p; - bufPtr = (void *)node->params[1].p; - lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - RF_ASSERT( !(lk && unlk) ); - printf("r %d c %d offs %ld nsect %d buf 0x%lx %s\n", pda->row, pda->col, - (long)pda->startSector, (int) pda->numSector, (long)bufPtr, - (lk) ? "LOCK" : ((unlk) ? "UNLK" : " ")); - return; - } - - if (df == rf_DiskUnlockFunc) { - pda = (RF_PhysDiskAddr_t *)node->params[0].p; - lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v); - unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); - RF_ASSERT( !(lk && unlk) ); - printf("r %d c %d %s\n", pda->row, pda->col, - (lk) ? "LOCK" : ((unlk) ? 
"UNLK" : "nop")); - return; - } - - if ((df==rf_SimpleXorFunc) || (df==rf_RegularXorFunc) - || (df==rf_RecoveryXorFunc)) - { - printf("result buf 0x%lx\n",(long) node->results[0]); - for (i=0; i<node->numParams-1; i+=2) { - pda = (RF_PhysDiskAddr_t *)node->params[i].p; - bufPtr = (RF_PhysDiskAddr_t *)node->params[i+1].p; - printf(" buf 0x%lx r%d c%d offs %ld nsect %d\n", - (long)bufPtr, pda->row, pda->col, - (long)pda->startSector, (int)pda->numSector); - } - return; - } - + RF_PhysDiskAddr_t *pda; + int (*df) (RF_DagNode_t *) = node->doFunc; + int i, lk, unlk; + void *bufPtr; + + if ((df == rf_DiskReadFunc) || (df == rf_DiskWriteFunc) + || (df == rf_DiskReadMirrorIdleFunc) + || (df == rf_DiskReadMirrorPartitionFunc)) { + pda = (RF_PhysDiskAddr_t *) node->params[0].p; + bufPtr = (void *) node->params[1].p; + lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v); + unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); + RF_ASSERT(!(lk && unlk)); + printf("r %d c %d offs %ld nsect %d buf 0x%lx %s\n", pda->row, pda->col, + (long) pda->startSector, (int) pda->numSector, (long) bufPtr, + (lk) ? "LOCK" : ((unlk) ? "UNLK" : " ")); + return; + } + if (df == rf_DiskUnlockFunc) { + pda = (RF_PhysDiskAddr_t *) node->params[0].p; + lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v); + unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); + RF_ASSERT(!(lk && unlk)); + printf("r %d c %d %s\n", pda->row, pda->col, + (lk) ? "LOCK" : ((unlk) ? 
"UNLK" : "nop")); + return; + } + if ((df == rf_SimpleXorFunc) || (df == rf_RegularXorFunc) + || (df == rf_RecoveryXorFunc)) { + printf("result buf 0x%lx\n", (long) node->results[0]); + for (i = 0; i < node->numParams - 1; i += 2) { + pda = (RF_PhysDiskAddr_t *) node->params[i].p; + bufPtr = (RF_PhysDiskAddr_t *) node->params[i + 1].p; + printf(" buf 0x%lx r%d c%d offs %ld nsect %d\n", + (long) bufPtr, pda->row, pda->col, + (long) pda->startSector, (int) pda->numSector); + } + return; + } #if RF_INCLUDE_PARITYLOGGING > 0 - if (df==rf_ParityLogOverwriteFunc || df==rf_ParityLogUpdateFunc) { - for (i=0; i<node->numParams-1; i+=2) { - pda = (RF_PhysDiskAddr_t *)node->params[i].p; - bufPtr = (RF_PhysDiskAddr_t *)node->params[i+1].p; - printf(" r%d c%d offs %ld nsect %d buf 0x%lx\n", - pda->row, pda->col, (long) pda->startSector, - (int) pda->numSector, (long) bufPtr); - } - return; - } -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - - if ((df==rf_TerminateFunc) || (df==rf_NullNodeFunc)) { - printf("\n"); - return; - } - - printf("?\n"); + if (df == rf_ParityLogOverwriteFunc || df == rf_ParityLogUpdateFunc) { + for (i = 0; i < node->numParams - 1; i += 2) { + pda = (RF_PhysDiskAddr_t *) node->params[i].p; + bufPtr = (RF_PhysDiskAddr_t *) node->params[i + 1].p; + printf(" r%d c%d offs %ld nsect %d buf 0x%lx\n", + pda->row, pda->col, (long) pda->startSector, + (int) pda->numSector, (long) bufPtr); + } + return; + } +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ + + if ((df == rf_TerminateFunc) || (df == rf_NullNodeFunc)) { + printf("\n"); + return; + } + printf("?\n"); } -static void rf_RecurPrintDAG(node, depth, unvisited) - RF_DagNode_t *node; - int depth; - int unvisited; +static void +rf_RecurPrintDAG(node, depth, unvisited) + RF_DagNode_t *node; + int depth; + int unvisited; { - char *anttype; - int i; - - node->visited = (unvisited) ? 
0 : 1; - printf("(%d) %d C%d %s: %s,s%d %d/%d,a%d/%d,p%d,r%d S{", depth, - node->nodeNum, node->commitNode, node->name, rf_NodeStatusString(node), - node->numSuccedents, node->numSuccFired, node->numSuccDone, - node->numAntecedents, node->numAntDone, node->numParams,node->numResults); - for (i=0; i<node->numSuccedents; i++) { - printf("%d%s", node->succedents[i]->nodeNum, - ((i==node->numSuccedents-1) ? "\0" : " ")); - } - printf("} A{"); - for (i=0; i<node->numAntecedents; i++) { - switch (node->antType[i]) { - case rf_trueData : - anttype = "T"; - break; - case rf_antiData : - anttype = "A"; - break; - case rf_outputData : - anttype = "O"; - break; - case rf_control : - anttype = "C"; - break; - default : - anttype = "?"; - break; - } - printf("%d(%s)%s", node->antecedents[i]->nodeNum, anttype, (i==node->numAntecedents-1) ? "\0" : " "); - } - printf("}; "); - rf_PrintNodeInfoString(node); - for (i=0; i<node->numSuccedents; i++) { - if (node->succedents[i]->visited == unvisited) - rf_RecurPrintDAG(node->succedents[i], depth+1, unvisited); - } + char *anttype; + int i; + + node->visited = (unvisited) ? 0 : 1; + printf("(%d) %d C%d %s: %s,s%d %d/%d,a%d/%d,p%d,r%d S{", depth, + node->nodeNum, node->commitNode, node->name, rf_NodeStatusString(node), + node->numSuccedents, node->numSuccFired, node->numSuccDone, + node->numAntecedents, node->numAntDone, node->numParams, node->numResults); + for (i = 0; i < node->numSuccedents; i++) { + printf("%d%s", node->succedents[i]->nodeNum, + ((i == node->numSuccedents - 1) ? "\0" : " ")); + } + printf("} A{"); + for (i = 0; i < node->numAntecedents; i++) { + switch (node->antType[i]) { + case rf_trueData: + anttype = "T"; + break; + case rf_antiData: + anttype = "A"; + break; + case rf_outputData: + anttype = "O"; + break; + case rf_control: + anttype = "C"; + break; + default: + anttype = "?"; + break; + } + printf("%d(%s)%s", node->antecedents[i]->nodeNum, anttype, (i == node->numAntecedents - 1) ? 
"\0" : " "); + } + printf("}; "); + rf_PrintNodeInfoString(node); + for (i = 0; i < node->numSuccedents; i++) { + if (node->succedents[i]->visited == unvisited) + rf_RecurPrintDAG(node->succedents[i], depth + 1, unvisited); + } } -static void rf_PrintDAG(dag_h) - RF_DagHeader_t *dag_h; +static void +rf_PrintDAG(dag_h) + RF_DagHeader_t *dag_h; { - int unvisited, i; - char *status; - - /* set dag status */ - switch (dag_h->status) { - case rf_enable : - status = "enable"; - break; - case rf_rollForward : - status = "rollForward"; - break; - case rf_rollBackward : - status = "rollBackward"; - break; - default : - status = "illegal!"; - break; - } - /* find out if visited bits are currently set or clear */ - unvisited = dag_h->succedents[0]->visited; - - printf("DAG type: %s\n", dag_h->creator); - printf("format is (depth) num commit type: status,nSucc nSuccFired/nSuccDone,nAnte/nAnteDone,nParam,nResult S{x} A{x(type)}; info\n"); - printf("(0) %d Hdr: %s, s%d, (commit %d/%d) S{", dag_h->nodeNum, - status, dag_h->numSuccedents, dag_h->numCommitNodes, dag_h->numCommits); - for (i=0; i<dag_h->numSuccedents; i++) { - printf("%d%s", dag_h->succedents[i]->nodeNum, - ((i==dag_h->numSuccedents-1) ? 
"\0" : " ")); - } - printf("};\n"); - for (i=0; i<dag_h->numSuccedents; i++) { - if (dag_h->succedents[i]->visited == unvisited) - rf_RecurPrintDAG(dag_h->succedents[i], 1, unvisited); - } + int unvisited, i; + char *status; + + /* set dag status */ + switch (dag_h->status) { + case rf_enable: + status = "enable"; + break; + case rf_rollForward: + status = "rollForward"; + break; + case rf_rollBackward: + status = "rollBackward"; + break; + default: + status = "illegal!"; + break; + } + /* find out if visited bits are currently set or clear */ + unvisited = dag_h->succedents[0]->visited; + + printf("DAG type: %s\n", dag_h->creator); + printf("format is (depth) num commit type: status,nSucc nSuccFired/nSuccDone,nAnte/nAnteDone,nParam,nResult S{x} A{x(type)}; info\n"); + printf("(0) %d Hdr: %s, s%d, (commit %d/%d) S{", dag_h->nodeNum, + status, dag_h->numSuccedents, dag_h->numCommitNodes, dag_h->numCommits); + for (i = 0; i < dag_h->numSuccedents; i++) { + printf("%d%s", dag_h->succedents[i]->nodeNum, + ((i == dag_h->numSuccedents - 1) ? 
"\0" : " ")); + } + printf("};\n"); + for (i = 0; i < dag_h->numSuccedents; i++) { + if (dag_h->succedents[i]->visited == unvisited) + rf_RecurPrintDAG(dag_h->succedents[i], 1, unvisited); + } } - /* assigns node numbers */ -int rf_AssignNodeNums(RF_DagHeader_t *dag_h) +int +rf_AssignNodeNums(RF_DagHeader_t * dag_h) { - int unvisited, i, nnum; - RF_DagNode_t *node; - - nnum = 0; - unvisited = dag_h->succedents[0]->visited; - - dag_h->nodeNum = nnum++; - for (i=0; i<dag_h->numSuccedents; i++) { - node = dag_h->succedents[i]; - if (node->visited == unvisited) { - nnum = rf_RecurAssignNodeNums(dag_h->succedents[i], nnum, unvisited); - } - } - return(nnum); + int unvisited, i, nnum; + RF_DagNode_t *node; + + nnum = 0; + unvisited = dag_h->succedents[0]->visited; + + dag_h->nodeNum = nnum++; + for (i = 0; i < dag_h->numSuccedents; i++) { + node = dag_h->succedents[i]; + if (node->visited == unvisited) { + nnum = rf_RecurAssignNodeNums(dag_h->succedents[i], nnum, unvisited); + } + } + return (nnum); } -int rf_RecurAssignNodeNums(node, num, unvisited) - RF_DagNode_t *node; - int num; - int unvisited; +int +rf_RecurAssignNodeNums(node, num, unvisited) + RF_DagNode_t *node; + int num; + int unvisited; { - int i; + int i; - node->visited = (unvisited) ? 0 : 1; + node->visited = (unvisited) ? 
0 : 1; - node->nodeNum = num++; - for (i=0; i<node->numSuccedents; i++) { - if (node->succedents[i]->visited == unvisited) { - num = rf_RecurAssignNodeNums(node->succedents[i], num, unvisited); - } - } - return(num); + node->nodeNum = num++; + for (i = 0; i < node->numSuccedents; i++) { + if (node->succedents[i]->visited == unvisited) { + num = rf_RecurAssignNodeNums(node->succedents[i], num, unvisited); + } + } + return (num); } - /* set the header pointers in each node to "newptr" */ -void rf_ResetDAGHeaderPointers(dag_h, newptr) - RF_DagHeader_t *dag_h; - RF_DagHeader_t *newptr; +void +rf_ResetDAGHeaderPointers(dag_h, newptr) + RF_DagHeader_t *dag_h; + RF_DagHeader_t *newptr; { - int i; - for (i=0; i<dag_h->numSuccedents; i++) - if (dag_h->succedents[i]->dagHdr != newptr) - rf_RecurResetDAGHeaderPointers(dag_h->succedents[i], newptr); + int i; + for (i = 0; i < dag_h->numSuccedents; i++) + if (dag_h->succedents[i]->dagHdr != newptr) + rf_RecurResetDAGHeaderPointers(dag_h->succedents[i], newptr); } -void rf_RecurResetDAGHeaderPointers(node, newptr) - RF_DagNode_t *node; - RF_DagHeader_t *newptr; +void +rf_RecurResetDAGHeaderPointers(node, newptr) + RF_DagNode_t *node; + RF_DagHeader_t *newptr; { - int i; - node->dagHdr = newptr; - for (i=0; i<node->numSuccedents; i++) - if (node->succedents[i]->dagHdr != newptr) - rf_RecurResetDAGHeaderPointers(node->succedents[i], newptr); + int i; + node->dagHdr = newptr; + for (i = 0; i < node->numSuccedents; i++) + if (node->succedents[i]->dagHdr != newptr) + rf_RecurResetDAGHeaderPointers(node->succedents[i], newptr); } -void rf_PrintDAGList(RF_DagHeader_t *dag_h) +void +rf_PrintDAGList(RF_DagHeader_t * dag_h) { - int i=0; + int i = 0; - for (; dag_h; dag_h=dag_h->next) { - rf_AssignNodeNums(dag_h); - printf("\n\nDAG %d IN LIST:\n",i++); - rf_PrintDAG(dag_h); - } + for (; dag_h; dag_h = dag_h->next) { + rf_AssignNodeNums(dag_h); + printf("\n\nDAG %d IN LIST:\n", i++); + rf_PrintDAG(dag_h); + } } -static int 
rf_ValidateBranch(node, scount, acount, nodes, unvisited) - RF_DagNode_t *node; - int *scount; - int *acount; - RF_DagNode_t **nodes; - int unvisited; +static int +rf_ValidateBranch(node, scount, acount, nodes, unvisited) + RF_DagNode_t *node; + int *scount; + int *acount; + RF_DagNode_t **nodes; + int unvisited; { - int i, retcode = 0; - - /* construct an array of node pointers indexed by node num */ - node->visited = (unvisited) ? 0 : 1; - nodes[ node->nodeNum ] = node; - - if (node->next != NULL) { - printf("INVALID DAG: next pointer in node is not NULL\n"); - retcode = 1; - } - if (node->status != rf_wait) { - printf("INVALID DAG: Node status is not wait\n"); - retcode = 1; - } - if (node->numAntDone != 0) { - printf("INVALID DAG: numAntDone is not zero\n"); - retcode = 1; - } - if (node->doFunc == rf_TerminateFunc) { - if (node->numSuccedents != 0) { - printf("INVALID DAG: Terminator node has succedents\n"); - retcode = 1; - } - } else { - if (node->numSuccedents == 0) { - printf("INVALID DAG: Non-terminator node has no succedents\n"); - retcode = 1; - } - } - for (i=0; i<node->numSuccedents; i++) { - if (!node->succedents[i]) { - printf("INVALID DAG: succedent %d of node %s is NULL\n",i,node->name); - retcode = 1; - } - scount[ node->succedents[i]->nodeNum ]++; - } - for (i=0; i<node->numAntecedents; i++) { - if (!node->antecedents[i]) { - printf("INVALID DAG: antecedent %d of node %s is NULL\n",i,node->name); - retcode = 1; - } - acount[ node->antecedents[i]->nodeNum ]++; - } - for (i=0; i<node->numSuccedents; i++) { - if (node->succedents[i]->visited == unvisited) { - if (rf_ValidateBranch(node->succedents[i], scount, - acount, nodes, unvisited)) - { - retcode = 1; - } - } - } - return(retcode); + int i, retcode = 0; + + /* construct an array of node pointers indexed by node num */ + node->visited = (unvisited) ? 
0 : 1; + nodes[node->nodeNum] = node; + + if (node->next != NULL) { + printf("INVALID DAG: next pointer in node is not NULL\n"); + retcode = 1; + } + if (node->status != rf_wait) { + printf("INVALID DAG: Node status is not wait\n"); + retcode = 1; + } + if (node->numAntDone != 0) { + printf("INVALID DAG: numAntDone is not zero\n"); + retcode = 1; + } + if (node->doFunc == rf_TerminateFunc) { + if (node->numSuccedents != 0) { + printf("INVALID DAG: Terminator node has succedents\n"); + retcode = 1; + } + } else { + if (node->numSuccedents == 0) { + printf("INVALID DAG: Non-terminator node has no succedents\n"); + retcode = 1; + } + } + for (i = 0; i < node->numSuccedents; i++) { + if (!node->succedents[i]) { + printf("INVALID DAG: succedent %d of node %s is NULL\n", i, node->name); + retcode = 1; + } + scount[node->succedents[i]->nodeNum]++; + } + for (i = 0; i < node->numAntecedents; i++) { + if (!node->antecedents[i]) { + printf("INVALID DAG: antecedent %d of node %s is NULL\n", i, node->name); + retcode = 1; + } + acount[node->antecedents[i]->nodeNum]++; + } + for (i = 0; i < node->numSuccedents; i++) { + if (node->succedents[i]->visited == unvisited) { + if (rf_ValidateBranch(node->succedents[i], scount, + acount, nodes, unvisited)) { + retcode = 1; + } + } + } + return (retcode); } -static void rf_ValidateBranchVisitedBits(node, unvisited, rl) - RF_DagNode_t *node; - int unvisited; - int rl; +static void +rf_ValidateBranchVisitedBits(node, unvisited, rl) + RF_DagNode_t *node; + int unvisited; + int rl; { - int i; - - RF_ASSERT(node->visited == unvisited); - for (i=0; i<node->numSuccedents; i++) { - if (node->succedents[i] == NULL) { - printf("node=%lx node->succedents[%d] is NULL\n", (long)node, i); - RF_ASSERT(0); - } - rf_ValidateBranchVisitedBits(node->succedents[i],unvisited, rl+1); - } + int i; + + RF_ASSERT(node->visited == unvisited); + for (i = 0; i < node->numSuccedents; i++) { + if (node->succedents[i] == NULL) { + printf("node=%lx 
node->succedents[%d] is NULL\n", (long) node, i); + RF_ASSERT(0); + } + rf_ValidateBranchVisitedBits(node->succedents[i], unvisited, rl + 1); + } } - /* NOTE: never call this on a big dag, because it is exponential * in execution time */ -static void rf_ValidateVisitedBits(dag) - RF_DagHeader_t *dag; +static void +rf_ValidateVisitedBits(dag) + RF_DagHeader_t *dag; { - int i, unvisited; + int i, unvisited; - unvisited = dag->succedents[0]->visited; + unvisited = dag->succedents[0]->visited; - for (i=0; i<dag->numSuccedents; i++) { - if (dag->succedents[i] == NULL) { - printf("dag=%lx dag->succedents[%d] is NULL\n", (long) dag, i); - RF_ASSERT(0); - } - rf_ValidateBranchVisitedBits(dag->succedents[i],unvisited,0); - } + for (i = 0; i < dag->numSuccedents; i++) { + if (dag->succedents[i] == NULL) { + printf("dag=%lx dag->succedents[%d] is NULL\n", (long) dag, i); + RF_ASSERT(0); + } + rf_ValidateBranchVisitedBits(dag->succedents[i], unvisited, 0); + } } - /* validate a DAG. _at entry_ verify that: * -- numNodesCompleted is zero * -- node queue is null @@ -775,101 +633,104 @@ static void rf_ValidateVisitedBits(dag) * is equal to the succedent count on that node * -- what else? 
*/ -int rf_ValidateDAG(dag_h) - RF_DagHeader_t *dag_h; +int +rf_ValidateDAG(dag_h) + RF_DagHeader_t *dag_h; { - int i, nodecount; - int *scount, *acount; /* per-node successor and antecedent counts */ - RF_DagNode_t **nodes; /* array of ptrs to nodes in dag */ - int retcode = 0; - int unvisited; - int commitNodeCount = 0; - - if (rf_validateVisitedDebug) - rf_ValidateVisitedBits(dag_h); - - if (dag_h->numNodesCompleted != 0) { - printf("INVALID DAG: num nodes completed is %d, should be 0\n",dag_h->numNodesCompleted); - retcode = 1; goto validate_dag_bad; - } - if (dag_h->status != rf_enable) { - printf("INVALID DAG: not enabled\n"); - retcode = 1; goto validate_dag_bad; - } - if (dag_h->numCommits != 0) { - printf("INVALID DAG: numCommits != 0 (%d)\n",dag_h->numCommits); - retcode = 1; goto validate_dag_bad; - } - if (dag_h->numSuccedents != 1) { - /* currently, all dags must have only one succedent */ - printf("INVALID DAG: numSuccedents !1 (%d)\n",dag_h->numSuccedents); - retcode = 1; goto validate_dag_bad; - } - nodecount = rf_AssignNodeNums(dag_h); - - unvisited = dag_h->succedents[0]->visited; - - RF_Calloc(scount, nodecount, sizeof(int), (int *)); - RF_Calloc(acount, nodecount, sizeof(int), (int *)); - RF_Calloc(nodes, nodecount, sizeof(RF_DagNode_t *), (RF_DagNode_t **)); - for (i=0; i<dag_h->numSuccedents; i++) { - if ((dag_h->succedents[i]->visited == unvisited) - && rf_ValidateBranch(dag_h->succedents[i], scount, - acount, nodes, unvisited)) - { - retcode = 1; - } - } - /* start at 1 to skip the header node */ - for (i=1; i<nodecount; i++) { - if ( nodes[i]->commitNode ) - commitNodeCount++; - if ( nodes[i]->doFunc == NULL ) { - printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name); - retcode = 1; - goto validate_dag_out; - } - if ( nodes[i]->undoFunc == NULL ) { - printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name); - retcode = 1; - goto validate_dag_out; - } - if ( nodes[i]->numAntecedents != scount[ 
nodes[i]->nodeNum ] ) { - printf("INVALID DAG: node %s has %d antecedents but appears as a succedent %d times\n", - nodes[i]->name, nodes[i]->numAntecedents, scount[nodes[i]->nodeNum]); - retcode = 1; - goto validate_dag_out; - } - if ( nodes[i]->numSuccedents != acount[ nodes[i]->nodeNum ] ) { - printf("INVALID DAG: node %s has %d succedents but appears as an antecedent %d times\n", - nodes[i]->name, nodes[i]->numSuccedents, acount[nodes[i]->nodeNum]); - retcode = 1; - goto validate_dag_out; - } - } - - if ( dag_h->numCommitNodes != commitNodeCount ) { - printf("INVALID DAG: incorrect commit node count. hdr->numCommitNodes (%d) found (%d) commit nodes in graph\n", - dag_h->numCommitNodes, commitNodeCount); - retcode = 1; - goto validate_dag_out; - } + int i, nodecount; + int *scount, *acount;/* per-node successor and antecedent counts */ + RF_DagNode_t **nodes; /* array of ptrs to nodes in dag */ + int retcode = 0; + int unvisited; + int commitNodeCount = 0; + + if (rf_validateVisitedDebug) + rf_ValidateVisitedBits(dag_h); + + if (dag_h->numNodesCompleted != 0) { + printf("INVALID DAG: num nodes completed is %d, should be 0\n", dag_h->numNodesCompleted); + retcode = 1; + goto validate_dag_bad; + } + if (dag_h->status != rf_enable) { + printf("INVALID DAG: not enabled\n"); + retcode = 1; + goto validate_dag_bad; + } + if (dag_h->numCommits != 0) { + printf("INVALID DAG: numCommits != 0 (%d)\n", dag_h->numCommits); + retcode = 1; + goto validate_dag_bad; + } + if (dag_h->numSuccedents != 1) { + /* currently, all dags must have only one succedent */ + printf("INVALID DAG: numSuccedents !1 (%d)\n", dag_h->numSuccedents); + retcode = 1; + goto validate_dag_bad; + } + nodecount = rf_AssignNodeNums(dag_h); + + unvisited = dag_h->succedents[0]->visited; + + RF_Calloc(scount, nodecount, sizeof(int), (int *)); + RF_Calloc(acount, nodecount, sizeof(int), (int *)); + RF_Calloc(nodes, nodecount, sizeof(RF_DagNode_t *), (RF_DagNode_t **)); + for (i = 0; i < 
dag_h->numSuccedents; i++) { + if ((dag_h->succedents[i]->visited == unvisited) + && rf_ValidateBranch(dag_h->succedents[i], scount, + acount, nodes, unvisited)) { + retcode = 1; + } + } + /* start at 1 to skip the header node */ + for (i = 1; i < nodecount; i++) { + if (nodes[i]->commitNode) + commitNodeCount++; + if (nodes[i]->doFunc == NULL) { + printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name); + retcode = 1; + goto validate_dag_out; + } + if (nodes[i]->undoFunc == NULL) { + printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name); + retcode = 1; + goto validate_dag_out; + } + if (nodes[i]->numAntecedents != scount[nodes[i]->nodeNum]) { + printf("INVALID DAG: node %s has %d antecedents but appears as a succedent %d times\n", + nodes[i]->name, nodes[i]->numAntecedents, scount[nodes[i]->nodeNum]); + retcode = 1; + goto validate_dag_out; + } + if (nodes[i]->numSuccedents != acount[nodes[i]->nodeNum]) { + printf("INVALID DAG: node %s has %d succedents but appears as an antecedent %d times\n", + nodes[i]->name, nodes[i]->numSuccedents, acount[nodes[i]->nodeNum]); + retcode = 1; + goto validate_dag_out; + } + } + if (dag_h->numCommitNodes != commitNodeCount) { + printf("INVALID DAG: incorrect commit node count. 
hdr->numCommitNodes (%d) found (%d) commit nodes in graph\n", + dag_h->numCommitNodes, commitNodeCount); + retcode = 1; + goto validate_dag_out; + } validate_dag_out: - RF_Free(scount, nodecount*sizeof(int)); - RF_Free(acount, nodecount*sizeof(int)); - RF_Free(nodes, nodecount*sizeof(RF_DagNode_t *)); - if (retcode) - rf_PrintDAGList(dag_h); - - if (rf_validateVisitedDebug) - rf_ValidateVisitedBits(dag_h); - - return(retcode); + RF_Free(scount, nodecount * sizeof(int)); + RF_Free(acount, nodecount * sizeof(int)); + RF_Free(nodes, nodecount * sizeof(RF_DagNode_t *)); + if (retcode) + rf_PrintDAGList(dag_h); + + if (rf_validateVisitedDebug) + rf_ValidateVisitedBits(dag_h); + + return (retcode); validate_dag_bad: - rf_PrintDAGList(dag_h); - return(retcode); + rf_PrintDAGList(dag_h); + return (retcode); } @@ -879,52 +740,52 @@ validate_dag_bad: * *****************************************************************************/ -void rf_redirect_asm( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap) +void +rf_redirect_asm( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap) { - int ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) ? 
1 : 0; - int row = asmap->physInfo->row; - int fcol = raidPtr->reconControl[row]->fcol; - int srow = raidPtr->reconControl[row]->spareRow; - int scol = raidPtr->reconControl[row]->spareCol; - RF_PhysDiskAddr_t *pda; - - RF_ASSERT( raidPtr->status[row] == rf_rs_reconstructing ); - for (pda = asmap->physInfo; pda; pda=pda->next) { - if (pda->col == fcol) { - if (rf_dagDebug) { - if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, - pda->startSector)) - { - RF_PANIC(); - } - } - /*printf("Remapped data for large write\n");*/ - if (ds) { - raidPtr->Layout.map->MapSector(raidPtr, pda->raidAddress, - &pda->row, &pda->col, &pda->startSector, RF_REMAP); - } - else { - pda->row = srow; pda->col = scol; - } - } - } - for (pda = asmap->parityInfo; pda; pda=pda->next) { - if (pda->col == fcol) { - if (rf_dagDebug) { - if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, pda->startSector)) { - RF_PANIC(); - } - } - } - if (ds) { - (raidPtr->Layout.map->MapParity)(raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); - } - else { - pda->row = srow; pda->col = scol; - } - } + int ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) ? 
1 : 0; + int row = asmap->physInfo->row; + int fcol = raidPtr->reconControl[row]->fcol; + int srow = raidPtr->reconControl[row]->spareRow; + int scol = raidPtr->reconControl[row]->spareCol; + RF_PhysDiskAddr_t *pda; + + RF_ASSERT(raidPtr->status[row] == rf_rs_reconstructing); + for (pda = asmap->physInfo; pda; pda = pda->next) { + if (pda->col == fcol) { + if (rf_dagDebug) { + if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, + pda->startSector)) { + RF_PANIC(); + } + } + /* printf("Remapped data for large write\n"); */ + if (ds) { + raidPtr->Layout.map->MapSector(raidPtr, pda->raidAddress, + &pda->row, &pda->col, &pda->startSector, RF_REMAP); + } else { + pda->row = srow; + pda->col = scol; + } + } + } + for (pda = asmap->parityInfo; pda; pda = pda->next) { + if (pda->col == fcol) { + if (rf_dagDebug) { + if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, pda->startSector)) { + RF_PANIC(); + } + } + } + if (ds) { + (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); + } else { + pda->row = srow; + pda->col = scol; + } + } } @@ -936,70 +797,75 @@ void rf_redirect_asm( * contained within one stripe unit, so we RF_ASSERT against this case at the * start. 
*/ -void rf_MapUnaccessedPortionOfStripe( - RF_Raid_t *raidPtr, - RF_RaidLayout_t *layoutPtr, /* in: layout information */ - RF_AccessStripeMap_t *asmap, /* in: access stripe map */ - RF_DagHeader_t *dag_h, /* in: header of the dag to create */ - RF_AccessStripeMapHeader_t **new_asm_h, /* in: ptr to array of 2 headers, to be filled in */ - int *nRodNodes, /* out: num nodes to be generated to read unaccessed data */ - char **sosBuffer, /* out: pointers to newly allocated buffer */ - char **eosBuffer, - RF_AllocListElem_t *allocList) +void +rf_MapUnaccessedPortionOfStripe( + RF_Raid_t * raidPtr, + RF_RaidLayout_t * layoutPtr,/* in: layout information */ + RF_AccessStripeMap_t * asmap, /* in: access stripe map */ + RF_DagHeader_t * dag_h, /* in: header of the dag to create */ + RF_AccessStripeMapHeader_t ** new_asm_h, /* in: ptr to array of 2 + * headers, to be filled in */ + int *nRodNodes, /* out: num nodes to be generated to read + * unaccessed data */ + char **sosBuffer, /* out: pointers to newly allocated buffer */ + char **eosBuffer, + RF_AllocListElem_t * allocList) { - RF_RaidAddr_t sosRaidAddress, eosRaidAddress; - RF_SectorNum_t sosNumSector, eosNumSector; - - RF_ASSERT( asmap->numStripeUnitsAccessed > (layoutPtr->numDataCol/2) ); - /* generate an access map for the region of the array from start of stripe - * to start of access */ - new_asm_h[0] = new_asm_h[1] = NULL; *nRodNodes = 0; - if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->raidAddress)) { - sosRaidAddress = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - sosNumSector = asmap->raidAddress - sosRaidAddress; - RF_MallocAndAdd(*sosBuffer, rf_RaidAddressToByte(raidPtr, sosNumSector), (char *), allocList); - new_asm_h[0] = rf_MapAccess(raidPtr, sosRaidAddress, sosNumSector, *sosBuffer, RF_DONT_REMAP); - new_asm_h[0]->next = dag_h->asmList; - dag_h->asmList = new_asm_h[0]; - *nRodNodes += new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - - RF_ASSERT(new_asm_h[0]->stripeMap->next 
== NULL); - /* we're totally within one stripe here */ - if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE) - rf_redirect_asm(raidPtr, new_asm_h[0]->stripeMap); - } - /* generate an access map for the region of the array from end of access - * to end of stripe */ - if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->endRaidAddress)) { - eosRaidAddress = asmap->endRaidAddress; - eosNumSector = rf_RaidAddressOfNextStripeBoundary(layoutPtr, eosRaidAddress) - eosRaidAddress; - RF_MallocAndAdd(*eosBuffer, rf_RaidAddressToByte(raidPtr, eosNumSector), (char *), allocList); - new_asm_h[1] = rf_MapAccess(raidPtr, eosRaidAddress, eosNumSector, *eosBuffer, RF_DONT_REMAP); - new_asm_h[1]->next = dag_h->asmList; - dag_h->asmList = new_asm_h[1]; - *nRodNodes += new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - - RF_ASSERT(new_asm_h[1]->stripeMap->next == NULL); - /* we're totally within one stripe here */ - if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE) - rf_redirect_asm(raidPtr, new_asm_h[1]->stripeMap); - } + RF_RaidAddr_t sosRaidAddress, eosRaidAddress; + RF_SectorNum_t sosNumSector, eosNumSector; + + RF_ASSERT(asmap->numStripeUnitsAccessed > (layoutPtr->numDataCol / 2)); + /* generate an access map for the region of the array from start of + * stripe to start of access */ + new_asm_h[0] = new_asm_h[1] = NULL; + *nRodNodes = 0; + if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->raidAddress)) { + sosRaidAddress = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + sosNumSector = asmap->raidAddress - sosRaidAddress; + RF_MallocAndAdd(*sosBuffer, rf_RaidAddressToByte(raidPtr, sosNumSector), (char *), allocList); + new_asm_h[0] = rf_MapAccess(raidPtr, sosRaidAddress, sosNumSector, *sosBuffer, RF_DONT_REMAP); + new_asm_h[0]->next = dag_h->asmList; + dag_h->asmList = new_asm_h[0]; + *nRodNodes += new_asm_h[0]->stripeMap->numStripeUnitsAccessed; + + RF_ASSERT(new_asm_h[0]->stripeMap->next == NULL); + /* we're totally within one stripe here */ + if (asmap->flags & 
RF_ASM_REDIR_LARGE_WRITE) + rf_redirect_asm(raidPtr, new_asm_h[0]->stripeMap); + } + /* generate an access map for the region of the array from end of + * access to end of stripe */ + if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->endRaidAddress)) { + eosRaidAddress = asmap->endRaidAddress; + eosNumSector = rf_RaidAddressOfNextStripeBoundary(layoutPtr, eosRaidAddress) - eosRaidAddress; + RF_MallocAndAdd(*eosBuffer, rf_RaidAddressToByte(raidPtr, eosNumSector), (char *), allocList); + new_asm_h[1] = rf_MapAccess(raidPtr, eosRaidAddress, eosNumSector, *eosBuffer, RF_DONT_REMAP); + new_asm_h[1]->next = dag_h->asmList; + dag_h->asmList = new_asm_h[1]; + *nRodNodes += new_asm_h[1]->stripeMap->numStripeUnitsAccessed; + + RF_ASSERT(new_asm_h[1]->stripeMap->next == NULL); + /* we're totally within one stripe here */ + if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE) + rf_redirect_asm(raidPtr, new_asm_h[1]->stripeMap); + } } /* returns non-zero if the indicated ranges of stripe unit offsets overlap */ -int rf_PDAOverlap( - RF_RaidLayout_t *layoutPtr, - RF_PhysDiskAddr_t *src, - RF_PhysDiskAddr_t *dest) +int +rf_PDAOverlap( + RF_RaidLayout_t * layoutPtr, + RF_PhysDiskAddr_t * src, + RF_PhysDiskAddr_t * dest) { - RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector); - RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector); - /* use -1 to be sure we stay within SU */ - RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector-1); - RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector-1); - return( (RF_MAX(soffs,doffs) <= RF_MIN(send,dend)) ? 
1 : 0 ); + RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector); + RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector); + /* use -1 to be sure we stay within SU */ + RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector - 1); + RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector - 1); + return ((RF_MAX(soffs, doffs) <= RF_MIN(send, dend)) ? 1 : 0); } @@ -1031,110 +897,132 @@ int rf_PDAOverlap( /* out: new_asm_h - the two new ASMs */ /* out: nXorBufs - the total number of xor bufs required */ /* out: rpBufPtr - a buffer for the parity read */ -void rf_GenerateFailedAccessASMs( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_PhysDiskAddr_t *failedPDA, - RF_DagHeader_t *dag_h, - RF_AccessStripeMapHeader_t **new_asm_h, - int *nXorBufs, - char **rpBufPtr, - char *overlappingPDAs, - RF_AllocListElem_t *allocList) +void +rf_GenerateFailedAccessASMs( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_PhysDiskAddr_t * failedPDA, + RF_DagHeader_t * dag_h, + RF_AccessStripeMapHeader_t ** new_asm_h, + int *nXorBufs, + char **rpBufPtr, + char *overlappingPDAs, + RF_AllocListElem_t * allocList) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - - /* s=start, e=end, s=stripe, a=access, f=failed, su=stripe unit */ - RF_RaidAddr_t sosAddr, sosEndAddr, eosStartAddr, eosAddr; - - RF_SectorCount_t numSect[2], numParitySect; - RF_PhysDiskAddr_t *pda; - char *rdBuf, *bufP; - int foundit, i; - - bufP = NULL; - foundit = 0; - /* first compute the following raid addresses: - start of stripe, (sosAddr) - MIN(start of access, start of failed SU), (sosEndAddr) - MAX(end of access, end of failed SU), (eosStartAddr) - end of stripe (i.e. 
start of next stripe) (eosAddr) - */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - sosEndAddr = RF_MIN(asmap->raidAddress, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,failedPDA->raidAddress)); - eosStartAddr = RF_MAX(asmap->endRaidAddress, rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, failedPDA->raidAddress)); - eosAddr = rf_RaidAddressOfNextStripeBoundary(layoutPtr, asmap->raidAddress); - - /* now generate access stripe maps for each of the above regions of the - * stripe. Use a dummy (NULL) buf ptr for now */ - - new_asm_h[0] = (sosAddr != sosEndAddr) ? rf_MapAccess(raidPtr, sosAddr, sosEndAddr-sosAddr, NULL, RF_DONT_REMAP) : NULL; - new_asm_h[1] = (eosStartAddr != eosAddr) ? rf_MapAccess(raidPtr, eosStartAddr, eosAddr-eosStartAddr, NULL, RF_DONT_REMAP) : NULL; - - /* walk through the PDAs and range-restrict each SU to the region of the - * SU touched on the failed PDA. also compute total data buffer space - * requirements in this step. Ignore the parity for now. */ - - numSect[0] = numSect[1] = 0; - if (new_asm_h[0]) { - new_asm_h[0]->next = dag_h->asmList; dag_h->asmList = new_asm_h[0]; - for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) { - rf_RangeRestrictPDA(raidPtr,failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); numSect[0] += pda->numSector; - } - } - if (new_asm_h[1]) { - new_asm_h[1]->next = dag_h->asmList; dag_h->asmList = new_asm_h[1]; - for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) { - rf_RangeRestrictPDA(raidPtr,failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); numSect[1] += pda->numSector; - } - } - numParitySect = failedPDA->numSector; - - /* allocate buffer space for the data & parity we have to read to recover - * from the failure */ - - if (numSect[0]+numSect[1]+ ((rpBufPtr) ? 
numParitySect : 0)) { /* don't allocate parity buf if not needed */ - RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr,numSect[0]+numSect[1]+numParitySect), (char *), allocList); - bufP = rdBuf; - if (rf_degDagDebug) printf("Newly allocated buffer (%d bytes) is 0x%lx\n", - (int)rf_RaidAddressToByte(raidPtr,numSect[0]+numSect[1]+numParitySect), (unsigned long) bufP); - } - - /* now walk through the pdas one last time and assign buffer pointers - * (ugh!). Again, ignore the parity. also, count nodes to find out how - * many bufs need to be xored together */ - (*nXorBufs) = 1; /* in read case, 1 is for parity. In write case, 1 is for failed data */ - if (new_asm_h[0]) { - for (pda=new_asm_h[0]->stripeMap->physInfo; pda; pda=pda->next) {pda->bufPtr = bufP; bufP += rf_RaidAddressToByte(raidPtr,pda->numSector);} - *nXorBufs += new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - } - if (new_asm_h[1]) { - for (pda=new_asm_h[1]->stripeMap->physInfo; pda; pda=pda->next) {pda->bufPtr = bufP; bufP += rf_RaidAddressToByte(raidPtr,pda->numSector);} - (*nXorBufs) += new_asm_h[1]->stripeMap->numStripeUnitsAccessed; - } - if (rpBufPtr) *rpBufPtr = bufP; /* the rest of the buffer is for parity */ - - /* the last step is to figure out how many more distinct buffers need to - * get xor'd to produce the missing unit. 
there's one for each user-data - * read node that overlaps the portion of the failed unit being accessed */ - - for (foundit=i=0,pda=asmap->physInfo; pda; i++,pda=pda->next) { - if (pda == failedPDA) {i--; foundit=1; continue;} - if (rf_PDAOverlap(layoutPtr, pda, failedPDA)) { - overlappingPDAs[i] = 1; - (*nXorBufs)++; - } - } - if (!foundit) {RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA in asm list\n"); RF_ASSERT(0);} - - if (rf_degDagDebug) { - if (new_asm_h[0]) { - printf("First asm:\n"); rf_PrintFullAccessStripeMap(new_asm_h[0], 1); - } - if (new_asm_h[1]) { - printf("Second asm:\n"); rf_PrintFullAccessStripeMap(new_asm_h[1], 1); - } - } + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + + /* s=start, e=end, s=stripe, a=access, f=failed, su=stripe unit */ + RF_RaidAddr_t sosAddr, sosEndAddr, eosStartAddr, eosAddr; + + RF_SectorCount_t numSect[2], numParitySect; + RF_PhysDiskAddr_t *pda; + char *rdBuf, *bufP; + int foundit, i; + + bufP = NULL; + foundit = 0; + /* first compute the following raid addresses: start of stripe, + * (sosAddr) MIN(start of access, start of failed SU), (sosEndAddr) + * MAX(end of access, end of failed SU), (eosStartAddr) end of + * stripe (i.e. start of next stripe) (eosAddr) */ + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + sosEndAddr = RF_MIN(asmap->raidAddress, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->raidAddress)); + eosStartAddr = RF_MAX(asmap->endRaidAddress, rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, failedPDA->raidAddress)); + eosAddr = rf_RaidAddressOfNextStripeBoundary(layoutPtr, asmap->raidAddress); + + /* now generate access stripe maps for each of the above regions of + * the stripe. Use a dummy (NULL) buf ptr for now */ + + new_asm_h[0] = (sosAddr != sosEndAddr) ? rf_MapAccess(raidPtr, sosAddr, sosEndAddr - sosAddr, NULL, RF_DONT_REMAP) : NULL; + new_asm_h[1] = (eosStartAddr != eosAddr) ? 
rf_MapAccess(raidPtr, eosStartAddr, eosAddr - eosStartAddr, NULL, RF_DONT_REMAP) : NULL; + + /* walk through the PDAs and range-restrict each SU to the region of + * the SU touched on the failed PDA. also compute total data buffer + * space requirements in this step. Ignore the parity for now. */ + + numSect[0] = numSect[1] = 0; + if (new_asm_h[0]) { + new_asm_h[0]->next = dag_h->asmList; + dag_h->asmList = new_asm_h[0]; + for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) { + rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); + numSect[0] += pda->numSector; + } + } + if (new_asm_h[1]) { + new_asm_h[1]->next = dag_h->asmList; + dag_h->asmList = new_asm_h[1]; + for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) { + rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); + numSect[1] += pda->numSector; + } + } + numParitySect = failedPDA->numSector; + + /* allocate buffer space for the data & parity we have to read to + * recover from the failure */ + + if (numSect[0] + numSect[1] + ((rpBufPtr) ? numParitySect : 0)) { /* don't allocate parity + * buf if not needed */ + RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr, numSect[0] + numSect[1] + numParitySect), (char *), allocList); + bufP = rdBuf; + if (rf_degDagDebug) + printf("Newly allocated buffer (%d bytes) is 0x%lx\n", + (int) rf_RaidAddressToByte(raidPtr, numSect[0] + numSect[1] + numParitySect), (unsigned long) bufP); + } + /* now walk through the pdas one last time and assign buffer pointers + * (ugh!). Again, ignore the parity. also, count nodes to find out + * how many bufs need to be xored together */ + (*nXorBufs) = 1; /* in read case, 1 is for parity. 
In write + * case, 1 is for failed data */ + if (new_asm_h[0]) { + for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) { + pda->bufPtr = bufP; + bufP += rf_RaidAddressToByte(raidPtr, pda->numSector); + } + *nXorBufs += new_asm_h[0]->stripeMap->numStripeUnitsAccessed; + } + if (new_asm_h[1]) { + for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) { + pda->bufPtr = bufP; + bufP += rf_RaidAddressToByte(raidPtr, pda->numSector); + } + (*nXorBufs) += new_asm_h[1]->stripeMap->numStripeUnitsAccessed; + } + if (rpBufPtr) + *rpBufPtr = bufP; /* the rest of the buffer is for + * parity */ + + /* the last step is to figure out how many more distinct buffers need + * to get xor'd to produce the missing unit. there's one for each + * user-data read node that overlaps the portion of the failed unit + * being accessed */ + + for (foundit = i = 0, pda = asmap->physInfo; pda; i++, pda = pda->next) { + if (pda == failedPDA) { + i--; + foundit = 1; + continue; + } + if (rf_PDAOverlap(layoutPtr, pda, failedPDA)) { + overlappingPDAs[i] = 1; + (*nXorBufs)++; + } + } + if (!foundit) { + RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA in asm list\n"); + RF_ASSERT(0); + } + if (rf_degDagDebug) { + if (new_asm_h[0]) { + printf("First asm:\n"); + rf_PrintFullAccessStripeMap(new_asm_h[0], 1); + } + if (new_asm_h[1]) { + printf("Second asm:\n"); + rf_PrintFullAccessStripeMap(new_asm_h[1], 1); + } + } } @@ -1160,31 +1048,32 @@ void rf_GenerateFailedAccessASMs( * | rrrrrrrrrrrrrrrr | * */ -void rf_RangeRestrictPDA( - RF_Raid_t *raidPtr, - RF_PhysDiskAddr_t *src, - RF_PhysDiskAddr_t *dest, - int dobuffer, - int doraidaddr) +void +rf_RangeRestrictPDA( + RF_Raid_t * raidPtr, + RF_PhysDiskAddr_t * src, + RF_PhysDiskAddr_t * dest, + int dobuffer, + int doraidaddr) { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector); - RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, 
dest->startSector); - RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector-1); /* use -1 to be sure we stay within SU */ - RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector-1); - RF_SectorNum_t subAddr = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->startSector); /* stripe unit boundary */ - - dest->startSector = subAddr + RF_MAX(soffs,doffs); - dest->numSector = subAddr + RF_MIN(send,dend) + 1 - dest->startSector; - - if (dobuffer) - dest->bufPtr += (soffs > doffs) ? rf_RaidAddressToByte(raidPtr,soffs-doffs) : 0; - if (doraidaddr) { - dest->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->raidAddress) + - rf_StripeUnitOffset(layoutPtr, dest->startSector); - } + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector); + RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector); + RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector - 1); /* use -1 to be sure we + * stay within SU */ + RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector - 1); + RF_SectorNum_t subAddr = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->startSector); /* stripe unit boundary */ + + dest->startSector = subAddr + RF_MAX(soffs, doffs); + dest->numSector = subAddr + RF_MIN(send, dend) + 1 - dest->startSector; + + if (dobuffer) + dest->bufPtr += (soffs > doffs) ? 
rf_RaidAddressToByte(raidPtr, soffs - doffs) : 0; + if (doraidaddr) { + dest->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->raidAddress) + + rf_StripeUnitOffset(layoutPtr, dest->startSector); + } } - /* * Want the highest of these primes to be the largest one * less than the max expected number of columns (won't hurt @@ -1192,93 +1081,90 @@ void rf_RangeRestrictPDA( * --jimz */ #define NLOWPRIMES 8 -static int lowprimes[NLOWPRIMES] = {2,3,5,7,11,13,17,19}; - +static int lowprimes[NLOWPRIMES] = {2, 3, 5, 7, 11, 13, 17, 19}; /***************************************************************************** * compute the workload shift factor. (chained declustering) * * return nonzero if access should shift to secondary, otherwise, * access is to primary *****************************************************************************/ -int rf_compute_workload_shift( - RF_Raid_t *raidPtr, - RF_PhysDiskAddr_t *pda) +int +rf_compute_workload_shift( + RF_Raid_t * raidPtr, + RF_PhysDiskAddr_t * pda) { - /* - * variables: - * d = column of disk containing primary - * f = column of failed disk - * n = number of disks in array - * sd = "shift distance" (number of columns that d is to the right of f) - * row = row of array the access is in - * v = numerator of redirection ratio - * k = denominator of redirection ratio - */ - RF_RowCol_t d, f, sd, row, n; - int k, v, ret, i; - - row = pda->row; - n = raidPtr->numCol; - - /* assign column of primary copy to d */ - d = pda->col; - - /* assign column of dead disk to f */ - for(f=0;((!RF_DEAD_DISK(raidPtr->Disks[row][f].status))&&(f<n));f++); - - RF_ASSERT(f < n); - RF_ASSERT(f != d); - - sd = (f > d) ? 
(n + d - f) : (d - f); - RF_ASSERT(sd < n); - - /* - * v of every k accesses should be redirected - * - * v/k := (n-1-sd)/(n-1) - */ - v = (n-1-sd); - k = (n-1); + /* + * variables: + * d = column of disk containing primary + * f = column of failed disk + * n = number of disks in array + * sd = "shift distance" (number of columns that d is to the right of f) + * row = row of array the access is in + * v = numerator of redirection ratio + * k = denominator of redirection ratio + */ + RF_RowCol_t d, f, sd, row, n; + int k, v, ret, i; + + row = pda->row; + n = raidPtr->numCol; + + /* assign column of primary copy to d */ + d = pda->col; + + /* assign column of dead disk to f */ + for (f = 0; ((!RF_DEAD_DISK(raidPtr->Disks[row][f].status)) && (f < n)); f++); + + RF_ASSERT(f < n); + RF_ASSERT(f != d); + + sd = (f > d) ? (n + d - f) : (d - f); + RF_ASSERT(sd < n); + + /* + * v of every k accesses should be redirected + * + * v/k := (n-1-sd)/(n-1) + */ + v = (n - 1 - sd); + k = (n - 1); #if 1 - /* - * XXX - * Is this worth it? - * - * Now reduce the fraction, by repeatedly factoring - * out primes (just like they teach in elementary school!) - */ - for(i=0;i<NLOWPRIMES;i++) { - if (lowprimes[i] > v) - break; - while (((v%lowprimes[i])==0) && ((k%lowprimes[i])==0)) { - v /= lowprimes[i]; - k /= lowprimes[i]; - } - } + /* + * XXX + * Is this worth it? + * + * Now reduce the fraction, by repeatedly factoring + * out primes (just like they teach in elementary school!) 
+ */ + for (i = 0; i < NLOWPRIMES; i++) { + if (lowprimes[i] > v) + break; + while (((v % lowprimes[i]) == 0) && ((k % lowprimes[i]) == 0)) { + v /= lowprimes[i]; + k /= lowprimes[i]; + } + } #endif - raidPtr->hist_diskreq[row][d]++; - if (raidPtr->hist_diskreq[row][d] > v) { - ret = 0; /* do not redirect */ - } - else { - ret = 1; /* redirect */ - } + raidPtr->hist_diskreq[row][d]++; + if (raidPtr->hist_diskreq[row][d] > v) { + ret = 0; /* do not redirect */ + } else { + ret = 1; /* redirect */ + } #if 0 - printf("d=%d f=%d sd=%d v=%d k=%d ret=%d h=%d\n", d, f, sd, v, k, ret, - raidPtr->hist_diskreq[row][d]); + printf("d=%d f=%d sd=%d v=%d k=%d ret=%d h=%d\n", d, f, sd, v, k, ret, + raidPtr->hist_diskreq[row][d]); #endif - if (raidPtr->hist_diskreq[row][d] >= k) { - /* reset counter */ - raidPtr->hist_diskreq[row][d] = 0; - } - - return(ret); + if (raidPtr->hist_diskreq[row][d] >= k) { + /* reset counter */ + raidPtr->hist_diskreq[row][d] = 0; + } + return (ret); } - /* * Disk selection routines */ @@ -1288,119 +1174,116 @@ int rf_compute_workload_shift( * Both the disk I/Os queued in RAIDframe as well as those at the physical * disk are counted as members of the "queue" */ -void rf_SelectMirrorDiskIdle(RF_DagNode_t *node) +void +rf_SelectMirrorDiskIdle(RF_DagNode_t * node) { - RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; - RF_RowCol_t rowData, colData, rowMirror, colMirror; - int dataQueueLength, mirrorQueueLength, usemirror; - RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *)node->params[0].p; - RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *)node->params[4].p; - RF_PhysDiskAddr_t *tmp_pda; - RF_RaidDisk_t **disks = raidPtr->Disks; - RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; - - /* return the [row col] of the disk with the shortest queue */ - rowData = data_pda->row; - colData = data_pda->col; - rowMirror = mirror_pda->row; - colMirror = mirror_pda->col; - dataQueue = &(dqs[rowData][colData]); - mirrorQueue = 
&(dqs[rowMirror][colMirror]); + RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; + RF_RowCol_t rowData, colData, rowMirror, colMirror; + int dataQueueLength, mirrorQueueLength, usemirror; + RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p; + RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p; + RF_PhysDiskAddr_t *tmp_pda; + RF_RaidDisk_t **disks = raidPtr->Disks; + RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; + + /* return the [row col] of the disk with the shortest queue */ + rowData = data_pda->row; + colData = data_pda->col; + rowMirror = mirror_pda->row; + colMirror = mirror_pda->col; + dataQueue = &(dqs[rowData][colData]); + mirrorQueue = &(dqs[rowMirror][colMirror]); #ifdef RF_LOCK_QUEUES_TO_READ_LEN - RF_LOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ - dataQueueLength = dataQueue->queueLength + dataQueue->numOutstanding; + RF_LOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle"); +#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ + dataQueueLength = dataQueue->queueLength + dataQueue->numOutstanding; #ifdef RF_LOCK_QUEUES_TO_READ_LEN - RF_UNLOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle"); - RF_LOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ - mirrorQueueLength = mirrorQueue->queueLength + mirrorQueue->numOutstanding; + RF_UNLOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle"); + RF_LOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle"); +#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ + mirrorQueueLength = mirrorQueue->queueLength + mirrorQueue->numOutstanding; #ifdef RF_LOCK_QUEUES_TO_READ_LEN - RF_UNLOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ - - usemirror = 0; - if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) { - usemirror = 0; - } - else if (RF_DEAD_DISK(disks[rowData][colData].status)) { - usemirror = 1; - } - else if (dataQueueLength < 
mirrorQueueLength) { - usemirror = 0; - } - else if (mirrorQueueLength < dataQueueLength) { - usemirror = 1; - } - else { - /* queues are equal length. attempt cleverness. */ - if (SNUM_DIFF(dataQueue->last_deq_sector,data_pda->startSector) - <= SNUM_DIFF(mirrorQueue->last_deq_sector,mirror_pda->startSector)) - { - usemirror = 0; - } - else { - usemirror = 1; - } - } - - if (usemirror) { - /* use mirror (parity) disk, swap params 0 & 4 */ - tmp_pda = data_pda; - node->params[0].p = mirror_pda; - node->params[4].p = tmp_pda; - } - else { - /* use data disk, leave param 0 unchanged */ - } - /* printf("dataQueueLength %d, mirrorQueueLength %d\n",dataQueueLength, mirrorQueueLength); */ + RF_UNLOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle"); +#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ + + usemirror = 0; + if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) { + usemirror = 0; + } else + if (RF_DEAD_DISK(disks[rowData][colData].status)) { + usemirror = 1; + } else + if (dataQueueLength < mirrorQueueLength) { + usemirror = 0; + } else + if (mirrorQueueLength < dataQueueLength) { + usemirror = 1; + } else { + /* queues are equal length. attempt + * cleverness. */ + if (SNUM_DIFF(dataQueue->last_deq_sector, data_pda->startSector) + <= SNUM_DIFF(mirrorQueue->last_deq_sector, mirror_pda->startSector)) { + usemirror = 0; + } else { + usemirror = 1; + } + } + + if (usemirror) { + /* use mirror (parity) disk, swap params 0 & 4 */ + tmp_pda = data_pda; + node->params[0].p = mirror_pda; + node->params[4].p = tmp_pda; + } else { + /* use data disk, leave param 0 unchanged */ + } + /* printf("dataQueueLength %d, mirrorQueueLength + * %d\n",dataQueueLength, mirrorQueueLength); */ } - /* * Do simple partitioning. This assumes that * the data and parity disks are laid out identically. 
*/ -void rf_SelectMirrorDiskPartition(RF_DagNode_t *node) +void +rf_SelectMirrorDiskPartition(RF_DagNode_t * node) { - RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; - RF_RowCol_t rowData, colData, rowMirror, colMirror; - RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *)node->params[0].p; - RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *)node->params[4].p; - RF_PhysDiskAddr_t *tmp_pda; - RF_RaidDisk_t **disks = raidPtr->Disks; - RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; - int usemirror; - - /* return the [row col] of the disk with the shortest queue */ - rowData = data_pda->row; - colData = data_pda->col; - rowMirror = mirror_pda->row; - colMirror = mirror_pda->col; - dataQueue = &(dqs[rowData][colData]); - mirrorQueue = &(dqs[rowMirror][colMirror]); - - usemirror = 0; - if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) { - usemirror = 0; - } - else if (RF_DEAD_DISK(disks[rowData][colData].status)) { - usemirror = 1; - } - else if (data_pda->startSector < (disks[rowData][colData].numBlocks / 2)) { - usemirror = 0; - } - else { - usemirror = 1; - } - - if (usemirror) { - /* use mirror (parity) disk, swap params 0 & 4 */ - tmp_pda = data_pda; - node->params[0].p = mirror_pda; - node->params[4].p = tmp_pda; - } - else { - /* use data disk, leave param 0 unchanged */ - } + RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; + RF_RowCol_t rowData, colData, rowMirror, colMirror; + RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p; + RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p; + RF_PhysDiskAddr_t *tmp_pda; + RF_RaidDisk_t **disks = raidPtr->Disks; + RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; + int usemirror; + + /* return the [row col] of the disk with the shortest queue */ + rowData = data_pda->row; + colData = data_pda->col; + rowMirror = mirror_pda->row; + colMirror = mirror_pda->col; + dataQueue = &(dqs[rowData][colData]); + mirrorQueue = 
&(dqs[rowMirror][colMirror]); + + usemirror = 0; + if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) { + usemirror = 0; + } else + if (RF_DEAD_DISK(disks[rowData][colData].status)) { + usemirror = 1; + } else + if (data_pda->startSector < (disks[rowData][colData].numBlocks / 2)) { + usemirror = 0; + } else { + usemirror = 1; + } + + if (usemirror) { + /* use mirror (parity) disk, swap params 0 & 4 */ + tmp_pda = data_pda; + node->params[0].p = mirror_pda; + node->params[4].p = tmp_pda; + } else { + /* use data disk, leave param 0 unchanged */ + } } diff --git a/sys/dev/raidframe/rf_dagutils.h b/sys/dev/raidframe/rf_dagutils.h index cb732879230..abd3fa8f520 100644 --- a/sys/dev/raidframe/rf_dagutils.h +++ b/sys/dev/raidframe/rf_dagutils.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagutils.h,v 1.1 1999/01/11 14:29:12 niklas Exp $ */ -/* $NetBSD: rf_dagutils.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_dagutils.h,v 1.2 1999/02/16 00:02:33 niklas Exp $ */ +/* $NetBSD: rf_dagutils.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,78 +33,6 @@ * *************************************************************************/ -/* - * : - * Log: rf_dagutils.h,v - * Revision 1.19 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.18 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.17 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.16 1996/06/06 17:27:46 jimz - * added another select mirror func (partitioning), changed names so dag - * creation routines can use the appropriate one - * - * fixed old idle mirror func to pick closest arm if queue lengths are equal - * - * Revision 1.15 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.14 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.13 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.12 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.11 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.10 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.9 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.8 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.7 1996/05/03 19:55:27 wvcii - * added misc routines from old dag creation files - * - * Revision 1.6 1995/12/01 15:57:28 root - * added copyright info - * - * Revision 1.5 1995/11/07 16:21:36 wvcii - * modified InitNode and InitNodeFromBuf prototypes - * - */ #include "rf_types.h" #include "rf_dagfuncs.h" @@ -114,79 +42,80 @@ #define _RF__RF_DAGUTILS_H_ struct RF_RedFuncs_s { - int 
(*regular)(RF_DagNode_t *); - char *RegularName; - int (*simple)(RF_DagNode_t *); - char *SimpleName; + int (*regular) (RF_DagNode_t *); + char *RegularName; + int (*simple) (RF_DagNode_t *); + char *SimpleName; }; extern RF_RedFuncs_t rf_xorFuncs; extern RF_RedFuncs_t rf_xorRecoveryFuncs; -void rf_InitNode(RF_DagNode_t *node, RF_NodeStatus_t initstatus, - int commit, - int (*doFunc)(RF_DagNode_t *node), - int (*undoFunc)(RF_DagNode_t *node), - int (*wakeFunc)(RF_DagNode_t *node, int status), - int nSucc, int nAnte, int nParam, int nResult, - RF_DagHeader_t *hdr, char *name, RF_AllocListElem_t *alist); +void +rf_InitNode(RF_DagNode_t * node, RF_NodeStatus_t initstatus, + int commit, + int (*doFunc) (RF_DagNode_t * node), + int (*undoFunc) (RF_DagNode_t * node), + int (*wakeFunc) (RF_DagNode_t * node, int status), + int nSucc, int nAnte, int nParam, int nResult, + RF_DagHeader_t * hdr, char *name, RF_AllocListElem_t * alist); -void rf_FreeDAG(RF_DagHeader_t *dag_h); + void rf_FreeDAG(RF_DagHeader_t * dag_h); -RF_PropHeader_t *rf_MakePropListEntry(RF_DagHeader_t *dag_h, int resultNum, - int paramNum, RF_PropHeader_t *next, RF_AllocListElem_t *allocList); + RF_PropHeader_t *rf_MakePropListEntry(RF_DagHeader_t * dag_h, int resultNum, + int paramNum, RF_PropHeader_t * next, RF_AllocListElem_t * allocList); -int rf_ConfigureDAGs(RF_ShutdownList_t **listp); + int rf_ConfigureDAGs(RF_ShutdownList_t ** listp); -RF_DagHeader_t *rf_AllocDAGHeader(void); + RF_DagHeader_t *rf_AllocDAGHeader(void); -void rf_FreeDAGHeader(RF_DagHeader_t *dh); + void rf_FreeDAGHeader(RF_DagHeader_t * dh); -void *rf_AllocBuffer(RF_Raid_t *raidPtr, RF_DagHeader_t *dag_h, - RF_PhysDiskAddr_t *pda, RF_AllocListElem_t *allocList); + void *rf_AllocBuffer(RF_Raid_t * raidPtr, RF_DagHeader_t * dag_h, + RF_PhysDiskAddr_t * pda, RF_AllocListElem_t * allocList); -char *rf_NodeStatusString(RF_DagNode_t *node); + char *rf_NodeStatusString(RF_DagNode_t * node); -void rf_PrintNodeInfoString(RF_DagNode_t *node); 
+ void rf_PrintNodeInfoString(RF_DagNode_t * node); -int rf_AssignNodeNums(RF_DagHeader_t *dag_h); + int rf_AssignNodeNums(RF_DagHeader_t * dag_h); -int rf_RecurAssignNodeNums(RF_DagNode_t *node, int num, int unvisited); + int rf_RecurAssignNodeNums(RF_DagNode_t * node, int num, int unvisited); -void rf_ResetDAGHeaderPointers(RF_DagHeader_t *dag_h, RF_DagHeader_t *newptr); + void rf_ResetDAGHeaderPointers(RF_DagHeader_t * dag_h, RF_DagHeader_t * newptr); -void rf_RecurResetDAGHeaderPointers(RF_DagNode_t *node, RF_DagHeader_t *newptr); + void rf_RecurResetDAGHeaderPointers(RF_DagNode_t * node, RF_DagHeader_t * newptr); -void rf_PrintDAGList(RF_DagHeader_t *dag_h); + void rf_PrintDAGList(RF_DagHeader_t * dag_h); -int rf_ValidateDAG(RF_DagHeader_t *dag_h); + int rf_ValidateDAG(RF_DagHeader_t * dag_h); -void rf_redirect_asm(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap); + void rf_redirect_asm(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); -void rf_MapUnaccessedPortionOfStripe(RF_Raid_t *raidPtr, - RF_RaidLayout_t *layoutPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, - RF_AccessStripeMapHeader_t **new_asm_h, int *nRodNodes, char **sosBuffer, - char **eosBuffer, RF_AllocListElem_t *allocList); + void rf_MapUnaccessedPortionOfStripe(RF_Raid_t * raidPtr, + RF_RaidLayout_t * layoutPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, + RF_AccessStripeMapHeader_t ** new_asm_h, int *nRodNodes, char **sosBuffer, + char **eosBuffer, RF_AllocListElem_t * allocList); -int rf_PDAOverlap(RF_RaidLayout_t *layoutPtr, RF_PhysDiskAddr_t *src, - RF_PhysDiskAddr_t *dest); + int rf_PDAOverlap(RF_RaidLayout_t * layoutPtr, RF_PhysDiskAddr_t * src, + RF_PhysDiskAddr_t * dest); -void rf_GenerateFailedAccessASMs(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_PhysDiskAddr_t *failedPDA, - RF_DagHeader_t *dag_h, RF_AccessStripeMapHeader_t **new_asm_h, - int *nXorBufs, char **rpBufPtr, char *overlappingPDAs, - RF_AllocListElem_t *allocList); + void 
rf_GenerateFailedAccessASMs(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t * failedPDA, + RF_DagHeader_t * dag_h, RF_AccessStripeMapHeader_t ** new_asm_h, + int *nXorBufs, char **rpBufPtr, char *overlappingPDAs, + RF_AllocListElem_t * allocList); /* flags used by RangeRestrictPDA */ #define RF_RESTRICT_NOBUFFER 0 #define RF_RESTRICT_DOBUFFER 1 -void rf_RangeRestrictPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *src, - RF_PhysDiskAddr_t *dest, int dobuffer, int doraidaddr); + void rf_RangeRestrictPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * src, + RF_PhysDiskAddr_t * dest, int dobuffer, int doraidaddr); -int rf_compute_workload_shift(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda); -void rf_SelectMirrorDiskIdle(RF_DagNode_t *node); -void rf_SelectMirrorDiskPartition(RF_DagNode_t *node); + int rf_compute_workload_shift(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda); + void rf_SelectMirrorDiskIdle(RF_DagNode_t * node); + void rf_SelectMirrorDiskPartition(RF_DagNode_t * node); -#endif /* !_RF__RF_DAGUTILS_H_ */ +#endif /* !_RF__RF_DAGUTILS_H_ */ diff --git a/sys/dev/raidframe/rf_debugMem.c b/sys/dev/raidframe/rf_debugMem.c index 7d32463a11a..ec579d1f08d 100644 --- a/sys/dev/raidframe/rf_debugMem.c +++ b/sys/dev/raidframe/rf_debugMem.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_debugMem.c,v 1.1 1999/01/11 14:29:12 niklas Exp $ */ -/* $NetBSD: rf_debugMem.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_debugMem.c,v 1.2 1999/02/16 00:02:33 niklas Exp $ */ +/* $NetBSD: rf_debugMem.c,v 1.3 1999/02/05 00:06:08 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,7 +28,7 @@ */ /* debugMem.c: memory usage debugging stuff. - * Malloc, Calloc, and Free are #defined everywhere + * Malloc, Calloc, and Free are #defined everywhere * to do_malloc, do_calloc, and do_free. * * if RF_UTILITY is nonzero, it means were compiling one of the @@ -37,109 +37,6 @@ * and to the allocation list stuff. 
*/ -/* : - * Log: rf_debugMem.c,v - * Revision 1.38 1996/08/20 14:45:43 jimz - * add debugging to track memory allocated (amount only, w/out - * excessive sanity checking) - * - * Revision 1.37 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.36 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.35 1996/06/13 08:55:38 jimz - * make error messages refer to file, line of original - * allocation - * - * Revision 1.34 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.33 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.32 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.31 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.30 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.29 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.28 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.27 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.26 1996/05/21 18:53:46 jimz - * return NULL for failed allocations, not panic - * - * Revision 1.25 1996/05/20 16:14:19 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.24 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.23 1996/05/17 12:42:35 jimz - * wrap get_threadid stuff in #ifndef UTILITY for utils which use - * redzone allocation stuff - * - * Revision 1.22 1996/05/16 23:06:09 jimz - * don't warn about NULL alists - * - * Revision 1.21 1996/05/16 22:25:02 jimz - * show allocations for [MC]allocAndAdd - * - * Revision 1.20 1996/05/15 18:30:22 jimz - * print memory allocation as well as frees if memDebug > 1 - * - * Revision 1.19 1996/05/07 17:41:17 jimz - * add "level 2" for memDebug, which will print freed address ranges - * - * Revision 1.18 1996/05/02 20:41:53 jimz - * really fix malloc problem out-of-kernel in memory_hash_insert() - * - * Revision 1.17 1996/05/02 20:04:29 jimz - * fixed malloc deadlock previous change introduced - * - * Revision 1.16 1996/05/01 16:27:26 jimz - * get rid of ALLOCMH - * stop using ccmn_ memory management - * - * Revision 1.15 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.14 1995/12/01 15:56:17 root - * added copyright info - * - */ - #include "rf_types.h" #include "rf_sys.h" @@ -147,14 +44,10 @@ #include "rf_threadstuff.h" #include "rf_threadid.h" 
#include "rf_options.h" -#else /* RF_UTILITY == 0 */ +#else /* RF_UTILITY == 0 */ #include "rf_utility.h" -#endif /* RF_UTILITY == 0 */ +#endif /* RF_UTILITY == 0 */ -#ifndef KERNEL -#include <stdio.h> -#include <assert.h> -#endif /* !KERNEL */ #include "rf_debugMem.h" #include "rf_general.h" @@ -164,127 +57,131 @@ static long tot_mem_in_use = 0, max_mem = 0; #define RF_MH_TABLESIZE 1000 struct mh_struct { - void *address; - int size; - int line; - char *filen; - char allocated; - struct mh_struct *next; + void *address; + int size; + int line; + char *filen; + char allocated; + struct mh_struct *next; }; static struct mh_struct *mh_table[RF_MH_TABLESIZE]; RF_DECLARE_MUTEX(rf_debug_mem_mutex) -static int mh_table_initialized=0; + static int mh_table_initialized = 0; -static void memory_hash_insert(void *addr, int size, int line, char *filen); -static int memory_hash_remove(void *addr, int sz); + static void memory_hash_insert(void *addr, int size, int line, char *filen); + static int memory_hash_remove(void *addr, int sz); -#ifndef KERNEL /* no redzones or "real_" routines in the kernel */ +#ifndef _KERNEL /* no redzones or "real_" routines in the + * kernel */ -static void rf_redzone_free_failed(void *ptr, int size, int line, char *file); + static void rf_redzone_free_failed(void *ptr, int size, int line, char *file); -void *rf_real_redzone_malloc(_size_) - int _size_; + void *rf_real_redzone_malloc(_size_) + int _size_; { - char *p; - - rf_validate_mh_table(); - p = malloc((_size_)+16); - if (p == NULL) - return(p); - RF_ASSERT (p); - *((long *) p) = (_size_) ; - ((char *) p)[(_size_)+8] = '!'; - ((char *) p)[(_size_)+15] = '!'; - p += 8; - return(p); + char *p; + + rf_validate_mh_table(); + p = malloc((_size_) + 16); + if (p == NULL) + return (p); + RF_ASSERT(p); + *((long *) p) = (_size_); + ((char *) p)[(_size_) + 8] = '!'; + ((char *) p)[(_size_) + 15] = '!'; + p += 8; + return (p); } -void *rf_real_redzone_calloc(_n_,_size_) -int _n_,_size_; +void * 
+rf_real_redzone_calloc(_n_, _size_) + int _n_, _size_; { - char *p; - int _sz_; - - rf_validate_mh_table(); - _sz_ = (_n_) * (_size_); - p = malloc((_sz_)+16); - if (p == NULL) - return(p); - bzero(p,(_sz_)+16); - *((long *) p) = (_sz_) ; - ((char *) p)[(_sz_)+8] = '!'; - ((char *) p)[(_sz_)+15] = '!'; - p += 8; - return(p); + char *p; + int _sz_; + + rf_validate_mh_table(); + _sz_ = (_n_) * (_size_); + p = malloc((_sz_) + 16); + if (p == NULL) + return (p); + bzero(p, (_sz_) + 16); + *((long *) p) = (_sz_); + ((char *) p)[(_sz_) + 8] = '!'; + ((char *) p)[(_sz_) + 15] = '!'; + p += 8; + return (p); } -void rf_real_redzone_free(p, line, filen) -char *p; -int line; -char *filen; +void +rf_real_redzone_free(p, line, filen) + char *p; + int line; + char *filen; { - unsigned long _size_; - - rf_validate_mh_table(); - p -= 8; - _size_ = *((long *) p); - if ((((char *) p)[(_size_)+8] != '!') || (((char *) p)[(_size_)+15] != '!')) - rf_redzone_free_failed(p,(_size_),line,filen); - free(p); + unsigned long _size_; + + rf_validate_mh_table(); + p -= 8; + _size_ = *((long *) p); + if ((((char *) p)[(_size_) + 8] != '!') || (((char *) p)[(_size_) + 15] != '!')) + rf_redzone_free_failed(p, (_size_), line, filen); + free(p); } unsigned long rf_mem_alloc = 0; -char *rf_real_Malloc(size, line, file) - int size; - int line; - char *file; +char * +rf_real_Malloc(size, line, file) + int size; + int line; + char *file; { - void *pp; - char *p; - int tid; - - RF_LOCK_MUTEX(rf_debug_mem_mutex); - rf_redzone_malloc(pp, size); - p = pp; - if (p == NULL) { - RF_ERRORMSG3("Unable to malloc %d bytes at line %d file %s\n", size, - line, file); - } - if (rf_memAmtDebug) { - rf_mem_alloc += size; - printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line); - } + void *pp; + char *p; + int tid; + + RF_LOCK_MUTEX(rf_debug_mem_mutex); + rf_redzone_malloc(pp, size); + p = pp; + if (p == NULL) { + RF_ERRORMSG3("Unable to malloc %d bytes at line %d file %s\n", size, + line, file); + } + if 
(rf_memAmtDebug) { + rf_mem_alloc += size; + printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line); + } #if RF_UTILITY == 0 - if (rf_memDebug > 1) { - rf_get_threadid(tid); - printf("[%d] malloc 0x%lx - 0x%lx (%d) %s %d\n", tid, p, p+size, size, - file, line); - } -#endif /* RF_UTILITY == 0 */ - if (rf_memDebug) - rf_record_malloc(p, size, line, file); - RF_UNLOCK_MUTEX(rf_debug_mem_mutex); - return(p); + if (rf_memDebug > 1) { + rf_get_threadid(tid); + printf("[%d] malloc 0x%lx - 0x%lx (%d) %s %d\n", tid, p, p + size, size, + file, line); + } +#endif /* RF_UTILITY == 0 */ + if (rf_memDebug) + rf_record_malloc(p, size, line, file); + RF_UNLOCK_MUTEX(rf_debug_mem_mutex); + return (p); } - #if RF_UTILITY == 0 -char *rf_real_MallocAndAdd(size, alist, line, file) - int size; - RF_AllocListElem_t *alist; - int line; - char *file; +char * +rf_real_MallocAndAdd(size, alist, line, file) + int size; + RF_AllocListElem_t *alist; + int line; + char *file; { - void *pp; - char *p; - int tid; + void *pp; + char *p; + int tid; RF_LOCK_MUTEX(rf_debug_mem_mutex); rf_redzone_malloc(pp, size); p = pp; if (p == NULL) { RF_ERRORMSG3("Unable to malloc %d bytes at line %d file %s\n", size, - line, file); + line, file); } if (rf_memAmtDebug) { rf_mem_alloc += size; @@ -292,8 +189,8 @@ char *rf_real_MallocAndAdd(size, alist, line, file) } if (rf_memDebug > 1) { rf_get_threadid(tid); - printf("[%d] malloc+add 0x%lx - 0x%lx (%d) %s %d\n", tid, p, p+size, - size, file, line); + printf("[%d] malloc+add 0x%lx - 0x%lx (%d) %s %d\n", tid, p, p + size, + size, file, line); } if (alist) { rf_real_AddToAllocList(alist, pp, size, 0); @@ -301,58 +198,59 @@ char *rf_real_MallocAndAdd(size, alist, line, file) if (rf_memDebug) rf_record_malloc(p, size, line, file); RF_UNLOCK_MUTEX(rf_debug_mem_mutex); - return(p); + return (p); } -#endif /* RF_UTILITY == 0 */ - -char *rf_real_Calloc(nel, elsz, line, file) - int nel; - int elsz; - int line; - char *file; +#endif /* RF_UTILITY == 0 */ + +char * 
+rf_real_Calloc(nel, elsz, line, file) + int nel; + int elsz; + int line; + char *file; { - int tid, size; - void *pp; - char *p; - - size = nel * elsz; - RF_LOCK_MUTEX(rf_debug_mem_mutex); - rf_redzone_calloc(pp, nel, elsz); - p = pp; - if (p == NULL) { - RF_ERRORMSG4("Unable to calloc %d objects of size %d at line %d file %s\n", - nel, elsz, line, file); - return(NULL); - } - if (rf_memAmtDebug) { - rf_mem_alloc += size; - printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line); - } + int tid, size; + void *pp; + char *p; + + size = nel * elsz; + RF_LOCK_MUTEX(rf_debug_mem_mutex); + rf_redzone_calloc(pp, nel, elsz); + p = pp; + if (p == NULL) { + RF_ERRORMSG4("Unable to calloc %d objects of size %d at line %d file %s\n", + nel, elsz, line, file); + return (NULL); + } + if (rf_memAmtDebug) { + rf_mem_alloc += size; + printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line); + } #if RF_UTILITY == 0 - if (rf_memDebug > 1) { - rf_get_threadid(tid); - printf("[%d] calloc 0x%lx - 0x%lx (%d,%d) %s %d\n", tid, p, p+size, nel, - elsz, file, line); - } -#endif /* RF_UTILITY == 0 */ - if (rf_memDebug) { - rf_record_malloc(p, size, line, file); - } - RF_UNLOCK_MUTEX(rf_debug_mem_mutex); - return(p); + if (rf_memDebug > 1) { + rf_get_threadid(tid); + printf("[%d] calloc 0x%lx - 0x%lx (%d,%d) %s %d\n", tid, p, p + size, nel, + elsz, file, line); + } +#endif /* RF_UTILITY == 0 */ + if (rf_memDebug) { + rf_record_malloc(p, size, line, file); + } + RF_UNLOCK_MUTEX(rf_debug_mem_mutex); + return (p); } - #if RF_UTILITY == 0 -char *rf_real_CallocAndAdd(nel, elsz, alist, line, file) - int nel; - int elsz; - RF_AllocListElem_t *alist; - int line; - char *file; +char * +rf_real_CallocAndAdd(nel, elsz, alist, line, file) + int nel; + int elsz; + RF_AllocListElem_t *alist; + int line; + char *file; { - int tid, size; - void *pp; - char *p; + int tid, size; + void *pp; + char *p; size = nel * elsz; RF_LOCK_MUTEX(rf_debug_mem_mutex); @@ -360,8 +258,8 @@ char 
*rf_real_CallocAndAdd(nel, elsz, alist, line, file) p = pp; if (p == NULL) { RF_ERRORMSG4("Unable to calloc %d objs of size %d at line %d file %s\n", - nel, elsz, line, file); - return(NULL); + nel, elsz, line, file); + return (NULL); } if (rf_memAmtDebug) { rf_mem_alloc += size; @@ -370,7 +268,7 @@ char *rf_real_CallocAndAdd(nel, elsz, alist, line, file) if (rf_memDebug > 1) { rf_get_threadid(tid); printf("[%d] calloc+add 0x%lx - 0x%lx (%d,%d) %s %d\n", tid, p, - p+size, nel, elsz, file, line); + p + size, nel, elsz, file, line); } if (alist) { rf_real_AddToAllocList(alist, pp, size, 0); @@ -378,201 +276,208 @@ char *rf_real_CallocAndAdd(nel, elsz, alist, line, file) if (rf_memDebug) rf_record_malloc(p, size, line, file); RF_UNLOCK_MUTEX(rf_debug_mem_mutex); - return(p); + return (p); } -#endif /* RF_UTILITY == 0 */ - -void rf_real_Free(p, sz, line, file) - void *p; - int sz; - int line; - char *file; +#endif /* RF_UTILITY == 0 */ + +void +rf_real_Free(p, sz, line, file) + void *p; + int sz; + int line; + char *file; { - int tid; + int tid; #if RF_UTILITY == 0 - if (rf_memDebug > 1) { - rf_get_threadid(tid); - printf("[%d] free 0x%lx - 0x%lx (%d) %s %d\n", tid, p, ((char *)p)+sz, sz, - file, line); - } -#endif /* RF_UTILITY == 0 */ - RF_LOCK_MUTEX(rf_debug_mem_mutex); - if (rf_memAmtDebug) { - rf_mem_alloc -= sz; - printf("%lu - size %d %s:%d\n", rf_mem_alloc, sz, file, line); - } - if (rf_memDebug) { - rf_unrecord_malloc(p,sz); - } - rf_redzone_free(p); - RF_UNLOCK_MUTEX(rf_debug_mem_mutex); + if (rf_memDebug > 1) { + rf_get_threadid(tid); + printf("[%d] free 0x%lx - 0x%lx (%d) %s %d\n", tid, p, ((char *) p) + sz, sz, + file, line); + } +#endif /* RF_UTILITY == 0 */ + RF_LOCK_MUTEX(rf_debug_mem_mutex); + if (rf_memAmtDebug) { + rf_mem_alloc -= sz; + printf("%lu - size %d %s:%d\n", rf_mem_alloc, sz, file, line); + } + if (rf_memDebug) { + rf_unrecord_malloc(p, sz); + } + rf_redzone_free(p); + RF_UNLOCK_MUTEX(rf_debug_mem_mutex); } -void rf_validate_mh_table() 
+void +rf_validate_mh_table() { - int i, size; - struct mh_struct *p; - char *cp; - - return; - for (i=0; i<RF_MH_TABLESIZE; i++) { - for (p=mh_table[i]; p; p=p->next) if (p->allocated) { - cp = ((char *) p->address) - 8; - size = *((long *) cp); - if ((((char *) cp)[(size)+8] != '!') || (((char *) cp)[(size)+15] != '!')) { - rf_redzone_free_failed(cp,(size),__LINE__,__FILE__); - } - } - } + int i, size; + struct mh_struct *p; + char *cp; + + return; + for (i = 0; i < RF_MH_TABLESIZE; i++) { + for (p = mh_table[i]; p; p = p->next) + if (p->allocated) { + cp = ((char *) p->address) - 8; + size = *((long *) cp); + if ((((char *) cp)[(size) + 8] != '!') || (((char *) cp)[(size) + 15] != '!')) { + rf_redzone_free_failed(cp, (size), __LINE__, __FILE__); + } + } + } } -static void rf_redzone_free_failed(ptr,size,line,file) - void *ptr; - int size; - int line; - char *file; +static void +rf_redzone_free_failed(ptr, size, line, file) + void *ptr; + int size; + int line; + char *file; { - RF_ERRORMSG4("Free of 0x%lx (recorded size %d) at %d of %s detected redzone overrun\n",ptr,size,line,file); - RF_ASSERT(0); + RF_ERRORMSG4("Free of 0x%lx (recorded size %d) at %d of %s detected redzone overrun\n", ptr, size, line, file); + RF_ASSERT(0); } +#endif /* !_KERNEL */ -#endif /* !KERNEL */ - -void rf_record_malloc(p, size, line, filen) -void *p; -int size, line; -char *filen; +void +rf_record_malloc(p, size, line, filen) + void *p; + int size, line; + char *filen; { - RF_ASSERT(size != 0); - - /*RF_LOCK_MUTEX(rf_debug_mem_mutex);*/ - memory_hash_insert(p, size, line, filen); - tot_mem_in_use += size; - /*RF_UNLOCK_MUTEX(rf_debug_mem_mutex);*/ - if ( (long) p == rf_memDebugAddress) { - printf("Allocate: debug address allocated from line %d file %s\n",line,filen); - } + RF_ASSERT(size != 0); + + /* RF_LOCK_MUTEX(rf_debug_mem_mutex); */ + memory_hash_insert(p, size, line, filen); + tot_mem_in_use += size; + /* RF_UNLOCK_MUTEX(rf_debug_mem_mutex); */ + if ((long) p == 
rf_memDebugAddress) { + printf("Allocate: debug address allocated from line %d file %s\n", line, filen); + } } -void rf_unrecord_malloc(p, sz) -void *p; -int sz; +void +rf_unrecord_malloc(p, sz) + void *p; + int sz; { - int size; - - /*RF_LOCK_MUTEX(rf_debug_mem_mutex);*/ - size = memory_hash_remove(p, sz); - tot_mem_in_use -= size; - /*RF_UNLOCK_MUTEX(rf_debug_mem_mutex);*/ - if ( (long) p == rf_memDebugAddress) { - printf("Free: Found debug address\n"); /* this is really only a flag line for gdb */ - } + int size; + + /* RF_LOCK_MUTEX(rf_debug_mem_mutex); */ + size = memory_hash_remove(p, sz); + tot_mem_in_use -= size; + /* RF_UNLOCK_MUTEX(rf_debug_mem_mutex); */ + if ((long) p == rf_memDebugAddress) { + printf("Free: Found debug address\n"); /* this is really only a + * flag line for gdb */ + } } -void rf_print_unfreed() +void +rf_print_unfreed() { - int i, foundone=0; - struct mh_struct *p; - - for (i=0; i<RF_MH_TABLESIZE; i++) { - for (p=mh_table[i]; p; p=p->next) if (p->allocated) { - if (!foundone) printf("\n\nThere are unfreed memory locations at program shutdown:\n"); - foundone = 1; - printf("Addr 0x%lx Size %d line %d file %s\n", - (long)p->address,p->size,p->line,p->filen); - } - } - if (tot_mem_in_use) { - printf("%ld total bytes in use\n", tot_mem_in_use); - } + int i, foundone = 0; + struct mh_struct *p; + + for (i = 0; i < RF_MH_TABLESIZE; i++) { + for (p = mh_table[i]; p; p = p->next) + if (p->allocated) { + if (!foundone) + printf("\n\nThere are unfreed memory locations at program shutdown:\n"); + foundone = 1; + printf("Addr 0x%lx Size %d line %d file %s\n", + (long) p->address, p->size, p->line, p->filen); + } + } + if (tot_mem_in_use) { + printf("%ld total bytes in use\n", tot_mem_in_use); + } } -int rf_ConfigureDebugMem(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureDebugMem(listp) + RF_ShutdownList_t **listp; { - int i, rc; - - rc = rf_create_managed_mutex(listp, &rf_debug_mem_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex 
file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); - } - if (rf_memDebug) { - for (i=0; i<RF_MH_TABLESIZE; i++) - mh_table[i] = NULL; - mh_table_initialized=1; - } - return(0); -} + int i, rc; + rc = rf_create_managed_mutex(listp, &rf_debug_mem_mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (rc); + } + if (rf_memDebug) { + for (i = 0; i < RF_MH_TABLESIZE; i++) + mh_table[i] = NULL; + mh_table_initialized = 1; + } + return (0); +} #define HASHADDR(_a_) ( (((unsigned long) _a_)>>3) % RF_MH_TABLESIZE ) -static void memory_hash_insert(addr, size, line, filen) -void *addr; -int size, line; -char *filen; +static void +memory_hash_insert(addr, size, line, filen) + void *addr; + int size, line; + char *filen; { - unsigned long bucket = HASHADDR(addr); - struct mh_struct *p; - - RF_ASSERT(mh_table_initialized); - - /* search for this address in the hash table */ - for (p=mh_table[bucket]; p && (p->address != addr); p=p->next); - if (!p) { -#ifdef KERNEL - RF_Malloc(p,sizeof(struct mh_struct),(struct mh_struct *)); -#else /* KERNEL */ - p = (struct mh_struct *)malloc(sizeof(struct mh_struct)); -#endif /* KERNEL */ - RF_ASSERT(p); - p->next = mh_table[bucket]; - mh_table[bucket] = p; - p->address = addr; - p->allocated = 0; - } - if (p->allocated) { - printf("ERROR: reallocated address 0x%lx from line %d, file %s without intervening free\n",(long) addr, line, filen); - printf(" last allocated from line %d file %s\n",p->line, p->filen); - RF_ASSERT(0); - } - p->size = size; p->line = line; p->filen = filen; - p->allocated = 1; + unsigned long bucket = HASHADDR(addr); + struct mh_struct *p; + + RF_ASSERT(mh_table_initialized); + + /* search for this address in the hash table */ + for (p = mh_table[bucket]; p && (p->address != addr); p = p->next); + if (!p) { + RF_Malloc(p, sizeof(struct mh_struct), (struct mh_struct *)); + RF_ASSERT(p); + p->next = mh_table[bucket]; + mh_table[bucket] = p; + 
p->address = addr; + p->allocated = 0; + } + if (p->allocated) { + printf("ERROR: reallocated address 0x%lx from line %d, file %s without intervening free\n", (long) addr, line, filen); + printf(" last allocated from line %d file %s\n", p->line, p->filen); + RF_ASSERT(0); + } + p->size = size; + p->line = line; + p->filen = filen; + p->allocated = 1; } -static int memory_hash_remove(addr, sz) -void *addr; -int sz; +static int +memory_hash_remove(addr, sz) + void *addr; + int sz; { - unsigned long bucket = HASHADDR(addr); - struct mh_struct *p; - - RF_ASSERT(mh_table_initialized); - for (p=mh_table[bucket]; p && (p->address != addr); p=p->next); - if (!p) { - printf("ERROR: freeing never-allocated address 0x%lx\n",(long) addr); - RF_PANIC(); - } - if (!p->allocated) { - printf("ERROR: freeing unallocated address 0x%lx. Last allocation line %d file %s\n",(long) addr, p->line, p->filen); - RF_PANIC(); - } - if (sz > 0 && p->size != sz) { /* you can suppress this error by using a negative value as the size to free */ - printf("ERROR: incorrect size at free for address 0x%lx: is %d should be %d. Alloc at line %d of file %s\n",(unsigned long) addr, sz, p->size,p->line, p->filen); - RF_PANIC(); - } - p->allocated = 0; - return(p->size); + unsigned long bucket = HASHADDR(addr); + struct mh_struct *p; + + RF_ASSERT(mh_table_initialized); + for (p = mh_table[bucket]; p && (p->address != addr); p = p->next); + if (!p) { + printf("ERROR: freeing never-allocated address 0x%lx\n", (long) addr); + RF_PANIC(); + } + if (!p->allocated) { + printf("ERROR: freeing unallocated address 0x%lx. Last allocation line %d file %s\n", (long) addr, p->line, p->filen); + RF_PANIC(); + } + if (sz > 0 && p->size != sz) { /* you can suppress this error by + * using a negative value as the size + * to free */ + printf("ERROR: incorrect size at free for address 0x%lx: is %d should be %d. 
Alloc at line %d of file %s\n", (unsigned long) addr, sz, p->size, p->line, p->filen); + RF_PANIC(); + } + p->allocated = 0; + return (p->size); } -void rf_ReportMaxMem() +void +rf_ReportMaxMem() { - printf("Max memory used: %d bytes\n",(int)max_mem); -#ifndef KERNEL - fflush(stdout); - fprintf(stderr,"Max memory used: %d bytes\n",max_mem); - fflush(stderr); -#endif /* !KERNEL */ + printf("Max memory used: %d bytes\n", (int) max_mem); } diff --git a/sys/dev/raidframe/rf_debugMem.h b/sys/dev/raidframe/rf_debugMem.h index 2b5f1545d12..133c3acc91c 100644 --- a/sys/dev/raidframe/rf_debugMem.h +++ b/sys/dev/raidframe/rf_debugMem.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_debugMem.h,v 1.1 1999/01/11 14:29:12 niklas Exp $ */ -/* $NetBSD: rf_debugMem.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_debugMem.h,v 1.2 1999/02/16 00:02:34 niklas Exp $ */ +/* $NetBSD: rf_debugMem.h,v 1.4 1999/02/05 00:06:08 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,106 +33,6 @@ * IMPORTANT: if you put the lock/unlock mutex stuff back in here, you * need to take it out of the routines in debugMem.c * - * Log: rf_debugMem.h,v - * Revision 1.27 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.26 1996/06/11 13:46:43 jimz - * make bracing consistent around memory allocation macros - * - * Revision 1.25 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.24 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.23 1996/05/30 11:29:41 jimz - * Numerous bug fixes. 
Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.22 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.21 1996/05/23 22:17:40 jimz - * fix alloclist macro names for kernel - * - * Revision 1.20 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.19 1996/05/23 13:18:23 jimz - * include rf_options.h - * - * Revision 1.18 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.17 1996/05/21 18:51:54 jimz - * cleaned up macro args - * - * Revision 1.16 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.15 1996/05/01 16:26:22 jimz - * get rid of old ccmn stuff - * - * Revision 1.14 1995/12/01 15:58:09 root - * added copyright info - * - * Revision 1.13 1995/10/11 15:26:03 jimz - * zero memory after allocation in kernel (hide effects - * of uninitialized structs) - * - * Revision 1.12 1995/10/06 17:04:15 jimz - * make Malloc and Free in kernel use kernel malloc package, not cam - * dbufs (which is gross, and was exhausting cam zalloc limit) - * - * Revision 1.11 1995/05/01 13:28:00 holland - * parity range locks, locking disk requests, recon+parityscan in kernel, etc. 
- * - * Revision 1.10 1995/04/24 13:25:51 holland - * rewrite to move disk queues, recon, & atomic RMW to kernel - * - * Revision 1.9 1995/02/17 19:39:56 holland - * added size param to all calls to Free(). - * this is ignored at user level, but necessary in the kernel. - * - * Revision 1.8 1995/02/10 17:34:10 holland - * kernelization changes - * - * Revision 1.7 1995/02/03 22:31:36 holland - * many changes related to kernelization - * - * Revision 1.6 1995/02/01 15:13:05 holland - * moved #include of general.h out of raid.h and into each file - * - * Revision 1.5 1995/02/01 14:25:19 holland - * began changes for kernelization: - * changed all instances of mutex_t and cond_t to DECLARE macros - * converted configuration code to use config structure - * - * Revision 1.4 1995/01/11 19:27:02 holland - * many changes related to performance tuning - * - * Revision 1.3 1994/11/29 21:34:56 danner - * Changed type of redzone_calloc and malloc to void *. - * - * Revision 1.2 1994/11/28 22:13:23 danner - * Many macros converted to functions. - * */ #ifndef _RF__RF_DEBUGMEM_H_ @@ -142,13 +42,12 @@ #include "rf_alloclist.h" #include "rf_options.h" -#ifndef KERNEL +#ifndef _KERNEL #if !defined(__NetBSD__) && !defined(__OpenBSD__) -void *malloc(), *calloc(); +void *malloc(), *calloc(); #endif RF_DECLARE_EXTERN_MUTEX(rf_debug_mem_mutex) - /* * redzone malloc, calloc, and free allocate an extra 16 bytes on each * malloc/calloc call to allow tracking of overflows on free. 
@@ -158,11 +57,11 @@ RF_DECLARE_EXTERN_MUTEX(rf_debug_mem_mutex) #define rf_redzone_malloc(_p_,_size_) _p_ = rf_real_redzone_malloc(_size_) #define rf_redzone_calloc(_p_,_n_,_size_) _p_ = rf_real_redzone_calloc(_n_,_size_) #define rf_redzone_free(_p_) rf_real_redzone_free(_p_, __LINE__, __FILE__) -#else /* RF_MEMORY_REDZONES > 0 */ +#else /* RF_MEMORY_REDZONES > 0 */ #define rf_redzone_malloc(_p_,_size_) _p_ = malloc(_size_) #define rf_redzone_calloc(_p_,_nel_,_size_) _p_ = calloc(_nel_,_size_) #define rf_redzone_free(_ptr_) free(_ptr_) -#endif /* RF_MEMORY_REDZONES > 0 */ +#endif /* RF_MEMORY_REDZONES > 0 */ #define RF_Malloc(_p_, _size_, _cast_) { \ _p_ = _cast_ rf_real_Malloc(_size_, __LINE__, __FILE__); \ @@ -184,36 +83,21 @@ RF_DECLARE_EXTERN_MUTEX(rf_debug_mem_mutex) rf_real_Free(__p_, _sz_, __LINE__, __FILE__); \ } -#else /* KERNEL */ +#else /* KERNEL */ #include <sys/types.h> -#if defined(__NetBSD__) || defined(__OpenBSD__) typedef u_int32_t U32; -#else -#include <io/common/iotypes.h> /* just to get defn of U32 */ -#endif /* __NetBSD__ || __OpenBSD__ */ #include <sys/malloc.h> -#if defined(__NetBSD__) || defined(__OpenBSD__) #define RF_Malloc(_p_, _size_, _cast_) \ { \ - _p_ = _cast_ malloc((u_long)_size_, M_DEVBUF, M_WAITOK); \ + _p_ = _cast_ malloc((u_long)_size_, M_RAIDFRAME, M_WAITOK); \ bzero((char *)_p_, _size_); \ if (rf_memDebug) rf_record_malloc(_p_, _size_, __LINE__, __FILE__); \ } -#else - -#define RF_Malloc(_p_, _size_, _cast_) \ - { \ - _p_ = _cast_ malloc((u_long)_size_, BUCKETINDEX(_size_), M_DEVBUF, M_WAITOK); \ - bzero((char *)_p_, _size_); \ - if (rf_memDebug) rf_record_malloc(_p_, _size_, __LINE__, __FILE__); \ - } -#endif /* __NetBSD__ || __OpenBSD__ */ - #define RF_MallocAndAdd(__p_, __size_, __cast_, __alist_) \ { \ RF_Malloc(__p_, __size_, __cast_); \ @@ -234,30 +118,30 @@ typedef u_int32_t U32; #define RF_Free(_p_, _sz_) \ { \ - free((void *)(_p_), M_DEVBUF); \ + free((void *)(_p_), M_RAIDFRAME); \ if (rf_memDebug) 
rf_unrecord_malloc(_p_, (U32) (_sz_)); \ } -#endif /* KERNEL */ +#endif /* _KERNEL */ -#ifndef KERNEL -void *rf_real_redzone_malloc(int size); -void *rf_real_redzone_calloc(int n, int size); -void rf_real_redzone_free(char *p, int line, char *filen); -char *rf_real_Malloc(int size, int line, char *file); -char *rf_real_Calloc(int nel, int elsz, int line, char *file); -void rf_real_Free(void *p, int sz, int line, char *file); -void rf_validate_mh_table(void); +#ifndef _KERNEL +void *rf_real_redzone_malloc(int size); +void *rf_real_redzone_calloc(int n, int size); +void rf_real_redzone_free(char *p, int line, char *filen); +char *rf_real_Malloc(int size, int line, char *file); +char *rf_real_Calloc(int nel, int elsz, int line, char *file); +void rf_real_Free(void *p, int sz, int line, char *file); +void rf_validate_mh_table(void); #if RF_UTILITY == 0 -char *rf_real_MallocAndAdd(int size, RF_AllocListElem_t *alist, int line, char *file); -char *rf_real_CallocAndAdd(int nel, int elsz, RF_AllocListElem_t *alist, int line, char *file); -#endif /* RF_UTILITY == 0 */ -#endif /* !KERNEL */ - -void rf_record_malloc(void *p, int size, int line, char *filen); -void rf_unrecord_malloc(void *p, int sz); -void rf_print_unfreed(void); -int rf_ConfigureDebugMem(RF_ShutdownList_t **listp); -void rf_ReportMaxMem(void); - -#endif /* !_RF__RF_DEBUGMEM_H_ */ +char *rf_real_MallocAndAdd(int size, RF_AllocListElem_t * alist, int line, char *file); +char *rf_real_CallocAndAdd(int nel, int elsz, RF_AllocListElem_t * alist, int line, char *file); +#endif /* RF_UTILITY == 0 */ +#endif /* !KERNEL */ + +void rf_record_malloc(void *p, int size, int line, char *filen); +void rf_unrecord_malloc(void *p, int sz); +void rf_print_unfreed(void); +int rf_ConfigureDebugMem(RF_ShutdownList_t ** listp); +void rf_ReportMaxMem(void); + +#endif /* !_RF__RF_DEBUGMEM_H_ */ diff --git a/sys/dev/raidframe/rf_debugprint.c b/sys/dev/raidframe/rf_debugprint.c index 573d53ae71a..f6546c135f8 100644 --- 
a/sys/dev/raidframe/rf_debugprint.c +++ b/sys/dev/raidframe/rf_debugprint.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_debugprint.c,v 1.1 1999/01/11 14:29:13 niklas Exp $ */ -/* $NetBSD: rf_debugprint.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_debugprint.c,v 1.2 1999/02/16 00:02:34 niklas Exp $ */ +/* $NetBSD: rf_debugprint.c,v 1.3 1999/02/05 00:06:08 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,54 +27,13 @@ * rights to redistribute these changes. */ -/* +/* * Code to do debug printfs. Calls to rf_debug_printf cause the corresponding * information to be printed to a circular buffer rather than the screen. * The point is to try and minimize the timing variations induced by the * printfs, and to capture only the printf's immediately preceding a failure. */ -/* : - * Log: rf_debugprint.c,v - * Revision 1.13 1996/08/07 21:08:31 jimz - * remove bogus ; from mutex decl - * - * Revision 1.12 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.11 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. 
- * - * Revision 1.10 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.9 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.8 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.7 1996/05/20 16:16:06 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.6 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.5 1995/12/01 16:00:45 root - * added copyright info - * - */ - #include "rf_types.h" #include "rf_threadstuff.h" #include "rf_debugprint.h" @@ -84,10 +43,9 @@ #include <sys/param.h> struct RF_Entry_s { - char *cstring; - void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; + char *cstring; + void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; }; - /* space for 1k lines */ #define BUFSHIFT 10 #define BUFSIZE (1<<BUFSHIFT) @@ -96,91 +54,81 @@ struct RF_Entry_s { static struct RF_Entry_s rf_debugprint_buf[BUFSIZE]; static int rf_debugprint_index = 0; RF_DECLARE_STATIC_MUTEX(rf_debug_print_mutex) - -int rf_ConfigureDebugPrint(listp) - RF_ShutdownList_t **listp; + int rf_ConfigureDebugPrint(listp) + RF_ShutdownList_t **listp; { - int rc; - - rc = rf_create_managed_mutex(listp, &rf_debug_print_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); - } - rf_clear_debug_print_buffer(); - return(0); + int rc; + + rc = rf_create_managed_mutex(listp, &rf_debug_print_mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (rc); + } + rf_clear_debug_print_buffer(); + return (0); } -void rf_clear_debug_print_buffer() +void +rf_clear_debug_print_buffer() { - int i; + int i; - for (i=0; i<BUFSIZE; 
i++) - rf_debugprint_buf[i].cstring = NULL; - rf_debugprint_index = 0; + for (i = 0; i < BUFSIZE; i++) + rf_debugprint_buf[i].cstring = NULL; + rf_debugprint_index = 0; } -void rf_debug_printf(s,a1,a2,a3,a4,a5,a6,a7,a8) -char *s; -void *a1,*a2,*a3,*a4,*a5,*a6,*a7,*a8; +void +rf_debug_printf(s, a1, a2, a3, a4, a5, a6, a7, a8) + char *s; + void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; { - int idx; - - if (rf_debugPrintUseBuffer) { - - RF_LOCK_MUTEX(rf_debug_print_mutex); - idx = rf_debugprint_index; - rf_debugprint_index = (rf_debugprint_index+1) & BUFMASK; - RF_UNLOCK_MUTEX(rf_debug_print_mutex); - - rf_debugprint_buf[idx].cstring = s; - rf_debugprint_buf[idx].a1 = a1; - rf_debugprint_buf[idx].a2 = a2; - rf_debugprint_buf[idx].a3 = a3; - rf_debugprint_buf[idx].a4 = a4; - rf_debugprint_buf[idx].a5 = a5; - rf_debugprint_buf[idx].a6 = a6; - rf_debugprint_buf[idx].a7 = a7; - rf_debugprint_buf[idx].a8 = a8; - } - else { - printf(s,a1,a2,a3,a4,a5,a6,a7,a8); - } + int idx; + + if (rf_debugPrintUseBuffer) { + + RF_LOCK_MUTEX(rf_debug_print_mutex); + idx = rf_debugprint_index; + rf_debugprint_index = (rf_debugprint_index + 1) & BUFMASK; + RF_UNLOCK_MUTEX(rf_debug_print_mutex); + + rf_debugprint_buf[idx].cstring = s; + rf_debugprint_buf[idx].a1 = a1; + rf_debugprint_buf[idx].a2 = a2; + rf_debugprint_buf[idx].a3 = a3; + rf_debugprint_buf[idx].a4 = a4; + rf_debugprint_buf[idx].a5 = a5; + rf_debugprint_buf[idx].a6 = a6; + rf_debugprint_buf[idx].a7 = a7; + rf_debugprint_buf[idx].a8 = a8; + } else { + printf(s, a1, a2, a3, a4, a5, a6, a7, a8); + } } -void rf_print_debug_buffer() +void +rf_print_debug_buffer() { - rf_spill_debug_buffer(NULL); + rf_spill_debug_buffer(NULL); } -void rf_spill_debug_buffer(fname) - char *fname; +void +rf_spill_debug_buffer(fname) + char *fname; { - int i; -#ifndef KERNEL - FILE *fp; -#endif /* !KERNEL */ - - if (!rf_debugPrintUseBuffer) - return; - - RF_LOCK_MUTEX(rf_debug_print_mutex); -#ifndef KERNEL - fp = (fname) ? 
fopen(fname,"w") : stdout; - if (!fp) {printf("Unable to open file %s for writing\n",fname); return;} - for (i=rf_debugprint_index+1; i != rf_debugprint_index; i = (i+1)&BUFMASK) if (rf_debugprint_buf[i].cstring) - fprintf(fp,rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3, - rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8); - fprintf(fp,rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3, - rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8); - fclose(fp); -#else /* !KERNEL */ - for (i=rf_debugprint_index+1; i != rf_debugprint_index; i = (i+1)&BUFMASK) if (rf_debugprint_buf[i].cstring) - printf(rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3, - rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8); - printf(rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3, - rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8); -#endif /* !KERNEL */ - RF_UNLOCK_MUTEX(rf_debug_print_mutex); + int i; + + if (!rf_debugPrintUseBuffer) + return; + + RF_LOCK_MUTEX(rf_debug_print_mutex); + + for (i = rf_debugprint_index + 1; i != rf_debugprint_index; i = (i + 1) & BUFMASK) + if (rf_debugprint_buf[i].cstring) + printf(rf_debugprint_buf[i].cstring, rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, rf_debugprint_buf[i].a3, + rf_debugprint_buf[i].a4, rf_debugprint_buf[i].a5, rf_debugprint_buf[i].a6, rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8); + printf(rf_debugprint_buf[i].cstring, rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, rf_debugprint_buf[i].a3, + rf_debugprint_buf[i].a4, rf_debugprint_buf[i].a5, 
rf_debugprint_buf[i].a6, rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8); + RF_UNLOCK_MUTEX(rf_debug_print_mutex); } diff --git a/sys/dev/raidframe/rf_debugprint.h b/sys/dev/raidframe/rf_debugprint.h index 6810fd0a6ee..a0d1168f863 100644 --- a/sys/dev/raidframe/rf_debugprint.h +++ b/sys/dev/raidframe/rf_debugprint.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_debugprint.h,v 1.1 1999/01/11 14:29:13 niklas Exp $ */ -/* $NetBSD: rf_debugprint.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_debugprint.h,v 1.2 1999/02/16 00:02:34 niklas Exp $ */ +/* $NetBSD: rf_debugprint.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ /* * rf_debugprint.h */ @@ -29,36 +29,16 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ -/* - * : - * Log: rf_debugprint.h,v - * Revision 1.4 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.3 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.2 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.1 1996/05/18 19:55:43 jimz - * Initial revision - * - */ #ifndef _RF__RF_DEBUGPRINT_H_ #define _RF__RF_DEBUGPRINT_H_ -int rf_ConfigureDebugPrint(RF_ShutdownList_t **listp); -void rf_clear_debug_print_buffer(void); -void rf_debug_printf(char *s, void *a1, void *a2, void *a3, void *a4, - void *a5, void *a6, void *a7, void *a8); -void rf_print_debug_buffer(void); -void rf_spill_debug_buffer(char *fname); +int rf_ConfigureDebugPrint(RF_ShutdownList_t ** listp); +void rf_clear_debug_print_buffer(void); +void +rf_debug_printf(char *s, void *a1, void *a2, void *a3, void *a4, + void *a5, void *a6, void *a7, void *a8); +void rf_print_debug_buffer(void); +void rf_spill_debug_buffer(char *fname); -#endif /* !_RF__RF_DEBUGPRINT_H_ */ +#endif /* !_RF__RF_DEBUGPRINT_H_ */ diff --git a/sys/dev/raidframe/rf_decluster.c b/sys/dev/raidframe/rf_decluster.c index 11cff33143a..73036456760 100644 --- a/sys/dev/raidframe/rf_decluster.c +++ b/sys/dev/raidframe/rf_decluster.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_decluster.c,v 1.1 1999/01/11 14:29:14 niklas Exp $ */ -/* $NetBSD: rf_decluster.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_decluster.c,v 1.2 1999/02/16 00:02:34 niklas Exp $ */ +/* $NetBSD: rf_decluster.c,v 1.3 1999/02/05 00:06:08 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -48,139 +48,6 @@ * *--------------------------------------------------------------------*/ -/* - * : - * Log: rf_decluster.c,v - * Revision 1.51 1996/08/21 19:47:10 jimz - * fix bogus return values from config - * - * Revision 1.50 1996/08/20 22:41:42 jimz - * better diagnostics for bad blockdesigns - * - * Revision 1.49 1996/07/31 16:56:18 jimz - * dataBytesPerStripe, sectorsPerDisk init arch-indep. - * - * Revision 1.48 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.47 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.46 1996/07/27 18:40:11 jimz - * cleanup sweep - * - * Revision 1.45 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.44 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.43 1996/06/19 17:53:48 jimz - * move GetNumSparePUs, InstallSpareTable ops into layout switch - * - * Revision 1.42 1996/06/17 03:23:48 jimz - * switch DeclusteredDS typing - * - * Revision 1.41 1996/06/11 08:55:15 jimz - * improved error-checking at configuration time - * - * Revision 1.40 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.39 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.38 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.37 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.36 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.35 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.34 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.33 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.32 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.31 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.30 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.29 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.28 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.27 1995/12/01 16:00:08 root - * added copyright info - * - * Revision 1.26 1995/11/28 21:35:12 amiri - * set the RF_BD_DECLUSTERED flag - * - * Revision 1.25 1995/11/17 18:56:00 wvcii - * added prototyping to MapParity - * - * Revision 1.24 1995/07/04 22:25:33 holland - * increased default num bufs - * - * Revision 1.23 1995/07/03 20:23:51 holland - * changed floating recon bufs & head sep yet again - * - * Revision 1.22 1995/07/03 18:12:14 holland - * changed the way the number of floating recon bufs & the head sep - * limit are set - * - * Revision 1.21 1995/07/02 15:07:42 holland - * bug fixes related to getting distributed sparing numbers - * - * Revision 1.20 1995/06/23 13:41:28 robby - * updeated to prototypes in rf_layout.h - * - */ - -#ifdef _KERNEL -#define KERNEL -#endif - - #include "rf_types.h" #include "rf_raid.h" #include "rf_raidframe.h" @@ -193,413 +60,441 @@ #include "rf_shutdown.h" #include "rf_sys.h" -extern int rf_copyback_in_progress; /* debug only */ +extern int rf_copyback_in_progress; /* debug only */ /* found in rf_kintf.c */ -int rf_GetSpareTableFromDaemon(RF_SparetWait_t *req); +int rf_GetSpareTableFromDaemon(RF_SparetWait_t * req); /* configuration code */ -int rf_ConfigureDeclustered( - 
RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureDeclustered( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int b, v, k, r, lambda; /* block design params */ - int i, j; - RF_RowCol_t *first_avail_slot; - RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk; - RF_DeclusteredConfigInfo_t *info; - RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk, extraPUsPerDisk; - RF_StripeCount_t totSparePUsPerDisk; - RF_SectorNum_t diskOffsetOfLastFullTableInSUs; - RF_SectorCount_t SpareSpaceInSUs; - char *cfgBuf = (char *) (cfgPtr->layoutSpecific); - RF_StripeNum_t l, SUID; - - SUID = l = 0; - numCompleteSpareRegionsPerDisk = 0; - - /* 1. create layout specific structure */ - RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return(ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - info->SpareTable = NULL; - - /* 2. 
extract parameters from the config structure */ - if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { - (void) bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN); - } - cfgBuf += RF_SPAREMAP_NAME_LEN; - - b = *( (int *) cfgBuf); cfgBuf += sizeof(int); - v = *( (int *) cfgBuf); cfgBuf += sizeof(int); - k = *( (int *) cfgBuf); cfgBuf += sizeof(int); - r = *( (int *) cfgBuf); cfgBuf += sizeof(int); - lambda = *( (int *) cfgBuf); cfgBuf += sizeof(int); - raidPtr->noRotate = *( (int *) cfgBuf); cfgBuf += sizeof(int); - - /* the sparemaps are generated assuming that parity is rotated, so we issue - * a warning if both distributed sparing and no-rotate are on at the same time - */ - if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) { - RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n"); - } - - if (raidPtr->numCol != v) { - RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol); - return(EINVAL); - } - - /* 3. 
set up the values used in the mapping code */ - info->BlocksPerTable = b; - info->Lambda = lambda; - info->NumParityReps = info->groupSize = k; - info->SUsPerTable = b * (k-1) * layoutPtr->SUsPerPU;/* b blks, k-1 SUs each */ - info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */ - info->PUsPerBlock = k-1; - info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU; - info->TableDepthInPUs = (b*k) / v; - info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */ - - /* used only in distributed sparing case */ - info->FullTablesPerSpareRegion = (v-1) / rf_gcd(r, v-1); /* (v-1)/gcd fulltables */ - info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion; - info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v-1)) * layoutPtr->SUsPerPU; - - /* check to make sure the block design is sufficiently small */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n", - (int)info->FullTableDepthInPUs, - (int)info->SpareSpaceDepthPerRegionInSUs, - (int)layoutPtr->stripeUnitsPerDisk); - return(EINVAL); + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + int b, v, k, r, lambda; /* block design params */ + int i, j; + RF_RowCol_t *first_avail_slot; + RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk; + RF_DeclusteredConfigInfo_t *info; + RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk, + extraPUsPerDisk; + RF_StripeCount_t totSparePUsPerDisk; + RF_SectorNum_t diskOffsetOfLastFullTableInSUs; + RF_SectorCount_t SpareSpaceInSUs; + char *cfgBuf = (char *) (cfgPtr->layoutSpecific); + RF_StripeNum_t l, SUID; + + SUID = l = 0; + numCompleteSpareRegionsPerDisk = 0; + + /* 1. 
create layout specific structure */ + RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); + if (info == NULL) + return (ENOMEM); + layoutPtr->layoutSpecificInfo = (void *) info; + info->SpareTable = NULL; + + /* 2. extract parameters from the config structure */ + if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { + (void) bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN); } - } else { - if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n", - (int)(info->TableDepthInPUs * layoutPtr->SUsPerPU), \ - (int)layoutPtr->stripeUnitsPerDisk); - return(EINVAL); + cfgBuf += RF_SPAREMAP_NAME_LEN; + + b = *((int *) cfgBuf); + cfgBuf += sizeof(int); + v = *((int *) cfgBuf); + cfgBuf += sizeof(int); + k = *((int *) cfgBuf); + cfgBuf += sizeof(int); + r = *((int *) cfgBuf); + cfgBuf += sizeof(int); + lambda = *((int *) cfgBuf); + cfgBuf += sizeof(int); + raidPtr->noRotate = *((int *) cfgBuf); + cfgBuf += sizeof(int); + + /* the sparemaps are generated assuming that parity is rotated, so we + * issue a warning if both distributed sparing and no-rotate are on at + * the same time */ + if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) { + RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n"); } - } - - - /* compute the size of each disk, and the number of tables in the last fulltable (which - * need not be complete) - */ - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - - PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU; - spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs + - (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v-1)); - info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU; - - numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs; - 
info->NumCompleteSRs = numCompleteSpareRegionsPerDisk; - extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs; - - /* assume conservatively that we need the full amount of spare space in one region in order - * to provide spares for the partial spare region at the end of the array. We set "i" to - * the number of tables in the partial spare region. This may actually include some fulltables. - */ - extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - if (extraPUsPerDisk <= 0) i = 0; - else i = extraPUsPerDisk/info->TableDepthInPUs; - - complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion/k) + i/k); - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = i % k; - - /* note that in the last spare region, the spare space is complete even though data/parity space is not */ - totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk+1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - info->TotSparePUsPerDisk = totSparePUsPerDisk; - - layoutPtr->stripeUnitsPerDisk = - ((complete_FT_count/raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */ - info->ExtraTablesPerDisk * info->TableDepthInPUs + - totSparePUsPerDisk /* spare space */ - ) * layoutPtr->SUsPerPU; - layoutPtr->dataStripeUnitsPerDisk = - (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs) - * layoutPtr->SUsPerPU * (k-1) / k; - - } else { - /* non-dist spare case: force each disk to contain an integral number of tables */ - layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - - /* compute the number of tables in the last fulltable, which need not be complete */ - complete_FT_count = - ((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow; - - 
info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = - ((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k; - } - - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - /* find the disk offset of the stripe unit where the last fulltable starts */ - numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow; - diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs; - diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs; - info->DiskOffsetOfLastSpareSpaceChunkInSUs = - diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; - } - info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs; - info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk; - - /* 4. 
create and initialize the lookup tables */ - info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->LayoutTable == NULL) - return(ENOMEM); - info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->OffsetTable == NULL) - return(ENOMEM); - info->BlockTable = rf_make_2d_array(info->TableDepthInPUs*layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); - if (info->BlockTable == NULL) - return(ENOMEM); - - first_avail_slot = rf_make_1d_array(v, NULL); - if (first_avail_slot == NULL) - return(ENOMEM); - - for (i=0; i<b; i++) - for (j=0; j<k; j++) - info->LayoutTable[i][j] = *cfgBuf++; - - /* initialize offset table */ - for (i=0; i<b; i++) for (j=0; j<k; j++) { - info->OffsetTable[i][j] = first_avail_slot[ info->LayoutTable[i][j] ]; - first_avail_slot[ info->LayoutTable[i][j] ]++; - } - - /* initialize block table */ - for (SUID=l=0; l<layoutPtr->SUsPerPU; l++) { - for (i=0; i<b; i++) { - for (j=0; j<k; j++) { - info->BlockTable[ (info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l ] - [ info->LayoutTable[i][j] ] = SUID; - } - SUID++; - } - } - - rf_free_1d_array(first_avail_slot, v); - - /* 5. set up the remaining redundant-but-useful parameters */ - - raidPtr->totalSectors = (k*complete_FT_count + raidPtr->numRow*info->ExtraTablesPerDisk) * - info->SUsPerTable * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k-1); - - /* strange evaluation order below to try and minimize overflow problems */ - - layoutPtr->dataSectorsPerStripe = (k-1) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = k-1; - layoutPtr->numParityCol = 1; - - return(0); -} + if (raidPtr->numCol != v) { + RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol); + return (EINVAL); + } + /* 3. 
set up the values used in the mapping code */ + info->BlocksPerTable = b; + info->Lambda = lambda; + info->NumParityReps = info->groupSize = k; + info->SUsPerTable = b * (k - 1) * layoutPtr->SUsPerPU; /* b blks, k-1 SUs each */ + info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */ + info->PUsPerBlock = k - 1; + info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU; + info->TableDepthInPUs = (b * k) / v; + info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */ + + /* used only in distributed sparing case */ + info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1); /* (v-1)/gcd fulltables */ + info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion; + info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v - 1)) * layoutPtr->SUsPerPU; + + /* check to make sure the block design is sufficiently small */ + if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { + if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) { + RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n", + (int) info->FullTableDepthInPUs, + (int) info->SpareSpaceDepthPerRegionInSUs, + (int) layoutPtr->stripeUnitsPerDisk); + return (EINVAL); + } + } else { + if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) { + RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n", + (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU), \ + (int) layoutPtr->stripeUnitsPerDisk); + return (EINVAL); + } + } + + + /* compute the size of each disk, and the number of tables in the last + * fulltable (which need not be complete) */ + if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { + + PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU; + spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs + + 
(info->TablesPerSpareRegion * info->TableDepthInPUs) / (v - 1)); + info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU; + + numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs; + info->NumCompleteSRs = numCompleteSpareRegionsPerDisk; + extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs; + + /* assume conservatively that we need the full amount of spare + * space in one region in order to provide spares for the + * partial spare region at the end of the array. We set "i" + * to the number of tables in the partial spare region. This + * may actually include some fulltables. */ + extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); + if (extraPUsPerDisk <= 0) + i = 0; + else + i = extraPUsPerDisk / info->TableDepthInPUs; + + complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion / k) + i / k); + info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; + info->ExtraTablesPerDisk = i % k; + + /* note that in the last spare region, the spare space is + * complete even though data/parity space is not */ + totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); + info->TotSparePUsPerDisk = totSparePUsPerDisk; + + layoutPtr->stripeUnitsPerDisk = + ((complete_FT_count / raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */ + info->ExtraTablesPerDisk * info->TableDepthInPUs + + totSparePUsPerDisk /* spare space */ + ) * layoutPtr->SUsPerPU; + layoutPtr->dataStripeUnitsPerDisk = + (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs) + * layoutPtr->SUsPerPU * (k - 1) / k; + + } else { + /* non-dist spare case: force each disk to contain an + * integral number of tables */ + layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU); + layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * 
layoutPtr->SUsPerPU); + + /* compute the number of tables in the last fulltable, which + * need not be complete */ + complete_FT_count = + ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow; + + info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; + info->ExtraTablesPerDisk = + ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k; + } + + raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + + /* find the disk offset of the stripe unit where the last fulltable + * starts */ + numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow; + diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { + SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs; + diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs; + info->DiskOffsetOfLastSpareSpaceChunkInSUs = + diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; + } + info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs; + info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk; + + /* 4. 
create and initialize the lookup tables */ + info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList); + if (info->LayoutTable == NULL) + return (ENOMEM); + info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList); + if (info->OffsetTable == NULL) + return (ENOMEM); + info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); + if (info->BlockTable == NULL) + return (ENOMEM); + + first_avail_slot = rf_make_1d_array(v, NULL); + if (first_avail_slot == NULL) + return (ENOMEM); + + for (i = 0; i < b; i++) + for (j = 0; j < k; j++) + info->LayoutTable[i][j] = *cfgBuf++; + + /* initialize offset table */ + for (i = 0; i < b; i++) + for (j = 0; j < k; j++) { + info->OffsetTable[i][j] = first_avail_slot[info->LayoutTable[i][j]]; + first_avail_slot[info->LayoutTable[i][j]]++; + } + + /* initialize block table */ + for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) { + for (i = 0; i < b; i++) { + for (j = 0; j < k; j++) { + info->BlockTable[(info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l] + [info->LayoutTable[i][j]] = SUID; + } + SUID++; + } + } + + rf_free_1d_array(first_avail_slot, v); + + /* 5. 
set up the remaining redundant-but-useful parameters */ + raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * info->ExtraTablesPerDisk) * + info->SUsPerTable * layoutPtr->sectorsPerStripeUnit; + layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k - 1); + + /* strange evaluation order below to try and minimize overflow + * problems */ + + layoutPtr->dataSectorsPerStripe = (k - 1) * layoutPtr->sectorsPerStripeUnit; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->numDataCol = k - 1; + layoutPtr->numParityCol = 1; + + return (0); +} /* declustering with distributed sparing */ static void rf_ShutdownDeclusteredDS(RF_ThreadArg_t); -static void rf_ShutdownDeclusteredDS(arg) - RF_ThreadArg_t arg; +static void +rf_ShutdownDeclusteredDS(arg) + RF_ThreadArg_t arg; { - RF_DeclusteredConfigInfo_t *info; - RF_Raid_t *raidPtr; + RF_DeclusteredConfigInfo_t *info; + RF_Raid_t *raidPtr; - raidPtr = (RF_Raid_t *)arg; - info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - if (info->SpareTable) - rf_FreeSpareTable(raidPtr); + raidPtr = (RF_Raid_t *) arg; + info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + if (info->SpareTable) + rf_FreeSpareTable(raidPtr); } -int rf_ConfigureDeclusteredDS( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureDeclusteredDS( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - int rc; - - rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr); - if (rc) - return(rc); - rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr); - if (rc) { - RF_ERRORMSG1("Got %d adding shutdown event for DeclusteredDS\n", rc); - rf_ShutdownDeclusteredDS(raidPtr); - return(rc); - } - return(0); + int rc; + + rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr); + if (rc) + return (rc); + rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, 
raidPtr); + if (rc) { + RF_ERRORMSG1("Got %d adding shutdown event for DeclusteredDS\n", rc); + rf_ShutdownDeclusteredDS(raidPtr); + return (rc); + } + return (0); } -void rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidSector; - RF_RowCol_t *row; - RF_RowCol_t *col; - RF_SectorNum_t *diskSector; - int remap; +void +rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap) + RF_Raid_t *raidPtr; + RF_RaidAddr_t raidSector; + RF_RowCol_t *row; + RF_RowCol_t *col; + RF_SectorNum_t *diskSector; + int remap; { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array (across rows) */ - if (raidPtr->numRow == 1) *row = 0; /* avoid a mod and a div in the common case */ - else { - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this disk */ - } - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * 
info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - RepIndex = info->PUsPerBlock - TableID; - if (!raidPtr->noRotate) BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0); - *col = info->LayoutTable[BlockID][BlockOffset]; - - /* remap to distributed spare space if indicated */ - if (remap) { - RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || - (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal)); - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU); - } else { - - outSU = base_suid; - outSU += FullTableID * fulltable_depth; /* offs to strt of FT */ - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */ - outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */ - } - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within a PU */ - - /* convert SUs to sectors, and, if not aligned to SU boundary, add in offset to sector. 
*/ - *diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); - - RF_ASSERT( *col != -1 ); + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; + RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; + RF_StripeNum_t BlockID, BlockOffset, RepIndex; + RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; + RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; + + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + + FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array + * (across rows) */ + if (raidPtr->numRow == 1) + *row = 0; /* avoid a mod and a div in the common case */ + else { + *row = FullTableID % raidPtr->numRow; + FullTableID /= raidPtr->numRow; /* convert to fulltable ID on + * this disk */ + } + if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { + SpareRegion = FullTableID / info->FullTablesPerSpareRegion; + SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; + } + FullTableOffset = SUID % sus_per_fulltable; + TableID = FullTableOffset / info->SUsPerTable; + TableOffset = FullTableOffset - TableID * info->SUsPerTable; + BlockID = TableOffset / info->PUsPerBlock; + BlockOffset = TableOffset - BlockID * info->PUsPerBlock; + BlockID %= info->BlocksPerTable; + RepIndex = info->PUsPerBlock - TableID; + if (!raidPtr->noRotate) + BlockOffset += ((BlockOffset >= RepIndex) ? 
1 : 0); + *col = info->LayoutTable[BlockID][BlockOffset]; + + /* remap to distributed spare space if indicated */ + if (remap) { + RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || + (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal)); + rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU); + } else { + + outSU = base_suid; + outSU += FullTableID * fulltable_depth; /* offs to strt of FT */ + outSU += SpareSpace; /* skip rsvd spare space */ + outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */ + outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */ + } + outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within + * a PU */ + + /* convert SUs to sectors, and, if not aligned to SU boundary, add in + * offset to sector. 
*/ + *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); + + RF_ASSERT(*col != -1); } /* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail */ -void rf_MapParityDeclustered( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapParityDeclustered( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - /* compute row & (possibly) spare space exactly as before */ - FullTableID = SUID / sus_per_fulltable; - if (raidPtr->numRow == 1) *row = 0; /* avoid a mod and a div in the common case */ - else { - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this disk */ - } - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - - /* compute BlockID and RepIndex exactly as before */ - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - /*TableOffset = FullTableOffset % 
info->SUsPerTable;*/ - /*BlockID = (TableOffset / info->PUsPerBlock) % info->BlocksPerTable;*/ - BlockID = TableOffset / info->PUsPerBlock; - /*BlockOffset = TableOffset % info->PUsPerBlock;*/ - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - - /* the parity block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID; - *col = info->LayoutTable[BlockID][RepIndex]; - - if (remap) { - RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || - (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal)); - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU); - } else { - - /* compute sector as before, except use RepIndex instead of BlockOffset */ - outSU = base_suid; - outSU += FullTableID * fulltable_depth; - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; - outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU; - } - - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); - *diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); - - RF_ASSERT( *col != -1 ); -} + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; + RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; + RF_StripeNum_t BlockID, BlockOffset, RepIndex; + RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; + RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; + + rf_decluster_adjust_params(layoutPtr, 
&SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + + /* compute row & (possibly) spare space exactly as before */ + FullTableID = SUID / sus_per_fulltable; + if (raidPtr->numRow == 1) + *row = 0; /* avoid a mod and a div in the common case */ + else { + *row = FullTableID % raidPtr->numRow; + FullTableID /= raidPtr->numRow; /* convert to fulltable ID on + * this disk */ + } + if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { + SpareRegion = FullTableID / info->FullTablesPerSpareRegion; + SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; + } + /* compute BlockID and RepIndex exactly as before */ + FullTableOffset = SUID % sus_per_fulltable; + TableID = FullTableOffset / info->SUsPerTable; + TableOffset = FullTableOffset - TableID * info->SUsPerTable; + /* TableOffset = FullTableOffset % info->SUsPerTable; */ + /* BlockID = (TableOffset / info->PUsPerBlock) % + * info->BlocksPerTable; */ + BlockID = TableOffset / info->PUsPerBlock; + /* BlockOffset = TableOffset % info->PUsPerBlock; */ + BlockOffset = TableOffset - BlockID * info->PUsPerBlock; + BlockID %= info->BlocksPerTable; + + /* the parity block is in the position indicated by RepIndex */ + RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID; + *col = info->LayoutTable[BlockID][RepIndex]; + + if (remap) { + RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || + (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal)); + rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 
1 : 0, SpareRegion, col, &outSU); + } else { + + /* compute sector as before, except use RepIndex instead of + * BlockOffset */ + outSU = base_suid; + outSU += FullTableID * fulltable_depth; + outSU += SpareSpace; /* skip rsvd spare space */ + outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; + outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU; + } + outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); + *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); + + RF_ASSERT(*col != -1); +} /* returns an array of ints identifying the disks that comprise the stripe containing the indicated address. * the caller must _never_ attempt to modify this array. */ -void rf_IdentifyStripeDeclustered( - RF_Raid_t *raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t **diskids, - RF_RowCol_t *outRow) +void +rf_IdentifyStripeDeclustered( + RF_Raid_t * raidPtr, + RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, + RF_RowCol_t * outRow) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0; - RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr); - RF_StripeNum_t stripeID, FullTableID; - int tableOffset; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array (across rows) */ - *outRow = FullTableID % raidPtr->numRow; - stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset into array */ - tableOffset = (stripeID % info->BlocksPerTable); /* find offset into block design table */ - *diskids = info->LayoutTable[tableOffset]; + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + 
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; + RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeNum_t base_suid = 0; + RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr); + RF_StripeNum_t stripeID, FullTableID; + int tableOffset; + + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array + * (across rows) */ + *outRow = FullTableID % raidPtr->numRow; + stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset + * into array */ + tableOffset = (stripeID % info->BlocksPerTable); /* find offset into + * block design table */ + *diskids = info->LayoutTable[tableOffset]; } - /* This returns the default head-separation limit, which is measured * in "required units for reconstruction". Each time a disk fetches * a unit, it bumps a counter. The head-sep code prohibits any disk - * from getting more than headSepLimit counter values ahead of any + * from getting more than headSepLimit counter values ahead of any * other. * * We assume here that the number of floating recon buffers is already @@ -608,7 +503,7 @@ void rf_IdentifyStripeDeclustered( * under recon at any one time. In each table, lambda units are required * from each disk, so given B buffers, the head sep limit has to be * (lambda*B)/r units. We subtract one to avoid weird boundary cases. - * + * * for example, suppose were given 50 buffers, r=19, and lambda=4 as in * the 20.5 design. There are 19 stripes/table to be reconstructed, so * we can have 50/19 tables concurrently under reconstruction, which means @@ -619,24 +514,24 @@ void rf_IdentifyStripeDeclustered( * If numBufsToAccumulate is not 1, we need to limit the head sep further * because multiple bufs will be required for each stripe under recon. 
*/ -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered( - RF_Raid_t *raidPtr) +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitDeclustered( + RF_Raid_t * raidPtr) { - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - return(info->Lambda * raidPtr->numFloatingReconBufs / info->TableDepthInPUs / rf_numBufsToAccumulate); + return (info->Lambda * raidPtr->numFloatingReconBufs / info->TableDepthInPUs / rf_numBufsToAccumulate); } - /* returns the default number of recon buffers to use. The value * is somewhat arbitrary...it's intended to be large enough to allow * for a reasonably large head-sep limit, but small enough that you * don't use up all your system memory with buffers. */ -int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t * raidPtr) { - return(100 * rf_numBufsToAccumulate); + return (100 * rf_numBufsToAccumulate); } - /* sectors in the last fulltable of the array need to be handled * specially since this fulltable can be incomplete. this function * changes the values of certain params to handle this. @@ -653,195 +548,198 @@ int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr) * the users stripe unit number from an offset into the array to * an offset into the last fulltable. 
*/ -void rf_decluster_adjust_params( - RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t *SUID, - RF_StripeCount_t *sus_per_fulltable, - RF_StripeCount_t *fulltable_depth, - RF_StripeNum_t *base_suid) +void +rf_decluster_adjust_params( + RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t * SUID, + RF_StripeCount_t * sus_per_fulltable, + RF_StripeCount_t * fulltable_depth, + RF_StripeNum_t * base_suid) { - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - /* Nothing! */ + RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; +#if (defined(__NetBSD__) || defined (__OpenBSD__)) && defined(_KERNEL) + /* Nothing! */ #else - char pc = layoutPtr->map->parityConfig; + char pc = layoutPtr->map->parityConfig; #endif - if (*SUID >= info->FullTableLimitSUID) { - /* new full table size is size of last full table on disk */ - *sus_per_fulltable = info->ExtraTablesPerDisk * info->SUsPerTable; + if (*SUID >= info->FullTableLimitSUID) { + /* new full table size is size of last full table on disk */ + *sus_per_fulltable = info->ExtraTablesPerDisk * info->SUsPerTable; - /* new full table depth is corresponding depth */ - *fulltable_depth = info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; + /* new full table depth is corresponding depth */ + *fulltable_depth = info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; - /* set up the new base offset */ - *base_suid = info->DiskOffsetOfLastFullTableInSUs; + /* set up the new base offset */ + *base_suid = info->DiskOffsetOfLastFullTableInSUs; - /* convert users array address to an offset into the last fulltable */ - *SUID -= info->FullTableLimitSUID; - } + /* convert users array address to an offset into the last + * fulltable */ + *SUID -= info->FullTableLimitSUID; + } } - /* * map a stripe ID to a parity stripe ID. 
* See comment above RaidAddressToParityStripeID in layout.c. */ -void rf_MapSIDToPSIDDeclustered( - RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru) +void +rf_MapSIDToPSIDDeclustered( + RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, + RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru) { - RF_DeclusteredConfigInfo_t *info; - - info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - - *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable)) - * info->BlocksPerTable + (stripeID % info->BlocksPerTable); - *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU)) - / info->BlocksPerTable; - RF_ASSERT( (*which_ru) < layoutPtr->SUsPerPU/layoutPtr->SUsPerRU); -} + RF_DeclusteredConfigInfo_t *info; + + info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable)) + * info->BlocksPerTable + (stripeID % info->BlocksPerTable); + *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU)) + / info->BlocksPerTable; + RF_ASSERT((*which_ru) < layoutPtr->SUsPerPU / layoutPtr->SUsPerRU); +} /* * Called from MapSector and MapParity to retarget an access at the spare unit. * Modifies the "col" and "outSU" parameters only. 
*/ -void rf_remap_to_spare_space( - RF_RaidLayout_t *layoutPtr, - RF_DeclusteredConfigInfo_t *info, - RF_RowCol_t row, - RF_StripeNum_t FullTableID, - RF_StripeNum_t TableID, - RF_SectorNum_t BlockID, - RF_StripeNum_t base_suid, - RF_StripeNum_t SpareRegion, - RF_RowCol_t *outCol, - RF_StripeNum_t *outSU) +void +rf_remap_to_spare_space( + RF_RaidLayout_t * layoutPtr, + RF_DeclusteredConfigInfo_t * info, + RF_RowCol_t row, + RF_StripeNum_t FullTableID, + RF_StripeNum_t TableID, + RF_SectorNum_t BlockID, + RF_StripeNum_t base_suid, + RF_StripeNum_t SpareRegion, + RF_RowCol_t * outCol, + RF_StripeNum_t * outSU) { - RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, lastSROffset, which_ft; - - /* - * note that FullTableID and hence SpareRegion may have gotten - * tweaked by rf_decluster_adjust_params. We detect this by - * noticing that base_suid is not 0. - */ - if (base_suid == 0) { - ftID = FullTableID; - } - else { - /* - * There may be > 1.0 full tables in the last (i.e. partial) - * spare region. find out which of these we're in. - */ - lastSROffset = info->NumCompleteSRs * info->SpareRegionDepthInSUs; - which_ft = (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU); - - /* compute the actual full table ID */ - ftID = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + which_ft; - SpareRegion = info->NumCompleteSRs; - } - TableInSpareRegion = (ftID * info->NumParityReps + TableID) % info->TablesPerSpareRegion; - - *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk; - RF_ASSERT( *outCol != -1); - - spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ? + RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, lastSROffset, + which_ft; + + /* + * note that FullTableID and hence SpareRegion may have gotten + * tweaked by rf_decluster_adjust_params. We detect this by + * noticing that base_suid is not 0. 
+ */ + if (base_suid == 0) { + ftID = FullTableID; + } else { + /* + * There may be > 1.0 full tables in the last (i.e. partial) + * spare region. find out which of these we're in. + */ + lastSROffset = info->NumCompleteSRs * info->SpareRegionDepthInSUs; + which_ft = (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU); + + /* compute the actual full table ID */ + ftID = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + which_ft; + SpareRegion = info->NumCompleteSRs; + } + TableInSpareRegion = (ftID * info->NumParityReps + TableID) % info->TablesPerSpareRegion; + + *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk; + RF_ASSERT(*outCol != -1); + + spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ? info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU : - (SpareRegion+1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs; - *outSU = spareTableStartSU + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs; - if (*outSU >= layoutPtr->stripeUnitsPerDisk) { - printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n",(long)*outSU); - } + (SpareRegion + 1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs; + *outSU = spareTableStartSU + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs; + if (*outSU >= layoutPtr->stripeUnitsPerDisk) { + printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n", (long) *outSU); + } } -int rf_InstallSpareTable( - RF_Raid_t *raidPtr, - RF_RowCol_t frow, - RF_RowCol_t fcol) +int +rf_InstallSpareTable( + RF_Raid_t * raidPtr, + RF_RowCol_t frow, + RF_RowCol_t fcol) { - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_SparetWait_t *req; - int retcode; - - RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *)); - req->C = 
raidPtr->numCol; - req->G = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol; - req->fcol = fcol; - req->SUsPerPU = raidPtr->Layout.SUsPerPU; - req->TablesPerSpareRegion = info->TablesPerSpareRegion; - req->BlocksPerTable = info->BlocksPerTable; - req->TableDepthInPUs = info->TableDepthInPUs; - req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs; - -#ifndef KERNEL - info->SpareTable = rf_ReadSpareTable(req, info->sparemap_fname); - RF_Free(req, sizeof(*req)); - retcode = (info->SpareTable) ? 0 : 1; -#else /* !KERNEL */ - retcode = rf_GetSpareTableFromDaemon(req); - RF_ASSERT(!retcode); /* XXX -- fix this to recover gracefully -- XXX */ -#endif /* !KERNEL */ - - return(retcode); + RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_SparetWait_t *req; + int retcode; + + RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *)); + req->C = raidPtr->numCol; + req->G = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol; + req->fcol = fcol; + req->SUsPerPU = raidPtr->Layout.SUsPerPU; + req->TablesPerSpareRegion = info->TablesPerSpareRegion; + req->BlocksPerTable = info->BlocksPerTable; + req->TableDepthInPUs = info->TableDepthInPUs; + req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs; + + retcode = rf_GetSpareTableFromDaemon(req); + RF_ASSERT(!retcode); /* XXX -- fix this to recover gracefully -- + * XXX */ + return (retcode); } - -#ifdef KERNEL /* * Invoked via ioctl to install a spare table in the kernel. 
*/ -int rf_SetSpareTable(raidPtr, data) - RF_Raid_t *raidPtr; - void *data; +int +rf_SetSpareTable(raidPtr, data) + RF_Raid_t *raidPtr; + void *data; { - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_SpareTableEntry_t **ptrs; - int i, retcode; - - /* what we need to copyin is a 2-d array, so first copyin the user pointers to the rows in the table */ - RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **)); - retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); - - if (retcode) return(retcode); - - /* now allocate kernel space for the row pointers */ - RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **)); - - /* now allocate kernel space for each row in the table, and copy it in from user space */ - for (i=0; i<info->TablesPerSpareRegion; i++) { - RF_Malloc(info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *)); - retcode = copyin(ptrs[i], info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t)); - if (retcode) { - info->SpareTable = NULL; /* blow off the memory we've allocated */ - return(retcode); - } - } + RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_SpareTableEntry_t **ptrs; + int i, retcode; + + /* what we need to copyin is a 2-d array, so first copyin the user + * pointers to the rows in the table */ + RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **)); + retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); + + if (retcode) + return (retcode); + + /* now allocate kernel space for the row pointers */ + RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), 
(RF_SpareTableEntry_t **)); + + /* now allocate kernel space for each row in the table, and copy it in + * from user space */ + for (i = 0; i < info->TablesPerSpareRegion; i++) { + RF_Malloc(info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *)); + retcode = copyin(ptrs[i], info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t)); + if (retcode) { + info->SpareTable = NULL; /* blow off the memory + * we've allocated */ + return (retcode); + } + } - /* free up the temporary array we used */ - RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); + /* free up the temporary array we used */ + RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); - return(0); + return (0); } -#endif /* KERNEL */ -RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(raidPtr) - RF_Raid_t *raidPtr; +RF_ReconUnitCount_t +rf_GetNumSpareRUsDeclustered(raidPtr) + RF_Raid_t *raidPtr; { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - return( ((RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk ); + return (((RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk); } -void rf_FreeSpareTable(raidPtr) - RF_Raid_t *raidPtr; +void +rf_FreeSpareTable(raidPtr) + RF_Raid_t *raidPtr; { - long i; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_SpareTableEntry_t **table = info->SpareTable; - - for (i=0; i<info->TablesPerSpareRegion; i++) {RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));} - RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); - info->SpareTable = (RF_SpareTableEntry_t **) NULL; + long i; + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + 
RF_SpareTableEntry_t **table = info->SpareTable; + + for (i = 0; i < info->TablesPerSpareRegion; i++) { + RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t)); + } + RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); + info->SpareTable = (RF_SpareTableEntry_t **) NULL; } diff --git a/sys/dev/raidframe/rf_decluster.h b/sys/dev/raidframe/rf_decluster.h index 5e08fa12a55..e4e3ac40c7d 100644 --- a/sys/dev/raidframe/rf_decluster.h +++ b/sys/dev/raidframe/rf_decluster.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_decluster.h,v 1.1 1999/01/11 14:29:14 niklas Exp $ */ -/* $NetBSD: rf_decluster.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_decluster.h,v 1.2 1999/02/16 00:02:35 niklas Exp $ */ +/* $NetBSD: rf_decluster.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -36,68 +36,6 @@ * *--------------------------------------------------------------------*/ -/* - * : - * Log: rf_decluster.h,v - * Revision 1.20 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.19 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.18 1996/06/19 17:53:48 jimz - * move GetNumSparePUs, InstallSpareTable ops into layout switch - * - * Revision 1.17 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.16 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.15 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.14 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.13 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.12 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.11 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.10 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.9 1995/12/01 15:58:23 root - * added copyright info - * - * Revision 1.8 1995/11/17 18:57:02 wvcii - * added prototyping to MapParity - * - * Revision 1.7 1995/07/02 15:08:31 holland - * bug fixes related to getting distributed sparing numbers - * - * Revision 1.6 1995/06/23 13:41:18 robby - * updeated to prototypes in rf_layout.h - * - */ - #ifndef _RF__RF_DECLUSTER_H_ #define _RF__RF_DECLUSTER_H_ @@ -109,74 +47,95 @@ * the associated inverse mapping. */ struct RF_SpareTableEntry_s { - u_int spareDisk; /* disk to which this block is spared */ - u_int spareBlockOffsetInSUs; /* offset into spare table for that disk */ + u_int spareDisk; /* disk to which this block is spared */ + u_int spareBlockOffsetInSUs; /* offset into spare table for that + * disk */ }; - #define RF_SPAREMAP_NAME_LEN 128 /* this is the layout-specific info structure for the declustered layout. */ struct RF_DeclusteredConfigInfo_s { - RF_StripeCount_t groupSize; /* no. 
of stripe units per parity stripe */ - RF_RowCol_t **LayoutTable; /* the block design table */ - RF_RowCol_t **OffsetTable; /* the sector offset table */ - RF_RowCol_t **BlockTable; /* the block membership table */ - RF_StripeCount_t SUsPerFullTable; /* stripe units per full table */ - RF_StripeCount_t SUsPerTable; /* stripe units per table */ - RF_StripeCount_t PUsPerBlock; /* parity units per block */ - RF_StripeCount_t SUsPerBlock; /* stripe units per block */ - RF_StripeCount_t BlocksPerTable; /* block design tuples per table */ - RF_StripeCount_t NumParityReps; /* tables per full table */ - RF_StripeCount_t TableDepthInPUs; /* PUs on one disk in 1 table */ - RF_StripeCount_t FullTableDepthInPUs; /* PUs on one disk in 1 fulltable */ - RF_StripeCount_t FullTableLimitSUID; /* SU where partial fulltables start */ - RF_StripeCount_t ExtraTablesPerDisk; /* # of tables in last fulltable */ - RF_SectorNum_t DiskOffsetOfLastFullTableInSUs; /* disk offs of partial ft, if any */ - RF_StripeCount_t numCompleteFullTablesPerDisk; /* ft identifier of partial ft, if any */ - u_int Lambda; /* the pair count in the block design */ + RF_StripeCount_t groupSize; /* no. 
of stripe units per parity + * stripe */ + RF_RowCol_t **LayoutTable; /* the block design table */ + RF_RowCol_t **OffsetTable; /* the sector offset table */ + RF_RowCol_t **BlockTable; /* the block membership table */ + RF_StripeCount_t SUsPerFullTable; /* stripe units per full table */ + RF_StripeCount_t SUsPerTable; /* stripe units per table */ + RF_StripeCount_t PUsPerBlock; /* parity units per block */ + RF_StripeCount_t SUsPerBlock; /* stripe units per block */ + RF_StripeCount_t BlocksPerTable; /* block design tuples per + * table */ + RF_StripeCount_t NumParityReps; /* tables per full table */ + RF_StripeCount_t TableDepthInPUs; /* PUs on one disk in 1 table */ + RF_StripeCount_t FullTableDepthInPUs; /* PUs on one disk in 1 + * fulltable */ + RF_StripeCount_t FullTableLimitSUID; /* SU where partial fulltables + * start */ + RF_StripeCount_t ExtraTablesPerDisk; /* # of tables in last + * fulltable */ + RF_SectorNum_t DiskOffsetOfLastFullTableInSUs; /* disk offs of partial + * ft, if any */ + RF_StripeCount_t numCompleteFullTablesPerDisk; /* ft identifier of + * partial ft, if any */ + u_int Lambda; /* the pair count in the block design */ - /* these are used only in the distributed-sparing case */ - RF_StripeCount_t FullTablesPerSpareRegion; /* # of ft's comprising 1 spare region */ - RF_StripeCount_t TablesPerSpareRegion; /* # of tables */ - RF_SectorCount_t SpareSpaceDepthPerRegionInSUs; /* spare space/disk/region */ - RF_SectorCount_t SpareRegionDepthInSUs; /* # of units/disk/region */ - RF_SectorNum_t DiskOffsetOfLastSpareSpaceChunkInSUs; /* locates sp space after partial ft */ - RF_StripeCount_t TotSparePUsPerDisk; /* total number of spare PUs per disk */ - RF_StripeCount_t NumCompleteSRs; - RF_SpareTableEntry_t **SpareTable; /* remap table for spare space */ - char sparemap_fname[RF_SPAREMAP_NAME_LEN]; /* where to find sparemap. 
not used in kernel */ + /* these are used only in the distributed-sparing case */ + RF_StripeCount_t FullTablesPerSpareRegion; /* # of ft's comprising + * 1 spare region */ + RF_StripeCount_t TablesPerSpareRegion; /* # of tables */ + RF_SectorCount_t SpareSpaceDepthPerRegionInSUs; /* spare + * space/disk/region */ + RF_SectorCount_t SpareRegionDepthInSUs; /* # of units/disk/region */ + RF_SectorNum_t DiskOffsetOfLastSpareSpaceChunkInSUs; /* locates sp space + * after partial ft */ + RF_StripeCount_t TotSparePUsPerDisk; /* total number of spare PUs + * per disk */ + RF_StripeCount_t NumCompleteSRs; + RF_SpareTableEntry_t **SpareTable; /* remap table for spare space */ + char sparemap_fname[RF_SPAREMAP_NAME_LEN]; /* where to find + * sparemap. not used in + * kernel */ }; -int rf_ConfigureDeclustered(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -int rf_ConfigureDeclusteredDS(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); +int +rf_ConfigureDeclustered(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int +rf_ConfigureDeclusteredDS(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); -void rf_MapSectorDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_MapParityDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_IdentifyStripeDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, RF_RowCol_t *outRow); -void rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru); -int rf_InstallSpareTable(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol); -void rf_FreeSpareTable(RF_Raid_t *raidPtr); +void +rf_MapSectorDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, 
RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_MapParityDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_IdentifyStripeDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outRow); +void +rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru); +int rf_InstallSpareTable(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol); +void rf_FreeSpareTable(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t *raidPtr); -int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t * raidPtr); +int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t * raidPtr); -void rf_decluster_adjust_params(RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t *SUID, RF_StripeCount_t *sus_per_fulltable, - RF_StripeCount_t *fulltable_depth, RF_StripeNum_t *base_suid); -void rf_remap_to_spare_space( -RF_RaidLayout_t *layoutPtr, -RF_DeclusteredConfigInfo_t *info, RF_RowCol_t row, RF_StripeNum_t FullTableID, - RF_StripeNum_t TableID, RF_SectorNum_t BlockID, RF_StripeNum_t base_suid, - RF_StripeNum_t SpareRegion, RF_RowCol_t *outCol, RF_StripeNum_t *outSU); -int rf_SetSpareTable(RF_Raid_t *raidPtr, void *data); -RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(RF_Raid_t *raidPtr); +void +rf_decluster_adjust_params(RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t * SUID, RF_StripeCount_t * sus_per_fulltable, + RF_StripeCount_t * fulltable_depth, RF_StripeNum_t * base_suid); +void +rf_remap_to_spare_space( + RF_RaidLayout_t * layoutPtr, + RF_DeclusteredConfigInfo_t * info, RF_RowCol_t row, RF_StripeNum_t FullTableID, + RF_StripeNum_t TableID, RF_SectorNum_t BlockID, RF_StripeNum_t base_suid, + RF_StripeNum_t SpareRegion, RF_RowCol_t * outCol, 
RF_StripeNum_t * outSU); +int rf_SetSpareTable(RF_Raid_t * raidPtr, void *data); +RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(RF_Raid_t * raidPtr); -#endif /* !_RF__RF_DECLUSTER_H_ */ +#endif /* !_RF__RF_DECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_declusterPQ.c b/sys/dev/raidframe/rf_declusterPQ.c index 75acfa32670..d1ea277bb3a 100644 --- a/sys/dev/raidframe/rf_declusterPQ.c +++ b/sys/dev/raidframe/rf_declusterPQ.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_declusterPQ.c,v 1.1 1999/01/11 14:29:14 niklas Exp $ */ -/* $NetBSD: rf_declusterPQ.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_declusterPQ.c,v 1.2 1999/02/16 00:02:35 niklas Exp $ */ +/* $NetBSD: rf_declusterPQ.c,v 1.3 1999/02/05 00:06:09 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -35,130 +35,6 @@ * *--------------------------------------------------*/ -/* - * $Header: /cvs/OpenBSD/src/sys/dev/raidframe/Attic/rf_declusterPQ.c,v 1.1 1999/01/11 14:29:14 niklas Exp $ - * - * Log: rf_declusterPQ.c,v - * Revision 1.34 1996/08/21 19:47:14 jimz - * fix bogus return values from config - * - * Revision 1.33 1996/08/21 15:09:16 jimz - * cleanup debugging spoo - * - * Revision 1.32 1996/08/21 04:13:36 jimz - * debug with EvenOdd - * - * Revision 1.31 1996/08/20 22:41:54 jimz - * 2 parity disks, not 1 - * - * Revision 1.30 1996/07/31 16:56:18 jimz - * dataBytesPerStripe, sectorsPerDisk init arch-indep. - * - * Revision 1.29 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.28 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.27 1996/06/11 08:45:12 jimz - * improved error-checking on array configuration - * - * Revision 1.26 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.25 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.24 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.23 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.22 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.21 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.20 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.19 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.18 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.17 1996/05/17 00:52:56 jimz - * RepIndex was not being initialized before the computation of - * RepIndexQ in MapQDeclusteredPQ(). I copied the initialization - * from MapParityDeclusteredPQ(). Hope that was right. 
- * - * Revision 1.16 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.15 1995/12/01 15:57:46 root - * added copyright info - * - * Revision 1.14 1995/11/17 19:00:13 wvcii - * added prototyping to MapParity - * created MapQ - * - * Revision 1.13 1995/10/05 22:20:48 jimz - * free_1d_array() takes two args; provide them both - * - * Revision 1.12 1995/09/06 19:26:33 wvcii - * offset cfgBuf by sparemap length (ConfigureDeclusteredPQ) - * - * Revision 1.11 95/06/23 13:41:11 robby - * updeated to prototypes in rf_layout.h - * - * Revision 1.10 1995/05/02 22:46:53 holland - * minor code cleanups. - * - * Revision 1.9 1995/03/15 20:45:23 holland - * distr sparing changes. - * - * Revision 1.8 1995/03/01 20:25:48 holland - * kernelization changes - * - * Revision 1.7 1995/02/17 19:39:56 holland - * added size param to all calls to Free(). - * this is ignored at user level, but necessary in the kernel. - * - * Revision 1.6 1995/02/10 17:34:10 holland - * kernelization changes - * - * Revision 1.5 1995/02/03 22:31:36 holland - * many changes related to kernelization - * - * Revision 1.4 1995/02/01 15:13:05 holland - * moved #include of general.h out of raid.h and into each file - * - * Revision 1.3 1995/02/01 14:25:19 holland - * began changes for kernelization: - * changed all instances of mutex_t and cond_t to DECLARE macros - * converted configuration code to use config structure - * - * Revision 1.2 1994/11/28 22:13:56 danner - * corrected some mapping bugs. 
- * - */ #include "rf_types.h" #include "rf_raid.h" @@ -172,418 +48,442 @@ /* configuration code */ -int rf_ConfigureDeclusteredPQ( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureDeclusteredPQ( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int b, v, k, r, lambda; /* block design params */ - int i, j, l; - int *first_avail_slot; - int complete_FT_count, SUID; - RF_DeclusteredConfigInfo_t *info; - int numCompleteFullTablesPerDisk; - int PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk = 0, extraPUsPerDisk; - int totSparePUsPerDisk; - int diskOffsetOfLastFullTableInSUs, SpareSpaceInSUs; - char *cfgBuf = (char *) (cfgPtr->layoutSpecific); - - cfgBuf += RF_SPAREMAP_NAME_LEN; - - b = *( (int *) cfgBuf); cfgBuf += sizeof(int); - v = *( (int *) cfgBuf); cfgBuf += sizeof(int); - k = *( (int *) cfgBuf); cfgBuf += sizeof(int); - r = *( (int *) cfgBuf); cfgBuf += sizeof(int); - lambda = *( (int *) cfgBuf); cfgBuf += sizeof(int); - raidPtr->noRotate = *( (int *) cfgBuf); cfgBuf += sizeof(int); - - if (k <= 2) { - printf("RAIDFRAME: k=%d, minimum value 2\n", k); - return(EINVAL); - } - - /* 1. create layout specific structure */ - RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return(ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* the sparemaps are generated assuming that parity is rotated, so we issue - * a warning if both distributed sparing and no-rotate are on at the same time - */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) { - RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n"); - } - - if (raidPtr->numCol != v) { - RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol); - return(EINVAL); - } - - /* 3. 
set up the values used in devRaidMap */ - info->BlocksPerTable = b; - info->NumParityReps = info->groupSize = k; - info->PUsPerBlock = k-2; /* PQ */ - info->SUsPerTable = b * info->PUsPerBlock * layoutPtr->SUsPerPU;/* b blks, k-1 SUs each */ - info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */ - info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU; - info->TableDepthInPUs = (b*k) / v; - info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */ - - /* used only in distributed sparing case */ - info->FullTablesPerSpareRegion = (v-1) / rf_gcd(r, v-1); /* (v-1)/gcd fulltables */ - info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion; - info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v-1)) * layoutPtr->SUsPerPU; - - /* check to make sure the block design is sufficiently small */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n", - (int)info->FullTableDepthInPUs, - (int)info->SpareSpaceDepthPerRegionInSUs, - (int)layoutPtr->stripeUnitsPerDisk); - return(EINVAL); + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + int b, v, k, r, lambda; /* block design params */ + int i, j, l; + int *first_avail_slot; + int complete_FT_count, SUID; + RF_DeclusteredConfigInfo_t *info; + int numCompleteFullTablesPerDisk; + int PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk = 0, + extraPUsPerDisk; + int totSparePUsPerDisk; + int diskOffsetOfLastFullTableInSUs, SpareSpaceInSUs; + char *cfgBuf = (char *) (cfgPtr->layoutSpecific); + + cfgBuf += RF_SPAREMAP_NAME_LEN; + + b = *((int *) cfgBuf); + cfgBuf += sizeof(int); + v = *((int *) cfgBuf); + cfgBuf += sizeof(int); + k = *((int *) cfgBuf); + cfgBuf += sizeof(int); + r = *((int *) cfgBuf); + cfgBuf 
+= sizeof(int); + lambda = *((int *) cfgBuf); + cfgBuf += sizeof(int); + raidPtr->noRotate = *((int *) cfgBuf); + cfgBuf += sizeof(int); + + if (k <= 2) { + printf("RAIDFRAME: k=%d, minimum value 2\n", k); + return (EINVAL); + } + /* 1. create layout specific structure */ + RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); + if (info == NULL) + return (ENOMEM); + layoutPtr->layoutSpecificInfo = (void *) info; + + /* the sparemaps are generated assuming that parity is rotated, so we + * issue a warning if both distributed sparing and no-rotate are on at + * the same time */ + if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) { + RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n"); + } + if (raidPtr->numCol != v) { + RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol); + return (EINVAL); + } + /* 3. set up the values used in devRaidMap */ + info->BlocksPerTable = b; + info->NumParityReps = info->groupSize = k; + info->PUsPerBlock = k - 2; /* PQ */ + info->SUsPerTable = b * info->PUsPerBlock * layoutPtr->SUsPerPU; /* b blks, k-1 SUs each */ + info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */ + info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU; + info->TableDepthInPUs = (b * k) / v; + info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */ + + /* used only in distributed sparing case */ + info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1); /* (v-1)/gcd fulltables */ + info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion; + info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v - 1)) * layoutPtr->SUsPerPU; + + /* check to make sure the block design is sufficiently small */ + if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { + if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + 
info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) { + RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n", + (int) info->FullTableDepthInPUs, + (int) info->SpareSpaceDepthPerRegionInSUs, + (int) layoutPtr->stripeUnitsPerDisk); + return (EINVAL); + } + } else { + if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) { + RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n", + (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU), + (int) layoutPtr->stripeUnitsPerDisk); + return (EINVAL); + } + } + + + /* compute the size of each disk, and the number of tables in the last + * fulltable (which need not be complete) */ + if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { + + PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU; + spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs + + (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v - 1)); + info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU; + + numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs; + info->NumCompleteSRs = numCompleteSpareRegionsPerDisk; + extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs; + + /* assume conservatively that we need the full amount of spare + * space in one region in order to provide spares for the + * partial spare region at the end of the array. We set "i" + * to the number of tables in the partial spare region. This + * may actually include some fulltables. 
*/ + extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); + if (extraPUsPerDisk <= 0) + i = 0; + else + i = extraPUsPerDisk / info->TableDepthInPUs; + + complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion / k) + i / k); + info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; + info->ExtraTablesPerDisk = i % k; + + /* note that in the last spare region, the spare space is + * complete even though data/parity space is not */ + totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); + info->TotSparePUsPerDisk = totSparePUsPerDisk; + + layoutPtr->stripeUnitsPerDisk = + ((complete_FT_count / raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */ + info->ExtraTablesPerDisk * info->TableDepthInPUs + + totSparePUsPerDisk /* spare space */ + ) * layoutPtr->SUsPerPU; + layoutPtr->dataStripeUnitsPerDisk = + (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs) + * layoutPtr->SUsPerPU * (k - 1) / k; + + } else { + /* non-dist spare case: force each disk to contain an + * integral number of tables */ + layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU); + layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU); + + /* compute the number of tables in the last fulltable, which + * need not be complete */ + complete_FT_count = + ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow; + + info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; + info->ExtraTablesPerDisk = + ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k; + } + + raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + + /* find the disk offset of the stripe unit where the last fulltable + * starts */ + 
numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow; + diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { + SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs; + diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs; + info->DiskOffsetOfLastSpareSpaceChunkInSUs = + diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; } - } else { - if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n", - (int)(info->TableDepthInPUs * layoutPtr->SUsPerPU), - (int)layoutPtr->stripeUnitsPerDisk); - return(EINVAL); + info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs; + info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk; + + /* 4. create and initialize the lookup tables */ + info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList); + if (info->LayoutTable == NULL) + return (ENOMEM); + info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList); + if (info->OffsetTable == NULL) + return (ENOMEM); + info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); + if (info->BlockTable == NULL) + return (ENOMEM); + + first_avail_slot = (int *) rf_make_1d_array(v, NULL); + if (first_avail_slot == NULL) + return (ENOMEM); + + for (i = 0; i < b; i++) + for (j = 0; j < k; j++) + info->LayoutTable[i][j] = *cfgBuf++; + + /* initialize offset table */ + for (i = 0; i < b; i++) + for (j = 0; j < k; j++) { + info->OffsetTable[i][j] = first_avail_slot[info->LayoutTable[i][j]]; + first_avail_slot[info->LayoutTable[i][j]]++; + } + + /* initialize block table */ + for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) { + for (i = 0; i < b; i++) { + for (j = 0; j < 
k; j++) { + info->BlockTable[(info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l] + [info->LayoutTable[i][j]] = SUID; + } + SUID++; + } } - } - - - /* compute the size of each disk, and the number of tables in the last fulltable (which - * need not be complete) - */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - - PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU; - spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs + - (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v-1)); - info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU; - - numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs; - info->NumCompleteSRs = numCompleteSpareRegionsPerDisk; - extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs; - - /* assume conservatively that we need the full amount of spare space in one region in order - * to provide spares for the partial spare region at the end of the array. We set "i" to - * the number of tables in the partial spare region. This may actually include some fulltables. 
- */ - extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - if (extraPUsPerDisk <= 0) i = 0; - else i = extraPUsPerDisk/info->TableDepthInPUs; - - complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion/k) + i/k); - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = i % k; - - /* note that in the last spare region, the spare space is complete even though data/parity space is not */ - totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk+1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); - info->TotSparePUsPerDisk = totSparePUsPerDisk; - - layoutPtr->stripeUnitsPerDisk = - ((complete_FT_count/raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */ - info->ExtraTablesPerDisk * info->TableDepthInPUs + - totSparePUsPerDisk /* spare space */ - ) * layoutPtr->SUsPerPU; - layoutPtr->dataStripeUnitsPerDisk = - (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs) - * layoutPtr->SUsPerPU * (k-1) / k; - - } else { - /* non-dist spare case: force each disk to contain an integral number of tables */ - layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - - /* compute the number of tables in the last fulltable, which need not be complete */ - complete_FT_count = - ((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow; - - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = - ((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k; - } - - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - /* find the disk offset of the stripe unit where the last fulltable starts */ - numCompleteFullTablesPerDisk = complete_FT_count / 
raidPtr->numRow; - diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs; - diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs; - info->DiskOffsetOfLastSpareSpaceChunkInSUs = - diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; - } - info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs; - info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk; - - /* 4. create and initialize the lookup tables */ - info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->LayoutTable == NULL) - return(ENOMEM); - info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList); - if (info->OffsetTable == NULL) - return(ENOMEM); - info->BlockTable = rf_make_2d_array(info->TableDepthInPUs*layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); - if (info->BlockTable == NULL) - return(ENOMEM); - - first_avail_slot = (int *) rf_make_1d_array(v, NULL); - if (first_avail_slot == NULL) - return(ENOMEM); - - for (i=0; i<b; i++) - for (j=0; j<k; j++) - info->LayoutTable[i][j] = *cfgBuf++; - - /* initialize offset table */ - for (i=0; i<b; i++) for (j=0; j<k; j++) { - info->OffsetTable[i][j] = first_avail_slot[ info->LayoutTable[i][j] ]; - first_avail_slot[ info->LayoutTable[i][j] ]++; - } - - /* initialize block table */ - for (SUID=l=0; l<layoutPtr->SUsPerPU; l++) { - for (i=0; i<b; i++) { - for (j=0; j<k; j++) { - info->BlockTable[ (info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l ] - [ info->LayoutTable[i][j] ] = SUID; - } - SUID++; - } - } - - rf_free_1d_array(first_avail_slot, v); - - /* 5. 
set up the remaining redundant-but-useful parameters */ - - raidPtr->totalSectors = (k*complete_FT_count + raidPtr->numRow*info->ExtraTablesPerDisk) * - info->SUsPerTable * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k-2); - - /* strange evaluation order below to try and minimize overflow problems */ - - layoutPtr->dataSectorsPerStripe = (k-2) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = k-2; - layoutPtr->numParityCol = 2; - - return(0); + + rf_free_1d_array(first_avail_slot, v); + + /* 5. set up the remaining redundant-but-useful parameters */ + + raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * info->ExtraTablesPerDisk) * + info->SUsPerTable * layoutPtr->sectorsPerStripeUnit; + layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k - 2); + + /* strange evaluation order below to try and minimize overflow + * problems */ + + layoutPtr->dataSectorsPerStripe = (k - 2) * layoutPtr->sectorsPerStripeUnit; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->numDataCol = k - 2; + layoutPtr->numParityCol = 2; + + return (0); } -int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t *raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t * raidPtr) { - int def_decl; + int def_decl; - def_decl = rf_GetDefaultNumFloatingReconBuffersDeclustered(raidPtr); - return(RF_MAX(3 * raidPtr->numCol, def_decl)); + def_decl = rf_GetDefaultNumFloatingReconBuffersDeclustered(raidPtr); + return (RF_MAX(3 * raidPtr->numCol, def_decl)); } -void rf_MapSectorDeclusteredPQ( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapSectorDeclusteredPQ( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + 
RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array (across rows) */ - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this disk */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - RF_ASSERT(BlockOffset < info->groupSize-2 ); - /* - TableIDs go from 0 .. GroupSize-1 inclusive. - PUsPerBlock is k-2. - We want the tableIDs to rotate from the - right, so use GroupSize - */ - RepIndex = info->groupSize - 1 - TableID; - RF_ASSERT(RepIndex >= 0); - if (!raidPtr->noRotate) - { - if (TableID==0) - BlockOffset++; /* P on last drive, Q on first */ - else - BlockOffset += ((BlockOffset >= RepIndex) ? 
2 : 0); /* skip over PQ */ - RF_ASSERT(BlockOffset < info->groupSize); - *col = info->LayoutTable[BlockID][BlockOffset]; - } - - /* remap to distributed spare space if indicated */ - if (remap) { - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU); - } else { - - outSU = base_suid; - outSU += FullTableID * fulltable_depth; /* offs to strt of FT */ - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */ - outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */ - } - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within a PU */ - - /* convert SUs to sectors, and, if not aligned to SU boundary, add in offset to sector */ - *diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; + RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; + RF_StripeNum_t BlockID, BlockOffset, RepIndex; + RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; + RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; + + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + + FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array + * (across rows) */ + *row = FullTableID % raidPtr->numRow; + FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this + * disk */ + if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { + SpareRegion = FullTableID / info->FullTablesPerSpareRegion; + SpareSpace = SpareRegion * 
info->SpareSpaceDepthPerRegionInSUs; + } + FullTableOffset = SUID % sus_per_fulltable; + TableID = FullTableOffset / info->SUsPerTable; + TableOffset = FullTableOffset - TableID * info->SUsPerTable; + BlockID = TableOffset / info->PUsPerBlock; + BlockOffset = TableOffset - BlockID * info->PUsPerBlock; + BlockID %= info->BlocksPerTable; + RF_ASSERT(BlockOffset < info->groupSize - 2); + /* + TableIDs go from 0 .. GroupSize-1 inclusive. + PUsPerBlock is k-2. + We want the tableIDs to rotate from the + right, so use GroupSize + */ + RepIndex = info->groupSize - 1 - TableID; + RF_ASSERT(RepIndex >= 0); + if (!raidPtr->noRotate) { + if (TableID == 0) + BlockOffset++; /* P on last drive, Q on first */ + else + BlockOffset += ((BlockOffset >= RepIndex) ? 2 : 0); /* skip over PQ */ + RF_ASSERT(BlockOffset < info->groupSize); + *col = info->LayoutTable[BlockID][BlockOffset]; + } + /* remap to distributed spare space if indicated */ + if (remap) { + rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 
1 : 0, SpareRegion, col, &outSU); + } else { + + outSU = base_suid; + outSU += FullTableID * fulltable_depth; /* offs to strt of FT */ + outSU += SpareSpace; /* skip rsvd spare space */ + outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */ + outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */ + } + outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within + * a PU */ + + /* convert SUs to sectors, and, if not aligned to SU boundary, add in + * offset to sector */ + *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); } -void rf_MapParityDeclusteredPQ( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapParityDeclusteredPQ( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - RF_StripeNum_t BlockID, BlockOffset, RepIndex; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace=0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - /* compute row & (possibly) spare space exactly as before */ - FullTableID = SUID / sus_per_fulltable; - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this disk */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / 
info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - - /* compute BlockID and RepIndex exactly as before */ - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - - /* the parity block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID; - *col = info->LayoutTable[BlockID][RepIndex]; - - if (remap) - RF_PANIC(); - - /* compute sector as before, except use RepIndex instead of BlockOffset */ - outSU = base_suid; - outSU += FullTableID * fulltable_depth; - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; - outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU; - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); - - *diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; + RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; + RF_StripeNum_t BlockID, BlockOffset, RepIndex; + RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; + RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace = 0; + + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + + /* compute row & (possibly) spare space exactly as before */ + FullTableID = SUID / sus_per_fulltable; + *row = FullTableID % raidPtr->numRow; + 
FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this + * disk */ + if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { + SpareRegion = FullTableID / info->FullTablesPerSpareRegion; + SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; + } + /* compute BlockID and RepIndex exactly as before */ + FullTableOffset = SUID % sus_per_fulltable; + TableID = FullTableOffset / info->SUsPerTable; + TableOffset = FullTableOffset - TableID * info->SUsPerTable; + BlockID = TableOffset / info->PUsPerBlock; + BlockOffset = TableOffset - BlockID * info->PUsPerBlock; + BlockID %= info->BlocksPerTable; + + /* the parity block is in the position indicated by RepIndex */ + RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID; + *col = info->LayoutTable[BlockID][RepIndex]; + + if (remap) + RF_PANIC(); + + /* compute sector as before, except use RepIndex instead of + * BlockOffset */ + outSU = base_suid; + outSU += FullTableID * fulltable_depth; + outSU += SpareSpace; /* skip rsvd spare space */ + outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; + outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU; + outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); + + *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); } -void rf_MapQDeclusteredPQ( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapQDeclusteredPQ( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; - RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; - 
RF_StripeNum_t BlockID, BlockOffset, RepIndex, RepIndexQ; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace=0; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - - /* compute row & (possibly) spare space exactly as before */ - FullTableID = SUID / sus_per_fulltable; - *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this disk */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareRegion = FullTableID / info->FullTablesPerSpareRegion; - SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; - } - - /* compute BlockID and RepIndex exactly as before */ - FullTableOffset = SUID % sus_per_fulltable; - TableID = FullTableOffset / info->SUsPerTable; - TableOffset = FullTableOffset - TableID * info->SUsPerTable; - BlockID = TableOffset / info->PUsPerBlock; - BlockOffset = TableOffset - BlockID * info->PUsPerBlock; - BlockID %= info->BlocksPerTable; - - /* the q block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID; - RepIndexQ = ((RepIndex == (info->groupSize-1)) ? 
0 : RepIndex+1); - *col = info->LayoutTable[BlockID][RepIndexQ]; - - if (remap) - RF_PANIC(); - - /* compute sector as before, except use RepIndex instead of BlockOffset */ - outSU = base_suid; - outSU += FullTableID * fulltable_depth; - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); - - outSU += info->OffsetTable[BlockID][RepIndexQ] * layoutPtr->SUsPerPU; - *diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; + RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; + RF_StripeNum_t BlockID, BlockOffset, RepIndex, RepIndexQ; + RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; + RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace = 0; + + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + + /* compute row & (possibly) spare space exactly as before */ + FullTableID = SUID / sus_per_fulltable; + *row = FullTableID % raidPtr->numRow; + FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this + * disk */ + if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { + SpareRegion = FullTableID / info->FullTablesPerSpareRegion; + SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; + } + /* compute BlockID and RepIndex exactly as before */ + FullTableOffset = SUID % sus_per_fulltable; + TableID = FullTableOffset / info->SUsPerTable; + TableOffset = FullTableOffset - TableID * info->SUsPerTable; + BlockID = TableOffset / info->PUsPerBlock; + BlockOffset = TableOffset - BlockID * 
info->PUsPerBlock; + BlockID %= info->BlocksPerTable; + + /* the q block is in the position indicated by RepIndex */ + RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID; + RepIndexQ = ((RepIndex == (info->groupSize - 1)) ? 0 : RepIndex + 1); + *col = info->LayoutTable[BlockID][RepIndexQ]; + + if (remap) + RF_PANIC(); + + /* compute sector as before, except use RepIndex instead of + * BlockOffset */ + outSU = base_suid; + outSU += FullTableID * fulltable_depth; + outSU += SpareSpace; /* skip rsvd spare space */ + outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; + outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); + + outSU += info->OffsetTable[BlockID][RepIndexQ] * layoutPtr->SUsPerPU; + *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); } - /* returns an array of ints identifying the disks that comprise the stripe containing the indicated address. * the caller must _never_ attempt to modify this array. 
*/ -void rf_IdentifyStripeDeclusteredPQ( - RF_Raid_t *raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t **diskids, - RF_RowCol_t *outRow) +void +rf_IdentifyStripeDeclusteredPQ( + RF_Raid_t * raidPtr, + RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, + RF_RowCol_t * outRow) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; - RF_StripeNum_t base_suid = 0; - RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr); - RF_StripeNum_t stripeID, FullTableID; - int tableOffset; - - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array (across rows) */ - *outRow = FullTableID % raidPtr->numRow; - stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset into array */ - tableOffset = (stripeID % info->BlocksPerTable); /* find offset into block design table */ - *diskids = info->LayoutTable[tableOffset]; + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; + RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeNum_t base_suid = 0; + RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr); + RF_StripeNum_t stripeID, FullTableID; + int tableOffset; + + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array + * (across rows) */ + *outRow = FullTableID % raidPtr->numRow; + stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset + * into array */ + tableOffset = 
(stripeID % info->BlocksPerTable); /* find offset into + * block design table */ + *diskids = info->LayoutTable[tableOffset]; } diff --git a/sys/dev/raidframe/rf_declusterPQ.h b/sys/dev/raidframe/rf_declusterPQ.h index 2ef5d4c220e..f83ec3d7d5d 100644 --- a/sys/dev/raidframe/rf_declusterPQ.h +++ b/sys/dev/raidframe/rf_declusterPQ.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_declusterPQ.h,v 1.1 1999/01/11 14:29:14 niklas Exp $ */ -/* $NetBSD: rf_declusterPQ.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_declusterPQ.h,v 1.2 1999/02/16 00:02:35 niklas Exp $ */ +/* $NetBSD: rf_declusterPQ.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,74 +27,26 @@ * rights to redistribute these changes. */ -/* : - * Log: rf_declusterPQ.h,v - * Revision 1.13 1996/08/20 22:42:08 jimz - * missing prototype of IdentifyStripeDeclusteredPQ added - * - * Revision 1.12 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.11 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.10 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.9 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.8 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.7 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.6 1995/12/01 15:59:20 root - * added copyright info - * - * Revision 1.5 1995/11/17 19:08:23 wvcii - * added prototyping to MapParity - * - * Revision 1.4 1995/11/07 15:30:33 wvcii - * changed PQDagSelect prototype - * function no longer generates numHdrSucc, numTermAnt - * removed ParityLoggingDagSelect prototype - * - * Revision 1.3 1995/06/23 13:40:57 robby - * updeated to prototypes in rf_layout.h - * - * Revision 1.2 1995/05/02 22:46:53 holland - * minor code cleanups. 
- * - * Revision 1.1 1994/11/19 20:26:57 danner - * Initial revision - * - */ - #ifndef _RF__RF_DECLUSTERPQ_H_ #define _RF__RF_DECLUSTERPQ_H_ #include "rf_types.h" -int rf_ConfigureDeclusteredPQ(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t *raidPtr); -void rf_MapSectorDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_MapParityDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_MapQDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_IdentifyStripeDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, RF_RowCol_t *outRow); +int +rf_ConfigureDeclusteredPQ(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t * raidPtr); +void +rf_MapSectorDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_MapParityDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_MapQDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_IdentifyStripeDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -#endif /* !_RF__RF_DECLUSTERPQ_H_ */ +#endif /* !_RF__RF_DECLUSTERPQ_H_ */ diff --git a/sys/dev/raidframe/rf_demo.c b/sys/dev/raidframe/rf_demo.c deleted file mode 100644 index 91212482c37..00000000000 --- a/sys/dev/raidframe/rf_demo.c +++ /dev/null @@ -1,506 +0,0 @@ -/* $OpenBSD: rf_demo.c,v 1.1 
1999/01/11 14:29:15 niklas Exp $ */ -/* $NetBSD: rf_demo.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/********************************************************************************** - * - * rf_demo.c -- code for supporting demos. this is not actually part of the driver. - * - **********************************************************************************/ - -/* : - * Log: rf_demo.c,v - * Revision 1.24 1996/06/17 14:38:33 jimz - * properly #if out RF_DEMO code - * fix bug in MakeConfig that was causing weird behavior - * in configuration routines (config was not zeroed at start) - * clean up genplot handling of stacks - * - * Revision 1.23 1996/06/17 03:23:09 jimz - * explicitly do pthread stuff (for join) - * NOTE: this should be changed! 
- * - * Revision 1.22 1996/06/14 23:15:38 jimz - * attempt to deal with thread GC problem - * - * Revision 1.21 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.20 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.19 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.18 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.17 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.16 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.15 1996/05/20 16:14:08 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.14 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.13 1995/12/01 15:56:07 root - * added copyright info - * - */ - -#include "rf_archs.h" - -#if RF_DEMO > 0 - -#include <stdio.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <strings.h> -#include <unistd.h> -#include <sys/time.h> -#include <signal.h> - -#include "rf_threadstuff.h" -#include "rf_demo.h" -#include "rf_utils.h" -#include "rf_general.h" -#include "rf_options.h" - -#ifdef SIMULATE -#include "rf_diskevent.h" -#endif /* SIMULATE */ - -static int doMax = 0; /* currently no way to set this */ - -/**************************************************************************************** - * fault-free demo code - ***************************************************************************************/ - -static int user_iops_meter = -1; -static int disk_iops_meter = -1; -static int max_user_meter = -1; -static int max_disk_meter = -1; -static int recon_pctg_meter = -1; -static int avg_resp_time_meter = -1; -static int recon_time_meter = -1; -static int ff_avg_resp_time_meter = -1; -static int deg_avg_resp_time_meter = -1; -static int recon_avg_resp_time_meter = -1; -static int user_ios_ff=0; -static int user_ios_deg=0; -static int user_ios_recon=0; -static long user_resp_time_sum_ff = 0; -static long user_resp_time_sum_deg = 0; -static long user_resp_time_sum_recon = 0; - -int rf_demo_op_mode = 0; - -RF_DECLARE_STATIC_MUTEX(iops_mutex) -static int user_ios_so_far, disk_ios_so_far, 
max_user, max_disk; -static long user_resp_time_sum_ms; -static int recon_pctg; -static struct timeval iops_starttime; -#ifndef SIMULATE -static RF_Thread_t update_thread_desc; -#endif /* !SIMULATE */ -static int meter_update_terminate; - -static int meter_update_interval = 2; /* seconds between meter updates */ -static int iops_initialized = 0, recon_initialized = 0; - -static char *demoMeterTags[] = {"FF", "Degr", "Recon"}; - -static int vpos=0; - -static int rf_CreateMeter(char *title, char *geom, char *color); -static void rf_UpdateMeter(int meterid, int value); -static void rf_DestroyMeter(int meterid, int killproc); - -void rf_startup_iops_demo(meter_vpos, C, G) - int meter_vpos; - int C; - int G; -{ - char buf[100], title[100]; - int rc; - - vpos = meter_vpos; - sprintf(buf, "%dx%d-0+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE)); - sprintf(title,"%s %d/%d User IOs/sec",demoMeterTags[rf_demoMeterTag],C,G); - user_iops_meter = rf_CreateMeter(title, buf, "black"); - sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING,vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE)); - sprintf(title,"%s %d/%d Disk IOs/sec",demoMeterTags[rf_demoMeterTag],C,G); - disk_iops_meter = rf_CreateMeter(title, buf, "red"); - if (doMax) { - sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 2*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING),vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE)); - sprintf(title,"%s %d/%d Avg User IOs/s",demoMeterTags[rf_demoMeterTag],C,G); - max_user_meter = rf_CreateMeter(title, buf, "black"); - sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 3*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE)); - sprintf(title,"%s %d/%d Avg Disk IOs/s",demoMeterTags[rf_demoMeterTag],C,G); - max_disk_meter = rf_CreateMeter(title, buf, "red"); - sprintf(buf, 
"%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 4*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE)); - } else { - sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 2*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE)); - } - sprintf(title,"%s %d/%d Avg User Resp Time (ms)",demoMeterTags[rf_demoMeterTag],C,G); - avg_resp_time_meter = rf_CreateMeter(title, buf, "blue"); - rc = rf_mutex_init(&iops_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return; - } - user_ios_so_far = disk_ios_so_far = max_user = max_disk = 0; - user_resp_time_sum_ms = 0; - - meter_update_terminate = 0; -#ifndef SIMULATE - pthread_create(&update_thread_desc, raidframe_attr_default, (pthread_startroutine_t)rf_meter_update_thread, NULL); -#endif /* !SIMULATE */ - gettimeofday(&iops_starttime, NULL); - iops_initialized = 1; -} - - -void rf_update_user_stats(resptime) - int resptime; -{ - if (!iops_initialized && !recon_initialized) return; - RF_LOCK_MUTEX(iops_mutex); - user_ios_so_far++; - user_resp_time_sum_ms += resptime; - RF_UNLOCK_MUTEX(iops_mutex); -} - -void rf_update_disk_iops(val) - int val; -{ - if (!iops_initialized) return; - RF_LOCK_MUTEX(iops_mutex); - disk_ios_so_far += val; - RF_UNLOCK_MUTEX(iops_mutex); -} - -void rf_meter_update_thread() -{ - struct timeval now, diff; - int iops, resptime; - float secs; - -#ifndef SIMULATE - while (!meter_update_terminate) { - gettimeofday(&now, NULL); - RF_TIMEVAL_DIFF(&iops_starttime, &now, &diff); - secs = ((float) diff.tv_sec) + ((float) diff.tv_usec)/1000000.0; -#else /* !SIMULATE */ - secs = rf_cur_time; -#endif /* !SIMULATE */ - if (user_iops_meter >= 0) { - iops = (secs!=0.0) ? 
(int) (((float) user_ios_so_far) / secs) : 0; - rf_UpdateMeter(user_iops_meter, iops); - if (max_user_meter && iops > max_user) {max_user = iops; rf_UpdateMeter(max_user_meter, iops);} - } - - if (disk_iops_meter >= 0) { - iops = (secs!=0.0) ? (int) (((float) disk_ios_so_far) / secs) : 0; - rf_UpdateMeter(disk_iops_meter, iops); - if (max_disk_meter && iops > max_disk) {max_disk = iops; rf_UpdateMeter(max_disk_meter, iops);} - } - - if (recon_pctg_meter >= 0) { - rf_UpdateMeter(recon_pctg_meter, recon_pctg); - } - - switch (rf_demo_op_mode){ - case RF_DEMO_FAULT_FREE: - resptime = (user_ios_so_far != 0) ? user_resp_time_sum_ms / user_ios_so_far : 0; - if (resptime && (ff_avg_resp_time_meter >=0)) - rf_UpdateMeter(ff_avg_resp_time_meter, resptime); - user_ios_ff += user_ios_so_far; - user_resp_time_sum_ff += user_resp_time_sum_ms; - break; - case RF_DEMO_DEGRADED: - resptime = (user_ios_so_far != 0) ? user_resp_time_sum_ms / user_ios_so_far : 0; - if (resptime &&(deg_avg_resp_time_meter >=0)) - rf_UpdateMeter(deg_avg_resp_time_meter, resptime); - user_ios_deg += user_ios_so_far; - user_resp_time_sum_deg += user_resp_time_sum_ms; - case RF_DEMO_RECON: - resptime = (user_ios_so_far != 0) ? user_resp_time_sum_ms / user_ios_so_far : 0; - if (resptime && (recon_avg_resp_time_meter >= 0)) - rf_UpdateMeter(recon_avg_resp_time_meter, resptime); - user_ios_recon += user_ios_so_far; - user_resp_time_sum_recon += user_resp_time_sum_ms; - break; - default: printf("WARNING: demo meter update thread: Invalid op mode! 
\n"); - } - user_ios_so_far = 0; - user_resp_time_sum_ms = 0; -#ifndef SIMULATE - RF_DELAY_THREAD(1,0); - } -#endif /* !SIMULATE */ -} - -void rf_finish_iops_demo() -{ - long status; - - if (!iops_initialized) return; - iops_initialized = 0; /* make sure any subsequent update calls don't do anything */ - meter_update_terminate = 1; -#ifndef SIMULATE - pthread_join(update_thread_desc, (pthread_addr_t)&status); -#endif /* !SIMULATE */ - - rf_DestroyMeter(user_iops_meter, (doMax) ? 1 : 0); - rf_DestroyMeter(disk_iops_meter, (doMax) ? 1 : 0); - rf_DestroyMeter(max_user_meter, 0); - rf_DestroyMeter(max_disk_meter, 0); - rf_DestroyMeter(avg_resp_time_meter, 0); - rf_mutex_destroy(&iops_mutex); -} - -void rf_demo_update_mode(arg_mode) - int arg_mode; -{ - int hpos; - char buf[100], title[100]; - - switch (rf_demo_op_mode = arg_mode) { - case RF_DEMO_DEGRADED: - - /* freeze fault-free response time meter; create degraded mode meter */ - hpos=rf_demoMeterHpos+2; - sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, hpos * (RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE)); - sprintf(title,"Degraded Mode Average Response Time (ms)",demoMeterTags[rf_demoMeterTag]); - deg_avg_resp_time_meter = rf_CreateMeter(title, buf, "purple"); - rf_UpdateMeter(ff_avg_resp_time_meter, (user_ios_ff == 0)? 
0: user_resp_time_sum_ff/user_ios_ff); - break; - - case RF_DEMO_RECON: - - /* freeze degraded mode response time meter; create recon meters */ - hpos = rf_demoMeterHpos+1; - sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, hpos * (RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE)); - sprintf(title,"Reconstruction Average Response Time (ms)",demoMeterTags[rf_demoMeterTag]); - recon_avg_resp_time_meter = rf_CreateMeter(title, buf, "darkgreen"); - sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, (rf_demoMeterHpos) * (RF_DEMO_METER_WIDTH + RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE)); - sprintf(title,"Percent Complete / Recon Time"); - recon_pctg_meter = rf_CreateMeter(title,buf,"red"); - rf_UpdateMeter(deg_avg_resp_time_meter, (user_ios_deg == 0)? 0: user_resp_time_sum_deg/user_ios_deg); - break; - - default: /*do nothing -- finish_recon_demo will update rest of meters */; - } - -} - - -/**************************************************************************************** - * reconstruction demo code - ***************************************************************************************/ - - -void rf_startup_recon_demo(meter_vpos, C, G, init) - int meter_vpos; - int C; - int G; - int init; -{ - char buf[100], title[100]; - int rc; - - vpos = meter_vpos; - if (init) { - /* init demo -- display ff resp time meter */ - sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, (rf_demoMeterHpos+3) * (RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE)); - sprintf(title,"%s %d/%d Fault-Free Avg User Resp Time (ms)",demoMeterTags[rf_demoMeterTag],C,G); - ff_avg_resp_time_meter = rf_CreateMeter(title, buf, "blue"); - } - rc = rf_mutex_init(&iops_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - } - - meter_update_terminate = 0; -#ifndef 
SIMULATE - pthread_create(&update_thread_desc, raidframe_attr_default, (pthread_startroutine_t)rf_meter_update_thread, NULL); -#endif /* !SIMULATE */ - gettimeofday(&iops_starttime, NULL); - recon_initialized = 1; -} - -void rf_update_recon_meter(val) - int val; -{ - recon_pctg = val; -} - - -void rf_finish_recon_demo(etime) - struct timeval *etime; -{ - long status; - int hpos; - - hpos = rf_demoMeterHpos; - - recon_initialized = 0; /* make sure any subsequent - update calls don't do anything */ - recon_pctg = etime->tv_sec; /* display recon time on meter */ - - rf_UpdateMeter(recon_avg_resp_time_meter, (user_ios_recon == 0)? 0: user_resp_time_sum_recon/user_ios_recon); - - rf_UpdateMeter(recon_pctg_meter, etime->tv_sec); - - meter_update_terminate = 1; - -#ifndef SIMULATE - pthread_join(update_thread_desc, (pthread_addr_t)&status); /* join the meter update thread */ -#endif /* !SIMULATE */ - rf_DestroyMeter(recon_pctg_meter, 0); - rf_DestroyMeter(ff_avg_resp_time_meter, 0); - rf_DestroyMeter(deg_avg_resp_time_meter, 0); - rf_DestroyMeter(recon_avg_resp_time_meter, 0); - rf_mutex_destroy(&iops_mutex); -} - - -/**************************************************************************************** - * meter manipulation code - ***************************************************************************************/ - -#define MAXMETERS 50 -static struct meter_info { int sd; int pid; char name[100]; } minfo[MAXMETERS]; -static int meter_num = 0; - -int rf_ConfigureMeters() -{ - int i; - for (i=0; i<MAXMETERS; i++) - minfo[i].sd = -1; - return(0); -} - -/* forks a dmeter process to create a 4-digit meter window - * "title" appears in the title bar of the meter window - * returns an integer handle (really a socket descriptor) by which - * the new meter can be accessed. 
- */ -static int rf_CreateMeter(title, geom, color) - char *title; - char *geom; - char *color; -{ - char geombuf[100], *clr; - int sd, pid, i, status; - struct sockaddr sa; - - if (!geom) sprintf(geombuf,"120x40-0+%d", 50*meter_num); else sprintf(geombuf, "%s", geom); - clr = (color) ? color : "black"; - sprintf(minfo[meter_num].name,"/tmp/xm_%d",meter_num); - unlink(minfo[meter_num].name); - - if ( !(pid = fork()) ) { - execlp("dmeter","dmeter","-noscroll","-t",title,"-geometry",geombuf,"-sa",minfo[meter_num].name,"-fg",clr,NULL); - perror("rf_CreateMeter: exec failed"); - return(-1); - } - - sd = socket(AF_UNIX,SOCK_STREAM,0); - sa.sa_family = AF_UNIX; - strcpy(sa.sa_data, minfo[meter_num].name); - for (i=0; i<50; i++) { /* this give us 25 seconds to get the meter running */ - if ( (status = connect(sd,&sa,sizeof(sa))) != -1) break; -#ifdef SIMULATE - sleep (1); -#else /* SIMULATE */ - RF_DELAY_THREAD(0, 500); -#endif /* SIMULATE */ - } - if (status == -1) { - perror("Unable to connect to meter"); - exit(1); - } - minfo[meter_num].sd = sd; - minfo[meter_num].pid = pid; - return(meter_num++); -} - -/* causes the meter to display the given value */ -void rf_UpdateMeter(meterid, value) - int meterid; - int value; -{ - if (write(minfo[meterid].sd, &value, sizeof(int)) < sizeof(int)) { - fprintf(stderr,"Unable to write to meter %d\n",meterid); - } -} - -void rf_DestroyMeter(meterid, killproc) - int meterid; - int killproc; -{ - close(minfo[meterid].sd); - if (killproc) kill(minfo[meterid].pid, SIGTERM); - minfo[meterid].sd = -1; -} - -int rf_ShutdownAllMeters() -{ - int i; - - for (i=0; i<MAXMETERS; i++) - if (minfo[i].sd >= 0) - rf_DestroyMeter(i, 0); - return(0); -} - -#endif /* RF_DEMO > 0 */ diff --git a/sys/dev/raidframe/rf_demo.h b/sys/dev/raidframe/rf_demo.h deleted file mode 100644 index 90a20935d57..00000000000 --- a/sys/dev/raidframe/rf_demo.h +++ /dev/null @@ -1,83 +0,0 @@ -/* $OpenBSD: rf_demo.h,v 1.1 1999/01/11 14:29:15 niklas Exp $ */ -/* $NetBSD: 
rf_demo.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland, Khalil Amiri - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/* rf_demo.h - * some constants for demo'ing software - */ - -/* : - * Log: rf_demo.h,v - * Revision 1.8 1996/06/14 23:15:38 jimz - * attempt to deal with thread GC problem - * - * Revision 1.7 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.6 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.5 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.4 1995/12/01 15:58:53 root - * added copyright info - * - */ - -#ifndef _RF__RF_DEMO_H_ -#define _RF__RF_DEMO_H_ - -#include "rf_types.h" - -#define RF_DEMO_METER_WIDTH 300 /* how wide each meter is */ -#define RF_DEMO_METER_HEIGHT 150 /* how tall */ -#define RF_DEMO_METER_SPACING 15 /* how much space between horizontally */ -#define RF_DEMO_METER_VSPACE 20 /* how much space between vertically */ -#define RF_DEMO_FAULT_FREE 0 -#define RF_DEMO_DEGRADED 1 -#define RF_DEMO_RECON 2 - -void rf_startup_iops_demo(int meter_vpos, int C, int G); -void rf_update_user_stats(int resptime); -void rf_update_disk_iops(int val); -void rf_meter_update_thread(void); -void rf_finish_iops_demo(void); -void rf_demo_update_mode(int arg_mode); -void rf_startup_recon_demo(int meter_vpos, int C, int G, int init); -void rf_update_recon_meter(int val); -void rf_finish_recon_demo(struct timeval *etime); - -extern int rf_demo_op_mode; - -#endif /* !_RF__RF_DEMO_H_ */ diff --git a/sys/dev/raidframe/rf_desc.h b/sys/dev/raidframe/rf_desc.h index a1a8e4f3684..b76466f298a 100644 --- a/sys/dev/raidframe/rf_desc.h +++ b/sys/dev/raidframe/rf_desc.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_desc.h,v 1.1 1999/01/11 14:29:15 niklas Exp $ */ -/* $NetBSD: rf_desc.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */ +/* $OpenBSD: rf_desc.h,v 1.2 1999/02/16 00:02:38 niklas Exp 
$ */ +/* $NetBSD: rf_desc.h,v 1.4 1999/02/05 00:06:09 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,78 +27,6 @@ * rights to redistribute these changes. */ -/* - * : - * Log: rf_desc.h,v - * Revision 1.29 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.28 1996/06/07 22:49:22 jimz - * fix up raidPtr typing - * - * Revision 1.27 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.26 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.25 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.24 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.23 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.22 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.21 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.20 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.19 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.18 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.17 1995/12/01 15:58:43 root - * added copyright info - * - * Revision 1.16 1995/11/19 16:31:30 wvcii - * descriptors now contain an array of dag lists as opposed to a dag header - * - * Revision 1.15 1995/11/07 16:24:17 wvcii - * updated def of _AccessState - * - */ - #ifndef _RF__RF_DESC_H_ #define _RF__RF_DESC_H_ @@ -108,74 +36,80 @@ #include "rf_dag.h" struct RF_RaidReconDesc_s { - RF_Raid_t *raidPtr; /* raid device descriptor */ - RF_RowCol_t row; /* row of failed disk */ - RF_RowCol_t col; /* col of failed disk */ - int state; /* how far along the reconstruction operation has gotten */ - RF_RaidDisk_t *spareDiskPtr; /* describes target disk for recon (not used in dist sparing) */ - int numDisksDone; /* the number of surviving disks that have completed their work */ - RF_RowCol_t srow; /* row ID of the spare disk (not used in dist sparing) */ - RF_RowCol_t scol; /* col ID of the spare disk (not used in dist sparing) */ -#ifdef KERNEL - /* - * Prevent recon from hogging CPU - */ - RF_Etimer_t recon_exec_timer; - RF_uint64 reconExecTimerRunning; - RF_uint64 reconExecTicks; - RF_uint64 maxReconExecTicks; -#endif /* KERNEL */ + RF_Raid_t *raidPtr; /* raid device descriptor 
*/ + RF_RowCol_t row; /* row of failed disk */ + RF_RowCol_t col; /* col of failed disk */ + int state; /* how far along the reconstruction operation + * has gotten */ + RF_RaidDisk_t *spareDiskPtr; /* describes target disk for recon + * (not used in dist sparing) */ + int numDisksDone; /* the number of surviving disks that have + * completed their work */ + RF_RowCol_t srow; /* row ID of the spare disk (not used in dist + * sparing) */ + RF_RowCol_t scol; /* col ID of the spare disk (not used in dist + * sparing) */ + /* + * Prevent recon from hogging CPU + */ + RF_Etimer_t recon_exec_timer; + RF_uint64 reconExecTimerRunning; + RF_uint64 reconExecTicks; + RF_uint64 maxReconExecTicks; #if RF_RECON_STATS > 0 - RF_uint64 hsStallCount; /* head sep stall count */ - RF_uint64 numReconExecDelays; - RF_uint64 numReconEventWaits; -#endif /* RF_RECON_STATS > 0 */ - RF_RaidReconDesc_t *next; + RF_uint64 hsStallCount; /* head sep stall count */ + RF_uint64 numReconExecDelays; + RF_uint64 numReconEventWaits; +#endif /* RF_RECON_STATS > 0 */ + RF_RaidReconDesc_t *next; }; struct RF_RaidAccessDesc_s { - RF_Raid_t *raidPtr; /* raid device descriptor */ - RF_IoType_t type; /* read or write */ - RF_RaidAddr_t raidAddress; /* starting address in raid address space */ - RF_SectorCount_t numBlocks; /* number of blocks (sectors) to transfer */ - RF_StripeCount_t numStripes; /* number of stripes involved in access */ - caddr_t bufPtr; /* pointer to data buffer */ - -#if !defined(KERNEL) && !defined(SIMULATE) - caddr_t obufPtr; /* real pointer to data buffer */ -#endif /* !KERNEL && !SIMULATE */ - - RF_RaidAccessFlags_t flags; /* flags controlling operation */ - int state; /* index into states telling how far along the RAID operation has gotten */ - RF_AccessState_t *states; /* array of states to be run */ - int status; /* pass/fail status of the last operation */ - RF_DagList_t *dagArray; /* array of dag lists, one list per stripe */ - RF_AccessStripeMapHeader_t *asmap; /* the asm for 
this I/O */ - void *bp; /* buf pointer for this RAID acc. ignored outside the kernel */ - RF_DagHeader_t **paramDAG; /* allows the DAG to be returned to the caller after I/O completion */ - RF_AccessStripeMapHeader_t **paramASM; /* allows the ASM to be returned to the caller after I/O completion */ - RF_AccTraceEntry_t tracerec; /* perf monitoring information for a user access (not for dag stats) */ - void (*callbackFunc)(RF_CBParam_t); /* callback function for this I/O */ - void *callbackArg; /* arg to give to callback func */ - int tid; /* debug only, user-level only: thread id of thr that did this access */ + RF_Raid_t *raidPtr; /* raid device descriptor */ + RF_IoType_t type; /* read or write */ + RF_RaidAddr_t raidAddress; /* starting address in raid address + * space */ + RF_SectorCount_t numBlocks; /* number of blocks (sectors) to + * transfer */ + RF_StripeCount_t numStripes; /* number of stripes involved in + * access */ + caddr_t bufPtr; /* pointer to data buffer */ + RF_RaidAccessFlags_t flags; /* flags controlling operation */ + int state; /* index into states telling how far along the + * RAID operation has gotten */ + RF_AccessState_t *states; /* array of states to be run */ + int status; /* pass/fail status of the last operation */ + RF_DagList_t *dagArray; /* array of dag lists, one list per stripe */ + RF_AccessStripeMapHeader_t *asmap; /* the asm for this I/O */ + void *bp; /* buf pointer for this RAID acc. 
ignored + * outside the kernel */ + RF_DagHeader_t **paramDAG; /* allows the DAG to be returned to + * the caller after I/O completion */ + RF_AccessStripeMapHeader_t **paramASM; /* allows the ASM to be + * returned to the caller + * after I/O completion */ + RF_AccTraceEntry_t tracerec; /* perf monitoring information for a + * user access (not for dag stats) */ + void (*callbackFunc) (RF_CBParam_t); /* callback function for this + * I/O */ + void *callbackArg; /* arg to give to callback func */ + int tid; /* debug only, user-level only: thread id of + * thr that did this access */ - RF_AllocListElem_t *cleanupList; /* memory to be freed at the end of the access*/ + RF_AllocListElem_t *cleanupList; /* memory to be freed at the + * end of the access */ - RF_RaidAccessDesc_t *next; - RF_RaidAccessDesc_t *head; + RF_RaidAccessDesc_t *next; + RF_RaidAccessDesc_t *head; - int numPending; + int numPending; - RF_DECLARE_MUTEX(mutex) /* these are used to implement blocking I/O */ - RF_DECLARE_COND(cond) + RF_DECLARE_MUTEX(mutex) /* these are used to implement + * blocking I/O */ + RF_DECLARE_COND(cond) + int async_flag; -#ifdef SIMULATE - RF_Owner_t owner; - int async_flag; -#endif /* SIMULATE */ - - RF_Etimer_t timer; /* used for timing this access */ + RF_Etimer_t timer; /* used for timing this access */ }; - -#endif /* !_RF__RF_DESC_H_ */ +#endif /* !_RF__RF_DESC_H_ */ diff --git a/sys/dev/raidframe/rf_diskevent.c b/sys/dev/raidframe/rf_diskevent.c deleted file mode 100644 index 927f9ef0e29..00000000000 --- a/sys/dev/raidframe/rf_diskevent.c +++ /dev/null @@ -1,291 +0,0 @@ -/* $OpenBSD: rf_diskevent.c,v 1.1 1999/01/11 14:29:16 niklas Exp $ */ -/* $NetBSD: rf_diskevent.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. 
- * - * Author: Rachad Youssef - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * rf_diskevent. - support for disk device, by managing a heap of future events - * adapted from original code by David Kotz, Song Bac Toh (1994) - */ - -/* : - * Log: rf_diskevent.c,v - * Revision 1.18 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.17 1996/07/27 16:05:19 jimz - * return ENOMEM if DDEventInit fails its call to InitHeap - * - * Revision 1.16 1996/06/10 12:06:24 jimz - * fix spelling errors - * - * Revision 1.15 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.14 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.13 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. 
Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.12 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.11 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.10 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.9 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.8 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.7 1995/12/01 15:57:56 root - * added copyright info - * - */ - -#include "rf_types.h" -#include "rf_heap.h" -#include "rf_diskevent.h" -#include "rf_general.h" -#include "rf_dag.h" -#include "rf_diskthreads.h" -#include "rf_states.h" -#include "rf_shutdown.h" - -/* trace printing can be turned on/off in the Makefile */ - -RF_TICS_t rf_cur_time; -static RF_Owner_t cur_owner; -static RF_Heap_t heap; - -static void rf_DDEventShutdown(ignored) - void *ignored; -{ - rf_FreeHeap(heap); -} - -/* ======================================================================== */ -/* DDEventInit - * - * Initialize the event heap. 
- */ -int rf_DDEventInit(listp) - RF_ShutdownList_t **listp; -{ - int rc; - - heap = rf_InitHeap(RF_HEAP_MAX); /* initialize the heap */ - if (heap == NULL) - return(ENOMEM); - rc = rf_ShutdownCreate(listp, rf_DDEventShutdown, NULL); - if (rc) { - RF_ERRORMSG3("RAIDFRAME: failed creating shutdown event file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_FreeHeap(heap); - return(rc); - } - rf_cur_time=(RF_TICS_t)0; - return(0); -} - - - -/* DDEventRequest - * - * Put an event request into the event heap. - */ -void rf_DDEventRequest( - RF_TICS_t eventTime, - int (*CompleteFunc)(), - void *argument, - RF_Owner_t owner, - RF_RowCol_t row, - RF_RowCol_t col, - RF_Raid_t *raidPtr, - void *diskid) -{ - RF_HeapData_t *hpdat; - - RF_Malloc(hpdat,sizeof(RF_HeapData_t),(RF_HeapData_t *) ); - if (hpdat == NULL) { - fprintf(stderr, "DDEventRequest: malloc failed\n"); - RF_PANIC(); - } - - hpdat->eventTime = eventTime; - hpdat->CompleteFunc = CompleteFunc; - hpdat->argument = argument; - hpdat->owner = owner; - hpdat->row = row; - hpdat->col = col; - hpdat->raidPtr = raidPtr; - hpdat->diskid = diskid; - rf_AddHeap(heap, hpdat, (hpdat->eventTime)); -} - -void rf_DAGEventRequest( - RF_TICS_t eventTime, - RF_Owner_t owner, - RF_RowCol_t row, - RF_RowCol_t col, - RF_RaidAccessDesc_t *desc, - RF_Raid_t *raidPtr) -{ - RF_HeapData_t *hpdat; - - RF_Malloc(hpdat,sizeof(RF_HeapData_t),(RF_HeapData_t *) ); - if (hpdat == NULL) { - fprintf(stderr, "DDEventRequest: malloc failed\n"); - RF_PANIC(); - } - - hpdat->eventTime = eventTime; - hpdat->CompleteFunc = NULL; - hpdat->argument = NULL; - hpdat->owner = owner; - hpdat->row = row; - hpdat->col = col; - hpdat->desc=desc; - hpdat->raidPtr = raidPtr; - - rf_AddHeap(heap, hpdat, (hpdat->eventTime)); -} - - -/* ------------------------------------------------------------------------ */ -/* @SUBTITLE "Print out the request queue" */ -/* There is only 1 request queue so no argument is needed for this - function */ -void 
rf_DDPrintRequests() -{ - RF_HeapData_t *Hpdat; - RF_HeapKey_t Hpkey; - RF_Heap_t tempHp; - - printf("Events on heap:\n"); - - tempHp = rf_InitHeap(RF_HEAP_MAX); - while (rf_RemHeap(heap, &Hpdat, &Hpkey) != RF_HEAP_NONE) - { - printf ("at %5g HpKey there is: something for owner %d at disk %d %d\n",Hpkey, - Hpdat->owner,Hpdat->row,Hpdat->col); - rf_AddHeap(tempHp, Hpdat, Hpdat->eventTime); - } - - printf("END heap:\n"); - rf_FreeHeap(heap); /* free the empty old heap */ - - heap = tempHp; /* restore the recycled heap */ -} -/* ------------------------------------------------------------------------ */ - -int rf_ProcessEvent() -{ - RF_HeapData_t *Hpdat; - RF_HeapKey_t Hpkey; - int retcode; - - retcode = rf_RemHeap(heap, &Hpdat, &Hpkey); - - if (retcode==RF_HEAP_FOUND) { - if (rf_eventDebug) { - rf_DDPrintRequests(); - printf ("Now processing: at %5g something for owner %d at disk %d %d\n", - Hpkey, Hpdat->owner, Hpdat->row, Hpdat->col); - } - rf_cur_time=Hpkey; - - rf_SetCurrentOwner(Hpdat->owner); - - if (Hpdat->row>=0) {/* ongoing dag event */ - rf_SetDiskIdle (Hpdat->raidPtr, Hpdat->row, Hpdat->col); - if (Hpdat->diskid != NULL) { - rf_simulator_complete_io(Hpdat->diskid); - } - retcode=(Hpdat->CompleteFunc)(Hpdat->argument,0); - if (retcode==RF_HEAP_FOUND) - (((RF_DagNode_t *) (Hpdat->argument))->dagHdr->cbFunc)(((RF_DagNode_t *) (Hpdat->argument))->dagHdr->cbArg); - RF_Free(Hpdat,sizeof(RF_HeapData_t)); - return(retcode); - } - else { - /* this is a dag event or reconstruction event */ - if (Hpdat->row==RF_DD_DAGEVENT_ROW){ /* dag event */ - rf_ContinueRaidAccess(Hpdat->desc); - retcode = RF_FALSE; - RF_Free(Hpdat,sizeof(RF_HeapData_t)); - return (RF_FALSE); - } - else { - /* recon event */ - retcode=(Hpdat->CompleteFunc)(Hpdat->argument,0); - retcode = RF_FALSE; - RF_Free(Hpdat,sizeof(RF_HeapData_t)); - return (RF_FALSE); - } - } - } - if (rf_eventDebug) - printf("HEAP is empty\n"); - return(RF_DD_NOTHING_THERE); -} - -RF_Owner_t rf_GetCurrentOwner() -{ - 
return(cur_owner); -} - -void rf_SetCurrentOwner(RF_Owner_t owner) -{ - cur_owner=owner; -} - -RF_TICS_t rf_CurTime() -{ - return(rf_cur_time); -} diff --git a/sys/dev/raidframe/rf_diskevent.h b/sys/dev/raidframe/rf_diskevent.h deleted file mode 100644 index 103ddde7d13..00000000000 --- a/sys/dev/raidframe/rf_diskevent.h +++ /dev/null @@ -1,97 +0,0 @@ -/* $OpenBSD: rf_diskevent.h,v 1.1 1999/01/11 14:29:16 niklas Exp $ */ -/* $NetBSD: rf_diskevent.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */ -/* - * rf_diskevent.h - * Adapted from original code by David Kotz (1994) - * - * The disk-device module is event driven. This module keeps the event - * request mechanism, which is based on proteus SimRequests, - * abstracted away from the bulk of the disk device code. - * - * Functions - * DDEventInit - * DDEventRequest - * DDEventPrint - * DDEventCancel - */ - -/* : - * Log: rf_diskevent.h,v - * Revision 1.10 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.9 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.8 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.7 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. 
Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.6 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.5 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.4 1995/12/01 15:57:16 root - * added copyright info - * - */ - -#ifndef _RF__RF_DISKEVENT_H_ -#define _RF__RF_DISKEVENT_H_ - -#include "rf_types.h" -#include "rf_heap.h" -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#include "time.h" -#endif - -#define RF_DD_NOTHING_THERE (-1) -#define RF_DD_DAGEVENT_ROW (-3) -#define RF_DD_DAGEVENT_COL RF_DD_DAGEVENT_ROW - -extern RF_TICS_t rf_cur_time; - -/* - * list of disk-device request types, - * initialized in diskdevice.c, - * used in diskevent.c - */ -typedef void (*RF_DDhandler)(int disk, RF_TICS_t eventTime); -struct RF_dd_handlers_s { - RF_DDhandler handler; /* function implementing this event type */ - char name[20]; /* name of that event type */ -}; -extern struct RF_dd_handlers_s rf_DDhandlers[]; - -int rf_DDEventInit(RF_ShutdownList_t **listp); -void rf_DDEventRequest(RF_TICS_t eventTime, int (*CompleteFunc)(), - void *argument, RF_Owner_t owner, RF_RowCol_t row, RF_RowCol_t col, - RF_Raid_t *raidPtr, void *diskid); -void rf_DAGEventRequest(RF_TICS_t eventTime, RF_Owner_t owner, - RF_RowCol_t row, RF_RowCol_t col, RF_RaidAccessDesc_t *desc, - RF_Raid_t *raidPtr); -void rf_DDPrintRequests(void); -int rf_ProcessEvent(void); -RF_Owner_t rf_GetCurrentOwner(void); -void rf_SetCurrentOwner(RF_Owner_t owner); -RF_TICS_t rf_CurTime(void); - -#endif /* !_RF__RF_DISKEVENT_H_ */ diff --git a/sys/dev/raidframe/rf_diskqueue.c b/sys/dev/raidframe/rf_diskqueue.c index cd01f3c531f..14bccb06ca2 100644 --- a/sys/dev/raidframe/rf_diskqueue.c +++ 
b/sys/dev/raidframe/rf_diskqueue.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_diskqueue.c,v 1.1 1999/01/11 14:29:17 niklas Exp $ */ -/* $NetBSD: rf_diskqueue.c,v 1.2 1998/12/03 14:58:24 oster Exp $ */ +/* $OpenBSD: rf_diskqueue.c,v 1.2 1999/02/16 00:02:39 niklas Exp $ */ +/* $NetBSD: rf_diskqueue.c,v 1.6 1999/02/05 00:06:09 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -32,7 +32,7 @@ * rf_diskqueue.c -- higher-level disk queue code * * the routines here are a generic wrapper around the actual queueing - * routines. The code here implements thread scheduling, synchronization, + * routines. The code here implements thread scheduling, synchronization, * and locking ops (see below) on top of the lower-level queueing code. * * to support atomic RMW, we implement "locking operations". When a locking op @@ -55,165 +55,13 @@ * Unfortunately, the code is different in the 3 different operating states * (user level, kernel, simulator). In the kernel, I/O is non-blocking, and * we have no disk threads to dispatch for us. Therefore, we have to dispatch - * new I/Os to the scsi driver at the time of enqueue, and also at the time - * of completion. At user level, I/O is blocking, and so only the disk threads - * may dispatch I/Os. Thus at user level, all we can do at enqueue time is + * new I/Os to the scsi driver at the time of enqueue, and also at the time + * of completion. At user level, I/O is blocking, and so only the disk threads + * may dispatch I/Os. Thus at user level, all we can do at enqueue time is * enqueue and wake up the disk thread to do the dispatch. 
* ***************************************************************************************/ -/* - * : - * - * Log: rf_diskqueue.c,v - * Revision 1.50 1996/08/07 21:08:38 jimz - * b_proc -> kb_proc - * - * Revision 1.49 1996/07/05 20:36:14 jimz - * make rf_ConfigureDiskQueueSystem return 0 - * - * Revision 1.48 1996/06/18 20:53:11 jimz - * fix up disk queueing (remove configure routine, - * add shutdown list arg to create routines) - * - * Revision 1.47 1996/06/14 14:16:36 jimz - * fix handling of bogus queue type - * - * Revision 1.46 1996/06/13 20:41:44 jimz - * add scan, cscan, random queueing - * - * Revision 1.45 1996/06/11 01:27:50 jimz - * Fixed bug where diskthread shutdown would crash or hang. This - * turned out to be two distinct bugs: - * (1) [crash] The thread shutdown code wasn't properly waiting for - * all the diskthreads to complete. This caused diskthreads that were - * exiting+cleaning up to unlock a destroyed mutex. - * (2) [hang] TerminateDiskQueues wasn't locking, and DiskIODequeue - * only checked for termination _after_ a wakeup if the queues were - * empty. This was a race where the termination wakeup could be lost - * by the dequeueing thread, and the system would hang waiting for the - * thread to exit, while the thread waited for an I/O or a signal to - * check the termination flag. - * - * Revision 1.44 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.43 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.42 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.41 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.40 1996/06/06 17:28:04 jimz - * track sector number of last I/O dequeued - * - * Revision 1.39 1996/06/06 01:14:13 jimz - * fix crashing bug when tracerec is NULL (ie, from copyback) - * initialize req->queue - * - * Revision 1.38 1996/06/05 19:38:32 jimz - * fixed up disk queueing types config - * added sstf disk queueing - * fixed exit bug on diskthreads (ref-ing bad mem) - * - * Revision 1.37 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.36 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.35 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.34 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.33 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.32 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.31 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.30 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.29 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.28 1996/05/20 16:14:29 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.27 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.26 1996/05/16 19:21:49 wvcii - * fixed typo in init_dqd - * - * Revision 1.25 1996/05/16 16:02:51 jimz - * switch to RF_FREELIST stuff for DiskQueueData - * - * Revision 1.24 1996/05/10 16:24:14 jimz - * new cvscan function names - * - * Revision 1.23 1996/05/01 16:27:54 jimz - * don't use ccmn bp management - * - * Revision 1.22 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.21 1995/12/01 15:59:59 root - * added copyright info - * - * Revision 1.20 1995/11/07 16:27:20 wvcii - * added Peek() function to diskqueuesw - * non-locking accesses are never blocked (assume clients enforce proper - * respect for lock acquisition) - * - * Revision 1.19 1995/10/05 18:56:52 jimz - * fix req handling in IOComplete - * - * Revision 1.18 1995/10/04 20:13:50 wvcii - * added asserts to monitor numOutstanding queueLength - * - * Revision 1.17 1995/10/04 07:43:52 wvcii - * queue->numOutstanding now valid for user & sim - * added queue->queueLength - * 
user tested & verified, sim untested - * - * Revision 1.16 1995/09/12 00:21:19 wvcii - * added support for tracing disk queue time - * - */ - #include "rf_types.h" #include "rf_threadstuff.h" #include "rf_threadid.h" @@ -231,19 +79,11 @@ #include "rf_sstf.h" #include "rf_fifo.h" -#ifdef SIMULATE -#include "rf_diskevent.h" -#endif /* SIMULATE */ - -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -extern struct buf *ubc_bufget(); -#endif - static int init_dqd(RF_DiskQueueData_t *); static void clean_dqd(RF_DiskQueueData_t *); static void rf_ShutdownDiskQueueSystem(void *); /* From rf_kintf.c */ -int rf_DispatchKernelIO(RF_DiskQueue_t *,RF_DiskQueueData_t *); +int rf_DispatchKernelIO(RF_DiskQueue_t *, RF_DiskQueueData_t *); #define Dprintf1(s,a) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) @@ -252,31 +92,10 @@ int rf_DispatchKernelIO(RF_DiskQueue_t *,RF_DiskQueueData_t *); #define Dprintf4(s,a,b,c,d) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL) #define Dprintf5(s,a,b,c,d,e) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL) -#if !defined(KERNEL) && !defined(SIMULATE) - -/* queue must be locked before invoking this */ -#define SIGNAL_DISK_QUEUE(_q_,_wh_) \ -{ \ - if ( (_q_)->numWaiting > 0) { \ - (_q_)->numWaiting--; \ - RF_SIGNAL_COND( ((_q_)->cond) ); \ - } \ -} - -/* queue must be locked before invoking this */ -#define WAIT_DISK_QUEUE(_q_,_wh_) \ -{ \ - (_q_)->numWaiting++; \ - RF_WAIT_COND( ((_q_)->cond), ((_q_)->mutex) ); \ -} - -#else /* !defined(KERNEL) && !defined(SIMULATE) */ #define SIGNAL_DISK_QUEUE(_q_,_wh_) #define WAIT_DISK_QUEUE(_q_,_wh_) -#endif /* !defined(KERNEL) && !defined(SIMULATE) */ - 
/***************************************************************************************** * * the disk queue switch defines all the functions used in the different queueing @@ -286,50 +105,50 @@ int rf_DispatchKernelIO(RF_DiskQueue_t *,RF_DiskQueueData_t *); ****************************************************************************************/ static RF_DiskQueueSW_t diskqueuesw[] = { - {"fifo", /* FIFO */ - rf_FifoCreate, - rf_FifoEnqueue, - rf_FifoDequeue, - rf_FifoPeek, + {"fifo", /* FIFO */ + rf_FifoCreate, + rf_FifoEnqueue, + rf_FifoDequeue, + rf_FifoPeek, rf_FifoPromote}, - {"cvscan", /* cvscan */ - rf_CvscanCreate, - rf_CvscanEnqueue, - rf_CvscanDequeue, - rf_CvscanPeek, - rf_CvscanPromote }, - - {"sstf", /* shortest seek time first */ - rf_SstfCreate, - rf_SstfEnqueue, - rf_SstfDequeue, - rf_SstfPeek, + {"cvscan", /* cvscan */ + rf_CvscanCreate, + rf_CvscanEnqueue, + rf_CvscanDequeue, + rf_CvscanPeek, + rf_CvscanPromote}, + + {"sstf", /* shortest seek time first */ + rf_SstfCreate, + rf_SstfEnqueue, + rf_SstfDequeue, + rf_SstfPeek, rf_SstfPromote}, - {"scan", /* SCAN (two-way elevator) */ - rf_ScanCreate, - rf_SstfEnqueue, - rf_ScanDequeue, - rf_ScanPeek, + {"scan", /* SCAN (two-way elevator) */ + rf_ScanCreate, + rf_SstfEnqueue, + rf_ScanDequeue, + rf_ScanPeek, rf_SstfPromote}, - {"cscan", /* CSCAN (one-way elevator) */ - rf_CscanCreate, - rf_SstfEnqueue, - rf_CscanDequeue, - rf_CscanPeek, + {"cscan", /* CSCAN (one-way elevator) */ + rf_CscanCreate, + rf_SstfEnqueue, + rf_CscanDequeue, + rf_CscanPeek, rf_SstfPromote}, -#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0 +#if !defined(_KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0 /* to make a point to Chris :-> */ - {"random", /* random */ - rf_FifoCreate, - rf_FifoEnqueue, - rf_RandomDequeue, - rf_RandomPeek, + {"random", /* random */ + rf_FifoCreate, + rf_FifoEnqueue, + rf_RandomDequeue, + rf_RandomPeek, rf_FifoPromote}, -#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */ +#endif /* !KERNEL && 
RF_INCLUDE_QUEUE_RANDOM > 0 */ }; #define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t)) @@ -339,196 +158,159 @@ static RF_FreeList_t *rf_dqd_freelist; #define RF_DQD_INC 16 #define RF_DQD_INITIAL 64 -#if defined(__NetBSD__) || defined(__OpenBSD__) -#ifdef _KERNEL #include <sys/buf.h> -#endif -#endif -static int init_dqd(dqd) - RF_DiskQueueData_t *dqd; +static int +init_dqd(dqd) + RF_DiskQueueData_t *dqd; { -#ifdef KERNEL -#if defined(__NetBSD__) || defined(__OpenBSD__) - /* XXX not sure if the following malloc is appropriate... probably not quite... */ - dqd->bp = (struct buf *) malloc( sizeof(struct buf), M_DEVBUF, M_NOWAIT); - memset(dqd->bp,0,sizeof(struct buf)); /* if you don't do it, nobody else will.. */ - /* XXX */ - /* printf("NEED TO IMPLEMENT THIS BETTER!\n"); */ -#else - dqd->bp = ubc_bufget(); -#endif + /* XXX not sure if the following malloc is appropriate... probably not + * quite... */ + dqd->bp = (struct buf *) malloc(sizeof(struct buf), M_RAIDFRAME, M_NOWAIT); if (dqd->bp == NULL) { - return(ENOMEM); + return (ENOMEM); } -#endif /* KERNEL */ - return(0); + memset(dqd->bp, 0, sizeof(struct buf)); /* if you don't do it, nobody + * else will.. */ + return (0); } -static void clean_dqd(dqd) - RF_DiskQueueData_t *dqd; +static void +clean_dqd(dqd) + RF_DiskQueueData_t *dqd; { -#ifdef KERNEL -#if defined(__NetBSD__) || defined(__OpenBSD__) - /* printf("NEED TO IMPLEMENT THIS BETTER(2)!\n"); */ - /* XXX ? */ - free( dqd->bp, M_DEVBUF ); -#else - ubc_buffree(dqd->bp); -#endif - -#endif /* KERNEL */ + free(dqd->bp, M_RAIDFRAME); } - /* configures a single disk queue */ -static int config_disk_queue( - RF_Raid_t *raidPtr, - RF_DiskQueue_t *diskqueue, - RF_RowCol_t r, /* row & col -- debug only. BZZT not any more... 
*/ - RF_RowCol_t c, - RF_DiskQueueSW_t *p, - RF_SectorCount_t sectPerDisk, - dev_t dev, - int maxOutstanding, - RF_ShutdownList_t **listp, - RF_AllocListElem_t *clList) +static int +config_disk_queue( + RF_Raid_t * raidPtr, + RF_DiskQueue_t * diskqueue, + RF_RowCol_t r, /* row & col -- debug only. BZZT not any + * more... */ + RF_RowCol_t c, + RF_DiskQueueSW_t * p, + RF_SectorCount_t sectPerDisk, + dev_t dev, + int maxOutstanding, + RF_ShutdownList_t ** listp, + RF_AllocListElem_t * clList) { - int rc; - - diskqueue->row = r; - diskqueue->col = c; - diskqueue->qPtr = p; - diskqueue->qHdr = (p->Create)(sectPerDisk, clList, listp); - diskqueue->dev = dev; - diskqueue->numOutstanding = 0; - diskqueue->queueLength = 0; - diskqueue->maxOutstanding = maxOutstanding; - diskqueue->curPriority = RF_IO_NORMAL_PRIORITY; - diskqueue->nextLockingOp = NULL; - diskqueue->unlockingOp = NULL; - diskqueue->numWaiting=0; - diskqueue->flags = 0; - diskqueue->raidPtr = raidPtr; -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - diskqueue->rf_cinfo = &raidPtr->raid_cinfo[r][c]; -#endif - rc = rf_create_managed_mutex(listp, &diskqueue->mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); - } - rc = rf_create_managed_cond(listp, &diskqueue->cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); - } - return(0); + int rc; + + diskqueue->row = r; + diskqueue->col = c; + diskqueue->qPtr = p; + diskqueue->qHdr = (p->Create) (sectPerDisk, clList, listp); + diskqueue->dev = dev; + diskqueue->numOutstanding = 0; + diskqueue->queueLength = 0; + diskqueue->maxOutstanding = maxOutstanding; + diskqueue->curPriority = RF_IO_NORMAL_PRIORITY; + diskqueue->nextLockingOp = NULL; + diskqueue->unlockingOp = NULL; + diskqueue->numWaiting = 0; + diskqueue->flags = 0; + diskqueue->raidPtr = raidPtr; + diskqueue->rf_cinfo = &raidPtr->raid_cinfo[r][c]; + 
rc = rf_create_managed_mutex(listp, &diskqueue->mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (rc); + } + rc = rf_create_managed_cond(listp, &diskqueue->cond); + if (rc) { + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (rc); + } + return (0); } -static void rf_ShutdownDiskQueueSystem(ignored) - void *ignored; +static void +rf_ShutdownDiskQueueSystem(ignored) + void *ignored; { - RF_FREELIST_DESTROY_CLEAN(rf_dqd_freelist,next,(RF_DiskQueueData_t *),clean_dqd); + RF_FREELIST_DESTROY_CLEAN(rf_dqd_freelist, next, (RF_DiskQueueData_t *), clean_dqd); } -int rf_ConfigureDiskQueueSystem(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureDiskQueueSystem(listp) + RF_ShutdownList_t **listp; { - int rc; - - RF_FREELIST_CREATE(rf_dqd_freelist, RF_MAX_FREE_DQD, - RF_DQD_INC, sizeof(RF_DiskQueueData_t)); - if (rf_dqd_freelist == NULL) - return(ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownDiskQueueSystem(NULL); - return(rc); - } - RF_FREELIST_PRIME_INIT(rf_dqd_freelist, RF_DQD_INITIAL,next, - (RF_DiskQueueData_t *),init_dqd); - return(0); + int rc; + + RF_FREELIST_CREATE(rf_dqd_freelist, RF_MAX_FREE_DQD, + RF_DQD_INC, sizeof(RF_DiskQueueData_t)); + if (rf_dqd_freelist == NULL) + return (ENOMEM); + rc = rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL); + if (rc) { + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); + rf_ShutdownDiskQueueSystem(NULL); + return (rc); + } + RF_FREELIST_PRIME_INIT(rf_dqd_freelist, RF_DQD_INITIAL, next, + (RF_DiskQueueData_t *), init_dqd); + return (0); } -#ifndef KERNEL -/* this is called prior to shutdown to wakeup everyone waiting on a disk queue - * and tell them to exit - */ -void rf_TerminateDiskQueues(raidPtr) - 
RF_Raid_t *raidPtr; +int +rf_ConfigureDiskQueues( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RowCol_t r, c; - - raidPtr->terminate_disk_queues = 1; - for (r=0; r<raidPtr->numRow; r++) { - for (c=0; c<raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0); c++) { - RF_LOCK_QUEUE_MUTEX(&raidPtr->Queues[r][c], "TerminateDiskQueues"); - RF_BROADCAST_COND(raidPtr->Queues[r][c].cond); - RF_UNLOCK_QUEUE_MUTEX(&raidPtr->Queues[r][c], "TerminateDiskQueues"); - } - } -} -#endif /* !KERNEL */ + RF_DiskQueue_t **diskQueues, *spareQueues; + RF_DiskQueueSW_t *p; + RF_RowCol_t r, c; + int rc, i; + + raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs; + + for (p = NULL, i = 0; i < NUM_DISK_QUEUE_TYPES; i++) { + if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) { + p = &diskqueuesw[i]; + break; + } + } + if (p == NULL) { + RF_ERRORMSG2("Unknown queue type \"%s\". Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType); + p = &diskqueuesw[0]; + } + RF_CallocAndAdd(diskQueues, raidPtr->numRow, sizeof(RF_DiskQueue_t *), (RF_DiskQueue_t **), raidPtr->cleanupList); + if (diskQueues == NULL) { + return (ENOMEM); + } + raidPtr->Queues = diskQueues; + for (r = 0; r < raidPtr->numRow; r++) { + RF_CallocAndAdd(diskQueues[r], raidPtr->numCol + ((r == 0) ? 
raidPtr->numSpare : 0), sizeof(RF_DiskQueue_t), (RF_DiskQueue_t *), raidPtr->cleanupList); + if (diskQueues[r] == NULL) + return (ENOMEM); + for (c = 0; c < raidPtr->numCol; c++) { + rc = config_disk_queue(raidPtr, &diskQueues[r][c], r, c, p, + raidPtr->sectorsPerDisk, raidPtr->Disks[r][c].dev, + cfgPtr->maxOutstandingDiskReqs, listp, raidPtr->cleanupList); + if (rc) + return (rc); + } + } -int rf_ConfigureDiskQueues( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) -{ - RF_DiskQueue_t **diskQueues, *spareQueues; - RF_DiskQueueSW_t *p; - RF_RowCol_t r, c; - int rc, i; - - raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs; - - for(p=NULL,i=0;i<NUM_DISK_QUEUE_TYPES;i++) { - if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) { - p = &diskqueuesw[i]; - break; - } - } - if (p == NULL) { - RF_ERRORMSG2("Unknown queue type \"%s\". Using %s\n",cfgPtr->diskQueueType, diskqueuesw[0].queueType); - p = &diskqueuesw[0]; - } - - RF_CallocAndAdd(diskQueues, raidPtr->numRow, sizeof(RF_DiskQueue_t *), (RF_DiskQueue_t **), raidPtr->cleanupList); - if (diskQueues == NULL) { - return(ENOMEM); - } - raidPtr->Queues = diskQueues; - for (r=0; r<raidPtr->numRow; r++) { - RF_CallocAndAdd(diskQueues[r], raidPtr->numCol + ((r==0) ? 
raidPtr->numSpare : 0), sizeof(RF_DiskQueue_t), (RF_DiskQueue_t *), raidPtr->cleanupList); - if (diskQueues[r] == NULL) - return(ENOMEM); - for (c=0; c<raidPtr->numCol; c++) { - rc = config_disk_queue(raidPtr, &diskQueues[r][c], r, c, p, - raidPtr->sectorsPerDisk, raidPtr->Disks[r][c].dev, - cfgPtr->maxOutstandingDiskReqs, listp, raidPtr->cleanupList); - if (rc) - return(rc); - } - } - - spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; - for (r=0; r<raidPtr->numSpare; r++) { - rc = config_disk_queue(raidPtr, &spareQueues[r], - 0, raidPtr->numCol+r, p, - raidPtr->sectorsPerDisk, - raidPtr->Disks[0][raidPtr->numCol+r].dev, - cfgPtr->maxOutstandingDiskReqs, listp, - raidPtr->cleanupList); - if (rc) - return(rc); - } - return(0); + spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; + for (r = 0; r < raidPtr->numSpare; r++) { + rc = config_disk_queue(raidPtr, &spareQueues[r], + 0, raidPtr->numCol + r, p, + raidPtr->sectorsPerDisk, + raidPtr->Disks[0][raidPtr->numCol + r].dev, + cfgPtr->maxOutstandingDiskReqs, listp, + raidPtr->cleanupList); + if (rc) + return (rc); + } + return (0); } - /* Enqueue a disk I/O * * Unfortunately, we have to do things differently in the different @@ -558,372 +340,258 @@ int rf_ConfigureDiskQueues( * simulator rules: * Do the same as at user level, with the sleeps and wakeups suppressed. 
*/ -void rf_DiskIOEnqueue(queue, req, pri) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; - int pri; +void +rf_DiskIOEnqueue(queue, req, pri) + RF_DiskQueue_t *queue; + RF_DiskQueueData_t *req; + int pri; { - int tid; - - RF_ETIMER_START(req->qtime); - rf_get_threadid(tid); - RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector); - req->priority = pri; - - if (rf_queueDebug && (req->numSector == 0)) { - printf("Warning: Enqueueing zero-sector access\n"); - } - -#ifdef KERNEL - /* - * kernel - */ - RF_LOCK_QUEUE_MUTEX( queue, "DiskIOEnqueue" ); - /* locking request */ - if (RF_LOCKING_REQ(req)) { - if (RF_QUEUE_EMPTY(queue)) { - Dprintf3("Dispatching pri %d locking op to r %d c %d (queue empty)\n",pri,queue->row, queue->col); - RF_LOCK_QUEUE(queue); - rf_DispatchKernelIO(queue, req); - } else { - queue->queueLength++; /* increment count of number of requests waiting in this queue */ - Dprintf3("Enqueueing pri %d locking op to r %d c %d (queue not empty)\n",pri,queue->row, queue->col); - req->queue = (void *)queue; - (queue->qPtr->Enqueue)(queue->qHdr, req, pri); - } - } - /* unlocking request */ - else if (RF_UNLOCKING_REQ(req)) { /* we'll do the actual unlock when this I/O completes */ - Dprintf3("Dispatching pri %d unlocking op to r %d c %d\n",pri,queue->row, queue->col); - RF_ASSERT(RF_QUEUE_LOCKED(queue)); - rf_DispatchKernelIO(queue, req); - } - /* normal request */ - else if (RF_OK_TO_DISPATCH(queue, req)) { - Dprintf3("Dispatching pri %d regular op to r %d c %d (ok to dispatch)\n",pri,queue->row, queue->col); - rf_DispatchKernelIO(queue, req); - } else { - queue->queueLength++; /* increment count of number of requests waiting in this queue */ - Dprintf3("Enqueueing pri %d regular op to r %d c %d (not ok to dispatch)\n",pri,queue->row, queue->col); - req->queue = (void *)queue; - (queue->qPtr->Enqueue)(queue->qHdr, req, pri); - } - RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOEnqueue" ); - -#else /* KERNEL */ - /* - * user-level - */ - RF_LOCK_QUEUE_MUTEX( 
queue, "DiskIOEnqueue" ); - queue->queueLength++; /* increment count of number of requests waiting in this queue */ - /* unlocking request */ - if (RF_UNLOCKING_REQ(req)) { - Dprintf4("[%d] enqueueing pri %d unlocking op & signalling r %d c %d\n", tid, pri, queue->row, queue->col); - RF_ASSERT(RF_QUEUE_LOCKED(queue) && queue->unlockingOp == NULL); - queue->unlockingOp = req; - } - /* locking and normal requests */ - else { - req->queue = (void *)queue; - Dprintf5("[%d] enqueueing pri %d %s op & signalling r %d c %d\n", tid, pri, - (RF_LOCKING_REQ(req)) ? "locking" : "regular",queue->row,queue->col); - (queue->qPtr->Enqueue)(queue->qHdr, req, pri); - } - SIGNAL_DISK_QUEUE( queue, "DiskIOEnqueue"); - RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOEnqueue" ); -#endif /* KERNEL */ -} - -#if !defined(KERNEL) && !defined(SIMULATE) -/* user-level only: tell all threads to wake up & recheck the queue */ -void rf_BroadcastOnQueue(queue) - RF_DiskQueue_t *queue; -{ - int i; - - if (queue->maxOutstanding > 1) for (i=0; i<queue->maxOutstanding; i++) { - SIGNAL_DISK_QUEUE(queue, "BroadcastOnQueue" ); - } -} -#endif /* !KERNEL && !SIMULATE */ + int tid; -#ifndef KERNEL /* not used in kernel */ + RF_ETIMER_START(req->qtime); + rf_get_threadid(tid); + RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector); + req->priority = pri; -RF_DiskQueueData_t *rf_DiskIODequeue(queue) - RF_DiskQueue_t *queue; -{ - RF_DiskQueueData_t *p, *headItem; - int tid; - - rf_get_threadid(tid); - RF_LOCK_QUEUE_MUTEX( queue, "DiskIODequeue" ); - for (p=NULL; !p; ) { - if (queue->unlockingOp) { - /* unlocking request */ - RF_ASSERT(RF_QUEUE_LOCKED(queue)); - p = queue->unlockingOp; - queue->unlockingOp = NULL; - Dprintf4("[%d] dequeueing pri %d unlocking op r %d c %d\n", tid, p->priority, queue->row,queue->col); - } - else { - headItem = (queue->qPtr->Peek)(queue->qHdr); - if (headItem) { - if (RF_LOCKING_REQ(headItem)) { - /* locking request */ - if (!RF_QUEUE_LOCKED(queue)) { - /* queue isn't locked, so dequeue 
the request & lock the queue */ - p = (queue->qPtr->Dequeue)( queue->qHdr ); - if (p) - Dprintf4("[%d] dequeueing pri %d locking op r %d c %d\n", tid, p->priority, queue->row, queue->col); - else - Dprintf3("[%d] no dequeue -- raw queue empty r %d c %d\n", tid, queue->row, queue->col); - } - else { - /* queue already locked, no dequeue occurs */ - Dprintf3("[%d] no dequeue -- queue is locked r %d c %d\n", tid, queue->row, queue->col); - p = NULL; - } - } - else { - /* normal request, always dequeue and assume caller already has lock (if needed) */ - p = (queue->qPtr->Dequeue)( queue->qHdr ); - if (p) - Dprintf4("[%d] dequeueing pri %d regular op r %d c %d\n", tid, p->priority, queue->row, queue->col); - else - Dprintf3("[%d] no dequeue -- raw queue empty r %d c %d\n", tid, queue->row, queue->col); - } - } - else { - Dprintf3("[%d] no dequeue -- raw queue empty r %d c %d\n", tid, queue->row, queue->col); - } - } - - if (queue->raidPtr->terminate_disk_queues) { - p = NULL; - break; - } -#ifdef SIMULATE - break; /* in simulator, return NULL on empty queue instead of blocking */ -#else /* SIMULATE */ - if (!p) { - Dprintf3("[%d] nothing to dequeue: waiting r %d c %d\n", tid, queue->row, queue->col); - WAIT_DISK_QUEUE( queue, "DiskIODequeue" ); - } -#endif /* SIMULATE */ - } - - if (p) { - queue->queueLength--; /* decrement count of number of requests waiting in this queue */ - RF_ASSERT(queue->queueLength >= 0); - queue->numOutstanding++; - queue->last_deq_sector = p->sectorOffset; - /* record the amount of time this request spent in the disk queue */ - RF_ETIMER_STOP(p->qtime); - RF_ETIMER_EVAL(p->qtime); - if (p->tracerec) - p->tracerec->diskqueue_us += RF_ETIMER_VAL_US(p->qtime); - } - - if (p && RF_LOCKING_REQ(p)) { - RF_ASSERT(!RF_QUEUE_LOCKED(queue)); - Dprintf3("[%d] locking queue r %d c %d\n",tid,queue->row,queue->col); - RF_LOCK_QUEUE(queue); - } - RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIODequeue" ); - - return(p); + if (rf_queueDebug && (req->numSector == 0)) { + 
printf("Warning: Enqueueing zero-sector access\n"); + } + /* + * kernel + */ + RF_LOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); + /* locking request */ + if (RF_LOCKING_REQ(req)) { + if (RF_QUEUE_EMPTY(queue)) { + Dprintf3("Dispatching pri %d locking op to r %d c %d (queue empty)\n", pri, queue->row, queue->col); + RF_LOCK_QUEUE(queue); + rf_DispatchKernelIO(queue, req); + } else { + queue->queueLength++; /* increment count of number + * of requests waiting in this + * queue */ + Dprintf3("Enqueueing pri %d locking op to r %d c %d (queue not empty)\n", pri, queue->row, queue->col); + req->queue = (void *) queue; + (queue->qPtr->Enqueue) (queue->qHdr, req, pri); + } + } + /* unlocking request */ + else + if (RF_UNLOCKING_REQ(req)) { /* we'll do the actual unlock + * when this I/O completes */ + Dprintf3("Dispatching pri %d unlocking op to r %d c %d\n", pri, queue->row, queue->col); + RF_ASSERT(RF_QUEUE_LOCKED(queue)); + rf_DispatchKernelIO(queue, req); + } + /* normal request */ + else + if (RF_OK_TO_DISPATCH(queue, req)) { + Dprintf3("Dispatching pri %d regular op to r %d c %d (ok to dispatch)\n", pri, queue->row, queue->col); + rf_DispatchKernelIO(queue, req); + } else { + queue->queueLength++; /* increment count of + * number of requests + * waiting in this queue */ + Dprintf3("Enqueueing pri %d regular op to r %d c %d (not ok to dispatch)\n", pri, queue->row, queue->col); + req->queue = (void *) queue; + (queue->qPtr->Enqueue) (queue->qHdr, req, pri); + } + RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); } -#else /* !KERNEL */ /* get the next set of I/Os started, kernel version only */ -void rf_DiskIOComplete(queue, req, status) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; - int status; +void +rf_DiskIOComplete(queue, req, status) + RF_DiskQueue_t *queue; + RF_DiskQueueData_t *req; + int status; { - int done=0; - - RF_LOCK_QUEUE_MUTEX( queue, "DiskIOComplete" ); - - /* unlock the queue: - (1) after an unlocking req completes - (2) after a locking req fails - 
*/ - if (RF_UNLOCKING_REQ(req) || (RF_LOCKING_REQ(req) && status)) { - Dprintf2("DiskIOComplete: unlocking queue at r %d c %d\n", queue->row, queue->col); - RF_ASSERT(RF_QUEUE_LOCKED(queue) && (queue->unlockingOp == NULL)); - RF_UNLOCK_QUEUE(queue); - } - - queue->numOutstanding--; - RF_ASSERT(queue->numOutstanding >= 0); - - /* dispatch requests to the disk until we find one that we can't. */ - /* no reason to continue once we've filled up the queue */ - /* no reason to even start if the queue is locked */ - - while (!done && !RF_QUEUE_FULL(queue) && !RF_QUEUE_LOCKED(queue)) { - if (queue->nextLockingOp) { - req = queue->nextLockingOp; queue->nextLockingOp = NULL; - Dprintf3("DiskIOComplete: a pri %d locking req was pending at r %d c %d\n",req->priority,queue->row, queue->col); - } else { - req = (queue->qPtr->Dequeue)( queue->qHdr ); - if (req != NULL) { - Dprintf3("DiskIOComplete: extracting pri %d req from queue at r %d c %d\n",req->priority,queue->row, queue->col); - } else { - Dprintf1("DiskIOComplete: no more requests to extract.\n",""); - } - } - if (req) { - queue->queueLength--; /* decrement count of number of requests waiting in this queue */ - RF_ASSERT(queue->queueLength >= 0); - } - if (!req) done=1; - else if (RF_LOCKING_REQ(req)) { - if (RF_QUEUE_EMPTY(queue)) { /* dispatch it */ - Dprintf3("DiskIOComplete: dispatching pri %d locking req to r %d c %d (queue empty)\n",req->priority,queue->row, queue->col); - RF_LOCK_QUEUE(queue); - rf_DispatchKernelIO(queue, req); - done = 1; - } else { /* put it aside to wait for the queue to drain */ - Dprintf3("DiskIOComplete: postponing pri %d locking req to r %d c %d\n",req->priority,queue->row, queue->col); - RF_ASSERT(queue->nextLockingOp == NULL); - queue->nextLockingOp = req; - done = 1; - } - } else if (RF_UNLOCKING_REQ(req)) { /* should not happen: unlocking ops should not get queued */ - RF_ASSERT(RF_QUEUE_LOCKED(queue)); /* support it anyway for the future */ - Dprintf3("DiskIOComplete: dispatching pri 
%d unl req to r %d c %d (SHOULD NOT SEE THIS)\n",req->priority,queue->row, queue->col); - rf_DispatchKernelIO(queue, req); - done = 1; - } else if (RF_OK_TO_DISPATCH(queue, req)) { - Dprintf3("DiskIOComplete: dispatching pri %d regular req to r %d c %d (ok to dispatch)\n",req->priority,queue->row, queue->col); - rf_DispatchKernelIO(queue, req); - } else { /* we can't dispatch it, so just re-enqueue it. */ - /* potential trouble here if disk queues batch reqs */ - Dprintf3("DiskIOComplete: re-enqueueing pri %d regular req to r %d c %d\n",req->priority,queue->row, queue->col); - queue->queueLength++; - (queue->qPtr->Enqueue)(queue->qHdr, req, req->priority); - done = 1; - } - } - - RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOComplete" ); -} -#endif /* !KERNEL */ + int done = 0; + + RF_LOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); + /* unlock the queue: (1) after an unlocking req completes (2) after a + * locking req fails */ + if (RF_UNLOCKING_REQ(req) || (RF_LOCKING_REQ(req) && status)) { + Dprintf2("DiskIOComplete: unlocking queue at r %d c %d\n", queue->row, queue->col); + RF_ASSERT(RF_QUEUE_LOCKED(queue) && (queue->unlockingOp == NULL)); + RF_UNLOCK_QUEUE(queue); + } + queue->numOutstanding--; + RF_ASSERT(queue->numOutstanding >= 0); + + /* dispatch requests to the disk until we find one that we can't. 
*/ + /* no reason to continue once we've filled up the queue */ + /* no reason to even start if the queue is locked */ + + while (!done && !RF_QUEUE_FULL(queue) && !RF_QUEUE_LOCKED(queue)) { + if (queue->nextLockingOp) { + req = queue->nextLockingOp; + queue->nextLockingOp = NULL; + Dprintf3("DiskIOComplete: a pri %d locking req was pending at r %d c %d\n", req->priority, queue->row, queue->col); + } else { + req = (queue->qPtr->Dequeue) (queue->qHdr); + if (req != NULL) { + Dprintf3("DiskIOComplete: extracting pri %d req from queue at r %d c %d\n", req->priority, queue->row, queue->col); + } else { + Dprintf1("DiskIOComplete: no more requests to extract.\n", ""); + } + } + if (req) { + queue->queueLength--; /* decrement count of number + * of requests waiting in this + * queue */ + RF_ASSERT(queue->queueLength >= 0); + } + if (!req) + done = 1; + else + if (RF_LOCKING_REQ(req)) { + if (RF_QUEUE_EMPTY(queue)) { /* dispatch it */ + Dprintf3("DiskIOComplete: dispatching pri %d locking req to r %d c %d (queue empty)\n", req->priority, queue->row, queue->col); + RF_LOCK_QUEUE(queue); + rf_DispatchKernelIO(queue, req); + done = 1; + } else { /* put it aside to wait for + * the queue to drain */ + Dprintf3("DiskIOComplete: postponing pri %d locking req to r %d c %d\n", req->priority, queue->row, queue->col); + RF_ASSERT(queue->nextLockingOp == NULL); + queue->nextLockingOp = req; + done = 1; + } + } else + if (RF_UNLOCKING_REQ(req)) { /* should not happen: + * unlocking ops should + * not get queued */ + RF_ASSERT(RF_QUEUE_LOCKED(queue)); /* support it anyway for + * the future */ + Dprintf3("DiskIOComplete: dispatching pri %d unl req to r %d c %d (SHOULD NOT SEE THIS)\n", req->priority, queue->row, queue->col); + rf_DispatchKernelIO(queue, req); + done = 1; + } else + if (RF_OK_TO_DISPATCH(queue, req)) { + Dprintf3("DiskIOComplete: dispatching pri %d regular req to r %d c %d (ok to dispatch)\n", req->priority, queue->row, queue->col); + rf_DispatchKernelIO(queue, req); 
+ } else { /* we can't dispatch it, + * so just re-enqueue + * it. */ + /* potential trouble here if + * disk queues batch reqs */ + Dprintf3("DiskIOComplete: re-enqueueing pri %d regular req to r %d c %d\n", req->priority, queue->row, queue->col); + queue->queueLength++; + (queue->qPtr->Enqueue) (queue->qHdr, req, req->priority); + done = 1; + } + } + + RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); +} /* promotes accesses tagged with the given parityStripeID from low priority * to normal priority. This promotion is optional, meaning that a queue * need not implement it. If there is no promotion routine associated with * a queue, this routine does nothing and returns -1. */ -int rf_DiskIOPromote(queue, parityStripeID, which_ru) - RF_DiskQueue_t *queue; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; +int +rf_DiskIOPromote(queue, parityStripeID, which_ru) + RF_DiskQueue_t *queue; + RF_StripeNum_t parityStripeID; + RF_ReconUnitNum_t which_ru; { - int retval; - - if (!queue->qPtr->Promote) - return(-1); - RF_LOCK_QUEUE_MUTEX( queue, "DiskIOPromote" ); - retval = (queue->qPtr->Promote)( queue->qHdr, parityStripeID, which_ru ); - RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOPromote" ); - return(retval); + int retval; + + if (!queue->qPtr->Promote) + return (-1); + RF_LOCK_QUEUE_MUTEX(queue, "DiskIOPromote"); + retval = (queue->qPtr->Promote) (queue->qHdr, parityStripeID, which_ru); + RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOPromote"); + return (retval); } -RF_DiskQueueData_t *rf_CreateDiskQueueData( - RF_IoType_t typ, - RF_SectorNum_t ssect, - RF_SectorCount_t nsect, - caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF)(void *,int), - void *arg, - RF_DiskQueueData_t *next, - RF_AccTraceEntry_t *tracerec, - void *raidPtr, - RF_DiskQueueDataFlags_t flags, - void *kb_proc) +RF_DiskQueueData_t * +rf_CreateDiskQueueData( + RF_IoType_t typ, + RF_SectorNum_t ssect, + RF_SectorCount_t nsect, + caddr_t buf, + RF_StripeNum_t 
parityStripeID, + RF_ReconUnitNum_t which_ru, + int (*wakeF) (void *, int), + void *arg, + RF_DiskQueueData_t * next, + RF_AccTraceEntry_t * tracerec, + void *raidPtr, + RF_DiskQueueDataFlags_t flags, + void *kb_proc) { - RF_DiskQueueData_t *p; - - RF_FREELIST_GET_INIT(rf_dqd_freelist,p,next,(RF_DiskQueueData_t *),init_dqd); - - p->sectorOffset = ssect + rf_protectedSectors; - p->numSector = nsect; - p->type = typ; - p->buf = buf; - p->parityStripeID= parityStripeID; - p->which_ru = which_ru; - p->CompleteFunc = wakeF; - p->argument = arg; - p->next = next; - p->tracerec = tracerec; - p->priority = RF_IO_NORMAL_PRIORITY; - p->AuxFunc = NULL; - p->buf2 = NULL; -#ifdef SIMULATE - p->owner = rf_GetCurrentOwner(); -#endif /* SIMULATE */ - p->raidPtr = raidPtr; - p->flags = flags; -#ifdef KERNEL - p->b_proc = kb_proc; -#endif /* KERNEL */ - return(p); + RF_DiskQueueData_t *p; + + RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), init_dqd); + + p->sectorOffset = ssect + rf_protectedSectors; + p->numSector = nsect; + p->type = typ; + p->buf = buf; + p->parityStripeID = parityStripeID; + p->which_ru = which_ru; + p->CompleteFunc = wakeF; + p->argument = arg; + p->next = next; + p->tracerec = tracerec; + p->priority = RF_IO_NORMAL_PRIORITY; + p->AuxFunc = NULL; + p->buf2 = NULL; + p->raidPtr = raidPtr; + p->flags = flags; + p->b_proc = kb_proc; + return (p); } -RF_DiskQueueData_t *rf_CreateDiskQueueDataFull( - RF_IoType_t typ, - RF_SectorNum_t ssect, - RF_SectorCount_t nsect, - caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF)(void *,int), - void *arg, - RF_DiskQueueData_t *next, - RF_AccTraceEntry_t *tracerec, - int priority, - int (*AuxFunc)(void *,...), - caddr_t buf2, - void *raidPtr, - RF_DiskQueueDataFlags_t flags, - void *kb_proc) +RF_DiskQueueData_t * +rf_CreateDiskQueueDataFull( + RF_IoType_t typ, + RF_SectorNum_t ssect, + RF_SectorCount_t nsect, + caddr_t buf, + RF_StripeNum_t parityStripeID, + 
RF_ReconUnitNum_t which_ru, + int (*wakeF) (void *, int), + void *arg, + RF_DiskQueueData_t * next, + RF_AccTraceEntry_t * tracerec, + int priority, + int (*AuxFunc) (void *,...), + caddr_t buf2, + void *raidPtr, + RF_DiskQueueDataFlags_t flags, + void *kb_proc) { - RF_DiskQueueData_t *p; - - RF_FREELIST_GET_INIT(rf_dqd_freelist,p,next,(RF_DiskQueueData_t *),init_dqd); - - p->sectorOffset = ssect + rf_protectedSectors; - p->numSector = nsect; - p->type = typ; - p->buf = buf; - p->parityStripeID= parityStripeID; - p->which_ru = which_ru; - p->CompleteFunc = wakeF; - p->argument = arg; - p->next = next; - p->tracerec = tracerec; - p->priority = priority; - p->AuxFunc = AuxFunc; - p->buf2 = buf2; -#ifdef SIMULATE - p->owner = rf_GetCurrentOwner(); -#endif /* SIMULATE */ - p->raidPtr = raidPtr; - p->flags = flags; -#ifdef KERNEL - p->b_proc = kb_proc; -#endif /* KERNEL */ - return(p); + RF_DiskQueueData_t *p; + + RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), init_dqd); + + p->sectorOffset = ssect + rf_protectedSectors; + p->numSector = nsect; + p->type = typ; + p->buf = buf; + p->parityStripeID = parityStripeID; + p->which_ru = which_ru; + p->CompleteFunc = wakeF; + p->argument = arg; + p->next = next; + p->tracerec = tracerec; + p->priority = priority; + p->AuxFunc = AuxFunc; + p->buf2 = buf2; + p->raidPtr = raidPtr; + p->flags = flags; + p->b_proc = kb_proc; + return (p); } -void rf_FreeDiskQueueData(p) - RF_DiskQueueData_t *p; +void +rf_FreeDiskQueueData(p) + RF_DiskQueueData_t *p; { - RF_FREELIST_FREE_CLEAN(rf_dqd_freelist,p,next,clean_dqd); + RF_FREELIST_FREE_CLEAN(rf_dqd_freelist, p, next, clean_dqd); } diff --git a/sys/dev/raidframe/rf_diskqueue.h b/sys/dev/raidframe/rf_diskqueue.h index 20878553479..246c799d8a1 100644 --- a/sys/dev/raidframe/rf_diskqueue.h +++ b/sys/dev/raidframe/rf_diskqueue.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_diskqueue.h,v 1.1 1999/01/11 14:29:17 niklas Exp $ */ -/* $NetBSD: rf_diskqueue.h,v 1.1 1998/11/13 04:20:29 
oster Exp $ */ +/* $OpenBSD: rf_diskqueue.h,v 1.2 1999/02/16 00:02:40 niklas Exp $ */ +/* $NetBSD: rf_diskqueue.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -34,131 +34,6 @@ * see comments in rf_diskqueue.c * ****************************************************************************************/ -/* - * - * : - * - * Log: rf_diskqueue.h,v - * Revision 1.31 1996/08/07 21:08:49 jimz - * b_proc -> kb_proc (IRIX complained) - * - * Revision 1.30 1996/06/18 20:53:11 jimz - * fix up disk queueing (remove configure routine, - * add shutdown list arg to create routines) - * - * Revision 1.29 1996/06/13 20:38:19 jimz - * fix queue type in DiskQueueData - * - * Revision 1.28 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.27 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.26 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.25 1996/06/06 17:29:12 jimz - * track arm position of last I/O dequeued - * - * Revision 1.24 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.23 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.22 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.21 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.20 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.19 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.18 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.17 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.16 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.15 1996/05/10 19:39:31 jimz - * add prev pointer to DiskQueueData - * - * Revision 1.14 1996/05/10 16:24:04 jimz - * mark old defines as deprecated, add RF_ defines - * - * Revision 1.13 1995/12/01 15:59:04 root - * added copyright info - * - * Revision 1.12 1995/11/07 16:26:44 wvcii - * added Peek() function to diskqueuesw - * - * Revision 1.11 1995/10/05 02:33:15 jimz - * made queue lens longs (less instructions to read :-) - * - * Revision 1.10 1995/10/04 07:07:07 wvcii - * queue->numOutstanding now valid for user & sim - * user tested & verified, sim 
untested - * - * Revision 1.9 1995/09/12 00:21:37 wvcii - * added support for tracing disk queue time - * - * Revision 1.8 95/04/24 13:25:51 holland - * rewrite to move disk queues, recon, & atomic RMW to kernel - * - * Revision 1.6.10.2 1995/04/03 20:13:56 holland - * added numOutstanding and maxOutstanding to support moving - * disk queues into kernel code - * - * Revision 1.6.10.1 1995/04/03 20:03:56 holland - * initial checkin on branch - * - * Revision 1.6 1995/03/03 18:34:33 rachad - * Simulator mechanism added - * - * Revision 1.5 1995/03/01 20:25:48 holland - * kernelization changes - * - * Revision 1.4 1995/02/03 22:31:36 holland - * many changes related to kernelization - * - * Revision 1.3 1995/02/01 14:25:19 holland - * began changes for kernelization: - * changed all instances of mutex_t and cond_t to DECLARE macros - * converted configuration code to use config structure - * - * Revision 1.2 1994/11/29 20:36:02 danner - * Added symbolic constants for io_type (e.g,IO_TYPE_READ) - * and support for READ_OP_WRITE - * - */ #ifndef _RF__RF_DISKQUEUE_H_ @@ -171,13 +46,11 @@ #include "rf_etimer.h" -#ifdef _KERNEL #if defined(__NetBSD__) #include "rf_netbsd.h" #elif defined(__OpenBSD__) #include "rf_openbsd.h" #endif -#endif #define RF_IO_NORMAL_PRIORITY 1 @@ -185,74 +58,88 @@ /* the data held by a disk queue entry */ struct RF_DiskQueueData_s { - RF_SectorNum_t sectorOffset; /* sector offset into the disk */ - RF_SectorCount_t numSector; /* number of sectors to read/write */ - RF_IoType_t type; /* read/write/nop */ - caddr_t buf; /* buffer pointer */ - RF_StripeNum_t parityStripeID; /* the RAID parity stripe ID this access is for */ - RF_ReconUnitNum_t which_ru; /* which RU within this parity stripe */ - int priority; /* the priority of this request */ - int (*CompleteFunc)(void *,int);/* function to be called upon completion */ - int (*AuxFunc)(void *,...); /* function called upon completion of the first I/O of a Read_Op_Write pair*/ - void *argument; /* 
argument to be passed to CompleteFunc */ -#ifdef SIMULATE - RF_Owner_t owner; /* which task is responsible for this request */ -#endif /* SIMULATE */ - void *raidPtr; /* needed for simulation */ - RF_AccTraceEntry_t *tracerec; /* perf mon only */ - RF_Etimer_t qtime; /* perf mon only - time request is in queue */ - long entryTime; - RF_DiskQueueData_t *next; - RF_DiskQueueData_t *prev; - caddr_t buf2; /* for read-op-write */ - dev_t dev; /* the device number for in-kernel version */ - RF_DiskQueue_t *queue; /* the disk queue to which this req is targeted */ - RF_DiskQueueDataFlags_t flags; /* flags controlling operation */ - -#ifdef KERNEL - struct proc *b_proc; /* the b_proc from the original bp passed into the driver for this I/O */ - struct buf *bp; /* a bp to use to get this I/O done */ -#endif /* KERNEL */ + RF_SectorNum_t sectorOffset; /* sector offset into the disk */ + RF_SectorCount_t numSector; /* number of sectors to read/write */ + RF_IoType_t type; /* read/write/nop */ + caddr_t buf; /* buffer pointer */ + RF_StripeNum_t parityStripeID; /* the RAID parity stripe ID this + * access is for */ + RF_ReconUnitNum_t which_ru; /* which RU within this parity stripe */ + int priority; /* the priority of this request */ + int (*CompleteFunc) (void *, int); /* function to be called upon + * completion */ + int (*AuxFunc) (void *,...); /* function called upon + * completion of the first I/O + * of a Read_Op_Write pair */ + void *argument; /* argument to be passed to CompleteFunc */ + void *raidPtr; /* needed for simulation */ + RF_AccTraceEntry_t *tracerec; /* perf mon only */ + RF_Etimer_t qtime; /* perf mon only - time request is in queue */ + long entryTime; + RF_DiskQueueData_t *next; + RF_DiskQueueData_t *prev; + caddr_t buf2; /* for read-op-write */ + dev_t dev; /* the device number for in-kernel version */ + RF_DiskQueue_t *queue; /* the disk queue to which this req is + * targeted */ + RF_DiskQueueDataFlags_t flags; /* flags controlling operation */ + + 
struct proc *b_proc; /* the b_proc from the original bp passed into + * the driver for this I/O */ + struct buf *bp; /* a bp to use to get this I/O done */ }; - #define RF_LOCK_DISK_QUEUE 0x01 #define RF_UNLOCK_DISK_QUEUE 0x02 /* note: "Create" returns type-specific queue header pointer cast to (void *) */ struct RF_DiskQueueSW_s { - RF_DiskQueueType_t queueType; - void *(*Create)(RF_SectorCount_t, RF_AllocListElem_t *, RF_ShutdownList_t **); /* creation routine -- one call per queue in system */ - void (*Enqueue)(void *,RF_DiskQueueData_t * ,int); /* enqueue routine */ - RF_DiskQueueData_t *(*Dequeue)(void *); /* dequeue routine */ - RF_DiskQueueData_t *(*Peek)(void *); /* peek at head of queue */ - - /* the rest are optional: they improve performance, but the driver will deal with it if they don't exist */ - int (*Promote)(void *, RF_StripeNum_t, RF_ReconUnitNum_t); /* promotes priority of tagged accesses */ + RF_DiskQueueType_t queueType; + void *(*Create) (RF_SectorCount_t, RF_AllocListElem_t *, RF_ShutdownList_t **); /* creation routine -- + * one call per queue in + * system */ + void (*Enqueue) (void *, RF_DiskQueueData_t *, int); /* enqueue routine */ + RF_DiskQueueData_t *(*Dequeue) (void *); /* dequeue routine */ + RF_DiskQueueData_t *(*Peek) (void *); /* peek at head of queue */ + + /* the rest are optional: they improve performance, but the driver + * will deal with it if they don't exist */ + int (*Promote) (void *, RF_StripeNum_t, RF_ReconUnitNum_t); /* promotes priority of + * tagged accesses */ }; struct RF_DiskQueue_s { - RF_DiskQueueSW_t *qPtr; /* access point to queue functions */ - void *qHdr; /* queue header, of whatever type */ - RF_DECLARE_MUTEX(mutex) /* mutex locking data structures */ - RF_DECLARE_COND(cond) /* condition variable for synchronization */ - long numOutstanding; /* number of I/Os currently outstanding on disk */ - long maxOutstanding; /* max # of I/Os that can be outstanding on a disk (in-kernel only) */ - int curPriority; /* 
the priority of accs all that are currently outstanding */ - long queueLength; /* number of requests in queue */ - RF_DiskQueueData_t *nextLockingOp; /* a locking op that has arrived at the head of the queue & is waiting for drainage */ - RF_DiskQueueData_t *unlockingOp; /* used at user level to communicate unlocking op b/w user (or dag exec) & disk threads */ - int numWaiting; /* number of threads waiting on this variable. user-level only */ - RF_DiskQueueFlags_t flags; /* terminate, locked */ - RF_Raid_t *raidPtr; /* associated array */ - dev_t dev; /* device number for kernel version */ - RF_SectorNum_t last_deq_sector; /* last sector number dequeued or dispatched */ - int row, col; /* debug only */ -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - struct raidcinfo *rf_cinfo; /* disks component info.. */ -#endif + RF_DiskQueueSW_t *qPtr; /* access point to queue functions */ + void *qHdr; /* queue header, of whatever type */ + RF_DECLARE_MUTEX(mutex) /* mutex locking data structures */ + RF_DECLARE_COND(cond) /* condition variable for + * synchronization */ + long numOutstanding; /* number of I/Os currently outstanding on + * disk */ + long maxOutstanding; /* max # of I/Os that can be outstanding on a + * disk (in-kernel only) */ + int curPriority; /* the priority of accs all that are currently + * outstanding */ + long queueLength; /* number of requests in queue */ + RF_DiskQueueData_t *nextLockingOp; /* a locking op that has + * arrived at the head of the + * queue & is waiting for + * drainage */ + RF_DiskQueueData_t *unlockingOp; /* used at user level to + * communicate unlocking op + * b/w user (or dag exec) & + * disk threads */ + int numWaiting; /* number of threads waiting on this variable. 
+ * user-level only */ + RF_DiskQueueFlags_t flags; /* terminate, locked */ + RF_Raid_t *raidPtr; /* associated array */ + dev_t dev; /* device number for kernel version */ + RF_SectorNum_t last_deq_sector; /* last sector number dequeued or + * dispatched */ + int row, col; /* debug only */ + struct raidcinfo *rf_cinfo; /* disks component info.. */ }; - -#define RF_DQ_LOCKED 0x02 /* no new accs allowed until queue is explicitly unlocked */ +#define RF_DQ_LOCKED 0x02 /* no new accs allowed until queue is + * explicitly unlocked */ /* macros setting & returning information about queues and requests */ #define RF_QUEUE_LOCKED(_q) ((_q)->flags & RF_DQ_LOCKED) @@ -273,43 +160,39 @@ struct RF_DiskQueue_s { (RF_QUEUE_EMPTY(_q_) || \ (!RF_QUEUE_FULL(_q_) && ((_r_)->priority >= (_q_)->curPriority))) -int rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp); +int rf_ConfigureDiskQueueSystem(RF_ShutdownList_t ** listp); -void rf_TerminateDiskQueues(RF_Raid_t *raidPtr); +void rf_TerminateDiskQueues(RF_Raid_t * raidPtr); -int rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); +int +rf_ConfigureDiskQueues(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); -void rf_DiskIOEnqueue(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int pri); +void rf_DiskIOEnqueue(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req, int pri); -#if !defined(KERNEL) && !defined(SIMULATE) -void rf_BroadcastOnQueue(RF_DiskQueue_t *queue); -#endif /* !KERNEL && !SIMULATE */ -#ifndef KERNEL -RF_DiskQueueData_t *rf_DiskIODequeue(RF_DiskQueue_t *queue); -#else /* !KERNEL */ -void rf_DiskIOComplete(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int status); -#endif /* !KERNEL */ +void rf_DiskIOComplete(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req, int status); -int rf_DiskIOPromote(RF_DiskQueue_t *queue, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); +int +rf_DiskIOPromote(RF_DiskQueue_t * queue, RF_StripeNum_t parityStripeID, 
+ RF_ReconUnitNum_t which_ru); -RF_DiskQueueData_t *rf_CreateDiskQueueData(RF_IoType_t typ, - RF_SectorNum_t ssect, RF_SectorCount_t nsect, caddr_t buf, - RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru, - int (*wakeF)(void *, int), - void *arg, RF_DiskQueueData_t *next, RF_AccTraceEntry_t *tracerec, - void *raidPtr, RF_DiskQueueDataFlags_t flags, void *kb_proc); +RF_DiskQueueData_t * +rf_CreateDiskQueueData(RF_IoType_t typ, + RF_SectorNum_t ssect, RF_SectorCount_t nsect, caddr_t buf, + RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru, + int (*wakeF) (void *, int), + void *arg, RF_DiskQueueData_t * next, RF_AccTraceEntry_t * tracerec, + void *raidPtr, RF_DiskQueueDataFlags_t flags, void *kb_proc); -RF_DiskQueueData_t *rf_CreateDiskQueueDataFull(RF_IoType_t typ, - RF_SectorNum_t ssect, RF_SectorCount_t nsect, caddr_t buf, - RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru, - int (*wakeF)(void *, int), - void *arg, RF_DiskQueueData_t *next, RF_AccTraceEntry_t *tracerec, - int priority, int (*AuxFunc)(void *,...), caddr_t buf2, - void *raidPtr, RF_DiskQueueDataFlags_t flags, void *kb_proc); + RF_DiskQueueData_t *rf_CreateDiskQueueDataFull(RF_IoType_t typ, + RF_SectorNum_t ssect, RF_SectorCount_t nsect, caddr_t buf, + RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru, + int (*wakeF) (void *, int), + void *arg, RF_DiskQueueData_t * next, RF_AccTraceEntry_t * tracerec, + int priority, int (*AuxFunc) (void *,...), caddr_t buf2, + void *raidPtr, RF_DiskQueueDataFlags_t flags, void *kb_proc); -void rf_FreeDiskQueueData(RF_DiskQueueData_t *p); + void rf_FreeDiskQueueData(RF_DiskQueueData_t * p); -#endif /* !_RF__RF_DISKQUEUE_H_ */ +#endif /* !_RF__RF_DISKQUEUE_H_ */ diff --git a/sys/dev/raidframe/rf_disks.c b/sys/dev/raidframe/rf_disks.c index fc89d407f47..20daa65340e 100644 --- a/sys/dev/raidframe/rf_disks.c +++ b/sys/dev/raidframe/rf_disks.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_disks.c,v 1.1 1999/01/11 14:29:17 niklas Exp $ */ -/* $NetBSD: 
rf_disks.c,v 1.2 1998/12/03 15:06:25 oster Exp $ */ +/* $OpenBSD: rf_disks.c,v 1.2 1999/02/16 00:02:40 niklas Exp $ */ +/* $NetBSD: rf_disks.c,v 1.5 1999/02/05 00:06:09 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -31,102 +31,6 @@ * rf_disks.c -- code to perform operations on the actual disks ***************************************************************/ -/* : - * Log: rf_disks.c,v - * Revision 1.32 1996/07/27 18:40:24 jimz - * cleanup sweep - * - * Revision 1.31 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.30 1996/07/19 16:11:21 jimz - * pass devname to DoReadCapacity - * - * Revision 1.29 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.28 1996/07/10 22:28:38 jimz - * get rid of obsolete row statuses (dead,degraded2) - * - * Revision 1.27 1996/06/10 12:06:14 jimz - * don't do any SCSI op stuff in simulator at all - * - * Revision 1.26 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.25 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.24 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.23 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.22 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.21 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.20 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.19 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.18 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.17 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.16 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.15 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.14 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.13 1996/05/02 14:57:43 jimz - * initialize sectorMask - * - * Revision 1.12 1995/12/01 15:57:04 root - * added copyright info - * - */ - #include "rf_types.h" #include "rf_raid.h" #include "rf_alloclist.h" @@ -139,7 +43,6 @@ #include "rf_options.h" #include "rf_sys.h" -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) #include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> @@ -150,19 +53,12 @@ #include <sys/vnode.h> #endif -int raidlookup __P((char *, struct proc *p, struct vnode **)); -#endif +int raidlookup __P((char *, struct proc * p, struct vnode **)); -#ifdef SIMULATE -static char disk_db_file_name[120], disk_type_name[120]; -static double init_offset; -#endif /* SIMULATE */ #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) -#include "rf_ccmn.h" - /**************************************************************************************** * * initialize the disks comprising the array @@ -175,183 +71,176 @@ static double init_offset; * spares in it. This probably needs to get changed eventually. 
* ***************************************************************************************/ -int rf_ConfigureDisks( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureDisks( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RaidDisk_t **disks; - RF_SectorCount_t min_numblks = (RF_SectorCount_t)0x7FFFFFFFFFFFLL; - RF_RowCol_t r, c; - int bs, ret; - unsigned i, count, foundone=0, numFailuresThisRow; - RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL; - int num_rows_done,num_cols_done; - -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - struct proc *proc = 0; -#endif -#ifndef SIMULATE + RF_RaidDisk_t **disks; + RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; + RF_RowCol_t r, c; + int bs, ret; + unsigned i, count, foundone = 0, numFailuresThisRow; + RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL; + int num_rows_done, num_cols_done; + + struct proc *proc = 0; #if !defined(__NetBSD__) && !defined(__OpenBSD__) - ret = rf_SCSI_AllocReadCapacity(&rdcap_op); - if (ret) - goto fail; - ret = rf_SCSI_AllocTUR(&tur_op); - if (ret) - goto fail; -#endif /* !__NetBSD__ && !__OpenBSD__ */ -#endif /* !SIMULATE */ - - num_rows_done = 0; - num_cols_done = 0; - - - RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), (RF_RaidDisk_t **), raidPtr->cleanupList); - if (disks == NULL) { - ret = ENOMEM; - goto fail; - } - raidPtr->Disks = disks; - -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) + ret = rf_SCSI_AllocReadCapacity(&rdcap_op); + if (ret) + goto fail; + ret = rf_SCSI_AllocTUR(&tur_op); + if (ret) + goto fail; +#endif /* !__NetBSD__ && !__OpenBSD__ */ + + num_rows_done = 0; + num_cols_done = 0; + + + RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), (RF_RaidDisk_t **), raidPtr->cleanupList); + if (disks == NULL) { + ret = ENOMEM; + goto fail; + } + raidPtr->Disks = disks; - proc = raidPtr->proc; /* Blah XXX */ - /* get 
space for the device-specific stuff... */ - RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, - sizeof(struct raidcinfo *), (struct raidcinfo **), - raidPtr->cleanupList); - if (raidPtr->raid_cinfo == NULL) { - ret = ENOMEM; - goto fail; - } -#endif + proc = raidPtr->proc; /* Blah XXX */ - for (r=0; r<raidPtr->numRow; r++) { - numFailuresThisRow = 0; - RF_CallocAndAdd(disks[r], raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0), sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), raidPtr->cleanupList); - if (disks[r] == NULL) { - ret = ENOMEM; - goto fail; - } - - /* get more space for device specific stuff.. */ - RF_CallocAndAdd(raidPtr->raid_cinfo[r], - raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0), - sizeof(struct raidcinfo), (struct raidcinfo *), + /* get space for the device-specific stuff... */ + RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, + sizeof(struct raidcinfo *), (struct raidcinfo **), + raidPtr->cleanupList); + if (raidPtr->raid_cinfo == NULL) { + ret = ENOMEM; + goto fail; + } + for (r = 0; r < raidPtr->numRow; r++) { + numFailuresThisRow = 0; + RF_CallocAndAdd(disks[r], raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), raidPtr->cleanupList); + if (disks[r] == NULL) { + ret = ENOMEM; + goto fail; + } + /* get more space for device specific stuff.. */ + RF_CallocAndAdd(raidPtr->raid_cinfo[r], + raidPtr->numCol + ((r == 0) ? 
raidPtr->numSpare : 0), + sizeof(struct raidcinfo), (struct raidcinfo *), raidPtr->cleanupList); - if (raidPtr->raid_cinfo[r] == NULL) { - ret = ENOMEM; - goto fail; - } - - - for (c=0; c<raidPtr->numCol; c++) { - ret = rf_ConfigureDisk(raidPtr,&cfgPtr->devnames[r][c][0], - &disks[r][c], rdcap_op, tur_op, - cfgPtr->devs[r][c],r,c); - if (ret) - goto fail; - if (disks[r][c].status != rf_ds_optimal) { - numFailuresThisRow++; - } - else { - if (disks[r][c].numBlocks < min_numblks) - min_numblks = disks[r][c].numBlocks; - DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", - r,c,disks[r][c].devname, - (long int) disks[r][c].numBlocks, - disks[r][c].blockSize, - (long int) disks[r][c].numBlocks * disks[r][c].blockSize / 1024 / 1024); - } - num_cols_done++; - } - /* XXX fix for n-fault tolerant */ - if (numFailuresThisRow > 0) - raidPtr->status[r] = rf_rs_degraded; - num_rows_done++; - } -#ifndef SIMULATE + if (raidPtr->raid_cinfo[r] == NULL) { + ret = ENOMEM; + goto fail; + } + for (c = 0; c < raidPtr->numCol; c++) { + ret = rf_ConfigureDisk(raidPtr, &cfgPtr->devnames[r][c][0], + &disks[r][c], rdcap_op, tur_op, + cfgPtr->devs[r][c], r, c); + if (ret) + goto fail; + if (disks[r][c].status != rf_ds_optimal) { + numFailuresThisRow++; + } else { + if (disks[r][c].numBlocks < min_numblks) + min_numblks = disks[r][c].numBlocks; + DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", + r, c, disks[r][c].devname, + (long int) disks[r][c].numBlocks, + disks[r][c].blockSize, + (long int) disks[r][c].numBlocks * disks[r][c].blockSize / 1024 / 1024); + } + num_cols_done++; + } + /* XXX fix for n-fault tolerant */ + if (numFailuresThisRow > 0) + raidPtr->status[r] = rf_rs_degraded; + num_rows_done++; + } #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - /* we do nothing */ + /* we do nothing */ #else - rf_SCSI_FreeDiskOp(rdcap_op, 1); rdcap_op = NULL; - rf_SCSI_FreeDiskOp(tur_op, 0); tur_op = NULL; + 
rf_SCSI_FreeDiskOp(rdcap_op, 1); + rdcap_op = NULL; + rf_SCSI_FreeDiskOp(tur_op, 0); + tur_op = NULL; #endif -#endif /* !SIMULATE */ - /* all disks must be the same size & have the same block size, bs must be a power of 2 */ - bs = 0; - for (foundone=r=0; !foundone && r<raidPtr->numRow; r++) { - for (c=0; !foundone && c<raidPtr->numCol; c++) { - if (disks[r][c].status == rf_ds_optimal) { - bs = disks[r][c].blockSize; - foundone = 1; - } - } - } - if (!foundone) { - RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); - ret = EINVAL; - goto fail; - } - for (count=0,i=1; i; i<<=1) if (bs & i) - count++; - if (count != 1) { - RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n",bs); - ret = EINVAL; - goto fail; - } - for (r=0; r<raidPtr->numRow; r++) { - for (c=0; c<raidPtr->numCol; c++) { - if (disks[r][c].status == rf_ds_optimal) { - if (disks[r][c].blockSize != bs) { - RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n",r,c); - ret = EINVAL; - goto fail; + /* all disks must be the same size & have the same block size, bs must + * be a power of 2 */ + bs = 0; + for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) { + for (c = 0; !foundone && c < raidPtr->numCol; c++) { + if (disks[r][c].status == rf_ds_optimal) { + bs = disks[r][c].blockSize; + foundone = 1; + } + } } - if (disks[r][c].numBlocks != min_numblks) { - RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n", - r,c,(int) min_numblks); - disks[r][c].numBlocks = min_numblks; + if (!foundone) { + RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); + ret = EINVAL; + goto fail; + } + for (count = 0, i = 1; i; i <<= 1) + if (bs & i) + count++; + if (count != 1) { + RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); + ret = EINVAL; + goto fail; + } + for (r = 0; r < raidPtr->numRow; r++) { + for (c = 0; c < raidPtr->numCol; c++) { + if (disks[r][c].status == rf_ds_optimal) { + 
if (disks[r][c].blockSize != bs) { + RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c); + ret = EINVAL; + goto fail; + } + if (disks[r][c].numBlocks != min_numblks) { + RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n", + r, c, (int) min_numblks); + disks[r][c].numBlocks = min_numblks; + } + } + } } - } - } - } - raidPtr->sectorsPerDisk = min_numblks; - raidPtr->logBytesPerSector = ffs(bs) - 1; - raidPtr->bytesPerSector = bs; - raidPtr->sectorMask = bs-1; - return(0); + raidPtr->sectorsPerDisk = min_numblks; + raidPtr->logBytesPerSector = ffs(bs) - 1; + raidPtr->bytesPerSector = bs; + raidPtr->sectorMask = bs - 1; + return (0); fail: -#ifndef SIMULATE #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - for(r=0;r<raidPtr->numRow;r++) { - for(c=0;c<raidPtr->numCol;c++) { - /* Cleanup.. */ + for (r = 0; r < raidPtr->numRow; r++) { + for (c = 0; c < raidPtr->numCol; c++) { + /* Cleanup.. */ #ifdef DEBUG - printf("Cleaning up row: %d col: %d\n",r,c); + printf("Cleaning up row: %d col: %d\n", r, c); #endif - if (raidPtr->raid_cinfo[r][c].ci_vp) { - (void)vn_close(raidPtr->raid_cinfo[r][c].ci_vp, - FREAD|FWRITE, proc->p_ucred, proc); - } - } - } - /* Space allocated for raid_vpp will get cleaned up at some other point */ - /* XXX Need more #ifdefs in the above... */ + if (raidPtr->raid_cinfo[r][c].ci_vp) { + (void) vn_close(raidPtr->raid_cinfo[r][c].ci_vp, + FREAD | FWRITE, proc->p_ucred, proc); + } + } + } + /* Space allocated for raid_vpp will get cleaned up at some other + * point */ + /* XXX Need more #ifdefs in the above... 
*/ -#else +#else - if (rdcap_op) rf_SCSI_FreeDiskOp(rdcap_op, 1); - if (tur_op) rf_SCSI_FreeDiskOp(tur_op, 0); + if (rdcap_op) + rf_SCSI_FreeDiskOp(rdcap_op, 1); + if (tur_op) + rf_SCSI_FreeDiskOp(tur_op, 0); #endif -#endif /* !SIMULATE */ - return(ret); + return (ret); } @@ -360,292 +249,237 @@ fail: * recall from the above comment that the spare disk descriptors are stored * in row zero, which is specially expanded to hold them. ***************************************************************************************/ -int rf_ConfigureSpareDisks( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureSpareDisks( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - char buf[256]; - int r,c,i, ret; - RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL; - unsigned bs; - RF_RaidDisk_t *disks; - int num_spares_done; + char buf[256]; + int r, c, i, ret; + RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL; + unsigned bs; + RF_RaidDisk_t *disks; + int num_spares_done; -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - struct proc *proc; -#endif + struct proc *proc; -#ifndef SIMULATE #if !defined(__NetBSD__) && !defined(__OpenBSD__) - ret = rf_SCSI_AllocReadCapacity(&rdcap_op); - if (ret) - goto fail; - ret = rf_SCSI_AllocTUR(&tur_op); - if (ret) - goto fail; -#endif /* !__NetBSD__ && !__OpenBSD__ */ -#endif /* !SIMULATE */ - - num_spares_done = 0; - -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - proc = raidPtr->proc; - /* The space for the spares should have already been - allocated by ConfigureDisks() */ -#endif - - disks = &raidPtr->Disks[0][raidPtr->numCol]; - for (i=0; i<raidPtr->numSpare; i++) { - ret = rf_ConfigureDisk(raidPtr,&cfgPtr->spare_names[i][0], - &disks[i], rdcap_op, tur_op, - cfgPtr->spare_devs[i],0,raidPtr->numCol+i); - if (ret) - goto fail; - if (disks[i].status != rf_ds_optimal) { - RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",buf); - } else { - 
disks[i].status = rf_ds_spare; /* change status to spare */ - DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",i, - disks[i].devname, - (long int) disks[i].numBlocks,disks[i].blockSize, - (long int) disks[i].numBlocks * disks[i].blockSize / 1024 / 1024); - } - num_spares_done++; - } -#ifndef SIMULATE + ret = rf_SCSI_AllocReadCapacity(&rdcap_op); + if (ret) + goto fail; + ret = rf_SCSI_AllocTUR(&tur_op); + if (ret) + goto fail; +#endif /* !__NetBSD__ && !__OpenBSD__ */ + + num_spares_done = 0; + + proc = raidPtr->proc; + /* The space for the spares should have already been allocated by + * ConfigureDisks() */ + + disks = &raidPtr->Disks[0][raidPtr->numCol]; + for (i = 0; i < raidPtr->numSpare; i++) { + ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], + &disks[i], rdcap_op, tur_op, + cfgPtr->spare_devs[i], 0, raidPtr->numCol + i); + if (ret) + goto fail; + if (disks[i].status != rf_ds_optimal) { + RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", buf); + } else { + disks[i].status = rf_ds_spare; /* change status to + * spare */ + DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i, + disks[i].devname, + (long int) disks[i].numBlocks, disks[i].blockSize, + (long int) disks[i].numBlocks * disks[i].blockSize / 1024 / 1024); + } + num_spares_done++; + } #if (defined(__NetBSD__) || defined(__OpenBSD__)) && (_KERNEL) #else - rf_SCSI_FreeDiskOp(rdcap_op, 1); rdcap_op = NULL; - rf_SCSI_FreeDiskOp(tur_op, 0); tur_op = NULL; + rf_SCSI_FreeDiskOp(rdcap_op, 1); + rdcap_op = NULL; + rf_SCSI_FreeDiskOp(tur_op, 0); + tur_op = NULL; #endif -#endif /* !SIMULATE */ - - /* check sizes and block sizes on spare disks */ - bs = 1 << raidPtr->logBytesPerSector; - for (i=0; i<raidPtr->numSpare; i++) { - if (disks[i].blockSize != bs) { - RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n",disks[i].blockSize, disks[i].devname, bs); - ret = EINVAL; - goto fail; - } - if (disks[i].numBlocks < 
raidPtr->sectorsPerDisk) { - RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", - disks[i].devname, disks[i].blockSize, (long int)raidPtr->sectorsPerDisk); - ret = EINVAL; - goto fail; - } else if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { - RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n",disks[i].devname, (long int) raidPtr->sectorsPerDisk); - - disks[i].numBlocks = raidPtr->sectorsPerDisk; - } - } - - return(0); + + /* check sizes and block sizes on spare disks */ + bs = 1 << raidPtr->logBytesPerSector; + for (i = 0; i < raidPtr->numSpare; i++) { + if (disks[i].blockSize != bs) { + RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); + ret = EINVAL; + goto fail; + } + if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { + RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", + disks[i].devname, disks[i].blockSize, (long int) raidPtr->sectorsPerDisk); + ret = EINVAL; + goto fail; + } else + if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { + RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk); + + disks[i].numBlocks = raidPtr->sectorsPerDisk; + } + } + + return (0); fail: -#ifndef SIMULATE #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - /* Release the hold on the main components. We've failed to allocate a - spare, and since we're failing, we need to free things.. */ + /* Release the hold on the main components. We've failed to allocate + * a spare, and since we're failing, we need to free things.. */ - for(r=0;r<raidPtr->numRow;r++) { - for(c=0;c<raidPtr->numCol;c++) { - /* Cleanup.. */ + for (r = 0; r < raidPtr->numRow; r++) { + for (c = 0; c < raidPtr->numCol; c++) { + /* Cleanup.. 
*/ #ifdef DEBUG - printf("Cleaning up row: %d col: %d\n",r,c); + printf("Cleaning up row: %d col: %d\n", r, c); #endif - if (raidPtr->raid_cinfo[r][c].ci_vp) { - (void)vn_close(raidPtr->raid_cinfo[r][c].ci_vp, - FREAD|FWRITE, proc->p_ucred, proc); - } - } - } - - for(i=0;i<raidPtr->numSpare;i++) { - /* Cleanup.. */ + if (raidPtr->raid_cinfo[r][c].ci_vp) { + (void) vn_close(raidPtr->raid_cinfo[r][c].ci_vp, + FREAD | FWRITE, proc->p_ucred, proc); + } + } + } + + for (i = 0; i < raidPtr->numSpare; i++) { + /* Cleanup.. */ #ifdef DEBUG - printf("Cleaning up spare: %d\n",i); + printf("Cleaning up spare: %d\n", i); #endif - if (raidPtr->raid_cinfo[0][raidPtr->numCol+i].ci_vp) { - (void)vn_close(raidPtr->raid_cinfo[0][raidPtr->numCol+i].ci_vp, - FREAD|FWRITE, proc->p_ucred, proc); - } - } + if (raidPtr->raid_cinfo[0][raidPtr->numCol + i].ci_vp) { + (void) vn_close(raidPtr->raid_cinfo[0][raidPtr->numCol + i].ci_vp, + FREAD | FWRITE, proc->p_ucred, proc); + } + } -#else +#else - if (rdcap_op) rf_SCSI_FreeDiskOp(rdcap_op, 1); - if (tur_op) rf_SCSI_FreeDiskOp(tur_op, 0); + if (rdcap_op) + rf_SCSI_FreeDiskOp(rdcap_op, 1); + if (tur_op) + rf_SCSI_FreeDiskOp(tur_op, 0); #endif -#endif /* !SIMULATE */ - return(ret); + return (ret); } /* configure a single disk in the array */ -int rf_ConfigureDisk(raidPtr, buf, diskPtr, rdcap_op, tur_op, dev, row, col) - RF_Raid_t *raidPtr; /* We need this down here too!! GO */ - char *buf; - RF_RaidDisk_t *diskPtr; - RF_DiskOp_t *rdcap_op; - RF_DiskOp_t *tur_op; - dev_t dev; /* device number used only in kernel */ - RF_RowCol_t row; - RF_RowCol_t col; +int +rf_ConfigureDisk(raidPtr, buf, diskPtr, rdcap_op, tur_op, dev, row, col) + RF_Raid_t *raidPtr; /* We need this down here too!! 
GO */ + char *buf; + RF_RaidDisk_t *diskPtr; + RF_DiskOp_t *rdcap_op; + RF_DiskOp_t *tur_op; + dev_t dev; /* device number used only in kernel */ + RF_RowCol_t row; + RF_RowCol_t col; { - char *p; -#ifdef SIMULATE - double init_offset; -#else /* SIMULATE */ -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - int retcode; -#else - int busid, targid, lun, retcode; -#endif -#endif /* SIMULATE */ + char *p; + int retcode; -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) struct partinfo dpart; struct vnode *vp; struct vattr va; struct proc *proc; - int error; -#endif + int error; + + retcode = 0; + p = rf_find_non_white(buf); + if (p[strlen(p) - 1] == '\n') { + /* strip off the newline */ + p[strlen(p) - 1] = '\0'; + } + (void) strcpy(diskPtr->devname, p); -retcode = 0; - p = rf_find_non_white(buf); - if (p[strlen(p)-1] == '\n') { - /* strip off the newline */ - p[strlen(p)-1] = '\0'; - } - (void) strcpy(diskPtr->devname, p); - -#ifdef SIMULATE - - init_offset = 0.0; - rf_InitDisk(&diskPtr->diskState, disk_db_file_name,diskPtr->devname,0,0,init_offset,row,col); - rf_GeometryDoReadCapacity(&diskPtr->diskState, &diskPtr->numBlocks, &diskPtr->blockSize); - diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100; - - /* we allow the user to specify that only a fraction of the disks should be used - * this is just for debug: it speeds up the parity scan - */ - -#else /* SIMULATE */ #if !defined(__NetBSD__) && !defined(__OpenBSD__) - /* get bus, target, lun */ - retcode = rf_extract_ids(p, &busid, &targid, &lun); - if (retcode) - return(retcode); - - /* required in kernel, nop at user level */ - retcode = rf_SCSI_OpenUnit(dev); - if (retcode) - return(retcode); - - diskPtr->dev = dev; - if (rf_SCSI_DoTUR(tur_op, (u_char)busid, (u_char)targid, (u_char)lun, dev)) { - RF_ERRORMSG1("Disk %s failed TUR. 
Marked as dead.\n",diskPtr->devname); - diskPtr->status = rf_ds_failed; - } else { - diskPtr->status = rf_ds_optimal; - retcode = rf_SCSI_DoReadCapacity(raidPtr,rdcap_op, busid, targid, lun, dev, - &diskPtr->numBlocks, &diskPtr->blockSize, diskPtr->devname); - if (retcode) - return(retcode); - - /* we allow the user to specify that only a fraction of the disks should be used - * this is just for debug: it speeds up the parity scan - */ - diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100; - } + /* get bus, target, lun */ + retcode = rf_extract_ids(p, &busid, &targid, &lun); + if (retcode) + return (retcode); + + /* required in kernel, nop at user level */ + retcode = rf_SCSI_OpenUnit(dev); + if (retcode) + return (retcode); + + diskPtr->dev = dev; + if (rf_SCSI_DoTUR(tur_op, (u_char) busid, (u_char) targid, (u_char) lun, dev)) { + RF_ERRORMSG1("Disk %s failed TUR. Marked as dead.\n", diskPtr->devname); + diskPtr->status = rf_ds_failed; + } else { + diskPtr->status = rf_ds_optimal; + retcode = rf_SCSI_DoReadCapacity(raidPtr, rdcap_op, busid, targid, lun, dev, + &diskPtr->numBlocks, &diskPtr->blockSize, diskPtr->devname); + if (retcode) + return (retcode); + + /* we allow the user to specify that only a fraction of the + * disks should be used this is just for debug: it speeds up + * the parity scan */ + diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100; + } #endif -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - - proc = raidPtr->proc; /* XXX Yes, this is not nice.. */ - - /* Let's start by claiming the component is fine and well... */ - /* XXX not the case if the disk is toast.. */ - diskPtr->status = rf_ds_optimal; - - - raidPtr->raid_cinfo[row][col].ci_vp = NULL; - raidPtr->raid_cinfo[row][col].ci_dev = NULL; - - error = raidlookup(diskPtr->devname, proc, &vp); - if (error) { - printf("raidlookup on device: %s failed!\n",diskPtr->devname); - if (error == ENXIO) { - /* XXX the component isn't there... 
must be dead :-( */ - diskPtr->status = rf_ds_failed; - } else { - return(error); - } - } - - if (diskPtr->status == rf_ds_optimal) { - - if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { - return(error); - } - - error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, - FREAD, proc->p_ucred, proc); - if (error) { - return(error); - } - - - diskPtr->blockSize = dpart.disklab->d_secsize; - - diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; - - raidPtr->raid_cinfo[row][col].ci_vp = vp; - raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev; -#if 0 - diskPtr->dev = dev; -#endif + proc = raidPtr->proc; /* XXX Yes, this is not nice.. */ - diskPtr->dev = va.va_rdev; /* XXX or the above? */ + /* Let's start by claiming the component is fine and well... */ + /* XXX not the case if the disk is toast.. */ + diskPtr->status = rf_ds_optimal; - /* we allow the user to specify that only a fraction of the disks should be used - * this is just for debug: it speeds up the parity scan - */ - diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100; - - } -#endif /* !__NetBSD__ && !__OpenBSD__ */ -#endif /* SIMULATE */ + raidPtr->raid_cinfo[row][col].ci_vp = NULL; + raidPtr->raid_cinfo[row][col].ci_dev = NULL; - return(0); -} + error = raidlookup(diskPtr->devname, proc, &vp); + if (error) { + printf("raidlookup on device: %s failed!\n", diskPtr->devname); + if (error == ENXIO) { + /* XXX the component isn't there... 
must be dead :-( */ + diskPtr->status = rf_ds_failed; + } else { + return (error); + } + } + if (diskPtr->status == rf_ds_optimal) { -#ifdef SIMULATE + if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { + return (error); + } + error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, + FREAD, proc->p_ucred, proc); + if (error) { + return (error); + } + diskPtr->blockSize = dpart.disklab->d_secsize; -void rf_default_disk_names() -{ - sprintf(disk_db_file_name,"disk.db"); - sprintf(disk_type_name,"HP2247"); -} + diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; -void rf_set_disk_db_name(s) - char *s; -{ - strcpy(disk_db_file_name,s); -} + raidPtr->raid_cinfo[row][col].ci_vp = vp; + raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev; -void rf_set_disk_type_name(s) - char *s; -{ - strcpy(disk_type_name,s); -} +#if 0 + diskPtr->dev = dev; +#endif + + diskPtr->dev = va.va_rdev; /* XXX or the above? */ + + /* we allow the user to specify that only a fraction of the + * disks should be used this is just for debug: it speeds up + * the parity scan */ + diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100; -#endif /* SIMULATE */ + } + return (0); +} diff --git a/sys/dev/raidframe/rf_disks.h b/sys/dev/raidframe/rf_disks.h index 8857391a8bd..7fc8f58502f 100644 --- a/sys/dev/raidframe/rf_disks.h +++ b/sys/dev/raidframe/rf_disks.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_disks.h,v 1.1 1999/01/11 14:29:18 niklas Exp $ */ -/* $NetBSD: rf_disks.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: rf_disks.h,v 1.2 1999/02/16 00:02:40 niklas Exp $ */ +/* $NetBSD: rf_disks.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -28,59 +28,7 @@ */ /* - * rf_disks.h -- header file for code related to physical disks - */ - -/* : - * Log: rf_disks.h,v - * Revision 1.15 1996/08/20 23:05:13 jimz - * add nreads, nwrites to RaidDisk - * - * Revision 1.14 1996/06/17 03:20:15 jimz - * increase devname len to 56 - * - * Revision 1.13 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.12 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.11 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.10 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.9 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.8 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.7 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.6 1996/05/02 22:06:57 jimz - * add RF_RaidDisk_t - * - * Revision 1.5 1995/12/01 15:56:53 root - * added copyright info - * + * rf_disks.h -- header file for code related to physical disks */ #ifndef _RF__RF_DISKS_H_ @@ -90,48 +38,48 @@ #include "rf_archs.h" #include "rf_types.h" -#ifdef SIMULATE -#include "rf_geometry.h" -#endif /* SIMULATE */ /* * A physical disk can be in one of several states: * IF YOU ADD A STATE, CHECK TO SEE IF YOU NEED TO MODIFY RF_DEAD_DISK() BELOW. */ enum RF_DiskStatus_e { - rf_ds_optimal, /* no problems */ - rf_ds_failed, /* reconstruction ongoing */ - rf_ds_reconstructing, /* reconstruction complete to spare, dead disk not yet replaced */ - rf_ds_dist_spared, /* reconstruction complete to distributed spare space, dead disk not yet replaced */ - rf_ds_spared, /* reconstruction complete to distributed spare space, dead disk not yet replaced */ - rf_ds_spare, /* an available spare disk */ - rf_ds_used_spare /* a spare which has been used, and hence is not available */ + rf_ds_optimal, /* no problems */ + rf_ds_failed, /* reconstruction ongoing */ + rf_ds_reconstructing, /* reconstruction complete to spare, dead disk + * not yet replaced */ + rf_ds_dist_spared, /* reconstruction complete to distributed + * spare space, dead disk not yet replaced */ + rf_ds_spared, /* reconstruction complete to distributed + * spare space, dead disk not yet replaced */ + rf_ds_spare, /* an available spare disk */ + rf_ds_used_spare /* a spare which has been used, and hence is + * not available */ }; typedef enum RF_DiskStatus_e 
RF_DiskStatus_t; struct RF_RaidDisk_s { - char devname[56]; /* name of device file */ - RF_DiskStatus_t status; /* whether it is up or down */ - RF_RowCol_t spareRow; /* if in status "spared", this identifies the spare disk */ - RF_RowCol_t spareCol; /* if in status "spared", this identifies the spare disk */ - RF_SectorCount_t numBlocks; /* number of blocks, obtained via READ CAPACITY */ - int blockSize; - /* XXX the folling is needed since we seem to need SIMULATE defined - in order to get user-land stuff to compile, but we *don't* want - this in the structure for the user-land utilities, as the - kernel doesn't know about it!! (and it messes up the size of - the structure, so there is a communication problem between - the kernel and the userland utils :-( GO */ -#if defined(SIMULATE) && !defined(RF_UTILITY) - RF_DiskState_t diskState; /* the name of the disk as used in the disk module */ -#endif /* SIMULATE */ + char devname[56]; /* name of device file */ + RF_DiskStatus_t status; /* whether it is up or down */ + RF_RowCol_t spareRow; /* if in status "spared", this identifies the + * spare disk */ + RF_RowCol_t spareCol; /* if in status "spared", this identifies the + * spare disk */ + RF_SectorCount_t numBlocks; /* number of blocks, obtained via READ + * CAPACITY */ + int blockSize; + /* XXX the folling is needed since we seem to need SIMULATE defined in + * order to get user-land stuff to compile, but we *don't* want this + * in the structure for the user-land utilities, as the kernel doesn't + * know about it!! 
(and it messes up the size of the structure, so + * there is a communication problem between the kernel and the + * userland utils :-( GO */ #if RF_KEEP_DISKSTATS > 0 - RF_uint64 nreads; - RF_uint64 nwrites; -#endif /* RF_KEEP_DISKSTATS > 0 */ - dev_t dev; + RF_uint64 nreads; + RF_uint64 nwrites; +#endif /* RF_KEEP_DISKSTATS > 0 */ + dev_t dev; }; - /* * An RF_DiskOp_t ptr is really a pointer to a UAGT_CCB, but I want * to isolate the cam layer from all other layers, so I typecast to/from @@ -144,18 +92,15 @@ typedef void RF_DiskOp_t; ((_dstat_) == rf_ds_reconstructing) || ((_dstat_) == rf_ds_failed) || \ ((_dstat_) == rf_ds_dist_spared)) -int rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -int rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -int rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr, - RF_DiskOp_t *rdcap_op, RF_DiskOp_t *tur_op, dev_t dev, - RF_RowCol_t row, RF_RowCol_t col); - -#ifdef SIMULATE -void rf_default_disk_names(void); -void rf_set_disk_db_name(char *s); -void rf_set_disk_type_name(char *s); -#endif /* SIMULATE */ +int +rf_ConfigureDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int +rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int +rf_ConfigureDisk(RF_Raid_t * raidPtr, char *buf, RF_RaidDisk_t * diskPtr, + RF_DiskOp_t * rdcap_op, RF_DiskOp_t * tur_op, dev_t dev, + RF_RowCol_t row, RF_RowCol_t col); -#endif /* !_RF__RF_DISKS_H_ */ +#endif /* !_RF__RF_DISKS_H_ */ diff --git a/sys/dev/raidframe/rf_diskthreads.h b/sys/dev/raidframe/rf_diskthreads.h index 60181759b6d..af446b6b96c 100644 --- a/sys/dev/raidframe/rf_diskthreads.h +++ b/sys/dev/raidframe/rf_diskthreads.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_diskthreads.h,v 1.1 1999/01/11 14:29:18 niklas Exp $ */ -/* $NetBSD: rf_diskthreads.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: 
rf_diskthreads.h,v 1.2 1999/02/16 00:02:40 niklas Exp $ */ +/* $NetBSD: rf_diskthreads.h,v 1.3 1999/02/05 00:06:10 oster Exp $ */ /* * rf_diskthreads.h */ @@ -29,44 +29,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ -/* - * : - * Log: rf_diskthreads.h,v - * Revision 1.7 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.6 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.5 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.4 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.3 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.2 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.1 1996/05/18 19:55:58 jimz - * Initial revision - * - */ + /* * rf_diskthreads.h -- types and prototypes for disk thread system */ @@ -78,26 +41,17 @@ /* this is the information that a disk thread needs to do its job */ struct RF_DiskId_s { - RF_DiskQueue_t *queue; - RF_Raid_t *raidPtr; - RF_RaidDisk_t *disk; - int fd; /* file descriptor */ - RF_RowCol_t row, col; /* debug only */ -#ifdef SIMULATE - int state; -#endif /* SIMULATE */ + RF_DiskQueue_t *queue; + RF_Raid_t *raidPtr; + RF_RaidDisk_t *disk; + int fd; /* file descriptor */ + RF_RowCol_t row, col; /* debug only */ }; -int rf_ConfigureDiskThreads(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); +int +rf_ConfigureDiskThreads(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); -#ifdef SIMULATE -int rf_SetDiskIdle(RF_Raid_t *raidPtr, RF_RowCol_t r, RF_RowCol_t c); -int rf_ScanDiskQueues(RF_Raid_t *raidPtr); -void rf_simulator_complete_io(RF_DiskId_t *id); -void rf_PrintDiskStat(RF_Raid_t *raidPtr); -#else /* SIMULATE */ -int rf_ShutdownDiskThreads(RF_Raid_t *raidPtr); -#endif /* SIMULATE */ +int rf_ShutdownDiskThreads(RF_Raid_t * raidPtr); -#endif /* !_RF__RF_DISKTHREADS_H_ */ +#endif /* !_RF__RF_DISKTHREADS_H_ */ diff --git a/sys/dev/raidframe/rf_driver.c b/sys/dev/raidframe/rf_driver.c index f8db8f5baf0..8b30b825bed 100644 --- a/sys/dev/raidframe/rf_driver.c +++ b/sys/dev/raidframe/rf_driver.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_driver.c,v 1.1 1999/01/11 14:29:18 niklas Exp $ */ -/* $NetBSD: rf_driver.c,v 1.2 1998/11/13 13:45:15 drochner Exp $ */ +/* $OpenBSD: rf_driver.c,v 1.2 1999/02/16 00:02:41 niklas Exp $ */ +/* $NetBSD: rf_driver.c,v 1.6 1999/02/05 00:06:10 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -36,582 +36,19 @@ * ******************************************************************************/ -/* - * : - * Log: rf_driver.c,v - * Revision 1.147 1996/08/21 04:12:46 jimz - * added hook for starting out req_hist w/ more distributed values - * (currently not done) - * - * Revision 1.146 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.145 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.144 1996/07/27 18:40:24 jimz - * cleanup sweep - * - * Revision 1.143 1996/07/22 21:11:53 jimz - * fix formatting on DoAccess error msg - * - * Revision 1.142 1996/07/19 16:10:06 jimz - * added call to rf_ResetDebugOptions() in rf_ConfigureDebug() - * - * Revision 1.141 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.140 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.139 1996/07/15 05:40:41 jimz - * some recon datastructure cleanup - * better handling of multiple failures - * added undocumented double-recon test - * - * Revision 1.138 1996/07/11 19:08:00 jimz - * generalize reconstruction mechanism - * allow raid1 reconstructs via copyback (done with array - * quiesced, not online, therefore not disk-directed) - * - * Revision 1.137 1996/07/10 22:28:00 jimz - * get rid of obsolete row statuses (dead,degraded2) - * - * Revision 1.136 1996/06/17 14:38:33 jimz - * properly #if out RF_DEMO code - * fix bug in MakeConfig that was causing weird behavior - * in configuration routines (config was not zeroed at start) - * clean up genplot handling of stacks - * - * Revision 1.135 1996/06/17 03:20:32 jimz - * move out raidframe_attr_default - * don't monkey with stack sizes - * - * Revision 1.134 1996/06/14 23:15:38 jimz - * attempt to deal with thread GC problem - * - * Revision 1.133 1996/06/14 21:24:08 jimz - * new ConfigureEtimer init - * moved out timer vars - * - * Revision 1.132 1996/06/14 
16:19:03 jimz - * remove include of pdllib.h (beginning of PDL cleanup) - * - * Revision 1.131 1996/06/14 14:35:24 jimz - * clean up dfstrace protection - * - * Revision 1.130 1996/06/14 14:16:09 jimz - * engine config is now array-specific - * - * Revision 1.129 1996/06/13 19:08:10 jimz - * add debug var to force keep_acc_totals on - * - * Revision 1.128 1996/06/11 10:57:08 jimz - * init recon_done_proc_mutex - * - * Revision 1.127 1996/06/10 14:18:58 jimz - * move user, throughput stats into per-array structure - * - * Revision 1.126 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.125 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.124 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.123 1996/06/05 19:38:32 jimz - * fixed up disk queueing types config - * added sstf disk queueing - * fixed exit bug on diskthreads (ref-ing bad mem) - * - * Revision 1.122 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.121 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.120 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. 
- * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.119 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.118 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.117 1996/05/30 16:28:33 jimz - * typo in rf_SignalQuiescenceLock() fixed - * - * Revision 1.116 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.115 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.114 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.113 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.112 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.111 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.110 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.109 1996/05/23 00:39:56 jimz - * demoMode -> rf_demoMode - * - * Revision 1.108 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.107 1996/05/21 14:30:04 jimz - * idler_desc_mutex should be ifndef SIMULATE - * - * Revision 1.106 1996/05/20 19:31:12 jimz - * add atomic debug (mutex and cond leak finder) stuff - * - * Revision 1.105 1996/05/20 16:12:45 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.104 1996/05/18 20:09:41 jimz - * bit of cleanup to compile cleanly in kernel, once again - * - * Revision 1.103 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.102 1996/05/16 21:20:51 jimz - * use FREELIST stuff to manage access descriptors - * - * Revision 1.101 1996/05/16 14:21:10 jimz - * remove bogus copies from write path on user - * - * Revision 1.100 1996/05/15 22:33:54 jimz - * appropriately #ifdef cache stuff - * - * Revision 1.99 1996/05/08 21:34:41 jimz - * #if 0 ShutdownCache() and ConfigureCache() - * - * Revision 1.98 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything 
raidframe-related - * - * Revision 1.97 1996/05/07 19:02:58 wvcii - * corrected header comment of rf_DoAccess() - * reordered free of desc in FreeRaidAccDesc() The desc is now - * freed last. - * - * Revision 1.96 1996/05/07 17:40:50 jimz - * add doDebug - * - * Revision 1.95 1996/05/06 21:35:23 jimz - * fixed ordering of cleanup and removed extra decrement of configureCount - * - * Revision 1.94 1996/05/06 18:44:14 jimz - * reorder cleanup to not blow alloclist out from under various modules - * zero raidPtr contents on config - * - * Revision 1.93 1996/05/04 17:06:53 jimz - * Fail the I/O with ENOSPC if reading past end of the array in the kernel. - * - * Revision 1.92 1996/05/03 19:44:22 wvcii - * debug vars degDagDebug and enableAtomicRMW now defined - * in this file. - * - * Revision 1.91 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.90 1995/12/08 15:07:03 arw - * cache code cleanup - * - * Revision 1.89 1995/12/06 20:53:58 wvcii - * created debug var forceParityLogReint - * this variable forces reintegration of all parity logs at shutdown - * - * Revision 1.88 1995/12/01 15:59:10 root - * added copyright info - * - * Revision 1.87 1995/11/28 21:34:02 amiri - * modified SetReconfiguredMode so that it installs the - * spare table only if arch is declustered based on block designs - * - * Revision 1.86 1995/11/21 23:06:11 amiri - * added division by zero check in printing - * throughput stats. 
- * - * Revision 1.85 1995/11/19 16:27:25 wvcii - * disableParityVerify now defined locally, only read from config - * file for !KERNEL compiles - * - * Revision 1.84 1995/11/17 15:08:31 wvcii - * added debug var disableParityVerify - * used in RealLoopTest to disable parity verification - * - * Revision 1.83 1995/11/07 15:48:43 wvcii - * deleted debug vars: suppressAtomicRMW, enableRollAway, concatDagDebug - * deleted debug vars: debugSelectUnit, debugSelectBlock - * added debug var: enableAtomicRMW - * - * Revision 1.82 1995/10/18 19:28:45 amiri - * added support for reconstruction demos in the - * simulator, by updating some simulator - * variables in Faildisk. - * - * Revision 1.81 1995/10/09 18:36:33 jimz - * move rf_StopThroughputStats() into FreeAccDesc() - * changed throughput output print format - * added user-level copy to write path to emulate kernel hack - * - * Revision 1.80 1995/10/09 18:07:47 wvcii - * moved call to rf_StopThroughputStats to rf_states.c - * - * Revision 1.79 1995/10/09 17:38:53 jimz - * quiesce an array for user-level testing before shutting it down - * (should this also be done in the kernel?) 
- * - * Revision 1.78 1995/10/09 15:35:43 wvcii - * added code to measure throughput in user mode - * - * Revision 1.77 1995/10/05 06:18:59 jimz - * Changed DDEventRequest() to take additional arg, used by simulator - * to cache diskid so queue length can be decremented on io complete - * (this is a hack to get around the fact that the event mechanism - * assumes it can dereference arbitrary handles on enqueued events) - * - * Revision 1.76 1995/10/04 07:25:10 jimz - * turn off bigstacks by default - * - * Revision 1.75 1995/10/04 07:24:34 jimz - * code for bigstacks in user process - * - * Revision 1.74 1995/09/26 21:42:51 wvcii - * removed calls to ConfigureCache, ShutdownCache when building kernel - * kernel currently does not support any cached architectures - * - * Revision 1.73 1995/09/20 21:05:35 jimz - * add missing unit arg to IO_BUF_ERR() in non-kernel case - * - * Revision 1.72 1995/09/19 23:02:44 jimz - * call RF_DKU_END_IO in the appropriate places - * - * Revision 1.71 1995/09/07 19:02:31 jimz - * mods to get raidframe to compile and link - * in kernel environment - * - * Revision 1.70 1995/09/06 19:24:01 wvcii - * added debug vars enableRollAway and debugRecovery - * - * Revision 1.69 1995/08/24 19:25:36 rachad - * Fixes to LSS GC in the simulater - * - * Revision 1.68 1995/07/28 21:43:42 robby - * checkin after leaving for Rice. 
Bye - * - * Revision 1.67 1995/07/26 18:06:52 cfb - * *** empty log message *** - * - * Revision 1.66 1995/07/26 03:25:24 robby - * fixed accesses mutex and updated call to ConfigureCache - * - * Revision 1.65 1995/07/25 14:36:52 rachad - * *** empty log message *** - * - * Revision 1.64 1995/07/21 19:29:05 robby - * added total_accesses - * - * Revision 1.63 1995/07/20 19:43:35 cfb - * *** empty log message *** - * - * Revision 1.62 1995/07/20 16:10:24 rachad - * *** empty log message *** - * - * Revision 1.61 1995/07/20 03:36:53 rachad - * Added suport for cache warming - * - * Revision 1.60 1995/07/17 22:31:31 cfb - * *** empty log message *** - * - * Revision 1.59 1995/07/16 17:02:23 cfb - * *** empty log message *** - * - * Revision 1.58 1995/07/16 15:19:27 cfb - * *** empty log message *** - * - * Revision 1.57 1995/07/16 03:17:01 cfb - * *** empty log message *** - * - * Revision 1.56 1995/07/13 16:11:59 cfb - * *** empty log message *** - * - * Revision 1.55 1995/07/13 15:42:40 cfb - * added cacheDebug variable ... 
- * - * Revision 1.54 1995/07/13 14:28:27 rachad - * *** empty log message *** - * - * Revision 1.53 1995/07/10 21:48:52 robby - * added virtualStripingWarnings - * - * Revision 1.52 1995/07/10 20:41:13 rachad - * *** empty log message *** - * - * Revision 1.51 1995/07/09 19:46:49 cfb - * Added cache Shutdown - * - * Revision 1.50 1995/07/08 21:38:53 rachad - * Added support for interactive traces - * in the simulator - * - * Revision 1.49 1995/07/08 18:05:39 rachad - * Linked up Claudsons code with the real cache - * - * Revision 1.48 1995/07/07 16:00:22 cfb - * Added initialization of cacheDesc to AllocRaidAccDesc - * - * Revision 1.47 1995/07/06 14:22:37 rachad - * Merge complete - * - * Revision 1.46.50.2 1995/06/21 17:48:30 robby - * test - * - * Revision 1.46.50.1 1995/06/21 17:34:49 robby - * branching to work on "meta-dag" capabilities - * - * Revision 1.46.10.5 1995/07/03 21:58:34 holland - * added support for suppressing both stripe locks & large writes - * - * Revision 1.46.10.4 1995/06/27 03:42:48 holland - * typo fix - * - * Revision 1.46.10.3 1995/06/27 03:31:42 holland - * prototypes - * - * Revision 1.46.10.2 1995/06/27 03:17:57 holland - * fixed callback bug in kernel rf_DoAccess - * - * Revision 1.46.10.1 1995/06/25 14:32:44 holland - * initial checkin on new branch - * - * Revision 1.46 1995/06/13 17:52:41 holland - * added UserStats stuff - * - * Revision 1.45 1995/06/13 16:03:41 rachad - * *** empty log message *** - * - * Revision 1.44 1995/06/12 15:54:40 rachad - * Added garbege collection for log structured storage - * - * Revision 1.43 1995/06/09 18:01:09 holland - * various changes related to in-kernel recon, multiple-row arrays, - * trace extraction from kernel, etc. 
- * - * Revision 1.42 1995/06/08 19:52:28 rachad - * *** empty log message *** - * - * Revision 1.41 1995/06/08 00:11:49 robby - * added a debug variable -- showVirtualSizeRequirements - * - * Revision 1.40 1995/06/05 00:33:30 holland - * protectedSectors bug fix - * - * Revision 1.39 1995/06/01 22:45:03 holland - * made compilation of parity logging and virtual striping - * stuff conditional on some constants defined in rf_archs.h - * - * Revision 1.38 1995/06/01 21:52:37 holland - * replaced NULL sizes in calls to Free() by -1, and caused this - * to suppress the size-mismatch error - * - * Revision 1.37 1995/05/26 20:04:54 wvcii - * modified parity logging debug vars - * - * Revision 1.36 95/05/21 15:32:41 wvcii - * added debug vars: parityLogDebug, numParityRegions, numParityLogs, - * numReintegrationThreads - * - * Revision 1.35 95/05/19 20:58:21 holland - * cleanups on error cases in rf_DoAccess - * - * Revision 1.34 1995/05/16 17:35:53 holland - * added rf_copyback_in_progress. this is debug-only. - * - * Revision 1.33 1995/05/15 12:25:35 holland - * bug fix in test code: no stripe locks were getting acquired in RAID0 mode - * - * Revision 1.32 1995/05/10 18:54:12 holland - * bug fixes related to deadlock problem at time of disk failure - * eliminated read-op-write code - * beefed up parity checking in loop test - * various small changes & new ASSERTs - * - * Revision 1.31 1995/05/02 22:49:02 holland - * add shutdown calls for each architecture - * - * Revision 1.30 1995/05/01 14:43:37 holland - * merged changes from Bill - * - * Revision 1.29 1995/05/01 13:28:00 holland - * parity range locks, locking disk requests, recon+parityscan in kernel, etc. 
- * - * Revision 1.28 1995/04/24 13:25:51 holland - * rewrite to move disk queues, recon, & atomic RMW to kernel - * - * Revision 1.27 1995/04/06 14:47:56 rachad - * merge completed - * - * Revision 1.26 1995/04/03 20:32:35 rachad - * added reconstruction to simulator - * - * Revision 1.25.10.2 1995/04/03 20:41:00 holland - * misc changes related to distributed sparing - * - * Revision 1.25.10.1 1995/03/17 20:04:01 holland - * initial checkin on new branch - * - * Revision 1.25 1995/03/15 20:34:30 holland - * changes for distributed sparing. - * - * Revision 1.24 1995/03/09 19:53:05 rachad - * *** empty log message *** - * - * Revision 1.23 1995/03/03 18:36:16 rachad - * Simulator mechanism added - * - * Revision 1.22 1995/03/01 20:25:48 holland - * kernelization changes - * - * Revision 1.21 1995/02/17 19:39:56 holland - * added size param to all calls to Free(). - * this is ignored at user level, but necessary in the kernel. - * - * Revision 1.20 1995/02/17 13:37:49 holland - * kernelization changes -- not yet complete - * - * Revision 1.19 1995/02/10 18:08:07 holland - * fixed a few things I broke during kernelization - * - * Revision 1.18 1995/02/10 17:34:10 holland - * kernelization changes - * - * Revision 1.17 1995/02/04 15:51:35 holland - * kernelization changes - * - * Revision 1.16 1995/02/03 22:31:36 holland - * many changes related to kernelization - * - * Revision 1.15 1995/02/01 15:13:05 holland - * moved #include of general.h out of raid.h and into each file - * - * Revision 1.14 1995/02/01 14:25:19 holland - * began changes for kernelization: - * changed all instances of mutex_t and cond_t to DECLARE macros - * converted configuration code to use config structure - * - * Revision 1.13 1995/01/30 14:53:46 holland - * extensive changes related to making DoIO non-blocking - * - * Revision 1.12 1995/01/25 00:26:21 holland - * eliminated support for aio - * - * Revision 1.11 1995/01/24 23:58:46 holland - * multi-way recon XOR, plus various small changes 
- * - * Revision 1.10 1995/01/11 19:27:02 holland - * various changes related to performance tuning - * - * Revision 1.9 1994/12/05 15:29:09 holland - * added trace run time limitation (maxTraceRunTimeSec) - * - * Revision 1.8 1994/12/05 04:18:12 holland - * various new control vars in the config file - * - * Revision 1.7 1994/11/29 23:11:36 holland - * tracerec bug on dag retry fixed - * - * Revision 1.6 1994/11/29 22:11:38 danner - * holland updates - * - * Revision 1.5 1994/11/29 21:09:47 danner - * Detailed tracing support (holland). - * - * Revision 1.4 1994/11/29 20:36:02 danner - * Added suppressAtomicRMW option. - * - * Revision 1.3 1994/11/21 15:34:06 danner - * Added ConfigureAllocList() call. - * - */ - -#ifdef _KERNEL -#define KERNEL -#endif - -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) #include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> #include <sys/ioctl.h> #include <sys/fcntl.h> -#ifdef __NETBSD__ +#ifdef __NetBSD__ #include <sys/vnode.h> #endif -#endif -#ifdef KERNEL -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#include <dkusage.h> -#include <dfstrace.h> -#endif /* !__NetBSD__ && !__OpenBSD__ */ -#endif /* KERNEL */ #include "rf_archs.h" #include "rf_threadstuff.h" -#ifndef KERNEL -#include <stdio.h> -#include <stdlib.h> -#endif /* KERNEL */ #include <sys/errno.h> @@ -651,20 +88,7 @@ #include "rf_sys.h" #include "rf_cpuutil.h" -#ifdef SIMULATE -#include "rf_diskevent.h" -#endif /* SIMULATE */ - -#ifdef KERNEL #include <sys/buf.h> -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#include <io/common/devdriver.h> -#endif /* !__NetBSD__ && !__OpenBSD__ */ - -#if DFSTRACE > 0 -#include <sys/dfs_log.h> -#include <sys/dfstracebuf.h> -#endif /* DFSTRACE > 0 */ #if DKUSAGE > 0 #include <sys/dkusage.h> @@ -672,12 +96,7 @@ #include <io/cam/dec_cam.h> #include <io/cam/cam.h> #include <io/cam/pdrv.h> -#endif /* DKUSAGE > 0 */ -#endif /* KERNEL */ - -#if RF_DEMO > 0 -#include "rf_demo.h" -#endif /* RF_DEMO > 
0 */ +#endif /* DKUSAGE > 0 */ /* rad == RF_RaidAccessDesc_t */ static RF_FreeList_t *rf_rad_freelist; @@ -686,12 +105,12 @@ static RF_FreeList_t *rf_rad_freelist; #define RF_RAD_INITIAL 32 /* debug variables */ -char rf_panicbuf[2048]; /* a buffer to hold an error msg when we panic */ +char rf_panicbuf[2048]; /* a buffer to hold an error msg when we panic */ /* main configuration routines */ static int raidframe_booted = 0; -static void rf_ConfigureDebug(RF_Config_t *cfgPtr); +static void rf_ConfigureDebug(RF_Config_t * cfgPtr); static void set_debug_option(char *name, long val); static void rf_UnconfigureArray(void); static int init_rad(RF_RaidAccessDesc_t *); @@ -700,26 +119,15 @@ static void rf_ShutdownRDFreeList(void *); static int rf_ConfigureRDFreeList(RF_ShutdownList_t **); -RF_DECLARE_MUTEX(rf_printf_mutex) /* debug only: avoids interleaved printfs by different stripes */ -RF_DECLARE_GLOBAL_THREADID /* declarations for threadid.h */ +RF_DECLARE_MUTEX(rf_printf_mutex) /* debug only: avoids interleaved + * printfs by different stripes */ +RF_DECLARE_GLOBAL_THREADID /* declarations for threadid.h */ -#if !defined(KERNEL) && !defined(SIMULATE) -static int rf_InitThroughputStats(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, RF_Config_t *cfgPtr); -static void rf_StopThroughputStats(RF_Raid_t *raidPtr); -static void rf_PrintThroughputStats(RF_Raid_t *raidPtr); -#endif /* !KERNEL && !SIMULATE */ -#ifdef KERNEL #define SIGNAL_QUIESCENT_COND(_raid_) wakeup(&((_raid_)->accesses_suspended)) -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#define WAIT_FOR_QUIESCENCE(_raid_) \ - mpsleep(&((_raid_)->accesses_suspended), PZERO, "raidframe quiesce", 0, \ - (void *) simple_lock_addr((_raid_)->access_suspend_mutex), MS_LOCK_SIMPLE) -#else #define WAIT_FOR_QUIESCENCE(_raid_) \ tsleep(&((_raid_)->accesses_suspended),PRIBIO|PCATCH,"raidframe quiesce", 0); -#endif #if DKUSAGE > 0 #define IO_BUF_ERR(bp, err, unit) { \ bp->b_flags |= B_ERROR; \ @@ -736,215 +144,172 @@ static 
void rf_PrintThroughputStats(RF_Raid_t *raidPtr); RF_DKU_END_IO(unit); \ biodone(bp); \ } -#endif /* DKUSAGE > 0 */ -#else /* KERNEL */ - -#define SIGNAL_QUIESCENT_COND(_raid_) RF_SIGNAL_COND((_raid_)->quiescent_cond) -#define WAIT_FOR_QUIESCENCE(_raid_) RF_WAIT_COND((_raid_)->quiescent_cond, (_raid_)->access_suspend_mutex) -#define IO_BUF_ERR(bp, err, unit) - -#endif /* KERNEL */ +#endif /* DKUSAGE > 0 */ -static int configureCount=0; /* number of active configurations */ -static int isconfigged=0; /* is basic raidframe (non per-array) stuff configged */ -RF_DECLARE_STATIC_MUTEX(configureMutex) /* used to lock the configuration stuff */ + static int configureCount = 0; /* number of active configurations */ + static int isconfigged = 0; /* is basic raidframe (non per-array) + * stuff configged */ +RF_DECLARE_STATIC_MUTEX(configureMutex) /* used to lock the configuration + * stuff */ + static RF_ShutdownList_t *globalShutdown; /* non array-specific + * stuff */ -static RF_ShutdownList_t *globalShutdown; /* non array-specific stuff */ - -static int rf_ConfigureRDFreeList(RF_ShutdownList_t **listp); + static int rf_ConfigureRDFreeList(RF_ShutdownList_t ** listp); /* called at system boot time */ -int rf_BootRaidframe() + int rf_BootRaidframe() { -#if 0 - long stacksize; -#endif - int rc; + int rc; - if (raidframe_booted) - return(EBUSY); - raidframe_booted = 1; + if (raidframe_booted) + return (EBUSY); + raidframe_booted = 1; #if RF_DEBUG_ATOMIC > 0 - rf_atent_init(); -#endif /* RF_DEBUG_ATOMIC > 0 */ - - rf_setup_threadid(); - rf_assign_threadid(); - -#if !defined(KERNEL) && !defined(SIMULATE) - if (RF_THREAD_ATTR_CREATE(raidframe_attr_default)) { - fprintf(stderr, "Unable to create default thread attr\n"); - exit(1); - } -#if 0 - stacksize = RF_THREAD_ATTR_GETSTACKSIZE(raidframe_attr_default); - if (stacksize < 0) { - fprintf(stderr, "Unable to get stack size of default thread attr\n"); - exit(1); - } - stacksize += 16384; - rc = 
RF_THREAD_ATTR_SETSTACKSIZE(raidframe_attr_default, stacksize); - if (rc) { - fprintf(stderr, "Unable to set stack size of default thread attr\n"); - exit(1); - } -#endif /* 0 */ -#endif /* !KERNEL && !SIMULATE */ - rc = rf_mutex_init(&configureMutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_PANIC(); - } - configureCount = 0; - isconfigged = 0; - globalShutdown = NULL; - return(0); -} + rf_atent_init(); +#endif /* RF_DEBUG_ATOMIC > 0 */ + + rf_setup_threadid(); + rf_assign_threadid(); + rc = rf_mutex_init(&configureMutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + RF_PANIC(); + } + configureCount = 0; + isconfigged = 0; + globalShutdown = NULL; + return (0); +} /* * This function is really just for debugging user-level stuff: it * frees up all memory, other RAIDframe resources which might otherwise * be kept around. This is used with systems like "sentinel" to detect * memory leaks. 
*/ -int rf_UnbootRaidframe() +int +rf_UnbootRaidframe() { - int rc; + int rc; RF_LOCK_MUTEX(configureMutex); if (configureCount) { RF_UNLOCK_MUTEX(configureMutex); - return(EBUSY); + return (EBUSY); } raidframe_booted = 0; RF_UNLOCK_MUTEX(configureMutex); rc = rf_mutex_destroy(&configureMutex); if (rc) { RF_ERRORMSG3("Unable to destroy mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + __LINE__, rc); RF_PANIC(); } #if RF_DEBUG_ATOMIC > 0 rf_atent_shutdown(); -#endif /* RF_DEBUG_ATOMIC > 0 */ - return(0); +#endif /* RF_DEBUG_ATOMIC > 0 */ + return (0); } - /* * Called whenever an array is shutdown */ -static void rf_UnconfigureArray() +static void +rf_UnconfigureArray() { - int rc; - - RF_LOCK_MUTEX(configureMutex); - if (--configureCount == 0) { /* if no active configurations, shut everything down */ - isconfigged = 0; - - rc = rf_ShutdownList(&globalShutdown); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown, rc=%d\n", rc); - } - - rf_shutdown_threadid(); - - /* - * We must wait until now, because the AllocList module - * uses the DebugMem module. - */ - if (rf_memDebug) - rf_print_unfreed(); - } - RF_UNLOCK_MUTEX(configureMutex); -} + int rc; + RF_LOCK_MUTEX(configureMutex); + if (--configureCount == 0) { /* if no active configurations, shut + * everything down */ + isconfigged = 0; + + rc = rf_ShutdownList(&globalShutdown); + if (rc) { + RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown, rc=%d\n", rc); + } + rf_shutdown_threadid(); + + /* + * We must wait until now, because the AllocList module + * uses the DebugMem module. + */ + if (rf_memDebug) + rf_print_unfreed(); + } + RF_UNLOCK_MUTEX(configureMutex); +} /* * Called to shut down an array. 
*/ -int rf_Shutdown(raidPtr) - RF_Raid_t *raidPtr; +int +rf_Shutdown(raidPtr) + RF_Raid_t *raidPtr; { -#if !defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(_KERNEL) - int rc; -#endif - int r,c; + int r, c; -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - struct proc *p; -#endif + struct proc *p; + + if (!raidPtr->valid) { + RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n"); + return (EINVAL); + } + /* + * wait for outstanding IOs to land + * As described in rf_raid.h, we use the rad_freelist lock + * to protect the per-array info about outstanding descs + * since we need to do freelist locking anyway, and this + * cuts down on the amount of serialization we've got going + * on. + */ + RF_FREELIST_DO_LOCK(rf_rad_freelist); + if (raidPtr->waitShutdown) { + RF_FREELIST_DO_UNLOCK(rf_rad_freelist); + return (EBUSY); + } + raidPtr->waitShutdown = 1; + while (raidPtr->nAccOutstanding) { + RF_WAIT_COND(raidPtr->outstandingCond, RF_FREELIST_MUTEX_OF(rf_rad_freelist)); + } + RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - if (!raidPtr->valid) { - RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n"); - return(EINVAL); - } - - /* - * wait for outstanding IOs to land - * As described in rf_raid.h, we use the rad_freelist lock - * to protect the per-array info about outstanding descs - * since we need to do freelist locking anyway, and this - * cuts down on the amount of serialization we've got going - * on. 
- */ - RF_FREELIST_DO_LOCK(rf_rad_freelist); - if (raidPtr->waitShutdown) { - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - return(EBUSY); - } - raidPtr->waitShutdown = 1; - while (raidPtr->nAccOutstanding) { - RF_WAIT_COND(raidPtr->outstandingCond, RF_FREELIST_MUTEX_OF(rf_rad_freelist)); - } - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - -#if !defined(KERNEL) && !defined(SIMULATE) - rf_PrintThroughputStats(raidPtr); -#endif /* !KERNEL && !SIMULATE */ - - raidPtr->valid = 0; - -#if !defined(KERNEL) && !defined(SIMULATE) - rf_TerminateDiskQueues(raidPtr); /* tell all disk queues to release any waiting threads */ - rf_ShutdownDiskThreads(raidPtr); /* wait for all threads to exit */ -#endif /* !KERNEL && !SIMULATE */ - -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - /* We take this opportunity to close the vnodes like we should.. */ - - p = raidPtr->proc; /* XXX */ - - for(r=0;r<raidPtr->numRow;r++) { - for(c=0;c<raidPtr->numCol;c++) { - printf("Closing vnode for row: %d col: %d\n",r,c); - if (raidPtr->raid_cinfo[r][c].ci_vp) { - (void)vn_close(raidPtr->raid_cinfo[r][c].ci_vp, - FREAD|FWRITE, p->p_ucred, p); - } else { - printf("vnode was NULL\n"); - } - - } - } - for(r=0;r<raidPtr->numSpare;r++) { - printf("Closing vnode for spare: %d\n",r); - if (raidPtr->raid_cinfo[0][raidPtr->numCol+r].ci_vp) { - (void)vn_close(raidPtr->raid_cinfo[0][raidPtr->numCol+r].ci_vp, - FREAD|FWRITE, p->p_ucred, p); - } else { - printf("vnode was NULL\n"); - } - } + raidPtr->valid = 0; -#endif + /* We take this opportunity to close the vnodes like we should.. 
*/ + + p = raidPtr->proc; /* XXX */ + + for (r = 0; r < raidPtr->numRow; r++) { + for (c = 0; c < raidPtr->numCol; c++) { + printf("Closing vnode for row: %d col: %d\n", r, c); + if (raidPtr->raid_cinfo[r][c].ci_vp) { + (void) vn_close(raidPtr->raid_cinfo[r][c].ci_vp, + FREAD | FWRITE, p->p_ucred, p); + } else { + printf("vnode was NULL\n"); + } + + } + } + for (r = 0; r < raidPtr->numSpare; r++) { + printf("Closing vnode for spare: %d\n", r); + if (raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp) { + (void) vn_close(raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp, + FREAD | FWRITE, p->p_ucred, p); + } else { + printf("vnode was NULL\n"); + } + } + - rf_ShutdownList(&raidPtr->shutdownList); - rf_UnconfigureArray(); + rf_ShutdownList(&raidPtr->shutdownList); - return(0); + rf_UnconfigureArray(); + + return (0); } #define DO_INIT_CONFIGURE(f) { \ @@ -992,774 +357,679 @@ int rf_Shutdown(raidPtr) } \ } -int rf_Configure(raidPtr, cfgPtr) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; +int +rf_Configure(raidPtr, cfgPtr) + RF_Raid_t *raidPtr; + RF_Config_t *cfgPtr; { - RF_RowCol_t row, col; - int i, rc; - int unit; - struct proc *p; - - if (raidPtr->valid) { - RF_ERRORMSG("RAIDframe configuration not shut down. Aborting configure.\n"); - return(EINVAL); - } - - RF_LOCK_MUTEX(configureMutex); - configureCount++; - if (isconfigged == 0) { - rc = rf_create_managed_mutex(&globalShutdown, &rf_printf_mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownList(&globalShutdown); - return(rc); - } - - /* initialize globals */ - printf("RAIDFRAME: protectedSectors is %ld\n",rf_protectedSectors); - - rf_clear_debug_print_buffer(); - - DO_INIT_CONFIGURE(rf_ConfigureAllocList); - DO_INIT_CONFIGURE(rf_ConfigureEtimer); - /* - * Yes, this does make debugging general to the whole system instead - * of being array specific. Bummer, drag. 
- */ - rf_ConfigureDebug(cfgPtr); - DO_INIT_CONFIGURE(rf_ConfigureDebugMem); -#ifdef SIMULATE - rf_default_disk_names(); - DO_INIT_CONFIGURE(rf_DDEventInit); -#endif /* SIMULATE */ - DO_INIT_CONFIGURE(rf_ConfigureAccessTrace); - DO_INIT_CONFIGURE(rf_ConfigureMapModule); - DO_INIT_CONFIGURE(rf_ConfigureReconEvent); - DO_INIT_CONFIGURE(rf_ConfigureCallback); - DO_INIT_CONFIGURE(rf_ConfigureMemChunk); - DO_INIT_CONFIGURE(rf_ConfigureRDFreeList); - DO_INIT_CONFIGURE(rf_ConfigureNWayXor); - DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList); - DO_INIT_CONFIGURE(rf_ConfigureMCPair); -#ifndef SIMULATE + RF_RowCol_t row, col; + int i, rc; + int unit; + struct proc *p; + + if (raidPtr->valid) { + RF_ERRORMSG("RAIDframe configuration not shut down. Aborting configure.\n"); + return (EINVAL); + } + RF_LOCK_MUTEX(configureMutex); + configureCount++; + if (isconfigged == 0) { + rc = rf_create_managed_mutex(&globalShutdown, &rf_printf_mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_ShutdownList(&globalShutdown); + return (rc); + } + /* initialize globals */ + printf("RAIDFRAME: protectedSectors is %ld\n", rf_protectedSectors); + + rf_clear_debug_print_buffer(); + + DO_INIT_CONFIGURE(rf_ConfigureAllocList); + DO_INIT_CONFIGURE(rf_ConfigureEtimer); + /* + * Yes, this does make debugging general to the whole system instead + * of being array specific. Bummer, drag. 
+ */ + rf_ConfigureDebug(cfgPtr); + DO_INIT_CONFIGURE(rf_ConfigureDebugMem); + DO_INIT_CONFIGURE(rf_ConfigureAccessTrace); + DO_INIT_CONFIGURE(rf_ConfigureMapModule); + DO_INIT_CONFIGURE(rf_ConfigureReconEvent); + DO_INIT_CONFIGURE(rf_ConfigureCallback); + DO_INIT_CONFIGURE(rf_ConfigureMemChunk); + DO_INIT_CONFIGURE(rf_ConfigureRDFreeList); + DO_INIT_CONFIGURE(rf_ConfigureNWayXor); + DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList); + DO_INIT_CONFIGURE(rf_ConfigureMCPair); #if !defined(__NetBSD__) && !defined(__OpenBSD__) - DO_INIT_CONFIGURE(rf_ConfigureCamLayer); -#endif -#endif /* !SIMULATE */ - DO_INIT_CONFIGURE(rf_ConfigureDAGs); - DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs); - DO_INIT_CONFIGURE(rf_ConfigureDebugPrint); - DO_INIT_CONFIGURE(rf_ConfigureReconstruction); - DO_INIT_CONFIGURE(rf_ConfigureCopyback); - DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem); - DO_INIT_CONFIGURE(rf_ConfigureCpuMonitor); - isconfigged = 1; - } - RF_UNLOCK_MUTEX(configureMutex); - - /* - * Null out the entire raid descriptor to avoid problems when we reconfig. - * This also clears the valid bit. - */ - /* XXX this clearing should be moved UP to outside of here.... that, or - rf_Configure() needs to take more arguments... XXX */ -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - unit = raidPtr->raidid; - p = raidPtr->proc; /* XXX save these... */ -#endif - bzero((char *)raidPtr, sizeof(RF_Raid_t)); -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - raidPtr->raidid = unit; - raidPtr->proc = p; /* XXX and then recover them..*/ + DO_INIT_CONFIGURE(rf_ConfigureCamLayer); #endif - DO_RAID_MUTEX(&raidPtr->mutex); - /* set up the cleanup list. 
Do this after ConfigureDebug so that value of memDebug will be set */ - - rf_MakeAllocList(raidPtr->cleanupList); - if (raidPtr->cleanupList == NULL) { - DO_RAID_FAIL(); - return(ENOMEM); - } - - rc = rf_ShutdownCreate(&raidPtr->shutdownList, - (void (*)(void *))rf_FreeAllocList, - raidPtr->cleanupList); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - DO_RAID_FAIL(); - return(rc); - } - - raidPtr->numRow = cfgPtr->numRow; - raidPtr->numCol = cfgPtr->numCol; - raidPtr->numSpare = cfgPtr->numSpare; - -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - /* XXX we don't even pretend to support more than one row - in the kernel... */ - if (raidPtr->numRow != 1) { - RF_ERRORMSG("Only one row supported in kernel.\n"); - DO_RAID_FAIL(); - return(EINVAL); - } -#endif - - - - RF_CallocAndAdd(raidPtr->status, raidPtr->numRow, sizeof(RF_RowStatus_t), - (RF_RowStatus_t *), raidPtr->cleanupList); - if (raidPtr->status == NULL) { - DO_RAID_FAIL(); - return(ENOMEM); - } - - RF_CallocAndAdd(raidPtr->reconControl, raidPtr->numRow, - sizeof(RF_ReconCtrl_t *), (RF_ReconCtrl_t **), raidPtr->cleanupList); - if (raidPtr->reconControl == NULL) { - DO_RAID_FAIL(); - return(ENOMEM); - } - for (i=0; i<raidPtr->numRow; i++) { - raidPtr->status[i] = rf_rs_optimal; - raidPtr->reconControl[i] = NULL; - } - - DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine); -#if !defined(KERNEL) && !defined(SIMULATE) - DO_RAID_INIT_CONFIGURE(rf_InitThroughputStats); -#endif /* !KERNEL && !SIMULATE */ + DO_INIT_CONFIGURE(rf_ConfigureDAGs); + DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs); + DO_INIT_CONFIGURE(rf_ConfigureDebugPrint); + DO_INIT_CONFIGURE(rf_ConfigureReconstruction); + DO_INIT_CONFIGURE(rf_ConfigureCopyback); + DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem); + DO_INIT_CONFIGURE(rf_ConfigureCpuMonitor); + isconfigged = 1; + } + RF_UNLOCK_MUTEX(configureMutex); - DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks); + /* + * Null out 
the entire raid descriptor to avoid problems when we reconfig. + * This also clears the valid bit. + */ + /* XXX this clearing should be moved UP to outside of here.... that, + * or rf_Configure() needs to take more arguments... XXX */ + unit = raidPtr->raidid; + p = raidPtr->proc; /* XXX save these... */ + bzero((char *) raidPtr, sizeof(RF_Raid_t)); + raidPtr->raidid = unit; + raidPtr->proc = p; /* XXX and then recover them.. */ + DO_RAID_MUTEX(&raidPtr->mutex); + /* set up the cleanup list. Do this after ConfigureDebug so that + * value of memDebug will be set */ + + rf_MakeAllocList(raidPtr->cleanupList); + if (raidPtr->cleanupList == NULL) { + DO_RAID_FAIL(); + return (ENOMEM); + } + rc = rf_ShutdownCreate(&raidPtr->shutdownList, + (void (*) (void *)) rf_FreeAllocList, + raidPtr->cleanupList); + if (rc) { + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); + DO_RAID_FAIL(); + return (rc); + } + raidPtr->numRow = cfgPtr->numRow; + raidPtr->numCol = cfgPtr->numCol; + raidPtr->numSpare = cfgPtr->numSpare; + + /* XXX we don't even pretend to support more than one row in the + * kernel... 
*/ + if (raidPtr->numRow != 1) { + RF_ERRORMSG("Only one row supported in kernel.\n"); + DO_RAID_FAIL(); + return (EINVAL); + } + RF_CallocAndAdd(raidPtr->status, raidPtr->numRow, sizeof(RF_RowStatus_t), + (RF_RowStatus_t *), raidPtr->cleanupList); + if (raidPtr->status == NULL) { + DO_RAID_FAIL(); + return (ENOMEM); + } + RF_CallocAndAdd(raidPtr->reconControl, raidPtr->numRow, + sizeof(RF_ReconCtrl_t *), (RF_ReconCtrl_t **), raidPtr->cleanupList); + if (raidPtr->reconControl == NULL) { + DO_RAID_FAIL(); + return (ENOMEM); + } + for (i = 0; i < raidPtr->numRow; i++) { + raidPtr->status[i] = rf_rs_optimal; + raidPtr->reconControl[i] = NULL; + } - DO_RAID_COND(&raidPtr->outstandingCond); + DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine); + DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks); - raidPtr->nAccOutstanding = 0; - raidPtr->waitShutdown = 0; + DO_RAID_COND(&raidPtr->outstandingCond); - DO_RAID_MUTEX(&raidPtr->access_suspend_mutex); - DO_RAID_COND(&raidPtr->quiescent_cond); + raidPtr->nAccOutstanding = 0; + raidPtr->waitShutdown = 0; - DO_RAID_COND(&raidPtr->waitForReconCond); + DO_RAID_MUTEX(&raidPtr->access_suspend_mutex); + DO_RAID_COND(&raidPtr->quiescent_cond); - DO_RAID_MUTEX(&raidPtr->recon_done_proc_mutex); - DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks); - DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks); - /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev no. is set */ - DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues); -#ifndef KERNEL - DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskThreads); -#endif /* !KERNEL */ + DO_RAID_COND(&raidPtr->waitForReconCond); - DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout); + DO_RAID_MUTEX(&raidPtr->recon_done_proc_mutex); + DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks); + DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks); + /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev + * no. 
is set */ + DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues); - DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus); + DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout); - for(row=0;row<raidPtr->numRow;row++) { - for(col=0;col<raidPtr->numCol;col++) { - /* - * XXX better distribution - */ - raidPtr->hist_diskreq[row][col] = 0; - } - } + DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus); - if (rf_keepAccTotals) { - raidPtr->keep_acc_totals = 1; - } + for (row = 0; row < raidPtr->numRow; row++) { + for (col = 0; col < raidPtr->numCol; col++) { + /* + * XXX better distribution + */ + raidPtr->hist_diskreq[row][col] = 0; + } + } - rf_StartUserStats(raidPtr); + if (rf_keepAccTotals) { + raidPtr->keep_acc_totals = 1; + } + rf_StartUserStats(raidPtr); - raidPtr->valid = 1; - return(0); + raidPtr->valid = 1; + return (0); } -static int init_rad(desc) - RF_RaidAccessDesc_t *desc; +static int +init_rad(desc) + RF_RaidAccessDesc_t *desc; { - int rc; + int rc; rc = rf_mutex_init(&desc->mutex); if (rc) { RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); + __LINE__, rc); + return (rc); } rc = rf_cond_init(&desc->cond); if (rc) { RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + __LINE__, rc); rf_mutex_destroy(&desc->mutex); - return(rc); + return (rc); } - return(0); + return (0); } -static void clean_rad(desc) - RF_RaidAccessDesc_t *desc; +static void +clean_rad(desc) + RF_RaidAccessDesc_t *desc; { rf_mutex_destroy(&desc->mutex); rf_cond_destroy(&desc->cond); } -static void rf_ShutdownRDFreeList(ignored) - void *ignored; +static void +rf_ShutdownRDFreeList(ignored) + void *ignored; { - RF_FREELIST_DESTROY_CLEAN(rf_rad_freelist,next,(RF_RaidAccessDesc_t *),clean_rad); + RF_FREELIST_DESTROY_CLEAN(rf_rad_freelist, next, (RF_RaidAccessDesc_t *), clean_rad); } -static int rf_ConfigureRDFreeList(listp) - RF_ShutdownList_t **listp; +static int +rf_ConfigureRDFreeList(listp) + RF_ShutdownList_t **listp; { - int rc; + 
int rc; RF_FREELIST_CREATE(rf_rad_freelist, RF_MAX_FREE_RAD, - RF_RAD_INC, sizeof(RF_RaidAccessDesc_t)); + RF_RAD_INC, sizeof(RF_RaidAccessDesc_t)); if (rf_rad_freelist == NULL) { - return(ENOMEM); + return (ENOMEM); } rc = rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL); if (rc) { RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + __LINE__, rc); rf_ShutdownRDFreeList(NULL); - return(rc); + return (rc); } - RF_FREELIST_PRIME_INIT(rf_rad_freelist, RF_RAD_INITIAL,next, - (RF_RaidAccessDesc_t *),init_rad); - return(0); + RF_FREELIST_PRIME_INIT(rf_rad_freelist, RF_RAD_INITIAL, next, + (RF_RaidAccessDesc_t *), init_rad); + return (0); } -RF_RaidAccessDesc_t *rf_AllocRaidAccDesc( - RF_Raid_t *raidPtr, - RF_IoType_t type, - RF_RaidAddr_t raidAddress, - RF_SectorCount_t numBlocks, - caddr_t bufPtr, - void *bp, - RF_DagHeader_t **paramDAG, - RF_AccessStripeMapHeader_t **paramASM, - RF_RaidAccessFlags_t flags, - void (*cbF)(struct buf *), - void *cbA, - RF_AccessState_t *states) +RF_RaidAccessDesc_t * +rf_AllocRaidAccDesc( + RF_Raid_t * raidPtr, + RF_IoType_t type, + RF_RaidAddr_t raidAddress, + RF_SectorCount_t numBlocks, + caddr_t bufPtr, + void *bp, + RF_DagHeader_t ** paramDAG, + RF_AccessStripeMapHeader_t ** paramASM, + RF_RaidAccessFlags_t flags, + void (*cbF) (struct buf *), + void *cbA, + RF_AccessState_t * states) { - RF_RaidAccessDesc_t *desc; - - RF_FREELIST_GET_INIT_NOUNLOCK(rf_rad_freelist,desc,next,(RF_RaidAccessDesc_t *),init_rad); - if (raidPtr->waitShutdown) { - /* - * Actually, we're shutting the array down. Free the desc - * and return NULL. 
- */ - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - RF_FREELIST_FREE_CLEAN(rf_rad_freelist,desc,next,clean_rad); - return(NULL); - } - raidPtr->nAccOutstanding++; - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - - desc->raidPtr = (void*)raidPtr; - desc->type = type; - desc->raidAddress = raidAddress; - desc->numBlocks = numBlocks; - desc->bufPtr = bufPtr; - desc->bp = bp; - desc->paramDAG = paramDAG; - desc->paramASM = paramASM; - desc->flags = flags; - desc -> states = states; - desc -> state = 0; - - desc->status = 0; - bzero((char *)&desc->tracerec, sizeof(RF_AccTraceEntry_t)); - desc->callbackFunc= (void (*)(RF_CBParam_t))cbF; /* XXX */ - desc->callbackArg = cbA; - desc->next = NULL; - desc->head = desc; - desc->numPending = 0; - desc->cleanupList = NULL; - rf_MakeAllocList(desc->cleanupList); - rf_get_threadid(desc->tid); -#ifdef SIMULATE - desc->owner = rf_GetCurrentOwner(); -#endif /* SIMULATE */ - return(desc); + RF_RaidAccessDesc_t *desc; + + RF_FREELIST_GET_INIT_NOUNLOCK(rf_rad_freelist, desc, next, (RF_RaidAccessDesc_t *), init_rad); + if (raidPtr->waitShutdown) { + /* + * Actually, we're shutting the array down. Free the desc + * and return NULL. 
+ */ + RF_FREELIST_DO_UNLOCK(rf_rad_freelist); + RF_FREELIST_FREE_CLEAN(rf_rad_freelist, desc, next, clean_rad); + return (NULL); + } + raidPtr->nAccOutstanding++; + RF_FREELIST_DO_UNLOCK(rf_rad_freelist); + + desc->raidPtr = (void *) raidPtr; + desc->type = type; + desc->raidAddress = raidAddress; + desc->numBlocks = numBlocks; + desc->bufPtr = bufPtr; + desc->bp = bp; + desc->paramDAG = paramDAG; + desc->paramASM = paramASM; + desc->flags = flags; + desc->states = states; + desc->state = 0; + + desc->status = 0; + bzero((char *) &desc->tracerec, sizeof(RF_AccTraceEntry_t)); + desc->callbackFunc = (void (*) (RF_CBParam_t)) cbF; /* XXX */ + desc->callbackArg = cbA; + desc->next = NULL; + desc->head = desc; + desc->numPending = 0; + desc->cleanupList = NULL; + rf_MakeAllocList(desc->cleanupList); + rf_get_threadid(desc->tid); + return (desc); } -void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc) +void +rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc) { - RF_Raid_t *raidPtr = desc->raidPtr; + RF_Raid_t *raidPtr = desc->raidPtr; - RF_ASSERT(desc); + RF_ASSERT(desc); -#if !defined(KERNEL) && !defined(SIMULATE) - rf_StopThroughputStats(raidPtr); -#endif /* !KERNEL && !SIMULATE */ - - rf_FreeAllocList(desc->cleanupList); - RF_FREELIST_FREE_CLEAN_NOUNLOCK(rf_rad_freelist,desc,next,clean_rad); - raidPtr->nAccOutstanding--; - if (raidPtr->waitShutdown) { - RF_SIGNAL_COND(raidPtr->outstandingCond); - } - RF_FREELIST_DO_UNLOCK(rf_rad_freelist); + rf_FreeAllocList(desc->cleanupList); + RF_FREELIST_FREE_CLEAN_NOUNLOCK(rf_rad_freelist, desc, next, clean_rad); + raidPtr->nAccOutstanding--; + if (raidPtr->waitShutdown) { + RF_SIGNAL_COND(raidPtr->outstandingCond); + } + RF_FREELIST_DO_UNLOCK(rf_rad_freelist); } - -#ifdef JIMZ -#define THREAD_NUMDESC 1024 -#define THREAD_NUM 600 -static RF_RaidAccessDesc_t *dest_hist[THREAD_NUM*THREAD_NUMDESC]; -int jimz_access_num[THREAD_NUM]; -#endif /* JIMZ */ - /********************************************************************* * Main routine 
for performing an access. * Accesses are retried until a DAG can not be selected. This occurs * when either the DAG library is incomplete or there are too many * failures in a parity group. ********************************************************************/ -int rf_DoAccess( - RF_Raid_t *raidPtr, - RF_IoType_t type, - int async_flag, - RF_RaidAddr_t raidAddress, - RF_SectorCount_t numBlocks, - caddr_t bufPtr, - void *bp_in, - RF_DagHeader_t **paramDAG, - RF_AccessStripeMapHeader_t **paramASM, - RF_RaidAccessFlags_t flags, - RF_RaidAccessDesc_t **paramDesc, - void (*cbF)(struct buf *), - void *cbA) +int +rf_DoAccess( + RF_Raid_t * raidPtr, + RF_IoType_t type, + int async_flag, + RF_RaidAddr_t raidAddress, + RF_SectorCount_t numBlocks, + caddr_t bufPtr, + void *bp_in, + RF_DagHeader_t ** paramDAG, + RF_AccessStripeMapHeader_t ** paramASM, + RF_RaidAccessFlags_t flags, + RF_RaidAccessDesc_t ** paramDesc, + void (*cbF) (struct buf *), + void *cbA) /* type should be read or write async_flag should be RF_TRUE or RF_FALSE bp_in is a buf pointer. void * to facilitate ignoring it outside the kernel */ { - int tid; - RF_RaidAccessDesc_t *desc; - caddr_t lbufPtr = bufPtr; -#ifdef KERNEL - struct buf *bp = (struct buf *) bp_in; + int tid; + RF_RaidAccessDesc_t *desc; + caddr_t lbufPtr = bufPtr; + struct buf *bp = (struct buf *) bp_in; #if DFSTRACE > 0 - struct { RF_uint64 raidAddr; int numBlocks; char type;} dfsrecord; -#endif /* DFSTRACE > 0 */ -#else /* KERNEL */ - void *bp = bp_in; -#endif /* KERNEL */ + struct { + RF_uint64 raidAddr; + int numBlocks; + char type; + } dfsrecord; +#endif /* DFSTRACE > 0 */ + + raidAddress += rf_raidSectorOffset; + + if (!raidPtr->valid) { + RF_ERRORMSG("RAIDframe driver not successfully configured. 
Rejecting access.\n"); + IO_BUF_ERR(bp, EINVAL, raidPtr->raidid); + return (EINVAL); + } +#if defined(KERNEL) && DFSTRACE > 0 + if (rf_DFSTraceAccesses) { + dfsrecord.raidAddr = raidAddress; + dfsrecord.numBlocks = numBlocks; + dfsrecord.type = type; + dfs_log(DFS_NOTE, (char *) &dfsrecord, sizeof(dfsrecord), 0); + } +#endif /* KERNEL && DFSTRACE > 0 */ + + rf_get_threadid(tid); + if (rf_accessDebug) { + + printf("logBytes is: %d %d %d\n", raidPtr->raidid, + raidPtr->logBytesPerSector, + (int) rf_RaidAddressToByte(raidPtr, numBlocks)); + printf("[%d] %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", tid, + (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress, + (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress), + (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1), + (int) numBlocks, + (int) rf_RaidAddressToByte(raidPtr, numBlocks), + (long) bufPtr); + } + if (raidAddress + numBlocks > raidPtr->totalSectors) { + + printf("DoAccess: raid addr %lu too large to access %lu sectors. Max legal addr is %lu\n", + (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors); + + if (type == RF_IO_TYPE_READ) { + IO_BUF_ERR(bp, ENOSPC, raidPtr->raidid); + return (ENOSPC); + } else { + IO_BUF_ERR(bp, ENOSPC, raidPtr->raidid); + return (ENOSPC); + } + } + desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress, + numBlocks, lbufPtr, bp, paramDAG, paramASM, + flags, cbF, cbA, raidPtr->Layout.map->states); - raidAddress += rf_raidSectorOffset; + if (desc == NULL) { + return (ENOMEM); + } + RF_ETIMER_START(desc->tracerec.tot_timer); - if (!raidPtr->valid) { - RF_ERRORMSG("RAIDframe driver not successfully configured. 
Rejecting access.\n"); - IO_BUF_ERR(bp, EINVAL, raidPtr->raidid); - return(EINVAL); - } + desc->async_flag = async_flag; -#if defined(KERNEL) && DFSTRACE > 0 - if (rf_DFSTraceAccesses) { - dfsrecord.raidAddr = raidAddress; - dfsrecord.numBlocks = numBlocks; - dfsrecord.type = type; - dfs_log(DFS_NOTE, (char *) &dfsrecord, sizeof(dfsrecord), 0); - } -#endif /* KERNEL && DFSTRACE > 0 */ - - rf_get_threadid(tid); - if (rf_accessDebug) { - - printf("logBytes is: %d %d %d\n",raidPtr->raidid, - raidPtr->logBytesPerSector, - (int)rf_RaidAddressToByte(raidPtr,numBlocks)); - printf("[%d] %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n",tid, - (type==RF_IO_TYPE_READ) ? "READ":"WRITE", (int)raidAddress, - (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress), - (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress+numBlocks-1), - (int) numBlocks, - (int) rf_RaidAddressToByte(raidPtr,numBlocks), - (long) bufPtr); - } - - if (raidAddress + numBlocks > raidPtr->totalSectors) { - - printf("DoAccess: raid addr %lu too large to access %lu sectors. 
Max legal addr is %lu\n", - (u_long)raidAddress,(u_long)numBlocks,(u_long)raidPtr->totalSectors); - -#ifdef KERNEL - if (type == RF_IO_TYPE_READ) { - IO_BUF_ERR(bp, ENOSPC, raidPtr->raidid); - return(ENOSPC); - } else { - IO_BUF_ERR(bp, ENOSPC, raidPtr->raidid); - return(ENOSPC); - } -#elif defined(SIMULATE) - RF_PANIC(); -#else /* SIMULATE */ - return(EIO); -#endif /* SIMULATE */ - } - -#if !defined(KERNEL) && !defined(SIMULATE) - rf_StartThroughputStats(raidPtr); -#endif /* !KERNEL && !SIMULATE */ - - desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress, - numBlocks, lbufPtr, bp, paramDAG, paramASM, - flags, cbF, cbA, raidPtr->Layout.map->states); - - if (desc == NULL) { - return(ENOMEM); - } -#ifdef JIMZ - dest_hist[(tid*THREAD_NUMDESC)+jimz_access_num[tid]]; jimz_access_num[tid]++; -#endif /* JIMZ */ - - RF_ETIMER_START(desc->tracerec.tot_timer); - -#ifdef SIMULATE - /* simulator uses paramDesc to continue dag from test function */ - desc->async_flag=async_flag; - - *paramDesc=desc; - - return(0); -#endif /* SIMULATE */ - - rf_ContinueRaidAccess(desc); - -#ifndef KERNEL - if (!(flags & RF_DAG_NONBLOCKING_IO)) { - RF_LOCK_MUTEX(desc->mutex); - while (!(desc->flags & RF_DAG_ACCESS_COMPLETE)) { - RF_WAIT_COND(desc->cond, desc->mutex); - } - RF_UNLOCK_MUTEX(desc->mutex); - rf_FreeRaidAccDesc(desc); - } -#endif /* !KERNEL */ - - return(0); -} + rf_ContinueRaidAccess(desc); + return (0); +} /* force the array into reconfigured mode without doing reconstruction */ -int rf_SetReconfiguredMode(raidPtr, row, col) - RF_Raid_t *raidPtr; - int row; - int col; +int +rf_SetReconfiguredMode(raidPtr, row, col) + RF_Raid_t *raidPtr; + int row; + int col; { - if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - printf("Can't set reconfigured mode in dedicated-spare array\n"); - RF_PANIC(); - } - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->numFailures++; - raidPtr->Disks[row][col].status = rf_ds_dist_spared; - raidPtr->status[row] = rf_rs_reconfigured; - /* install spare 
table only if declustering + distributed sparing architecture. */ - if ( raidPtr->Layout.map->flags & RF_BD_DECLUSTERED ) - rf_InstallSpareTable(raidPtr, row, col); - RF_UNLOCK_MUTEX(raidPtr->mutex); - return(0); + if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { + printf("Can't set reconfigured mode in dedicated-spare array\n"); + RF_PANIC(); + } + RF_LOCK_MUTEX(raidPtr->mutex); + raidPtr->numFailures++; + raidPtr->Disks[row][col].status = rf_ds_dist_spared; + raidPtr->status[row] = rf_rs_reconfigured; + /* install spare table only if declustering + distributed sparing + * architecture. */ + if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED) + rf_InstallSpareTable(raidPtr, row, col); + RF_UNLOCK_MUTEX(raidPtr->mutex); + return (0); } extern int fail_row, fail_col, fail_time; extern int delayed_recon; -int rf_FailDisk( - RF_Raid_t *raidPtr, - int frow, - int fcol, - int initRecon) -{ - int tid; - - rf_get_threadid(tid); - printf("[%d] Failing disk r%d c%d\n",tid,frow,fcol); - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->numFailures++; - raidPtr->Disks[frow][fcol].status = rf_ds_failed; - raidPtr->status[frow] = rf_rs_degraded; - RF_UNLOCK_MUTEX(raidPtr->mutex); -#ifdef SIMULATE -#if RF_DEMO > 0 - if (rf_demoMode) { - rf_demo_update_mode (RF_DEMO_DEGRADED); - fail_col = fcol; fail_row = frow; - fail_time = rf_CurTime(); /* XXX */ - if (initRecon) - delayed_recon = RF_TRUE; - } - else { - if (initRecon) - rf_ReconstructFailedDisk(raidPtr, frow, fcol); - } -#else /* RF_DEMO > 0 */ - if (initRecon) - rf_ReconstructFailedDisk(raidPtr, frow, fcol); -#endif /* RF_DEMO > 0 */ -#else /* SIMULATE */ - if (initRecon) - rf_ReconstructFailedDisk(raidPtr, frow, fcol); -#endif /* SIMULATE */ - return(0); -} - -#ifdef SIMULATE -extern RF_Owner_t recon_owner; - -void rf_ScheduleContinueReconstructFailedDisk(reconDesc) - RF_RaidReconDesc_t *reconDesc; +int +rf_FailDisk( + RF_Raid_t * raidPtr, + int frow, + int fcol, + int initRecon) { - rf_DDEventRequest(rf_CurTime(), 
rf_ContinueReconstructFailedDisk, - (void *) reconDesc, recon_owner, -4, -4, reconDesc->raidPtr, NULL); + int tid; + + rf_get_threadid(tid); + printf("[%d] Failing disk r%d c%d\n", tid, frow, fcol); + RF_LOCK_MUTEX(raidPtr->mutex); + raidPtr->numFailures++; + raidPtr->Disks[frow][fcol].status = rf_ds_failed; + raidPtr->status[frow] = rf_rs_degraded; + RF_UNLOCK_MUTEX(raidPtr->mutex); + if (initRecon) + rf_ReconstructFailedDisk(raidPtr, frow, fcol); + return (0); } -#endif /* SIMULATE */ - /* releases a thread that is waiting for the array to become quiesced. * access_suspend_mutex should be locked upon calling this */ -void rf_SignalQuiescenceLock(raidPtr, reconDesc) - RF_Raid_t *raidPtr; - RF_RaidReconDesc_t *reconDesc; +void +rf_SignalQuiescenceLock(raidPtr, reconDesc) + RF_Raid_t *raidPtr; + RF_RaidReconDesc_t *reconDesc; { - int tid; - - if (rf_quiesceDebug) { - rf_get_threadid(tid); - printf("[%d] Signalling quiescence lock\n", tid); - } - raidPtr->access_suspend_release = 1; - - if (raidPtr->waiting_for_quiescence) { -#ifndef SIMULATE - SIGNAL_QUIESCENT_COND(raidPtr); -#else /* !SIMULATE */ - if (reconDesc) { - rf_ScheduleContinueReconstructFailedDisk(reconDesc); - } -#endif /* !SIMULATE */ - } -} + int tid; + + if (rf_quiesceDebug) { + rf_get_threadid(tid); + printf("[%d] Signalling quiescence lock\n", tid); + } + raidPtr->access_suspend_release = 1; + if (raidPtr->waiting_for_quiescence) { + SIGNAL_QUIESCENT_COND(raidPtr); + } +} /* suspends all new requests to the array. No effect on accesses that are in flight. */ -int rf_SuspendNewRequestsAndWait(raidPtr) - RF_Raid_t *raidPtr; +int +rf_SuspendNewRequestsAndWait(raidPtr) + RF_Raid_t *raidPtr; { - if (rf_quiesceDebug) - printf("Suspending new reqs\n"); - - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accesses_suspended++; - raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 
0 : 1; - -#ifndef SIMULATE - if (raidPtr->waiting_for_quiescence) { - raidPtr->access_suspend_release=0; - while (!raidPtr->access_suspend_release) { - printf("Suspending: Waiting for Quiescence\n"); - WAIT_FOR_QUIESCENCE(raidPtr); - raidPtr->waiting_for_quiescence = 0; - } - } - printf("Quiescence reached..\n"); -#endif /* !SIMULATE */ - - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - return (raidPtr->waiting_for_quiescence); -} + if (rf_quiesceDebug) + printf("Suspending new reqs\n"); + + RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); + raidPtr->accesses_suspended++; + raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1; + + if (raidPtr->waiting_for_quiescence) { + raidPtr->access_suspend_release = 0; + while (!raidPtr->access_suspend_release) { + printf("Suspending: Waiting for Quiesence\n"); + WAIT_FOR_QUIESCENCE(raidPtr); + raidPtr->waiting_for_quiescence = 0; + } + } + printf("Quiesence reached..\n"); + RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); + return (raidPtr->waiting_for_quiescence); +} /* wake up everyone waiting for quiescence to be released */ -void rf_ResumeNewRequests(raidPtr) - RF_Raid_t *raidPtr; +void +rf_ResumeNewRequests(raidPtr) + RF_Raid_t *raidPtr; { - RF_CallbackDesc_t *t, *cb; - - if (rf_quiesceDebug) - printf("Resuming new reqs\n"); - - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accesses_suspended--; - if (raidPtr->accesses_suspended == 0) - cb = raidPtr->quiesce_wait_list; - else - cb = NULL; - raidPtr->quiesce_wait_list = NULL; - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - - while (cb) { - t = cb; - cb = cb->next; - (t->callbackFunc)(t->callbackArg); - rf_FreeCallbackDesc(t); - } + RF_CallbackDesc_t *t, *cb; + + if (rf_quiesceDebug) + printf("Resuming new reqs\n"); + + RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); + raidPtr->accesses_suspended--; + if (raidPtr->accesses_suspended == 0) + cb = raidPtr->quiesce_wait_list; + else + cb = NULL; + raidPtr->quiesce_wait_list = NULL; + 
RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); + + while (cb) { + t = cb; + cb = cb->next; + (t->callbackFunc) (t->callbackArg); + rf_FreeCallbackDesc(t); + } } - /***************************************************************************************** * * debug routines * ****************************************************************************************/ -static void set_debug_option(name, val) - char *name; - long val; +static void +set_debug_option(name, val) + char *name; + long val; { - RF_DebugName_t *p; - - for (p = rf_debugNames; p->name; p++) { - if (!strcmp(p->name, name)) { - *(p->ptr) = val; - printf("[Set debug variable %s to %ld]\n",name,val); - return; - } - } - RF_ERRORMSG1("Unknown debug string \"%s\"\n",name); + RF_DebugName_t *p; + + for (p = rf_debugNames; p->name; p++) { + if (!strcmp(p->name, name)) { + *(p->ptr) = val; + printf("[Set debug variable %s to %ld]\n", name, val); + return; + } + } + RF_ERRORMSG1("Unknown debug string \"%s\"\n", name); } /* would like to use sscanf here, but apparently not available in kernel */ /*ARGSUSED*/ -static void rf_ConfigureDebug(cfgPtr) - RF_Config_t *cfgPtr; +static void +rf_ConfigureDebug(cfgPtr) + RF_Config_t *cfgPtr; { - char *val_p, *name_p, *white_p; - long val; - int i; - - rf_ResetDebugOptions(); - for (i=0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) { - name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]); - white_p = rf_find_white(name_p); /* skip to start of 2nd word */ - val_p = rf_find_non_white(white_p); - if (*val_p == '0' && *(val_p+1) == 'x') val = rf_htoi(val_p+2); - else val = rf_atoi(val_p); - *white_p = '\0'; - set_debug_option(name_p, val); - } + char *val_p, *name_p, *white_p; + long val; + int i; + + rf_ResetDebugOptions(); + for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) { + name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]); + white_p = rf_find_white(name_p); /* skip to start of 2nd + * word */ + val_p = rf_find_non_white(white_p); + if (*val_p == '0' 
&& *(val_p + 1) == 'x') + val = rf_htoi(val_p + 2); + else + val = rf_atoi(val_p); + *white_p = '\0'; + set_debug_option(name_p, val); + } } - /* performance monitoring stuff */ #define TIMEVAL_TO_US(t) (((long) t.tv_sec) * 1000000L + (long) t.tv_usec) -#if !defined(KERNEL) && !defined(SIMULATE) +#if !defined(_KERNEL) && !defined(SIMULATE) /* * Throughput stats currently only used in user-level RAIDframe */ -static int rf_InitThroughputStats( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +static int +rf_InitThroughputStats( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - int rc; - - /* these used by user-level raidframe only */ - rc = rf_create_managed_mutex(listp, &raidPtr->throughputstats.mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); - } - raidPtr->throughputstats.sum_io_us = 0; - raidPtr->throughputstats.num_ios = 0; - raidPtr->throughputstats.num_out_ios = 0; - return(0); + int rc; + + /* these used by user-level raidframe only */ + rc = rf_create_managed_mutex(listp, &raidPtr->throughputstats.mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (rc); + } + raidPtr->throughputstats.sum_io_us = 0; + raidPtr->throughputstats.num_ios = 0; + raidPtr->throughputstats.num_out_ios = 0; + return (0); } -void rf_StartThroughputStats(RF_Raid_t *raidPtr) +void +rf_StartThroughputStats(RF_Raid_t * raidPtr) { - RF_LOCK_MUTEX(raidPtr->throughputstats.mutex); - raidPtr->throughputstats.num_ios++; - raidPtr->throughputstats.num_out_ios++; - if (raidPtr->throughputstats.num_out_ios == 1) - RF_GETTIME(raidPtr->throughputstats.start); - RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); + RF_LOCK_MUTEX(raidPtr->throughputstats.mutex); + raidPtr->throughputstats.num_ios++; + raidPtr->throughputstats.num_out_ios++; + if (raidPtr->throughputstats.num_out_ios == 1) + 
RF_GETTIME(raidPtr->throughputstats.start); + RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); } -static void rf_StopThroughputStats(RF_Raid_t *raidPtr) +static void +rf_StopThroughputStats(RF_Raid_t * raidPtr) { - struct timeval diff; - - RF_LOCK_MUTEX(raidPtr->throughputstats.mutex); - raidPtr->throughputstats.num_out_ios--; - if (raidPtr->throughputstats.num_out_ios == 0) { - RF_GETTIME(raidPtr->throughputstats.stop); - RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start, &raidPtr->throughputstats.stop, &diff); - raidPtr->throughputstats.sum_io_us += TIMEVAL_TO_US(diff); - } - RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); + struct timeval diff; + + RF_LOCK_MUTEX(raidPtr->throughputstats.mutex); + raidPtr->throughputstats.num_out_ios--; + if (raidPtr->throughputstats.num_out_ios == 0) { + RF_GETTIME(raidPtr->throughputstats.stop); + RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start, &raidPtr->throughputstats.stop, &diff); + raidPtr->throughputstats.sum_io_us += TIMEVAL_TO_US(diff); + } + RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); } -static void rf_PrintThroughputStats(RF_Raid_t *raidPtr) +static void +rf_PrintThroughputStats(RF_Raid_t * raidPtr) { - RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0); - if ( raidPtr->throughputstats.sum_io_us != 0 ) { - printf("[Througphut: %8.2f IOs/second]\n", raidPtr->throughputstats.num_ios - / (raidPtr->throughputstats.sum_io_us / 1000000.0)); - } + RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0); + if (raidPtr->throughputstats.sum_io_us != 0) { + printf("[Througphut: %8.2f IOs/second]\n", raidPtr->throughputstats.num_ios + / (raidPtr->throughputstats.sum_io_us / 1000000.0)); + } } +#endif /* !KERNEL && !SIMULATE */ -#endif /* !KERNEL && !SIMULATE */ - -void rf_StartUserStats(RF_Raid_t *raidPtr) +void +rf_StartUserStats(RF_Raid_t * raidPtr) { - RF_GETTIME(raidPtr->userstats.start); - raidPtr->userstats.sum_io_us = 0; - raidPtr->userstats.num_ios = 0; - raidPtr->userstats.num_sect_moved = 0; + 
RF_GETTIME(raidPtr->userstats.start); + raidPtr->userstats.sum_io_us = 0; + raidPtr->userstats.num_ios = 0; + raidPtr->userstats.num_sect_moved = 0; } -void rf_StopUserStats(RF_Raid_t *raidPtr) +void +rf_StopUserStats(RF_Raid_t * raidPtr) { - RF_GETTIME(raidPtr->userstats.stop); + RF_GETTIME(raidPtr->userstats.stop); } -void rf_UpdateUserStats(raidPtr, rt, numsect) - RF_Raid_t *raidPtr; - int rt; /* resp time in us */ - int numsect; /* number of sectors for this access */ +void +rf_UpdateUserStats(raidPtr, rt, numsect) + RF_Raid_t *raidPtr; + int rt; /* resp time in us */ + int numsect; /* number of sectors for this access */ { - raidPtr->userstats.sum_io_us += rt; - raidPtr->userstats.num_ios++; - raidPtr->userstats.num_sect_moved += numsect; + raidPtr->userstats.sum_io_us += rt; + raidPtr->userstats.num_ios++; + raidPtr->userstats.num_sect_moved += numsect; } -void rf_PrintUserStats(RF_Raid_t *raidPtr) +void +rf_PrintUserStats(RF_Raid_t * raidPtr) { - long elapsed_us, mbs, mbs_frac; - struct timeval diff; - - RF_TIMEVAL_DIFF(&raidPtr->userstats.start, &raidPtr->userstats.stop, &diff); - elapsed_us = TIMEVAL_TO_US(diff); - - /* 2000 sectors per megabyte, 10000000 microseconds per second */ - if (elapsed_us) - mbs = (raidPtr->userstats.num_sect_moved / 2000) / (elapsed_us / 1000000); - else - mbs = 0; - - /* this computes only the first digit of the fractional mb/s moved */ - if (elapsed_us) { - mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) / (elapsed_us / 1000000)) - - (mbs * 10); - } - else { - mbs_frac = 0; - } - - printf("Number of I/Os: %ld\n",raidPtr->userstats.num_ios); - printf("Elapsed time (us): %ld\n",elapsed_us); - printf("User I/Os per second: %ld\n",RF_DB0_CHECK(raidPtr->userstats.num_ios, (elapsed_us/1000000))); - printf("Average user response time: %ld us\n",RF_DB0_CHECK(raidPtr->userstats.sum_io_us, raidPtr->userstats.num_ios)); - printf("Total sectors moved: %ld\n",raidPtr->userstats.num_sect_moved); - printf("Average access size (sect): 
%ld\n",RF_DB0_CHECK(raidPtr->userstats.num_sect_moved, raidPtr->userstats.num_ios)); - printf("Achieved data rate: %ld.%ld MB/sec\n",mbs,mbs_frac); + long elapsed_us, mbs, mbs_frac; + struct timeval diff; + + RF_TIMEVAL_DIFF(&raidPtr->userstats.start, &raidPtr->userstats.stop, &diff); + elapsed_us = TIMEVAL_TO_US(diff); + + /* 2000 sectors per megabyte, 10000000 microseconds per second */ + if (elapsed_us) + mbs = (raidPtr->userstats.num_sect_moved / 2000) / (elapsed_us / 1000000); + else + mbs = 0; + + /* this computes only the first digit of the fractional mb/s moved */ + if (elapsed_us) { + mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) / (elapsed_us / 1000000)) + - (mbs * 10); + } else { + mbs_frac = 0; + } + + printf("Number of I/Os: %ld\n", raidPtr->userstats.num_ios); + printf("Elapsed time (us): %ld\n", elapsed_us); + printf("User I/Os per second: %ld\n", RF_DB0_CHECK(raidPtr->userstats.num_ios, (elapsed_us / 1000000))); + printf("Average user response time: %ld us\n", RF_DB0_CHECK(raidPtr->userstats.sum_io_us, raidPtr->userstats.num_ios)); + printf("Total sectors moved: %ld\n", raidPtr->userstats.num_sect_moved); + printf("Average access size (sect): %ld\n", RF_DB0_CHECK(raidPtr->userstats.num_sect_moved, raidPtr->userstats.num_ios)); + printf("Achieved data rate: %ld.%ld MB/sec\n", mbs, mbs_frac); } diff --git a/sys/dev/raidframe/rf_driver.h b/sys/dev/raidframe/rf_driver.h index 7c9a1c4084b..50eccc9491e 100644 --- a/sys/dev/raidframe/rf_driver.h +++ b/sys/dev/raidframe/rf_driver.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_driver.h,v 1.1 1999/01/11 14:29:19 niklas Exp $ */ -/* $NetBSD: rf_driver.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: rf_driver.h,v 1.2 1999/02/16 00:02:41 niklas Exp $ */ +/* $NetBSD: rf_driver.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ /* * rf_driver.h */ @@ -29,60 +29,6 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
*/ -/* - * : - * Log: rf_driver.h,v - * Revision 1.11 1996/07/11 19:08:00 jimz - * generalize reconstruction mechanism - * allow raid1 reconstructs via copyback (done with array - * quiesced, not online, therefore not disk-directed) - * - * Revision 1.10 1996/06/10 14:18:58 jimz - * move user, throughput stats into per-array structure - * - * Revision 1.9 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.8 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.7 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.6 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.5 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.4 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.3 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.2 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.1 1996/05/18 19:56:10 jimz - * Initial revision - * - */ #ifndef _RF__RF_DRIVER_H_ #define _RF__RF_DRIVER_H_ @@ -91,36 +37,32 @@ #include "rf_types.h" RF_DECLARE_EXTERN_MUTEX(rf_printf_mutex) + int rf_BootRaidframe(void); + int rf_UnbootRaidframe(void); + int rf_Shutdown(RF_Raid_t * raidPtr); + int rf_Configure(RF_Raid_t * raidPtr, RF_Config_t * cfgPtr); + RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(RF_Raid_t * raidPtr, RF_IoType_t type, + RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, caddr_t bufPtr, + void *bp, RF_DagHeader_t ** paramDAG, RF_AccessStripeMapHeader_t ** paramASM, + RF_RaidAccessFlags_t flags, void (*cbF) (struct buf *), void *cbA, + RF_AccessState_t * states); + void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc); + int rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag, + RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, caddr_t bufPtr, + void *bp_in, RF_DagHeader_t ** paramDAG, + RF_AccessStripeMapHeader_t ** paramASM, RF_RaidAccessFlags_t flags, + RF_RaidAccessDesc_t ** paramDesc, void (*cbF) (struct buf *), void *cbA); + int rf_SetReconfiguredMode(RF_Raid_t * raidPtr, RF_RowCol_t row, + RF_RowCol_t col); + int rf_FailDisk(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol, + int initRecon); + void rf_SignalQuiescenceLock(RF_Raid_t * raidPtr, RF_RaidReconDesc_t * reconDesc); + int rf_SuspendNewRequestsAndWait(RF_Raid_t * raidPtr); + void 
rf_ResumeNewRequests(RF_Raid_t * raidPtr); + void rf_StartThroughputStats(RF_Raid_t * raidPtr); + void rf_StartUserStats(RF_Raid_t * raidPtr); + void rf_StopUserStats(RF_Raid_t * raidPtr); + void rf_UpdateUserStats(RF_Raid_t * raidPtr, int rt, int numsect); + void rf_PrintUserStats(RF_Raid_t * raidPtr); -int rf_BootRaidframe(void); -int rf_UnbootRaidframe(void); -int rf_Shutdown(RF_Raid_t *raidPtr); -int rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr); -RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type, - RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, caddr_t bufPtr, - void *bp, RF_DagHeader_t **paramDAG, RF_AccessStripeMapHeader_t **paramASM, - RF_RaidAccessFlags_t flags, void (*cbF)(struct buf *), void *cbA, - RF_AccessState_t *states); -void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc); -int rf_DoAccess(RF_Raid_t *raidPtr, RF_IoType_t type, int async_flag, - RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, caddr_t bufPtr, - void *bp_in, RF_DagHeader_t **paramDAG, - RF_AccessStripeMapHeader_t **paramASM, RF_RaidAccessFlags_t flags, - RF_RaidAccessDesc_t **paramDesc, void (*cbF)(struct buf *), void *cbA); -int rf_SetReconfiguredMode(RF_Raid_t *raidPtr, RF_RowCol_t row, - RF_RowCol_t col); -int rf_FailDisk(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol, - int initRecon); -#ifdef SIMULATE -void rf_ScheduleContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc); -#endif /* SIMULATE */ -void rf_SignalQuiescenceLock(RF_Raid_t *raidPtr, RF_RaidReconDesc_t *reconDesc); -int rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr); -void rf_ResumeNewRequests(RF_Raid_t *raidPtr); -void rf_StartThroughputStats(RF_Raid_t *raidPtr); -void rf_StartUserStats(RF_Raid_t *raidPtr); -void rf_StopUserStats(RF_Raid_t *raidPtr); -void rf_UpdateUserStats(RF_Raid_t *raidPtr, int rt, int numsect); -void rf_PrintUserStats(RF_Raid_t *raidPtr); - -#endif /* !_RF__RF_DRIVER_H_ */ +#endif /* !_RF__RF_DRIVER_H_ */ diff --git 
a/sys/dev/raidframe/rf_engine.c b/sys/dev/raidframe/rf_engine.c index c99782cbed5..36ae0642b3a 100644 --- a/sys/dev/raidframe/rf_engine.c +++ b/sys/dev/raidframe/rf_engine.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_engine.c,v 1.1 1999/01/11 14:29:19 niklas Exp $ */ -/* $NetBSD: rf_engine.c,v 1.2 1998/11/13 11:48:26 simonb Exp $ */ +/* $OpenBSD: rf_engine.c,v 1.2 1999/02/16 00:02:41 niklas Exp $ */ +/* $NetBSD: rf_engine.c,v 1.4 1999/02/05 00:06:11 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -55,163 +55,8 @@ * * ****************************************************************************/ -/* - * : - * - * Log: rf_engine.c,v - * Revision 1.56 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.55 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.54 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.53 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.52 1996/06/17 03:17:08 jimz - * correctly shut down engine thread in kernel - * - * Revision 1.51 1996/06/14 15:02:10 jimz - * make new engine code happy in simulator - * - * Revision 1.50 1996/06/14 14:19:48 jimz - * use diskgroup to control engine thread, make all engine-thread-related - * stuff per-array - * - * Revision 1.49 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.48 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.47 1996/06/06 01:23:23 jimz - * fix bug in node traversal when firing multiple nodes simultaneously - * - * Revision 1.46 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.45 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.44 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.43 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.42 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.41 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.40 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.39 1996/05/20 16:15:17 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.38 1996/05/18 20:09:54 jimz - * bit of cleanup to compile cleanly in kernel, once again - * - * Revision 1.37 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.36 1996/05/15 20:24:19 wvcii - * fixed syntax bug in SIMULATE clause above ProcessNode - * - * Revision 1.35 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.34 1996/05/08 15:25:28 wvcii - * eliminated dead code - * merged common cases (sim/user/kernel) - * entire node lists (arrays) now fired atomically - * reordered source code for readability - * beefed-up & corrected comments - * - * Revision 1.33 1996/05/07 19:39:40 jimz - * 1. fixed problems in PropogateResults() with nodes being referenced - * after they were no longer valid - * 2. fixed problems in PropogateResults() with the node list being - * incorrectly threaded - * - * Revision 1.32 1996/05/07 19:03:56 wvcii - * in PropagateResults, fixed a bug in the rollBackward case: - * node data is copied before the call to FinishNode which - * frees the node and destroys its data. 
- * - * Revision 1.31 1996/05/07 17:45:17 jimz - * remove old #if 0 code from PropogateResults() (was kept in - * previous version for archival purposes (rcsdiff)) - * - * Revision 1.30 1996/05/07 17:44:19 jimz - * fix threading of nodes to be fired in PropagateResults() - * fix iteration through skiplist in PropagateResults() - * fix incorrect accesses to freed memory (dereferencing a - * node that was freed by the action of calling FinishNode() - * on it, which in turn completed its DAG) in PropagateResults() - * - * Revision 1.29 1996/05/02 15:04:15 wvcii - * fixed bad array index in PropagateResults - * - * Revision 1.28 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.27 1995/12/08 15:07:03 arw - * cache code cleanup - * - * Revision 1.26 1995/11/07 16:18:01 wvcii - * numerous changes associated with roll-away error recovery - * when a node fails, dag enters rollForward or rollBackward state - * - * Revision 1.25 1995/09/06 19:27:17 wvcii - * added debug vars enableRollAway and debugRecovery - * - */ - -#ifdef _KERNEL -#define KERNEL -#endif - #include "rf_threadstuff.h" -#ifndef KERNEL -#include <stdio.h> -#include <stdlib.h> -#endif /* !KERNEL */ - #include <sys/errno.h> #include "rf_dag.h" @@ -223,9 +68,7 @@ #include "rf_shutdown.h" #include "rf_raid.h" -#ifndef SIMULATE static void DAGExecutionThread(RF_ThreadArg_t arg); -#endif /* !SIMULATE */ #define DO_INIT(_l_,_r_) { \ int _rc; \ @@ -240,190 +83,158 @@ static void DAGExecutionThread(RF_ThreadArg_t arg); } /* synchronization primitives for this file. DO_WAIT should be enclosed in a while loop. 
*/ -#ifndef KERNEL - -#define DO_LOCK(_r_) RF_LOCK_MUTEX((_r_)->node_queue_mutex) -#define DO_UNLOCK(_r_) RF_UNLOCK_MUTEX((_r_)->node_queue_mutex) -#define DO_WAIT(_r_) RF_WAIT_COND((_r_)->node_queue_cond, (_r_)->node_queue_mutex) -#define DO_SIGNAL(_r_) RF_SIGNAL_COND((_r_)->node_queue_cond) - -#else /* !KERNEL */ /* * XXX Is this spl-ing really necessary? */ #define DO_LOCK(_r_) { ks = splbio(); RF_LOCK_MUTEX((_r_)->node_queue_mutex); } #define DO_UNLOCK(_r_) { RF_UNLOCK_MUTEX((_r_)->node_queue_mutex); splx(ks); } -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#define DO_WAIT(_r_) mpsleep(&(_r_)->node_queue, PZERO, "raidframe nq", 0, (void *) simple_lock_addr((_r_)->node_queue_mutex), MS_LOCK_SIMPLE) -#else #define DO_WAIT(_r_) tsleep(&(_r_)->node_queue, PRIBIO | PCATCH, "raidframe nq",0) -#endif #define DO_SIGNAL(_r_) wakeup(&(_r_)->node_queue) -#endif /* !KERNEL */ - static void rf_ShutdownEngine(void *); -static void rf_ShutdownEngine(arg) - void *arg; +static void +rf_ShutdownEngine(arg) + void *arg; { - RF_Raid_t *raidPtr; + RF_Raid_t *raidPtr; - raidPtr = (RF_Raid_t *)arg; -#ifndef SIMULATE - raidPtr->shutdown_engine = 1; - DO_SIGNAL(raidPtr); - /* XXX something is missing here... */ + raidPtr = (RF_Raid_t *) arg; + raidPtr->shutdown_engine = 1; + DO_SIGNAL(raidPtr); + /* XXX something is missing here... 
*/ #ifdef DEBUG - printf("IGNORING WAIT_STOP\n"); + printf("IGNORING WAIT_STOP\n"); #endif -#if 0 - RF_THREADGROUP_WAIT_STOP(&raidPtr->engine_tg); -#endif -#endif /* !SIMULATE */ } -int rf_ConfigureEngine( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureEngine( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - int rc, tid=0; - - if (rf_engineDebug) { - rf_get_threadid(tid); - } - - DO_INIT(listp,raidPtr); - - raidPtr->node_queue = NULL; - raidPtr->dags_in_flight = 0; - -#ifndef SIMULATE - rc = rf_init_managed_threadgroup(listp, &raidPtr->engine_tg); - if (rc) - return(rc); - - /* we create the execution thread only once per system boot. - * no need to check return code b/c the kernel panics if it can't create the thread. - */ - if (rf_engineDebug) { - printf("[%d] Creating engine thread\n", tid); - } - - if (RF_CREATE_THREAD(raidPtr->engine_thread, DAGExecutionThread, raidPtr)) { - RF_ERRORMSG("RAIDFRAME: Unable to create engine thread\n"); - return(ENOMEM); - } - if (rf_engineDebug) { - printf("[%d] Created engine thread\n", tid); - } - RF_THREADGROUP_STARTED(&raidPtr->engine_tg); - /* XXX something is missing here... */ + int rc, tid = 0; + + if (rf_engineDebug) { + rf_get_threadid(tid); + } + DO_INIT(listp, raidPtr); + + raidPtr->node_queue = NULL; + raidPtr->dags_in_flight = 0; + + rc = rf_init_managed_threadgroup(listp, &raidPtr->engine_tg); + if (rc) + return (rc); + + /* we create the execution thread only once per system boot. no need + * to check return code b/c the kernel panics if it can't create the + * thread. 
*/ + if (rf_engineDebug) { + printf("[%d] Creating engine thread\n", tid); + } + if (RF_CREATE_THREAD(raidPtr->engine_thread, DAGExecutionThread, raidPtr)) { + RF_ERRORMSG("RAIDFRAME: Unable to create engine thread\n"); + return (ENOMEM); + } + if (rf_engineDebug) { + printf("[%d] Created engine thread\n", tid); + } + RF_THREADGROUP_STARTED(&raidPtr->engine_tg); + /* XXX something is missing here... */ #ifdef debug - printf("Skipping the WAIT_START!!\n"); + printf("Skipping the WAIT_START!!\n"); #endif #if 0 - RF_THREADGROUP_WAIT_START(&raidPtr->engine_tg); + RF_THREADGROUP_WAIT_START(&raidPtr->engine_tg); #endif - /* engine thread is now running and waiting for work */ - if (rf_engineDebug) { - printf("[%d] Engine thread running and waiting for events\n", tid); - } -#endif /* !SIMULATE */ - - rc = rf_ShutdownCreate(listp, rf_ShutdownEngine, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownEngine(NULL); - } - - return(rc); + /* engine thread is now running and waiting for work */ + if (rf_engineDebug) { + printf("[%d] Engine thread running and waiting for events\n", tid); + } + rc = rf_ShutdownCreate(listp, rf_ShutdownEngine, raidPtr); + if (rc) { + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_ShutdownEngine(NULL); + } + return (rc); } -static int BranchDone(RF_DagNode_t *node) +static int +BranchDone(RF_DagNode_t * node) { - int i; - - /* return true if forward execution is completed for a node and it's succedents */ - switch (node->status) { - case rf_wait : - /* should never be called in this state */ - RF_PANIC(); - break; - case rf_fired : - /* node is currently executing, so we're not done */ - return(RF_FALSE); - case rf_good : - for (i = 0; i < node->numSuccedents; i++) /* for each succedent */ - if (!BranchDone(node->succedents[i])) /* recursively check branch */ - return RF_FALSE; - return RF_TRUE; /* node and all 
succedent branches aren't in fired state */ - break; - case rf_bad : - /* succedents can't fire */ - return(RF_TRUE); - case rf_recover : - /* should never be called in this state */ - RF_PANIC(); - break; - case rf_undone : - case rf_panic : - /* XXX need to fix this case */ - /* for now, assume that we're done */ - return(RF_TRUE); - break; - default : - /* illegal node status */ - RF_PANIC(); - break; - } + int i; + + /* return true if forward execution is completed for a node and it's + * succedents */ + switch (node->status) { + case rf_wait: + /* should never be called in this state */ + RF_PANIC(); + break; + case rf_fired: + /* node is currently executing, so we're not done */ + return (RF_FALSE); + case rf_good: + for (i = 0; i < node->numSuccedents; i++) /* for each succedent */ + if (!BranchDone(node->succedents[i])) /* recursively check + * branch */ + return RF_FALSE; + return RF_TRUE; /* node and all succedent branches aren't in + * fired state */ + break; + case rf_bad: + /* succedents can't fire */ + return (RF_TRUE); + case rf_recover: + /* should never be called in this state */ + RF_PANIC(); + break; + case rf_undone: + case rf_panic: + /* XXX need to fix this case */ + /* for now, assume that we're done */ + return (RF_TRUE); + break; + default: + /* illegal node status */ + RF_PANIC(); + break; + } } -#ifdef SIMULATE -/* this is only ifdef SIMULATE because nothing else needs it */ -/* recursively determine if a DAG has completed execution */ -static int DAGDone(RF_DagHeader_t *dag) +static int +NodeReady(RF_DagNode_t * node) { - int i; - - for (i = 0; i < dag->numSuccedents; i++) - if (!BranchDone(dag->succedents[i])) - return RF_FALSE; - return RF_TRUE; -} -#endif /* SIMULATE */ + int ready; + + switch (node->dagHdr->status) { + case rf_enable: + case rf_rollForward: + if ((node->status == rf_wait) && (node->numAntecedents == node->numAntDone)) + ready = RF_TRUE; + else + ready = RF_FALSE; + break; + case rf_rollBackward: + 
RF_ASSERT(node->numSuccDone <= node->numSuccedents); + RF_ASSERT(node->numSuccFired <= node->numSuccedents); + RF_ASSERT(node->numSuccFired <= node->numSuccDone); + if ((node->status == rf_good) && (node->numSuccDone == node->numSuccedents)) + ready = RF_TRUE; + else + ready = RF_FALSE; + break; + default: + printf("Execution engine found illegal DAG status in NodeReady\n"); + RF_PANIC(); + break; + } -static int NodeReady(RF_DagNode_t *node) -{ - int ready; - - switch (node->dagHdr->status) { - case rf_enable : - case rf_rollForward : - if ((node->status == rf_wait) && (node->numAntecedents == node->numAntDone)) - ready = RF_TRUE; - else - ready = RF_FALSE; - break; - case rf_rollBackward : - RF_ASSERT(node->numSuccDone <= node->numSuccedents); - RF_ASSERT(node->numSuccFired <= node->numSuccedents); - RF_ASSERT(node->numSuccFired <= node->numSuccDone); - if ((node->status == rf_good) && (node->numSuccDone == node->numSuccedents)) - ready = RF_TRUE; - else - ready = RF_FALSE; - break; - default : - printf("Execution engine found illegal DAG status in NodeReady\n"); - RF_PANIC(); - break; - } - - return(ready); + return (ready); } @@ -434,54 +245,51 @@ static int NodeReady(RF_DagNode_t *node) * This routine assumes that the node's status field has alread been set to * "fired" or "recover" to indicate the direction of execution. 
*/ -static void FireNode(RF_DagNode_t *node) +static void +FireNode(RF_DagNode_t * node) { - int tid; - - switch (node->status) { - case rf_fired : - /* fire the do function of a node */ - if (rf_engineDebug) { - rf_get_threadid(tid); - printf("[%d] Firing node 0x%lx (%s)\n",tid,(unsigned long) node, node->name); - } -#ifdef KERNEL - if (node->flags & RF_DAGNODE_FLAG_YIELD) { + int tid; + + switch (node->status) { + case rf_fired: + /* fire the do function of a node */ + if (rf_engineDebug) { + rf_get_threadid(tid); + printf("[%d] Firing node 0x%lx (%s)\n", tid, (unsigned long) node, node->name); + } + if (node->flags & RF_DAGNODE_FLAG_YIELD) { #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - /* thread_block(); */ - /* printf("Need to block the thread here...\n"); */ - /* XXX thread_block is actually mentioned in - /usr/include/vm/vm_extern.h */ + /* thread_block(); */ + /* printf("Need to block the thread here...\n"); */ + /* XXX thread_block is actually mentioned in + * /usr/include/vm/vm_extern.h */ #else - thread_block(); + thread_block(); #endif - } -#endif /* KERNEL */ - (*(node->doFunc)) (node); - break; - case rf_recover : - /* fire the undo function of a node */ - if (rf_engineDebug || 1) { - rf_get_threadid(tid); - printf("[%d] Firing (undo) node 0x%lx (%s)\n",tid,(unsigned long) node, node->name); - } -#ifdef KERNEL - if (node->flags & RF_DAGNODE_FLAG_YIELD) + } + (*(node->doFunc)) (node); + break; + case rf_recover: + /* fire the undo function of a node */ + if (rf_engineDebug || 1) { + rf_get_threadid(tid); + printf("[%d] Firing (undo) node 0x%lx (%s)\n", tid, (unsigned long) node, node->name); + } + if (node->flags & RF_DAGNODE_FLAG_YIELD) #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - /* thread_block(); */ - /* printf("Need to block the thread here...\n"); */ - /* XXX thread_block is actually mentioned in - /usr/include/vm/vm_extern.h */ + /* thread_block(); */ + /* printf("Need to block the thread 
here...\n"); */ + /* XXX thread_block is actually mentioned in + * /usr/include/vm/vm_extern.h */ #else - thread_block(); + thread_block(); #endif -#endif /* KERNEL */ - (*(node->undoFunc)) (node); - break; - default : - RF_PANIC(); - break; - } + (*(node->undoFunc)) (node); + break; + default: + RF_PANIC(); + break; + } } @@ -490,90 +298,87 @@ static void FireNode(RF_DagNode_t *node) * Attempt to fire each node in a linear array. * The entire list is fired atomically. */ -static void FireNodeArray( - int numNodes, - RF_DagNode_t **nodeList) +static void +FireNodeArray( + int numNodes, + RF_DagNode_t ** nodeList) { - RF_DagStatus_t dstat; - RF_DagNode_t *node; - int i, j; - - /* first, mark all nodes which are ready to be fired */ - for (i = 0; i < numNodes; i++) { - node = nodeList[i]; - dstat = node->dagHdr->status; - RF_ASSERT((node->status == rf_wait) || (node->status == rf_good)); - if (NodeReady(node)) { - if ((dstat == rf_enable) || (dstat == rf_rollForward)) { - RF_ASSERT(node->status == rf_wait); - if (node->commitNode) - node->dagHdr->numCommits++; - node->status = rf_fired; - for (j = 0; j < node->numAntecedents; j++) - node->antecedents[j]->numSuccFired++; - } - else { - RF_ASSERT(dstat == rf_rollBackward); - RF_ASSERT(node->status == rf_good); - RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node per graph */ - node->status = rf_recover; - } - } - } - /* now, fire the nodes */ - for (i = 0; i < numNodes; i++) { - if ((nodeList[i]->status == rf_fired) || (nodeList[i]->status == rf_recover)) - FireNode(nodeList[i]); - } + RF_DagStatus_t dstat; + RF_DagNode_t *node; + int i, j; + + /* first, mark all nodes which are ready to be fired */ + for (i = 0; i < numNodes; i++) { + node = nodeList[i]; + dstat = node->dagHdr->status; + RF_ASSERT((node->status == rf_wait) || (node->status == rf_good)); + if (NodeReady(node)) { + if ((dstat == rf_enable) || (dstat == rf_rollForward)) { + RF_ASSERT(node->status == rf_wait); + if (node->commitNode) + 
node->dagHdr->numCommits++; + node->status = rf_fired; + for (j = 0; j < node->numAntecedents; j++) + node->antecedents[j]->numSuccFired++; + } else { + RF_ASSERT(dstat == rf_rollBackward); + RF_ASSERT(node->status == rf_good); + RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node + * per graph */ + node->status = rf_recover; + } + } + } + /* now, fire the nodes */ + for (i = 0; i < numNodes; i++) { + if ((nodeList[i]->status == rf_fired) || (nodeList[i]->status == rf_recover)) + FireNode(nodeList[i]); + } } -#ifndef SIMULATE /* user context: * Attempt to fire each node in a linked list. * The entire list is fired atomically. */ -static void FireNodeList(RF_DagNode_t *nodeList) +static void +FireNodeList(RF_DagNode_t * nodeList) { - RF_DagNode_t *node, *next; - RF_DagStatus_t dstat; - int j; - - if (nodeList) { - /* first, mark all nodes which are ready to be fired */ - for (node = nodeList; node; node = next) { - next = node->next; - dstat = node->dagHdr->status; - RF_ASSERT((node->status == rf_wait) || (node->status == rf_good)); - if (NodeReady(node)) { - if ((dstat == rf_enable) || (dstat == rf_rollForward)) { - RF_ASSERT(node->status == rf_wait); - if (node->commitNode) - node->dagHdr->numCommits++; - node->status = rf_fired; - for (j = 0; j < node->numAntecedents; j++) - node->antecedents[j]->numSuccFired++; - } - else { - RF_ASSERT(dstat == rf_rollBackward); - RF_ASSERT(node->status == rf_good); - RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node per graph */ - node->status = rf_recover; - } - } - } - /* now, fire the nodes */ - for (node = nodeList; node; node = next) { - next = node->next; - if ((node->status == rf_fired) || (node->status == rf_recover)) - FireNode(node); - } - } + RF_DagNode_t *node, *next; + RF_DagStatus_t dstat; + int j; + + if (nodeList) { + /* first, mark all nodes which are ready to be fired */ + for (node = nodeList; node; node = next) { + next = node->next; + dstat = node->dagHdr->status; + 
RF_ASSERT((node->status == rf_wait) || (node->status == rf_good)); + if (NodeReady(node)) { + if ((dstat == rf_enable) || (dstat == rf_rollForward)) { + RF_ASSERT(node->status == rf_wait); + if (node->commitNode) + node->dagHdr->numCommits++; + node->status = rf_fired; + for (j = 0; j < node->numAntecedents; j++) + node->antecedents[j]->numSuccFired++; + } else { + RF_ASSERT(dstat == rf_rollBackward); + RF_ASSERT(node->status == rf_good); + RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node + * per graph */ + node->status = rf_recover; + } + } + } + /* now, fire the nodes */ + for (node = nodeList; node; node = next) { + next = node->next; + if ((node->status == rf_fired) || (node->status == rf_recover)) + FireNode(node); + } + } } -#endif /* !SIMULATE */ - - - /* interrupt context: * for each succedent * propagate required results from node to succedent @@ -590,213 +395,200 @@ static void FireNodeList(RF_DagNode_t *nodeList) * must be locked. I used the node queue mutex and locked down the * entire function, but this is certainly overkill. 
*/ -static void PropagateResults( - RF_DagNode_t *node, - int context) +static void +PropagateResults( + RF_DagNode_t * node, + int context) { - RF_DagNode_t *s, *a; - RF_Raid_t *raidPtr; - int tid, i, ks; -#ifdef SIMULATE - RF_PropHeader_t *p; /* prop list for succedent i */ -#else /* SIMULATE */ - RF_DagNode_t *finishlist = NULL; /* a list of NIL nodes to be finished */ - RF_DagNode_t *skiplist = NULL; /* list of nodes with failed truedata antecedents */ - RF_DagNode_t *firelist = NULL; /* a list of nodes to be fired */ - RF_DagNode_t *q = NULL, *qh = NULL, *next; - int j, skipNode; -#endif /* SIMULATE */ - - rf_get_threadid(tid); - - raidPtr = node->dagHdr->raidPtr; - - DO_LOCK(raidPtr); - - /* debug - validate fire counts */ - for (i = 0; i < node->numAntecedents; i++) { - a = *(node->antecedents + i); - RF_ASSERT(a->numSuccFired >= a->numSuccDone); - RF_ASSERT(a->numSuccFired <= a->numSuccedents); - a->numSuccDone++; - } - - switch (node->dagHdr->status) { - case rf_enable : - case rf_rollForward : -#ifdef SIMULATE - /* currently we never propagate results unless in simulation */ - for (i = 0; i < node->numSuccedents; i++) { - s = *(node->succedents + i); - RF_ASSERT(s->status == rf_wait); - (s->numAntDone)++; - if (node->propList == NULL) - /* null propList implies no results to be propagated */ - p = NULL; - else - /* p=head of prop list for succedent i */ - p = *(node->propList + i); - while (p != NULL) { - /* bind node results to succedent's parameters */ -#if 0 - *(s->params + p->paramNum) = *(node->results + p->resultNum); -#else - s->params[p->paramNum].p = node->results[p->resultNum]; -#endif - p = p->next; - } - } -#else /* SIMULATE */ - for (i = 0; i < node->numSuccedents; i++) { - s = *(node->succedents + i); - RF_ASSERT(s->status == rf_wait); - (s->numAntDone)++; - if (s->numAntDone == s->numAntecedents) { - /* look for NIL nodes */ - if (s->doFunc == rf_NullNodeFunc) { - /* don't fire NIL nodes, just process them */ - s->next = finishlist; - 
finishlist = s; - } - else { - /* look to see if the node is to be skipped */ - skipNode = RF_FALSE; - for (j = 0; j < s->numAntecedents; j++) - if ((s->antType[j] == rf_trueData) && (s->antecedents[j]->status == rf_bad)) - skipNode = RF_TRUE; - if (skipNode) { - /* this node has one or more failed true data dependencies, so skip it */ - s->next = skiplist; - skiplist = s; - } - else - /* add s to list of nodes (q) to execute */ - if (context != RF_INTR_CONTEXT) { - /* we only have to enqueue if we're at intr context */ - s->next = firelist; /* put node on a list to be fired after we unlock */ - firelist = s; - } else { /* enqueue the node for the dag exec thread to fire */ - RF_ASSERT(NodeReady(s)); - if (q) { - q->next = s; - q = s; - } - else { - qh = q = s; - qh->next = NULL; - } - } + RF_DagNode_t *s, *a; + RF_Raid_t *raidPtr; + int tid, i, ks; + RF_DagNode_t *finishlist = NULL; /* a list of NIL nodes to be + * finished */ + RF_DagNode_t *skiplist = NULL; /* list of nodes with failed truedata + * antecedents */ + RF_DagNode_t *firelist = NULL; /* a list of nodes to be fired */ + RF_DagNode_t *q = NULL, *qh = NULL, *next; + int j, skipNode; + + rf_get_threadid(tid); + + raidPtr = node->dagHdr->raidPtr; + + DO_LOCK(raidPtr); + + /* debug - validate fire counts */ + for (i = 0; i < node->numAntecedents; i++) { + a = *(node->antecedents + i); + RF_ASSERT(a->numSuccFired >= a->numSuccDone); + RF_ASSERT(a->numSuccFired <= a->numSuccedents); + a->numSuccDone++; } - } - } - - if (q) { - /* xfer our local list of nodes to the node queue */ - q->next = raidPtr->node_queue; raidPtr->node_queue = qh; - DO_SIGNAL(raidPtr); - } - DO_UNLOCK(raidPtr); - - for (; skiplist; skiplist = next) { - next = skiplist->next; - skiplist->status = rf_skipped; - for (i = 0; i < skiplist->numAntecedents; i++) { - skiplist->antecedents[i]->numSuccFired++; - } - if (skiplist->commitNode) { - skiplist->dagHdr->numCommits++; - } - rf_FinishNode(skiplist, context); - } - for (; finishlist; 
finishlist = next) { - /* NIL nodes: no need to fire them */ - next = finishlist->next; - finishlist->status = rf_good; - for (i = 0; i < finishlist->numAntecedents; i++) { - finishlist->antecedents[i]->numSuccFired++; - } - if (finishlist->commitNode) - finishlist->dagHdr->numCommits++; - /* - * Okay, here we're calling rf_FinishNode() on nodes that - * have the null function as their work proc. Such a node - * could be the terminal node in a DAG. If so, it will - * cause the DAG to complete, which will in turn free - * memory used by the DAG, which includes the node in - * question. Thus, we must avoid referencing the node - * at all after calling rf_FinishNode() on it. - */ - rf_FinishNode(finishlist, context); /* recursive call */ - } - /* fire all nodes in firelist */ - FireNodeList(firelist); -#endif /* SIMULATE */ - break; - - case rf_rollBackward : -#ifdef SIMULATE -#else /* SIMULATE */ - for (i = 0; i < node->numAntecedents; i++) { - a = *(node->antecedents + i); - RF_ASSERT(a->status == rf_good); - RF_ASSERT(a->numSuccDone <= a->numSuccedents); - RF_ASSERT(a->numSuccDone <= a->numSuccFired); - - if (a->numSuccDone == a->numSuccFired) { - if (a->undoFunc == rf_NullNodeFunc) { - /* don't fire NIL nodes, just process them */ - a->next = finishlist; - finishlist = a; - } else { - if (context != RF_INTR_CONTEXT) { - /* we only have to enqueue if we're at intr context */ - a->next = firelist; /* put node on a list to be fired after we unlock */ - firelist = a; - } else { /* enqueue the node for the dag exec thread to fire */ - RF_ASSERT(NodeReady(a)); - if (q) { - q->next = a; - q = a; - } - else { - qh = q = a; - qh->next = NULL; - } - } + + switch (node->dagHdr->status) { + case rf_enable: + case rf_rollForward: + for (i = 0; i < node->numSuccedents; i++) { + s = *(node->succedents + i); + RF_ASSERT(s->status == rf_wait); + (s->numAntDone)++; + if (s->numAntDone == s->numAntecedents) { + /* look for NIL nodes */ + if (s->doFunc == rf_NullNodeFunc) { + /* 
don't fire NIL nodes, just process + * them */ + s->next = finishlist; + finishlist = s; + } else { + /* look to see if the node is to be + * skipped */ + skipNode = RF_FALSE; + for (j = 0; j < s->numAntecedents; j++) + if ((s->antType[j] == rf_trueData) && (s->antecedents[j]->status == rf_bad)) + skipNode = RF_TRUE; + if (skipNode) { + /* this node has one or more + * failed true data + * dependencies, so skip it */ + s->next = skiplist; + skiplist = s; + } else + /* add s to list of nodes (q) + * to execute */ + if (context != RF_INTR_CONTEXT) { + /* we only have to + * enqueue if we're at + * intr context */ + s->next = firelist; /* put node on a list to + * be fired after we + * unlock */ + firelist = s; + } else { /* enqueue the node for + * the dag exec thread + * to fire */ + RF_ASSERT(NodeReady(s)); + if (q) { + q->next = s; + q = s; + } else { + qh = q = s; + qh->next = NULL; + } + } + } + } + } + + if (q) { + /* xfer our local list of nodes to the node queue */ + q->next = raidPtr->node_queue; + raidPtr->node_queue = qh; + DO_SIGNAL(raidPtr); + } + DO_UNLOCK(raidPtr); + + for (; skiplist; skiplist = next) { + next = skiplist->next; + skiplist->status = rf_skipped; + for (i = 0; i < skiplist->numAntecedents; i++) { + skiplist->antecedents[i]->numSuccFired++; + } + if (skiplist->commitNode) { + skiplist->dagHdr->numCommits++; + } + rf_FinishNode(skiplist, context); + } + for (; finishlist; finishlist = next) { + /* NIL nodes: no need to fire them */ + next = finishlist->next; + finishlist->status = rf_good; + for (i = 0; i < finishlist->numAntecedents; i++) { + finishlist->antecedents[i]->numSuccFired++; + } + if (finishlist->commitNode) + finishlist->dagHdr->numCommits++; + /* + * Okay, here we're calling rf_FinishNode() on nodes that + * have the null function as their work proc. Such a node + * could be the terminal node in a DAG. 
If so, it will + * cause the DAG to complete, which will in turn free + * memory used by the DAG, which includes the node in + * question. Thus, we must avoid referencing the node + * at all after calling rf_FinishNode() on it. + */ + rf_FinishNode(finishlist, context); /* recursive call */ + } + /* fire all nodes in firelist */ + FireNodeList(firelist); + break; + + case rf_rollBackward: + for (i = 0; i < node->numAntecedents; i++) { + a = *(node->antecedents + i); + RF_ASSERT(a->status == rf_good); + RF_ASSERT(a->numSuccDone <= a->numSuccedents); + RF_ASSERT(a->numSuccDone <= a->numSuccFired); + + if (a->numSuccDone == a->numSuccFired) { + if (a->undoFunc == rf_NullNodeFunc) { + /* don't fire NIL nodes, just process + * them */ + a->next = finishlist; + finishlist = a; + } else { + if (context != RF_INTR_CONTEXT) { + /* we only have to enqueue if + * we're at intr context */ + a->next = firelist; /* put node on a list to + * be fired after we + * unlock */ + firelist = a; + } else { /* enqueue the node for + * the dag exec thread + * to fire */ + RF_ASSERT(NodeReady(a)); + if (q) { + q->next = a; + q = a; + } else { + qh = q = a; + qh->next = NULL; + } + } + } + } + } + if (q) { + /* xfer our local list of nodes to the node queue */ + q->next = raidPtr->node_queue; + raidPtr->node_queue = qh; + DO_SIGNAL(raidPtr); + } + DO_UNLOCK(raidPtr); + for (; finishlist; finishlist = next) { /* NIL nodes: no need to + * fire them */ + next = finishlist->next; + finishlist->status = rf_good; + /* + * Okay, here we're calling rf_FinishNode() on nodes that + * have the null function as their work proc. Such a node + * could be the first node in a DAG. If so, it will + * cause the DAG to complete, which will in turn free + * memory used by the DAG, which includes the node in + * question. Thus, we must avoid referencing the node + * at all after calling rf_FinishNode() on it. 
+ */ + rf_FinishNode(finishlist, context); /* recursive call */ + } + /* fire all nodes in firelist */ + FireNodeList(firelist); + + break; + default: + printf("Engine found illegal DAG status in PropagateResults()\n"); + RF_PANIC(); + break; } - } - - } - if (q) { - /* xfer our local list of nodes to the node queue */ - q->next = raidPtr->node_queue; raidPtr->node_queue = qh; - DO_SIGNAL(raidPtr); - } - DO_UNLOCK(raidPtr); - for (; finishlist; finishlist = next) { /* NIL nodes: no need to fire them */ - next = finishlist->next; - finishlist->status = rf_good; - /* - * Okay, here we're calling rf_FinishNode() on nodes that - * have the null function as their work proc. Such a node - * could be the first node in a DAG. If so, it will - * cause the DAG to complete, which will in turn free - * memory used by the DAG, which includes the node in - * question. Thus, we must avoid referencing the node - * at all after calling rf_FinishNode() on it. - */ - rf_FinishNode(finishlist, context); /* recursive call */ - } - /* fire all nodes in firelist */ - FireNodeList(firelist); -#endif /* SIMULATE */ - - break; - default : - printf("Engine found illegal DAG status in PropagateResults()\n"); - RF_PANIC(); - break; - } } @@ -804,102 +596,53 @@ static void PropagateResults( /* * Process a fired node which has completed */ -static void ProcessNode( - RF_DagNode_t *node, - int context) +static void +ProcessNode( + RF_DagNode_t * node, + int context) { - RF_Raid_t *raidPtr; - int tid; - - raidPtr = node->dagHdr->raidPtr; - - switch (node->status) { - case rf_good : - /* normal case, don't need to do anything */ - break; - case rf_bad : - if ((node->dagHdr->numCommits > 0) || (node->dagHdr->numCommitNodes == 0)) { - node->dagHdr->status = rf_rollForward; /* crossed commit barrier */ - if (rf_engineDebug || 1) { - rf_get_threadid(tid); - printf("[%d] node (%s) returned fail, rolling forward\n", tid, node->name); - } - } - else { - node->dagHdr->status = rf_rollBackward; /* never 
reached commit barrier */ - if (rf_engineDebug || 1) { - rf_get_threadid(tid); - printf("[%d] node (%s) returned fail, rolling backward\n", tid, node->name); - } - } - break; - case rf_undone : - /* normal rollBackward case, don't need to do anything */ - break; - case rf_panic : - /* an undo node failed!!! */ - printf("UNDO of a node failed!!!/n"); - break; - default : - printf("node finished execution with an illegal status!!!\n"); - RF_PANIC(); - break; - } - -#ifdef SIMULATE - /* simulator fires nodes here. - * user/kernel rely upon PropagateResults to do this. - * XXX seems like this code should be merged so that the same thing happens for - * both sim, user, and kernel. -wvcii - */ - switch (node->dagHdr->status) { - case rf_enable : - case rf_rollForward : - if (node->numSuccedents == 0) { - /* process terminal node */ - if (rf_engineDebug) if (!DAGDone(node->dagHdr)) { - rf_get_threadid(tid); - printf("[%d] ProcessNode: !!!done but dag still in flight\n",tid); - RF_PANIC(); - } - if (rf_engineDebug) printf("[%d] ProcessNode: !!!done will return true\n",tid); - /* Mark dag as done */ - (node->dagHdr)->done=RF_TRUE; - raidPtr->dags_in_flight--; - } - else { - PropagateResults(node, context); - FireNodeArray(node->numSuccedents, node->succedents); - } - break; - case rf_rollBackward : - if (node->numAntecedents == 0) { - /* reached head of dag, we're done */ - if (rf_engineDebug) if (!DAGDone(node->dagHdr)) { - rf_get_threadid(tid); - printf("[%d] ProcessNode: !!!done but dag still in flight\n",tid); - RF_PANIC(); - } - if (rf_engineDebug) printf("[%d] ProcessNode: !!!done will return true\n",tid); - /* Mark dag as done */ - (node->dagHdr)->done=RF_TRUE; - raidPtr->dags_in_flight--; - } - else { - PropagateResults(node, context); - FireNodeArray(node->numAntecedents, node->antecedents); - } - break; - default : - RF_PANIC(); - break; - } - - -#else /* SIMULATE */ - /* enqueue node's succedents (antecedents if rollBackward) for execution */ - 
PropagateResults(node, context); -#endif /* SIMULATE */ + RF_Raid_t *raidPtr; + int tid; + + raidPtr = node->dagHdr->raidPtr; + + switch (node->status) { + case rf_good: + /* normal case, don't need to do anything */ + break; + case rf_bad: + if ((node->dagHdr->numCommits > 0) || (node->dagHdr->numCommitNodes == 0)) { + node->dagHdr->status = rf_rollForward; /* crossed commit + * barrier */ + if (rf_engineDebug || 1) { + rf_get_threadid(tid); + printf("[%d] node (%s) returned fail, rolling forward\n", tid, node->name); + } + } else { + node->dagHdr->status = rf_rollBackward; /* never reached commit + * barrier */ + if (rf_engineDebug || 1) { + rf_get_threadid(tid); + printf("[%d] node (%s) returned fail, rolling backward\n", tid, node->name); + } + } + break; + case rf_undone: + /* normal rollBackward case, don't need to do anything */ + break; + case rf_panic: + /* an undo node failed!!! */ + printf("UNDO of a node failed!!!/n"); + break; + default: + printf("node finished execution with an illegal status!!!\n"); + RF_PANIC(); + break; + } + + /* enqueue node's succedents (antecedents if rollBackward) for + * execution */ + PropagateResults(node, context); } @@ -909,21 +652,17 @@ static void ProcessNode( * This routine is called by each node execution function to mark the node * as complete and fire off any successors that have been enabled. 
*/ -int rf_FinishNode( - RF_DagNode_t *node, - int context) +int +rf_FinishNode( + RF_DagNode_t * node, + int context) { - /* as far as I can tell, retcode is not used -wvcii */ - int retcode = RF_FALSE; - node->dagHdr->numNodesCompleted++; - ProcessNode(node, context); - -#ifdef SIMULATE - if ((node->dagHdr)->done == RF_TRUE) - retcode = RF_TRUE; -#endif /* SIMULATE */ + /* as far as I can tell, retcode is not used -wvcii */ + int retcode = RF_FALSE; + node->dagHdr->numNodesCompleted++; + ProcessNode(node, context); - return(retcode); + return (retcode); } @@ -939,37 +678,36 @@ int rf_FinishNode( * All we do here is fire the direct successors of the header node. The * DAG execution thread does the rest of the dag processing. */ -int rf_DispatchDAG( - RF_DagHeader_t *dag, - void (*cbFunc)(void *), - void *cbArg) +int +rf_DispatchDAG( + RF_DagHeader_t * dag, + void (*cbFunc) (void *), + void *cbArg) { - RF_Raid_t *raidPtr; - int tid; - - raidPtr = dag->raidPtr; - if (dag->tracerec) { - RF_ETIMER_START(dag->tracerec->timer); - } - - if (rf_engineDebug || rf_validateDAGDebug) { - if (rf_ValidateDAG(dag)) - RF_PANIC(); - } - if (rf_engineDebug) { - rf_get_threadid(tid); - printf("[%d] Entering DispatchDAG\n",tid); - } - - raidPtr->dags_in_flight++; /* debug only: blow off proper locking */ - dag->cbFunc = cbFunc; - dag->cbArg = cbArg; - dag->numNodesCompleted = 0; - dag->status = rf_enable; - FireNodeArray(dag->numSuccedents, dag->succedents); - return(1); -} + RF_Raid_t *raidPtr; + int tid; + raidPtr = dag->raidPtr; + if (dag->tracerec) { + RF_ETIMER_START(dag->tracerec->timer); + } + if (rf_engineDebug || rf_validateDAGDebug) { + if (rf_ValidateDAG(dag)) + RF_PANIC(); + } + if (rf_engineDebug) { + rf_get_threadid(tid); + printf("[%d] Entering DispatchDAG\n", tid); + } + raidPtr->dags_in_flight++; /* debug only: blow off proper + * locking */ + dag->cbFunc = cbFunc; + dag->cbArg = cbArg; + dag->numNodesCompleted = 0; + dag->status = rf_enable; + 
FireNodeArray(dag->numSuccedents, dag->succedents); + return (1); +} /* dedicated kernel thread: * the thread that handles all DAG node firing. * To minimize locking and unlocking, we grab a copy of the entire node queue and then set the @@ -981,116 +719,108 @@ int rf_DispatchDAG( * characteristics from the aio_completion_thread. */ -#ifndef SIMULATE -static void DAGExecutionThread(RF_ThreadArg_t arg) +static void +DAGExecutionThread(RF_ThreadArg_t arg) { - RF_DagNode_t *nd, *local_nq, *term_nq, *fire_nq; - RF_Raid_t *raidPtr; - int ks, tid; - int s; -#if !defined(__NetBSD__) && !defined(__OpenBSD__) - RF_Thread_t thread; -#endif - - raidPtr = (RF_Raid_t *)arg; + RF_DagNode_t *nd, *local_nq, *term_nq, *fire_nq; + RF_Raid_t *raidPtr; + int ks, tid; + int s; - rf_assign_threadid(); - if (rf_engineDebug) { - rf_get_threadid(tid); - printf("[%d] Engine thread is running\n", tid); - } + raidPtr = (RF_Raid_t *) arg; -#ifdef KERNEL + rf_assign_threadid(); + if (rf_engineDebug) { + rf_get_threadid(tid); + printf("[%d] Engine thread is running\n", tid); + } #if !defined(__NetBSD__) && !defined(__OpenBSD__) - thread = current_thread(); - thread_swappable(thread, RF_FALSE); - thread->priority = thread->sched_pri = BASEPRI_SYSTEM; - s = spl0(); + thread = current_thread(); + thread_swappable(thread, RF_FALSE); + thread->priority = thread->sched_pri = BASEPRI_SYSTEM; + s = spl0(); #endif - /* XXX what to put here XXX */ - - s=splbio(); - -#endif /* KERNEL */ - - RF_THREADGROUP_RUNNING(&raidPtr->engine_tg); - - DO_LOCK(raidPtr); - while (!raidPtr->shutdown_engine) { - - while (raidPtr->node_queue != NULL) { - local_nq = raidPtr->node_queue; - fire_nq = NULL; - term_nq = NULL; - raidPtr->node_queue = NULL; - DO_UNLOCK(raidPtr); - - /* first, strip out the terminal nodes */ - while (local_nq) { - nd = local_nq; - local_nq = local_nq->next; - switch(nd->dagHdr->status) { - case rf_enable : - case rf_rollForward : - if (nd->numSuccedents == 0) { - /* end of the dag, add to callback 
list */ - nd->next = term_nq; - term_nq = nd; - } - else { - /* not the end, add to the fire queue */ - nd->next = fire_nq; - fire_nq = nd; - } - break; - case rf_rollBackward : - if (nd->numAntecedents == 0) { - /* end of the dag, add to the callback list */ - nd->next = term_nq; - term_nq = nd; - } - else { - /* not the end, add to the fire queue */ - nd->next = fire_nq; - fire_nq = nd; - } - break; - default : - RF_PANIC(); - break; + /* XXX what to put here XXX */ + + s = splbio(); + + RF_THREADGROUP_RUNNING(&raidPtr->engine_tg); + + DO_LOCK(raidPtr); + while (!raidPtr->shutdown_engine) { + + while (raidPtr->node_queue != NULL) { + local_nq = raidPtr->node_queue; + fire_nq = NULL; + term_nq = NULL; + raidPtr->node_queue = NULL; + DO_UNLOCK(raidPtr); + + /* first, strip out the terminal nodes */ + while (local_nq) { + nd = local_nq; + local_nq = local_nq->next; + switch (nd->dagHdr->status) { + case rf_enable: + case rf_rollForward: + if (nd->numSuccedents == 0) { + /* end of the dag, add to + * callback list */ + nd->next = term_nq; + term_nq = nd; + } else { + /* not the end, add to the + * fire queue */ + nd->next = fire_nq; + fire_nq = nd; + } + break; + case rf_rollBackward: + if (nd->numAntecedents == 0) { + /* end of the dag, add to the + * callback list */ + nd->next = term_nq; + term_nq = nd; + } else { + /* not the end, add to the + * fire queue */ + nd->next = fire_nq; + fire_nq = nd; + } + break; + default: + RF_PANIC(); + break; + } + } + + /* execute callback of dags which have reached the + * terminal node */ + while (term_nq) { + nd = term_nq; + term_nq = term_nq->next; + nd->next = NULL; + (nd->dagHdr->cbFunc) (nd->dagHdr->cbArg); + raidPtr->dags_in_flight--; /* debug only */ + } + + /* fire remaining nodes */ + FireNodeList(fire_nq); + + DO_LOCK(raidPtr); + } + while (!raidPtr->shutdown_engine && raidPtr->node_queue == NULL) + DO_WAIT(raidPtr); } - } - - /* execute callback of dags which have reached the terminal node */ - while (term_nq) { - 
nd = term_nq; - term_nq = term_nq->next; - nd->next = NULL; - (nd->dagHdr->cbFunc)(nd->dagHdr->cbArg); - raidPtr->dags_in_flight--; /* debug only */ - } - - /* fire remaining nodes */ - FireNodeList(fire_nq); - - DO_LOCK(raidPtr); - } - while (!raidPtr->shutdown_engine && raidPtr->node_queue == NULL) - DO_WAIT(raidPtr); - } - DO_UNLOCK(raidPtr); - - RF_THREADGROUP_DONE(&raidPtr->engine_tg); -#ifdef KERNEL + DO_UNLOCK(raidPtr); + + RF_THREADGROUP_DONE(&raidPtr->engine_tg); #if defined(__NetBSD__) || defined(__OpenBSD__) - splx(s); - kthread_exit(0); + splx(s); + kthread_exit(0); #else - splx(s); - thread_terminate(thread); - thread_halt_self(); + splx(s); + thread_terminate(thread); + thread_halt_self(); #endif -#endif /* KERNEL */ } - -#endif /* !SIMULATE */ diff --git a/sys/dev/raidframe/rf_engine.h b/sys/dev/raidframe/rf_engine.h index c3186aa791f..d6eeb744b1a 100644 --- a/sys/dev/raidframe/rf_engine.h +++ b/sys/dev/raidframe/rf_engine.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_engine.h,v 1.1 1999/01/11 14:29:19 niklas Exp $ */ -/* $NetBSD: rf_engine.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: rf_engine.h,v 1.2 1999/02/16 00:02:43 niklas Exp $ */ +/* $NetBSD: rf_engine.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,43 +33,16 @@ * * **********************************************************/ -/* : - * Log: rf_engine.h,v - * Revision 1.11 1996/06/14 14:16:22 jimz - * new decl of ConfigureEngine - * - * Revision 1.10 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.9 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.8 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.7 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.6 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.5 1995/12/01 18:12:17 root - * added copyright info - * - */ - #ifndef _RF__RF_ENGINE_H_ #define _RF__RF_ENGINE_H_ -int rf_ConfigureEngine(RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, RF_Config_t *cfgPtr); +int +rf_ConfigureEngine(RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, RF_Config_t * cfgPtr); -int rf_FinishNode(RF_DagNode_t *node, int context); /* return finished node to engine */ +int rf_FinishNode(RF_DagNode_t * node, int context); /* return finished node + * to engine */ -int rf_DispatchDAG(RF_DagHeader_t *dag, void (*cbFunc)(void *), void *cbArg); /* execute dag */ +int rf_DispatchDAG(RF_DagHeader_t * dag, void (*cbFunc) (void *), void *cbArg); /* execute dag */ -#endif /* !_RF__RF_ENGINE_H_ */ +#endif /* !_RF__RF_ENGINE_H_ */ diff --git a/sys/dev/raidframe/rf_etimer.h b/sys/dev/raidframe/rf_etimer.h index 5d78b80eac2..2033b9b46c6 100644 --- a/sys/dev/raidframe/rf_etimer.h +++ b/sys/dev/raidframe/rf_etimer.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_etimer.h,v 1.1 1999/01/11 14:29:20 niklas Exp $ */ -/* $NetBSD: rf_etimer.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: rf_etimer.h,v 1.2 1999/02/16 00:02:43 niklas Exp $ */ +/* $NetBSD: rf_etimer.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -38,103 +38,6 @@ * 125 MHz 34 sec * * - * : - * Log: rf_etimer.h,v - * Revision 1.32 1996/08/13 18:11:09 jimz - * want MACH&&!__osf__, not just MACH for mach timing (MACH defined under OSF/1) - * - * Revision 1.31 1996/08/12 20:11:38 jimz - * use read_real_time() on AIX4+ - * - * Revision 1.30 1996/08/09 18:48:12 jimz - * for now, use gettimeofday() on MACH - * (should eventually use better clock stuff) - * - * Revision 1.29 1996/08/07 21:09:08 jimz - * add IRIX as a gettimeofday system - * - * Revision 1.28 1996/08/06 22:25:23 jimz - * add LINUX_I386 - * - * Revision 1.27 1996/07/30 04:45:53 jimz - * add ultrix stuff - * - * Revision 1.26 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.25 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.24 1996/07/27 18:40:24 jimz - * cleanup sweep - * - * Revision 1.23 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.22 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.21 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.20 1996/07/17 14:26:28 jimz - * rf_scc -> rf_rpcc - * - * Revision 1.19 1996/06/14 21:24:48 jimz - * move out ConfigureEtimer - * - * Revision 1.18 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.17 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.16 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.15 1996/05/27 18:56:37 jimz - * more code 
cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.14 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.13 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.12 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.11 1995/12/01 18:10:40 root - * added copyright info - * - * Revision 1.10 1995/09/29 14:27:32 wvcii - * removed printfs from ConfigureEtimer() - * - * Revision 1.9 95/09/19 22:57:31 jimz - * added kernel version of ConfigureEtimer - * - * Revision 1.8 1995/09/14 13:03:04 amiri - * set default CPU speed to 125Mhz to avoid divide by zero problems. - * - * Revision 1.7 1995/09/11 19:04:36 wvcii - * timer autoconfigs using pdl routine to check cpu speed - * value may still be overridden via config debug var timerTicksPerSec - * */ @@ -143,76 +46,19 @@ #include "rf_options.h" -#ifdef _KERNEL -#define KERNEL -#endif - -#if defined(__NetBSD__) || defined(__OpenBSD__) - -#ifdef KERNEL extern unsigned int rpcc(void); #define rf_read_cycle_counter rpcc -#else /* KERNEL */ -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -/* XXX does this function even exist anywhere??? 
GO */ -extern unsigned int rf_rpcc(); -#endif -#define rf_read_cycle_counter rf_rpcc -#endif /* KERNEL */ - -#define RF_DEF_TIMER_MAX_VAL 0xFFFFFFFF - -typedef struct RF_EtimerVal_s { - unsigned ccnt; /* cycle count */ -} RF_EtimerVal_t; - -struct RF_Etimer_s { - RF_EtimerVal_t st; - RF_EtimerVal_t et; - unsigned long ticks; /* elapsed time in ticks */ -}; - -extern long rf_timer_max_val; -extern long rf_timer_ticks_per_second; -extern unsigned long rf_timer_ticks_per_usec; - -#define RF_ETIMER_TICKS2US(_tcks_) ( (_tcks_) / rf_timer_ticks_per_usec ) -#define RF_ETIMER_START(_t_) { (_t_).st.ccnt = rf_read_cycle_counter(); } -#define RF_ETIMER_STOP(_t_) { (_t_).et.ccnt = rf_read_cycle_counter(); } -#define RF_ETIMER_EVAL(_t_) { \ - if ((_t_).st.ccnt < (_t_).et.ccnt) \ - (_t_).ticks = (_t_).et.ccnt - (_t_).st.ccnt; \ - else \ - (_t_).ticks = rf_timer_max_val - ((_t_).st.ccnt - (_t_).et.ccnt); \ -} - -#define RF_ETIMER_VAL_TICKS(_t_) ((_t_).ticks) -#define RF_ETIMER_VAL_US(_t_) (RF_ETIMER_TICKS2US((_t_).ticks)) -#define RF_ETIMER_VAL_MS(_t_) (RF_ETIMER_TICKS2US((_t_).ticks)/1000) - -#endif /* __NetBSD__ || __OpenBSD__ */ - - -#if defined(__alpha) && !defined(__NetBSD__) && !defined(__OpenBSD__) - -#ifdef KERNEL -extern unsigned int rpcc(); -#define rf_read_cycle_counter rpcc -#else /* KERNEL */ -extern unsigned int rf_rpcc(); -#define rf_read_cycle_counter rf_rpcc -#endif /* KERNEL */ #define RF_DEF_TIMER_MAX_VAL 0xFFFFFFFF typedef struct RF_EtimerVal_s { - unsigned ccnt; /* cycle count */ -} RF_EtimerVal_t; + unsigned ccnt; /* cycle count */ +} RF_EtimerVal_t; struct RF_Etimer_s { - RF_EtimerVal_t st; - RF_EtimerVal_t et; - unsigned long ticks; /* elapsed time in ticks */ + RF_EtimerVal_t st; + RF_EtimerVal_t et; + unsigned long ticks; /* elapsed time in ticks */ }; extern long rf_timer_max_val; @@ -233,121 +79,5 @@ extern unsigned long rf_timer_ticks_per_usec; #define RF_ETIMER_VAL_US(_t_) (RF_ETIMER_TICKS2US((_t_).ticks)) #define RF_ETIMER_VAL_MS(_t_) 
(RF_ETIMER_TICKS2US((_t_).ticks)/1000) -#endif /* __alpha */ - -#ifdef _IBMR2 - -extern void rf_rtclock(unsigned int *secs, unsigned int *nsecs); - -#define RF_MSEC_PER_SEC 1000 -#define RF_USEC_PER_SEC 1000000 -#define RF_NSEC_PER_SEC 1000000000 - -typedef struct RF_EtimerVal_s { - unsigned int secs; - unsigned int nsecs; -} RF_EtimerVal_t; - -struct RF_Etimer_s { - RF_EtimerVal_t start; - RF_EtimerVal_t end; - RF_EtimerVal_t elapsed; -}; - -#if RF_AIXVERS >= 4 - -#include <sys/time.h> - -#define RF_ETIMER_START(_t_) { \ - timebasestruct_t tb; \ - tb.flag = 1; \ - read_real_time(&tb, TIMEBASE_SZ); \ - (_t_).start.secs = tb.tb_high; \ - (_t_).start.nsecs = tb.tb_low; \ -} - -#define RF_ETIMER_STOP(_t_) { \ - timebasestruct_t tb; \ - tb.flag = 1; \ - read_real_time(&tb, TIMEBASE_SZ); \ - (_t_).end.secs = tb.tb_high; \ - (_t_).end.nsecs = tb.tb_low; \ -} - -#else /* RF_AIXVERS >= 4 */ - -#define RF_ETIMER_START(_t_) { \ - rf_rtclock(&((_t_).start.secs), &((_t_).start.nsecs)); \ -} - -#define RF_ETIMER_STOP(_t_) { \ - rf_rtclock(&((_t_).end.secs), &((_t_).end.nsecs)); \ -} - -#endif /* RF_AIXVERS >= 4 */ - -#define RF_ETIMER_EVAL(_t_) { \ - if ((_t_).end.nsecs >= (_t_).start.nsecs) { \ - (_t_).elapsed.nsecs = (_t_).end.nsecs - (_t_).start.nsecs; \ - (_t_).elapsed.secs = (_t_).end.secs - (_t_).start.nsecs; \ - } \ - else { \ - (_t_).elapsed.nsecs = RF_NSEC_PER_SEC + (_t_).end.nsecs; \ - (_t_).elapsed.nsecs -= (_t_).start.nsecs; \ - (_t_).elapsed.secs = (_t_).end.secs - (_t_).start.secs + 1; \ - } \ -} - -#define RF_ETIMER_VAL_US(_t_) (((_t_).elapsed.secs*RF_USEC_PER_SEC)+((_t_).elapsed.nsecs/1000)) -#define RF_ETIMER_VAL_MS(_t_) (((_t_).elapsed.secs*RF_MSEC_PER_SEC)+((_t_).elapsed.nsecs/1000000)) - -#endif /* _IBMR2 */ - -/* - * XXX investigate better timing for these - */ -#if defined(hpux) || defined(sun) || defined(NETBSD_I386) || defined(OPENBSD_I386) || defined(ultrix) || defined(LINUX_I386) || defined(IRIX) || (defined(MACH) && !defined(__osf__)) -#include 
<sys/time.h> - -#define RF_USEC_PER_SEC 1000000 - -struct RF_Etimer_s { - struct timeval start; - struct timeval end; - struct timeval elapsed; -}; -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#define RF_ETIMER_START(_t_) { \ - gettimeofday(&((_t_).start), NULL); \ -} - -#define RF_ETIMER_STOP(_t_) { \ - gettimeofday(&((_t_).end), NULL); \ -} - -#else -#define RF_ETIMER_START(_t_) { \ -} -/* XXX these just drop off the end of the world... */ -#define RF_ETIMER_STOP(_t_) { \ -} -#endif - -#define RF_ETIMER_EVAL(_t_) { \ - if ((_t_).end.tv_usec >= (_t_).start.tv_usec) { \ - (_t_).elapsed.tv_usec = (_t_).end.tv_usec - (_t_).start.tv_usec; \ - (_t_).elapsed.tv_sec = (_t_).end.tv_sec - (_t_).start.tv_usec; \ - } \ - else { \ - (_t_).elapsed.tv_usec = RF_USEC_PER_SEC + (_t_).end.tv_usec; \ - (_t_).elapsed.tv_usec -= (_t_).start.tv_usec; \ - (_t_).elapsed.tv_sec = (_t_).end.tv_sec - (_t_).start.tv_sec + 1; \ - } \ -} - -#define RF_ETIMER_VAL_US(_t_) (((_t_).elapsed.tv_sec*RF_USEC_PER_SEC)+(_t_).elapsed.tv_usec) -#define RF_ETIMER_VAL_MS(_t_) (((_t_).elapsed.tv_sec*RF_MSEC_PER_SEC)+((_t_).elapsed.tv_usec/1000)) - -#endif /* hpux || sun || NETBSD_I386 || OPENBSD_I386 || ultrix || LINUX_I386 || IRIX || (MACH && !__osf__) */ -#endif /* !_RF__RF_TIMER_H_ */ +#endif /* !_RF__RF_TIMER_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd.c b/sys/dev/raidframe/rf_evenodd.c index 90d18653cda..7b40675d9df 100644 --- a/sys/dev/raidframe/rf_evenodd.c +++ b/sys/dev/raidframe/rf_evenodd.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_evenodd.c,v 1.1 1999/01/11 14:29:21 niklas Exp $ */ -/* $NetBSD: rf_evenodd.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: rf_evenodd.c,v 1.2 1999/02/16 00:02:44 niklas Exp $ */ +/* $NetBSD: rf_evenodd.c,v 1.2 1999/02/05 00:06:11 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -63,85 +63,92 @@ #include "rf_engine.h" typedef struct RF_EvenOddConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by IdentifyStripe */ -} RF_EvenOddConfigInfo_t; - -int rf_ConfigureEvenOdd(listp, raidPtr, cfgPtr) - RF_ShutdownList_t **listp; - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; + RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by + * IdentifyStripe */ +} RF_EvenOddConfigInfo_t; + +int +rf_ConfigureEvenOdd(listp, raidPtr, cfgPtr) + RF_ShutdownList_t **listp; + RF_Raid_t *raidPtr; + RF_Config_t *cfgPtr; { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_EvenOddConfigInfo_t *info; - RF_RowCol_t i, j, startdisk; - - RF_MallocAndAdd(info, sizeof(RF_EvenOddConfigInfo_t), (RF_EvenOddConfigInfo_t *), raidPtr->cleanupList); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); - startdisk = 0; - for (i=0; i<raidPtr->numCol; i++) { - for (j=0; j<raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; - } - if ((startdisk -= 2) < 0) startdisk += raidPtr->numCol; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol-2; /* ORIG: layoutPtr->numDataCol = raidPtr->numCol-1; */ + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_EvenOddConfigInfo_t *info; + RF_RowCol_t i, j, startdisk; + + RF_MallocAndAdd(info, sizeof(RF_EvenOddConfigInfo_t), (RF_EvenOddConfigInfo_t *), raidPtr->cleanupList); + layoutPtr->layoutSpecificInfo = (void *) info; + + RF_ASSERT(raidPtr->numRow == 1); + + info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); + startdisk = 0; + for (i = 0; i < raidPtr->numCol; i++) { + for (j = 0; j 
< raidPtr->numCol; j++) { + info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; + } + if ((startdisk -= 2) < 0) + startdisk += raidPtr->numCol; + } + + /* fill in the remaining layout parameters */ + layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->numDataCol = raidPtr->numCol - 2; /* ORIG: + * layoutPtr->numDataCol + * = raidPtr->numCol-1; */ #if RF_EO_MATRIX_DIM > 17 - if (raidPtr->numCol <= 17){ - printf("Number of stripe units in a parity stripe is smaller than 17. Please\n"); - printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n"); - printf("be 17 to increase performance. \n"); - return(EINVAL); - } + if (raidPtr->numCol <= 17) { + printf("Number of stripe units in a parity stripe is smaller than 17. Please\n"); + printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n"); + printf("be 17 to increase performance. \n"); + return (EINVAL); + } #elif RF_EO_MATRIX_DIM == 17 - if (raidPtr->numCol > 17) { - printf("Number of stripe units in a parity stripe is bigger than 17. Please\n"); - printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n"); - printf("be 257 for encoding and decoding functions to work. \n"); - return(EINVAL); - } + if (raidPtr->numCol > 17) { + printf("Number of stripe units in a parity stripe is bigger than 17. Please\n"); + printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n"); + printf("be 257 for encoding and decoding functions to work. 
\n"); + return (EINVAL); + } #endif - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 2; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->numParityCol = 2; + layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; + raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - return(0); + return (0); } -int rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t *raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t * raidPtr) { - return(20); + return (20); } -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t *raidPtr) +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t * raidPtr) { - return(10); + return (10); } -void rf_IdentifyStripeEvenOdd( - RF_Raid_t *raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t **diskids, - RF_RowCol_t *outRow) +void +rf_IdentifyStripeEvenOdd( + RF_Raid_t * raidPtr, + RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, + RF_RowCol_t * outRow) { - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_EvenOddConfigInfo_t *info = (RF_EvenOddConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); + RF_EvenOddConfigInfo_t *info = (RF_EvenOddConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - *outRow = 0; - *diskids = info->stripeIdentifier[ stripeID % raidPtr->numCol ]; + *outRow = 0; + *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; } - -/* The 
layout of stripe unit on the disks are: c0 c1 c2 c3 c4 +/* The layout of stripe unit on the disks are: c0 c1 c2 c3 c4 0 1 2 E P 5 E P 3 4 @@ -150,407 +157,402 @@ void rf_IdentifyStripeEvenOdd( E P 12 13 14 .... - We use the MapSectorRAID5 to map data information because the routine can be shown to map exactly + We use the MapSectorRAID5 to map data information because the routine can be shown to map exactly the layout of data stripe unit as shown above although we have 2 redundant information now. But for E and P, we use rf_MapEEvenOdd and rf_MapParityEvenOdd which are different method from raid-5. */ -void rf_MapParityEvenOdd( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapParityEvenOdd( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_StripeNum_t endSUIDofthisStrip = (SUID/raidPtr->Layout.numDataCol + 1)*raidPtr->Layout.numDataCol - 1; + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + RF_StripeNum_t endSUIDofthisStrip = (SUID / raidPtr->Layout.numDataCol + 1) * raidPtr->Layout.numDataCol - 1; - *row = 0; - *col = ( endSUIDofthisStrip + 2)%raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *row = 0; + *col = (endSUIDofthisStrip + 2) % raidPtr->numCol; + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void rf_MapEEvenOdd( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapEEvenOdd( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + 
RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_StripeNum_t endSUIDofthisStrip = (SUID/raidPtr->Layout.numDataCol + 1)*raidPtr->Layout.numDataCol - 1; + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + RF_StripeNum_t endSUIDofthisStrip = (SUID / raidPtr->Layout.numDataCol + 1) * raidPtr->Layout.numDataCol - 1; - *row = 0; - *col = ( endSUIDofthisStrip + 1)%raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *row = 0; + *col = (endSUIDofthisStrip + 1) % raidPtr->numCol; + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void rf_EODagSelect( - RF_Raid_t *raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t *asmap, - RF_VoidFuncPtr *createFunc) +void +rf_EODagSelect( + RF_Raid_t * raidPtr, + RF_IoType_t type, + RF_AccessStripeMap_t * asmap, + RF_VoidFuncPtr * createFunc) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - unsigned ndfail = asmap->numDataFailed; - unsigned npfail = asmap->numParityFailed +asmap->numQFailed; - unsigned ntfail = npfail + ndfail; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - if (ntfail > 2) - { - RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } - - /* ok, we can do this I/O */ - if (type == RF_IO_TYPE_READ) - { - switch (ndfail) - { - case 0: - /* fault free read */ - *createFunc = (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG; /* same as raid 5 */ - break; - case 1: - /* lost a single data unit */ - /* two cases: - (1) parity is not lost. - do a normal raid 5 reconstruct read. - (2) parity is lost. - do a reconstruct read using "e". 
- */ - if (ntfail == 2) /* also lost redundancy */ - { - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) - *createFunc = (RF_VoidFuncPtr)rf_EO_110_CreateReadDAG; - else - *createFunc = (RF_VoidFuncPtr)rf_EO_101_CreateReadDAG; - } - else - { - /* P and E are ok. But is there a failure - in some unaccessed data unit? - */ - if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2) - *createFunc = (RF_VoidFuncPtr)rf_EO_200_CreateReadDAG; - else - *createFunc = (RF_VoidFuncPtr)rf_EO_100_CreateReadDAG; - } - break; - case 2: - /* *createFunc = rf_EO_200_CreateReadDAG; */ - *createFunc = NULL; - break; - } - return; - } - - /* a write */ - switch (ntfail) - { - case 0: /* fault free */ - if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { - - *createFunc = (RF_VoidFuncPtr)rf_EOCreateSmallWriteDAG; - } - else { - *createFunc = (RF_VoidFuncPtr)rf_EOCreateLargeWriteDAG; - } - break; - - case 1: /* single disk fault */ - if (npfail==1) - { - RF_ASSERT ((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) - { /* q died, treat like normal mode raid5 write.*/ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || (asmap->parityInfo->next!=NULL) || rf_NumFailedDataUnitsInStripe(raidPtr,asmap)) - *createFunc = (RF_VoidFuncPtr)rf_EO_001_CreateSmallWriteDAG; - else - *createFunc = (RF_VoidFuncPtr)rf_EO_001_CreateLargeWriteDAG; - } - else - { /* parity died, small write only updating Q */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || (asmap->qInfo->next!=NULL) || rf_NumFailedDataUnitsInStripe(raidPtr,asmap)) - *createFunc = 
(RF_VoidFuncPtr)rf_EO_010_CreateSmallWriteDAG; - else - *createFunc = (RF_VoidFuncPtr)rf_EO_010_CreateLargeWriteDAG; - } + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + unsigned ndfail = asmap->numDataFailed; + unsigned npfail = asmap->numParityFailed + asmap->numQFailed; + unsigned ntfail = npfail + ndfail; + + RF_ASSERT(RF_IO_IS_R_OR_W(type)); + if (ntfail > 2) { + RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); + /* *infoFunc = */ *createFunc = NULL; + return; } - else - { /* data missing. - Do a P reconstruct write if only a single data unit - is lost in the stripe, otherwise a reconstruct - write which employnig both P and E units. */ - if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2) - { - if (asmap->numStripeUnitsAccessed == 1) - *createFunc = (RF_VoidFuncPtr)rf_EO_200_CreateWriteDAG; - else - *createFunc = NULL; /* No direct support for this case now, like that in Raid-5 */ - } - else - { - if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* No direct support for this case now, like that in Raid-5 */ - else *createFunc = (RF_VoidFuncPtr)rf_EO_100_CreateWriteDAG; - } + /* ok, we can do this I/O */ + if (type == RF_IO_TYPE_READ) { + switch (ndfail) { + case 0: + /* fault free read */ + *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */ + break; + case 1: + /* lost a single data unit */ + /* two cases: (1) parity is not lost. do a normal raid + * 5 reconstruct read. (2) parity is lost. do a + * reconstruct read using "e". */ + if (ntfail == 2) { /* also lost redundancy */ + if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) + *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateReadDAG; + else + *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateReadDAG; + } else { + /* P and E are ok. But is there a failure in + * some unaccessed data unit? 
*/ + if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) + *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateReadDAG; + else + *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateReadDAG; + } + break; + case 2: + /* *createFunc = rf_EO_200_CreateReadDAG; */ + *createFunc = NULL; + break; + } + return; } - break; - - case 2: /* two disk faults */ - switch (npfail) - { - case 2: /* both p and q dead */ - *createFunc = (RF_VoidFuncPtr)rf_EO_011_CreateWriteDAG; - break; - case 1: /* either p or q and dead data */ - RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); - RF_ASSERT ((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) - { - if(asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* In both PQ and EvenOdd, no direct support for this case now, like that in Raid-5 */ - else - *createFunc = (RF_VoidFuncPtr)rf_EO_101_CreateWriteDAG; - } - else - { - if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* No direct support for this case, like that in Raid-5 */ - else - *createFunc = (RF_VoidFuncPtr)rf_EO_110_CreateWriteDAG; - } - break; - case 0: /* double data loss */ - /* if(asmap->failedPDAs[0]->numSector + asmap->failedPDAs[1]->numSector == 2 * layoutPtr->sectorsPerStripeUnit ) - *createFunc = rf_EOCreateLargeWriteDAG; - else */ - *createFunc = NULL; /* currently, in Evenodd, No support for simultaneous access of both failed SUs */ - break; + /* a write */ + switch (ntfail) { + case 0: /* fault free */ + if (rf_suppressLocksAndLargeWrites || + (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || + (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { + + *createFunc = (RF_VoidFuncPtr) 
rf_EOCreateSmallWriteDAG; + } else { + *createFunc = (RF_VoidFuncPtr) rf_EOCreateLargeWriteDAG; + } + break; + + case 1: /* single disk fault */ + if (npfail == 1) { + RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); + if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like + * normal mode raid5 + * write. */ + if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) + || (asmap->parityInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) + *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateSmallWriteDAG; + else + *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateLargeWriteDAG; + } else {/* parity died, small write only updating Q */ + if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) + || (asmap->qInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) + *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateSmallWriteDAG; + else + *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateLargeWriteDAG; + } + } else { /* data missing. Do a P reconstruct write if + * only a single data unit is lost in the + * stripe, otherwise a reconstruct write which + * employnig both P and E units. 
*/ + if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) { + if (asmap->numStripeUnitsAccessed == 1) + *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateWriteDAG; + else + *createFunc = NULL; /* No direct support for + * this case now, like + * that in Raid-5 */ + } else { + if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) + *createFunc = NULL; /* No direct support for + * this case now, like + * that in Raid-5 */ + else + *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateWriteDAG; + } + } + break; + + case 2: /* two disk faults */ + switch (npfail) { + case 2: /* both p and q dead */ + *createFunc = (RF_VoidFuncPtr) rf_EO_011_CreateWriteDAG; + break; + case 1: /* either p or q and dead data */ + RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); + RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); + if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) { + if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) + *createFunc = NULL; /* In both PQ and + * EvenOdd, no direct + * support for this case + * now, like that in + * Raid-5 */ + else + *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateWriteDAG; + } else { + if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) + *createFunc = NULL; /* No direct support for + * this case, like that + * in Raid-5 */ + else + *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateWriteDAG; + } + break; + case 0: /* double data loss */ + /* if(asmap->failedPDAs[0]->numSector + + * asmap->failedPDAs[1]->numSector == 2 * + * layoutPtr->sectorsPerStripeUnit ) createFunc = + * rf_EOCreateLargeWriteDAG; else */ + *createFunc = NULL; /* currently, in Evenodd, No + * support for simultaneous + * access of both failed SUs */ + break; + } + break; + + default: /* more than 2 disk faults */ + *createFunc = NULL; + 
RF_PANIC(); } - break; - - default: /* more than 2 disk faults */ - *createFunc = NULL; - RF_PANIC(); - } - return; + return; } -int rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_PhysDiskAddr_t *parityPDA; - int correct_it; - RF_RaidAccessFlags_t flags; +int +rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) + RF_Raid_t *raidPtr; + RF_RaidAddr_t raidAddr; + RF_PhysDiskAddr_t *parityPDA; + int correct_it; + RF_RaidAccessFlags_t flags; { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); - RF_SectorCount_t numsector = parityPDA->numSector; - int numbytes = rf_RaidAddressToByte(raidPtr, numsector); - int bytesPerStripe = numbytes * layoutPtr->numDataCol; - RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ - RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; - RF_AccessStripeMapHeader_t *asm_h; - RF_AccessStripeMap_t *asmap; - RF_AllocListElem_t *alloclist; - RF_PhysDiskAddr_t *pda; - char *pbuf, *buf, *end_p, *p; - char *redundantbuf2; - int redundantTwoErr = 0, redundantOneErr = 0; - int parity_cant_correct = RF_FALSE, red2_cant_correct = RF_FALSE, parity_corrected = RF_FALSE, red2_corrected = RF_FALSE; - int i, retcode; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); - int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - RF_AccTraceEntry_t tracerec; - RF_MCPair_t *mcpair; - - retcode = RF_PARITY_OKAY; - - mcpair = rf_AllocMCPair(); - rf_MakeAllocList(alloclist); - RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); - RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make sure buffer is zeroed */ - end_p = buf + bytesPerStripe; - RF_CallocAndAdd(redundantbuf2, 1, numbytes, (char *), alloclist); /* use 
calloc to make sure buffer is zeroed */ - - rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); - blockNode = rd_dag_h->succedents[0]; - unblockNode = blockNode->succedents[0]->succedents[0]; - - /* map the stripe and fill in the PDAs in the dag */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); - asmap = asm_h->stripeMap; - - for (pda=asmap->physInfo,i=0; i<layoutPtr->numDataCol; i++,pda=pda->next) { - RF_ASSERT(pda); - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) goto out; /* no way to verify parity if disk is dead. return w/ good status */ - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - - RF_ASSERT(!asmap->parityInfo->next); - rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); - RF_ASSERT(asmap->parityInfo->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) - goto out; - blockNode->succedents[ layoutPtr->numDataCol ]->params[0].p = asmap->parityInfo; - - RF_ASSERT(!asmap->qInfo->next); - rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->qInfo, 0, 1); - RF_ASSERT(asmap->qInfo->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, asmap->qInfo, 1)) goto out; - /* - * if disk is dead, b/c no reconstruction is implemented right now, - * the function "rf_TryToRedirectPDA" always return one, which cause - * go to out and return w/ good status - */ - blockNode->succedents[ layoutPtr->numDataCol +1 ]->params[0].p = asmap->qInfo; - - /* fire off the DAG */ - bzero((char *)&tracerec,sizeof(tracerec)); - rd_dag_h->tracerec = &tracerec; - - if (rf_verifyParityDebug) { - printf("Parity verify read dag:\n"); - rf_PrintDAGList(rd_dag_h); - } - - 
RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(rd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); - retcode = RF_PARITY_COULD_NOT_VERIFY; - goto out; - } - - for (p=buf, i=0; p<end_p; p+=numbytes, i++) { - rf_e_encToBuf(raidPtr, i, p, RF_EO_MATRIX_DIM - 2, redundantbuf2, numsector); - /* - * the corresponding columes in EvenOdd encoding Matrix for these p pointers which point - * to the databuffer in a full stripe are sequentially from 0 to layoutPtr->numDataCol-1 - */ - rf_bxor(p, pbuf, numbytes, NULL); - } - RF_ASSERT(i==layoutPtr->numDataCol); - - for (i=0; i<numbytes; i++) { - if (pbuf[i] != buf[bytesPerStripe+i]) { - if (!correct_it) { - RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", - i,(u_char) buf[bytesPerStripe+i],(u_char) pbuf[i]); - } - } - redundantOneErr = 1; - break; - } - - for (i=0; i<numbytes; i++) { - if (redundantbuf2[i] != buf[bytesPerStripe+numbytes+i]) { - if (!correct_it) { - RF_ERRORMSG3("Parity verify error: byte %d of second redundant information is 0x%x should be 0x%x\n", - i,(u_char) buf[bytesPerStripe+numbytes+i],(u_char) redundantbuf2[i]); - } - redundantTwoErr = 1; - break; - } - } - if (redundantOneErr || redundantTwoErr ) - retcode = RF_PARITY_BAD; - - /* correct the first redundant disk, ie parity if it is error */ - if (redundantOneErr && correct_it) { - wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); - wrBlock = wr_dag_h->succedents[0]; wrUnblock = wrBlock->succedents[0]->succedents[0]; - wrBlock->succedents[0]->params[0].p = asmap->parityInfo; - wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = 
RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - bzero((char *)&tracerec,sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug) { - printf("Parity verify write dag:\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); - parity_cant_correct = RF_TRUE; - } else { - parity_corrected = RF_TRUE; - } - rf_FreeDAG(wr_dag_h); - } - - if (redundantTwoErr && correct_it) { - wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, redundantbuf2, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wnred2", alloclist, flags, RF_IO_NORMAL_PRIORITY); - wrBlock = wr_dag_h->succedents[0]; wrUnblock = wrBlock->succedents[0]->succedents[0]; - wrBlock->succedents[0]->params[0].p = asmap->qInfo; - wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - bzero((char *)&tracerec,sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug) { - printf("Dag of write new second redundant information in parity verify :\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct second redundant information in VerifyParity: can't write the stripe\n"); - red2_cant_correct = RF_TRUE; - } else { - red2_corrected = RF_TRUE; - } - rf_FreeDAG(wr_dag_h); - } - if ( (redundantOneErr && parity_cant_correct) || - (redundantTwoErr && red2_cant_correct 
)) - retcode = RF_PARITY_COULD_NOT_CORRECT; - if ( (retcode = RF_PARITY_BAD) && parity_corrected && red2_corrected ) - retcode = RF_PARITY_CORRECTED; + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); + RF_SectorCount_t numsector = parityPDA->numSector; + int numbytes = rf_RaidAddressToByte(raidPtr, numsector); + int bytesPerStripe = numbytes * layoutPtr->numDataCol; + RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ + RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; + RF_AccessStripeMapHeader_t *asm_h; + RF_AccessStripeMap_t *asmap; + RF_AllocListElem_t *alloclist; + RF_PhysDiskAddr_t *pda; + char *pbuf, *buf, *end_p, *p; + char *redundantbuf2; + int redundantTwoErr = 0, redundantOneErr = 0; + int parity_cant_correct = RF_FALSE, red2_cant_correct = RF_FALSE, + parity_corrected = RF_FALSE, red2_corrected = RF_FALSE; + int i, retcode; + RF_ReconUnitNum_t which_ru; + RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); + int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; + RF_AccTraceEntry_t tracerec; + RF_MCPair_t *mcpair; + + retcode = RF_PARITY_OKAY; + + mcpair = rf_AllocMCPair(); + rf_MakeAllocList(alloclist); + RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); + RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make + * sure buffer is zeroed */ + end_p = buf + bytesPerStripe; + RF_CallocAndAdd(redundantbuf2, 1, numbytes, (char *), alloclist); /* use calloc to make + * sure buffer is zeroed */ + + rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, + "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); + blockNode = rd_dag_h->succedents[0]; + unblockNode = blockNode->succedents[0]->succedents[0]; + + /* map the stripe and fill in the PDAs in the dag */ + asm_h = rf_MapAccess(raidPtr, 
startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); + asmap = asm_h->stripeMap; + + for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { + RF_ASSERT(pda); + rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); + RF_ASSERT(pda->numSector != 0); + if (rf_TryToRedirectPDA(raidPtr, pda, 0)) + goto out; /* no way to verify parity if disk is + * dead. return w/ good status */ + blockNode->succedents[i]->params[0].p = pda; + blockNode->succedents[i]->params[2].v = psID; + blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } + + RF_ASSERT(!asmap->parityInfo->next); + rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); + RF_ASSERT(asmap->parityInfo->numSector != 0); + if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) + goto out; + blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; + + RF_ASSERT(!asmap->qInfo->next); + rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->qInfo, 0, 1); + RF_ASSERT(asmap->qInfo->numSector != 0); + if (rf_TryToRedirectPDA(raidPtr, asmap->qInfo, 1)) + goto out; + /* if disk is dead, b/c no reconstruction is implemented right now, + * the function "rf_TryToRedirectPDA" always return one, which cause + * go to out and return w/ good status */ + blockNode->succedents[layoutPtr->numDataCol + 1]->params[0].p = asmap->qInfo; + + /* fire off the DAG */ + bzero((char *) &tracerec, sizeof(tracerec)); + rd_dag_h->tracerec = &tracerec; + + if (rf_verifyParityDebug) { + printf("Parity verify read dag:\n"); + rf_PrintDAGList(rd_dag_h); + } + RF_LOCK_MUTEX(mcpair->mutex); + mcpair->flag = 0; + rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) mcpair); + while (!mcpair->flag) + RF_WAIT_COND(mcpair->cond, mcpair->mutex); + RF_UNLOCK_MUTEX(mcpair->mutex); + if (rd_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); + retcode = RF_PARITY_COULD_NOT_VERIFY; + 
goto out; + } + for (p = buf, i = 0; p < end_p; p += numbytes, i++) { + rf_e_encToBuf(raidPtr, i, p, RF_EO_MATRIX_DIM - 2, redundantbuf2, numsector); + /* the corresponding columes in EvenOdd encoding Matrix for + * these p pointers which point to the databuffer in a full + * stripe are sequentially from 0 to layoutPtr->numDataCol-1 */ + rf_bxor(p, pbuf, numbytes, NULL); + } + RF_ASSERT(i == layoutPtr->numDataCol); + + for (i = 0; i < numbytes; i++) { + if (pbuf[i] != buf[bytesPerStripe + i]) { + if (!correct_it) { + RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", + i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]); + } + } + redundantOneErr = 1; + break; + } + + for (i = 0; i < numbytes; i++) { + if (redundantbuf2[i] != buf[bytesPerStripe + numbytes + i]) { + if (!correct_it) { + RF_ERRORMSG3("Parity verify error: byte %d of second redundant information is 0x%x should be 0x%x\n", + i, (u_char) buf[bytesPerStripe + numbytes + i], (u_char) redundantbuf2[i]); + } + redundantTwoErr = 1; + break; + } + } + if (redundantOneErr || redundantTwoErr) + retcode = RF_PARITY_BAD; + + /* correct the first redundant disk, ie parity if it is error */ + if (redundantOneErr && correct_it) { + wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); + wrBlock = wr_dag_h->succedents[0]; + wrUnblock = wrBlock->succedents[0]->succedents[0]; + wrBlock->succedents[0]->params[0].p = asmap->parityInfo; + wrBlock->succedents[0]->params[2].v = psID; + wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + bzero((char *) &tracerec, sizeof(tracerec)); + wr_dag_h->tracerec = &tracerec; + if (rf_verifyParityDebug) { + printf("Parity verify write dag:\n"); + rf_PrintDAGList(wr_dag_h); + } + RF_LOCK_MUTEX(mcpair->mutex); + mcpair->flag = 0; + rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) mcpair); + 
while (!mcpair->flag) + RF_WAIT_COND(mcpair->cond, mcpair->mutex); + RF_UNLOCK_MUTEX(mcpair->mutex); + if (wr_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); + parity_cant_correct = RF_TRUE; + } else { + parity_corrected = RF_TRUE; + } + rf_FreeDAG(wr_dag_h); + } + if (redundantTwoErr && correct_it) { + wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, redundantbuf2, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + "Wnred2", alloclist, flags, RF_IO_NORMAL_PRIORITY); + wrBlock = wr_dag_h->succedents[0]; + wrUnblock = wrBlock->succedents[0]->succedents[0]; + wrBlock->succedents[0]->params[0].p = asmap->qInfo; + wrBlock->succedents[0]->params[2].v = psID; + wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + bzero((char *) &tracerec, sizeof(tracerec)); + wr_dag_h->tracerec = &tracerec; + if (rf_verifyParityDebug) { + printf("Dag of write new second redundant information in parity verify :\n"); + rf_PrintDAGList(wr_dag_h); + } + RF_LOCK_MUTEX(mcpair->mutex); + mcpair->flag = 0; + rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) mcpair); + while (!mcpair->flag) + RF_WAIT_COND(mcpair->cond, mcpair->mutex); + RF_UNLOCK_MUTEX(mcpair->mutex); + if (wr_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to correct second redundant information in VerifyParity: can't write the stripe\n"); + red2_cant_correct = RF_TRUE; + } else { + red2_corrected = RF_TRUE; + } + rf_FreeDAG(wr_dag_h); + } + if ((redundantOneErr && parity_cant_correct) || + (redundantTwoErr && red2_cant_correct)) + retcode = RF_PARITY_COULD_NOT_CORRECT; + if ((retcode = RF_PARITY_BAD) && parity_corrected && red2_corrected) + retcode = RF_PARITY_CORRECTED; out: - rf_FreeAccessStripeMap(asm_h); - rf_FreeAllocList(alloclist); - rf_FreeDAG(rd_dag_h); - rf_FreeMCPair(mcpair); - return(retcode); + rf_FreeAccessStripeMap(asm_h); + rf_FreeAllocList(alloclist); + rf_FreeDAG(rd_dag_h); + 
rf_FreeMCPair(mcpair); + return (retcode); } - -#endif /* RF_INCLUDE_EVENODD > 0 */ +#endif /* RF_INCLUDE_EVENODD > 0 */ diff --git a/sys/dev/raidframe/rf_evenodd.h b/sys/dev/raidframe/rf_evenodd.h index 24e5a811447..63b0d75c9b9 100644 --- a/sys/dev/raidframe/rf_evenodd.h +++ b/sys/dev/raidframe/rf_evenodd.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_evenodd.h,v 1.1 1999/01/11 14:29:21 niklas Exp $ */ -/* $NetBSD: rf_evenodd.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: rf_evenodd.h,v 1.2 1999/02/16 00:02:44 niklas Exp $ */ +/* $NetBSD: rf_evenodd.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ /* * Copyright (c) 1995, 1996 Carnegie-Mellon University. * All rights reserved. @@ -31,19 +31,25 @@ #define _RF__RF_EVENODD_H_ /* extern declerations of the failure mode functions. */ -int rf_ConfigureEvenOdd(RF_ShutdownList_t **shutdownListp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t *raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t *raidPtr); -void rf_IdentifyStripeEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, RF_RowCol_t *outrow); -void rf_MapParityEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_MapEEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_EODagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc); -int rf_VerifyParityEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags); +int +rf_ConfigureEvenOdd(RF_ShutdownList_t ** shutdownListp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t * raidPtr); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t * raidPtr); +void 
+rf_IdentifyStripeEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outrow); +void +rf_MapParityEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_MapEEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_EODagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); +int +rf_VerifyParityEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, + RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); -#endif /* !_RF__RF_EVENODD_H_ */ +#endif /* !_RF__RF_EVENODD_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd_dagfuncs.c b/sys/dev/raidframe/rf_evenodd_dagfuncs.c index 2762ac725af..3557c391841 100644 --- a/sys/dev/raidframe/rf_evenodd_dagfuncs.c +++ b/sys/dev/raidframe/rf_evenodd_dagfuncs.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_evenodd_dagfuncs.c,v 1.1 1999/01/11 14:29:21 niklas Exp $ */ -/* $NetBSD: rf_evenodd_dagfuncs.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: rf_evenodd_dagfuncs.c,v 1.2 1999/02/16 00:02:44 niklas Exp $ */ +/* $NetBSD: rf_evenodd_dagfuncs.c,v 1.2 1999/02/05 00:06:11 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -50,23 +50,22 @@ #include "rf_evenodd_dagfuncs.h" /* These redundant functions are for small write */ -RF_RedFuncs_t rf_EOSmallWritePFuncs = { rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P" }; -RF_RedFuncs_t rf_EOSmallWriteEFuncs = { rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E" }; - +RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"}; +RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"}; /* These redundant functions are for degraded read */ -RF_RedFuncs_t rf_eoPRecoveryFuncs = { rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"}; -RF_RedFuncs_t rf_eoERecoveryFuncs = { rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func" }; - +RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"}; +RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"}; /********************************************************************************************** - * the following encoding node functions is used in EO_000_CreateLargeWriteDAG + * the following encoding node functions is used in EO_000_CreateLargeWriteDAG **********************************************************************************************/ -int rf_RegularPEFunc(node) - RF_DagNode_t *node; +int +rf_RegularPEFunc(node) + RF_DagNode_t *node; { - rf_RegularESubroutine(node,node->results[1]); - rf_RegularXorFunc(node); /* does the wakeup here! */ + rf_RegularESubroutine(node, node->results[1]); + rf_RegularXorFunc(node);/* does the wakeup here! */ #if 1 - return(0); /* XXX This was missing... GO */ + return (0); /* XXX This was missing... GO */ #endif } @@ -80,808 +79,894 @@ int rf_RegularPEFunc(node) * writes( totally four). 
This simple old-new write and regular old-new write happen as in RAID-5 ************************************************************************************************/ -/* Algorithm: +/* Algorithm: 1. Store the difference of old data and new data in the Rod buffer. - 2. then encode this buffer into the buffer which already have old 'E' information inside it, + 2. then encode this buffer into the buffer which already have old 'E' information inside it, the result can be shown to be the new 'E' information. 3. xor the Wnd buffer into the difference buffer to recover the original old data. - Here we have another alternative: to allocate a temporary buffer for storing the difference of - old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach + Here we have another alternative: to allocate a temporary buffer for storing the difference of + old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach take the same speed as the previous, and need more memory. 
*/ -int rf_RegularONEFunc(node) - RF_DagNode_t *node; +int +rf_RegularONEFunc(node) + RF_DagNode_t *node; { - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout; - int EpdaIndex = (node->numParams-1)/2 - 1; /* the parameter of node where you can find e-pda */ - int i, k, retcode = 0; - int suoffset, length; - RF_RowCol_t scol; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p; - int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); /* generally zero */ - - RF_ASSERT( EPDA->type == RF_PDA_TYPE_Q ); - RF_ASSERT(ESUOffset == 0); - - RF_ETIMER_START(timer); - - /* Xor the Wnd buffer into Rod buffer, the difference of old data and new data is stored in Rod buffer */ - for( k=0; k< EpdaIndex; k += 2) { - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector ); - retcode = rf_bxor( node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp); - } - /* Start to encoding the buffer storing the difference of old data and new data into 'E' buffer */ - for (i=0; i<EpdaIndex; i+=2) if (node->params[i+1].p != node->results[0]) { /* results[0] is buf ptr of E */ - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - srcbuf = (char *) node->params[i+1].p; - scol = rf_EUCol(layoutPtr, pda->raidAddress ); - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - } - /* Recover the original old data to be used by parity encoding function in XorNode */ - for( k=0; k< EpdaIndex; k += 2) { - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector ); - retcode = rf_bxor( 
node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; + int EpdaIndex = (node->numParams - 1) / 2 - 1; /* the parameter of node + * where you can find + * e-pda */ + int i, k, retcode = 0; + int suoffset, length; + RF_RowCol_t scol; + char *srcbuf, *destbuf; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p; + int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); /* generally zero */ + + RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q); + RF_ASSERT(ESUOffset == 0); + + RF_ETIMER_START(timer); + + /* Xor the Wnd buffer into Rod buffer, the difference of old data and + * new data is stored in Rod buffer */ + for (k = 0; k < EpdaIndex; k += 2) { + length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); + retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp); + } + /* Start to encoding the buffer storing the difference of old data and + * new data into 'E' buffer */ + for (i = 0; i < EpdaIndex; i += 2) + if (node->params[i + 1].p != node->results[0]) { /* results[0] is buf ptr + * of E */ + pda = (RF_PhysDiskAddr_t *) node->params[i].p; + srcbuf = (char *) node->params[i + 1].p; + scol = rf_EUCol(layoutPtr, pda->raidAddress); + suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); + destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset); + rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); + } + /* Recover the original old data to be used by parity encoding + * function in XorNode */ + for (k = 0; k < EpdaIndex; k 
+= 2) { + length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); + retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp); + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); + rf_GenericWakeupFunc(node, 0); #if 1 - return(0); /* XXX this was missing.. GO */ + return (0); /* XXX this was missing.. GO */ #endif } -int rf_SimpleONEFunc(node) - RF_DagNode_t *node; +int +rf_SimpleONEFunc(node) + RF_DagNode_t *node; { - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout; - RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - int retcode = 0; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - int length; - RF_RowCol_t scol; - RF_Etimer_t timer; - - RF_ASSERT( ((RF_PhysDiskAddr_t *)node->params[2].p)->type == RF_PDA_TYPE_Q ); - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[4].p)->numSector );/* this is a pda of writeDataNodes */ - /* bxor to buffer of readDataNodes */ - retcode = rf_bxor( node->params[5].p, node->params[1].p, length, node->dagHdr->bp); - /* find out the corresponding colume in encoding matrix for write colume to be encoded into redundant disk 'E' */ - scol = rf_EUCol(layoutPtr, pda->raidAddress ); - srcbuf = node->params[1].p; - destbuf = node->params[3].p; - /* Start encoding process */ - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - rf_bxor( node->params[5].p, node->params[1].p, length, node->dagHdr->bp); - RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer); - - } - return(rf_GenericWakeupFunc(node, retcode)); /* call wake func explicitly since no I/O in this node */ + RF_Raid_t *raidPtr = (RF_Raid_t *) 
node->params[node->numParams - 1].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; + RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; + int retcode = 0; + char *srcbuf, *destbuf; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + int length; + RF_RowCol_t scol; + RF_Etimer_t timer; + + RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q); + if (node->dagHdr->status == rf_enable) { + RF_ETIMER_START(timer); + length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector); /* this is a pda of + * writeDataNodes */ + /* bxor to buffer of readDataNodes */ + retcode = rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp); + /* find out the corresponding colume in encoding matrix for + * write colume to be encoded into redundant disk 'E' */ + scol = rf_EUCol(layoutPtr, pda->raidAddress); + srcbuf = node->params[1].p; + destbuf = node->params[3].p; + /* Start encoding process */ + rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); + rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp); + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); + + } + return (rf_GenericWakeupFunc(node, retcode)); /* call wake func + * explicitly since no + * I/O in this node */ } /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. 
large write ********/ -void rf_RegularESubroutine(node, ebuf) - RF_DagNode_t *node; - char *ebuf; +void +rf_RegularESubroutine(node, ebuf) + RF_DagNode_t *node; + char *ebuf; { - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout; - RF_PhysDiskAddr_t *pda; - int i, suoffset; - RF_RowCol_t scol; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - RF_ETIMER_START(timer); - for (i=0; i<node->numParams-2; i+=2) { - RF_ASSERT( node->params[i+1].p != ebuf ); - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - scol = rf_EUCol(layoutPtr, pda->raidAddress ); - srcbuf = (char *) node->params[i+1].p; - destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset ); - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; + RF_PhysDiskAddr_t *pda; + int i, suoffset; + RF_RowCol_t scol; + char *srcbuf, *destbuf; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + + RF_ETIMER_START(timer); + for (i = 0; i < node->numParams - 2; i += 2) { + RF_ASSERT(node->params[i + 1].p != ebuf); + pda = (RF_PhysDiskAddr_t *) node->params[i].p; + suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); + scol = rf_EUCol(layoutPtr, pda->raidAddress); + srcbuf = (char *) node->params[i + 1].p; + destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset); + rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->xor_us += RF_ETIMER_VAL_US(timer); } 
/******************************************************************************************* - * Used in EO_001_CreateLargeWriteDAG + * Used in EO_001_CreateLargeWriteDAG ******************************************************************************************/ -int rf_RegularEFunc(node) - RF_DagNode_t *node; +int +rf_RegularEFunc(node) + RF_DagNode_t *node; { - rf_RegularESubroutine(node, node->results[0]); - rf_GenericWakeupFunc(node, 0); + rf_RegularESubroutine(node, node->results[0]); + rf_GenericWakeupFunc(node, 0); #if 1 - return(0); /* XXX this was missing?.. GO */ + return (0); /* XXX this was missing?.. GO */ #endif } - /******************************************************************************************* - * This degraded function allow only two case: - * 1. when write access the full failed stripe unit, then the access can be more than + * This degraded function allow only two case: + * 1. when write access the full failed stripe unit, then the access can be more than * one tripe units. - * 2. when write access only part of the failed SU, we assume accesses of more than - * one stripe unit is not allowed so that the write can be dealt with like a - * large write. - * The following function is based on these assumptions. So except in the second case, + * 2. when write access only part of the failed SU, we assume accesses of more than + * one stripe unit is not allowed so that the write can be dealt with like a + * large write. + * The following function is based on these assumptions. So except in the second case, * it looks the same as a large write encodeing function. But this is not exactly the - * normal way for doing a degraded write, since raidframe have to break cases of access - * other than the above two into smaller accesses. We may have to change - * DegrESubroutin in the future. + * normal way for doing a degraded write, since raidframe have to break cases of access + * other than the above two into smaller accesses. 
We may have to change + * DegrESubroutin in the future. *******************************************************************************************/ -void rf_DegrESubroutine(node, ebuf) - RF_DagNode_t *node; - char *ebuf; +void +rf_DegrESubroutine(node, ebuf) + RF_DagNode_t *node; + char *ebuf; { - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p; - RF_PhysDiskAddr_t *pda; - int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - RF_RowCol_t scol; - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - RF_ETIMER_START(timer); - for (i=0; i<node->numParams-2; i+=2) { - RF_ASSERT( node->params[i+1].p != ebuf ); - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - scol = rf_EUCol(layoutPtr, pda->raidAddress ); - srcbuf = (char *) node->params[i+1].p; - destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset-failedSUOffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - } - - RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer); + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; + RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; + RF_PhysDiskAddr_t *pda; + int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); + RF_RowCol_t scol; + char *srcbuf, *destbuf; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + + RF_ETIMER_START(timer); + for (i = 0; i < node->numParams - 2; i += 2) { + RF_ASSERT(node->params[i + 1].p != ebuf); + pda = (RF_PhysDiskAddr_t *) node->params[i].p; + 
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); + scol = rf_EUCol(layoutPtr, pda->raidAddress); + srcbuf = (char *) node->params[i + 1].p; + destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); + rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); + } + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); } /************************************************************************************** - * This function is used in case where one data disk failed and both redundant disks + * This function is used in case where one data disk failed and both redundant disks * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk * failed in the stripe but not accessed at this time, then we should, instead, use * the rf_EOWriteDoubleRecoveryFunc(). **************************************************************************************/ -int rf_Degraded_100_EOFunc(node) - RF_DagNode_t *node; +int +rf_Degraded_100_EOFunc(node) + RF_DagNode_t *node; { - rf_DegrESubroutine(node, node->results[1]); - rf_RecoveryXorFunc(node); /* does the wakeup here! */ + rf_DegrESubroutine(node, node->results[1]); + rf_RecoveryXorFunc(node); /* does the wakeup here! */ #if 1 - return(0); /* XXX this was missing... SHould these be void functions??? GO */ + return (0); /* XXX this was missing... SHould these be + * void functions??? GO */ #endif } - /************************************************************************************** * This function is to encode one sector in one of the data disks to the E disk. - * However, in evenodd this function can also be used as decoding function to recover + * However, in evenodd this function can also be used as decoding function to recover * data from dead disk in the case of parity failure and a single data failure. 
**************************************************************************************/ -void rf_e_EncOneSect( - RF_RowCol_t srcLogicCol, - char *srcSecbuf, - RF_RowCol_t destLogicCol, - char *destSecbuf, - int bytesPerSector) +void +rf_e_EncOneSect( + RF_RowCol_t srcLogicCol, + char *srcSecbuf, + RF_RowCol_t destLogicCol, + char *destSecbuf, + int bytesPerSector) { - int S_index; /* index of the EU in the src col which need be Xored into all EUs in a dest sector */ - int numRowInEncMatix = (RF_EO_MATRIX_DIM) -1; - RF_RowCol_t j, indexInDest, /* row index of an encoding unit in the destination colume of encoding matrix */ - indexInSrc; /* row index of an encoding unit in the source colume used for recovery */ - int bytesPerEU = bytesPerSector/numRowInEncMatix; + int S_index; /* index of the EU in the src col which need + * be Xored into all EUs in a dest sector */ + int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; + RF_RowCol_t j, indexInDest, /* row index of an encoding unit in + * the destination colume of encoding + * matrix */ + indexInSrc; /* row index of an encoding unit in the source + * colume used for recovery */ + int bytesPerEU = bytesPerSector / numRowInEncMatix; #if RF_EO_MATRIX_DIM > 17 - int shortsPerEU = bytesPerEU/sizeof(short); - short *destShortBuf, *srcShortBuf1, *srcShortBuf2; - register short temp1; + int shortsPerEU = bytesPerEU / sizeof(short); + short *destShortBuf, *srcShortBuf1, *srcShortBuf2; + register short temp1; #elif RF_EO_MATRIX_DIM == 17 - int longsPerEU = bytesPerEU/sizeof(long); - long *destLongBuf, *srcLongBuf1, *srcLongBuf2; - register long temp1; + int longsPerEU = bytesPerEU / sizeof(long); + long *destLongBuf, *srcLongBuf1, *srcLongBuf2; + register long temp1; #endif #if RF_EO_MATRIX_DIM > 17 - RF_ASSERT( sizeof(short) == 2 || sizeof(short) == 1 ); - RF_ASSERT( bytesPerEU % sizeof(short) == 0 ); + RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1); + RF_ASSERT(bytesPerEU % sizeof(short) == 0); #elif RF_EO_MATRIX_DIM == 17 - 
RF_ASSERT( sizeof(long) == 8 || sizeof(long) == 4 ); - RF_ASSERT( bytesPerEU % sizeof(long) == 0); + RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4); + RF_ASSERT(bytesPerEU % sizeof(long) == 0); #endif - S_index = rf_EO_Mod( ( RF_EO_MATRIX_DIM -1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); + S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); #if RF_EO_MATRIX_DIM > 17 - srcShortBuf1 = (short *)(srcSecbuf + S_index * bytesPerEU); + srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU); #elif RF_EO_MATRIX_DIM == 17 - srcLongBuf1 = (long *)(srcSecbuf + S_index * bytesPerEU); + srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU); #endif - for( indexInDest = 0; indexInDest < numRowInEncMatix ; indexInDest++){ - indexInSrc = rf_EO_Mod( (indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM ); + for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) { + indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); #if RF_EO_MATRIX_DIM > 17 - destShortBuf = (short *)(destSecbuf + indexInDest * bytesPerEU); - srcShortBuf2 = (short *)(srcSecbuf + indexInSrc * bytesPerEU); - for(j=0; j < shortsPerEU; j++) { - temp1 = destShortBuf[j]^srcShortBuf1[j]; - /* note: S_index won't be at the end row for any src col! */ - if(indexInSrc != RF_EO_MATRIX_DIM -1) destShortBuf[j] = (srcShortBuf2[j])^temp1; - /* if indexInSrc is at the end row, ie. RF_EO_MATRIX_DIM -1, then all elements are zero! */ - else destShortBuf[j] = temp1; - } + destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU); + srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU); + for (j = 0; j < shortsPerEU; j++) { + temp1 = destShortBuf[j] ^ srcShortBuf1[j]; + /* note: S_index won't be at the end row for any src + * col! */ + if (indexInSrc != RF_EO_MATRIX_DIM - 1) + destShortBuf[j] = (srcShortBuf2[j]) ^ temp1; + /* if indexInSrc is at the end row, ie. 
+ * RF_EO_MATRIX_DIM -1, then all elements are zero! */ + else + destShortBuf[j] = temp1; + } #elif RF_EO_MATRIX_DIM == 17 - destLongBuf = (long *)(destSecbuf + indexInDest * bytesPerEU); - srcLongBuf2 = (long *)(srcSecbuf + indexInSrc * bytesPerEU); - for(j=0; j < longsPerEU; j++) { - temp1 = destLongBuf[j]^srcLongBuf1[j]; - if(indexInSrc != RF_EO_MATRIX_DIM -1) destLongBuf[j] = (srcLongBuf2[j])^temp1; - else destLongBuf[j] = temp1; - } + destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU); + srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU); + for (j = 0; j < longsPerEU; j++) { + temp1 = destLongBuf[j] ^ srcLongBuf1[j]; + if (indexInSrc != RF_EO_MATRIX_DIM - 1) + destLongBuf[j] = (srcLongBuf2[j]) ^ temp1; + else + destLongBuf[j] = temp1; + } #endif - } + } } -void rf_e_encToBuf( - RF_Raid_t *raidPtr, - RF_RowCol_t srcLogicCol, - char *srcbuf, - RF_RowCol_t destLogicCol, - char *destbuf, - int numSector) +void +rf_e_encToBuf( + RF_Raid_t * raidPtr, + RF_RowCol_t srcLogicCol, + char *srcbuf, + RF_RowCol_t destLogicCol, + char *destbuf, + int numSector) { - int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); - - for (i=0; i < numSector; i++) - { - rf_e_EncOneSect( srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector); - srcbuf += bytesPerSector; - destbuf += bytesPerSector; - } -} + int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); -/************************************************************************************** - * when parity die and one data die, We use second redundant information, 'E', - * to recover the data in dead disk. 
This function is used in the recovery node of - * for EO_110_CreateReadDAG + for (i = 0; i < numSector; i++) { + rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector); + srcbuf += bytesPerSector; + destbuf += bytesPerSector; + } +} +/************************************************************************************** + * when parity die and one data die, We use second redundant information, 'E', + * to recover the data in dead disk. This function is used in the recovery node of + * for EO_110_CreateReadDAG **************************************************************************************/ -int rf_RecoveryEFunc(node) - RF_DagNode_t *node; +int +rf_RecoveryEFunc(node) + RF_DagNode_t *node; { - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p; - RF_RowCol_t scol, /*source logical column*/ - fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress ); /* logical column of failed SU */ - int i; - RF_PhysDiskAddr_t *pda; - int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr,failedPDA->startSector); - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - - bzero( (char *)node->results[0], rf_RaidAddressToByte(raidPtr,failedPDA->numSector)); - if (node->dagHdr->status == rf_enable) { - RF_ETIMER_START(timer); - for (i=0; i<node->numParams-2; i+=2) if (node->params[i+1].p != node->results[0]) { - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - if( i == node->numParams - 4 ) scol = RF_EO_MATRIX_DIM - 2; /* the colume of redundant E */ - else scol = rf_EUCol(layoutPtr, pda->raidAddress ); - srcbuf = (char *) node->params[i+1].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset-failedSUOffset); - rf_e_encToBuf(raidPtr, scol, 
srcbuf, fcol, destbuf, pda->numSector); - } - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->xor_us += RF_ETIMER_VAL_US(timer); - } - return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */ + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; + RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; + RF_RowCol_t scol, /* source logical column */ + fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress); /* logical column of + * failed SU */ + int i; + RF_PhysDiskAddr_t *pda; + int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); + char *srcbuf, *destbuf; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + + bzero((char *) node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); + if (node->dagHdr->status == rf_enable) { + RF_ETIMER_START(timer); + for (i = 0; i < node->numParams - 2; i += 2) + if (node->params[i + 1].p != node->results[0]) { + pda = (RF_PhysDiskAddr_t *) node->params[i].p; + if (i == node->numParams - 4) + scol = RF_EO_MATRIX_DIM - 2; /* the colume of + * redundant E */ + else + scol = rf_EUCol(layoutPtr, pda->raidAddress); + srcbuf = (char *) node->params[i + 1].p; + suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); + destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); + rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector); + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->xor_us += RF_ETIMER_VAL_US(timer); + } + return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */ } - /************************************************************************************** * This function is used in the case where one data and the parity have filed. 
* (in EO_110_CreateWriteDAG ) **************************************************************************************/ -int rf_EO_DegradedWriteEFunc(RF_DagNode_t *node) +int +rf_EO_DegradedWriteEFunc(RF_DagNode_t * node) { - rf_DegrESubroutine(node, node->results[0]); - rf_GenericWakeupFunc(node, 0); + rf_DegrESubroutine(node, node->results[0]); + rf_GenericWakeupFunc(node, 0); #if 1 - return(0); /* XXX Yet another one!! GO */ + return (0); /* XXX Yet another one!! GO */ #endif } - + /************************************************************************************** * THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES **************************************************************************************/ -void rf_doubleEOdecode( - RF_Raid_t *raidPtr, - char **rrdbuf, - char **dest, - RF_RowCol_t *fcol, - char *pbuf, - char *ebuf) +void +rf_doubleEOdecode( + RF_Raid_t * raidPtr, + char **rrdbuf, + char **dest, + RF_RowCol_t * fcol, + char *pbuf, + char *ebuf) { - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout); - int i, j, k, f1, f2, row; - int rrdrow, erow, count = 0; - int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 ); - int numRowInEncMatix = (RF_EO_MATRIX_DIM) -1; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); + int i, j, k, f1, f2, row; + int rrdrow, erow, count = 0; + int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); + int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; #if 0 - int pcol = (RF_EO_MATRIX_DIM) - 1; + int pcol = (RF_EO_MATRIX_DIM) - 1; #endif - int ecol = (RF_EO_MATRIX_DIM) - 2; - int bytesPerEU = bytesPerSector/numRowInEncMatix; - int numDataCol = layoutPtr->numDataCol; + int ecol = (RF_EO_MATRIX_DIM) - 2; + int bytesPerEU = bytesPerSector / numRowInEncMatix; + int numDataCol = layoutPtr->numDataCol; #if RF_EO_MATRIX_DIM > 17 - int shortsPerEU = bytesPerEU/sizeof(short); - short *rrdbuf_current, *pbuf_current, *ebuf_current; - short *dest_smaller, *dest_smaller_current, *dest_larger, 
*dest_larger_current; - register short *temp; - short *P; - - RF_ASSERT( bytesPerEU % sizeof(short) == 0); - RF_Malloc(P, bytesPerEU, (short *)); - RF_Malloc(temp, bytesPerEU, (short *)); + int shortsPerEU = bytesPerEU / sizeof(short); + short *rrdbuf_current, *pbuf_current, *ebuf_current; + short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; + register short *temp; + short *P; + + RF_ASSERT(bytesPerEU % sizeof(short) == 0); + RF_Malloc(P, bytesPerEU, (short *)); + RF_Malloc(temp, bytesPerEU, (short *)); #elif RF_EO_MATRIX_DIM == 17 - int longsPerEU = bytesPerEU/sizeof(long); - long *rrdbuf_current, *pbuf_current, *ebuf_current; - long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; - register long *temp; - long *P; - - RF_ASSERT( bytesPerEU % sizeof(long) == 0); - RF_Malloc(P, bytesPerEU, (long *)); - RF_Malloc(temp, bytesPerEU, (long *)); + int longsPerEU = bytesPerEU / sizeof(long); + long *rrdbuf_current, *pbuf_current, *ebuf_current; + long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; + register long *temp; + long *P; + + RF_ASSERT(bytesPerEU % sizeof(long) == 0); + RF_Malloc(P, bytesPerEU, (long *)); + RF_Malloc(temp, bytesPerEU, (long *)); #endif - RF_ASSERT( *((long *)dest[0]) == 0); - RF_ASSERT( *((long *)dest[1]) == 0); - bzero((char *)P, bytesPerEU); - bzero((char *)temp, bytesPerEU); - RF_ASSERT( *P == 0 ); - /* calculate the 'P' parameter, which, not parity, is the Xor of all elements in - the last two column, ie. 'E' and 'parity' colume, see the Ref. paper by Blaum, et al 1993 */ - for( i=0; i< numRowInEncMatix; i++) - for( k=0; k< longsPerEU; k++) { + RF_ASSERT(*((long *) dest[0]) == 0); + RF_ASSERT(*((long *) dest[1]) == 0); + bzero((char *) P, bytesPerEU); + bzero((char *) temp, bytesPerEU); + RF_ASSERT(*P == 0); + /* calculate the 'P' parameter, which, not parity, is the Xor of all + * elements in the last two column, ie. 'E' and 'parity' colume, see + * the Ref. 
paper by Blaum, et al 1993 */ + for (i = 0; i < numRowInEncMatix; i++) + for (k = 0; k < longsPerEU; k++) { #if RF_EO_MATRIX_DIM > 17 - ebuf_current = ((short *)ebuf) + i*shortsPerEU + k; - pbuf_current = ((short *)pbuf) + i*shortsPerEU + k; + ebuf_current = ((short *) ebuf) + i * shortsPerEU + k; + pbuf_current = ((short *) pbuf) + i * shortsPerEU + k; #elif RF_EO_MATRIX_DIM == 17 - ebuf_current = ((long *)ebuf) + i*longsPerEU + k; - pbuf_current = ((long *)pbuf) + i*longsPerEU + k; + ebuf_current = ((long *) ebuf) + i * longsPerEU + k; + pbuf_current = ((long *) pbuf) + i * longsPerEU + k; #endif - P[k] ^= *ebuf_current; - P[k] ^= *pbuf_current; - } - RF_ASSERT( fcol[0] != fcol[1] ); - if( fcol[0] < fcol[1] ) { + P[k] ^= *ebuf_current; + P[k] ^= *pbuf_current; + } + RF_ASSERT(fcol[0] != fcol[1]); + if (fcol[0] < fcol[1]) { #if RF_EO_MATRIX_DIM > 17 - dest_smaller = (short *)(dest[0]); - dest_larger = (short *)(dest[1]); + dest_smaller = (short *) (dest[0]); + dest_larger = (short *) (dest[1]); #elif RF_EO_MATRIX_DIM == 17 - dest_smaller = (long *)(dest[0]); - dest_larger = (long *)(dest[1]); + dest_smaller = (long *) (dest[0]); + dest_larger = (long *) (dest[1]); #endif - f1 = fcol[0]; - f2 = fcol[1]; - } - else { + f1 = fcol[0]; + f2 = fcol[1]; + } else { #if RF_EO_MATRIX_DIM > 17 - dest_smaller = (short *)(dest[1]); - dest_larger = (short *)(dest[0]); + dest_smaller = (short *) (dest[1]); + dest_larger = (short *) (dest[0]); #elif RF_EO_MATRIX_DIM == 17 - dest_smaller = (long *)(dest[1]); - dest_larger = (long *)(dest[0]); + dest_smaller = (long *) (dest[1]); + dest_larger = (long *) (dest[0]); #endif - f1 = fcol[1]; - f2 = fcol[0]; - } - row = (RF_EO_MATRIX_DIM) -1; - while( (row = rf_EO_Mod( (row+f1-f2), RF_EO_MATRIX_DIM )) != ( (RF_EO_MATRIX_DIM) -1) ) - { + f1 = fcol[1]; + f2 = fcol[0]; + } + row = (RF_EO_MATRIX_DIM) - 1; + while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) { #if RF_EO_MATRIX_DIM > 17 - 
dest_larger_current = dest_larger + row*shortsPerEU; - dest_smaller_current = dest_smaller + row*shortsPerEU; + dest_larger_current = dest_larger + row * shortsPerEU; + dest_smaller_current = dest_smaller + row * shortsPerEU; #elif RF_EO_MATRIX_DIM == 17 - dest_larger_current = dest_larger + row*longsPerEU; - dest_smaller_current = dest_smaller + row*longsPerEU; + dest_larger_current = dest_larger + row * longsPerEU; + dest_smaller_current = dest_smaller + row * longsPerEU; #endif - /** Do the diagonal recovery. Initially, temp[k] = (failed 1), - which is the failed data in the colume which has smaller col index. **/ - /* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */ - for( j=0; j< numDataCol; j++) - { - if( j == f1 || j == f2 ) continue; - rrdrow = rf_EO_Mod( (row+f2-j), RF_EO_MATRIX_DIM ); - if ( rrdrow != (RF_EO_MATRIX_DIM) -1 ) { + /** Do the diagonal recovery. Initially, temp[k] = (failed 1), + which is the failed data in the colume which has smaller col index. **/ + /* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */ + for (j = 0; j < numDataCol; j++) { + if (j == f1 || j == f2) + continue; + rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM); + if (rrdrow != (RF_EO_MATRIX_DIM) - 1) { #if RF_EO_MATRIX_DIM > 17 - rrdbuf_current = (short *)(rrdbuf[j]) + rrdrow * shortsPerEU; - for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current + k); + rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU; + for (k = 0; k < shortsPerEU; k++) + temp[k] ^= *(rrdbuf_current + k); #elif RF_EO_MATRIX_DIM == 17 - rrdbuf_current = (long *)(rrdbuf[j]) + rrdrow * longsPerEU; - for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current + k); + rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU; + for (k = 0; k < longsPerEU; k++) + temp[k] ^= *(rrdbuf_current + k); #endif - } - } - /* step 2: ^E(erow,m-2), If erow is at the buttom row, don't Xor into it - E(erow,m-2) = (principle diagonal) ^ (failed 1) ^ (failed 2) - ^ ( SUM of nonfailed 
in-diagonal A(rrdrow,0..m-3) ) - After this step, temp[k] = (principle diagonal) ^ (failed 2) */ - - erow = rf_EO_Mod( (row+f2-ecol), (RF_EO_MATRIX_DIM) ); - if ( erow != (RF_EO_MATRIX_DIM) -1) { + } + } + /* step 2: ^E(erow,m-2), If erow is at the buttom row, don't + * Xor into it E(erow,m-2) = (principle diagonal) ^ (failed + * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal + * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle + * diagonal) ^ (failed 2) */ + + erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM)); + if (erow != (RF_EO_MATRIX_DIM) - 1) { #if RF_EO_MATRIX_DIM > 17 - ebuf_current = (short *)ebuf + shortsPerEU * erow; - for (k=0; k< shortsPerEU; k++) temp[k] ^= *(ebuf_current+k); + ebuf_current = (short *) ebuf + shortsPerEU * erow; + for (k = 0; k < shortsPerEU; k++) + temp[k] ^= *(ebuf_current + k); #elif RF_EO_MATRIX_DIM == 17 - ebuf_current = (long *)ebuf + longsPerEU * erow; - for (k=0; k< longsPerEU; k++) temp[k] ^= *(ebuf_current+k); + ebuf_current = (long *) ebuf + longsPerEU * erow; + for (k = 0; k < longsPerEU; k++) + temp[k] ^= *(ebuf_current + k); #endif - } - /* step 3: ^P to obtain the failed data (failed 2). - P can be proved to be actually (principle diagonal) - After this step, temp[k] = (failed 2), the failed data to be recovered */ + } + /* step 3: ^P to obtain the failed data (failed 2). 
P can be + * proved to be actually (principle diagonal) After this + * step, temp[k] = (failed 2), the failed data to be recovered */ #if RF_EO_MATRIX_DIM > 17 - for (k=0; k< shortsPerEU; k++) temp[k] ^= P[k]; - /* Put the data to the destination buffer */ - for (k=0; k< shortsPerEU; k++) dest_larger_current[k] = temp[k]; + for (k = 0; k < shortsPerEU; k++) + temp[k] ^= P[k]; + /* Put the data to the destination buffer */ + for (k = 0; k < shortsPerEU; k++) + dest_larger_current[k] = temp[k]; #elif RF_EO_MATRIX_DIM == 17 - for (k=0; k< longsPerEU; k++) temp[k] ^= P[k]; - /* Put the data to the destination buffer */ - for (k=0; k< longsPerEU; k++) dest_larger_current[k] = temp[k]; + for (k = 0; k < longsPerEU; k++) + temp[k] ^= P[k]; + /* Put the data to the destination buffer */ + for (k = 0; k < longsPerEU; k++) + dest_larger_current[k] = temp[k]; #endif - /** THE FOLLOWING DO THE HORIZONTAL XOR **/ - /* step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data columes */ - for (j=0; j< numDataCol; j++) - { - if( j == f1 || j == f2 ) continue; + /** THE FOLLOWING DO THE HORIZONTAL XOR **/ + /* step 1: ^(SUM of A(row,0..m-3)), ie. 
all nonfailed data + * columes */ + for (j = 0; j < numDataCol; j++) { + if (j == f1 || j == f2) + continue; #if RF_EO_MATRIX_DIM > 17 - rrdbuf_current = (short *)(rrdbuf[j]) + row * shortsPerEU; - for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current+k); + rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU; + for (k = 0; k < shortsPerEU; k++) + temp[k] ^= *(rrdbuf_current + k); #elif RF_EO_MATRIX_DIM == 17 - rrdbuf_current = (long *)(rrdbuf[j]) + row * longsPerEU; - for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current+k); + rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU; + for (k = 0; k < longsPerEU; k++) + temp[k] ^= *(rrdbuf_current + k); #endif - } - /* step 2: ^A(row,m-1) */ - /* step 3: Put the data to the destination buffer */ + } + /* step 2: ^A(row,m-1) */ + /* step 3: Put the data to the destination buffer */ #if RF_EO_MATRIX_DIM > 17 - pbuf_current = (short *)pbuf + shortsPerEU * row; - for (k=0; k< shortsPerEU; k++) temp[k] ^= *(pbuf_current+k); - for (k=0; k< shortsPerEU; k++) dest_smaller_current[k] = temp[k]; + pbuf_current = (short *) pbuf + shortsPerEU * row; + for (k = 0; k < shortsPerEU; k++) + temp[k] ^= *(pbuf_current + k); + for (k = 0; k < shortsPerEU; k++) + dest_smaller_current[k] = temp[k]; #elif RF_EO_MATRIX_DIM == 17 - pbuf_current = (long *)pbuf + longsPerEU * row; - for (k=0; k< longsPerEU; k++) temp[k] ^= *(pbuf_current+k); - for (k=0; k< longsPerEU; k++) dest_smaller_current[k] = temp[k]; + pbuf_current = (long *) pbuf + longsPerEU * row; + for (k = 0; k < longsPerEU; k++) + temp[k] ^= *(pbuf_current + k); + for (k = 0; k < longsPerEU; k++) + dest_smaller_current[k] = temp[k]; #endif - count++; - } - /* Check if all Encoding Unit in the data buffer have been decoded, - according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number, - this algorithm will covered all buffer */ - RF_ASSERT( count == numRowInEncMatix ); - RF_Free((char *)P, bytesPerEU); - RF_Free((char *)temp, bytesPerEU); + count++; + } + 
/* Check if all Encoding Unit in the data buffer have been decoded, + * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number, + * this algorithm will covered all buffer */ + RF_ASSERT(count == numRowInEncMatix); + RF_Free((char *) P, bytesPerEU); + RF_Free((char *) temp, bytesPerEU); } - + /*************************************************************************************** * This function is called by double degragded read -* EO_200_CreateReadDAG +* EO_200_CreateReadDAG * ***************************************************************************************/ -int rf_EvenOddDoubleRecoveryFunc(node) - RF_DagNode_t *node; +int +rf_EvenOddDoubleRecoveryFunc(node) + RF_DagNode_t *node; { - int ndataParam = 0; - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout); - int i, prm, sector, nresults = node->numResults; - RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - unsigned sosAddr; - int two = 0, mallc_one= 0, mallc_two = 0; /* flags to indicate if memory is allocated */ - int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 ); - RF_PhysDiskAddr_t *ppda,*ppda2,*epda,*epda2,*pda, *pda0, *pda1, npda; - RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol; - char **buf, *ebuf, *pbuf, *dest[2]; - long *suoff=NULL, *suend=NULL, *prmToCol=NULL, psuoff, esuoff; - RF_SectorNum_t startSector, endSector; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ETIMER_START(timer); - - /* Find out the number of parameters which are pdas for data information */ - for (i = 0; i<= np; i++) - if( ((RF_PhysDiskAddr_t *)node->params[i].p)->type != RF_PDA_TYPE_DATA) {ndataParam = i ; break; } - - RF_Malloc(buf, numDataCol*sizeof(char *), (char **)); - if (ndataParam != 0 ){ - RF_Malloc(suoff, ndataParam*sizeof(long), (long *) 
); - RF_Malloc(suend, ndataParam*sizeof(long), (long *) ); - RF_Malloc(prmToCol, ndataParam*sizeof(long), (long *) ); - } - - if (asmap->failedPDAs[1] && - (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) { - RF_ASSERT(0); /* currently, no support for this situation */ - ppda = node->params[np-6].p; - ppda2 = node->params[np-5].p; - RF_ASSERT( ppda2->type == RF_PDA_TYPE_PARITY ); - epda = node->params[np-4].p; - epda2 = node->params[np-3].p; - RF_ASSERT( epda2->type == RF_PDA_TYPE_Q ); - two = 1; - } - else { - ppda = node->params[np-4].p; - epda = node->params[np-3].p; - psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector); - esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector); - RF_ASSERT( psuoff == esuoff ); - } - /* - the followings have three goals: - 1. determine the startSector to begin decoding and endSector to end decoding. - 2. determine the colume numbers of the two failed disks. - 3. determine the offset and end offset of the access within each failed stripe unit. 
- */ - if( nresults == 1 ) { - /* find the startSector to begin decoding */ - pda = node->results[0]; - bzero(pda->bufPtr, bytesPerSector*pda->numSector ); - fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector ); - fsuend[0] = fsuoff[0] + pda->numSector; - startSector = fsuoff[0]; - endSector = fsuend[0]; - - /* find out the the column of failed disk being accessed */ - fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress ); - - /* find out the other failed colume not accessed */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i=0; i < numDataCol; i++) { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != fcol[0]) break; - } - RF_ASSERT (i < numDataCol); - fcol[1] = i; - } - else { - RF_ASSERT ( nresults == 2 ); - pda0 = node->results[0]; bzero(pda0->bufPtr, bytesPerSector*pda0->numSector ); - pda1 = node->results[1]; bzero(pda1->bufPtr, bytesPerSector*pda1->numSector ); - /* determine the failed colume numbers of the two failed disks. */ - fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress ); - fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress ); - /* determine the offset and end offset of the access within each failed stripe unit. 
*/ - fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector ); - fsuend[0] = fsuoff[0] + pda0->numSector; - fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector ); - fsuend[1] = fsuoff[1] + pda1->numSector; - /* determine the startSector to begin decoding */ - startSector = RF_MIN( pda0->startSector, pda1->startSector ); - /* determine the endSector to end decoding */ - endSector = RF_MAX( fsuend[0], fsuend[1] ); - } - /* - assign the beginning sector and the end sector for each parameter - find out the corresponding colume # for each parameter - */ - for( prm=0; prm < ndataParam; prm++ ) { - pda = node->params[prm].p; - suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector); - suend[prm] = suoff[prm] + pda->numSector; - prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress ); - } - /* 'sector' is the sector for the current decoding algorithm. For each sector in the failed SU, - find out the corresponding parameters that cover the current sector and that are needed for - decoding of this sector in failed SU. 2. Find out if sector is in the shadow of any accessed - failed SU. If not, malloc a temporary space of a sector in size. - */ - for( sector = startSector; sector < endSector; sector++ ){ - if ( nresults == 2 ) - if( !(fsuoff[0]<=sector && sector<fsuend[0]) && !(fsuoff[1]<=sector && sector<fsuend[1]) )continue; - for( prm=0; prm < ndataParam; prm++ ) - if( suoff[prm] <= sector && sector < suend[prm] ) - buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *)node->params[prm].p)->bufPtr + - rf_RaidAddressToByte(raidPtr, sector-suoff[prm]); - /* find out if sector is in the shadow of any accessed failed SU. If yes, assign dest[0], dest[1] to point - at suitable position of the buffer corresponding to failed SUs. if no, malloc a temporary space of - a sector in size for destination of decoding. 
- */ - RF_ASSERT( nresults == 1 || nresults == 2 ); - if ( nresults == 1) { - dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]); - /* Always malloc temp buffer to dest[1] */ - RF_Malloc( dest[1], bytesPerSector, (char *) ); - bzero(dest[1],bytesPerSector); mallc_two = 1; } - else { - if( fsuoff[0] <= sector && sector < fsuend[0] ) - dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]); - else { RF_Malloc( dest[0], bytesPerSector, (char *) ); - bzero(dest[0],bytesPerSector); mallc_one = 1; } - if( fsuoff[1] <= sector && sector < fsuend[1] ) - dest[1] = ((RF_PhysDiskAddr_t *)node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[1]); - else { RF_Malloc( dest[1], bytesPerSector, (char *) ); - bzero(dest[1],bytesPerSector); mallc_two = 1; } - RF_ASSERT( mallc_one == 0 || mallc_two == 0 ); - } - pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-psuoff ); - ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-esuoff ); - /* - * After finish finding all needed sectors, call doubleEOdecode function for decoding - * one sector to destination. 
- */ - rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf ); - /* free all allocated memory, and mark flag to indicate no memory is being allocated */ - if( mallc_one == 1) RF_Free( dest[0], bytesPerSector ); - if( mallc_two == 1) RF_Free( dest[1], bytesPerSector ); - mallc_one = mallc_two = 0; - } - RF_Free(buf, numDataCol*sizeof(char *)); - if (ndataParam != 0){ - RF_Free(suoff, ndataParam*sizeof(long)); - RF_Free(suend, ndataParam*sizeof(long)); - RF_Free(prmToCol, ndataParam*sizeof(long)); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) { - tracerec->q_us += RF_ETIMER_VAL_US(timer); - } - rf_GenericWakeupFunc(node,0); + int ndataParam = 0; + int np = node->numParams; + RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); + int i, prm, sector, nresults = node->numResults; + RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; + unsigned sosAddr; + int two = 0, mallc_one = 0, mallc_two = 0; /* flags to indicate if + * memory is allocated */ + int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); + RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1, + npda; + RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol; + char **buf, *ebuf, *pbuf, *dest[2]; + long *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff; + RF_SectorNum_t startSector, endSector; + RF_Etimer_t timer; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + + RF_ETIMER_START(timer); + + /* Find out the number of parameters which are pdas for data + * information */ + for (i = 0; i <= np; i++) + if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) { + ndataParam = i; + break; + } + RF_Malloc(buf, numDataCol * sizeof(char *), (char **)); + if (ndataParam != 0) { + RF_Malloc(suoff, ndataParam * sizeof(long), (long *)); + 
RF_Malloc(suend, ndataParam * sizeof(long), (long *)); + RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *)); + } + if (asmap->failedPDAs[1] && + (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) { + RF_ASSERT(0); /* currently, no support for this situation */ + ppda = node->params[np - 6].p; + ppda2 = node->params[np - 5].p; + RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY); + epda = node->params[np - 4].p; + epda2 = node->params[np - 3].p; + RF_ASSERT(epda2->type == RF_PDA_TYPE_Q); + two = 1; + } else { + ppda = node->params[np - 4].p; + epda = node->params[np - 3].p; + psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector); + esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector); + RF_ASSERT(psuoff == esuoff); + } + /* + the followings have three goals: + 1. determine the startSector to begin decoding and endSector to end decoding. + 2. determine the colume numbers of the two failed disks. + 3. determine the offset and end offset of the access within each failed stripe unit. 
+ */ + if (nresults == 1) { + /* find the startSector to begin decoding */ + pda = node->results[0]; + bzero(pda->bufPtr, bytesPerSector * pda->numSector); + fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector); + fsuend[0] = fsuoff[0] + pda->numSector; + startSector = fsuoff[0]; + endSector = fsuend[0]; + + /* find out the the column of failed disk being accessed */ + fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress); + + /* find out the other failed colume not accessed */ + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + for (i = 0; i < numDataCol; i++) { + npda.raidAddress = sosAddr + (i * secPerSU); + (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); + /* skip over dead disks */ + if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) + if (i != fcol[0]) + break; + } + RF_ASSERT(i < numDataCol); + fcol[1] = i; + } else { + RF_ASSERT(nresults == 2); + pda0 = node->results[0]; + bzero(pda0->bufPtr, bytesPerSector * pda0->numSector); + pda1 = node->results[1]; + bzero(pda1->bufPtr, bytesPerSector * pda1->numSector); + /* determine the failed colume numbers of the two failed + * disks. */ + fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress); + fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress); + /* determine the offset and end offset of the access within + * each failed stripe unit. 
*/ + fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector); + fsuend[0] = fsuoff[0] + pda0->numSector; + fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector); + fsuend[1] = fsuoff[1] + pda1->numSector; + /* determine the startSector to begin decoding */ + startSector = RF_MIN(pda0->startSector, pda1->startSector); + /* determine the endSector to end decoding */ + endSector = RF_MAX(fsuend[0], fsuend[1]); + } + /* + assign the beginning sector and the end sector for each parameter + find out the corresponding colume # for each parameter + */ + for (prm = 0; prm < ndataParam; prm++) { + pda = node->params[prm].p; + suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector); + suend[prm] = suoff[prm] + pda->numSector; + prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress); + } + /* 'sector' is the sector for the current decoding algorithm. For each + * sector in the failed SU, find out the corresponding parameters that + * cover the current sector and that are needed for decoding of this + * sector in failed SU. 2. Find out if sector is in the shadow of any + * accessed failed SU. If not, malloc a temporary space of a sector in + * size. */ + for (sector = startSector; sector < endSector; sector++) { + if (nresults == 2) + if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1])) + continue; + for (prm = 0; prm < ndataParam; prm++) + if (suoff[prm] <= sector && sector < suend[prm]) + buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr + + rf_RaidAddressToByte(raidPtr, sector - suoff[prm]); + /* find out if sector is in the shadow of any accessed failed + * SU. If yes, assign dest[0], dest[1] to point at suitable + * position of the buffer corresponding to failed SUs. if no, + * malloc a temporary space of a sector in size for + * destination of decoding. 
*/ + RF_ASSERT(nresults == 1 || nresults == 2); + if (nresults == 1) { + dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); + /* Always malloc temp buffer to dest[1] */ + RF_Malloc(dest[1], bytesPerSector, (char *)); + bzero(dest[1], bytesPerSector); + mallc_two = 1; + } else { + if (fsuoff[0] <= sector && sector < fsuend[0]) + dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); + else { + RF_Malloc(dest[0], bytesPerSector, (char *)); + bzero(dest[0], bytesPerSector); + mallc_one = 1; + } + if (fsuoff[1] <= sector && sector < fsuend[1]) + dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]); + else { + RF_Malloc(dest[1], bytesPerSector, (char *)); + bzero(dest[1], bytesPerSector); + mallc_two = 1; + } + RF_ASSERT(mallc_one == 0 || mallc_two == 0); + } + pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff); + ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff); + /* + * After finish finding all needed sectors, call doubleEOdecode function for decoding + * one sector to destination. + */ + rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); + /* free all allocated memory, and mark flag to indicate no + * memory is being allocated */ + if (mallc_one == 1) + RF_Free(dest[0], bytesPerSector); + if (mallc_two == 1) + RF_Free(dest[1], bytesPerSector); + mallc_one = mallc_two = 0; + } + RF_Free(buf, numDataCol * sizeof(char *)); + if (ndataParam != 0) { + RF_Free(suoff, ndataParam * sizeof(long)); + RF_Free(suend, ndataParam * sizeof(long)); + RF_Free(prmToCol, ndataParam * sizeof(long)); + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + if (tracerec) { + tracerec->q_us += RF_ETIMER_VAL_US(timer); + } + rf_GenericWakeupFunc(node, 0); #if 1 - return(0); /* XXX is this even close!!?!?!!? GO */ + return (0); /* XXX is this even close!!?!?!!? 
GO */ #endif } -/* currently, only access of one of the two failed SU is allowed in this function. - * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into +/* currently, only access of one of the two failed SU is allowed in this function. + * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into * many accesses of single stripe unit. */ -int rf_EOWriteDoubleRecoveryFunc(node) - RF_DagNode_t *node; +int +rf_EOWriteDoubleRecoveryFunc(node) + RF_DagNode_t *node; { - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout); - RF_SectorNum_t sector; - RF_RowCol_t col, scol; - int prm, i, j; - RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - unsigned sosAddr; - unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 ); - RF_int64 numbytes; - RF_SectorNum_t startSector, endSector; - RF_PhysDiskAddr_t *ppda,*epda,*pda, *fpda, npda; - RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol; - char **buf; /* buf[0], buf[1], buf[2], ...etc. 
point to buffer storing data read from col0, col1, col2 */ - char *ebuf, *pbuf, *dest[2], *olddata[2]; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ASSERT( asmap->numDataFailed == 1 ); /* currently only support this case, the other failed SU is not being accessed */ - RF_ETIMER_START(timer); - RF_Malloc(buf, numDataCol*sizeof(char *), (char **)); - - ppda = node->results[0]; /* Instead of being buffers, node->results[0] and [1] are Ppda and Epda */ - epda = node->results[1]; - fpda = asmap->failedPDAs[0]; - - /* First, recovery the failed old SU using EvenOdd double decoding */ - /* determine the startSector and endSector for decoding */ - startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector ); - endSector = startSector + fpda->numSector; - /* Assign buf[col] pointers to point to each non-failed colume and initialize the pbuf - and ebuf to point at the beginning of each source buffers and destination buffers */ - for( prm=0; prm < numDataCol-2; prm++ ) { - pda = (RF_PhysDiskAddr_t *)node->params[prm].p; - col = rf_EUCol(layoutPtr, pda->raidAddress ); - buf[col] = pda->bufPtr; - } - /* pbuf and ebuf: they will change values as double recovery decoding goes on */ - pbuf = ppda->bufPtr; - ebuf = epda->bufPtr; - /* find out the logical colume numbers in the encoding matrix of the two failed columes */ - fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress ); - - /* find out the other failed colume not accessed this time */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - for (i=0; i < numDataCol; i++) { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != fcol[0]) break; - } - RF_ASSERT (i < numDataCol); - fcol[1] = i; - /* assign temporary space to put recovered failed SU */ - numbytes 
= fpda->numSector * bytesPerSector; - RF_Malloc(olddata[0], numbytes, (char *) ); - RF_Malloc(olddata[1], numbytes, (char *) ); - dest[0] = olddata[0]; - dest[1] = olddata[1]; - bzero(olddata[0], numbytes); - bzero(olddata[1], numbytes); - /* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] have already - pointed at the beginning of each source buffers and destination buffers */ - for( sector = startSector, i=0; sector < endSector; sector++ , i++){ - rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf ); - for (j=0; j < numDataCol; j++) - if( ( j != fcol[0]) && ( j != fcol[1] ) ) buf[j] += bytesPerSector; - dest[0] += bytesPerSector; - dest[1] += bytesPerSector; - ebuf += bytesPerSector; - pbuf += bytesPerSector; - } - /* after recovery, the buffer pointed by olddata[0] is the old failed data. - With new writing data and this old data, use small write to calculate - the new redundant informations - */ - /* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of Rrd; - params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; - params[ PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] - are Pdas of wudNodes; - For current implementation, we assume the simplest case: - asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1 ie. PDAPerDisk = 1 - then node->params[numDataCol] must be the new data to be writen to the failed disk. We first bxor the new data - into the old recovered data, then do the same things as small write. 
- */ - - rf_bxor( ((RF_PhysDiskAddr_t *)node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp); - /* do new 'E' calculation */ - /* find out the corresponding colume in encoding matrix for write colume to be encoded into redundant disk 'E' */ - scol = rf_EUCol(layoutPtr, fpda->raidAddress ); - /* olddata[0] now is source buffer pointer; epda->bufPtr is the dest buffer pointer */ - rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector); - - /* do new 'P' calculation */ - rf_bxor( olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp); - /* Free the allocated buffer */ - RF_Free( olddata[0], numbytes ); - RF_Free( olddata[1], numbytes ); - RF_Free( buf, numDataCol*sizeof(char *)); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) { - tracerec->q_us += RF_ETIMER_VAL_US(timer); - } - - rf_GenericWakeupFunc(node,0); - return(0); + int np = node->numParams; + RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); + RF_SectorNum_t sector; + RF_RowCol_t col, scol; + int prm, i, j; + RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; + unsigned sosAddr; + unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); + RF_int64 numbytes; + RF_SectorNum_t startSector, endSector; + RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda; + RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol; + char **buf; /* buf[0], buf[1], buf[2], ...etc. 
point to + * buffer storing data read from col0, col1, + * col2 */ + char *ebuf, *pbuf, *dest[2], *olddata[2]; + RF_Etimer_t timer; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + + RF_ASSERT(asmap->numDataFailed == 1); /* currently only support this + * case, the other failed SU + * is not being accessed */ + RF_ETIMER_START(timer); + RF_Malloc(buf, numDataCol * sizeof(char *), (char **)); + + ppda = node->results[0];/* Instead of being buffers, node->results[0] + * and [1] are Ppda and Epda */ + epda = node->results[1]; + fpda = asmap->failedPDAs[0]; + + /* First, recovery the failed old SU using EvenOdd double decoding */ + /* determine the startSector and endSector for decoding */ + startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector); + endSector = startSector + fpda->numSector; + /* Assign buf[col] pointers to point to each non-failed colume and + * initialize the pbuf and ebuf to point at the beginning of each + * source buffers and destination buffers */ + for (prm = 0; prm < numDataCol - 2; prm++) { + pda = (RF_PhysDiskAddr_t *) node->params[prm].p; + col = rf_EUCol(layoutPtr, pda->raidAddress); + buf[col] = pda->bufPtr; + } + /* pbuf and ebuf: they will change values as double recovery decoding + * goes on */ + pbuf = ppda->bufPtr; + ebuf = epda->bufPtr; + /* find out the logical colume numbers in the encoding matrix of the + * two failed columes */ + fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress); + + /* find out the other failed colume not accessed this time */ + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + for (i = 0; i < numDataCol; i++) { + npda.raidAddress = sosAddr + (i * secPerSU); + (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); + /* skip over dead disks */ + if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) + if (i != fcol[0]) + break; + } + RF_ASSERT(i < numDataCol); + fcol[1] = i; + /* assign temporary space to 
put recovered failed SU */ + numbytes = fpda->numSector * bytesPerSector; + RF_Malloc(olddata[0], numbytes, (char *)); + RF_Malloc(olddata[1], numbytes, (char *)); + dest[0] = olddata[0]; + dest[1] = olddata[1]; + bzero(olddata[0], numbytes); + bzero(olddata[1], numbytes); + /* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] + * have already pointed at the beginning of each source buffers and + * destination buffers */ + for (sector = startSector, i = 0; sector < endSector; sector++, i++) { + rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); + for (j = 0; j < numDataCol; j++) + if ((j != fcol[0]) && (j != fcol[1])) + buf[j] += bytesPerSector; + dest[0] += bytesPerSector; + dest[1] += bytesPerSector; + ebuf += bytesPerSector; + pbuf += bytesPerSector; + } + /* after recovery, the buffer pointed by olddata[0] is the old failed + * data. With new writing data and this old data, use small write to + * calculate the new redundant informations */ + /* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of + * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol + * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[ + * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol + * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of + * wudNodes; For current implementation, we assume the simplest case: + * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1 + * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new + * data to be writen to the failed disk. We first bxor the new data + * into the old recovered data, then do the same things as small + * write. 
*/ + + rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp); + /* do new 'E' calculation */ + /* find out the corresponding colume in encoding matrix for write + * colume to be encoded into redundant disk 'E' */ + scol = rf_EUCol(layoutPtr, fpda->raidAddress); + /* olddata[0] now is source buffer pointer; epda->bufPtr is the dest + * buffer pointer */ + rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector); + + /* do new 'P' calculation */ + rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp); + /* Free the allocated buffer */ + RF_Free(olddata[0], numbytes); + RF_Free(olddata[1], numbytes); + RF_Free(buf, numDataCol * sizeof(char *)); + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + if (tracerec) { + tracerec->q_us += RF_ETIMER_VAL_US(timer); + } + rf_GenericWakeupFunc(node, 0); + return (0); } diff --git a/sys/dev/raidframe/rf_evenodd_dagfuncs.h b/sys/dev/raidframe/rf_evenodd_dagfuncs.h index 9773e57cedd..c00a1d82530 100644 --- a/sys/dev/raidframe/rf_evenodd_dagfuncs.h +++ b/sys/dev/raidframe/rf_evenodd_dagfuncs.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_evenodd_dagfuncs.h,v 1.1 1999/01/11 14:29:22 niklas Exp $ */ -/* $NetBSD: rf_evenodd_dagfuncs.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: rf_evenodd_dagfuncs.h,v 1.2 1999/02/16 00:02:45 niklas Exp $ */ +/* $NetBSD: rf_evenodd_dagfuncs.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ /* * rf_evenodd_dagfuncs.h */ @@ -39,23 +39,26 @@ extern RF_RedFuncs_t rf_eoERecoveryFuncs; extern RF_RedFuncs_t rf_eoPRecoveryFuncs; extern RF_RedFuncs_t rf_eoERecoveryFuncs; -int rf_RegularPEFunc(RF_DagNode_t *node); -int rf_RegularONEFunc(RF_DagNode_t *node); -int rf_SimpleONEFunc(RF_DagNode_t *node); -void rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf); -int rf_RegularEFunc(RF_DagNode_t *node); -void rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf); -int rf_Degraded_100_EOFunc(RF_DagNode_t *node); -void 
rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf, - RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector); -void rf_e_encToBuf(RF_Raid_t *raidPtr, RF_RowCol_t srcLogicCol, - char *srcbuf, RF_RowCol_t destLogicCol, char *destbuf, int numSector); -int rf_RecoveryEFunc(RF_DagNode_t *node); -int rf_EO_DegradedWriteEFunc(RF_DagNode_t *node); -void rf_doubleEOdecode(RF_Raid_t *raidPtr, char **rrdbuf, char **dest, - RF_RowCol_t *fcol, char *pbuf, char *ebuf); -int rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node); -int rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node); +int rf_RegularPEFunc(RF_DagNode_t * node); +int rf_RegularONEFunc(RF_DagNode_t * node); +int rf_SimpleONEFunc(RF_DagNode_t * node); +void rf_RegularESubroutine(RF_DagNode_t * node, char *ebuf); +int rf_RegularEFunc(RF_DagNode_t * node); +void rf_DegrESubroutine(RF_DagNode_t * node, char *ebuf); +int rf_Degraded_100_EOFunc(RF_DagNode_t * node); +void +rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf, + RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector); +void +rf_e_encToBuf(RF_Raid_t * raidPtr, RF_RowCol_t srcLogicCol, + char *srcbuf, RF_RowCol_t destLogicCol, char *destbuf, int numSector); +int rf_RecoveryEFunc(RF_DagNode_t * node); +int rf_EO_DegradedWriteEFunc(RF_DagNode_t * node); +void +rf_doubleEOdecode(RF_Raid_t * raidPtr, char **rrdbuf, char **dest, + RF_RowCol_t * fcol, char *pbuf, char *ebuf); +int rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t * node); +int rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t * node); #define rf_EUCol(_layoutPtr_, _addr_ ) \ ( (_addr_)%( (_layoutPtr_)->dataSectorsPerStripe ) )/((_layoutPtr_)->sectorsPerStripeUnit) @@ -74,4 +77,4 @@ int rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node); * for disk array larger than 17 columns totally. 
*/ -#endif /* !_RF__RF_EVENODD_DAGFUNCS_H_ */ +#endif /* !_RF__RF_EVENODD_DAGFUNCS_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd_dags.c b/sys/dev/raidframe/rf_evenodd_dags.c index 775fd5008f9..512b73b257c 100644 --- a/sys/dev/raidframe/rf_evenodd_dags.c +++ b/sys/dev/raidframe/rf_evenodd_dags.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_evenodd_dags.c,v 1.1 1999/01/11 14:29:22 niklas Exp $ */ -/* $NetBSD: rf_evenodd_dags.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: rf_evenodd_dags.c,v 1.2 1999/02/16 00:02:45 niklas Exp $ */ +/* $NetBSD: rf_evenodd_dags.c,v 1.2 1999/02/05 00:06:11 oster Exp $ */ /* * rf_evenodd_dags.c */ @@ -57,18 +57,16 @@ */ RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG) { - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs); + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs); } - /* * Lost data + E. * Use P to reconstruct missing data. */ RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG) { - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs); + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs); } - /* * Lost data + P. * Make E look like P, and use Eor for Xor, and we can @@ -76,124 +74,116 @@ RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG) */ RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG) { - RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoERecoveryFuncs); + RF_PhysDiskAddr_t *temp; + /* swap P and E pointers to fake out the DegradedReadDAG code */ + temp = asmap->parityInfo; + asmap->parityInfo = asmap->qInfo; + asmap->qInfo = temp; + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoERecoveryFuncs); } - /* * Lost two data. 
*/ RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG) { - rf_EO_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList); + rf_EO_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList); } - /* * Lost two data. */ RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG) { - rf_EOCreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList); + rf_EOCreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList); } - RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG) { - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, (int (*)(RF_DagNode_t *))rf_Degraded_100_EOFunc, RF_TRUE); + if (asmap->numStripeUnitsAccessed != 1 && + asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) + RF_PANIC(); + rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, (int (*) (RF_DagNode_t *)) rf_Degraded_100_EOFunc, RF_TRUE); } - /* * E is dead. Small write. */ RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG) { - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWritePFuncs, NULL); + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWritePFuncs, NULL); } - /* * E is dead. Large write. */ RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG) { - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularPFunc, RF_TRUE); + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularPFunc, RF_TRUE); } - /* * P is dead. Small write. * Swap E + P, use single-degraded stuff. 
*/ RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG) { - RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWriteEFuncs, NULL); + RF_PhysDiskAddr_t *temp; + /* swap P and E pointers to fake out the DegradedReadDAG code */ + temp = asmap->parityInfo; + asmap->parityInfo = asmap->qInfo; + asmap->qInfo = temp; + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWriteEFuncs, NULL); } - /* * P is dead. Large write. * Swap E + P, use single-degraded stuff. */ RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG) { - RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the code */ - temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularEFunc, RF_FALSE); + RF_PhysDiskAddr_t *temp; + /* swap P and E pointers to fake out the code */ + temp = asmap->parityInfo; + asmap->parityInfo = asmap->qInfo; + asmap->qInfo = temp; + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularEFunc, RF_FALSE); } - RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG) { - rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); + rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + RF_IO_TYPE_WRITE); } - RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG) { - RF_PhysDiskAddr_t *temp; + RF_PhysDiskAddr_t *temp; - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - { - RF_PANIC(); - } - /* swap P and E to fake out parity code */ - temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, 
bp, flags, allocList,1, (int (*)(RF_DagNode_t *))rf_EO_DegradedWriteEFunc, RF_FALSE); - /* is the regular E func the right one to call? */ + if (asmap->numStripeUnitsAccessed != 1 && + asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) { + RF_PANIC(); + } + /* swap P and E to fake out parity code */ + temp = asmap->parityInfo; + asmap->parityInfo = asmap->qInfo; + asmap->qInfo = temp; + rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, (int (*) (RF_DagNode_t *)) rf_EO_DegradedWriteEFunc, RF_FALSE); + /* is the regular E func the right one to call? */ } - RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG) { - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList,1, rf_RecoveryXorFunc, RF_TRUE); + if (asmap->numStripeUnitsAccessed != 1 && + asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) + RF_PANIC(); + rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE); } - RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead) { - rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList, - "Re", "EvenOddRecovery", rf_EvenOddDoubleRecoveryFunc); + rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList, + "Re", "EvenOddRecovery", rf_EvenOddDoubleRecoveryFunc); } - RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG) { - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_EOSmallWriteEFuncs); + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_EOSmallWriteEFuncs); } - RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG) { - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, rf_RegularPEFunc, RF_FALSE); + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, 
rf_RegularPEFunc, RF_FALSE); } - RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG) { - rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Re", "We", "EOWrDDRecovery", rf_EOWriteDoubleRecoveryFunc); + rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Re", "We", "EOWrDDRecovery", rf_EOWriteDoubleRecoveryFunc); } - -#endif /* RF_INCLUDE_EVENODD > 0 */ +#endif /* RF_INCLUDE_EVENODD > 0 */ diff --git a/sys/dev/raidframe/rf_evenodd_dags.h b/sys/dev/raidframe/rf_evenodd_dags.h index 3d125e8aa25..b2df9a3c7a4 100644 --- a/sys/dev/raidframe/rf_evenodd_dags.h +++ b/sys/dev/raidframe/rf_evenodd_dags.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_evenodd_dags.h,v 1.1 1999/01/11 14:29:22 niklas Exp $ */ -/* $NetBSD: rf_evenodd_dags.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: rf_evenodd_dags.h,v 1.2 1999/02/16 00:02:45 niklas Exp $ */ +/* $NetBSD: rf_evenodd_dags.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ /* * rf_evenodd_dags.h */ @@ -59,6 +59,6 @@ RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead); RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG); -#endif /* RF_UTILITY == 0 */ +#endif /* RF_UTILITY == 0 */ -#endif /* !_RF__RF_EVENODD_DAGS_H_ */ +#endif /* !_RF__RF_EVENODD_DAGS_H_ */ diff --git a/sys/dev/raidframe/rf_fifo.c b/sys/dev/raidframe/rf_fifo.c index 63367aeb4ab..5c5c35af41b 100644 --- a/sys/dev/raidframe/rf_fifo.c +++ b/sys/dev/raidframe/rf_fifo.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_fifo.c,v 1.1 1999/01/11 14:29:22 niklas Exp $ */ -/* $NetBSD: rf_fifo.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */ +/* $OpenBSD: rf_fifo.c,v 1.2 1999/02/16 00:02:46 niklas Exp $ */ +/* $NetBSD: rf_fifo.c,v 1.3 1999/02/05 00:06:11 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,75 +29,13 @@ /*************************************************** * - * rf_fifo.c -- prioritized fifo queue code. 
+ * rf_fifo.c -- prioritized fifo queue code. * There are only two priority levels: hi and lo. * * Aug 4, 1994, adapted from raidSim version (MCH) * ***************************************************/ -/* - * : - * Log: rf_fifo.c,v - * Revision 1.20 1996/06/18 20:53:11 jimz - * fix up disk queueing (remove configure routine, - * add shutdown list arg to create routines) - * - * Revision 1.19 1996/06/14 00:08:21 jimz - * make happier in all environments - * - * Revision 1.18 1996/06/13 20:41:24 jimz - * add random queueing - * - * Revision 1.17 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.16 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.15 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.14 1996/06/06 01:15:02 jimz - * added debugging - * - * Revision 1.13 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.12 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.11 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.10 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.9 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.8 1995/12/01 18:22:15 root - * added copyright info - * - * Revision 1.7 1995/11/07 15:32:16 wvcii - * added function FifoPeek() - * - */ - #include "rf_types.h" #include "rf_alloclist.h" #include "rf_stripelocks.h" @@ -109,215 +47,211 @@ #include "rf_threadid.h" #include "rf_options.h" -#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0 -#include "rf_randmacros.h" -RF_DECLARE_STATIC_RANDOM -#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */ - /* just malloc a header, zero it (via calloc), and return it */ /*ARGSUSED*/ -void *rf_FifoCreate(sectPerDisk, clList, listp) - RF_SectorCount_t sectPerDisk; - RF_AllocListElem_t *clList; - RF_ShutdownList_t **listp; +void * +rf_FifoCreate(sectPerDisk, clList, listp) + RF_SectorCount_t sectPerDisk; + RF_AllocListElem_t *clList; + RF_ShutdownList_t **listp; { - RF_FifoHeader_t *q; + RF_FifoHeader_t *q; -#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0 - RF_INIT_STATIC_RANDOM(1); -#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */ - RF_CallocAndAdd(q, 1, sizeof(RF_FifoHeader_t), (RF_FifoHeader_t *), clList); - q->hq_count = q->lq_count = 0; -#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0 - q->rval = (long)RF_STATIC_RANDOM(); -#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */ - return((void *)q); + RF_CallocAndAdd(q, 1, sizeof(RF_FifoHeader_t), (RF_FifoHeader_t *), clList); + q->hq_count = q->lq_count = 0; + return ((void *) q); } -void rf_FifoEnqueue(q_in, elem, priority) - void *q_in; - RF_DiskQueueData_t *elem; - int priority; +void +rf_FifoEnqueue(q_in, elem, priority) + void *q_in; + 
RF_DiskQueueData_t *elem; + int priority; { - RF_FifoHeader_t *q = (RF_FifoHeader_t *)q_in; + RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_ASSERT(priority == RF_IO_NORMAL_PRIORITY || priority == RF_IO_LOW_PRIORITY); + RF_ASSERT(priority == RF_IO_NORMAL_PRIORITY || priority == RF_IO_LOW_PRIORITY); - elem->next = NULL; - if (priority == RF_IO_NORMAL_PRIORITY) { - if (!q->hq_tail) { - RF_ASSERT(q->hq_count == 0 && q->hq_head == NULL); - q->hq_head = q->hq_tail = elem; - } else { - RF_ASSERT(q->hq_count != 0 && q->hq_head != NULL); - q->hq_tail->next = elem; - q->hq_tail = elem; - } - q->hq_count++; - } - else { - RF_ASSERT(elem->next == NULL); - if (rf_fifoDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] fifo: ENQ lopri\n", tid); - } - if (!q->lq_tail) { - RF_ASSERT(q->lq_count == 0 && q->lq_head == NULL); - q->lq_head = q->lq_tail = elem; - } else { - RF_ASSERT(q->lq_count != 0 && q->lq_head != NULL); - q->lq_tail->next = elem; - q->lq_tail = elem; - } - q->lq_count++; - } - if ((q->hq_count + q->lq_count)!= elem->queue->queueLength) { - printf("Queue lengths differ!: %d %d %d\n", - q->hq_count, q->lq_count, (int)elem->queue->queueLength); - printf("%d %d %d %d\n", - (int)elem->queue->numOutstanding, - (int)elem->queue->maxOutstanding, - (int)elem->queue->row, - (int)elem->queue->col); - } - RF_ASSERT((q->hq_count + q->lq_count) == elem->queue->queueLength); + elem->next = NULL; + if (priority == RF_IO_NORMAL_PRIORITY) { + if (!q->hq_tail) { + RF_ASSERT(q->hq_count == 0 && q->hq_head == NULL); + q->hq_head = q->hq_tail = elem; + } else { + RF_ASSERT(q->hq_count != 0 && q->hq_head != NULL); + q->hq_tail->next = elem; + q->hq_tail = elem; + } + q->hq_count++; + } else { + RF_ASSERT(elem->next == NULL); + if (rf_fifoDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] fifo: ENQ lopri\n", tid); + } + if (!q->lq_tail) { + RF_ASSERT(q->lq_count == 0 && q->lq_head == NULL); + q->lq_head = q->lq_tail = elem; + } else { + RF_ASSERT(q->lq_count != 0 && 
q->lq_head != NULL); + q->lq_tail->next = elem; + q->lq_tail = elem; + } + q->lq_count++; + } + if ((q->hq_count + q->lq_count) != elem->queue->queueLength) { + printf("Queue lengths differ!: %d %d %d\n", + q->hq_count, q->lq_count, (int) elem->queue->queueLength); + printf("%d %d %d %d\n", + (int) elem->queue->numOutstanding, + (int) elem->queue->maxOutstanding, + (int) elem->queue->row, + (int) elem->queue->col); + } + RF_ASSERT((q->hq_count + q->lq_count) == elem->queue->queueLength); } -RF_DiskQueueData_t *rf_FifoDequeue(q_in) - void *q_in; +RF_DiskQueueData_t * +rf_FifoDequeue(q_in) + void *q_in; { - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_DiskQueueData_t *nd; + RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; + RF_DiskQueueData_t *nd; - RF_ASSERT(q); - if (q->hq_head) { - RF_ASSERT(q->hq_count != 0 && q->hq_tail != NULL); - nd = q->hq_head; q->hq_head = q->hq_head->next; - if (!q->hq_head) q->hq_tail = NULL; - nd->next = NULL; - q->hq_count--; - } else if (q->lq_head) { - RF_ASSERT(q->lq_count != 0 && q->lq_tail != NULL); - nd = q->lq_head; q->lq_head = q->lq_head->next; - if (!q->lq_head) q->lq_tail = NULL; - nd->next = NULL; - q->lq_count--; - if (rf_fifoDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] fifo: DEQ lopri %lx\n", tid, (long)nd); - } - } else { - RF_ASSERT(q->hq_count == 0 && q->lq_count == 0 && q->hq_tail == NULL && q->lq_tail == NULL); - nd = NULL; - } - return(nd); + RF_ASSERT(q); + if (q->hq_head) { + RF_ASSERT(q->hq_count != 0 && q->hq_tail != NULL); + nd = q->hq_head; + q->hq_head = q->hq_head->next; + if (!q->hq_head) + q->hq_tail = NULL; + nd->next = NULL; + q->hq_count--; + } else + if (q->lq_head) { + RF_ASSERT(q->lq_count != 0 && q->lq_tail != NULL); + nd = q->lq_head; + q->lq_head = q->lq_head->next; + if (!q->lq_head) + q->lq_tail = NULL; + nd->next = NULL; + q->lq_count--; + if (rf_fifoDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] fifo: DEQ lopri %lx\n", tid, (long) nd); + } + } else { + 
RF_ASSERT(q->hq_count == 0 && q->lq_count == 0 && q->hq_tail == NULL && q->lq_tail == NULL); + nd = NULL; + } + return (nd); } - /* This never gets used!! No loss (I hope) if we don't include it... GO */ #if !defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(_KERNEL) -static RF_DiskQueueData_t *n_in_q(headp, tailp, countp, n, deq) - RF_DiskQueueData_t **headp; - RF_DiskQueueData_t **tailp; - int *countp; - int n; - int deq; +static RF_DiskQueueData_t * +n_in_q(headp, tailp, countp, n, deq) + RF_DiskQueueData_t **headp; + RF_DiskQueueData_t **tailp; + int *countp; + int n; + int deq; { - RF_DiskQueueData_t *r, *s; - int i; + RF_DiskQueueData_t *r, *s; + int i; - for(s=NULL,i=n,r=*headp;r;s=r,r=r->next) { - if (i == 0) - break; - i--; - } - RF_ASSERT(r != NULL); - if (deq == 0) - return(r); - if (s) { - s->next = r->next; - } - else { - *headp = r->next; - } - if (*tailp == r) - *tailp = s; - (*countp)--; - return(r); + for (s = NULL, i = n, r = *headp; r; s = r, r = r->next) { + if (i == 0) + break; + i--; + } + RF_ASSERT(r != NULL); + if (deq == 0) + return (r); + if (s) { + s->next = r->next; + } else { + *headp = r->next; + } + if (*tailp == r) + *tailp = s; + (*countp)--; + return (r); } #endif #if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0 -RF_DiskQueueData_t *rf_RandomPeek(q_in) - void *q_in; +RF_DiskQueueData_t * +rf_RandomPeek(q_in) + void *q_in; { - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_DiskQueueData_t *req; - int n; + RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; + RF_DiskQueueData_t *req; + int n; - if (q->hq_head) { - n = q->rval % q->hq_count; - req = n_in_q(&q->hq_head, &q->hq_tail, &q->hq_count, n, 0); - } - else { - RF_ASSERT(q->hq_count == 0); - if (q->lq_head == NULL) { - RF_ASSERT(q->lq_count == 0); - return(NULL); - } - n = q->rval % q->lq_count; - req = n_in_q(&q->lq_head, &q->lq_tail, &q->lq_count, n, 0); - } - RF_ASSERT((q->hq_count + q->lq_count) == req->queue->queueLength); - RF_ASSERT(req != NULL); - return(req); 
+ if (q->hq_head) { + n = q->rval % q->hq_count; + req = n_in_q(&q->hq_head, &q->hq_tail, &q->hq_count, n, 0); + } else { + RF_ASSERT(q->hq_count == 0); + if (q->lq_head == NULL) { + RF_ASSERT(q->lq_count == 0); + return (NULL); + } + n = q->rval % q->lq_count; + req = n_in_q(&q->lq_head, &q->lq_tail, &q->lq_count, n, 0); + } + RF_ASSERT((q->hq_count + q->lq_count) == req->queue->queueLength); + RF_ASSERT(req != NULL); + return (req); } -RF_DiskQueueData_t *rf_RandomDequeue(q_in) - void *q_in; +RF_DiskQueueData_t * +rf_RandomDequeue(q_in) + void *q_in; { - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_DiskQueueData_t *req; - int n; + RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; + RF_DiskQueueData_t *req; + int n; - if (q->hq_head) { - n = q->rval % q->hq_count; - q->rval = (long)RF_STATIC_RANDOM(); - req = n_in_q(&q->hq_head, &q->hq_tail, &q->hq_count, n, 1); - } - else { - RF_ASSERT(q->hq_count == 0); - if (q->lq_head == NULL) { - RF_ASSERT(q->lq_count == 0); - return(NULL); - } - n = q->rval % q->lq_count; - q->rval = (long)RF_STATIC_RANDOM(); - req = n_in_q(&q->lq_head, &q->lq_tail, &q->lq_count, n, 1); - } - RF_ASSERT((q->hq_count + q->lq_count) == (req->queue->queueLength-1)); - return(req); + if (q->hq_head) { + n = q->rval % q->hq_count; + q->rval = (long) RF_STATIC_RANDOM(); + req = n_in_q(&q->hq_head, &q->hq_tail, &q->hq_count, n, 1); + } else { + RF_ASSERT(q->hq_count == 0); + if (q->lq_head == NULL) { + RF_ASSERT(q->lq_count == 0); + return (NULL); + } + n = q->rval % q->lq_count; + q->rval = (long) RF_STATIC_RANDOM(); + req = n_in_q(&q->lq_head, &q->lq_tail, &q->lq_count, n, 1); + } + RF_ASSERT((q->hq_count + q->lq_count) == (req->queue->queueLength - 1)); + return (req); } -#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */ +#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */ /* Return ptr to item at head of queue. Used to examine request * info without actually dequeueing the request. 
*/ -RF_DiskQueueData_t *rf_FifoPeek(void *q_in) +RF_DiskQueueData_t * +rf_FifoPeek(void *q_in) { - RF_DiskQueueData_t *headElement = NULL; - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; + RF_DiskQueueData_t *headElement = NULL; + RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_ASSERT(q); - if (q->hq_head) - headElement = q->hq_head; - else if (q->lq_head) - headElement = q->lq_head; - return(headElement); + RF_ASSERT(q); + if (q->hq_head) + headElement = q->hq_head; + else + if (q->lq_head) + headElement = q->lq_head; + return (headElement); } - /* We sometimes need to promote a low priority access to a regular priority access. * Currently, this is only used when the user wants to write a stripe which is currently * under reconstruction. @@ -325,47 +259,70 @@ RF_DiskQueueData_t *rf_FifoPeek(void *q_in) * the low priority queue to the end of the normal priority queue. * We assume the queue is locked upon entry. */ -int rf_FifoPromote(q_in, parityStripeID, which_ru) - void *q_in; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; +int +rf_FifoPromote(q_in, parityStripeID, which_ru) + void *q_in; + RF_StripeNum_t parityStripeID; + RF_ReconUnitNum_t which_ru; { - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_DiskQueueData_t *lp = q->lq_head, *pt = NULL; /* lp = lo-pri queue pointer, pt = trailer */ - int retval = 0; + RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; + RF_DiskQueueData_t *lp = q->lq_head, *pt = NULL; /* lp = lo-pri queue + * pointer, pt = trailer */ + int retval = 0; + + while (lp) { + + /* search for the indicated parity stripe in the low-pri queue */ + if (lp->parityStripeID == parityStripeID && lp->which_ru == which_ru) { + /* printf("FifoPromote: promoting access for psid + * %ld\n",parityStripeID); */ + if (pt) + pt->next = lp->next; /* delete an entry other + * than the first */ + else + q->lq_head = lp->next; /* delete the head entry */ + + if (!q->lq_head) + q->lq_tail = NULL; /* we deleted the only + * entry */ + else + if 
(lp == q->lq_tail) + q->lq_tail = pt; /* we deleted the tail + * entry */ + + lp->next = NULL; + q->lq_count--; + + if (q->hq_tail) { + q->hq_tail->next = lp; + q->hq_tail = lp; + } + /* append to hi-priority queue */ + else { + q->hq_head = q->hq_tail = lp; + } + q->hq_count++; - while (lp) { + /* UpdateShortestSeekFinishTimeForced(lp->requestPtr, + * lp->diskState); *//* deal with this later, if ever */ - /* search for the indicated parity stripe in the low-pri queue */ - if (lp->parityStripeID == parityStripeID && lp->which_ru == which_ru) { - /*printf("FifoPromote: promoting access for psid %ld\n",parityStripeID);*/ - if (pt) pt->next = lp->next; /* delete an entry other than the first */ - else q->lq_head = lp->next; /* delete the head entry */ - - if (!q->lq_head) q->lq_tail = NULL; /* we deleted the only entry */ - else if (lp == q->lq_tail) q->lq_tail = pt; /* we deleted the tail entry */ - - lp->next = NULL; - q->lq_count--; - - if (q->hq_tail) {q->hq_tail->next = lp; q->hq_tail = lp;} /* append to hi-priority queue */ - else {q->hq_head = q->hq_tail = lp;} - q->hq_count++; + lp = (pt) ? pt->next : q->lq_head; /* reset low-pri pointer + * and continue */ + retval++; - /*UpdateShortestSeekFinishTimeForced(lp->requestPtr, lp->diskState);*/ /* deal with this later, if ever */ + } else { + pt = lp; + lp = lp->next; + } + } - lp = (pt) ? pt->next : q->lq_head; /* reset low-pri pointer and continue */ - retval++; - - } else {pt = lp; lp = lp->next;} - } - - /* sanity check. delete this if you ever put more than one entry in the low-pri queue */ - RF_ASSERT(retval == 0 || retval == 1); - if (rf_fifoDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] fifo: promote %d\n", tid, retval); - } - return(retval); + /* sanity check. 
delete this if you ever put more than one entry in + * the low-pri queue */ + RF_ASSERT(retval == 0 || retval == 1); + if (rf_fifoDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] fifo: promote %d\n", tid, retval); + } + return (retval); } diff --git a/sys/dev/raidframe/rf_fifo.h b/sys/dev/raidframe/rf_fifo.h index 44d2cc577f4..f23d7dcb573 100644 --- a/sys/dev/raidframe/rf_fifo.h +++ b/sys/dev/raidframe/rf_fifo.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_fifo.h,v 1.1 1999/01/11 14:29:23 niklas Exp $ */ -/* $NetBSD: rf_fifo.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $OpenBSD: rf_fifo.h,v 1.2 1999/02/16 00:02:46 niklas Exp $ */ +/* $NetBSD: rf_fifo.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,55 +33,6 @@ * 4-9-93 Created (MCH) */ -/* - * : - * Log: rf_fifo.h,v - * Revision 1.12 1996/06/18 20:53:11 jimz - * fix up disk queueing (remove configure routine, - * add shutdown list arg to create routines) - * - * Revision 1.11 1996/06/13 20:41:28 jimz - * add random queueing - * - * Revision 1.10 1996/06/13 20:38:28 jimz - * add random dequeue, peek - * - * Revision 1.9 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.8 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.7 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.6 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.5 1996/05/30 11:29:41 jimz - * Numerous bug fixes. 
Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.4 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.3 1995/12/01 18:22:26 root - * added copyright info - * - * Revision 1.2 1995/11/07 15:31:57 wvcii - * added Peek() function - * - */ #ifndef _RF__RF_FIFO_H_ #define _RF__RF_FIFO_H_ @@ -91,25 +42,21 @@ #include "rf_diskqueue.h" typedef struct RF_FifoHeader_s { - RF_DiskQueueData_t *hq_head, *hq_tail; /* high priority requests */ - RF_DiskQueueData_t *lq_head, *lq_tail; /* low priority requests */ - int hq_count, lq_count; /* debug only */ -#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0 - long rval; /* next random number (random qpolicy) */ -#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */ -} RF_FifoHeader_t; + RF_DiskQueueData_t *hq_head, *hq_tail; /* high priority requests */ + RF_DiskQueueData_t *lq_head, *lq_tail; /* low priority requests */ + int hq_count, lq_count; /* debug only */ +} RF_FifoHeader_t; -extern void *rf_FifoCreate(RF_SectorCount_t sectPerDisk, - RF_AllocListElem_t *clList, RF_ShutdownList_t **listp); -extern void rf_FifoEnqueue(void *q_in, RF_DiskQueueData_t *elem, - int priority); +extern void * +rf_FifoCreate(RF_SectorCount_t sectPerDisk, + RF_AllocListElem_t * clList, RF_ShutdownList_t ** listp); +extern void +rf_FifoEnqueue(void *q_in, RF_DiskQueueData_t * elem, + int priority); extern RF_DiskQueueData_t *rf_FifoDequeue(void *q_in); extern RF_DiskQueueData_t 
*rf_FifoPeek(void *q_in); -extern int rf_FifoPromote(void *q_in, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); -#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0 -extern RF_DiskQueueData_t *rf_RandomDequeue(void *q_in); -extern RF_DiskQueueData_t *rf_RandomPeek(void *q_in); -#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */ +extern int +rf_FifoPromote(void *q_in, RF_StripeNum_t parityStripeID, + RF_ReconUnitNum_t which_ru); -#endif /* !_RF__RF_FIFO_H_ */ +#endif /* !_RF__RF_FIFO_H_ */ diff --git a/sys/dev/raidframe/rf_freelist.h b/sys/dev/raidframe/rf_freelist.h index 8f8e4f5120d..c2535109ec3 100644 --- a/sys/dev/raidframe/rf_freelist.h +++ b/sys/dev/raidframe/rf_freelist.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_freelist.h,v 1.1 1999/01/11 14:29:23 niklas Exp $ */ -/* $NetBSD: rf_freelist.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $OpenBSD: rf_freelist.h,v 1.2 1999/02/16 00:02:47 niklas Exp $ */ +/* $NetBSD: rf_freelist.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ /* * rf_freelist.h */ @@ -29,71 +29,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ -/* - * : - * Log: rf_freelist.h,v - * Revision 1.13 1996/06/10 12:50:57 jimz - * Add counters to freelists to track number of allocations, frees, - * grows, max size, etc. Adjust a couple sets of PRIME params based - * on the results. - * - * Revision 1.12 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.11 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. 
- * - * Revision 1.10 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.9 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.8 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.7 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.6 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.5 1996/05/20 16:16:12 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.4 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.3 1996/05/16 16:04:52 jimz - * allow init func to fail for FREELIST ops - * - * Revision 1.2 1996/05/16 14:54:08 jimz - * added _INIT and _CLEAN versions of ops for objects with - * internal allocations - * - * Revision 1.1 1996/05/15 23:37:53 jimz - * Initial revision - * - */ + /* * rf_freelist.h -- code to manage counted freelists * @@ -115,16 +51,15 @@ #if RF_FREELIST_STATS > 0 typedef struct RF_FreeListStats_s { - char *file; - int line; - int allocations; - int frees; - int max_free; - int grows; - int outstanding; - int max_outstanding; -} RF_FreeListStats_t; - + char *file; + int line; + int allocations; + int frees; + int max_free; + int grows; + int outstanding; + int max_outstanding; +} RF_FreeListStats_t; #define RF_FREELIST_STAT_INIT(_fl_) { \ bzero((char *)&((_fl_)->stats), sizeof(RF_FreeListStats_t)); \ (_fl_)->stats.file = __FILE__; \ @@ -163,7 +98,7 @@ typedef struct RF_FreeListStats_s { printf(" %d outstanding (max)\n", (_fl_)->stats.max_outstanding); \ } -#else /* RF_FREELIST_STATS > 0 */ +#else /* RF_FREELIST_STATS > 0 */ #define RF_FREELIST_STAT_INIT(_fl_) #define RF_FREELIST_STAT_ALLOC(_fl_) @@ -172,20 +107,19 @@ typedef struct RF_FreeListStats_s { #define RF_FREELIST_STAT_GROW(_fl_) #define RF_FREELIST_STAT_REPORT(_fl_) -#endif /* RF_FREELIST_STATS > 0 */ +#endif /* RF_FREELIST_STATS > 0 */ struct RF_FreeList_s { - void *objlist; /* list of free obj */ - int free_cnt; /* how many free obj */ - int max_free_cnt; /* max 
free arena size */ - int obj_inc; /* how many to allocate at a time */ - int obj_size; /* size of objects */ - RF_DECLARE_MUTEX(lock) + void *objlist; /* list of free obj */ + int free_cnt; /* how many free obj */ + int max_free_cnt; /* max free arena size */ + int obj_inc; /* how many to allocate at a time */ + int obj_size; /* size of objects */ + RF_DECLARE_MUTEX(lock) #if RF_FREELIST_STATS > 0 - RF_FreeListStats_t stats; /* statistics */ -#endif /* RF_FREELIST_STATS > 0 */ + RF_FreeListStats_t stats; /* statistics */ +#endif /* RF_FREELIST_STATS > 0 */ }; - /* * fl = freelist * maxcnt = max number of items in arena @@ -731,4 +665,4 @@ struct RF_FreeList_s { RF_Free(_fl_,sizeof(RF_FreeList_t)); \ } -#endif /* !_RF__RF_FREELIST_H_ */ +#endif /* !_RF__RF_FREELIST_H_ */ diff --git a/sys/dev/raidframe/rf_general.h b/sys/dev/raidframe/rf_general.h index 3879520133f..60ed42f550b 100644 --- a/sys/dev/raidframe/rf_general.h +++ b/sys/dev/raidframe/rf_general.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_general.h,v 1.1 1999/01/11 14:29:23 niklas Exp $ */ -/* $NetBSD: rf_general.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $OpenBSD: rf_general.h,v 1.2 1999/02/16 00:02:47 niklas Exp $ */ +/* $NetBSD: rf_general.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -31,136 +31,17 @@ * rf_general.h -- some general-use definitions */ -/* - * : - * Log: rf_general.h,v - * Revision 1.26 1996/08/09 16:44:57 jimz - * sunos port - * - * Revision 1.25 1996/08/07 21:08:57 jimz - * get NBPG defined for IRIX - * - * Revision 1.24 1996/08/06 22:02:06 jimz - * include linux/user.h for linux to get NBPG - * - * Revision 1.23 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.22 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.21 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.20 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.19 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.18 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.17 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.16 1996/05/21 18:53:13 jimz - * be sure that noop macros don't confuse conditionals and loops - * - * Revision 1.15 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.14 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.13 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.12 1995/12/01 18:29:08 root - * added copyright info - * - * 
Revision 1.11 1995/09/19 22:59:52 jimz - * Add kernel macro RF_DKU_END_IO(). When DKUSAGE is not defined, - * this is a no-op. When it is defined, it calls dku_end_io() - * correctly given a raidframe unit number and a buf pointer. - * - * Revision 1.10 1995/07/03 18:13:56 holland - * changed kernel defn of GETTIME - * - * Revision 1.9 1995/07/02 15:07:42 holland - * bug fixes related to getting distributed sparing numbers - * - * Revision 1.8 1995/06/12 15:54:40 rachad - * Added garbege collection for log structured storage - * - * Revision 1.7 1995/06/03 19:18:16 holland - * changes related to kernelization: access traces - * changes related to distributed sparing: some bug fixes - * - * Revision 1.6 1995/05/01 13:28:00 holland - * parity range locks, locking disk requests, recon+parityscan in kernel, etc. - * - * Revision 1.5 1995/04/06 14:47:56 rachad - * merge completed - * - * Revision 1.4 1995/03/15 20:45:23 holland - * distr sparing changes. - * - * Revision 1.3 1995/02/03 22:31:36 holland - * many changes related to kernelization - * - * Revision 1.2 1994/11/29 21:37:10 danner - * Added divide by zero check. - * - */ - /*#define NOASSERT*/ #ifndef _RF__RF_GENERAL_H_ #define _RF__RF_GENERAL_H_ -#ifdef _KERNEL -#define KERNEL -#endif - -#if !defined(KERNEL) && !defined(NOASSERT) -#include <assert.h> -#endif /* !KERNEL && !NOASSERT */ - /* error reporting and handling */ -#ifndef KERNEL - -#define RF_ERRORMSG(s) fprintf(stderr,(s)) -#define RF_ERRORMSG1(s,a) fprintf(stderr,(s),(a)) -#define RF_ERRORMSG2(s,a,b) fprintf(stderr,(s),(a),(b)) -#define RF_ERRORMSG3(s,a,b,c) fprintf(stderr,(s),(a),(b),(c)) -#define RF_ERRORMSG4(s,a,b,c,d) fprintf(stderr,(s),(a),(b),(c),(d)) -#define RF_ERRORMSG5(s,a,b,c,d,e) fprintf(stderr,(s),(a),(b),(c),(d),(e)) -#ifndef NOASSERT -#define RF_ASSERT(x) {assert(x);} -#else /* !NOASSERT */ -#define RF_ASSERT(x) {/*noop*/} -#endif /* !NOASSERT */ -#define RF_PANIC() {printf("YIKES! Something terrible happened at line %d of file %s. 
Use a debugger.\n",__LINE__,__FILE__); abort();} - -#else /* !KERNEL */ -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) -#include<sys/systm.h> /* printf, sprintf, and friends */ +#ifdef _KERNEL +#include<sys/systm.h> /* printf, sprintf, and friends */ #endif + #define RF_ERRORMSG(s) printf((s)) #define RF_ERRORMSG1(s,a) printf((s),(a)) #define RF_ERRORMSG2(s,a,b) printf((s),(a),(b)) @@ -171,9 +52,10 @@ extern char rf_panicbuf[]; #define RF_PANIC() {sprintf(rf_panicbuf,"raidframe error at line %d file %s",__LINE__,__FILE__); panic(rf_panicbuf);} +#ifdef _KERNEL #ifdef RF_ASSERT #undef RF_ASSERT -#endif /* RF_ASSERT */ +#endif /* RF_ASSERT */ #ifndef NOASSERT #define RF_ASSERT(_x_) { \ if (!(_x_)) { \ @@ -183,11 +65,12 @@ extern char rf_panicbuf[]; panic(rf_panicbuf); \ } \ } -#else /* !NOASSERT */ +#else /* !NOASSERT */ #define RF_ASSERT(x) {/*noop*/} -#endif /* !NOASSERT */ - -#endif /* !KERNEL */ +#endif /* !NOASSERT */ +#else /* _KERNEL */ +#define RF_ASSERT(x) {/*noop*/} +#endif /* _KERNEL */ /* random stuff */ #define RF_MAX(a,b) (((a) > (b)) ? (a) : (b)) @@ -197,55 +80,22 @@ extern char rf_panicbuf[]; #define RF_DB0_CHECK(a,b) ( ((b)==0) ? 0 : (a)/(b) ) /* get time of day */ -#ifdef KERNEL -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -extern struct timeval time; -#endif /* !__NetBSD__ && !__OpenBSD__ */ #define RF_GETTIME(_t) microtime(&(_t)) -#else /* KERNEL */ -#define RF_GETTIME(_t) gettimeofday(&(_t), NULL); -#endif /* KERNEL */ /* * zero memory- not all bzero calls go through here, only * those which in the kernel may have a user address */ -#ifdef KERNEL -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#define RF_BZERO(_bp,_b,_l) if (IS_SYS_VA(_b)) bzero(_b,_l); else rf_BzeroWithRemap(_bp,_b,_l) -#else - -#define RF_BZERO(_bp,_b,_l) bzero(_b,_l) /* XXX This is likely incorrect. 
GO*/ -#endif /* __NetBSD__ || __OpenBSD__ */ -#else /* KERNEL */ -#define RF_BZERO(_bp,_b,_l) bzero(_b,_l) -#endif /* KERNEL */ -#ifdef sun -#include <sys/param.h> -#ifndef NBPG -#define NBPG PAGESIZE -#endif /* !NBPG */ -#endif /* sun */ +#define RF_BZERO(_bp,_b,_l) bzero(_b,_l) /* XXX This is likely + * incorrect. GO */ -#ifdef IRIX -#include <sys/tfp.h> -#define NBPG _PAGESZ -#endif /* IRIX */ - -#ifdef LINUX -#include <linux/user.h> -#endif /* LINUX */ #define RF_UL(x) ((unsigned long) (x)) #define RF_PGMASK RF_UL(NBPG-1) -#define RF_BLIP(x) (NBPG - (RF_UL(x) & RF_PGMASK)) /* bytes left in page */ +#define RF_BLIP(x) (NBPG - (RF_UL(x) & RF_PGMASK)) /* bytes left in page */ #define RF_PAGE_ALIGNED(x) ((RF_UL(x) & RF_PGMASK) == 0) -#ifdef KERNEL -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#include <dkusage.h> -#endif #if DKUSAGE > 0 #define RF_DKU_END_IO(_unit_,_bp_) { \ int s = splbio(); \ @@ -255,15 +105,14 @@ extern struct timeval time; (_bp_)->b_bcount); \ splx(s); \ } -#else /* DKUSAGE > 0 */ +#else /* DKUSAGE > 0 */ #define RF_DKU_END_IO(unit) { /* noop */ } -#endif /* DKUSAGE > 0 */ -#endif /* KERNEL */ +#endif /* DKUSAGE > 0 */ #ifdef __STDC__ #define RF_STRING(_str_) #_str_ -#else /* __STDC__ */ +#else /* __STDC__ */ #define RF_STRING(_str_) "_str_" -#endif /* __STDC__ */ +#endif /* __STDC__ */ -#endif /* !_RF__RF_GENERAL_H_ */ +#endif /* !_RF__RF_GENERAL_H_ */ diff --git a/sys/dev/raidframe/rf_geniq.c b/sys/dev/raidframe/rf_geniq.c index bfe55cb87d2..41c190028ef 100644 --- a/sys/dev/raidframe/rf_geniq.c +++ b/sys/dev/raidframe/rf_geniq.c @@ -1,4 +1,4 @@ -/* $NetBSD: rf_geniq.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $NetBSD: rf_geniq.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -30,38 +30,6 @@ * code which implements Reed-Solomon encoding for RAID level 6 */ -/* : - * Log: rf_geniq.c,v - * Revision 1.12 1996/07/29 16:37:00 jimz - * remove archs.h include to avoid VPATH problems in kernel - * rf_invertq.c now must include archs.h before invertq.h - * - * Revision 1.11 1996/07/29 15:04:16 jimz - * correct rf_archs.h path for kernel - * - * Revision 1.10 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.9 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.8 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.7 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.6 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.5 1995/12/01 18:29:18 root - * added copyright info - * - */ #define RF_UTILITY 1 #include "rf_pqdeg.h" @@ -72,128 +40,123 @@ val = value; */ -int lsfr_shift(val,poly) -unsigned val, poly; +int +lsfr_shift(val, poly) + unsigned val, poly; { - unsigned new; - unsigned int i; - unsigned high = (val >> 4) & 1; - unsigned bit; + unsigned new; + unsigned int i; + unsigned high = (val >> 4) & 1; + unsigned bit; - new = (poly & 1) ? high : 0; - - for (i=1; i <=4; i++) - { - bit = (val >> (i-1)) & 1; - if (poly & (1<<i)) /* there is a feedback connection */ - new = new | ((bit ^ high)<<i); - else - new = new | (bit << i); - } - return new; -} + new = (poly & 1) ? 
high : 0; + for (i = 1; i <= 4; i++) { + bit = (val >> (i - 1)) & 1; + if (poly & (1 << i)) /* there is a feedback connection */ + new = new | ((bit ^ high) << i); + else + new = new | (bit << i); + } + return new; +} /* generate Q matricies for the data */ RF_ua32_t rf_qfor[32]; -void main() +void +main() { - unsigned int i,j,l,a,b; - unsigned int val; - unsigned int r; - unsigned int m,p,q; + unsigned int i, j, l, a, b; + unsigned int val; + unsigned int r; + unsigned int m, p, q; - RF_ua32_t k; + RF_ua32_t k; - printf("/*\n"); - printf(" * rf_invertq.h\n"); - printf(" */\n"); - printf("/*\n"); - printf(" * GENERATED FILE -- DO NOT EDIT\n"); - printf(" */\n"); - printf("\n"); - printf("#ifndef _RF__RF_INVERTQ_H_\n"); - printf("#define _RF__RF_INVERTQ_H_\n"); - printf("\n"); - printf("/*\n"); - printf(" * rf_geniq.c must include rf_archs.h before including\n"); - printf(" * this file (to get VPATH magic right with the way we\n"); - printf(" * generate this file in kernel trees)\n"); - printf(" */\n"); - printf("/* #include \"rf_archs.h\" */\n"); - printf("\n"); - printf("#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)\n"); - printf("\n"); - printf("#define RF_Q_COLS 32\n"); - printf("RF_ua32_t rf_rn = {\n"); - k[0] = 1; - for (j=0 ; j < 31; j++) - k[j+1] = lsfr_shift(k[j],5); - for (j=0; j < 32; j++) - printf("%d, ",k[j]); - printf("};\n"); + printf("/*\n"); + printf(" * rf_invertq.h\n"); + printf(" */\n"); + printf("/*\n"); + printf(" * GENERATED FILE -- DO NOT EDIT\n"); + printf(" */\n"); + printf("\n"); + printf("#ifndef _RF__RF_INVERTQ_H_\n"); + printf("#define _RF__RF_INVERTQ_H_\n"); + printf("\n"); + printf("/*\n"); + printf(" * rf_geniq.c must include rf_archs.h before including\n"); + printf(" * this file (to get VPATH magic right with the way we\n"); + printf(" * generate this file in kernel trees)\n"); + printf(" */\n"); + printf("/* #include \"rf_archs.h\" */\n"); + printf("\n"); + printf("#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)\n"); + 
printf("\n"); + printf("#define RF_Q_COLS 32\n"); + printf("RF_ua32_t rf_rn = {\n"); + k[0] = 1; + for (j = 0; j < 31; j++) + k[j + 1] = lsfr_shift(k[j], 5); + for (j = 0; j < 32; j++) + printf("%d, ", k[j]); + printf("};\n"); - printf("RF_ua32_t rf_qfor[32] = {\n"); - for (i=0; i < 32; i++) - { - printf("/* i = %d */ { 0, ",i); - rf_qfor[i][0] = 0; - for (j=1; j < 32; j++) - { - val = j; - for (l=0; l < i; l++) - val = lsfr_shift(val,5); - rf_qfor[i][j] = val; - printf("%d, ",val); + printf("RF_ua32_t rf_qfor[32] = {\n"); + for (i = 0; i < 32; i++) { + printf("/* i = %d */ { 0, ", i); + rf_qfor[i][0] = 0; + for (j = 1; j < 32; j++) { + val = j; + for (l = 0; l < i; l++) + val = lsfr_shift(val, 5); + rf_qfor[i][j] = val; + printf("%d, ", val); + } + printf("},\n"); } - printf("},\n"); - } - printf("};\n"); - printf("#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)]\n"); + printf("};\n"); + printf("#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)]\n"); - /* generate the inverse tables. (i,j,p,q) */ - /* The table just stores a. Get b back from - the parity */ - printf("#ifdef KERNEL\n"); - printf("RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */\n"); - printf("#elif defined(NO_PQ)\n"); - printf("RF_ua1024_t rf_qinv[29*29];\n"); - printf("#else /* !KERNEL && NO_PQ */\n"); - printf("RF_ua1024_t rf_qinv[29*29] = {\n"); - for (i=0; i < 29; i++) - { - for (j =0; j < 29; j++) - { - printf("/* i %d, j %d */{ ",i,j); - if (i==j) - for (l=0; l < 1023; l++) printf("0, "); - else - { - for (p=0; p < 32; p++) - for (q=0; q < 32; q++) - { - /* What are a, b such that - a ^ b = p; and - qfor[(28-i)][a ^ rf_rn[i+1]] ^ qfor[(28-j)][b ^ rf_rn[j+1]] = q. - Solve by guessing a. Then testing. 
- */ - for ( a =0 ; a < 32; a++ ) - { - b = a ^ p; - if ( (rf_qfor[28-i][a^ k[i+1]] ^ rf_qfor[28-j][b ^ k[j+1]]) == q ) - break; - } - if (a == 32) printf("unable to solve %d %d %d %d\n",i,j,p,q); - printf("%d,",a); - } - } - printf("},\n"); + /* generate the inverse tables. (i,j,p,q) */ + /* The table just stores a. Get b back from the parity */ + printf("#ifdef KERNEL\n"); + printf("RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */\n"); + printf("#elif defined(NO_PQ)\n"); + printf("RF_ua1024_t rf_qinv[29*29];\n"); + printf("#else /* !KERNEL && NO_PQ */\n"); + printf("RF_ua1024_t rf_qinv[29*29] = {\n"); + for (i = 0; i < 29; i++) { + for (j = 0; j < 29; j++) { + printf("/* i %d, j %d */{ ", i, j); + if (i == j) + for (l = 0; l < 1023; l++) + printf("0, "); + else { + for (p = 0; p < 32; p++) + for (q = 0; q < 32; q++) { + /* What are a, b such that a ^ + * b = p; and qfor[(28-i)][a + * ^ rf_rn[i+1]] ^ + * qfor[(28-j)][b ^ + * rf_rn[j+1]] = q. Solve by + * guessing a. Then testing. 
*/ + for (a = 0; a < 32; a++) { + b = a ^ p; + if ((rf_qfor[28 - i][a ^ k[i + 1]] ^ rf_qfor[28 - j][b ^ k[j + 1]]) == q) + break; + } + if (a == 32) + printf("unable to solve %d %d %d %d\n", i, j, p, q); + printf("%d,", a); + } + } + printf("},\n"); + } } - } - printf("};\n"); - printf("\n#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */\n\n"); - printf("#endif /* !KERNEL && NO_PQ */\n"); - printf("#endif /* !_RF__RF_INVERTQ_H_ */\n"); - exit(0); + printf("};\n"); + printf("\n#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */\n\n"); + printf("#endif /* !KERNEL && NO_PQ */\n"); + printf("#endif /* !_RF__RF_INVERTQ_H_ */\n"); + exit(0); } diff --git a/sys/dev/raidframe/rf_geometry.c b/sys/dev/raidframe/rf_geometry.c deleted file mode 100644 index 15da7cdda8e..00000000000 --- a/sys/dev/raidframe/rf_geometry.c +++ /dev/null @@ -1,891 +0,0 @@ -/* $OpenBSD: rf_geometry.c,v 1.1 1999/01/11 14:29:24 niklas Exp $ */ -/* $NetBSD: rf_geometry.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Changes: - * 10/24/91 Changes to support disk bus contention model - * (MCH) 1. Added media_done_time param to Access_time() - * - * 08/18/92 Geometry routines have been modified to support zone-bit - * recording. - * (AS) 1. Each routine which originally referenced the variable - * 'disk->geom->sectors_per_track' has been modified, - * since the number of sectors per track varies on disks - * with zone-bit recording. - */ - -/* : - * Log: rf_geometry.c,v - * Revision 1.18 1996/08/11 00:40:57 jimz - * fix up broken comment - * - * Revision 1.17 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.16 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.15 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.14 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.13 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.12 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. 
Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.11 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.10 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.9 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.8 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.7 1995/12/01 18:29:34 root - * added copyright info - * - */ - -#include "rf_types.h" -#include "rf_geometry.h" -#include "rf_raid.h" -#include "rf_general.h" -#include "rf_debugMem.h" - -#define DISK_DB "disk_db" -#define DISK_NAME "HP2247" - -#define ABS_DIFF(a,b) ( ((a)>(b)) ? 
((a)-(b)) : ((b)-(a)) ) - -static RF_GeometryList_t *geom_list = (RF_GeometryList_t *) NULL; - -RF_TICS_t rf_globalSpinup = 1.5; - -#define NM_LGTH 80 -#define NM_PATN " %80s" - -static RF_GeometryList_t *Fetch_geometry_db(FILE *fd); -static void Format_disk(RF_DiskState_t *disk, long sectors_per_block); -static long Find_cyl(RF_SectorNum_t block, RF_DiskState_t *disk); -static long Find_track(RF_SectorNum_t block, RF_DiskState_t *disk); -static long Find_phys_sector(RF_SectorNum_t block, RF_DiskState_t *disk); -static RF_TICS_t Delay_to(RF_TICS_t cur_time, RF_SectorNum_t block, - RF_DiskState_t *disk); -static RF_TICS_t Seek_time(long to_cyl, long to_track, long from_cyl, - long from_track, RF_DiskState_t *disk); -static RF_TICS_t Seek(RF_TICS_t cur_time, RF_SectorNum_t block, - RF_DiskState_t *disk, long update); -static RF_TICS_t Rotate(RF_TICS_t cur_time, RF_SectorNum_t block, - RF_DiskState_t *disk, long update); -static RF_TICS_t Seek_Rotate(RF_TICS_t cur_time, RF_SectorNum_t block, - RF_DiskState_t *disk, long update); -static RF_TICS_t GAP(long sec_per_track, RF_DiskState_t *disk); -static RF_TICS_t Block_access_time(RF_TICS_t cur_time, RF_SectorNum_t block, - RF_SectorCount_t numblocks, RF_DiskState_t *disk, long update); -static void Zero_stats(RF_DiskState_t *disk); -static RF_TICS_t Update_stats(RF_TICS_t cur_time, RF_TICS_t seek, RF_TICS_t rotate, - RF_TICS_t transfer, RF_DiskState_t *disk); -static void rf_DiskParam(long numCyls, RF_TICS_t minSeek, RF_TICS_t avgSeek, RF_TICS_t maxSeek, - RF_TICS_t *a, RF_TICS_t *b, RF_TICS_t *c); - -static RF_GeometryList_t *Fetch_geometry_db(fd) - FILE *fd; -{ - long ret, lineno; - char name[NM_LGTH], title[20]; - RF_GeometryList_t * list = (RF_GeometryList_t *) NULL, - ** next_ptr = & list; - - if( RF_MAX_DISKNAME_LEN<NM_LGTH ) RF_PANIC(); - lineno = 0; - while( (ret = fscanf( fd, " %20s", title )) != EOF ) { - float tmp_f1, tmp_f2, tmp_f3, tmp_f4; - float tmp_f5=0.0; - float tmp_f6=0.0; - RF_Geometry_t *g; - long 
i, x, y, z, num_cylinders; - RF_ZoneList_t ** znext_ptr; - - if( ret == 1 && strncmp( "enddisk", title, 8 ) == 0 ) break; - - RF_Calloc(*next_ptr, 1, sizeof(RF_GeometryList_t), (RF_GeometryList_t *)); - (*next_ptr)->next = (RF_GeometryList_t *) NULL; - RF_Calloc(g, 1, sizeof(RF_Geometry_t), (RF_Geometry_t *)); - (*next_ptr)->disk = g; - next_ptr = &( (*next_ptr)->next ); /*prep for next iteration */ - lineno++; - if (fscanf( fd, NM_PATN, name ) != 1) { - fprintf(stderr,"Disk DB Error: Can't get disk name from disk db\n"); - fprintf(stderr,"lineno=%d\n", lineno); - fprintf(stderr,"name=\"%s\"\n", name); - exit(1); - } - lineno++; - if ( (fscanf(fd, " tracks per cylinder %ld", &(g->tracks_per_cyl)) != 1) || g->tracks_per_cyl <= 0) { - fprintf(stderr,"Disk DB Error: Missing or invalid tracks/cyl for disk %s\n", name); exit(1); - } - lineno++; - if ( (fscanf(fd, " number of disk zones %ld", &(g->num_zones)) != 1) || g->num_zones <= 0) { - fprintf(stderr,"Disk DB Error: Missing or invalid number of zones for disk %s\n", name); exit(1); - } - - - - /* This section of code creates the linked list which - contains the disk's zone information. */ - g->zbr_data = (RF_ZoneList_t *) NULL; - znext_ptr = &(g->zbr_data); - num_cylinders = 0; - - /* This for-loop reads in the cylinder count, the sectors - per track, and track skew for each zone on the disk. 
*/ - for (i=1; i <= g->num_zones; i++) { - lineno++; - if ( (fscanf(fd, " number of cylinders in zone %ld", &x) != 1) || x < 1) { - fprintf(stderr,"Disk DB Error: Zone %ld: Missing or invalid cyls/zone for disk %s\n", i, name); exit(1); - } - lineno++; - if ( (fscanf(fd, " sectors per track in zone %ld", &y) != 1) || y < 1 ) { - fprintf(stderr,"Disk DB Error: Zone %ld: Missing or invalid sectors/track for disk %s\n", i, name); exit(1); - } - lineno++; - if ( (fscanf(fd, " track skew in zone %ld", &z) != 1) || z < 0 ) { - fprintf(stderr,"Disk DB Error: Zone %ld: Missing or invalid track skew for disk %s\n",i, name); exit(1); - } - - RF_Calloc(*znext_ptr, 1, sizeof(RF_ZoneList_t), (RF_ZoneList_t *)); - (*znext_ptr)->next = (RF_ZoneList_t *) NULL; - (*znext_ptr)->zone.num_cylinders = x; - (*znext_ptr)->zone.sec_per_track = y; - (*znext_ptr)->zone.track_skew = z; - (*znext_ptr)->zone.num_sectors = - (*znext_ptr)->zone.num_cylinders * - g->tracks_per_cyl * - (*znext_ptr)->zone.sec_per_track; - znext_ptr = &((*znext_ptr)->next); - num_cylinders = num_cylinders + x; - } /* End of for-loop */ - - lineno++; - if ( (fscanf(fd, " revolution time %f", &tmp_f1) != 1) || tmp_f1 <= 0) { - fprintf(stderr,"Disk DB Error: Missing or invalid revolution time for disk %s\n",name); exit(1); - } - lineno++; - if ( (fscanf(fd, " 1 cylinder seek time %f", &tmp_f2 ) != 1) || tmp_f2 <= 0) { - fprintf(stderr,"Disk DB Error: Missing or invalid 1-cyl seek time for disk %s\n",name); exit(1); - } - lineno++; - if ( (fscanf(fd, " max stroke seek time %f", &tmp_f3) != 1) || tmp_f3 <= 0) { - fprintf(stderr,"Disk DB Error: Missing or invalid max seek time for disk %s\n",name); exit(1); - } - lineno++; - if ( (fscanf(fd, " average seek time %f", &tmp_f4) != 1) || tmp_f4 <= 0) { - fprintf(stderr,"Disk DB Error: Missing or invalid avg seek time for disk %s\n",name); exit(1); - } - lineno++; - if ( (fscanf(fd, " time to sleep %f", &tmp_f5) != 1) || tmp_f4 <= 0) { - fprintf(stderr,"Disk DB Error: Missing 
or invalid time to sleep for disk %s\n",name); exit(1); - } - lineno++; - if ( (fscanf(fd, " time to spinup %f", &tmp_f6) != 1) || tmp_f4 <= 0) { - fprintf(stderr,"Disk DB Error: Missing or invalid time to sleep for disk %s\n",name); exit(1); - } - strcpy( g->disk_name, name ); - g->revolution_time = tmp_f1; - g->seek_one_cyl = tmp_f2; - g->seek_max_stroke = tmp_f3; - g->seek_avg = tmp_f4; - g->time_to_sleep = tmp_f5; - g->time_to_spinup = tmp_f6; - /* convert disk specs to seek equation coeff */ - rf_DiskParam( num_cylinders, g->seek_one_cyl, - g->seek_avg, g->seek_max_stroke, - &g->seek_sqrt_coeff, &g->seek_linear_coeff, - &g->seek_constant_coeff ); - } - return( list ); -} - -static void Format_disk(disk, sectors_per_block) - RF_DiskState_t *disk; - long sectors_per_block; -{ - long sector_count = 0; - RF_ZoneList_t *z; - - if( disk == (RF_DiskState_t *) NULL ) RF_PANIC(); - if( disk->geom == (RF_Geometry_t *) NULL ) RF_PANIC(); - if( sectors_per_block <=0 ) RF_PANIC(); - - disk->sectors_per_block = sectors_per_block; - z = disk->geom->zbr_data; - /* This while-loop visits each disk zone and computes the total - number of sectors on the disk. 
*/ - while (z != (RF_ZoneList_t *) NULL) { - sector_count = sector_count + (z->zone.num_cylinders * - disk->geom->tracks_per_cyl * - z->zone.sec_per_track); - z = z->next; - } - - disk->last_block_index = (sector_count / sectors_per_block) - 1; -} - -void rf_InitDisk( disk, disk_db, disk_name, init_cyl, init_track, init_offset, row, col) - RF_DiskState_t *disk; - char *disk_db; - char *disk_name; - long init_cyl; - long init_track; - RF_TICS_t init_offset; - int row; - int col; -{ - RF_GeometryList_t *gp; - FILE *f; - - RF_ASSERT( disk != (RF_DiskState_t *) NULL ); - - disk->cur_cyl = init_cyl; - disk->cur_track = init_track; - disk->index_offset = init_offset; - disk->geom = (RF_Geometry_t *) NULL; - disk->queueFinishTime = 0.0; - disk->lastBlock = 0; - disk->row=row; - disk->col=col; - Zero_stats(disk); - - if (strncmp(disk_name,"/dev",4 )==0) strcpy(disk_name,"HP2247"); - - if( geom_list == (RF_GeometryList_t *) NULL ) { - f = fopen(disk_db,"r"); - if (f == NULL) { - fprintf(stderr, "ERROR: RAIDframe could not open disk db %s\n", disk_db); - exit(1); - } - geom_list = Fetch_geometry_db( f ); - fclose( f ); - } - for( gp = geom_list; gp != (RF_GeometryList_t *) NULL; gp = gp->next ) { - RF_ASSERT( gp->disk != (RF_Geometry_t *) NULL - && gp->disk->disk_name != (char *) NULL ); - if( strncmp( disk_name, gp->disk->disk_name, RF_MAX_DISKNAME_LEN ) - == 0 ) { - disk->geom = gp->disk; - break; - } - } - if( disk->geom == (RF_Geometry_t *) NULL ) { - fprintf( stderr, "Disk %s not found in database %s\n", - disk_name, disk_db ); - exit(1); - } - - Format_disk( disk, 1 ); -} - -static long Find_cyl( block, disk ) - RF_SectorNum_t block; - RF_DiskState_t *disk; -{ - RF_ZoneList_t * z; - long tmp; - - long log_sector = block * disk->sectors_per_block; - long cylinder = 0; - z = disk->geom->zbr_data; - /* This while-loop finds the zone to which log_sector belongs, - computes the starting cylinder number of this zone, and - computes the sector offset into this zone. 
*/ - while (log_sector >= z->zone.num_sectors) { - log_sector = log_sector - z->zone.num_sectors; - cylinder = cylinder + z->zone.num_cylinders; - z = z->next; - } - - /* The cylinder to which log_sector belongs equals the starting - cylinder number of its zone plus the cylinder offset into - the zone. */ - tmp = cylinder + (log_sector / (z->zone.sec_per_track * - disk->geom->tracks_per_cyl)); - - return( tmp ); -} - -static long Find_track( block, disk ) - RF_SectorNum_t block; - RF_DiskState_t *disk; -{ - RF_ZoneList_t * z; - long tmp; - - long log_sector = block * disk->sectors_per_block; - long track = 0; - z = disk->geom->zbr_data; - /* This while-loop finds the zone to which log_sector belongs, - computes the starting track number of this zone, and computes - the sector offset into this zone. */ - while (log_sector >= z->zone.num_sectors) { - log_sector = log_sector - z->zone.num_sectors; - track = track + (z->zone.num_cylinders * - disk->geom->tracks_per_cyl); - z = z->next; - } - - /* The track to which log_sector belongs equals the starting - track number of its zone plus the track offset into the zone, - modulo the number of tracks per cylinder on the disk. */ - tmp = (track + (log_sector / z->zone.sec_per_track)) % - disk->geom->tracks_per_cyl; - - return( tmp ); -} - -/* - ** The position of a logical sector relative to the index mark on any track - ** is not simple. A simple organization would be: -** -** track 0 : 0, 1, 2, 3, ... N-1 -** track 1 : N,N+1,N+2,N+3, ... 2N-1 -** ^ -** Index mark just before this point -** -** This is not good because sequential access of sectors N-1 then N -** will require a full revolution in between (because track switch requires -** a couple of sectors to recalibrate from embedded servo). So frequently -** sequentially numbered sectors are physically skewed so that the next -** accessible sector after N-1 will be N (with a skew of 2) -** -** track 0 : 0, 1, 2, 3, ... N-1 -** track 1 : 2N-2,2N-1, N, N+1, ... 
2N-3 -** ^ -** Index mark just before this point -** -** Layout gets even more complex with cylinder boundaries. Seek time -** is A + B*M where M is the number of cylinders to seek over. On a sequential -** access that crosses a cylinder boundary, the disk will rotate for -** A+B seconds, then "track skew" sectors (inter-sector gaps actually) -** before it can access another sector, so the cylinder to cylinder skew -** is "track skew" + CEIL( sectors_per_track*(A+B)/revolution_time ). -** -** So if sector 0 is 0 sectors from the index mark on the first track, -** where is sector X relative to the index mark on its track? -** -** ( ( X % sectors_per_track ) basic relative position ** -** + track_skew * ( X / sectors_per_track ) skewed for each track ** -** + CEIL( sectors_per_track*(A+B)/revolution_time ) -** * ( X / sectors_per_cylinder ) skewed more for each cyl ** -** ) % sectors_per_track wrapped around in the track ** -** -** -*/ - -static long Find_phys_sector(block, disk) - RF_SectorNum_t block; - RF_DiskState_t *disk; -{ - long phys = 0; - RF_ZoneList_t * z; - long previous_spt = 1; - long sector = block * disk->sectors_per_block; - - z = disk->geom->zbr_data; - /* This while-loop finds the zone to which sector belongs, - and computes the physical sector up to that zone. */ - while (sector >= z->zone.num_sectors) { - sector = sector - z->zone.num_sectors; - /* By first multiplying 'phys' by the sectors per track in - the current zone divided by the sectors per track in the - previous zone, we convert a given physical sector in one - zone to an equivalent physical sector in another zone. 
*/ - phys = ((phys * z->zone.sec_per_track / previous_spt) + - (((z->zone.num_sectors - 1) % z->zone.sec_per_track) + - (z->zone.track_skew * z->zone.num_cylinders * - disk->geom->tracks_per_cyl) + - (long) ceil( (double) z->zone.sec_per_track * - (disk->geom->seek_constant_coeff) / - disk->geom->revolution_time) * - z->zone.num_cylinders)) % - z->zone.sec_per_track; - previous_spt = z->zone.sec_per_track; - z = z->next; - } - - /* The final physical sector equals the physical sector up to - the particular zone, plus the physical sector caused by the - sector offset into this zone. */ - phys = ((phys * z->zone.sec_per_track / previous_spt) + - ((sector % z->zone.sec_per_track) + - (z->zone.track_skew * (sector / z->zone.sec_per_track)) + - (long) ceil( (RF_TICS_t) z->zone.sec_per_track * - (disk->geom->seek_constant_coeff) / - disk->geom->revolution_time) * - (sector / (z->zone.sec_per_track * - disk->geom->tracks_per_cyl)))) % - z->zone.sec_per_track; - - - return( phys ); -} - -/* - ** When each disk starts up, its index mark is a fraction (f) of a rotation - ** ahead from its heads (in the direction of rotation). The sector - ** under its heads is at a fraction f of a rotation from the index - ** mark. After T time has past, T/rotation_time revolutions have occured, so - ** the sector under the heads is at a fraction FRAC(f+T/rotation_time) of a - ** rotation from the index mark. 
If the target block is at physical sector - ** X relative to its index mark, then it is at fraction (X/sectors_per_track), - ** so the rotational delay is - ** ((X/sectors_per_track)-FRAC(f+T/rotation_time)) * revolution_time - ** if this is positive, otherwise it is - ** (1+(X/sectors_per_track)-FRAC(f+T/rotation_time)) * revolution_time - */ - -#define FRAC(a) ( (a) - (long) floor(a) ) - -static RF_TICS_t Delay_to(cur_time, block, disk) - RF_TICS_t cur_time; - RF_SectorNum_t block; - RF_DiskState_t *disk; -{ - RF_TICS_t tmp; - RF_ZoneList_t *z; - - long sector = block * disk->sectors_per_block; - z = disk->geom->zbr_data; - /* This while-loop finds the zone to which sector belongs. */ - while (sector >= z->zone.num_sectors) { - sector = sector - z->zone.num_sectors; - z = z->next; - } - - tmp = ( - (RF_TICS_t) Find_phys_sector(block,disk)/z->zone.sec_per_track - - FRAC(disk->index_offset+cur_time/disk->geom->revolution_time) - ) * disk->geom->revolution_time; - if( tmp < 0 ) tmp += disk->geom->revolution_time; - if( tmp < 0 ) RF_PANIC(); - return( tmp ); -} - -/* Hmmm...they seem to be computing the head switch time as - * equal to the track skew penalty. Is this an approximation? - * (MCH) - */ -static RF_TICS_t Seek_time( to_cyl, to_track, from_cyl, from_track, disk ) - long to_cyl; - long to_track; - long from_cyl; - long from_track; - RF_DiskState_t *disk; -{ - long cyls = ABS_DIFF( from_cyl, to_cyl ) - 1; - RF_TICS_t seek = 0.0; - RF_ZoneList_t * z; - - /* printf("Seek_time: from_cyl %ld, to_cyl %ld, from_trk %ld, to_trk %ld\n",from_cyl, to_cyl, from_track, to_track); */ - if( from_cyl != to_cyl ) { - z = disk->geom->zbr_data; - /* This while-loop finds the zone to which to_cyl belongs. 
*/ - while (to_cyl >= z->zone.num_cylinders) { - to_cyl = to_cyl - z->zone.num_cylinders; - z = z->next; - } - - seek = disk->geom->seek_constant_coeff - + disk->geom->seek_linear_coeff * cyls - + disk->geom->seek_sqrt_coeff * sqrt( (double) cyls ) - + z->zone.track_skew * disk->geom->revolution_time / - z->zone.sec_per_track; - - } else if( from_track != to_track ) { - /* from_track and to_track must lie in the same zone. */ - z = disk->geom->zbr_data; - /* This while-loop finds the zone to which from_cyl belongs. */ - while (from_cyl >= z->zone.num_cylinders) { - from_cyl = from_cyl - z->zone.num_cylinders; - z = z->next; - } - - seek = z->zone.track_skew - * disk->geom->revolution_time - / z->zone.sec_per_track; - } - return( seek ); -} - -static RF_TICS_t Seek(cur_time, block, disk, update) - RF_TICS_t cur_time; - RF_SectorNum_t block; - RF_DiskState_t *disk; - long update; -{ - long cur_cyl, cur_track; - /* - ** current location is derived from the time, - ** current track and current cylinder - ** - ** update current location as you go - */ - - RF_ASSERT( block <= disk->last_block_index ); - cur_cyl = disk->cur_cyl; - cur_track = disk->cur_track; - if (update) { - disk->cur_cyl = Find_cyl( block, disk ); - disk->cur_track = Find_track( block, disk ); - } - return( Seek_time( disk->cur_cyl, disk->cur_track, - cur_cyl, cur_track, disk ) ); -} - -static RF_TICS_t Rotate(cur_time, block, disk, update) - RF_TICS_t cur_time; - RF_SectorNum_t block; - RF_DiskState_t *disk; - long update; -{ - /* - ** current location is derived from the time, - ** current track and current cylinder - ** - ** block the process until at the appropriate block - ** updating current location as you go - */ - - RF_ASSERT( block <= disk->last_block_index ); - return( Delay_to( cur_time, block, disk ) ); -} - -static RF_TICS_t Seek_Rotate(cur_time, block, disk, update) - RF_TICS_t cur_time; - RF_SectorNum_t block; - RF_DiskState_t *disk; - long update; -{ - RF_TICS_t seek, delay; - - 
RF_ASSERT( block <= disk->last_block_index ); - seek = Seek( cur_time, block, disk, update ); - delay = seek + Rotate( cur_time+seek, block, disk, update ); - return( delay ); -} - -static RF_TICS_t GAP(sec_per_track, disk) - long sec_per_track; - RF_DiskState_t *disk; -{ - RF_TICS_t tmp = (disk->geom->revolution_time/(100*sec_per_track)); - return (tmp); -} - -RF_TICS_t Block_access_time(cur_time, block, numblocks, disk, update) - RF_TICS_t cur_time; - RF_SectorNum_t block; - RF_SectorCount_t numblocks; - RF_DiskState_t *disk; - long update; -{ - RF_TICS_t delay = 0; - long cur = block, end = block + numblocks; - long sector, tmp; - RF_ZoneList_t * z; - /* - ** this is the same as Seek_Rotate by merit of the mapping - ** except that the access ends before the gap to the next block - */ - RF_ASSERT( numblocks > 0 && end-1 <= disk->last_block_index ); - - while( cur < end ) { - sector = cur * disk->sectors_per_block; - z = disk->geom->zbr_data; - /* This while-loop finds the zone to which sector belongs. 
*/ - while (sector >= z->zone.num_sectors) { - sector = sector - z->zone.num_sectors; - z = z->next; - } - - tmp = RF_MIN( end - cur, z->zone.sec_per_track - - cur % z->zone.sec_per_track ); - delay += tmp * disk->geom->revolution_time / - z->zone.sec_per_track - - GAP(z->zone.sec_per_track, disk); - cur += tmp; - if( cur != end ) - delay += Seek_Rotate( cur_time+delay, cur, disk, update ); - } - return( delay ); -} - -static void Zero_stats(disk) - RF_DiskState_t *disk; -{ - char traceFileName[64]; - disk->stats.num_events = 0; - disk->stats.seek_sum = 0; - disk->stats.seekSq_sum = 0; - disk->stats.rotate_sum = 0; - disk->stats.rotateSq_sum = 0; - disk->stats.transfer_sum = 0; - disk->stats.transferSq_sum = 0; - disk->stats.access_sum = 0; - disk->stats.accessSq_sum = 0; - disk->stats.sleep_sum=0; - disk->stats.idle_sum=0; - disk->stats.rw_sum=0; - disk->stats.spinup_sum=0; - disk->stats.last_acc=0; - if (rf_diskTrace){ - sprintf (traceFileName,"rf_diskTracer%dc%d\0",disk->row,disk->col); - if ( (disk->traceFile= fopen(traceFileName, "w")) == NULL) { - perror(traceFileName); RF_PANIC();} - } -} - -static RF_TICS_t Update_stats(cur_time, seek, rotate, transfer, disk) - RF_TICS_t cur_time; - RF_TICS_t seek; - RF_TICS_t rotate; - RF_TICS_t transfer; - RF_DiskState_t *disk; -{ - RF_TICS_t spinup=0; - RF_TICS_t sleep=0; - RF_TICS_t idle=0; - - disk->stats.num_events++; - disk->stats.seek_sum += seek; - disk->stats.seekSq_sum += seek*seek; - disk->stats.rotate_sum += rotate; - disk->stats.rotateSq_sum += rotate*rotate; - disk->stats.transfer_sum += transfer; - disk->stats.transferSq_sum += transfer*transfer; - disk->stats.access_sum += seek+rotate+transfer; - disk->stats.accessSq_sum += - (seek+rotate+transfer)*(seek+rotate+transfer); - -/* ASSERT (cur_time - disk->stats.last_acc >= 0); */ - - if (cur_time-disk->stats.last_acc>disk->geom->time_to_sleep){ - idle=disk->geom->time_to_sleep; - - sleep = cur_time - disk->stats.last_acc - idle; - 
spinup=disk->geom->time_to_spinup; - rf_globalSpinup = spinup; - } - - else{ - idle=cur_time - disk->stats.last_acc; - } - - - disk->stats.sleep_sum+=sleep; - disk->stats.idle_sum+=idle; - disk->stats.rw_sum+=seek+rotate+transfer; - disk->stats.spinup_sum+=spinup; - - if (rf_diskTrace){ - fprintf(disk->traceFile,"%g %g\n",disk->stats.last_acc,2.0); - fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle),2.0); - if (sleep){ - fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle),1.0); - fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle+sleep),1.0); - } - - if (spinup){ - fprintf(disk->traceFile,"%g %g\n",(cur_time),4.0); - fprintf(disk->traceFile,"%g %g\n",(cur_time+spinup),4.0); - } - - fprintf(disk->traceFile,"%g %g\n",(cur_time+spinup),3.0); - fprintf(disk->traceFile,"%g %g\n",(cur_time+spinup+seek+rotate+transfer),3.0); - - - } - - disk->stats.last_acc=cur_time+spinup+seek+rotate+transfer; - - return(spinup); -} - - -void rf_StopStats(disk, cur_time) - RF_DiskState_t *disk; - RF_TICS_t cur_time; -{ - - RF_TICS_t sleep=0; - RF_TICS_t idle=0; - - if (cur_time - disk->stats.last_acc > disk->geom->time_to_sleep){ - - sleep = cur_time - disk->stats.last_acc-disk->geom->time_to_sleep; - idle = disk->geom->time_to_sleep; - - } - - - - else{ - idle=cur_time - disk->stats.last_acc; - } - - disk->stats.sleep_sum+=sleep; - disk->stats.idle_sum+=idle; - - if (rf_diskTrace){ - fprintf(disk->traceFile,"%g %g\n",disk->stats.last_acc,2.0); - fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle),2.0); - if (sleep){ - fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle),1.0); - fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle+sleep),1.0); - } - fclose(disk->traceFile); - } -} - -/* Sometimes num_events is zero because the disk was failed at the start - * of the simulation and never replaced. This causes a crash on some - * architectures, which is why we have the conditional. 
- */ -void rf_Report_stats( - RF_DiskState_t *disk, - long *numEventsPtr, - RF_TICS_t *avgSeekPtr, - RF_TICS_t *avgRotatePtr, - RF_TICS_t *avgTransferPtr, - RF_TICS_t *avgAccessPtr, - RF_TICS_t *SleepPtr, - RF_TICS_t *IdlePtr, - RF_TICS_t *RwPtr, - RF_TICS_t *SpinupPtr) -{ - *numEventsPtr = disk->stats.num_events; - if (disk->stats.num_events) { - *avgSeekPtr = disk->stats.seek_sum / disk->stats.num_events; - *avgRotatePtr = disk->stats.rotate_sum / disk->stats.num_events; - *avgTransferPtr = disk->stats.transfer_sum / disk->stats.num_events; - *avgAccessPtr = disk->stats.access_sum / disk->stats.num_events; - } else { - *avgSeekPtr = 0; - *avgRotatePtr = 0; - *avgTransferPtr = 0; - *avgAccessPtr = 0; - } - *SleepPtr = disk->stats.sleep_sum; - *IdlePtr = disk->stats.idle_sum; - *RwPtr = disk->stats.rw_sum ; - *SpinupPtr = disk->stats.spinup_sum ; -} - -int rf_Access_time( access_time, cur_time, block, numblocks, disk, media_done_time, update ) - RF_TICS_t *access_time; - RF_TICS_t cur_time; - RF_SectorNum_t block; - RF_SectorCount_t numblocks; - RF_DiskState_t *disk; - RF_TICS_t *media_done_time; - long update; /* 1 => update disk state, 0 => don't */ -{ - /* - * first move to the start of the data, then sweep to the end - */ - RF_TICS_t spinup=0; - RF_TICS_t seek = Seek( cur_time, block, disk, update ); - RF_TICS_t rotate = Rotate( cur_time+seek, block, disk, update ); - RF_TICS_t transfer = Block_access_time( cur_time+seek+rotate, block, - numblocks, disk, update ); - - if (update) spinup=Update_stats(cur_time, seek, rotate, transfer, disk ); - *media_done_time = seek+rotate+transfer; - *access_time =( seek+rotate+transfer+spinup); - return(0); -} - -/* added to take into account the fact that maping code acounts for the disk label */ - -void rf_GeometryDoReadCapacity(disk, numBlocks, blockSize) - RF_DiskState_t *disk; - RF_SectorCount_t *numBlocks; - int *blockSize; -{ - *numBlocks= (disk->last_block_index + 1 )-rf_protectedSectors; - - *blockSize= 
(disk->sectors_per_block*512 ); - - /* in bytes */ -} - - -/* END GEOMETRY ROUTINES **********************************************/ - - -static void rf_DiskParam(numCyls, minSeek, avgSeek, maxSeek, a, b, c) - long numCyls; - RF_TICS_t minSeek; - RF_TICS_t avgSeek; - RF_TICS_t maxSeek; - RF_TICS_t *a; - RF_TICS_t *b; - RF_TICS_t *c; -{ - if (minSeek == avgSeek && minSeek == maxSeek) { - *a = 0.0; *b = 0.0; *c = minSeek; - } else { - *a = ( 15 * avgSeek - 10 * minSeek - 5 * maxSeek ) / ( 3 * sqrt( (double) numCyls )); - *b = ( 7 * minSeek + 8 * maxSeek - 15 * avgSeek ) / ( 3 * numCyls ); - *c = minSeek; - } -} diff --git a/sys/dev/raidframe/rf_geometry.h b/sys/dev/raidframe/rf_geometry.h deleted file mode 100644 index 3d77b1ea402..00000000000 --- a/sys/dev/raidframe/rf_geometry.h +++ /dev/null @@ -1,155 +0,0 @@ -/* $OpenBSD: rf_geometry.h,v 1.1 1999/01/11 14:29:24 niklas Exp $ */ -/* $NetBSD: rf_geometry.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */ -/* geometry.h - * code from raidSim to model disk behavior - */ -/* - * Changes: - * 8/18/92 Additional structures have been declared and existing - * structures have been modified in order to support zone- - * bit recording. - * (AS) 1. The types 'Zone_data' and 'Zone_list' have been defined. - * (AS) 2. The type 'Geometry' has been modified. - */ - -/* : - * Log: rf_geometry.h,v - * Revision 1.10 1996/08/06 22:25:08 jimz - * include raidframe stuff before system stuff - * - * Revision 1.9 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.8 1996/05/31 10:16:14 jimz - * add raidsim note - * - * Revision 1.7 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.6 1996/05/30 11:29:41 jimz - * Numerous bug fixes. 
Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.5 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.4 1995/12/01 18:29:45 root - * added copyright info - * - */ - -#ifndef _RF__RF_GEOMETRY_H_ -#define _RF__RF_GEOMETRY_H_ - -#include "rf_types.h" -#include "rf_sys.h" -#ifndef _KERNEL -#include <string.h> -#include <math.h> -#if defined(__NetBSD__) || defined(__OpenBSD__) -#include <stdio.h> -#endif /* __NetBSD__ || __OpenBSD__ */ -#endif - -#define RF_MAX_DISKNAME_LEN 80 - -typedef struct RF_ZoneData_s { - long num_cylinders; /* Number of cylinders in zone */ - long sec_per_track; /* Sectors per track in zone */ - long track_skew; /* Skew of each track in zone */ - long num_sectors; /* Number of sectors in zone */ -} RF_ZoneData_t; - -/* - * Linked list containing zone data - */ -typedef struct RF_ZoneList_s RF_ZoneList_t; -struct RF_ZoneList_s { - RF_ZoneData_t zone; /* for each disk */ - RF_ZoneList_t *next; -}; - -typedef struct RF_Geometry_s { - char disk_name[RF_MAX_DISKNAME_LEN]; /* name for a type of disk */ - long tracks_per_cyl; /* tracks in a cylinder */ - /* assume 1 head per track, 1 set of read/write electronics */ - long num_zones; /* number of ZBR zones on disk */ - RF_TICS_t revolution_time; /* milliseconds per revolution */ - RF_TICS_t seek_one_cyl; /* adjacent cylinder seek time */ - RF_TICS_t seek_max_stroke; /* end to end seek time */ - RF_TICS_t seek_avg; /* random 
from/to average time */ - /* - * seek time = a * (x-1)^0.5 + b * (x-1) + c - * x >= 1 is the seek distance in cylinders - */ - RF_TICS_t seek_sqrt_coeff; /* a */ - RF_TICS_t seek_linear_coeff; /* b */ - RF_TICS_t seek_constant_coeff; /* c */ - RF_ZoneList_t *zbr_data; /* linked list with ZBR data */ - RF_TICS_t time_to_sleep; /* seconds of idle time before disks goes to sleep */ - RF_TICS_t time_to_spinup; /* seconds spin up takes */ -} RF_Geometry_t; - -typedef struct RF_GeometryList_s RF_GeometryList_t; -struct RF_GeometryList_s { - RF_Geometry_t *disk; - RF_GeometryList_t *next; -}; - -typedef struct RF_DiskStats_s { - long num_events; - RF_TICS_t seek_sum; - RF_TICS_t seekSq_sum; - RF_TICS_t rotate_sum; - RF_TICS_t rotateSq_sum; - RF_TICS_t transfer_sum; - RF_TICS_t transferSq_sum; - RF_TICS_t access_sum; - RF_TICS_t accessSq_sum; - RF_TICS_t sleep_sum; - RF_TICS_t idle_sum; - RF_TICS_t rw_sum; - RF_TICS_t spinup_sum; - RF_TICS_t last_acc; /* time the last acces was finished */ -} RF_DiskStats_t; - -struct RF_DiskState_s { - int row; - int col; - RF_Geometry_t *geom; - long sectors_per_block; /* formatted per disk */ - long last_block_index; /* format result for convenience */ - RF_TICS_t index_offset; /* powerup head offset to index mark */ - long cur_track; /* current track */ - long cur_cyl; /* current cylinder */ - RF_DiskStats_t stats; /* disk statistics */ - - RF_TICS_t queueFinishTime; /* used by shortest-seek code */ - long lastBlock; - FILE *traceFile; -}; -typedef struct RF_DiskState_s RF_DiskState_t; - -extern RF_TICS_t rf_globalSpinup; - -void rf_InitDisk(RF_DiskState_t *disk, char *disk_name, char *disk_db, long init_cyl, - long init_track, RF_TICS_t init_offset, int row, int col); -void rf_StopStats(RF_DiskState_t *disk, RF_TICS_t cur_time); -void rf_Report_stats(RF_DiskState_t *disk, long *numEventsPtr, RF_TICS_t *avgSeekPtr, - RF_TICS_t *avgRotatePtr, RF_TICS_t *avgTransferPtr, RF_TICS_t *avgAccessPtr, - RF_TICS_t *SleepPtr, RF_TICS_t *IdlePtr, 
RF_TICS_t *RwPtr, RF_TICS_t *SpinupPtr); -int rf_Access_time(RF_TICS_t *access_time, RF_TICS_t cur_time, - RF_SectorNum_t block, RF_SectorCount_t numblocks, RF_DiskState_t *disk, - RF_TICS_t *media_done_time, long update); -void rf_GeometryDoReadCapacity(RF_DiskState_t *disk, RF_SectorCount_t *numBlocks, - int *blockSize); - -#endif /* !_RF__RF_GEOMETRY_H_ */ diff --git a/sys/dev/raidframe/rf_heap.c b/sys/dev/raidframe/rf_heap.c deleted file mode 100644 index ecb7a14518d..00000000000 --- a/sys/dev/raidframe/rf_heap.c +++ /dev/null @@ -1,274 +0,0 @@ -/* $OpenBSD: rf_heap.c,v 1.1 1999/01/11 14:29:25 niklas Exp $ */ -/* $NetBSD: rf_heap.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* We manage a heap of data,key pairs, where the key a simple data type - * and the data is any singular data type. We allow the caller to add - * pairs, remote pairs, peek at the top pair, and do delete/add combinations. 
- * The latter are efficient because we only reheap once. - * - * David Kotz 1990? and 1993 - * - * Modify the heap to work with events, with the smallest time on the top. - * Song Bac Toh, 1994 - */ - -/* : - * Log: rf_heap.c,v - * Revision 1.8 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.7 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.6 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.5 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.4 1995/12/01 19:03:58 root - * added copyright info - * - */ - -#include "rf_types.h" -#include "rf_heap.h" -#include "rf_general.h" - -/* return RF_TRUE if the two requests in the heap match */ -#define Matching_REQUESTS(HeapData1, HeapData2) \ -((HeapData1->disk == HeapData2->disk) && \ - (HeapData1->req_code == HeapData2->req_code)) - -/* getting around in the heap */ -/* we don't use the 0th element of the array */ -#define ROOT 1 -#define LCHILD(p) (2 * (p)) -#define RCHILD(p) (2 * (p) + 1) -#define PARENT(c) ((c) / 2) - -/* @SUBTITLE "Debugging macros" */ -/* The following are used for debugging our callers - * as well as internal stuff - */ - -#define CHECK_INVARIANTS 1 - -#ifdef CHECK_INVARIANTS -#define INVARIANT2(x, y) \ -{ \ - if (!(x)) { \ - fprintf(stderr, "INVARIANT false: in \"%s\", line %d\n", \ - __FILE__, __LINE__); \ - fprintf(stderr, (y)); \ - exit(1); \ - } \ -} - -/* -#define INVARIANT3(x, y, z) \ - { \ - if (!(x)) { \ - fprintf(stderr, "INVARIANT false: in \"%s\", line %d\n", \ - __FILE__, __LINE__); \ - fprintf(stderr, (y), (z)); \ - exit(1); \ - } \ - } - */ -#else /* CHECK_INVARIANTS */ -/* #define INVARIANT2(x, y) */ -/* #define INVARIANT3(x, y, z) already defined in modularize.h */ -#endif /* CHECK_INVARIANTS */ - -/**** Rachad, must add to general debug structure */ - - -/* @SUBTITLE "InitHeap: Allocate a new heap" */ -/* might return NULL if no free memory */ -RF_Heap_t rf_InitHeap(int maxsize) -{ - RF_Heap_t hp; - - RF_ASSERT(maxsize > 0); - RF_Malloc(hp, sizeof(struct RF_Heap_s),(RF_Heap_t)); - if (hp == NULL) { - fprintf(stderr, "InitHeap: No memory for heap\n"); - return(NULL); - } - - RF_Malloc(hp->heap,sizeof(RF_HeapEntry_t)*(maxsize+1),(RF_HeapEntry_t *)); - if (hp->heap == NULL) { - fprintf(stderr, "InitHeap: No memory for heap of %d elements\n", - maxsize); - RF_Free(hp,-1); /* -1 means 
don't cause an error if the size does not match */ - return(NULL); - } - - hp->numheap = 0; - hp->maxsize = maxsize; - - return(hp); -} - -/* @SUBTITLE "FreeHeap: delete a heap" */ -void rf_FreeHeap(RF_Heap_t hp) -{ - if (hp != NULL) { - RF_Free(hp->heap,sizeof(RF_HeapEntry_t)*(hp->maxsize+1)); - RF_Free(hp,sizeof(struct RF_Heap_s)); - } -} - -/* @SUBTITLE "AddHeap: Add an element to the heap" */ -void rf_AddHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key) -{ - int node; - - INVARIANT2(hp != NULL, "AddHeap: NULL heap\n"); - INVARIANT2((hp->numheap < RF_HEAP_MAX), "AddHeap: Heap overflowed\n"); - - /* use new space end of heap */ - node = ++(hp->numheap); - - /* and reheap */ - while (node != ROOT && hp->heap[PARENT(node)].key > key) { - hp->heap[node] = hp->heap[PARENT(node)]; - node = PARENT(node); - } - - hp->heap[node].data = data; - hp->heap[node].key = key; -} - -/* @SUBTITLE "TopHeap: Return top element of heap" */ -int rf_TopHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key) -{ - INVARIANT2(hp != NULL, "TopHeap: NULL heap\n"); - - if (hp->numheap > 0) { - if (data) - *data = hp->heap[ROOT].data; - if (key) - *key = hp->heap[ROOT].key; - return(RF_HEAP_FOUND); - } - else { - return(RF_HEAP_NONE); - } -} - -/* @SUBTITLE "RepHeap: Replace top of heap with given element and reheap" */ -/* note that hp->numheap does not change, and should already be > 0 */ -void rf_RepHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key) -{ - int node; /* node in heap */ - int lchild, rchild; /* left and right children of node */ - int left, right; /* left and right children exist? */ - int swapped; /* swap was made? 
*/ - RF_HeapEntry_t *heap; /* pointer to the base of this heap array */ - - INVARIANT2(hp != NULL, "RepHeap: NULL heap\n"); - - /* If heap is empty just add this element */ - /* if used properly this case should never come up */ - if (hp->numheap == 0) { - rf_AddHeap(hp, data, key); - - return; - } - - heap = hp->heap; /* cache the heap base pointer */ - - node = ROOT; - - do { - lchild = LCHILD(node); - rchild = RCHILD(node); - left = (lchild <= hp->numheap); - right = (rchild <= hp->numheap); - - /* Both children exist: which is smaller? */ - if (left && right) - if (heap[lchild].key < heap[rchild].key) - right = RF_HEAP_NONE; - else - left = RF_HEAP_NONE; - - /* Now only one of left and right is true. compare it with us */ - if (left && heap[lchild].key < key) { - /* swap with left child */ - heap[node] = heap[lchild]; - node = lchild; - swapped = RF_HEAP_FOUND; - } else if (right && heap[rchild].key < key) { - /* swap with right child */ - heap[node] = heap[rchild]; - node = rchild; - swapped = RF_HEAP_FOUND; - } else - swapped = RF_HEAP_NONE; - } while (swapped); - - /* final resting place for new element */ - heap[node].key = key; - heap[node].data = data; -} - -/* @SUBTITLE "RemHeap: Remove top element and reheap" */ -int rf_RemHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key) -{ - int node; - - /* we don't check hp's validity because TopHeap will do it for us */ - - /* get the top element into data and key, if any */ - if (rf_TopHeap(hp, data, key)) { - /* there was something there, so replace top with last element */ - node = hp->numheap--; - if (hp->numheap > 0) - rf_RepHeap(hp, hp->heap[node].data, hp->heap[node].key); - - return(RF_HEAP_FOUND); - } else{ - return(RF_HEAP_NONE); - } -} - diff --git a/sys/dev/raidframe/rf_heap.h b/sys/dev/raidframe/rf_heap.h deleted file mode 100644 index bf8f8cfdaf9..00000000000 --- a/sys/dev/raidframe/rf_heap.h +++ /dev/null @@ -1,128 +0,0 @@ -/* $OpenBSD: rf_heap.h,v 1.1 1999/01/11 14:29:25 niklas Exp $ */ 
-/* $NetBSD: rf_heap.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* @TITLE "heap.h - interface to heap management implementation */ -/* We manage a heap of data,key pairs, where the key could be any - * simple data type - * and the data is any pointer data type. We allow the caller to add - * pairs, remote pairs, peek at the top pair, and do delete/add combinations. - * The latter are efficient because we only reheap once. - * - * David Kotz 1990? and 1993 - */ - -/* : - * Log: rf_heap.h,v - * Revision 1.8 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. 
Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.7 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.6 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.5 1995/12/01 19:04:07 root - * added copyright info - * - */ - -#ifndef _RF__RF_HEAP_H_ -#define _RF__RF_HEAP_H_ - -#include "rf_types.h" -#include "rf_raid.h" -#include "rf_dag.h" -#include "rf_desc.h" - -#define RF_HEAP_MAX 10240 - -#define RF_HEAP_FOUND 1 -#define RF_HEAP_NONE 0 - -typedef RF_TICS_t RF_HeapKey_t; - -typedef struct RF_HeapData_s RF_HeapData_t; -typedef struct RF_Heap_s *RF_Heap_t; -typedef struct RF_HeapEntry_s RF_HeapEntry_t; - -/* heap data */ -struct RF_HeapData_s { - RF_TICS_t eventTime; - int disk; - int (*CompleteFunc)(); /* function to be called upon completion */ - void *argument; /* argument to be passed to CompleteFunc */ - int owner; /* which task is resposable for this request */ - int row; - int col; /* coordinates of disk */ - RF_Raid_t *raidPtr; - void *diskid; - /* Dag event */ - RF_RaidAccessDesc_t *desc; -}; - -struct RF_HeapEntry_s { - RF_HeapData_t *data; /* the arbitrary data */ - RF_HeapKey_t key; /* key for comparison */ -}; - -struct RF_Heap_s { - RF_HeapEntry_t *heap; /* the heap in use (an array) */ - int numheap; /* number of elements in heap */ - int maxsize; -}; - -/* set up heap to hold maxsize nodes */ -RF_Heap_t rf_InitHeap(int maxsize); - -/* delete a heap data structure */ -void rf_FreeHeap(RF_Heap_t hp); - -/* add the element to the heap */ -void rf_AddHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key); - -/* return top of the heap, without removing it from heap (FALSE if empty) */ -int rf_TopHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key); - -/* replace the heap's top item with a new item, and reheap */ 
-void rf_RepHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key); - -/* remove the heap's top item, if any (FALSE if empty heap) */ -int rf_RemHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key); - -#endif /* !_RF__RF_HEAP_H_ */ diff --git a/sys/dev/raidframe/rf_hist.h b/sys/dev/raidframe/rf_hist.h index 371c544d316..70104aa4f90 100644 --- a/sys/dev/raidframe/rf_hist.h +++ b/sys/dev/raidframe/rf_hist.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_hist.h,v 1.1 1999/01/11 14:29:25 niklas Exp $ */ -/* $NetBSD: rf_hist.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $OpenBSD: rf_hist.h,v 1.2 1999/02/16 00:02:51 niklas Exp $ */ +/* $NetBSD: rf_hist.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * rf_hist.h * @@ -31,22 +31,6 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ -/* : - * Log: rf_hist.h,v - * Revision 1.3 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.2 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.1 1996/05/31 10:33:05 jimz - * Initial revision - * - */ #ifndef _RF__RF_HIST_H_ #define _RF__RF_HIST_H_ @@ -70,4 +54,4 @@ typedef RF_uint32 RF_Hist_t; _hist_[(val - RF_HIST_MIN_VAL) / RF_HIST_RESOLUTION]++; \ } -#endif /* !_RF__RF_HIST_H_ */ +#endif /* !_RF__RF_HIST_H_ */ diff --git a/sys/dev/raidframe/rf_interdecluster.c b/sys/dev/raidframe/rf_interdecluster.c index 3ce97d075ee..8a5fb70d939 100644 --- a/sys/dev/raidframe/rf_interdecluster.c +++ b/sys/dev/raidframe/rf_interdecluster.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_interdecluster.c,v 1.1 1999/01/11 14:29:26 niklas Exp $ */ -/* $NetBSD: rf_interdecluster.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $OpenBSD: rf_interdecluster.c,v 1.2 1999/02/16 00:02:52 niklas Exp $ */ +/* $NetBSD: 
rf_interdecluster.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,98 +29,10 @@ /************************************************************ * - * rf_interdecluster.c -- implements interleaved declustering + * rf_interdecluster.c -- implements interleaved declustering * ************************************************************/ -/* : - * Log: rf_interdecluster.c,v - * Revision 1.24 1996/08/02 13:20:38 jimz - * get rid of bogus (long) casts - * - * Revision 1.23 1996/07/31 16:56:18 jimz - * dataBytesPerStripe, sectorsPerDisk init arch-indep. - * - * Revision 1.22 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.21 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.20 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.19 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.18 1996/06/19 17:53:48 jimz - * move GetNumSparePUs, InstallSpareTable ops into layout switch - * - * Revision 1.17 1996/06/11 15:17:55 wvcii - * added include of rf_interdecluster.h - * fixed parameter list of rf_ConfigureInterDecluster - * fixed return type of rf_GetNumSparePUsInterDecluster - * removed include of rf_raid1.h - * - * Revision 1.16 1996/06/11 08:55:15 jimz - * improved error-checking at configuration time - * - * Revision 1.15 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.14 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.13 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.12 1996/06/06 18:41:48 jimz - * add interleaved declustering dag selection - * - * Revision 1.11 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.10 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.9 1996/05/31 05:03:01 amiri - * fixed a bug related to sparing layout. - * - * Revision 1.8 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.7 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.6 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.5 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.4 1996/05/03 19:50:38 wvcii - * removed include of rf_redstripe.h - * fixed change log parameters in header - * - */ #include "rf_types.h" #include "rf_raid.h" @@ -137,195 +49,203 @@ #include "rf_dagdegwr.h" typedef struct RF_InterdeclusterConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time - * and used by IdentifyStripe */ - RF_StripeCount_t numSparingRegions; - RF_StripeCount_t stripeUnitsPerSparingRegion; - RF_SectorNum_t mirrorStripeOffset; -} RF_InterdeclusterConfigInfo_t; + RF_RowCol_t **stripeIdentifier; 
/* filled in at config time and used + * by IdentifyStripe */ + RF_StripeCount_t numSparingRegions; + RF_StripeCount_t stripeUnitsPerSparingRegion; + RF_SectorNum_t mirrorStripeOffset; +} RF_InterdeclusterConfigInfo_t; -int rf_ConfigureInterDecluster( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureInterDecluster( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_StripeCount_t num_used_stripeUnitsPerDisk; - RF_InterdeclusterConfigInfo_t *info; - RF_RowCol_t i, tmp, SUs_per_region; - - /* create an Interleaved Declustering configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_InterdeclusterConfigInfo_t), (RF_InterdeclusterConfigInfo_t *), - raidPtr->cleanupList); - if (info == NULL) - return(ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_StripeCount_t num_used_stripeUnitsPerDisk; + RF_InterdeclusterConfigInfo_t *info; + RF_RowCol_t i, tmp, SUs_per_region; + + /* create an Interleaved Declustering configuration structure */ + RF_MallocAndAdd(info, sizeof(RF_InterdeclusterConfigInfo_t), (RF_InterdeclusterConfigInfo_t *), + raidPtr->cleanupList); + if (info == NULL) + return (ENOMEM); + layoutPtr->layoutSpecificInfo = (void *) info; - /* fill in the config structure. */ - SUs_per_region = raidPtr->numCol * (raidPtr->numCol - 1); - info->stripeIdentifier = rf_make_2d_array(SUs_per_region, 2 , raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return(ENOMEM); - for (i=0; i< SUs_per_region; i++) { - info->stripeIdentifier[i][0] = i / (raidPtr->numCol-1); - tmp = i / raidPtr->numCol; - info->stripeIdentifier[i][1] = (i+1+tmp) % raidPtr->numCol; - } + /* fill in the config structure. 
*/ + SUs_per_region = raidPtr->numCol * (raidPtr->numCol - 1); + info->stripeIdentifier = rf_make_2d_array(SUs_per_region, 2, raidPtr->cleanupList); + if (info->stripeIdentifier == NULL) + return (ENOMEM); + for (i = 0; i < SUs_per_region; i++) { + info->stripeIdentifier[i][0] = i / (raidPtr->numCol - 1); + tmp = i / raidPtr->numCol; + info->stripeIdentifier[i][1] = (i + 1 + tmp) % raidPtr->numCol; + } - /* no spare tables */ - RF_ASSERT(raidPtr->numRow == 1); + /* no spare tables */ + RF_ASSERT(raidPtr->numRow == 1); - /* fill in the remaining layout parameters */ + /* fill in the remaining layout parameters */ - /* total number of stripes should a multiple of 2*numCol: Each sparing region consists of - 2*numCol stripes: n-1 primary copy, n-1 secondary copy and 2 for spare .. */ - num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % - (2*raidPtr->numCol) ); - info->numSparingRegions = num_used_stripeUnitsPerDisk / (2*raidPtr->numCol); - /* this is in fact the number of stripe units (that are primary data copies) in the sparing region */ - info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); - info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol+1); - layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - - layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; + /* total number of stripes should a multiple of 2*numCol: Each sparing + * region consists of 2*numCol stripes: n-1 primary copy, n-1 + * secondary copy and 2 for spare .. 
*/ + num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % + (2 * raidPtr->numCol)); + info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol); + /* this is in fact the number of stripe units (that are primary data + * copies) in the sparing region */ + info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); + info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol + 1); + layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->numDataCol = 1; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->numParityCol = 1; - raidPtr->sectorsPerDisk = - num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; - raidPtr->totalSectors = - (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; + raidPtr->sectorsPerDisk = + num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; + raidPtr->totalSectors = + (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; - return(0); + layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; + + return (0); } -int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t *raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t * raidPtr) { - return(30); + return (30); } -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t *raidPtr) +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t * raidPtr) { - return(raidPtr->sectorsPerDisk); + return (raidPtr->sectorsPerDisk); } -RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster( - RF_Raid_t *raidPtr) +RF_ReconUnitCount_t 
+rf_GetNumSpareRUsInterDecluster( + RF_Raid_t * raidPtr) { - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - return ( 2 * ((RF_ReconUnitCount_t) info->numSparingRegions) ); - /* the layout uses two stripe units per disk as spare within each sparing region */ + return (2 * ((RF_ReconUnitCount_t) info->numSparingRegions)); + /* the layout uses two stripe units per disk as spare within each + * sparing region */ } - /* Maps to the primary copy of the data, i.e. the first mirror pair */ -void rf_MapSectorInterDecluster( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapSectorInterDecluster( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_StripeNum_t su_offset_into_disk, mirror_su_offset_into_disk; - RF_StripeNum_t sparing_region_id, index_within_region; - int col_before_remap; + RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + RF_StripeNum_t su_offset_into_disk, mirror_su_offset_into_disk; + RF_StripeNum_t sparing_region_id, index_within_region; + int col_before_remap; - *row = 0; - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - su_offset_into_disk = index_within_region % (raidPtr->numCol-1); - mirror_su_offset_into_disk = index_within_region / raidPtr->numCol; - col_before_remap = index_within_region / 
(raidPtr->numCol-1); + *row = 0; + sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; + index_within_region = SUID % info->stripeUnitsPerSparingRegion; + su_offset_into_disk = index_within_region % (raidPtr->numCol - 1); + mirror_su_offset_into_disk = index_within_region / raidPtr->numCol; + col_before_remap = index_within_region / (raidPtr->numCol - 1); - if (!remap) { - *col = col_before_remap;; - *diskSector = ( su_offset_into_disk + ( (raidPtr->numCol-1) * sparing_region_id) ) * - raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } - else { - /* remap sector to spare space...*/ - *diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol-1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - *col = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol; - *col = (*col + 1) % raidPtr->numCol; - if (*col == col_before_remap) *col = (*col + 1) % raidPtr->numCol; - } + if (!remap) { + *col = col_before_remap;; + *diskSector = (su_offset_into_disk + ((raidPtr->numCol - 1) * sparing_region_id)) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + } else { + /* remap sector to spare space... */ + *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *col = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol; + *col = (*col + 1) % raidPtr->numCol; + if (*col == col_before_remap) + *col = (*col + 1) % raidPtr->numCol; + } } - /* Maps to the second copy of the mirror pair. 
*/ -void rf_MapParityInterDecluster( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapParityInterDecluster( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t sparing_region_id, index_within_region, mirror_su_offset_into_disk; - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - int col_before_remap; - - sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; - index_within_region = SUID % info->stripeUnitsPerSparingRegion; - mirror_su_offset_into_disk = index_within_region / raidPtr->numCol; - col_before_remap = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol; + RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t sparing_region_id, index_within_region, mirror_su_offset_into_disk; + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + int col_before_remap; + + sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; + index_within_region = SUID % info->stripeUnitsPerSparingRegion; + mirror_su_offset_into_disk = index_within_region / raidPtr->numCol; + col_before_remap = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol; - *row = 0; - if (!remap) { - *col = col_before_remap; - *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += sparing_region_id * (raidPtr->numCol-1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += mirror_su_offset_into_disk * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - } - else { - /* remap parity to spare space ... 
*/ - *diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - *col = index_within_region / (raidPtr->numCol-1); - *col = (*col + 1) % raidPtr->numCol; - if (*col == col_before_remap) *col = (*col + 1) % raidPtr->numCol; - } + *row = 0; + if (!remap) { + *col = col_before_remap; + *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += sparing_region_id * (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += mirror_su_offset_into_disk * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + } else { + /* remap parity to spare space ... */ + *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *col = index_within_region / (raidPtr->numCol - 1); + *col = (*col + 1) % raidPtr->numCol; + if (*col == col_before_remap) + *col = (*col + 1) % raidPtr->numCol; + } } -void rf_IdentifyStripeInterDecluster( - RF_Raid_t *raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t **diskids, - RF_RowCol_t *outRow) +void +rf_IdentifyStripeInterDecluster( + RF_Raid_t * raidPtr, + RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, + RF_RowCol_t * outRow) { - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t SUID; + RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t SUID; - SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; - SUID = SUID % info->stripeUnitsPerSparingRegion; + SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; + SUID = SUID % 
info->stripeUnitsPerSparingRegion; - *outRow = 0; - *diskids = info->stripeIdentifier[ SUID ]; + *outRow = 0; + *diskids = info->stripeIdentifier[SUID]; } -void rf_MapSIDToPSIDInterDecluster( - RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru) +void +rf_MapSIDToPSIDInterDecluster( + RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, + RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru) { - *which_ru = 0; - *psID = stripeID; + *which_ru = 0; + *psID = stripeID; } - /****************************************************************************** * select a graph to perform a single-stripe access * @@ -335,27 +255,26 @@ void rf_MapSIDToPSIDInterDecluster( * createFunc - name of function to use to create the graph *****************************************************************************/ -void rf_RAIDIDagSelect( - RF_Raid_t *raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t *asmap, - RF_VoidFuncPtr *createFunc) +void +rf_RAIDIDagSelect( + RF_Raid_t * raidPtr, + RF_IoType_t type, + RF_AccessStripeMap_t * asmap, + RF_VoidFuncPtr * createFunc) { - RF_ASSERT(RF_IO_IS_R_OR_W(type)); + RF_ASSERT(RF_IO_IS_R_OR_W(type)); - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - *createFunc = NULL; - return; - } - - *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG; - if (type == RF_IO_TYPE_READ) { - if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr)rf_CreateMirrorPartitionReadDAG; - else - *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneDegradedReadDAG; - } - else - *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG; + if (asmap->numDataFailed + asmap->numParityFailed > 1) { + RF_ERRORMSG("Multiple disks failed in a single group! 
Aborting I/O operation.\n"); + *createFunc = NULL; + return; + } + *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; + if (type == RF_IO_TYPE_READ) { + if (asmap->numDataFailed == 0) + *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; + else + *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG; + } else + *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; } diff --git a/sys/dev/raidframe/rf_interdecluster.h b/sys/dev/raidframe/rf_interdecluster.h index a76ea9dcb46..ac9388b45ae 100644 --- a/sys/dev/raidframe/rf_interdecluster.h +++ b/sys/dev/raidframe/rf_interdecluster.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_interdecluster.h,v 1.1 1999/01/11 14:29:26 niklas Exp $ */ -/* $NetBSD: rf_interdecluster.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $OpenBSD: rf_interdecluster.h,v 1.2 1999/02/16 00:02:52 niklas Exp $ */ +/* $NetBSD: rf_interdecluster.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -31,82 +31,30 @@ * header file for Interleaved Declustering */ -/* - * : - * Log: rf_interdecluster.h,v - * Revision 1.13 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.12 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.11 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.10 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. 
Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.9 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.8 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.7 1996/06/06 18:41:58 jimz - * add RAIDIDagSelect - * - * Revision 1.6 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.5 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.4 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1995/12/01 19:07:25 root - * added copyright info - * - * Revision 1.1 1995/11/28 21:38:27 amiri - * Initial revision - */ - #ifndef _RF__RF_INTERDECLUSTER_H_ #define _RF__RF_INTERDECLUSTER_H_ -int rf_ConfigureInterDecluster(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t *raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t *raidPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster(RF_Raid_t *raidPtr); -void rf_MapSectorInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_MapParityInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_IdentifyStripeInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, RF_RowCol_t *outRow); -void 
rf_MapSIDToPSIDInterDecluster(RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru); -void rf_RAIDIDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc); +int +rf_ConfigureInterDecluster(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t * raidPtr); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t * raidPtr); +RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster(RF_Raid_t * raidPtr); +void +rf_MapSectorInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_MapParityInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_IdentifyStripeInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outRow); +void +rf_MapSIDToPSIDInterDecluster(RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru); +void +rf_RAIDIDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -#endif /* !_RF__RF_INTERDECLUSTER_H_ */ +#endif /* !_RF__RF_INTERDECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_invertq.c b/sys/dev/raidframe/rf_invertq.c index c1e07aa257f..224e331fe39 100644 --- a/sys/dev/raidframe/rf_invertq.c +++ b/sys/dev/raidframe/rf_invertq.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_invertq.c,v 1.1 1999/01/11 14:29:26 niklas Exp $ */ -/* $NetBSD: rf_invertq.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $OpenBSD: rf_invertq.c,v 1.2 1999/02/16 00:02:53 niklas Exp $ */ +/* $NetBSD: rf_invertq.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -27,29 +27,6 @@ * rights to redistribute these changes. */ -/* : - * Log: rf_invertq.c,v - * Revision 1.5 1996/07/29 16:36:36 jimz - * include rf_archs.h here, not rf_invertq.h, to avoid VPATH - * problems in OSF/1 kernel - * - * Revision 1.4 1995/11/30 15:57:27 wvcii - * added copyright info - * - */ - -#ifdef _KERNEL -#define KERNEL -#endif - #include "rf_archs.h" #include "rf_pqdeg.h" -#ifdef KERNEL -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#include <raidframe/du_data/rf_invertq.h> -#else -#include "rf_invertq.h" /* XXX this is a hack. */ -#endif /* !__NetBSD__ && !__OpenBSD__ */ -#else /* KERNEL */ #include "rf_invertq.h" -#endif /* KERNEL */ diff --git a/sys/dev/raidframe/rf_invertq.h b/sys/dev/raidframe/rf_invertq.h index e9c1e69d768..35d387ae70a 100644 --- a/sys/dev/raidframe/rf_invertq.h +++ b/sys/dev/raidframe/rf_invertq.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_invertq.h,v 1.1 1999/01/11 14:29:27 niklas Exp $ */ -/* $NetBSD: rf_invertq.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $OpenBSD: rf_invertq.h,v 1.2 1999/02/16 00:02:53 niklas Exp $ */ +/* $NetBSD: rf_invertq.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * rf_invertq.h */ @@ -10,10 +10,6 @@ #ifndef _RF__RF_INVERTQ_H_ #define _RF__RF_INVERTQ_H_ -#ifdef _KERNEL -#define KERNEL -#endif - /* * rf_geniq.c must include rf_archs.h before including * this file (to get VPATH magic right with the way we @@ -25,49 +21,44 @@ #define RF_Q_COLS 32 RF_ua32_t rf_rn = { -1, 2, 4, 8, 16, 5, 10, 20, 13, 26, 17, 7, 14, 28, 29, 31, 27, 19, 3, 6, 12, 24, 21, 15, 30, 25, 23, 11, 22, 9, 18, 1, }; +1, 2, 4, 8, 16, 5, 10, 20, 13, 26, 17, 7, 14, 28, 29, 31, 27, 19, 3, 6, 12, 24, 21, 15, 30, 25, 23, 11, 22, 9, 18, 1,}; RF_ua32_t rf_qfor[32] = { -/* i = 0 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, }, -/* i = 1 */ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 5, 7, 1, 3, 13, 15, 9, 11, 21, 23, 17, 19, 29, 31, 25, 27, }, -/* i 
= 2 */ { 0, 4, 8, 12, 16, 20, 24, 28, 5, 1, 13, 9, 21, 17, 29, 25, 10, 14, 2, 6, 26, 30, 18, 22, 15, 11, 7, 3, 31, 27, 23, 19, }, -/* i = 3 */ { 0, 8, 16, 24, 5, 13, 21, 29, 10, 2, 26, 18, 15, 7, 31, 23, 20, 28, 4, 12, 17, 25, 1, 9, 30, 22, 14, 6, 27, 19, 11, 3, }, -/* i = 4 */ { 0, 16, 5, 21, 10, 26, 15, 31, 20, 4, 17, 1, 30, 14, 27, 11, 13, 29, 8, 24, 7, 23, 2, 18, 25, 9, 28, 12, 19, 3, 22, 6, }, -/* i = 5 */ { 0, 5, 10, 15, 20, 17, 30, 27, 13, 8, 7, 2, 25, 28, 19, 22, 26, 31, 16, 21, 14, 11, 4, 1, 23, 18, 29, 24, 3, 6, 9, 12, }, -/* i = 6 */ { 0, 10, 20, 30, 13, 7, 25, 19, 26, 16, 14, 4, 23, 29, 3, 9, 17, 27, 5, 15, 28, 22, 8, 2, 11, 1, 31, 21, 6, 12, 18, 24, }, -/* i = 7 */ { 0, 20, 13, 25, 26, 14, 23, 3, 17, 5, 28, 8, 11, 31, 6, 18, 7, 19, 10, 30, 29, 9, 16, 4, 22, 2, 27, 15, 12, 24, 1, 21, }, -/* i = 8 */ { 0, 13, 26, 23, 17, 28, 11, 6, 7, 10, 29, 16, 22, 27, 12, 1, 14, 3, 20, 25, 31, 18, 5, 8, 9, 4, 19, 30, 24, 21, 2, 15, }, -/* i = 9 */ { 0, 26, 17, 11, 7, 29, 22, 12, 14, 20, 31, 5, 9, 19, 24, 2, 28, 6, 13, 23, 27, 1, 10, 16, 18, 8, 3, 25, 21, 15, 4, 30, }, -/* i = 10 */ { 0, 17, 7, 22, 14, 31, 9, 24, 28, 13, 27, 10, 18, 3, 21, 4, 29, 12, 26, 11, 19, 2, 20, 5, 1, 16, 6, 23, 15, 30, 8, 25, }, -/* i = 11 */ { 0, 7, 14, 9, 28, 27, 18, 21, 29, 26, 19, 20, 1, 6, 15, 8, 31, 24, 17, 22, 3, 4, 13, 10, 2, 5, 12, 11, 30, 25, 16, 23, }, -/* i = 12 */ { 0, 14, 28, 18, 29, 19, 1, 15, 31, 17, 3, 13, 2, 12, 30, 16, 27, 21, 7, 9, 6, 8, 26, 20, 4, 10, 24, 22, 25, 23, 5, 11, }, -/* i = 13 */ { 0, 28, 29, 1, 31, 3, 2, 30, 27, 7, 6, 26, 4, 24, 25, 5, 19, 15, 14, 18, 12, 16, 17, 13, 8, 20, 21, 9, 23, 11, 10, 22, }, -/* i = 14 */ { 0, 29, 31, 2, 27, 6, 4, 25, 19, 14, 12, 17, 8, 21, 23, 10, 3, 30, 28, 1, 24, 5, 7, 26, 16, 13, 15, 18, 11, 22, 20, 9, }, -/* i = 15 */ { 0, 31, 27, 4, 19, 12, 8, 23, 3, 28, 24, 7, 16, 15, 11, 20, 6, 25, 29, 2, 21, 10, 14, 17, 5, 26, 30, 1, 22, 9, 13, 18, }, -/* i = 16 */ { 0, 27, 19, 8, 3, 24, 16, 11, 6, 29, 21, 14, 5, 30, 22, 13, 12, 23, 31, 4, 15, 
20, 28, 7, 10, 17, 25, 2, 9, 18, 26, 1, }, -/* i = 17 */ { 0, 19, 3, 16, 6, 21, 5, 22, 12, 31, 15, 28, 10, 25, 9, 26, 24, 11, 27, 8, 30, 13, 29, 14, 20, 7, 23, 4, 18, 1, 17, 2, }, -/* i = 18 */ { 0, 3, 6, 5, 12, 15, 10, 9, 24, 27, 30, 29, 20, 23, 18, 17, 21, 22, 19, 16, 25, 26, 31, 28, 13, 14, 11, 8, 1, 2, 7, 4, }, -/* i = 19 */ { 0, 6, 12, 10, 24, 30, 20, 18, 21, 19, 25, 31, 13, 11, 1, 7, 15, 9, 3, 5, 23, 17, 27, 29, 26, 28, 22, 16, 2, 4, 14, 8, }, -/* i = 20 */ { 0, 12, 24, 20, 21, 25, 13, 1, 15, 3, 23, 27, 26, 22, 2, 14, 30, 18, 6, 10, 11, 7, 19, 31, 17, 29, 9, 5, 4, 8, 28, 16, }, -/* i = 21 */ { 0, 24, 21, 13, 15, 23, 26, 2, 30, 6, 11, 19, 17, 9, 4, 28, 25, 1, 12, 20, 22, 14, 3, 27, 7, 31, 18, 10, 8, 16, 29, 5, }, -/* i = 22 */ { 0, 21, 15, 26, 30, 11, 17, 4, 25, 12, 22, 3, 7, 18, 8, 29, 23, 2, 24, 13, 9, 28, 6, 19, 14, 27, 1, 20, 16, 5, 31, 10, }, -/* i = 23 */ { 0, 15, 30, 17, 25, 22, 7, 8, 23, 24, 9, 6, 14, 1, 16, 31, 11, 4, 21, 26, 18, 29, 12, 3, 28, 19, 2, 13, 5, 10, 27, 20, }, -/* i = 24 */ { 0, 30, 25, 7, 23, 9, 14, 16, 11, 21, 18, 12, 28, 2, 5, 27, 22, 8, 15, 17, 1, 31, 24, 6, 29, 3, 4, 26, 10, 20, 19, 13, }, -/* i = 25 */ { 0, 25, 23, 14, 11, 18, 28, 5, 22, 15, 1, 24, 29, 4, 10, 19, 9, 16, 30, 7, 2, 27, 21, 12, 31, 6, 8, 17, 20, 13, 3, 26, }, -/* i = 26 */ { 0, 23, 11, 28, 22, 1, 29, 10, 9, 30, 2, 21, 31, 8, 20, 3, 18, 5, 25, 14, 4, 19, 15, 24, 27, 12, 16, 7, 13, 26, 6, 17, }, -/* i = 27 */ { 0, 11, 22, 29, 9, 2, 31, 20, 18, 25, 4, 15, 27, 16, 13, 6, 1, 10, 23, 28, 8, 3, 30, 21, 19, 24, 5, 14, 26, 17, 12, 7, }, -/* i = 28 */ { 0, 22, 9, 31, 18, 4, 27, 13, 1, 23, 8, 30, 19, 5, 26, 12, 2, 20, 11, 29, 16, 6, 25, 15, 3, 21, 10, 28, 17, 7, 24, 14, }, -/* i = 29 */ { 0, 9, 18, 27, 1, 8, 19, 26, 2, 11, 16, 25, 3, 10, 17, 24, 4, 13, 22, 31, 5, 12, 23, 30, 6, 15, 20, 29, 7, 14, 21, 28, }, -/* i = 30 */ { 0, 18, 1, 19, 2, 16, 3, 17, 4, 22, 5, 23, 6, 20, 7, 21, 8, 26, 9, 27, 10, 24, 11, 25, 12, 30, 13, 31, 14, 28, 15, 29, }, -/* i = 31 */ { 0, 1, 2, 3, 4, 5, 6, 
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, }, + /* i = 0 */ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,}, + /* i = 1 */ {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 5, 7, 1, 3, 13, 15, 9, 11, 21, 23, 17, 19, 29, 31, 25, 27,}, + /* i = 2 */ {0, 4, 8, 12, 16, 20, 24, 28, 5, 1, 13, 9, 21, 17, 29, 25, 10, 14, 2, 6, 26, 30, 18, 22, 15, 11, 7, 3, 31, 27, 23, 19,}, + /* i = 3 */ {0, 8, 16, 24, 5, 13, 21, 29, 10, 2, 26, 18, 15, 7, 31, 23, 20, 28, 4, 12, 17, 25, 1, 9, 30, 22, 14, 6, 27, 19, 11, 3,}, + /* i = 4 */ {0, 16, 5, 21, 10, 26, 15, 31, 20, 4, 17, 1, 30, 14, 27, 11, 13, 29, 8, 24, 7, 23, 2, 18, 25, 9, 28, 12, 19, 3, 22, 6,}, + /* i = 5 */ {0, 5, 10, 15, 20, 17, 30, 27, 13, 8, 7, 2, 25, 28, 19, 22, 26, 31, 16, 21, 14, 11, 4, 1, 23, 18, 29, 24, 3, 6, 9, 12,}, + /* i = 6 */ {0, 10, 20, 30, 13, 7, 25, 19, 26, 16, 14, 4, 23, 29, 3, 9, 17, 27, 5, 15, 28, 22, 8, 2, 11, 1, 31, 21, 6, 12, 18, 24,}, + /* i = 7 */ {0, 20, 13, 25, 26, 14, 23, 3, 17, 5, 28, 8, 11, 31, 6, 18, 7, 19, 10, 30, 29, 9, 16, 4, 22, 2, 27, 15, 12, 24, 1, 21,}, + /* i = 8 */ {0, 13, 26, 23, 17, 28, 11, 6, 7, 10, 29, 16, 22, 27, 12, 1, 14, 3, 20, 25, 31, 18, 5, 8, 9, 4, 19, 30, 24, 21, 2, 15,}, + /* i = 9 */ {0, 26, 17, 11, 7, 29, 22, 12, 14, 20, 31, 5, 9, 19, 24, 2, 28, 6, 13, 23, 27, 1, 10, 16, 18, 8, 3, 25, 21, 15, 4, 30,}, + /* i = 10 */ {0, 17, 7, 22, 14, 31, 9, 24, 28, 13, 27, 10, 18, 3, 21, 4, 29, 12, 26, 11, 19, 2, 20, 5, 1, 16, 6, 23, 15, 30, 8, 25,}, + /* i = 11 */ {0, 7, 14, 9, 28, 27, 18, 21, 29, 26, 19, 20, 1, 6, 15, 8, 31, 24, 17, 22, 3, 4, 13, 10, 2, 5, 12, 11, 30, 25, 16, 23,}, + /* i = 12 */ {0, 14, 28, 18, 29, 19, 1, 15, 31, 17, 3, 13, 2, 12, 30, 16, 27, 21, 7, 9, 6, 8, 26, 20, 4, 10, 24, 22, 25, 23, 5, 11,}, + /* i = 13 */ {0, 28, 29, 1, 31, 3, 2, 30, 27, 7, 6, 26, 4, 24, 25, 5, 19, 15, 14, 18, 12, 16, 17, 13, 8, 20, 21, 9, 23, 11, 10, 22,}, + /* 
i = 14 */ {0, 29, 31, 2, 27, 6, 4, 25, 19, 14, 12, 17, 8, 21, 23, 10, 3, 30, 28, 1, 24, 5, 7, 26, 16, 13, 15, 18, 11, 22, 20, 9,}, + /* i = 15 */ {0, 31, 27, 4, 19, 12, 8, 23, 3, 28, 24, 7, 16, 15, 11, 20, 6, 25, 29, 2, 21, 10, 14, 17, 5, 26, 30, 1, 22, 9, 13, 18,}, + /* i = 16 */ {0, 27, 19, 8, 3, 24, 16, 11, 6, 29, 21, 14, 5, 30, 22, 13, 12, 23, 31, 4, 15, 20, 28, 7, 10, 17, 25, 2, 9, 18, 26, 1,}, + /* i = 17 */ {0, 19, 3, 16, 6, 21, 5, 22, 12, 31, 15, 28, 10, 25, 9, 26, 24, 11, 27, 8, 30, 13, 29, 14, 20, 7, 23, 4, 18, 1, 17, 2,}, + /* i = 18 */ {0, 3, 6, 5, 12, 15, 10, 9, 24, 27, 30, 29, 20, 23, 18, 17, 21, 22, 19, 16, 25, 26, 31, 28, 13, 14, 11, 8, 1, 2, 7, 4,}, + /* i = 19 */ {0, 6, 12, 10, 24, 30, 20, 18, 21, 19, 25, 31, 13, 11, 1, 7, 15, 9, 3, 5, 23, 17, 27, 29, 26, 28, 22, 16, 2, 4, 14, 8,}, + /* i = 20 */ {0, 12, 24, 20, 21, 25, 13, 1, 15, 3, 23, 27, 26, 22, 2, 14, 30, 18, 6, 10, 11, 7, 19, 31, 17, 29, 9, 5, 4, 8, 28, 16,}, + /* i = 21 */ {0, 24, 21, 13, 15, 23, 26, 2, 30, 6, 11, 19, 17, 9, 4, 28, 25, 1, 12, 20, 22, 14, 3, 27, 7, 31, 18, 10, 8, 16, 29, 5,}, + /* i = 22 */ {0, 21, 15, 26, 30, 11, 17, 4, 25, 12, 22, 3, 7, 18, 8, 29, 23, 2, 24, 13, 9, 28, 6, 19, 14, 27, 1, 20, 16, 5, 31, 10,}, + /* i = 23 */ {0, 15, 30, 17, 25, 22, 7, 8, 23, 24, 9, 6, 14, 1, 16, 31, 11, 4, 21, 26, 18, 29, 12, 3, 28, 19, 2, 13, 5, 10, 27, 20,}, + /* i = 24 */ {0, 30, 25, 7, 23, 9, 14, 16, 11, 21, 18, 12, 28, 2, 5, 27, 22, 8, 15, 17, 1, 31, 24, 6, 29, 3, 4, 26, 10, 20, 19, 13,}, + /* i = 25 */ {0, 25, 23, 14, 11, 18, 28, 5, 22, 15, 1, 24, 29, 4, 10, 19, 9, 16, 30, 7, 2, 27, 21, 12, 31, 6, 8, 17, 20, 13, 3, 26,}, + /* i = 26 */ {0, 23, 11, 28, 22, 1, 29, 10, 9, 30, 2, 21, 31, 8, 20, 3, 18, 5, 25, 14, 4, 19, 15, 24, 27, 12, 16, 7, 13, 26, 6, 17,}, + /* i = 27 */ {0, 11, 22, 29, 9, 2, 31, 20, 18, 25, 4, 15, 27, 16, 13, 6, 1, 10, 23, 28, 8, 3, 30, 21, 19, 24, 5, 14, 26, 17, 12, 7,}, + /* i = 28 */ {0, 22, 9, 31, 18, 4, 27, 13, 1, 23, 8, 30, 19, 5, 26, 12, 2, 20, 11, 29, 16, 6, 25, 
15, 3, 21, 10, 28, 17, 7, 24, 14,}, + /* i = 29 */ {0, 9, 18, 27, 1, 8, 19, 26, 2, 11, 16, 25, 3, 10, 17, 24, 4, 13, 22, 31, 5, 12, 23, 30, 6, 15, 20, 29, 7, 14, 21, 28,}, + /* i = 30 */ {0, 18, 1, 19, 2, 16, 3, 17, 4, 22, 5, 23, 6, 20, 7, 21, 8, 26, 9, 27, 10, 24, 11, 25, 12, 30, 13, 31, 14, 28, 15, 29,}, + /* i = 31 */ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,}, }; #define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)] -#ifdef KERNEL -RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */ -#elif defined(NO_PQ) -RF_ua1024_t rf_qinv[29*29]; -#else /* !KERNEL && NO_PQ */ - -#endif /* !KERNEL && NO_PQ */ +RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */ -#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */ -#endif /* !_RF__RF_INVERTQ_H_ */ +#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > + * 0) */ +#endif /* !_RF__RF_INVERTQ_H_ */ diff --git a/sys/dev/raidframe/rf_kintf.h b/sys/dev/raidframe/rf_kintf.h index e270aa0b933..819a7ff9e94 100644 --- a/sys/dev/raidframe/rf_kintf.h +++ b/sys/dev/raidframe/rf_kintf.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_kintf.h,v 1.1 1999/01/11 14:29:27 niklas Exp $ */ -/* $NetBSD: rf_kintf.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $OpenBSD: rf_kintf.h,v 1.2 1999/02/16 00:02:53 niklas Exp $ */ +/* $NetBSD: rf_kintf.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * rf_kintf.h * @@ -31,41 +31,28 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
*/ -/* - * : - * Log: rf_kintf.h,v - * Revision 1.2 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.1 1996/05/31 18:59:14 jimz - * Initial revision - * - */ #ifndef _RF__RF_KINTF_H_ #define _RF__RF_KINTF_H_ #include "rf_types.h" -int rf_boot(void); -int rf_open(dev_t dev, int flag, int fmt); -int rf_close(dev_t dev, int flag, int fmt); -void rf_strategy(struct buf *bp); -void rf_minphys(struct buf *bp); -int rf_read(dev_t dev, struct uio *uio); -int rf_write(dev_t dev, struct uio *uio); -int rf_size(dev_t dev); -int rf_ioctl(dev_t dev, int cmd, caddr_t data, int flag); -void rf_ReconKernelThread(void); -int rf_GetSpareTableFromDaemon(RF_SparetWait_t *req); -caddr_t rf_MapToKernelSpace(struct buf *bp, caddr_t addr); -int rf_BzeroWithRemap(struct buf *bp, char *databuf, int len); -int rf_DoAccessKernel(RF_Raid_t *raidPtr, struct buf *bp, - RF_RaidAccessFlags_t flags, void (*cbFunc)(struct buf *), void *cbArg); -int rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req); +int rf_boot(void); +int rf_open(dev_t dev, int flag, int fmt); +int rf_close(dev_t dev, int flag, int fmt); +void rf_strategy(struct buf * bp); +void rf_minphys(struct buf * bp); +int rf_read(dev_t dev, struct uio * uio); +int rf_write(dev_t dev, struct uio * uio); +int rf_size(dev_t dev); +int rf_ioctl(dev_t dev, int cmd, caddr_t data, int flag); +void rf_ReconKernelThread(void); +int rf_GetSpareTableFromDaemon(RF_SparetWait_t * req); +caddr_t rf_MapToKernelSpace(struct buf * bp, caddr_t addr); +int rf_BzeroWithRemap(struct buf * bp, char *databuf, int len); +int +rf_DoAccessKernel(RF_Raid_t * raidPtr, struct buf * bp, + RF_RaidAccessFlags_t flags, void (*cbFunc) (struct buf *), void *cbArg); + int rf_DispatchKernelIO(RF_DiskQueue_t * queue, 
RF_DiskQueueData_t * req); -#endif /* _RF__RF_KINTF_H_ */ +#endif /* _RF__RF_KINTF_H_ */ diff --git a/sys/dev/raidframe/rf_layout.c b/sys/dev/raidframe/rf_layout.c index a8a06e044ff..babc1d691d9 100644 --- a/sys/dev/raidframe/rf_layout.c +++ b/sys/dev/raidframe/rf_layout.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_layout.c,v 1.1 1999/01/11 14:29:27 niklas Exp $ */ -/* $NetBSD: rf_layout.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $OpenBSD: rf_layout.c,v 1.2 1999/02/16 00:02:54 niklas Exp $ */ +/* $NetBSD: rf_layout.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -30,212 +30,6 @@ /* rf_layout.c -- driver code dealing with layout and mapping issues */ -/* - * : - * Log: rf_layout.c,v - * Revision 1.71 1996/08/20 22:41:30 jimz - * add declustered evenodd - * - * Revision 1.70 1996/07/31 16:56:18 jimz - * dataBytesPerStripe, sectorsPerDisk init arch-indep. - * - * Revision 1.69 1996/07/31 15:34:46 jimz - * add EvenOdd - * - * Revision 1.68 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.67 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.66 1996/07/27 18:40:24 jimz - * cleanup sweep - * - * Revision 1.65 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.64 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.63 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.62 1996/07/11 19:08:00 jimz - * generalize reconstruction mechanism - * allow raid1 reconstructs via copyback (done with array - * quiesced, not online, therefore not disk-directed) - * - * Revision 1.61 1996/06/19 22:23:01 jimz - * parity verification is now a layout-configurable thing - * not all layouts currently support it (correctly, anyway) - * - * Revision 
1.60 1996/06/19 17:53:48 jimz - * move GetNumSparePUs, InstallSpareTable ops into layout switch - * - * Revision 1.59 1996/06/19 14:57:58 jimz - * move layout-specific config parsing hooks into RF_LayoutSW_t - * table in rf_layout.c - * - * Revision 1.58 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.57 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.56 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.55 1996/06/06 18:41:35 jimz - * change interleaved declustering dag selection to an - * interleaved-declustering-specific routine (so we can - * use the partitioned mirror node) - * - * Revision 1.54 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.53 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.52 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.51 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.50 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.49 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.48 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.47 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.46 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.45 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.44 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.43 1996/02/22 16:46:35 amiri - * modified chained declustering to use a seperate DAG selection routine - * - * Revision 1.42 1995/12/01 19:16:11 root - * added copyright info - * - * Revision 1.41 1995/11/28 21:31:02 amiri - * added Interleaved Declustering to switch table - * - * Revision 1.40 1995/11/20 14:35:17 arw - * moved rf_StartThroughputStats in DefaultWrite and DefaultRead - * - * Revision 1.39 1995/11/19 16:28:46 wvcii - * replaced 
LaunchDAGState with CreateDAGState, ExecuteDAGState - * - * Revision 1.38 1995/11/17 19:00:41 wvcii - * added MapQ entries to switch table - * - * Revision 1.37 1995/11/17 16:58:13 amiri - * Added the Chained Declustering architecture ('C'), - * essentially a variant of mirroring. - * - * Revision 1.36 1995/11/16 16:16:10 amiri - * Added RAID5 with rotated sparing ('R' configuration) - * - * Revision 1.35 1995/11/07 15:41:17 wvcii - * modified state lists: DefaultStates, VSReadStates - * necessary to support new states (LaunchDAGState, ProcessDAGState) - * - * Revision 1.34 1995/10/18 01:23:20 amiri - * added ifndef SIMULATE wrapper around rf_StartThroughputStats() - * - * Revision 1.33 1995/10/13 15:05:46 arw - * added rf_StartThroughputStats to DefaultRead and DefaultWrite - * - * Revision 1.32 1995/10/12 16:04:23 jimz - * added config names to mapsw entires - * - * Revision 1.31 1995/10/04 03:57:48 wvcii - * added raid level 1 to mapsw - * - * Revision 1.30 1995/09/07 01:26:55 jimz - * Achive basic compilation in kernel. Kernel functionality - * is not guaranteed at all, but it'll compile. Mostly. I hope. - * - * Revision 1.29 1995/07/28 21:43:42 robby - * checkin after leaving for Rice. 
Bye - * - * Revision 1.28 1995/07/26 03:26:14 robby - * *** empty log message *** - * - * Revision 1.27 1995/07/21 19:47:52 rachad - * Added raid 0 /5 with caching architectures - * - * Revision 1.26 1995/07/21 19:29:27 robby - * added virtual striping states - * - * Revision 1.25 1995/07/10 21:41:47 robby - * switched to have my own virtual stripng write function from the cache - * - * Revision 1.24 1995/07/10 20:51:59 robby - * added virtual striping states - * - * Revision 1.23 1995/07/10 16:57:42 robby - * updated alloclistelem struct to the correct struct name - * - * Revision 1.22 1995/07/08 20:06:11 rachad - * *** empty log message *** - * - * Revision 1.21 1995/07/08 19:43:16 cfb - * *** empty log message *** - * - * Revision 1.20 1995/07/08 18:05:39 rachad - * Linked up Claudsons code with the real cache - * - * Revision 1.19 1995/07/06 14:29:36 robby - * added defaults states list to the layout switch - * - * Revision 1.18 1995/06/23 13:40:34 robby - * updeated to prototypes in rf_layout.h - * - */ - #include "rf_types.h" #include "rf_archs.h" #include "rf_raid.h" @@ -252,19 +46,19 @@ #include "rf_states.h" #if RF_INCLUDE_RAID5_RS > 0 #include "rf_raid5_rotatedspare.h" -#endif /* RF_INCLUDE_RAID5_RS > 0 */ +#endif /* RF_INCLUDE_RAID5_RS > 0 */ #if RF_INCLUDE_CHAINDECLUSTER > 0 #include "rf_chaindecluster.h" -#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ +#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ #if RF_INCLUDE_INTERDECLUSTER > 0 #include "rf_interdecluster.h" -#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ +#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ #if RF_INCLUDE_PARITYLOGGING > 0 #include "rf_paritylogging.h" -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ #if RF_INCLUDE_EVENODD > 0 #include "rf_evenodd.h" -#endif /* RF_INCLUDE_EVENODD > 0 */ +#endif /* RF_INCLUDE_EVENODD > 0 */ #include "rf_general.h" #include "rf_driver.h" #include "rf_parityscan.h" @@ -284,10 +78,9 @@ static RF_AccessState_t DefaultStates[] = 
{rf_QuiesceState, rf_IncrAccessesCountState, rf_MapState, rf_LockState, rf_CreateDAGState, rf_ExecuteDAGState, rf_ProcessDAGState, rf_DecrAccessesCountState, - rf_CleanupState, rf_LastState}; - +rf_CleanupState, rf_LastState}; #if (defined(__NetBSD__) || defined(__OpenBSD__)) && !defined(_KERNEL) -/* XXX Gross hack to shutup gcc -- it complains that DefaultStates is not +/* XXX Gross hack to shutup gcc -- it complains that DefaultStates is not used when compiling this in userland.. I hate to burst it's bubble, but DefaultStates is used all over the place here in the initialization of lots of data structures. GO */ @@ -296,401 +89,399 @@ RF_AccessState_t *NothingAtAll = DefaultStates; #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) /* XXX Remove static so GCC doesn't complain about these being unused! */ -int distSpareYes = 1; -int distSpareNo = 0; +int distSpareYes = 1; +int distSpareNo = 0; #else static int distSpareYes = 1; -static int distSpareNo = 0; +static int distSpareNo = 0; #endif -#ifdef KERNEL +#ifdef _KERNEL #define RF_NK2(a,b) -#else /* KERNEL */ +#else /* _KERNEL */ #define RF_NK2(a,b) a,b, -#endif /* KERNEL */ +#endif /* _KERNEL */ #if RF_UTILITY > 0 #define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) -#else /* RF_UTILITY > 0 */ +#else /* RF_UTILITY > 0 */ #define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p -#endif /* RF_UTILITY > 0 */ +#endif /* RF_UTILITY > 0 */ static RF_LayoutSW_t mapsw[] = { /* parity declustering */ {'T', "Parity declustering", - RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo) - RF_NU( - rf_ConfigureDeclustered, - rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL, - rf_IdentifyStripeDeclustered, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDDeclustered, - rf_GetDefaultHeadSepLimitDeclustered, - rf_GetDefaultNumFloatingReconBuffersDeclustered, - NULL, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) + RF_NK2(rf_MakeLayoutSpecificDeclustered, 
&distSpareNo) + RF_NU( + rf_ConfigureDeclustered, + rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL, + rf_IdentifyStripeDeclustered, + rf_RaidFiveDagSelect, + rf_MapSIDToPSIDDeclustered, + rf_GetDefaultHeadSepLimitDeclustered, + rf_GetDefaultNumFloatingReconBuffersDeclustered, + NULL, NULL, + rf_SubmitReconBufferBasic, + rf_VerifyParityBasic, + 1, + DefaultStates, + 0) }, /* parity declustering with distributed sparing */ {'D', "Distributed sparing parity declustering", - RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareYes) - RF_NU( - rf_ConfigureDeclusteredDS, - rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL, - rf_IdentifyStripeDeclustered, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDDeclustered, - rf_GetDefaultHeadSepLimitDeclustered, - rf_GetDefaultNumFloatingReconBuffersDeclustered, - rf_GetNumSpareRUsDeclustered, rf_InstallSpareTable, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - RF_DISTRIBUTE_SPARE|RF_BD_DECLUSTERED) + RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareYes) + RF_NU( + rf_ConfigureDeclusteredDS, + rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL, + rf_IdentifyStripeDeclustered, + rf_RaidFiveDagSelect, + rf_MapSIDToPSIDDeclustered, + rf_GetDefaultHeadSepLimitDeclustered, + rf_GetDefaultNumFloatingReconBuffersDeclustered, + rf_GetNumSpareRUsDeclustered, rf_InstallSpareTable, + rf_SubmitReconBufferBasic, + rf_VerifyParityBasic, + 1, + DefaultStates, + RF_DISTRIBUTE_SPARE | RF_BD_DECLUSTERED) }, #if RF_INCLUDE_DECL_PQ > 0 /* declustered P+Q */ {'Q', "Declustered P+Q", - RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo) - RF_NU( - rf_ConfigureDeclusteredPQ, - rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ, - rf_IdentifyStripeDeclusteredPQ, - rf_PQDagSelect, - rf_MapSIDToPSIDDeclustered, - rf_GetDefaultHeadSepLimitDeclustered, - rf_GetDefaultNumFloatingReconBuffersPQ, - NULL, NULL, - NULL, - rf_VerifyParityBasic, - 2, - DefaultStates, - 0) + 
RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo) + RF_NU( + rf_ConfigureDeclusteredPQ, + rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ, + rf_IdentifyStripeDeclusteredPQ, + rf_PQDagSelect, + rf_MapSIDToPSIDDeclustered, + rf_GetDefaultHeadSepLimitDeclustered, + rf_GetDefaultNumFloatingReconBuffersPQ, + NULL, NULL, + NULL, + rf_VerifyParityBasic, + 2, + DefaultStates, + 0) }, -#endif /* RF_INCLUDE_DECL_PQ > 0 */ +#endif /* RF_INCLUDE_DECL_PQ > 0 */ #if RF_INCLUDE_RAID5_RS > 0 /* RAID 5 with rotated sparing */ {'R', "RAID Level 5 rotated sparing", - RF_NK2(rf_MakeLayoutSpecificNULL, NULL) - RF_NU( - rf_ConfigureRAID5_RS, - rf_MapSectorRAID5_RS, rf_MapParityRAID5_RS, NULL, - rf_IdentifyStripeRAID5_RS, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDRAID5_RS, - rf_GetDefaultHeadSepLimitRAID5, - rf_GetDefaultNumFloatingReconBuffersRAID5, - rf_GetNumSpareRUsRAID5_RS, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - RF_DISTRIBUTE_SPARE) + RF_NK2(rf_MakeLayoutSpecificNULL, NULL) + RF_NU( + rf_ConfigureRAID5_RS, + rf_MapSectorRAID5_RS, rf_MapParityRAID5_RS, NULL, + rf_IdentifyStripeRAID5_RS, + rf_RaidFiveDagSelect, + rf_MapSIDToPSIDRAID5_RS, + rf_GetDefaultHeadSepLimitRAID5, + rf_GetDefaultNumFloatingReconBuffersRAID5, + rf_GetNumSpareRUsRAID5_RS, NULL, + rf_SubmitReconBufferBasic, + rf_VerifyParityBasic, + 1, + DefaultStates, + RF_DISTRIBUTE_SPARE) }, -#endif /* RF_INCLUDE_RAID5_RS > 0 */ +#endif /* RF_INCLUDE_RAID5_RS > 0 */ #if RF_INCLUDE_CHAINDECLUSTER > 0 /* Chained Declustering */ {'C', "Chained Declustering", - RF_NK2(rf_MakeLayoutSpecificNULL, NULL) - RF_NU( - rf_ConfigureChainDecluster, - rf_MapSectorChainDecluster, rf_MapParityChainDecluster, NULL, - rf_IdentifyStripeChainDecluster, - rf_RAIDCDagSelect, - rf_MapSIDToPSIDChainDecluster, - NULL, - NULL, - rf_GetNumSpareRUsChainDecluster, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) + RF_NK2(rf_MakeLayoutSpecificNULL, 
NULL) + RF_NU( + rf_ConfigureChainDecluster, + rf_MapSectorChainDecluster, rf_MapParityChainDecluster, NULL, + rf_IdentifyStripeChainDecluster, + rf_RAIDCDagSelect, + rf_MapSIDToPSIDChainDecluster, + NULL, + NULL, + rf_GetNumSpareRUsChainDecluster, NULL, + rf_SubmitReconBufferBasic, + rf_VerifyParityBasic, + 1, + DefaultStates, + 0) }, -#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ +#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ #if RF_INCLUDE_INTERDECLUSTER > 0 /* Interleaved Declustering */ {'I', "Interleaved Declustering", - RF_NK2(rf_MakeLayoutSpecificNULL, NULL) - RF_NU( - rf_ConfigureInterDecluster, - rf_MapSectorInterDecluster, rf_MapParityInterDecluster, NULL, - rf_IdentifyStripeInterDecluster, - rf_RAIDIDagSelect, - rf_MapSIDToPSIDInterDecluster, - rf_GetDefaultHeadSepLimitInterDecluster, - rf_GetDefaultNumFloatingReconBuffersInterDecluster, - rf_GetNumSpareRUsInterDecluster, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - RF_DISTRIBUTE_SPARE) + RF_NK2(rf_MakeLayoutSpecificNULL, NULL) + RF_NU( + rf_ConfigureInterDecluster, + rf_MapSectorInterDecluster, rf_MapParityInterDecluster, NULL, + rf_IdentifyStripeInterDecluster, + rf_RAIDIDagSelect, + rf_MapSIDToPSIDInterDecluster, + rf_GetDefaultHeadSepLimitInterDecluster, + rf_GetDefaultNumFloatingReconBuffersInterDecluster, + rf_GetNumSpareRUsInterDecluster, NULL, + rf_SubmitReconBufferBasic, + rf_VerifyParityBasic, + 1, + DefaultStates, + RF_DISTRIBUTE_SPARE) }, -#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ +#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ #if RF_INCLUDE_RAID0 > 0 /* RAID level 0 */ {'0', "RAID Level 0", - RF_NK2(rf_MakeLayoutSpecificNULL, NULL) - RF_NU( - rf_ConfigureRAID0, - rf_MapSectorRAID0, rf_MapParityRAID0, NULL, - rf_IdentifyStripeRAID0, - rf_RAID0DagSelect, - rf_MapSIDToPSIDRAID0, - NULL, - NULL, - NULL, NULL, - NULL, - rf_VerifyParityRAID0, - 0, - DefaultStates, - 0) + RF_NK2(rf_MakeLayoutSpecificNULL, NULL) + RF_NU( + rf_ConfigureRAID0, + rf_MapSectorRAID0, 
rf_MapParityRAID0, NULL, + rf_IdentifyStripeRAID0, + rf_RAID0DagSelect, + rf_MapSIDToPSIDRAID0, + NULL, + NULL, + NULL, NULL, + NULL, + rf_VerifyParityRAID0, + 0, + DefaultStates, + 0) }, -#endif /* RF_INCLUDE_RAID0 > 0 */ +#endif /* RF_INCLUDE_RAID0 > 0 */ #if RF_INCLUDE_RAID1 > 0 /* RAID level 1 */ {'1', "RAID Level 1", - RF_NK2(rf_MakeLayoutSpecificNULL, NULL) - RF_NU( - rf_ConfigureRAID1, - rf_MapSectorRAID1, rf_MapParityRAID1, NULL, - rf_IdentifyStripeRAID1, - rf_RAID1DagSelect, - rf_MapSIDToPSIDRAID1, - NULL, - NULL, - NULL, NULL, - rf_SubmitReconBufferRAID1, - rf_VerifyParityRAID1, - 1, - DefaultStates, - 0) + RF_NK2(rf_MakeLayoutSpecificNULL, NULL) + RF_NU( + rf_ConfigureRAID1, + rf_MapSectorRAID1, rf_MapParityRAID1, NULL, + rf_IdentifyStripeRAID1, + rf_RAID1DagSelect, + rf_MapSIDToPSIDRAID1, + NULL, + NULL, + NULL, NULL, + rf_SubmitReconBufferRAID1, + rf_VerifyParityRAID1, + 1, + DefaultStates, + 0) }, -#endif /* RF_INCLUDE_RAID1 > 0 */ +#endif /* RF_INCLUDE_RAID1 > 0 */ #if RF_INCLUDE_RAID4 > 0 /* RAID level 4 */ {'4', "RAID Level 4", - RF_NK2(rf_MakeLayoutSpecificNULL, NULL) - RF_NU( - rf_ConfigureRAID4, - rf_MapSectorRAID4, rf_MapParityRAID4, NULL, - rf_IdentifyStripeRAID4, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDRAID4, - rf_GetDefaultHeadSepLimitRAID4, - rf_GetDefaultNumFloatingReconBuffersRAID4, - NULL, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) + RF_NK2(rf_MakeLayoutSpecificNULL, NULL) + RF_NU( + rf_ConfigureRAID4, + rf_MapSectorRAID4, rf_MapParityRAID4, NULL, + rf_IdentifyStripeRAID4, + rf_RaidFiveDagSelect, + rf_MapSIDToPSIDRAID4, + rf_GetDefaultHeadSepLimitRAID4, + rf_GetDefaultNumFloatingReconBuffersRAID4, + NULL, NULL, + rf_SubmitReconBufferBasic, + rf_VerifyParityBasic, + 1, + DefaultStates, + 0) }, -#endif /* RF_INCLUDE_RAID4 > 0 */ +#endif /* RF_INCLUDE_RAID4 > 0 */ #if RF_INCLUDE_RAID5 > 0 /* RAID level 5 */ {'5', "RAID Level 5", - RF_NK2(rf_MakeLayoutSpecificNULL, NULL) - RF_NU( - 
rf_ConfigureRAID5, - rf_MapSectorRAID5, rf_MapParityRAID5, NULL, - rf_IdentifyStripeRAID5, - rf_RaidFiveDagSelect, - rf_MapSIDToPSIDRAID5, - rf_GetDefaultHeadSepLimitRAID5, - rf_GetDefaultNumFloatingReconBuffersRAID5, - NULL, NULL, - rf_SubmitReconBufferBasic, - rf_VerifyParityBasic, - 1, - DefaultStates, - 0) + RF_NK2(rf_MakeLayoutSpecificNULL, NULL) + RF_NU( + rf_ConfigureRAID5, + rf_MapSectorRAID5, rf_MapParityRAID5, NULL, + rf_IdentifyStripeRAID5, + rf_RaidFiveDagSelect, + rf_MapSIDToPSIDRAID5, + rf_GetDefaultHeadSepLimitRAID5, + rf_GetDefaultNumFloatingReconBuffersRAID5, + NULL, NULL, + rf_SubmitReconBufferBasic, + rf_VerifyParityBasic, + 1, + DefaultStates, + 0) }, -#endif /* RF_INCLUDE_RAID5 > 0 */ +#endif /* RF_INCLUDE_RAID5 > 0 */ #if RF_INCLUDE_EVENODD > 0 /* Evenodd */ {'E', "EvenOdd", - RF_NK2(rf_MakeLayoutSpecificNULL, NULL) - RF_NU( - rf_ConfigureEvenOdd, - rf_MapSectorRAID5, rf_MapParityEvenOdd, rf_MapEEvenOdd, - rf_IdentifyStripeEvenOdd, - rf_EODagSelect, - rf_MapSIDToPSIDRAID5, - NULL, - NULL, - NULL, NULL, - NULL, /* no reconstruction, yet */ - rf_VerifyParityEvenOdd, - 2, - DefaultStates, - 0) + RF_NK2(rf_MakeLayoutSpecificNULL, NULL) + RF_NU( + rf_ConfigureEvenOdd, + rf_MapSectorRAID5, rf_MapParityEvenOdd, rf_MapEEvenOdd, + rf_IdentifyStripeEvenOdd, + rf_EODagSelect, + rf_MapSIDToPSIDRAID5, + NULL, + NULL, + NULL, NULL, + NULL, /* no reconstruction, yet */ + rf_VerifyParityEvenOdd, + 2, + DefaultStates, + 0) }, -#endif /* RF_INCLUDE_EVENODD > 0 */ +#endif /* RF_INCLUDE_EVENODD > 0 */ #if RF_INCLUDE_EVENODD > 0 /* Declustered Evenodd */ {'e', "Declustered EvenOdd", - RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo) - RF_NU( - rf_ConfigureDeclusteredPQ, - rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ, - rf_IdentifyStripeDeclusteredPQ, - rf_EODagSelect, - rf_MapSIDToPSIDRAID5, - rf_GetDefaultHeadSepLimitDeclustered, - rf_GetDefaultNumFloatingReconBuffersPQ, - NULL, NULL, - NULL, /* no reconstruction, yet */ - 
rf_VerifyParityEvenOdd, - 2, - DefaultStates, - 0) + RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo) + RF_NU( + rf_ConfigureDeclusteredPQ, + rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ, + rf_IdentifyStripeDeclusteredPQ, + rf_EODagSelect, + rf_MapSIDToPSIDRAID5, + rf_GetDefaultHeadSepLimitDeclustered, + rf_GetDefaultNumFloatingReconBuffersPQ, + NULL, NULL, + NULL, /* no reconstruction, yet */ + rf_VerifyParityEvenOdd, + 2, + DefaultStates, + 0) }, -#endif /* RF_INCLUDE_EVENODD > 0 */ +#endif /* RF_INCLUDE_EVENODD > 0 */ #if RF_INCLUDE_PARITYLOGGING > 0 /* parity logging */ {'L', "Parity logging", - RF_NK2(rf_MakeLayoutSpecificNULL, NULL) - RF_NU( - rf_ConfigureParityLogging, - rf_MapSectorParityLogging, rf_MapParityParityLogging, NULL, - rf_IdentifyStripeParityLogging, - rf_ParityLoggingDagSelect, - rf_MapSIDToPSIDParityLogging, - rf_GetDefaultHeadSepLimitParityLogging, - rf_GetDefaultNumFloatingReconBuffersParityLogging, - NULL, NULL, - rf_SubmitReconBufferBasic, - NULL, - 1, - DefaultStates, - 0) + RF_NK2(rf_MakeLayoutSpecificNULL, NULL) + RF_NU( + rf_ConfigureParityLogging, + rf_MapSectorParityLogging, rf_MapParityParityLogging, NULL, + rf_IdentifyStripeParityLogging, + rf_ParityLoggingDagSelect, + rf_MapSIDToPSIDParityLogging, + rf_GetDefaultHeadSepLimitParityLogging, + rf_GetDefaultNumFloatingReconBuffersParityLogging, + NULL, NULL, + rf_SubmitReconBufferBasic, + NULL, + 1, + DefaultStates, + 0) }, -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ /* end-of-list marker */ - { '\0', NULL, - RF_NK2(NULL, NULL) - RF_NU( - NULL, - NULL, NULL, NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, NULL, - NULL, - NULL, - 0, - NULL, - 0) + {'\0', NULL, + RF_NK2(NULL, NULL) + RF_NU( + NULL, + NULL, NULL, NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, NULL, + NULL, + NULL, + 0, + NULL, + 0) } }; -RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t parityConfig) +RF_LayoutSW_t * 
+rf_GetLayout(RF_ParityConfig_t parityConfig) { - RF_LayoutSW_t *p; - - /* look up the specific layout */ - for (p=&mapsw[0]; p->parityConfig; p++) - if (p->parityConfig == parityConfig) - break; - if (!p->parityConfig) - return(NULL); - RF_ASSERT(p->parityConfig == parityConfig); - return(p); + RF_LayoutSW_t *p; + + /* look up the specific layout */ + for (p = &mapsw[0]; p->parityConfig; p++) + if (p->parityConfig == parityConfig) + break; + if (!p->parityConfig) + return (NULL); + RF_ASSERT(p->parityConfig == parityConfig); + return (p); } - #if RF_UTILITY == 0 /***************************************************************************************** * - * ConfigureLayout -- + * ConfigureLayout -- * * read the configuration file and set up the RAID layout parameters. After reading * common params, invokes the layout-specific configuration routine to finish * the configuration. * ****************************************************************************************/ -int rf_ConfigureLayout( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureLayout( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_ParityConfig_t parityConfig; - RF_LayoutSW_t *p; - int retval; - - layoutPtr->sectorsPerStripeUnit = cfgPtr->sectPerSU; - layoutPtr->SUsPerPU = cfgPtr->SUsPerPU; - layoutPtr->SUsPerRU = cfgPtr->SUsPerRU; - parityConfig = cfgPtr->parityConfig; - - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; - - p = rf_GetLayout(parityConfig); - if (p == NULL) { - RF_ERRORMSG1("Unknown parity configuration '%c'", parityConfig); - return(EINVAL); - } - RF_ASSERT(p->parityConfig == parityConfig); - layoutPtr->map = p; - - /* initialize the specific layout */ - - retval = (p->Configure)(listp, raidPtr, cfgPtr); - - if (retval) - return(retval); - - layoutPtr->dataBytesPerStripe = layoutPtr->dataSectorsPerStripe << 
raidPtr->logBytesPerSector; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - if (rf_forceNumFloatingReconBufs >= 0) { - raidPtr->numFloatingReconBufs = rf_forceNumFloatingReconBufs; - } - else { - raidPtr->numFloatingReconBufs = rf_GetDefaultNumFloatingReconBuffers(raidPtr); - } - - if (rf_forceHeadSepLimit >= 0) { - raidPtr->headSepLimit = rf_forceHeadSepLimit; - } - else { - raidPtr->headSepLimit = rf_GetDefaultHeadSepLimit(raidPtr); - } - - printf("RAIDFRAME: Configure (%s): total number of sectors is %lu (%lu MB)\n", - layoutPtr->map->configName, - (unsigned long)raidPtr->totalSectors, - (unsigned long)(raidPtr->totalSectors / 1024 * (1<<raidPtr->logBytesPerSector) / 1024)); - if (raidPtr->headSepLimit >= 0) { - printf("RAIDFRAME(%s): Using %ld floating recon bufs with head sep limit %ld\n", - layoutPtr->map->configName, (long)raidPtr->numFloatingReconBufs, (long)raidPtr->headSepLimit); - } - else { - printf("RAIDFRAME(%s): Using %ld floating recon bufs with no head sep limit\n", - layoutPtr->map->configName, (long)raidPtr->numFloatingReconBufs); - } - - return(0); + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_ParityConfig_t parityConfig; + RF_LayoutSW_t *p; + int retval; + + layoutPtr->sectorsPerStripeUnit = cfgPtr->sectPerSU; + layoutPtr->SUsPerPU = cfgPtr->SUsPerPU; + layoutPtr->SUsPerRU = cfgPtr->SUsPerRU; + parityConfig = cfgPtr->parityConfig; + + layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; + + p = rf_GetLayout(parityConfig); + if (p == NULL) { + RF_ERRORMSG1("Unknown parity configuration '%c'", parityConfig); + return (EINVAL); + } + RF_ASSERT(p->parityConfig == parityConfig); + layoutPtr->map = p; + + /* initialize the specific layout */ + + retval = (p->Configure) (listp, raidPtr, cfgPtr); + + if (retval) + return (retval); + + layoutPtr->dataBytesPerStripe = layoutPtr->dataSectorsPerStripe << raidPtr->logBytesPerSector; + raidPtr->sectorsPerDisk = 
layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + + if (rf_forceNumFloatingReconBufs >= 0) { + raidPtr->numFloatingReconBufs = rf_forceNumFloatingReconBufs; + } else { + raidPtr->numFloatingReconBufs = rf_GetDefaultNumFloatingReconBuffers(raidPtr); + } + + if (rf_forceHeadSepLimit >= 0) { + raidPtr->headSepLimit = rf_forceHeadSepLimit; + } else { + raidPtr->headSepLimit = rf_GetDefaultHeadSepLimit(raidPtr); + } + + printf("RAIDFRAME: Configure (%s): total number of sectors is %lu (%lu MB)\n", + layoutPtr->map->configName, + (unsigned long) raidPtr->totalSectors, + (unsigned long) (raidPtr->totalSectors / 1024 * (1 << raidPtr->logBytesPerSector) / 1024)); + if (raidPtr->headSepLimit >= 0) { + printf("RAIDFRAME(%s): Using %ld floating recon bufs with head sep limit %ld\n", + layoutPtr->map->configName, (long) raidPtr->numFloatingReconBufs, (long) raidPtr->headSepLimit); + } else { + printf("RAIDFRAME(%s): Using %ld floating recon bufs with no head sep limit\n", + layoutPtr->map->configName, (long) raidPtr->numFloatingReconBufs); + } + + return (0); } /* typically there is a 1-1 mapping between stripes and parity stripes. @@ -700,21 +491,21 @@ int rf_ConfigureLayout( * the parity stripe identifier associated with a stripe ID. 
There is also * a RaidAddressToParityStripeID macro in layout.h */ -RF_StripeNum_t rf_MapStripeIDToParityStripeID(layoutPtr, stripeID, which_ru) - RF_RaidLayout_t *layoutPtr; - RF_StripeNum_t stripeID; - RF_ReconUnitNum_t *which_ru; +RF_StripeNum_t +rf_MapStripeIDToParityStripeID(layoutPtr, stripeID, which_ru) + RF_RaidLayout_t *layoutPtr; + RF_StripeNum_t stripeID; + RF_ReconUnitNum_t *which_ru; { - RF_StripeNum_t parityStripeID; - - /* quick exit in the common case of SUsPerPU==1 */ - if ((layoutPtr->SUsPerPU == 1) || !layoutPtr->map->MapSIDToPSID) { - *which_ru = 0; - return(stripeID); - } - else { - (layoutPtr->map->MapSIDToPSID)(layoutPtr, stripeID, &parityStripeID, which_ru); - } - return(parityStripeID); + RF_StripeNum_t parityStripeID; + + /* quick exit in the common case of SUsPerPU==1 */ + if ((layoutPtr->SUsPerPU == 1) || !layoutPtr->map->MapSIDToPSID) { + *which_ru = 0; + return (stripeID); + } else { + (layoutPtr->map->MapSIDToPSID) (layoutPtr, stripeID, &parityStripeID, which_ru); + } + return (parityStripeID); } -#endif /* RF_UTILITY == 0 */ +#endif /* RF_UTILITY == 0 */ diff --git a/sys/dev/raidframe/rf_layout.h b/sys/dev/raidframe/rf_layout.h index 4259947f67f..5dcf56cc0df 100644 --- a/sys/dev/raidframe/rf_layout.h +++ b/sys/dev/raidframe/rf_layout.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_layout.h,v 1.1 1999/01/11 14:29:28 niklas Exp $ */ -/* $NetBSD: rf_layout.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */ +/* $OpenBSD: rf_layout.h,v 1.2 1999/02/16 00:02:54 niklas Exp $ */ +/* $NetBSD: rf_layout.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -30,188 +30,6 @@ /* rf_layout.h -- header file defining layout data structures */ -/* - * : - * Log: rf_layout.h,v - * Revision 1.50 1996/11/05 21:10:40 jimz - * failed pda generalization - * - * Revision 1.49 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.48 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.47 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.46 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.45 1996/07/11 19:08:00 jimz - * generalize reconstruction mechanism - * allow raid1 reconstructs via copyback (done with array - * quiesced, not online, therefore not disk-directed) - * - * Revision 1.44 1996/06/19 22:23:01 jimz - * parity verification is now a layout-configurable thing - * not all layouts currently support it (correctly, anyway) - * - * Revision 1.43 1996/06/19 17:53:48 jimz - * move GetNumSparePUs, InstallSpareTable ops into layout switch - * - * Revision 1.42 1996/06/19 14:56:48 jimz - * move layout-specific config parsing hooks into RF_LayoutSW_t - * table in rf_layout.c - * - * Revision 1.41 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.40 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.39 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.38 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.37 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.36 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.35 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.34 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.33 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.32 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.31 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.30 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.29 1995/12/01 19:16:19 root - * added copyright info - * - * Revision 1.28 1995/11/28 21:26:49 amiri - * defined a declustering flag RF_BD_DECLUSTERED - * - * Revision 1.27 1995/11/17 19:00:59 wvcii - * created MapQ entry in switch table - * added prototyping to MapParity - * - * Revision 1.26 1995/11/07 15:40:27 wvcii - * changed prototype of SeclectionFunc in mapsw - * function no longer returns numHdrSucc, numTermAnt - * - * Revision 1.25 1995/10/12 20:57:08 arw - * added lots of comments - * - * Revision 1.24 1995/10/12 16:04:08 jimz - * added config name to mapsw - * - * Revision 1.23 1995/07/26 03:28:31 robby - * intermediary checkin - * - * Revision 1.22 1995/07/10 20:51:08 robby - * added to the asm info for the virtual striping locks - * - * Revision 1.21 1995/07/10 16:57:47 robby - * updated alloclistelem struct to the correct struct name - * - * Revision 1.20 1995/07/08 20:06:11 rachad - * *** empty log message *** - * - * Revision 1.19 1995/07/08 18:05:39 rachad - * Linked up Claudsons code with the real cache - * - * Revision 1.18 1995/07/06 14:29:36 robby - * added defaults states list to the layout switch - * - * Revision 1.17 1995/06/23 13:40:14 robby - * updeated to prototypes in rf_layout.h - * - * 
Revision 1.16 1995/06/08 22:11:03 holland - * bug fixes related to mutiple-row arrays - * - * Revision 1.15 1995/05/24 21:43:23 wvcii - * added field numParityLogCol to RaidLayout - * - * Revision 1.14 95/05/02 22:46:53 holland - * minor code cleanups. - * - * Revision 1.13 1995/05/02 12:48:01 holland - * eliminated some unused code. - * - * Revision 1.12 1995/05/01 13:28:00 holland - * parity range locks, locking disk requests, recon+parityscan in kernel, etc. - * - * Revision 1.11 1995/03/15 20:01:17 holland - * added REMAP and DONT_REMAP - * - * Revision 1.10 1995/03/09 19:54:11 rachad - * Added suport for threadless simulator - * - * Revision 1.9 1995/03/03 21:48:58 holland - * minor changes. - * - * Revision 1.8 1995/03/01 20:25:48 holland - * kernelization changes - * - * Revision 1.7 1995/02/03 22:31:36 holland - * many changes related to kernelization - * - * Revision 1.6 1995/01/30 14:53:46 holland - * extensive changes related to making DoIO non-blocking - * - * Revision 1.5 1995/01/24 23:58:46 holland - * multi-way recon XOR, plus various small changes - * - * Revision 1.4 1995/01/04 19:28:35 holland - * corrected comments around mapsw - * - * Revision 1.3 1994/11/28 22:15:45 danner - * Added type field to the physdiskaddr struct. - * - */ - #ifndef _RF__RF_LAYOUT_H_ #define _RF__RF_LAYOUT_H_ @@ -219,93 +37,99 @@ #include "rf_archs.h" #include "rf_alloclist.h" +#ifndef _KERNEL +#include <stdio.h> +#endif + /***************************************************************************************** * * This structure identifies all layout-specific operations and parameters. 
- * + * ****************************************************************************************/ typedef struct RF_LayoutSW_s { - RF_ParityConfig_t parityConfig; - char *configName; + RF_ParityConfig_t parityConfig; + char *configName; -#ifndef KERNEL - /* layout-specific parsing */ - int (*MakeLayoutSpecific)(FILE *fp, RF_Config_t *cfgPtr, void *arg); - void *makeLayoutSpecificArg; -#endif /* !KERNEL */ +#ifndef _KERNEL + /* layout-specific parsing */ + int (*MakeLayoutSpecific) (FILE * fp, RF_Config_t * cfgPtr, void *arg); + void *makeLayoutSpecificArg; +#endif /* !KERNEL */ #if RF_UTILITY == 0 - /* initialization routine */ - int (*Configure)(RF_ShutdownList_t **shutdownListp, RF_Raid_t *raidPtr, RF_Config_t *cfgPtr); - - /* routine to map RAID sector address -> physical (row, col, offset) */ - void (*MapSector)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); - - /* routine to map RAID sector address -> physical (r,c,o) of parity unit */ - void (*MapParity)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); - - /* routine to map RAID sector address -> physical (r,c,o) of Q unit */ - void (*MapQ)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, RF_RowCol_t *row, - RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); - - /* routine to identify the disks comprising a stripe */ - void (*IdentifyStripe)(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, RF_RowCol_t *outRow); - - /* routine to select a dag */ - void (*SelectionFunc)(RF_Raid_t *raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t *asmap, - RF_VoidFuncPtr *); + /* initialization routine */ + int (*Configure) (RF_ShutdownList_t ** shutdownListp, RF_Raid_t * raidPtr, RF_Config_t * cfgPtr); + + /* routine to map RAID sector address -> physical (row, col, offset) */ + void (*MapSector) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t 
* row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); + + /* routine to map RAID sector address -> physical (r,c,o) of parity + * unit */ + void (*MapParity) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); + + /* routine to map RAID sector address -> physical (r,c,o) of Q unit */ + void (*MapQ) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, RF_RowCol_t * row, + RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); + + /* routine to identify the disks comprising a stripe */ + void (*IdentifyStripe) (RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outRow); + + /* routine to select a dag */ + void (*SelectionFunc) (RF_Raid_t * raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t * asmap, + RF_VoidFuncPtr *); #if 0 - void (**createFunc)(RF_Raid_t *, - RF_AccessStripeMap_t *, - RF_DagHeader_t *, void *, - RF_RaidAccessFlags_t, - RF_AllocListElem_t *)); - -#endif + void (**createFunc) (RF_Raid_t *, + RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, + RF_RaidAccessFlags_t, +/**INDENT** Warning@88: Extra ) */ + RF_AllocListElem_t *)); - /* map a stripe ID to a parity stripe ID. This is typically the identity mapping */ - void (*MapSIDToPSID)(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID, - RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru); +#endif - /* get default head separation limit (may be NULL) */ - RF_HeadSepLimit_t (*GetDefaultHeadSepLimit)(RF_Raid_t *raidPtr); + /* map a stripe ID to a parity stripe ID. 
This is typically the + * identity mapping */ + void (*MapSIDToPSID) (RF_RaidLayout_t * layoutPtr, RF_StripeNum_t stripeID, + RF_StripeNum_t * psID, RF_ReconUnitNum_t * which_ru); - /* get default num recon buffers (may be NULL) */ - int (*GetDefaultNumFloatingReconBuffers)(RF_Raid_t *raidPtr); + /* get default head separation limit (may be NULL) */ + RF_HeadSepLimit_t(*GetDefaultHeadSepLimit) (RF_Raid_t * raidPtr); - /* get number of spare recon units (may be NULL) */ - RF_ReconUnitCount_t (*GetNumSpareRUs)(RF_Raid_t *raidPtr); + /* get default num recon buffers (may be NULL) */ + int (*GetDefaultNumFloatingReconBuffers) (RF_Raid_t * raidPtr); - /* spare table installation (may be NULL) */ - int (*InstallSpareTable)(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol); + /* get number of spare recon units (may be NULL) */ + RF_ReconUnitCount_t(*GetNumSpareRUs) (RF_Raid_t * raidPtr); - /* recon buffer submission function */ - int (*SubmitReconBuffer)(RF_ReconBuffer_t *rbuf, int keep_it, - int use_committed); + /* spare table installation (may be NULL) */ + int (*InstallSpareTable) (RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol); - /* - * verify that parity information for a stripe is correct - * see rf_parityscan.h for return vals - */ - int (*VerifyParity)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags); + /* recon buffer submission function */ + int (*SubmitReconBuffer) (RF_ReconBuffer_t * rbuf, int keep_it, + int use_committed); - /* number of faults tolerated by this mapping */ - int faultsTolerated; + /* + * verify that parity information for a stripe is correct + * see rf_parityscan.h for return vals + */ + int (*VerifyParity) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, + RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); - /* states to step through in an access. Must end with "LastState". 
- * The default is DefaultStates in rf_layout.c */ - RF_AccessState_t *states; + /* number of faults tolerated by this mapping */ + int faultsTolerated; - RF_AccessStripeMapFlags_t flags; -#endif /* RF_UTILITY == 0 */ -} RF_LayoutSW_t; + /* states to step through in an access. Must end with "LastState". The + * default is DefaultStates in rf_layout.c */ + RF_AccessState_t *states; + RF_AccessStripeMapFlags_t flags; +#endif /* RF_UTILITY == 0 */ +} RF_LayoutSW_t; /* enables remapping to spare location under dist sparing */ #define RF_REMAP 1 #define RF_DONT_REMAP 0 @@ -313,38 +137,48 @@ typedef struct RF_LayoutSW_s { /* * Flags values for RF_AccessStripeMapFlags_t */ -#define RF_NO_STRIPE_LOCKS 0x0001 /* suppress stripe locks */ -#define RF_DISTRIBUTE_SPARE 0x0002 /* distribute spare space in archs that support it */ -#define RF_BD_DECLUSTERED 0x0004 /* declustering uses block designs */ +#define RF_NO_STRIPE_LOCKS 0x0001 /* suppress stripe locks */ +#define RF_DISTRIBUTE_SPARE 0x0002 /* distribute spare space in archs + * that support it */ +#define RF_BD_DECLUSTERED 0x0004 /* declustering uses block designs */ /************************************************************************* * * this structure forms the layout component of the main Raid * structure. It describes everything needed to define and perform * the mapping of logical RAID addresses <-> physical disk addresses. 
- * + * *************************************************************************/ struct RF_RaidLayout_s { - /* configuration parameters */ - RF_SectorCount_t sectorsPerStripeUnit; /* number of sectors in one stripe unit */ - RF_StripeCount_t SUsPerPU; /* stripe units per parity unit */ - RF_StripeCount_t SUsPerRU; /* stripe units per reconstruction unit */ - - /* redundant-but-useful info computed from the above, used in all layouts */ - RF_StripeCount_t numStripe; /* total number of stripes in the array */ - RF_SectorCount_t dataSectorsPerStripe; - RF_StripeCount_t dataStripeUnitsPerDisk; - u_int bytesPerStripeUnit; - u_int dataBytesPerStripe; - RF_StripeCount_t numDataCol; /* number of SUs of data per stripe (name here is a la RAID4) */ - RF_StripeCount_t numParityCol; /* number of SUs of parity per stripe. Always 1 for now */ - RF_StripeCount_t numParityLogCol; /* number of SUs of parity log per stripe. Always 1 for now */ - RF_StripeCount_t stripeUnitsPerDisk; - - RF_LayoutSW_t *map; /* ptr to struct holding mapping fns and information */ - void *layoutSpecificInfo; /* ptr to a structure holding layout-specific params */ + /* configuration parameters */ + RF_SectorCount_t sectorsPerStripeUnit; /* number of sectors in one + * stripe unit */ + RF_StripeCount_t SUsPerPU; /* stripe units per parity unit */ + RF_StripeCount_t SUsPerRU; /* stripe units per reconstruction + * unit */ + + /* redundant-but-useful info computed from the above, used in all + * layouts */ + RF_StripeCount_t numStripe; /* total number of stripes in the + * array */ + RF_SectorCount_t dataSectorsPerStripe; + RF_StripeCount_t dataStripeUnitsPerDisk; + u_int bytesPerStripeUnit; + u_int dataBytesPerStripe; + RF_StripeCount_t numDataCol; /* number of SUs of data per stripe + * (name here is a la RAID4) */ + RF_StripeCount_t numParityCol; /* number of SUs of parity per stripe. + * Always 1 for now */ + RF_StripeCount_t numParityLogCol; /* number of SUs of parity log + * per stripe. 
Always 1 for + * now */ + RF_StripeCount_t stripeUnitsPerDisk; + + RF_LayoutSW_t *map; /* ptr to struct holding mapping fns and + * information */ + void *layoutSpecificInfo; /* ptr to a structure holding + * layout-specific params */ }; - /***************************************************************************************** * * The mapping code returns a pointer to a list of AccessStripeMap structures, which @@ -353,7 +187,7 @@ struct RF_RaidLayout_s { * contains a stripe identifier and a pointer to a list of PhysDiskAddr structuress. Each * element in this latter list describes the physical location of a stripe unit accessed * within the corresponding stripe. - * + * ****************************************************************************************/ #define RF_PDA_TYPE_DATA 0 @@ -361,56 +195,77 @@ struct RF_RaidLayout_s { #define RF_PDA_TYPE_Q 2 struct RF_PhysDiskAddr_s { - RF_RowCol_t row,col; /* disk identifier */ - RF_SectorNum_t startSector; /* sector offset into the disk */ - RF_SectorCount_t numSector; /* number of sectors accessed */ - int type; /* used by higher levels: currently, data, parity, or q */ - caddr_t bufPtr; /* pointer to buffer supplying/receiving data */ - RF_RaidAddr_t raidAddress; /* raid address corresponding to this physical disk address */ - RF_PhysDiskAddr_t *next; + RF_RowCol_t row, col; /* disk identifier */ + RF_SectorNum_t startSector; /* sector offset into the disk */ + RF_SectorCount_t numSector; /* number of sectors accessed */ + int type; /* used by higher levels: currently, data, + * parity, or q */ + caddr_t bufPtr; /* pointer to buffer supplying/receiving data */ + RF_RaidAddr_t raidAddress; /* raid address corresponding to this + * physical disk address */ + RF_PhysDiskAddr_t *next; }; - #define RF_MAX_FAILED_PDA RF_MAXCOL struct RF_AccessStripeMap_s { - RF_StripeNum_t stripeID; /* the stripe index */ - RF_RaidAddr_t raidAddress; /* the starting raid address within this stripe */ - RF_RaidAddr_t endRaidAddress; /* 
raid address one sector past the end of the access */ - RF_SectorCount_t totalSectorsAccessed; /* total num sectors identified in physInfo list */ - RF_StripeCount_t numStripeUnitsAccessed; /* total num elements in physInfo list */ - int numDataFailed; /* number of failed data disks accessed */ - int numParityFailed; /* number of failed parity disks accessed (0 or 1) */ - int numQFailed; /* number of failed Q units accessed (0 or 1) */ - RF_AccessStripeMapFlags_t flags; /* various flags */ + RF_StripeNum_t stripeID;/* the stripe index */ + RF_RaidAddr_t raidAddress; /* the starting raid address within + * this stripe */ + RF_RaidAddr_t endRaidAddress; /* raid address one sector past the + * end of the access */ + RF_SectorCount_t totalSectorsAccessed; /* total num sectors + * identified in physInfo list */ + RF_StripeCount_t numStripeUnitsAccessed; /* total num elements in + * physInfo list */ + int numDataFailed; /* number of failed data disks accessed */ + int numParityFailed;/* number of failed parity disks accessed (0 + * or 1) */ + int numQFailed; /* number of failed Q units accessed (0 or 1) */ + RF_AccessStripeMapFlags_t flags; /* various flags */ #if 0 - RF_PhysDiskAddr_t *failedPDA; /* points to the PDA that has failed */ - RF_PhysDiskAddr_t *failedPDAtwo; /* points to the second PDA that has failed, if any */ + RF_PhysDiskAddr_t *failedPDA; /* points to the PDA that has failed */ + RF_PhysDiskAddr_t *failedPDAtwo; /* points to the second PDA + * that has failed, if any */ #else - int numFailedPDAs; /* number of failed phys addrs */ - RF_PhysDiskAddr_t *failedPDAs[RF_MAX_FAILED_PDA]; /* array of failed phys addrs */ + int numFailedPDAs; /* number of failed phys addrs */ + RF_PhysDiskAddr_t *failedPDAs[RF_MAX_FAILED_PDA]; /* array of failed phys + * addrs */ #endif - RF_PhysDiskAddr_t *physInfo; /* a list of PhysDiskAddr structs */ - RF_PhysDiskAddr_t *parityInfo; /* list of physical addrs for the parity (P of P + Q ) */ - RF_PhysDiskAddr_t *qInfo; /* list 
of physical addrs for the Q of P + Q */ - RF_LockReqDesc_t lockReqDesc; /* used for stripe locking */ - RF_RowCol_t origRow; /* the original row: we may redirect the acc to a different row */ - RF_AccessStripeMap_t *next; + RF_PhysDiskAddr_t *physInfo; /* a list of PhysDiskAddr structs */ + RF_PhysDiskAddr_t *parityInfo; /* list of physical addrs for the + * parity (P of P + Q ) */ + RF_PhysDiskAddr_t *qInfo; /* list of physical addrs for the Q of + * P + Q */ + RF_LockReqDesc_t lockReqDesc; /* used for stripe locking */ + RF_RowCol_t origRow; /* the original row: we may redirect the acc + * to a different row */ + RF_AccessStripeMap_t *next; }; - /* flag values */ -#define RF_ASM_REDIR_LARGE_WRITE 0x00000001 /* allows large-write creation code to redirect failed accs */ -#define RF_ASM_BAILOUT_DAG_USED 0x00000002 /* allows us to detect recursive calls to the bailout write dag */ -#define RF_ASM_FLAGS_LOCK_TRIED 0x00000004 /* we've acquired the lock on the first parity range in this parity stripe */ -#define RF_ASM_FLAGS_LOCK_TRIED2 0x00000008 /* we've acquired the lock on the 2nd parity range in this parity stripe */ -#define RF_ASM_FLAGS_FORCE_TRIED 0x00000010 /* we've done the force-recon call on this parity stripe */ -#define RF_ASM_FLAGS_RECON_BLOCKED 0x00000020 /* we blocked recon => we must unblock it later */ +#define RF_ASM_REDIR_LARGE_WRITE 0x00000001 /* allows large-write creation + * code to redirect failed + * accs */ +#define RF_ASM_BAILOUT_DAG_USED 0x00000002 /* allows us to detect + * recursive calls to the + * bailout write dag */ +#define RF_ASM_FLAGS_LOCK_TRIED 0x00000004 /* we've acquired the lock on + * the first parity range in + * this parity stripe */ +#define RF_ASM_FLAGS_LOCK_TRIED2 0x00000008 /* we've acquired the lock on + * the 2nd parity range in + * this parity stripe */ +#define RF_ASM_FLAGS_FORCE_TRIED 0x00000010 /* we've done the force-recon + * call on this parity stripe */ +#define RF_ASM_FLAGS_RECON_BLOCKED 0x00000020 /* we 
blocked recon => we must + * unblock it later */ struct RF_AccessStripeMapHeader_s { - RF_StripeCount_t numStripes; /* total number of stripes touched by this acc */ - RF_AccessStripeMap_t *stripeMap; /* pointer to the actual map. Also used for making lists */ - RF_AccessStripeMapHeader_t *next; + RF_StripeCount_t numStripes; /* total number of stripes touched by + * this acc */ + RF_AccessStripeMap_t *stripeMap; /* pointer to the actual map. + * Also used for making lists */ + RF_AccessStripeMapHeader_t *next; }; - /***************************************************************************************** * * various routines mapping addresses in the RAID address space. These work across @@ -485,9 +340,11 @@ struct RF_AccessStripeMapHeader_s { ( (_psid_) * (_layoutPtr_)->SUsPerPU * (_layoutPtr_)->numDataCol * (_layoutPtr_)->sectorsPerStripeUnit ) RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t parityConfig); -int rf_ConfigureLayout(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -RF_StripeNum_t rf_MapStripeIDToParityStripeID(RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, RF_ReconUnitNum_t *which_ru); - -#endif /* !_RF__RF_LAYOUT_H_ */ +int +rf_ConfigureLayout(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +RF_StripeNum_t +rf_MapStripeIDToParityStripeID(RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, RF_ReconUnitNum_t * which_ru); + +#endif /* !_RF__RF_LAYOUT_H_ */ diff --git a/sys/dev/raidframe/rf_map.c b/sys/dev/raidframe/rf_map.c index 11a3262a3a8..9d426580784 100644 --- a/sys/dev/raidframe/rf_map.c +++ b/sys/dev/raidframe/rf_map.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_map.c,v 1.1 1999/01/11 14:29:28 niklas Exp $ */ -/* $NetBSD: rf_map.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_map.c,v 1.2 1999/02/16 00:02:55 niklas Exp $ */ +/* $NetBSD: rf_map.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -33,148 +33,6 @@ * **************************************************************************/ -/* - * : - * Log: rf_map.c,v - * Revision 1.53 1996/11/05 21:10:40 jimz - * failed pda generalization - * - * Revision 1.52 1996/08/20 19:58:39 jimz - * initialize numParityFailed and numQFailed to 0 in MarkFailuresInASMList - * - * Revision 1.51 1996/08/19 22:26:31 jimz - * add Chang's bugfixes for double-disk failures in MarkFailuresInASMList - * - * Revision 1.50 1996/08/19 21:38:06 jimz - * stripeOffset was uninitialized in CheckStripeForFailures - * - * Revision 1.49 1996/07/31 15:34:56 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.48 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.47 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.46 1996/06/10 12:50:57 jimz - * Add counters to freelists to track number of allocations, frees, - * grows, max size, etc. Adjust a couple sets of PRIME params based - * on the results. - * - * Revision 1.45 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.44 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.43 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.42 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. 
Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.41 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.40 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.39 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.38 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.37 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.36 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.35 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.34 1996/05/20 16:14:45 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.33 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.32 1996/05/17 00:51:47 jimz - * reformat for readability - * - * Revision 1.31 1996/05/16 23:06:26 jimz - * convert asmhdr to use RF_FREELIST stuff - * - * Revision 1.30 1996/05/16 19:09:42 jimz - * grow init asm freelist to 32 - * - * Revision 1.29 1996/05/16 15:27:55 jimz - * prime freelist pumps for asm and pda lists - * - * Revision 1.28 1996/05/02 14:58:35 jimz - * legibility cleanup - * - * Revision 1.27 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.26 1995/12/01 19:25:06 root - * added copyright info - * - * Revision 1.25 1995/11/17 19:01:57 wvcii - * added call to MapQ in two fault tolerant case - * - * Revision 1.24 1995/11/17 15:10:53 wvcii - * fixed bug in ASMCheckStatus - ASSERT was using disk sector addresses - * rather than raidAddress - * - * Revision 1.23 1995/07/26 03:26:51 robby - * map the allocation and freeing routines for some stuff non-static - * - * Revision 1.22 1995/06/28 09:33:45 holland - * bug fixes related to dist sparing and multiple-row arrays - * - * Revision 1.21 1995/06/28 04:51:08 holland - * added some asserts against zero-length accesses - * - * Revision 1.20 1995/06/23 13:40:06 robby - * updeated to prototypes in rf_layout.h - * - 
*/ - #include "rf_types.h" #include "rf_threadstuff.h" #include "rf_raid.h" @@ -184,9 +42,10 @@ #include "rf_shutdown.h" #include "rf_sys.h" -static void rf_FreePDAList(RF_PhysDiskAddr_t *start, RF_PhysDiskAddr_t *end, int count); -static void rf_FreeASMList(RF_AccessStripeMap_t *start, RF_AccessStripeMap_t *end, - int count); +static void rf_FreePDAList(RF_PhysDiskAddr_t * start, RF_PhysDiskAddr_t * end, int count); +static void +rf_FreeASMList(RF_AccessStripeMap_t * start, RF_AccessStripeMap_t * end, + int count); /***************************************************************************************** * @@ -214,166 +73,179 @@ static void rf_FreeASMList(RF_AccessStripeMap_t *start, RF_AccessStripeMap_t *en * ****************************************************************************************/ -RF_AccessStripeMapHeader_t *rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddress; /* starting address in RAID address space */ - RF_SectorCount_t numBlocks; /* number of blocks in RAID address space to access */ - caddr_t buffer; /* buffer to supply/receive data */ - int remap; /* 1 => remap addresses to spare space */ +RF_AccessStripeMapHeader_t * +rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap) + RF_Raid_t *raidPtr; + RF_RaidAddr_t raidAddress; /* starting address in RAID address + * space */ + RF_SectorCount_t numBlocks; /* number of blocks in RAID address + * space to access */ + caddr_t buffer; /* buffer to supply/receive data */ + int remap; /* 1 => remap addresses to spare space */ { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_AccessStripeMapHeader_t *asm_hdr = NULL; - RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL; - int faultsTolerated = layoutPtr->map->faultsTolerated; - RF_RaidAddr_t startAddress = raidAddress; /* we'll change raidAddress along the way */ - RF_RaidAddr_t endAddress = raidAddress + numBlocks; - RF_RaidDisk_t **disks = raidPtr->Disks; - - RF_PhysDiskAddr_t 
*pda_p, *pda_q; - RF_StripeCount_t numStripes = 0; - RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress, nextStripeUnitAddress; - RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr; - RF_StripeCount_t totStripes; - RF_StripeNum_t stripeID, lastSID, SUID, lastSUID; - RF_AccessStripeMap_t *asmList, *t_asm; - RF_PhysDiskAddr_t *pdaList, *t_pda; - - /* allocate all the ASMs and PDAs up front */ - lastRaidAddr = raidAddress + numBlocks - 1 ; - stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress); - lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr); - totStripes = lastSID - stripeID + 1; - SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress); - lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr); - - asmList = rf_AllocASMList(totStripes); - pdaList = rf_AllocPDAList(lastSUID - SUID + 1 + faultsTolerated * totStripes); /* may also need pda(s) per stripe for parity */ - - if (raidAddress+numBlocks > raidPtr->totalSectors) { - RF_ERRORMSG1("Unable to map access because offset (%d) was invalid\n", - (int)raidAddress); - return(NULL); - } - - if (rf_mapDebug) - rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks); - for (; raidAddress < endAddress; ) { - /* make the next stripe structure */ - RF_ASSERT(asmList); - t_asm = asmList; - asmList = asmList->next; - bzero((char *)t_asm, sizeof(RF_AccessStripeMap_t)); - if (!asm_p) - asm_list = asm_p = t_asm; - else { - asm_p->next = t_asm; - asm_p = asm_p->next; - } - numStripes++; - - /* map SUs from current location to the end of the stripe */ - asm_p->stripeID = /*rf_RaidAddressToStripeID(layoutPtr, raidAddress)*/ stripeID++; - stripeRealEndAddress = rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress); - stripeEndAddress = RF_MIN(endAddress,stripeRealEndAddress ); - asm_p->raidAddress = raidAddress; - asm_p->endRaidAddress = stripeEndAddress; - - /* map each stripe unit in the stripe */ - pda_p = NULL; - startAddrWithinStripe = raidAddress; /* Raid addr of start of portion of 
access that is within this stripe */ - for (; raidAddress < stripeEndAddress; ) { - RF_ASSERT(pdaList); - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t)); - if (!pda_p) - asm_p->physInfo = pda_p = t_pda; - else { - pda_p->next = t_pda; - pda_p = pda_p->next; - } - - pda_p->type = RF_PDA_TYPE_DATA; - (layoutPtr->map->MapSector)(raidPtr, raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); - - /* mark any failures we find. failedPDA is don't-care if there is more than one failure */ - pda_p->raidAddress = raidAddress; /* the RAID address corresponding to this physical disk address */ - nextStripeUnitAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, raidAddress); - pda_p->numSector = RF_MIN(endAddress, nextStripeUnitAddress) - raidAddress; - RF_ASSERT(pda_p->numSector != 0); - rf_ASMCheckStatus(raidPtr,pda_p,asm_p,disks,0); - pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr, (raidAddress - startAddress)); - asm_p->totalSectorsAccessed += pda_p->numSector; - asm_p->numStripeUnitsAccessed++; - asm_p->origRow = pda_p->row; /* redundant but harmless to do this in every loop iteration */ - - raidAddress = RF_MIN(endAddress, nextStripeUnitAddress); - } - - /* Map the parity. At this stage, the startSector and numSector fields - * for the parity unit are always set to indicate the entire parity unit. - * We may modify this after mapping the data portion. 
- */ - switch (faultsTolerated) - { - case 0: - break; - case 1: /* single fault tolerant */ - RF_ASSERT(pdaList); - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t)); - pda_p = asm_p->parityInfo = t_pda; - pda_p->type = RF_PDA_TYPE_PARITY; - (layoutPtr->map->MapParity)(raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); - pda_p->numSector = layoutPtr->sectorsPerStripeUnit; - /* raidAddr may be needed to find unit to redirect to */ - pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); - rf_ASMCheckStatus(raidPtr,pda_p,asm_p,disks,1); - rf_ASMParityAdjust(asm_p->parityInfo,startAddrWithinStripe,endAddress,layoutPtr,asm_p); - - break; - case 2: /* two fault tolerant */ - RF_ASSERT(pdaList && pdaList->next); - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t)); - pda_p = asm_p->parityInfo = t_pda; - pda_p->type = RF_PDA_TYPE_PARITY; - t_pda = pdaList; - pdaList = pdaList->next; - bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t)); - pda_q = asm_p->qInfo = t_pda; - pda_q->type = RF_PDA_TYPE_Q; - (layoutPtr->map->MapParity)(raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); - (layoutPtr->map->MapQ)(raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_q->row), &(pda_q->col), &(pda_q->startSector), remap); - pda_q->numSector = pda_p->numSector = layoutPtr->sectorsPerStripeUnit; - /* raidAddr may be needed to find unit to redirect to */ - pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); - pda_q->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); - /* failure mode stuff */ - rf_ASMCheckStatus(raidPtr,pda_p,asm_p,disks,1); - 
rf_ASMCheckStatus(raidPtr,pda_q,asm_p,disks,1); - rf_ASMParityAdjust(asm_p->parityInfo,startAddrWithinStripe,endAddress,layoutPtr,asm_p); - rf_ASMParityAdjust(asm_p->qInfo,startAddrWithinStripe,endAddress,layoutPtr,asm_p); - break; - } - } - RF_ASSERT(asmList == NULL && pdaList == NULL); - /* make the header structure */ - asm_hdr = rf_AllocAccessStripeMapHeader(); - RF_ASSERT(numStripes == totStripes); - asm_hdr->numStripes = numStripes; - asm_hdr->stripeMap = asm_list; - - if (rf_mapDebug) - rf_PrintAccessStripeMap(asm_hdr); - return(asm_hdr); + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_AccessStripeMapHeader_t *asm_hdr = NULL; + RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL; + int faultsTolerated = layoutPtr->map->faultsTolerated; + RF_RaidAddr_t startAddress = raidAddress; /* we'll change + * raidAddress along the + * way */ + RF_RaidAddr_t endAddress = raidAddress + numBlocks; + RF_RaidDisk_t **disks = raidPtr->Disks; + + RF_PhysDiskAddr_t *pda_p, *pda_q; + RF_StripeCount_t numStripes = 0; + RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress, nextStripeUnitAddress; + RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr; + RF_StripeCount_t totStripes; + RF_StripeNum_t stripeID, lastSID, SUID, lastSUID; + RF_AccessStripeMap_t *asmList, *t_asm; + RF_PhysDiskAddr_t *pdaList, *t_pda; + + /* allocate all the ASMs and PDAs up front */ + lastRaidAddr = raidAddress + numBlocks - 1; + stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress); + lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr); + totStripes = lastSID - stripeID + 1; + SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress); + lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr); + + asmList = rf_AllocASMList(totStripes); + pdaList = rf_AllocPDAList(lastSUID - SUID + 1 + faultsTolerated * totStripes); /* may also need pda(s) + * per stripe for parity */ + + if (raidAddress + numBlocks > raidPtr->totalSectors) { + RF_ERRORMSG1("Unable to map access because 
offset (%d) was invalid\n", + (int) raidAddress); + return (NULL); + } + if (rf_mapDebug) + rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks); + for (; raidAddress < endAddress;) { + /* make the next stripe structure */ + RF_ASSERT(asmList); + t_asm = asmList; + asmList = asmList->next; + bzero((char *) t_asm, sizeof(RF_AccessStripeMap_t)); + if (!asm_p) + asm_list = asm_p = t_asm; + else { + asm_p->next = t_asm; + asm_p = asm_p->next; + } + numStripes++; + + /* map SUs from current location to the end of the stripe */ + asm_p->stripeID = /* rf_RaidAddressToStripeID(layoutPtr, + raidAddress) */ stripeID++; + stripeRealEndAddress = rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress); + stripeEndAddress = RF_MIN(endAddress, stripeRealEndAddress); + asm_p->raidAddress = raidAddress; + asm_p->endRaidAddress = stripeEndAddress; + + /* map each stripe unit in the stripe */ + pda_p = NULL; + startAddrWithinStripe = raidAddress; /* Raid addr of start of + * portion of access + * that is within this + * stripe */ + for (; raidAddress < stripeEndAddress;) { + RF_ASSERT(pdaList); + t_pda = pdaList; + pdaList = pdaList->next; + bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); + if (!pda_p) + asm_p->physInfo = pda_p = t_pda; + else { + pda_p->next = t_pda; + pda_p = pda_p->next; + } + + pda_p->type = RF_PDA_TYPE_DATA; + (layoutPtr->map->MapSector) (raidPtr, raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); + + /* mark any failures we find. 
failedPDA is don't-care + * if there is more than one failure */ + pda_p->raidAddress = raidAddress; /* the RAID address + * corresponding to this + * physical disk address */ + nextStripeUnitAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, raidAddress); + pda_p->numSector = RF_MIN(endAddress, nextStripeUnitAddress) - raidAddress; + RF_ASSERT(pda_p->numSector != 0); + rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 0); + pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr, (raidAddress - startAddress)); + asm_p->totalSectorsAccessed += pda_p->numSector; + asm_p->numStripeUnitsAccessed++; + asm_p->origRow = pda_p->row; /* redundant but + * harmless to do this + * in every loop + * iteration */ + + raidAddress = RF_MIN(endAddress, nextStripeUnitAddress); + } + + /* Map the parity. At this stage, the startSector and + * numSector fields for the parity unit are always set to + * indicate the entire parity unit. We may modify this after + * mapping the data portion. */ + switch (faultsTolerated) { + case 0: + break; + case 1: /* single fault tolerant */ + RF_ASSERT(pdaList); + t_pda = pdaList; + pdaList = pdaList->next; + bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); + pda_p = asm_p->parityInfo = t_pda; + pda_p->type = RF_PDA_TYPE_PARITY; + (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), + &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); + pda_p->numSector = layoutPtr->sectorsPerStripeUnit; + /* raidAddr may be needed to find unit to redirect to */ + pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); + rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); + rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); + + break; + case 2: /* two fault tolerant */ + RF_ASSERT(pdaList && pdaList->next); + t_pda = pdaList; + pdaList = pdaList->next; + bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); + pda_p = 
asm_p->parityInfo = t_pda; + pda_p->type = RF_PDA_TYPE_PARITY; + t_pda = pdaList; + pdaList = pdaList->next; + bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); + pda_q = asm_p->qInfo = t_pda; + pda_q->type = RF_PDA_TYPE_Q; + (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), + &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); + (layoutPtr->map->MapQ) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), + &(pda_q->row), &(pda_q->col), &(pda_q->startSector), remap); + pda_q->numSector = pda_p->numSector = layoutPtr->sectorsPerStripeUnit; + /* raidAddr may be needed to find unit to redirect to */ + pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); + pda_q->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); + /* failure mode stuff */ + rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); + rf_ASMCheckStatus(raidPtr, pda_q, asm_p, disks, 1); + rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); + rf_ASMParityAdjust(asm_p->qInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); + break; + } + } + RF_ASSERT(asmList == NULL && pdaList == NULL); + /* make the header structure */ + asm_hdr = rf_AllocAccessStripeMapHeader(); + RF_ASSERT(numStripes == totStripes); + asm_hdr->numStripes = numStripes; + asm_hdr->stripeMap = asm_list; + + if (rf_mapDebug) + rf_PrintAccessStripeMap(asm_hdr); + return (asm_hdr); } - /***************************************************************************************** * This routine walks through an ASM list and marks the PDAs that have failed. * It's called only when a disk failure causes an in-flight DAG to fail. @@ -382,98 +254,110 @@ RF_AccessStripeMapHeader_t *rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer * on the rest of the code to do the right thing with this. 
****************************************************************************************/ -void rf_MarkFailuresInASMList(raidPtr, asm_h) - RF_Raid_t *raidPtr; - RF_AccessStripeMapHeader_t *asm_h; +void +rf_MarkFailuresInASMList(raidPtr, asm_h) + RF_Raid_t *raidPtr; + RF_AccessStripeMapHeader_t *asm_h; { - RF_RaidDisk_t **disks = raidPtr->Disks; - RF_AccessStripeMap_t *asmap; - RF_PhysDiskAddr_t *pda; - - for (asmap = asm_h->stripeMap; asmap; asmap = asmap->next) { - asmap->numDataFailed = asmap->numParityFailed = asmap->numQFailed = 0; - asmap->numFailedPDAs = 0; - bzero((char *)asmap->failedPDAs, - RF_MAX_FAILED_PDA*sizeof(RF_PhysDiskAddr_t *)); - for (pda = asmap->physInfo; pda; pda=pda->next) { - if (RF_DEAD_DISK(disks[pda->row][pda->col].status)) { - printf("DEAD DISK BOGUSLY DETECTED!!\n"); - asmap->numDataFailed++; - asmap->failedPDAs[asmap->numFailedPDAs] = pda; - asmap->numFailedPDAs++; - } - } - pda = asmap->parityInfo; - if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) { - asmap->numParityFailed++; - asmap->failedPDAs[asmap->numFailedPDAs] = pda; - asmap->numFailedPDAs++; - } - pda = asmap->qInfo; - if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) { - asmap->numQFailed++; - asmap->failedPDAs[asmap->numFailedPDAs] = pda; - asmap->numFailedPDAs++; - } - } + RF_RaidDisk_t **disks = raidPtr->Disks; + RF_AccessStripeMap_t *asmap; + RF_PhysDiskAddr_t *pda; + + for (asmap = asm_h->stripeMap; asmap; asmap = asmap->next) { + asmap->numDataFailed = asmap->numParityFailed = asmap->numQFailed = 0; + asmap->numFailedPDAs = 0; + bzero((char *) asmap->failedPDAs, + RF_MAX_FAILED_PDA * sizeof(RF_PhysDiskAddr_t *)); + for (pda = asmap->physInfo; pda; pda = pda->next) { + if (RF_DEAD_DISK(disks[pda->row][pda->col].status)) { + printf("DEAD DISK BOGUSLY DETECTED!!\n"); + asmap->numDataFailed++; + asmap->failedPDAs[asmap->numFailedPDAs] = pda; + asmap->numFailedPDAs++; + } + } + pda = asmap->parityInfo; + if (pda && 
RF_DEAD_DISK(disks[pda->row][pda->col].status)) { + asmap->numParityFailed++; + asmap->failedPDAs[asmap->numFailedPDAs] = pda; + asmap->numFailedPDAs++; + } + pda = asmap->qInfo; + if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) { + asmap->numQFailed++; + asmap->failedPDAs[asmap->numFailedPDAs] = pda; + asmap->numFailedPDAs++; + } + } } - /***************************************************************************************** * * DuplicateASM -- duplicates an ASM and returns the new one * ****************************************************************************************/ -RF_AccessStripeMap_t *rf_DuplicateASM(asmap) - RF_AccessStripeMap_t *asmap; +RF_AccessStripeMap_t * +rf_DuplicateASM(asmap) + RF_AccessStripeMap_t *asmap; { - RF_AccessStripeMap_t *new_asm; - RF_PhysDiskAddr_t *pda, *new_pda, *t_pda; - - new_pda = NULL; - new_asm = rf_AllocAccessStripeMapComponent(); - bcopy((char *)asmap, (char *)new_asm, sizeof(RF_AccessStripeMap_t)); - new_asm->numFailedPDAs = 0; /* ??? 
*/ - new_asm->failedPDAs[0] = NULL; - new_asm->physInfo = NULL; - new_asm->parityInfo = NULL; - new_asm->next = NULL; - - for (pda = asmap->physInfo; pda; pda=pda->next) { /* copy the physInfo list */ - t_pda = rf_AllocPhysDiskAddr(); - bcopy((char *)pda, (char *)t_pda, sizeof(RF_PhysDiskAddr_t)); - t_pda->next = NULL; - if (!new_asm->physInfo) {new_asm->physInfo = t_pda; new_pda = t_pda;} - else {new_pda->next = t_pda; new_pda = new_pda->next;} - if (pda == asmap->failedPDAs[0]) - new_asm->failedPDAs[0] = t_pda; - } - for (pda = asmap->parityInfo; pda; pda=pda->next) { /* copy the parityInfo list */ - t_pda = rf_AllocPhysDiskAddr(); - bcopy((char *)pda, (char *)t_pda, sizeof(RF_PhysDiskAddr_t)); - t_pda->next = NULL; - if (!new_asm->parityInfo) {new_asm->parityInfo = t_pda; new_pda = t_pda;} - else {new_pda->next = t_pda; new_pda = new_pda->next;} - if (pda == asmap->failedPDAs[0]) - new_asm->failedPDAs[0] = t_pda; - } - return(new_asm); + RF_AccessStripeMap_t *new_asm; + RF_PhysDiskAddr_t *pda, *new_pda, *t_pda; + + new_pda = NULL; + new_asm = rf_AllocAccessStripeMapComponent(); + bcopy((char *) asmap, (char *) new_asm, sizeof(RF_AccessStripeMap_t)); + new_asm->numFailedPDAs = 0; /* ??? 
*/ + new_asm->failedPDAs[0] = NULL; + new_asm->physInfo = NULL; + new_asm->parityInfo = NULL; + new_asm->next = NULL; + + for (pda = asmap->physInfo; pda; pda = pda->next) { /* copy the physInfo + * list */ + t_pda = rf_AllocPhysDiskAddr(); + bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t)); + t_pda->next = NULL; + if (!new_asm->physInfo) { + new_asm->physInfo = t_pda; + new_pda = t_pda; + } else { + new_pda->next = t_pda; + new_pda = new_pda->next; + } + if (pda == asmap->failedPDAs[0]) + new_asm->failedPDAs[0] = t_pda; + } + for (pda = asmap->parityInfo; pda; pda = pda->next) { /* copy the parityInfo + * list */ + t_pda = rf_AllocPhysDiskAddr(); + bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t)); + t_pda->next = NULL; + if (!new_asm->parityInfo) { + new_asm->parityInfo = t_pda; + new_pda = t_pda; + } else { + new_pda->next = t_pda; + new_pda = new_pda->next; + } + if (pda == asmap->failedPDAs[0]) + new_asm->failedPDAs[0] = t_pda; + } + return (new_asm); } - /***************************************************************************************** * * DuplicatePDA -- duplicates a PDA and returns the new one * ****************************************************************************************/ -RF_PhysDiskAddr_t *rf_DuplicatePDA(pda) - RF_PhysDiskAddr_t *pda; +RF_PhysDiskAddr_t * +rf_DuplicatePDA(pda) + RF_PhysDiskAddr_t *pda; { - RF_PhysDiskAddr_t *new; + RF_PhysDiskAddr_t *new; - new = rf_AllocPhysDiskAddr(); - bcopy((char *)pda, (char *)new, sizeof(RF_PhysDiskAddr_t)); - return(new); + new = rf_AllocPhysDiskAddr(); + bcopy((char *) pda, (char *) new, sizeof(RF_PhysDiskAddr_t)); + return (new); } - /***************************************************************************************** * * routines to allocate and free list elements. All allocation routines zero the @@ -501,185 +385,218 @@ static RF_FreeList_t *rf_pda_freelist; /* called at shutdown time. 
So far, all that is necessary is to release all the free lists */ static void rf_ShutdownMapModule(void *); -static void rf_ShutdownMapModule(ignored) - void *ignored; +static void +rf_ShutdownMapModule(ignored) + void *ignored; { - RF_FREELIST_DESTROY(rf_asmhdr_freelist,next,(RF_AccessStripeMapHeader_t *)); - RF_FREELIST_DESTROY(rf_pda_freelist,next,(RF_PhysDiskAddr_t *)); - RF_FREELIST_DESTROY(rf_asm_freelist,next,(RF_AccessStripeMap_t *)); + RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); + RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *)); + RF_FREELIST_DESTROY(rf_asm_freelist, next, (RF_AccessStripeMap_t *)); } -int rf_ConfigureMapModule(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureMapModule(listp) + RF_ShutdownList_t **listp; { - int rc; + int rc; RF_FREELIST_CREATE(rf_asmhdr_freelist, RF_MAX_FREE_ASMHDR, - RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t)); + RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t)); if (rf_asmhdr_freelist == NULL) { - return(ENOMEM); + return (ENOMEM); } RF_FREELIST_CREATE(rf_asm_freelist, RF_MAX_FREE_ASM, - RF_ASM_INC, sizeof(RF_AccessStripeMap_t)); + RF_ASM_INC, sizeof(RF_AccessStripeMap_t)); if (rf_asm_freelist == NULL) { - RF_FREELIST_DESTROY(rf_asmhdr_freelist,next,(RF_AccessStripeMapHeader_t *)); - return(ENOMEM); + RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); + return (ENOMEM); } RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA, - RF_PDA_INC, sizeof(RF_PhysDiskAddr_t)); + RF_PDA_INC, sizeof(RF_PhysDiskAddr_t)); if (rf_pda_freelist == NULL) { - RF_FREELIST_DESTROY(rf_asmhdr_freelist,next,(RF_AccessStripeMapHeader_t *)); - RF_FREELIST_DESTROY(rf_pda_freelist,next,(RF_PhysDiskAddr_t *)); - return(ENOMEM); + RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); + RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *)); + return (ENOMEM); } - rc = rf_ShutdownCreate(listp, rf_ShutdownMapModule, NULL); if 
(rc) { RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + __LINE__, rc); rf_ShutdownMapModule(NULL); - return(rc); + return (rc); } - - RF_FREELIST_PRIME(rf_asmhdr_freelist, RF_ASMHDR_INITIAL,next, - (RF_AccessStripeMapHeader_t *)); - RF_FREELIST_PRIME(rf_asm_freelist, RF_ASM_INITIAL,next, - (RF_AccessStripeMap_t *)); - RF_FREELIST_PRIME(rf_pda_freelist, RF_PDA_INITIAL,next, - (RF_PhysDiskAddr_t *)); - - return(0); + RF_FREELIST_PRIME(rf_asmhdr_freelist, RF_ASMHDR_INITIAL, next, + (RF_AccessStripeMapHeader_t *)); + RF_FREELIST_PRIME(rf_asm_freelist, RF_ASM_INITIAL, next, + (RF_AccessStripeMap_t *)); + RF_FREELIST_PRIME(rf_pda_freelist, RF_PDA_INITIAL, next, + (RF_PhysDiskAddr_t *)); + + return (0); } -RF_AccessStripeMapHeader_t *rf_AllocAccessStripeMapHeader() +RF_AccessStripeMapHeader_t * +rf_AllocAccessStripeMapHeader() { RF_AccessStripeMapHeader_t *p; - RF_FREELIST_GET(rf_asmhdr_freelist,p,next,(RF_AccessStripeMapHeader_t *)); - bzero((char *)p, sizeof(RF_AccessStripeMapHeader_t)); + RF_FREELIST_GET(rf_asmhdr_freelist, p, next, (RF_AccessStripeMapHeader_t *)); + bzero((char *) p, sizeof(RF_AccessStripeMapHeader_t)); - return(p); + return (p); } -void rf_FreeAccessStripeMapHeader(p) - RF_AccessStripeMapHeader_t *p; +void +rf_FreeAccessStripeMapHeader(p) + RF_AccessStripeMapHeader_t *p; { - RF_FREELIST_FREE(rf_asmhdr_freelist,p,next); + RF_FREELIST_FREE(rf_asmhdr_freelist, p, next); } -RF_PhysDiskAddr_t *rf_AllocPhysDiskAddr() +RF_PhysDiskAddr_t * +rf_AllocPhysDiskAddr() { RF_PhysDiskAddr_t *p; - RF_FREELIST_GET(rf_pda_freelist,p,next,(RF_PhysDiskAddr_t *)); - bzero((char *)p, sizeof(RF_PhysDiskAddr_t)); + RF_FREELIST_GET(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *)); + bzero((char *) p, sizeof(RF_PhysDiskAddr_t)); - return(p); + return (p); } - /* allocates a list of PDAs, locking the free list only once * when we have to call calloc, we do it one component at a time to simplify * the process of freeing the list 
at program shutdown. This should not be * much of a performance hit, because it should be very infrequently executed. */ -RF_PhysDiskAddr_t *rf_AllocPDAList(count) - int count; +RF_PhysDiskAddr_t * +rf_AllocPDAList(count) + int count; { RF_PhysDiskAddr_t *p = NULL; - RF_FREELIST_GET_N(rf_pda_freelist,p,next,(RF_PhysDiskAddr_t *),count); - return(p); + RF_FREELIST_GET_N(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *), count); + return (p); } -void rf_FreePhysDiskAddr(p) - RF_PhysDiskAddr_t *p; +void +rf_FreePhysDiskAddr(p) + RF_PhysDiskAddr_t *p; { - RF_FREELIST_FREE(rf_pda_freelist,p,next); + RF_FREELIST_FREE(rf_pda_freelist, p, next); } -static void rf_FreePDAList(l_start, l_end, count) - RF_PhysDiskAddr_t *l_start, *l_end; /* pointers to start and end of list */ - int count; /* number of elements in list */ +static void +rf_FreePDAList(l_start, l_end, count) + RF_PhysDiskAddr_t *l_start, *l_end; /* pointers to start and end + * of list */ + int count; /* number of elements in list */ { - RF_FREELIST_FREE_N(rf_pda_freelist,l_start,next,(RF_PhysDiskAddr_t *),count); + RF_FREELIST_FREE_N(rf_pda_freelist, l_start, next, (RF_PhysDiskAddr_t *), count); } -RF_AccessStripeMap_t *rf_AllocAccessStripeMapComponent() +RF_AccessStripeMap_t * +rf_AllocAccessStripeMapComponent() { RF_AccessStripeMap_t *p; - RF_FREELIST_GET(rf_asm_freelist,p,next,(RF_AccessStripeMap_t *)); - bzero((char *)p, sizeof(RF_AccessStripeMap_t)); + RF_FREELIST_GET(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *)); + bzero((char *) p, sizeof(RF_AccessStripeMap_t)); - return(p); + return (p); } - /* this is essentially identical to AllocPDAList. I should combine the two. * when we have to call calloc, we do it one component at a time to simplify * the process of freeing the list at program shutdown. This should not be * much of a performance hit, because it should be very infrequently executed. 
*/ -RF_AccessStripeMap_t *rf_AllocASMList(count) - int count; +RF_AccessStripeMap_t * +rf_AllocASMList(count) + int count; { RF_AccessStripeMap_t *p = NULL; - RF_FREELIST_GET_N(rf_asm_freelist,p,next,(RF_AccessStripeMap_t *),count); - return(p); + RF_FREELIST_GET_N(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *), count); + return (p); } -void rf_FreeAccessStripeMapComponent(p) - RF_AccessStripeMap_t *p; +void +rf_FreeAccessStripeMapComponent(p) + RF_AccessStripeMap_t *p; { - RF_FREELIST_FREE(rf_asm_freelist,p,next); + RF_FREELIST_FREE(rf_asm_freelist, p, next); } -static void rf_FreeASMList(l_start, l_end, count) - RF_AccessStripeMap_t *l_start, *l_end; - int count; +static void +rf_FreeASMList(l_start, l_end, count) + RF_AccessStripeMap_t *l_start, *l_end; + int count; { - RF_FREELIST_FREE_N(rf_asm_freelist,l_start,next,(RF_AccessStripeMap_t *),count); + RF_FREELIST_FREE_N(rf_asm_freelist, l_start, next, (RF_AccessStripeMap_t *), count); } -void rf_FreeAccessStripeMap(hdr) - RF_AccessStripeMapHeader_t *hdr; +void +rf_FreeAccessStripeMap(hdr) + RF_AccessStripeMapHeader_t *hdr; { - RF_AccessStripeMap_t *p, *pt = NULL; - RF_PhysDiskAddr_t *pdp, *trailer, *pdaList = NULL, *pdaEnd = NULL; - int count = 0, t, asm_count = 0; - - for (p = hdr->stripeMap; p; p=p->next) { - - /* link the 3 pda lists into the accumulating pda list */ - - if (!pdaList) pdaList = p->qInfo; else pdaEnd->next = p->qInfo; - for (trailer=NULL,pdp=p->qInfo; pdp; ) {trailer = pdp; pdp=pdp->next; count++;} - if (trailer) pdaEnd = trailer; - - if (!pdaList) pdaList = p->parityInfo; else pdaEnd->next = p->parityInfo; - for (trailer=NULL,pdp=p->parityInfo; pdp; ) {trailer = pdp; pdp=pdp->next; count++;} - if (trailer) pdaEnd = trailer; - - if (!pdaList) pdaList = p->physInfo; else pdaEnd->next = p->physInfo; - for (trailer=NULL,pdp=p->physInfo; pdp; ) {trailer = pdp; pdp=pdp->next; count++;} - if (trailer) pdaEnd = trailer; - - pt = p; - asm_count++; - } - - /* debug only */ - for (t=0,pdp=pdaList; 
pdp; pdp=pdp->next) - t++; - RF_ASSERT(t == count); - - if (pdaList) - rf_FreePDAList(pdaList, pdaEnd, count); - rf_FreeASMList(hdr->stripeMap, pt, asm_count); - rf_FreeAccessStripeMapHeader(hdr); -} + RF_AccessStripeMap_t *p, *pt = NULL; + RF_PhysDiskAddr_t *pdp, *trailer, *pdaList = NULL, *pdaEnd = NULL; + int count = 0, t, asm_count = 0; + + for (p = hdr->stripeMap; p; p = p->next) { + + /* link the 3 pda lists into the accumulating pda list */ + + if (!pdaList) + pdaList = p->qInfo; + else + pdaEnd->next = p->qInfo; + for (trailer = NULL, pdp = p->qInfo; pdp;) { + trailer = pdp; + pdp = pdp->next; + count++; + } + if (trailer) + pdaEnd = trailer; + + if (!pdaList) + pdaList = p->parityInfo; + else + pdaEnd->next = p->parityInfo; + for (trailer = NULL, pdp = p->parityInfo; pdp;) { + trailer = pdp; + pdp = pdp->next; + count++; + } + if (trailer) + pdaEnd = trailer; + + if (!pdaList) + pdaList = p->physInfo; + else + pdaEnd->next = p->physInfo; + for (trailer = NULL, pdp = p->physInfo; pdp;) { + trailer = pdp; + pdp = pdp->next; + count++; + } + if (trailer) + pdaEnd = trailer; + + pt = p; + asm_count++; + } + /* debug only */ + for (t = 0, pdp = pdaList; pdp; pdp = pdp->next) + t++; + RF_ASSERT(t == count); + + if (pdaList) + rf_FreePDAList(pdaList, pdaEnd, count); + rf_FreeASMList(hdr->stripeMap, pt, asm_count); + rf_FreeAccessStripeMapHeader(hdr); +} /* We can't use the large write optimization if there are any failures in the stripe. * In the declustered layout, there is no way to immediately determine what disks * constitute a stripe, so we actually have to hunt through the stripe looking for failures. @@ -689,86 +606,87 @@ void rf_FreeAccessStripeMap(hdr) * * ASSUMES AT MOST ONE FAILURE IN THE STRIPE. 
*/ -int rf_CheckStripeForFailures(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; +int +rf_CheckStripeForFailures(raidPtr, asmap) + RF_Raid_t *raidPtr; + RF_AccessStripeMap_t *asmap; { - RF_RowCol_t trow, tcol, prow, pcol, *diskids, row, i; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_StripeCount_t stripeOffset; - int numFailures; - RF_RaidAddr_t sosAddr; - RF_SectorNum_t diskOffset, poffset; - RF_RowCol_t testrow; - - /* quick out in the fault-free case. */ - RF_LOCK_MUTEX(raidPtr->mutex); - numFailures = raidPtr->numFailures; - RF_UNLOCK_MUTEX(raidPtr->mutex); - if (numFailures == 0) return(0); - - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - row = asmap->physInfo->row; - (layoutPtr->map->IdentifyStripe)(raidPtr, asmap->raidAddress, &diskids, &testrow); - (layoutPtr->map->MapParity)(raidPtr, asmap->raidAddress, &prow, &pcol, &poffset, 0); /* get pcol */ - - /* this need not be true if we've redirected the access to a spare in another row - RF_ASSERT(row == testrow); - */ - stripeOffset = 0; - for (i=0; i<layoutPtr->numDataCol+layoutPtr->numParityCol; i++) { - if (diskids[i] != pcol) { - if (RF_DEAD_DISK(raidPtr->Disks[testrow][diskids[i]].status)) { - if (raidPtr->status[testrow] != rf_rs_reconstructing) - return(1); - RF_ASSERT(raidPtr->reconControl[testrow]->fcol == diskids[i]); - layoutPtr->map->MapSector(raidPtr, - sosAddr + stripeOffset * layoutPtr->sectorsPerStripeUnit, - &trow, &tcol, &diskOffset, 0); - RF_ASSERT( (trow == testrow) && (tcol == diskids[i]) ); - if (!rf_CheckRUReconstructed(raidPtr->reconControl[testrow]->reconMap, diskOffset)) - return(1); - asmap->flags |= RF_ASM_REDIR_LARGE_WRITE; - return(0); - } - stripeOffset++; - } - } - return(0); + RF_RowCol_t trow, tcol, prow, pcol, *diskids, row, i; + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_StripeCount_t stripeOffset; + int numFailures; + RF_RaidAddr_t sosAddr; + RF_SectorNum_t diskOffset, poffset; + RF_RowCol_t testrow; + 
+ /* quick out in the fault-free case. */ + RF_LOCK_MUTEX(raidPtr->mutex); + numFailures = raidPtr->numFailures; + RF_UNLOCK_MUTEX(raidPtr->mutex); + if (numFailures == 0) + return (0); + + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + row = asmap->physInfo->row; + (layoutPtr->map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids, &testrow); + (layoutPtr->map->MapParity) (raidPtr, asmap->raidAddress, &prow, &pcol, &poffset, 0); /* get pcol */ + + /* this need not be true if we've redirected the access to a spare in + * another row RF_ASSERT(row == testrow); */ + stripeOffset = 0; + for (i = 0; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++) { + if (diskids[i] != pcol) { + if (RF_DEAD_DISK(raidPtr->Disks[testrow][diskids[i]].status)) { + if (raidPtr->status[testrow] != rf_rs_reconstructing) + return (1); + RF_ASSERT(raidPtr->reconControl[testrow]->fcol == diskids[i]); + layoutPtr->map->MapSector(raidPtr, + sosAddr + stripeOffset * layoutPtr->sectorsPerStripeUnit, + &trow, &tcol, &diskOffset, 0); + RF_ASSERT((trow == testrow) && (tcol == diskids[i])); + if (!rf_CheckRUReconstructed(raidPtr->reconControl[testrow]->reconMap, diskOffset)) + return (1); + asmap->flags |= RF_ASM_REDIR_LARGE_WRITE; + return (0); + } + stripeOffset++; + } + } + return (0); } - /* return the number of failed data units in the stripe. */ -int rf_NumFailedDataUnitsInStripe(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; +int +rf_NumFailedDataUnitsInStripe(raidPtr, asmap) + RF_Raid_t *raidPtr; + RF_AccessStripeMap_t *asmap; { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_RowCol_t trow, tcol, row, i; - RF_SectorNum_t diskOffset; - RF_RaidAddr_t sosAddr; - int numFailures; - - /* quick out in the fault-free case. 
*/ - RF_LOCK_MUTEX(raidPtr->mutex); - numFailures = raidPtr->numFailures; - RF_UNLOCK_MUTEX(raidPtr->mutex); - if (numFailures == 0) return(0); - numFailures = 0; - - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - row = asmap->physInfo->row; - for (i=0; i<layoutPtr->numDataCol; i++) - { - (layoutPtr->map->MapSector)(raidPtr, sosAddr + i * layoutPtr->sectorsPerStripeUnit, - &trow, &tcol, &diskOffset, 0); - if (RF_DEAD_DISK(raidPtr->Disks[trow][tcol].status)) - numFailures++; - } - - return numFailures; + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_RowCol_t trow, tcol, row, i; + RF_SectorNum_t diskOffset; + RF_RaidAddr_t sosAddr; + int numFailures; + + /* quick out in the fault-free case. */ + RF_LOCK_MUTEX(raidPtr->mutex); + numFailures = raidPtr->numFailures; + RF_UNLOCK_MUTEX(raidPtr->mutex); + if (numFailures == 0) + return (0); + numFailures = 0; + + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + row = asmap->physInfo->row; + for (i = 0; i < layoutPtr->numDataCol; i++) { + (layoutPtr->map->MapSector) (raidPtr, sosAddr + i * layoutPtr->sectorsPerStripeUnit, + &trow, &tcol, &diskOffset, 0); + if (RF_DEAD_DISK(raidPtr->Disks[trow][tcol].status)) + numFailures++; + } + + return numFailures; } @@ -778,199 +696,214 @@ int rf_NumFailedDataUnitsInStripe(raidPtr, asmap) * ****************************************************************************************/ -void rf_PrintAccessStripeMap(asm_h) - RF_AccessStripeMapHeader_t *asm_h; +void +rf_PrintAccessStripeMap(asm_h) + RF_AccessStripeMapHeader_t *asm_h; { - rf_PrintFullAccessStripeMap(asm_h, 0); + rf_PrintFullAccessStripeMap(asm_h, 0); } -void rf_PrintFullAccessStripeMap(asm_h, prbuf) - RF_AccessStripeMapHeader_t *asm_h; - int prbuf; /* flag to print buffer pointers */ +void +rf_PrintFullAccessStripeMap(asm_h, prbuf) + RF_AccessStripeMapHeader_t *asm_h; + int prbuf; /* flag to print buffer pointers */ { - int i; - RF_AccessStripeMap_t *asmap = 
asm_h->stripeMap; - RF_PhysDiskAddr_t *p; - printf("%d stripes total\n", (int)asm_h->numStripes); - for (; asmap; asmap = asmap->next) { - /* printf("Num failures: %d\n",asmap->numDataFailed); */ - /* printf("Num sectors: %d\n",(int)asmap->totalSectorsAccessed); */ - printf("Stripe %d (%d sectors), failures: %d data, %d parity: ", - (int) asmap->stripeID, - (int) asmap->totalSectorsAccessed, - (int) asmap->numDataFailed, - (int) asmap->numParityFailed); - if (asmap->parityInfo) { - printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row, asmap->parityInfo->col, - (int)asmap->parityInfo->startSector, - (int)(asmap->parityInfo->startSector + - asmap->parityInfo->numSector - 1)); - if (prbuf) printf(" b0x%lx",(unsigned long) asmap->parityInfo->bufPtr); - if (asmap->parityInfo->next) { - printf(", r%d c%d s%d-%d", asmap->parityInfo->next->row, - asmap->parityInfo->next->col, - (int) asmap->parityInfo->next->startSector, - (int)(asmap->parityInfo->next->startSector + - asmap->parityInfo->next->numSector - 1)); - if (prbuf) printf(" b0x%lx",(unsigned long) asmap->parityInfo->next->bufPtr); - RF_ASSERT(asmap->parityInfo->next->next == NULL); - } - printf("]\n\t"); - } - for (i=0,p=asmap->physInfo; p; p=p->next,i++) { - printf("SU r%d c%d s%d-%d ", p->row, p->col, (int)p->startSector, - (int)(p->startSector + p->numSector - 1)); - if (prbuf) printf("b0x%lx ", (unsigned long) p->bufPtr); - if (i && !(i&1)) printf("\n\t"); - } - printf("\n"); - p = asm_h->stripeMap->failedPDAs[0]; - if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 1) printf("[multiple failures]\n"); - else if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 0) - printf("\t[Failed PDA: r%d c%d s%d-%d]\n",p->row, p->col, - (int)p->startSector, (int)(p->startSector + p->numSector-1)); - } + int i; + RF_AccessStripeMap_t *asmap = asm_h->stripeMap; + RF_PhysDiskAddr_t *p; + printf("%d stripes total\n", (int) asm_h->numStripes); + for (; asmap; asmap = asmap->next) { 
+ /* printf("Num failures: %d\n",asmap->numDataFailed); */ + /* printf("Num sectors: + * %d\n",(int)asmap->totalSectorsAccessed); */ + printf("Stripe %d (%d sectors), failures: %d data, %d parity: ", + (int) asmap->stripeID, + (int) asmap->totalSectorsAccessed, + (int) asmap->numDataFailed, + (int) asmap->numParityFailed); + if (asmap->parityInfo) { + printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row, asmap->parityInfo->col, + (int) asmap->parityInfo->startSector, + (int) (asmap->parityInfo->startSector + + asmap->parityInfo->numSector - 1)); + if (prbuf) + printf(" b0x%lx", (unsigned long) asmap->parityInfo->bufPtr); + if (asmap->parityInfo->next) { + printf(", r%d c%d s%d-%d", asmap->parityInfo->next->row, + asmap->parityInfo->next->col, + (int) asmap->parityInfo->next->startSector, + (int) (asmap->parityInfo->next->startSector + + asmap->parityInfo->next->numSector - 1)); + if (prbuf) + printf(" b0x%lx", (unsigned long) asmap->parityInfo->next->bufPtr); + RF_ASSERT(asmap->parityInfo->next->next == NULL); + } + printf("]\n\t"); + } + for (i = 0, p = asmap->physInfo; p; p = p->next, i++) { + printf("SU r%d c%d s%d-%d ", p->row, p->col, (int) p->startSector, + (int) (p->startSector + p->numSector - 1)); + if (prbuf) + printf("b0x%lx ", (unsigned long) p->bufPtr); + if (i && !(i & 1)) + printf("\n\t"); + } + printf("\n"); + p = asm_h->stripeMap->failedPDAs[0]; + if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 1) + printf("[multiple failures]\n"); + else + if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 0) + printf("\t[Failed PDA: r%d c%d s%d-%d]\n", p->row, p->col, + (int) p->startSector, (int) (p->startSector + p->numSector - 1)); + } } -void rf_PrintRaidAddressInfo(raidPtr, raidAddr, numBlocks) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_SectorCount_t numBlocks; +void +rf_PrintRaidAddressInfo(raidPtr, raidAddr, numBlocks) + RF_Raid_t *raidPtr; + RF_RaidAddr_t raidAddr; + RF_SectorCount_t numBlocks; 
{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_RaidAddr_t ra, sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); - - printf("Raid addrs of SU boundaries from start of stripe to end of access:\n\t"); - for (ra = sosAddr; ra <= raidAddr + numBlocks; ra += layoutPtr->sectorsPerStripeUnit) { - printf("%d (0x%x), ",(int)ra, (int)ra); - } - printf("\n"); - printf("Offset into stripe unit: %d (0x%x)\n", - (int)(raidAddr % layoutPtr->sectorsPerStripeUnit), - (int)(raidAddr % layoutPtr->sectorsPerStripeUnit)); -} + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_RaidAddr_t ra, sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); + printf("Raid addrs of SU boundaries from start of stripe to end of access:\n\t"); + for (ra = sosAddr; ra <= raidAddr + numBlocks; ra += layoutPtr->sectorsPerStripeUnit) { + printf("%d (0x%x), ", (int) ra, (int) ra); + } + printf("\n"); + printf("Offset into stripe unit: %d (0x%x)\n", + (int) (raidAddr % layoutPtr->sectorsPerStripeUnit), + (int) (raidAddr % layoutPtr->sectorsPerStripeUnit)); +} /* given a parity descriptor and the starting address within a stripe, range restrict the parity descriptor to touch only the correct stuff. */ -void rf_ASMParityAdjust( - RF_PhysDiskAddr_t *toAdjust, - RF_StripeNum_t startAddrWithinStripe, - RF_SectorNum_t endAddress, - RF_RaidLayout_t *layoutPtr, - RF_AccessStripeMap_t *asm_p) +void +rf_ASMParityAdjust( + RF_PhysDiskAddr_t * toAdjust, + RF_StripeNum_t startAddrWithinStripe, + RF_SectorNum_t endAddress, + RF_RaidLayout_t * layoutPtr, + RF_AccessStripeMap_t * asm_p) { - RF_PhysDiskAddr_t *new_pda; - - /* when we're accessing only a portion of one stripe unit, we want the parity descriptor - * to identify only the chunk of parity associated with the data. When the access spans - * exactly one stripe unit boundary and is less than a stripe unit in size, it uses two disjoint - * regions of the parity unit. 
When an access spans more than one stripe unit boundary, it - * uses all of the parity unit. - * - * To better handle the case where stripe units are small, we may eventually want to change - * the 2nd case so that if the SU size is below some threshold, we just read/write the whole - * thing instead of breaking it up into two accesses. - */ - if (asm_p->numStripeUnitsAccessed == 1) - { - int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); - toAdjust->startSector += x; - toAdjust->raidAddress += x; - toAdjust->numSector = asm_p->physInfo->numSector; - RF_ASSERT(toAdjust->numSector != 0); - } - else - if (asm_p->numStripeUnitsAccessed == 2 && asm_p->totalSectorsAccessed < layoutPtr->sectorsPerStripeUnit) - { - int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); - - /* create a second pda and copy the parity map info into it */ - RF_ASSERT(toAdjust->next == NULL); - new_pda = toAdjust->next = rf_AllocPhysDiskAddr(); - *new_pda = *toAdjust; /* structure assignment */ - new_pda->next = NULL; - - /* adjust the start sector & number of blocks for the first parity pda */ - toAdjust->startSector += x; - toAdjust->raidAddress += x; - toAdjust->numSector = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, startAddrWithinStripe) - startAddrWithinStripe; - RF_ASSERT(toAdjust->numSector != 0); - - /* adjust the second pda */ - new_pda->numSector = endAddress - rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, endAddress); - /*new_pda->raidAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, toAdjust->raidAddress);*/ - RF_ASSERT(new_pda->numSector != 0); - } + RF_PhysDiskAddr_t *new_pda; + + /* when we're accessing only a portion of one stripe unit, we want the + * parity descriptor to identify only the chunk of parity associated + * with the data. When the access spans exactly one stripe unit + * boundary and is less than a stripe unit in size, it uses two + * disjoint regions of the parity unit. 
When an access spans more + * than one stripe unit boundary, it uses all of the parity unit. + * + * To better handle the case where stripe units are small, we may + * eventually want to change the 2nd case so that if the SU size is + * below some threshold, we just read/write the whole thing instead of + * breaking it up into two accesses. */ + if (asm_p->numStripeUnitsAccessed == 1) { + int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); + toAdjust->startSector += x; + toAdjust->raidAddress += x; + toAdjust->numSector = asm_p->physInfo->numSector; + RF_ASSERT(toAdjust->numSector != 0); + } else + if (asm_p->numStripeUnitsAccessed == 2 && asm_p->totalSectorsAccessed < layoutPtr->sectorsPerStripeUnit) { + int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); + + /* create a second pda and copy the parity map info + * into it */ + RF_ASSERT(toAdjust->next == NULL); + new_pda = toAdjust->next = rf_AllocPhysDiskAddr(); + *new_pda = *toAdjust; /* structure assignment */ + new_pda->next = NULL; + + /* adjust the start sector & number of blocks for the + * first parity pda */ + toAdjust->startSector += x; + toAdjust->raidAddress += x; + toAdjust->numSector = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, startAddrWithinStripe) - startAddrWithinStripe; + RF_ASSERT(toAdjust->numSector != 0); + + /* adjust the second pda */ + new_pda->numSector = endAddress - rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, endAddress); + /* new_pda->raidAddress = + * rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, + * toAdjust->raidAddress); */ + RF_ASSERT(new_pda->numSector != 0); + } } - /* Check if a disk has been spared or failed. If spared, - redirect the I/O. + redirect the I/O. If it has been failed, record it in the asm pointer. Fourth arg is whether data or parity. 
*/ -void rf_ASMCheckStatus( - RF_Raid_t *raidPtr, - RF_PhysDiskAddr_t *pda_p, - RF_AccessStripeMap_t *asm_p, - RF_RaidDisk_t **disks, - int parity) +void +rf_ASMCheckStatus( + RF_Raid_t * raidPtr, + RF_PhysDiskAddr_t * pda_p, + RF_AccessStripeMap_t * asm_p, + RF_RaidDisk_t ** disks, + int parity) { - RF_DiskStatus_t dstatus; - RF_RowCol_t frow, fcol; - - dstatus = disks[pda_p->row][pda_p->col].status; - - if (dstatus == rf_ds_spared) { - /* if the disk has been spared, redirect access to the spare */ - frow = pda_p->row; fcol = pda_p->col; - pda_p->row = disks[frow][fcol].spareRow; - pda_p->col = disks[frow][fcol].spareCol; - } - else if (dstatus == rf_ds_dist_spared) { - /* ditto if disk has been spared to dist spare space */ - RF_RowCol_t or = pda_p->row, oc=pda_p->col; - RF_SectorNum_t oo = pda_p->startSector; - - if (pda_p -> type == RF_PDA_TYPE_DATA) - raidPtr->Layout.map->MapSector(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); - else - raidPtr->Layout.map->MapParity(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); - - if (rf_mapDebug) { - printf("Redirected r %d c %d o %d -> r%d c %d o %d\n",or,oc,(int)oo, - pda_p->row,pda_p->col,(int)pda_p->startSector); - } - } else if (RF_DEAD_DISK(dstatus)) { - /* if the disk is inaccessible, mark the failure */ - if (parity) - asm_p->numParityFailed++; - else { - asm_p->numDataFailed++; + RF_DiskStatus_t dstatus; + RF_RowCol_t frow, fcol; + + dstatus = disks[pda_p->row][pda_p->col].status; + + if (dstatus == rf_ds_spared) { + /* if the disk has been spared, redirect access to the spare */ + frow = pda_p->row; + fcol = pda_p->col; + pda_p->row = disks[frow][fcol].spareRow; + pda_p->col = disks[frow][fcol].spareCol; + } else + if (dstatus == rf_ds_dist_spared) { + /* ditto if disk has been spared to dist spare space */ + RF_RowCol_t or = pda_p->row, oc = pda_p->col; + RF_SectorNum_t oo = pda_p->startSector; + + if (pda_p->type == 
RF_PDA_TYPE_DATA) + raidPtr->Layout.map->MapSector(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); + else + raidPtr->Layout.map->MapParity(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); + + if (rf_mapDebug) { + printf("Redirected r %d c %d o %d -> r%d c %d o %d\n", or, oc, (int) oo, + pda_p->row, pda_p->col, (int) pda_p->startSector); + } + } else + if (RF_DEAD_DISK(dstatus)) { + /* if the disk is inaccessible, mark the + * failure */ + if (parity) + asm_p->numParityFailed++; + else { + asm_p->numDataFailed++; #if 0 - /* XXX Do we really want this spewing out on the console? GO */ - printf("DATA_FAILED!\n"); + /* XXX Do we really want this spewing + * out on the console? GO */ + printf("DATA_FAILED!\n"); #endif - } - asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p; - asm_p->numFailedPDAs++; + } + asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p; + asm_p->numFailedPDAs++; #if 0 - switch (asm_p->numParityFailed + asm_p->numDataFailed) - { - case 1: - asm_p->failedPDAs[0] = pda_p; - break; - case 2: - asm_p->failedPDAs[1] = pda_p; - default: - break; - } + switch (asm_p->numParityFailed + asm_p->numDataFailed) { + case 1: + asm_p->failedPDAs[0] = pda_p; + break; + case 2: + asm_p->failedPDAs[1] = pda_p; + default: + break; + } #endif - } - /* the redirected access should never span a stripe unit boundary */ - RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout,pda_p->raidAddress) == - rf_RaidAddressToStripeUnitID(&raidPtr->Layout,pda_p->raidAddress + pda_p->numSector -1)); - RF_ASSERT(pda_p->col != -1); + } + /* the redirected access should never span a stripe unit boundary */ + RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress) == + rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress + pda_p->numSector - 1)); + RF_ASSERT(pda_p->col != -1); } diff --git a/sys/dev/raidframe/rf_map.h b/sys/dev/raidframe/rf_map.h index 827de180b51..95b2d243083 100644 
--- a/sys/dev/raidframe/rf_map.h +++ b/sys/dev/raidframe/rf_map.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_map.h,v 1.1 1999/01/11 14:29:29 niklas Exp $ */ -/* $NetBSD: rf_map.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_map.h,v 1.2 1999/02/16 00:02:56 niklas Exp $ */ +/* $NetBSD: rf_map.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,51 +29,6 @@ /* rf_map.h */ -/* : - * Log: rf_map.h,v - * Revision 1.9 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.8 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.7 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.6 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.5 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.4 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1995/12/01 19:25:14 root - * added copyright info - * - */ - #ifndef _RF__RF_MAP_H_ #define _RF__RF_MAP_H_ @@ -82,53 +37,58 @@ #include "rf_raid.h" /* mapping structure allocation and free routines */ -RF_AccessStripeMapHeader_t *rf_MapAccess(RF_Raid_t *raidPtr, - RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, - caddr_t buffer, int remap); +RF_AccessStripeMapHeader_t * +rf_MapAccess(RF_Raid_t * raidPtr, + RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, + caddr_t buffer, int remap); -void rf_MarkFailuresInASMList(RF_Raid_t *raidPtr, - RF_AccessStripeMapHeader_t *asm_h); +void +rf_MarkFailuresInASMList(RF_Raid_t * raidPtr, + RF_AccessStripeMapHeader_t * asm_h); -RF_AccessStripeMap_t *rf_DuplicateASM(RF_AccessStripeMap_t *asmap); +RF_AccessStripeMap_t *rf_DuplicateASM(RF_AccessStripeMap_t * asmap); -RF_PhysDiskAddr_t *rf_DuplicatePDA(RF_PhysDiskAddr_t *pda); +RF_PhysDiskAddr_t *rf_DuplicatePDA(RF_PhysDiskAddr_t * pda); -int rf_ConfigureMapModule(RF_ShutdownList_t **listp); +int rf_ConfigureMapModule(RF_ShutdownList_t ** listp); RF_AccessStripeMapHeader_t *rf_AllocAccessStripeMapHeader(void); -void rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t *p); +void rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t * p); RF_PhysDiskAddr_t *rf_AllocPhysDiskAddr(void); RF_PhysDiskAddr_t *rf_AllocPDAList(int count); -void rf_FreePhysDiskAddr(RF_PhysDiskAddr_t *p); +void rf_FreePhysDiskAddr(RF_PhysDiskAddr_t * p); RF_AccessStripeMap_t *rf_AllocAccessStripeMapComponent(void); RF_AccessStripeMap_t *rf_AllocASMList(int count); -void rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t *p); +void 
rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t * p); -void rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t *hdr); +void rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t * hdr); -int rf_CheckStripeForFailures(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap); +int rf_CheckStripeForFailures(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); -int rf_NumFailedDataUnitsInStripe(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap); +int rf_NumFailedDataUnitsInStripe(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); -void rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h); +void rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t * asm_h); -void rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h, int prbuf); +void rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t * asm_h, int prbuf); -void rf_PrintRaidAddressInfo(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, - RF_SectorCount_t numBlocks); +void +rf_PrintRaidAddressInfo(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, + RF_SectorCount_t numBlocks); -void rf_ASMParityAdjust(RF_PhysDiskAddr_t *toAdjust, - RF_StripeNum_t startAddrWithinStripe, RF_SectorNum_t endAddress, - RF_RaidLayout_t *layoutPtr, RF_AccessStripeMap_t *asm_p); +void +rf_ASMParityAdjust(RF_PhysDiskAddr_t * toAdjust, + RF_StripeNum_t startAddrWithinStripe, RF_SectorNum_t endAddress, + RF_RaidLayout_t * layoutPtr, RF_AccessStripeMap_t * asm_p); -void rf_ASMCheckStatus(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda_p, - RF_AccessStripeMap_t *asm_p, RF_RaidDisk_t **disks, int parity); +void +rf_ASMCheckStatus(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda_p, + RF_AccessStripeMap_t * asm_p, RF_RaidDisk_t ** disks, int parity); -#endif /* !_RF__RF_MAP_H_ */ +#endif /* !_RF__RF_MAP_H_ */ diff --git a/sys/dev/raidframe/rf_mcpair.c b/sys/dev/raidframe/rf_mcpair.c index 4ed3a187b1c..5b39b182332 100644 --- a/sys/dev/raidframe/rf_mcpair.c +++ b/sys/dev/raidframe/rf_mcpair.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_mcpair.c,v 1.1 1999/01/11 14:29:29 niklas Exp 
$ */ -/* $NetBSD: rf_mcpair.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_mcpair.c,v 1.2 1999/02/16 00:02:56 niklas Exp $ */ +/* $NetBSD: rf_mcpair.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -32,53 +32,6 @@ * it's used to block the current thread until some event occurs. */ -/* : - * Log: rf_mcpair.c,v - * Revision 1.16 1996/06/19 22:23:01 jimz - * parity verification is now a layout-configurable thing - * not all layouts currently support it (correctly, anyway) - * - * Revision 1.15 1996/06/17 03:18:04 jimz - * include shutdown.h for macroized ShutdownCreate - * - * Revision 1.14 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.13 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.12 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.11 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.10 1996/05/20 16:15:22 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.9 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.8 1996/05/16 16:04:42 jimz - * convert to return-val on FREELIST init - * - * Revision 1.7 1996/05/16 14:47:21 jimz - * rewrote to use RF_FREELIST - * - * Revision 1.6 1995/12/01 19:25:43 root - * added copyright info - * - */ - #include "rf_types.h" #include "rf_threadstuff.h" #include "rf_mcpair.h" @@ -86,9 +39,7 @@ #include "rf_freelist.h" #include "rf_shutdown.h" -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) #include <sys/proc.h> -#endif static RF_FreeList_t *rf_mcpair_freelist; @@ -101,100 +52,97 @@ static void clean_mcpair(RF_MCPair_t *); static void rf_ShutdownMCPair(void *); - -static int init_mcpair(t) - RF_MCPair_t *t; +static int +init_mcpair(t) + RF_MCPair_t *t; { - int rc; + int rc; rc = rf_mutex_init(&t->mutex); if (rc) { RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); + __LINE__, rc); + return (rc); } rc = rf_cond_init(&t->cond); if (rc) { RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + __LINE__, rc); rf_mutex_destroy(&t->mutex); - return(rc); + return (rc); } - return(0); + return (0); } -static void clean_mcpair(t) - RF_MCPair_t *t; +static void +clean_mcpair(t) + RF_MCPair_t *t; { rf_mutex_destroy(&t->mutex); rf_cond_destroy(&t->cond); } -static void rf_ShutdownMCPair(ignored) - void *ignored; +static void +rf_ShutdownMCPair(ignored) + void *ignored; { - RF_FREELIST_DESTROY_CLEAN(rf_mcpair_freelist,next,(RF_MCPair_t *),clean_mcpair); + RF_FREELIST_DESTROY_CLEAN(rf_mcpair_freelist, next, (RF_MCPair_t *), clean_mcpair); } -int rf_ConfigureMCPair(listp) - 
RF_ShutdownList_t **listp; +int +rf_ConfigureMCPair(listp) + RF_ShutdownList_t **listp; { - int rc; + int rc; RF_FREELIST_CREATE(rf_mcpair_freelist, RF_MAX_FREE_MCPAIR, - RF_MCPAIR_INC, sizeof(RF_MCPair_t)); + RF_MCPAIR_INC, sizeof(RF_MCPair_t)); rc = rf_ShutdownCreate(listp, rf_ShutdownMCPair, NULL); if (rc) { RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + __FILE__, __LINE__, rc); rf_ShutdownMCPair(NULL); - return(rc); + return (rc); } - RF_FREELIST_PRIME_INIT(rf_mcpair_freelist, RF_MCPAIR_INITIAL,next, - (RF_MCPair_t *),init_mcpair); - return(0); + RF_FREELIST_PRIME_INIT(rf_mcpair_freelist, RF_MCPAIR_INITIAL, next, + (RF_MCPair_t *), init_mcpair); + return (0); } -RF_MCPair_t *rf_AllocMCPair() +RF_MCPair_t * +rf_AllocMCPair() { RF_MCPair_t *t; - RF_FREELIST_GET_INIT(rf_mcpair_freelist,t,next,(RF_MCPair_t *),init_mcpair); + RF_FREELIST_GET_INIT(rf_mcpair_freelist, t, next, (RF_MCPair_t *), init_mcpair); if (t) { t->flag = 0; t->next = NULL; } - return(t); + return (t); } -void rf_FreeMCPair(t) - RF_MCPair_t *t; +void +rf_FreeMCPair(t) + RF_MCPair_t *t; { - RF_FREELIST_FREE_CLEAN(rf_mcpair_freelist,t,next,clean_mcpair); + RF_FREELIST_FREE_CLEAN(rf_mcpair_freelist, t, next, clean_mcpair); } - /* the callback function used to wake you up when you use an mcpair to wait for something */ -void rf_MCPairWakeupFunc(mcpair) - RF_MCPair_t *mcpair; +void +rf_MCPairWakeupFunc(mcpair) + RF_MCPair_t *mcpair; { RF_LOCK_MUTEX(mcpair->mutex); mcpair->flag = 1; #if 0 -printf("MCPairWakeupFunc called!\n"); + printf("MCPairWakeupFunc called!\n"); #endif -#ifdef KERNEL - wakeup(&(mcpair->flag)); /* XXX Does this do anything useful!! GO */ - /* - * XXX Looks like the following is needed to truly get the + wakeup(&(mcpair->flag));/* XXX Does this do anything useful!! GO */ + /* XXX Looks like the following is needed to truly get the * functionality they were looking for here... 
This could be a * side-effect of my using a tsleep in the Net- and OpenBSD port - * though... XXX - */ -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - wakeup(&(mcpair->cond)); /* XXX XXX XXX GO */ -#endif -#else /* KERNEL */ - RF_SIGNAL_COND(mcpair->cond); -#endif /* KERNEL */ + * though... XXX */ + wakeup(&(mcpair->cond));/* XXX XXX XXX GO */ RF_UNLOCK_MUTEX(mcpair->mutex); } diff --git a/sys/dev/raidframe/rf_mcpair.h b/sys/dev/raidframe/rf_mcpair.h index 852b85ad041..b3b91a2bb95 100644 --- a/sys/dev/raidframe/rf_mcpair.h +++ b/sys/dev/raidframe/rf_mcpair.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_mcpair.h,v 1.1 1999/01/11 14:29:29 niklas Exp $ */ -/* $NetBSD: rf_mcpair.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_mcpair.h,v 1.2 1999/02/16 00:02:56 niklas Exp $ */ +/* $NetBSD: rf_mcpair.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -38,25 +38,16 @@ #include "rf_threadstuff.h" struct RF_MCPair_s { - RF_DECLARE_MUTEX(mutex) - RF_DECLARE_COND(cond) - int flag; - RF_MCPair_t *next; + RF_DECLARE_MUTEX(mutex) + RF_DECLARE_COND(cond) + int flag; + RF_MCPair_t *next; }; - -#ifdef KERNEL -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#define RF_WAIT_MCPAIR(_mcp) mpsleep(&((_mcp)->flag), PZERO, "mcpair", 0, (void *) simple_lock_addr((_mcp)->mutex), MS_LOCK_SIMPLE) -#else #define RF_WAIT_MCPAIR(_mcp) tsleep(&((_mcp)->flag), PRIBIO | PCATCH, "mcpair", 0) -#endif -#else /* KERNEL */ -#define RF_WAIT_MCPAIR(_mcp) RF_WAIT_COND((_mcp)->cond, (_mcp)->mutex) -#endif /* KERNEL */ -int rf_ConfigureMCPair(RF_ShutdownList_t **listp); +int rf_ConfigureMCPair(RF_ShutdownList_t ** listp); RF_MCPair_t *rf_AllocMCPair(void); -void rf_FreeMCPair(RF_MCPair_t *t); -void rf_MCPairWakeupFunc(RF_MCPair_t *t); +void rf_FreeMCPair(RF_MCPair_t * t); +void rf_MCPairWakeupFunc(RF_MCPair_t * t); -#endif /* !_RF__RF_MCPAIR_H_ */ +#endif /* !_RF__RF_MCPAIR_H_ */ diff --git 
a/sys/dev/raidframe/rf_memchunk.c b/sys/dev/raidframe/rf_memchunk.c index 568eb90e12d..54c6ac95f3b 100644 --- a/sys/dev/raidframe/rf_memchunk.c +++ b/sys/dev/raidframe/rf_memchunk.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_memchunk.c,v 1.1 1999/01/11 14:29:30 niklas Exp $ */ -/* $NetBSD: rf_memchunk.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_memchunk.c,v 1.2 1999/02/16 00:02:57 niklas Exp $ */ +/* $NetBSD: rf_memchunk.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -59,57 +59,6 @@ * ********************************************************************************/ -/* : - * Log: rf_memchunk.c,v - * Revision 1.17 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.16 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.15 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.14 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.13 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.12 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.11 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.10 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.9 1996/05/20 16:15:45 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.8 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.7 1995/12/01 19:26:07 root - * added copyright info - * - */ - #include "rf_types.h" #include "rf_threadstuff.h" #include "rf_debugMem.h" @@ -121,136 +70,143 @@ typedef struct RF_ChunkHdr_s RF_ChunkHdr_t; struct RF_ChunkHdr_s { - int size; - RF_ChunkDesc_t *list; - RF_ChunkHdr_t *next; + int size; + RF_ChunkDesc_t *list; + RF_ChunkHdr_t *next; }; static RF_ChunkHdr_t *chunklist, *chunk_hdr_free_list; static RF_ChunkDesc_t *chunk_desc_free_list; RF_DECLARE_STATIC_MUTEX(chunkmutex) + static void rf_ShutdownMemChunk(void *); + static RF_ChunkDesc_t *NewMemChunk(int, char *); -static void rf_ShutdownMemChunk(void *); -static RF_ChunkDesc_t *NewMemChunk(int, char *); - -static void rf_ShutdownMemChunk(ignored) - void *ignored; + static void rf_ShutdownMemChunk(ignored) + void *ignored; { - RF_ChunkDesc_t *pt, *p; - RF_ChunkHdr_t *hdr, *ht; - - if (rf_memChunkDebug) - printf("Chunklist:\n"); - for (hdr = chunklist; hdr;) { - for (p = hdr->list; p; ) { - if (rf_memChunkDebug) - printf("Size %d reuse count %d\n",p->size, p->reuse_count); - pt = p; p=p->next; - RF_Free(pt->buf, pt->size); - RF_Free(pt, sizeof(*pt)); - } - ht = hdr; hdr=hdr->next; - RF_Free(ht, sizeof(*ht)); - } - - rf_mutex_destroy(&chunkmutex); + RF_ChunkDesc_t *pt, *p; + RF_ChunkHdr_t *hdr, *ht; + + if (rf_memChunkDebug) + 
printf("Chunklist:\n"); + for (hdr = chunklist; hdr;) { + for (p = hdr->list; p;) { + if (rf_memChunkDebug) + printf("Size %d reuse count %d\n", p->size, p->reuse_count); + pt = p; + p = p->next; + RF_Free(pt->buf, pt->size); + RF_Free(pt, sizeof(*pt)); + } + ht = hdr; + hdr = hdr->next; + RF_Free(ht, sizeof(*ht)); + } + + rf_mutex_destroy(&chunkmutex); } -int rf_ConfigureMemChunk(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureMemChunk(listp) + RF_ShutdownList_t **listp; { - int rc; - - chunklist = NULL; - chunk_hdr_free_list = NULL; - chunk_desc_free_list = NULL; - rc = rf_mutex_init(&chunkmutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownMemChunk, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&chunkmutex); - } - return(rc); + int rc; + + chunklist = NULL; + chunk_hdr_free_list = NULL; + chunk_desc_free_list = NULL; + rc = rf_mutex_init(&chunkmutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + } + rc = rf_ShutdownCreate(listp, rf_ShutdownMemChunk, NULL); + if (rc) { + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_mutex_destroy(&chunkmutex); + } + return (rc); } - /* called to get a chunk descriptor for a newly-allocated chunk of memory * MUTEX MUST BE LOCKED * * free list is not currently used */ -static RF_ChunkDesc_t *NewMemChunk(size, buf) - int size; - char *buf; +static RF_ChunkDesc_t * +NewMemChunk(size, buf) + int size; + char *buf; { - RF_ChunkDesc_t *p; - - if (chunk_desc_free_list) {p = chunk_desc_free_list; chunk_desc_free_list = p->next;} - else RF_Malloc(p, sizeof(RF_ChunkDesc_t), (RF_ChunkDesc_t *)); - p->size = size; - p->buf = buf; - p->next = NULL; - p->reuse_count = 0; - return(p); + RF_ChunkDesc_t *p; + + if 
(chunk_desc_free_list) { + p = chunk_desc_free_list; + chunk_desc_free_list = p->next; + } else + RF_Malloc(p, sizeof(RF_ChunkDesc_t), (RF_ChunkDesc_t *)); + p->size = size; + p->buf = buf; + p->next = NULL; + p->reuse_count = 0; + return (p); } - /* looks for a chunk of memory of acceptable size. If none, allocates one and returns * a chunk descriptor for it, but does not install anything in the list. This is done * when the chunk is released. */ -RF_ChunkDesc_t *rf_GetMemChunk(size) - int size; +RF_ChunkDesc_t * +rf_GetMemChunk(size) + int size; { - RF_ChunkHdr_t *hdr = chunklist; - RF_ChunkDesc_t *p = NULL; - char *buf; - - RF_LOCK_MUTEX(chunkmutex); - for (hdr = chunklist; hdr; hdr = hdr->next) if (hdr->size >= size) { - p = hdr->list; - if (p) { - hdr->list = p->next; - p->next = NULL; - p->reuse_count++; - } - break; - } - if (!p) { - RF_Malloc(buf, size, (char *)); - p = NewMemChunk(size, buf); - } - RF_UNLOCK_MUTEX(chunkmutex); - (void) bzero(p->buf, size); - return(p); + RF_ChunkHdr_t *hdr = chunklist; + RF_ChunkDesc_t *p = NULL; + char *buf; + + RF_LOCK_MUTEX(chunkmutex); + for (hdr = chunklist; hdr; hdr = hdr->next) + if (hdr->size >= size) { + p = hdr->list; + if (p) { + hdr->list = p->next; + p->next = NULL; + p->reuse_count++; + } + break; + } + if (!p) { + RF_Malloc(buf, size, (char *)); + p = NewMemChunk(size, buf); + } + RF_UNLOCK_MUTEX(chunkmutex); + (void) bzero(p->buf, size); + return (p); } -void rf_ReleaseMemChunk(chunk) - RF_ChunkDesc_t *chunk; +void +rf_ReleaseMemChunk(chunk) + RF_ChunkDesc_t *chunk; { - RF_ChunkHdr_t *hdr, *ht = NULL, *new; - - RF_LOCK_MUTEX(chunkmutex); - for (hdr = chunklist; hdr && hdr->size < chunk->size; ht=hdr,hdr=hdr->next); - if (hdr && hdr->size == chunk->size) { - chunk->next = hdr->list; - hdr->list = chunk; - } - else { - RF_Malloc(new, sizeof(RF_ChunkHdr_t), (RF_ChunkHdr_t *)); - new->size = chunk->size; new->list = chunk; chunk->next = NULL; - if (ht) { - new->next = ht->next; - ht->next = new; - } - else { - 
new->next = hdr; - chunklist = new; - } - } - RF_UNLOCK_MUTEX(chunkmutex); + RF_ChunkHdr_t *hdr, *ht = NULL, *new; + + RF_LOCK_MUTEX(chunkmutex); + for (hdr = chunklist; hdr && hdr->size < chunk->size; ht = hdr, hdr = hdr->next); + if (hdr && hdr->size == chunk->size) { + chunk->next = hdr->list; + hdr->list = chunk; + } else { + RF_Malloc(new, sizeof(RF_ChunkHdr_t), (RF_ChunkHdr_t *)); + new->size = chunk->size; + new->list = chunk; + chunk->next = NULL; + if (ht) { + new->next = ht->next; + ht->next = new; + } else { + new->next = hdr; + chunklist = new; + } + } + RF_UNLOCK_MUTEX(chunkmutex); } diff --git a/sys/dev/raidframe/rf_memchunk.h b/sys/dev/raidframe/rf_memchunk.h index 7d41f57eae5..d2585a48987 100644 --- a/sys/dev/raidframe/rf_memchunk.h +++ b/sys/dev/raidframe/rf_memchunk.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_memchunk.h,v 1.1 1999/01/11 14:29:30 niklas Exp $ */ -/* $NetBSD: rf_memchunk.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_memchunk.h,v 1.2 1999/02/16 00:02:57 niklas Exp $ */ +/* $NetBSD: rf_memchunk.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,52 +29,20 @@ /* header file for rf_memchunk.c. See comments there */ -/* : - * Log: rf_memchunk.h,v - * Revision 1.8 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.7 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.6 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.5 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.4 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1995/12/01 19:25:56 root - * added copyright info - * - */ - #ifndef _RF__RF_MEMCHUNK_H_ #define _RF__RF_MEMCHUNK_H_ #include "rf_types.h" struct RF_ChunkDesc_s { - int size; - int reuse_count; - char *buf; - RF_ChunkDesc_t *next; + int size; + int reuse_count; + char *buf; + RF_ChunkDesc_t *next; }; -int rf_ConfigureMemChunk(RF_ShutdownList_t **listp); +int rf_ConfigureMemChunk(RF_ShutdownList_t ** listp); RF_ChunkDesc_t *rf_GetMemChunk(int size); -void rf_ReleaseMemChunk(RF_ChunkDesc_t *chunk); +void rf_ReleaseMemChunk(RF_ChunkDesc_t * chunk); -#endif /* !_RF__RF_MEMCHUNK_H_ */ +#endif /* !_RF__RF_MEMCHUNK_H_ */ diff --git a/sys/dev/raidframe/rf_netbsd.h b/sys/dev/raidframe/rf_netbsd.h index 6d66769112b..274c8193695 100644 --- a/sys/dev/raidframe/rf_netbsd.h +++ b/sys/dev/raidframe/rf_netbsd.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_netbsd.h,v 1.1 1999/01/11 14:29:30 niklas Exp $ */ -/* $NetBSD: rf_netbsd.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_netbsd.h,v 1.2 1999/02/16 00:02:57 niklas Exp $ */ +/* $NetBSD: rf_netbsd.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. 
@@ -76,6 +76,7 @@ #ifndef _RF__RF_NETBSDSTUFF_H_ #define _RF__RF_NETBSDSTUFF_H_ +#ifdef _KERNEL #include <sys/fcntl.h> #include <sys/systm.h> #include <sys/namei.h> @@ -83,16 +84,14 @@ -#if defined(__NetBSD__) && defined(_KERNEL) struct raidcinfo { - struct vnode *ci_vp; /* device's vnode */ - dev_t ci_dev; /* XXX: device's dev_t */ + struct vnode *ci_vp; /* device's vnode */ + dev_t ci_dev; /* XXX: device's dev_t */ #if 0 - size_t ci_size; /* size */ - char *ci_path; /* path to component */ - size_t ci_pathlen; /* length of component path */ + size_t ci_size; /* size */ + char *ci_path; /* path to component */ + size_t ci_pathlen; /* length of component path */ #endif }; -#endif - -#endif /* _RF__RF_NETBSDSTUFF_H_ */ +#endif /* _KERNEL */ +#endif /* _RF__RF_NETBSDSTUFF_H_ */ diff --git a/sys/dev/raidframe/rf_netbsdkintf.c b/sys/dev/raidframe/rf_netbsdkintf.c index ad6673541cc..e0ca8caa0f1 100644 --- a/sys/dev/raidframe/rf_netbsdkintf.c +++ b/sys/dev/raidframe/rf_netbsdkintf.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_netbsdkintf.c,v 1.1 1999/01/11 14:29:30 niklas Exp $ */ -/* $NetBSD: rf_netbsdkintf.c,v 1.5 1998/12/22 20:03:14 oster Exp $ */ +/* $OpenBSD: rf_netbsdkintf.c,v 1.2 1999/02/16 00:02:59 niklas Exp $ */ +/* $NetBSD: rf_netbsdkintf.c,v 1.10 1999/02/11 01:23:32 oster Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. 
@@ -113,189 +113,8 @@ * rf_kintf.c -- the kernel interface routines for RAIDframe * ***********************************************************/ -/* - * : - * Log: rf_kintf.c,v - * Revision 1.57 1996/07/19 16:12:20 jimz - * remove addition of protectedSectors in InitBP- it's already - * done in the diskqueue code - * - * Revision 1.56 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.55 1996/06/17 03:00:54 jimz - * Change RAIDFRAME_GET_INFO interface to do its own copyout() - * (because size of device config structure now exceeds 8k) - * - * Revision 1.54 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.53 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.52 1996/06/06 17:28:08 jimz - * track sector number of last I/O dequeued - * - * Revision 1.51 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.50 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.49 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.48 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.47 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.46 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.45 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.44 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.43 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.42 1996/05/23 22:17:54 jimz - * fix sector size hardcoding problems - * - * Revision 1.41 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.40 1996/05/23 13:18:07 jimz - * tracing_mutex -> rf_tracing_mutex - * - * Revision 1.39 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.38 1996/05/20 16:15:32 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.37 1996/05/10 16:23:47 jimz - * RF_offset -> RF_Offset - * - * Revision 1.36 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function 
names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.35 1996/05/03 19:10:48 jimz - * change sanity checking for bogus I/Os to return more appropriate - * values (to make some user-level utilities happer with RAIDframe) - * - * Revision 1.34 1996/05/02 22:17:00 jimz - * When using DKUSAGE, send a bogus IO after configuring to let DKUSAGE know - * that we exist. This will let user-level programs doing group stats on the - * RF device function without error before RF gets its first IO - * - * Changed rf_device_config devs and spares fields to RF_RaidDisk_t - * - * Inc numOutstanding for the disk queue in rf_DispatchKernelIO if - * type is IO_TYPE_NOP. I'm not sure this is right, but it seems to be, - * because the disk IO completion routine wants to dec it, and doesn't - * care if there was no such IO. - * - * Revision 1.33 1996/05/02 15:05:44 jimz - * for now, rf_DoAccessKernel will reject non-sector-sized I/Os - * eventually, it should do something more clever... - * (and do it in DoAccess(), not just DoAccessKernel()) - * - * Revision 1.32 1996/05/01 16:28:39 jimz - * get rid of uses of ccmn_ functions - * - * Revision 1.31 1996/05/01 15:42:17 jimz - * ccmn_* memory management is on the way out. This is an archival checkpoint- - * both the old and new code are in place (all the ccmn_ calls are #if 0). After - * this, the ccmn_ code will no longer appear. 
- * - * Revision 1.30 1996/04/22 15:53:13 jimz - * MAX_RAIDS -> NRAIDFRAME - * - * Revision 1.29 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.28 1995/12/01 19:11:01 root - * added copyright info - * - * Revision 1.27 1995/11/28 18:56:40 wvcii - * disabled buffer copy in rf_write - * - * Revision 1.26 1995/10/06 16:37:08 jimz - * get struct bufs from ubc, not cam - * copy all write data, and operate on copy - * (temporary hack to get around dags in PQ that want - * to Xor into user write buffers) - * - * Revision 1.25 1995/09/30 22:23:08 jimz - * do not require raid to be active to perform ACCTOTAL ioctl - * - * Revision 1.24 1995/09/30 20:39:08 jimz - * added new ioctls: - * RAIDFRAME_RESET_ACCTOTALS - * RAIDFRAME_GET_ACCTOTALS - * RAIDFRAME_KEEP_ACCTOTALS - * - * Revision 1.23 1995/09/20 21:11:59 jimz - * include dfstrace.h in KERNEL block - * (even though it's a kernel-only file, this makes the depend process - * at user-level happy. Why the user-level Makefile wants to depend - * kintf.c is less clear, but this is a workaround). 
- * - * Revision 1.22 1995/09/19 23:19:03 jimz - * added DKUSAGE support - * - */ - - - - -#ifdef _KERNEL -#define KERNEL -#endif - - - -#ifdef KERNEL #include <sys/errno.h> - -#ifdef __NetBSD__ -#include "raid.h" #include <sys/param.h> #include <sys/pool.h> #include <sys/queue.h> @@ -307,25 +126,16 @@ #include <sys/systm.h> #include <sys/namei.h> #include <sys/vnode.h> -#endif - #include <sys/param.h> #include <sys/types.h> - #include <machine/types.h> - #include <sys/disklabel.h> - #include <sys/conf.h> - - -#ifdef __NetBSD__ #include <sys/lock.h> -#endif /* __NetBSD__ */ - - #include <sys/buf.h> #include <sys/user.h> + +#include "raid.h" #include "rf_raid.h" #include "rf_raidframe.h" #include "rf_dag.h" @@ -342,7 +152,7 @@ #include "rf_debugprint.h" #include "rf_threadstuff.h" -int rf_kdebug_level = 0; +int rf_kdebug_level = 0; #define RFK_BOOT_NONE 0 #define RFK_BOOT_GOOD 1 @@ -357,40 +167,43 @@ static int rf_kbooted = RFK_BOOT_NONE; #define db3_printf(a) if (rf_kdebug_level > 2) printf a #define db4_printf(a) if (rf_kdebug_level > 3) printf a #define db5_printf(a) if (rf_kdebug_level > 4) printf a -#else /* DEBUG */ +#else /* DEBUG */ #define db0_printf(a) printf a #define db1_printf(a) { } #define db2_printf(a) { } #define db3_printf(a) { } #define db4_printf(a) { } #define db5_printf(a) { } -#endif /* DEBUG */ +#endif /* DEBUG */ -static RF_Raid_t **raidPtrs; /* global raid device descriptors */ +static RF_Raid_t **raidPtrs; /* global raid device descriptors */ static int rf_pending_testaccs; RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) RF_DECLARE_STATIC_MUTEX(rf_async_done_q_mutex) -static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a spare table */ -static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from installation process */ -static struct rf_test_acc *rf_async_done_qh, *rf_async_done_qt; - -static struct rf_recon_req *recon_queue = NULL; /* used to communicate reconstruction requests */ - +static RF_SparetWait_t 
*rf_sparet_wait_queue; /* requests to install a + * spare table */ +static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from + * installation process */ +static struct rf_test_acc *rf_async_done_qh, *rf_async_done_qt; -decl_simple_lock_data(,recon_queue_mutex) +static struct rf_recon_req *recon_queue = NULL; /* used to communicate + * reconstruction + * requests */ +decl_simple_lock_data(, recon_queue_mutex) #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex) #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex) /* prototypes */ -static void KernelWakeupFunc(struct buf *bp); -static void InitBP(struct buf *bp, struct vnode *, unsigned rw_flag, dev_t dev, - RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t buf, - void (*cbFunc)(struct buf *), void *cbArg, int logBytesPerSector, - struct proc *b_proc); +static void KernelWakeupFunc(struct buf * bp); +static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag, + dev_t dev, RF_SectorNum_t startSect, + RF_SectorCount_t numSect, caddr_t buf, + void (*cbFunc) (struct buf *), void *cbArg, + int logBytesPerSector, struct proc * b_proc); #define Dprintf0(s) if (rf_queueDebug) rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) #define Dprintf1(s,a) if (rf_queueDebug) rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL) @@ -401,52 +214,52 @@ static void InitBP(struct buf *bp, struct vnode *, unsigned rw_flag, dev_t dev, /* this is so that we can compile under 2.0 as well as 3.2 */ #ifndef proc_to_task #define proc_to_task(x) ((x)->task) -#endif /* !proc_to_task */ +#endif /* !proc_to_task */ -void raidattach __P((int)); -int raidsize __P((dev_t)); +void raidattach __P((int)); +int raidsize __P((dev_t)); -void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int); -void rf_CopybackReconstructedData(RF_Raid_t *raidPtr); -static int raidinit __P((dev_t,RF_Raid_t *,int)); +void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int); +void 
rf_CopybackReconstructedData(RF_Raid_t * raidPtr); +static int raidinit __P((dev_t, RF_Raid_t *, int)); -int raidopen __P((dev_t, int, int, struct proc *)); -int raidclose __P((dev_t, int, int, struct proc *)); -int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *)); -int raidwrite __P((dev_t, struct uio *, int)); -int raidread __P((dev_t, struct uio *, int)); -void raidstrategy __P((struct buf *)); -int raiddump __P((dev_t, daddr_t, caddr_t, size_t)); +int raidopen __P((dev_t, int, int, struct proc *)); +int raidclose __P((dev_t, int, int, struct proc *)); +int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *)); +int raidwrite __P((dev_t, struct uio *, int)); +int raidread __P((dev_t, struct uio *, int)); +void raidstrategy __P((struct buf *)); +int raiddump __P((dev_t, daddr_t, caddr_t, size_t)); /* * Pilfered from ccd.c */ struct raidbuf { - struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */ - struct buf *rf_obp; /* ptr. to original I/O buf */ - int rf_flags; /* misc. flags */ - RF_DiskQueueData_t *req; /* the request that this was part of.. */ + struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */ + struct buf *rf_obp; /* ptr. to original I/O buf */ + int rf_flags; /* misc. flags */ + RF_DiskQueueData_t *req; /* the request that this was + * part of.. */ }; #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT) #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp) -/* XXX Not sure if the following should be replacing the raidPtrs above, -or if it should be used in conjunction with that... */ +/* XXX Not sure if the following should be replacing the raidPtrs above, + or if it should be used in conjunction with that... 
*/ struct raid_softc { - int sc_unit; /* logical unit number */ - int sc_flags; /* flags */ - int sc_cflags; /* configuration flags */ - size_t sc_size; /* size of the raid device */ - dev_t sc_dev; /* our device..*/ - char sc_xname[20]; /* XXX external name */ - struct disk sc_dkdev; /* generic disk device info */ - struct pool sc_cbufpool; /* component buffer pool */ + int sc_unit;/* logical unit number */ + int sc_flags; /* flags */ + int sc_cflags; /* configuration flags */ + size_t sc_size;/* size of the raid device */ + dev_t sc_dev; /* our device.. */ + char sc_xname[20]; /* XXX external name */ + struct disk sc_dkdev; /* generic disk device info */ + struct pool sc_cbufpool; /* component buffer pool */ }; - /* sc_flags */ #define RAIDF_INITED 0x01 /* unit has been initialized */ #define RAIDF_WLABEL 0x02 /* label area is writable */ @@ -455,7 +268,7 @@ struct raid_softc { #define RAIDF_LOCKED 0x80 /* unit is locked */ #define raidunit(x) DISKUNIT(x) -static int numraid=0; +static int numraid = 0; #define RAIDLABELDEV(dev) \ (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) @@ -463,23 +276,24 @@ static int numraid=0; /* declared here, and made public, for the benefit of KVM stuff.. 
*/ struct raid_softc *raid_softc; -static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *, struct disklabel *)); -static void raidgetdisklabel __P((dev_t)); -static void raidmakedisklabel __P((struct raid_softc *)); +static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *, + struct disklabel *)); +static void raidgetdisklabel __P((dev_t)); +static void raidmakedisklabel __P((struct raid_softc *)); -static int raidlock __P((struct raid_softc *)); -static void raidunlock __P((struct raid_softc *)); -int raidlookup __P((char *, struct proc *p, struct vnode **)); +static int raidlock __P((struct raid_softc *)); +static void raidunlock __P((struct raid_softc *)); +int raidlookup __P((char *, struct proc * p, struct vnode **)); void raidattach(num) - int num; + int num; { - int raidID; + int raidID; #ifdef DEBUG - printf("raidattach: Asked for %d units\n",num); + printf("raidattach: Asked for %d units\n", num); #endif if (num <= 0) { @@ -488,9 +302,7 @@ raidattach(num) #endif return; } - /* - This is where all the initialization stuff gets done. - */ + /* This is where all the initialization stuff gets done. */ /* Make some space for requested number of units... */ @@ -498,24 +310,18 @@ raidattach(num) if (raidPtrs == NULL) { panic("raidPtrs is NULL!!\n"); } - - - rf_kbooted = rf_boot(); if (rf_kbooted) { panic("Serious error booting RAID!!\n"); } - - rf_kbooted = RFK_BOOT_GOOD; - - /* - put together some datastructures like the CCD device does.. - This lets us lock the device and what-not when it gets opened. - */ - + rf_kbooted = RFK_BOOT_GOOD; + + /* put together some datastructures like the CCD device does.. This + * lets us lock the device and what-not when it gets opened. 
*/ + raid_softc = (struct raid_softc *) - malloc(num * sizeof(struct raid_softc), - M_DEVBUF, M_NOWAIT); + malloc(num * sizeof(struct raid_softc), + M_RAIDFRAME, M_NOWAIT); if (raid_softc == NULL) { printf("WARNING: no memory for RAIDframe driver\n"); return; @@ -523,11 +329,11 @@ raidattach(num) numraid = num; bzero(raid_softc, num * sizeof(struct raid_softc)); - for(raidID=0;raidID < num;raidID++) { - RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t), - (RF_Raid_t *)); - if (raidPtrs[raidID]==NULL) { - printf("raidPtrs[%d] is NULL\n",raidID); + for (raidID = 0; raidID < num; raidID++) { + RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t), + (RF_Raid_t *)); + if (raidPtrs[raidID] == NULL) { + printf("raidPtrs[%d] is NULL\n", raidID); } } } @@ -535,11 +341,11 @@ raidattach(num) int raidsize(dev) - dev_t dev; + dev_t dev; { struct raid_softc *rs; struct disklabel *lp; - int part, unit, omask, size; + int part, unit, omask, size; unit = raidunit(dev); if (unit >= numraid) @@ -571,50 +377,48 @@ raidsize(dev) int raiddump(dev, blkno, va, size) - dev_t dev; + dev_t dev; daddr_t blkno; caddr_t va; - size_t size; + size_t size; { /* Not implemented. */ return ENXIO; } - /* ARGSUSED */ int raidopen(dev, flags, fmt, p) - dev_t dev; - int flags, fmt; + dev_t dev; + int flags, fmt; struct proc *p; { - int unit = raidunit(dev); + int unit = raidunit(dev); struct raid_softc *rs; struct disklabel *lp; - int part,pmask; + int part, pmask; unsigned int raidID; - int rc; - int error = 0; - - /* This whole next chunk of code is somewhat suspect... Not sure - it's needed here at all... XXX */ + int rc; + int error = 0; + + /* This whole next chunk of code is somewhat suspect... Not sure it's + * needed here at all... 
XXX */ - if (rf_kbooted == RFK_BOOT_NONE) { + if (rf_kbooted == RFK_BOOT_NONE) { printf("Doing restart on raidopen.\n"); rf_kbooted = RFK_BOOT_GOOD; rc = rf_boot(); if (rc) { rf_kbooted = RFK_BOOT_BAD; printf("Someone is unhappy...\n"); - return(rc); + return (rc); } } - if (unit >= numraid) return (ENXIO); rs = &raid_softc[unit]; if ((error = raidlock(rs)) != 0) - return(error); + return (error); lp = rs->sc_dkdev.dk_label; raidID = raidunit(dev); @@ -623,12 +427,12 @@ raidopen(dev, flags, fmt, p) pmask = (1 << part); db1_printf(("Opening raid device number: %d partition: %d\n", - raidID,part)); + raidID, part)); if ((rs->sc_flags & RAIDF_INITED) && (rs->sc_dkdev.dk_openmask == 0)) - raidgetdisklabel(dev); + raidgetdisklabel(dev); /* make sure that this partition exists */ @@ -636,14 +440,13 @@ raidopen(dev, flags, fmt, p) db1_printf(("Not a raw partition..\n")); if (((rs->sc_flags & RAIDF_INITED) == 0) || ((part >= lp->d_npartitions) || - (lp->d_partitions[part].p_fstype == FS_UNUSED))) { + (lp->d_partitions[part].p_fstype == FS_UNUSED))) { error = ENXIO; raidunlock(rs); db1_printf(("Bailing out...\n")); - return(error); + return (error); } } - /* Prevent this unit from being unconfigured while open. 
*/ switch (fmt) { case S_IFCHR: @@ -659,22 +462,21 @@ raidopen(dev, flags, fmt, p) raidunlock(rs); - return(error); + return (error); } - /* ARGSUSED */ int raidclose(dev, flags, fmt, p) - dev_t dev; - int flags, fmt; + dev_t dev; + int flags, fmt; struct proc *p; { - int unit = raidunit(dev); + int unit = raidunit(dev); struct raid_softc *rs; - int error = 0; - int part; + int error = 0; + int part; if (unit >= numraid) return (ENXIO); @@ -713,16 +515,16 @@ raidstrategy(bp) RF_Raid_t *raidPtr; struct raid_softc *rs = &raid_softc[raidID]; struct disklabel *lp; - int wlabel; + int wlabel; #if 0 - db1_printf(("Strategy: 0x%x 0x%x\n",bp,bp->b_data)); - db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int)bp->b_bufsize)); - db1_printf(("bp->b_count=%d\n",(int)bp->b_bcount)); - db1_printf(("bp->b_resid=%d\n",(int)bp->b_resid)); - db1_printf(("bp->b_blkno=%d\n",(int)bp->b_blkno)); + db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data)); + db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize)); + db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount)); + db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid)); + db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno)); - if (bp->b_flags&B_READ) + if (bp->b_flags & B_READ) db1_printf(("READ\n")); else db1_printf(("WRITE\n")); @@ -756,45 +558,43 @@ raidstrategy(bp) * error, the bounds check will flag that for us. */ - wlabel = rs->sc_flags & (RAIDF_WLABEL|RAIDF_LABELLING); + wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); if (DISKPART(bp->b_dev) != RAW_PART) if (bounds_check_with_label(bp, lp, wlabel) <= 0) { db1_printf(("Bounds check failed!!:%d %d\n", - (int)bp->b_blkno,(int)wlabel)); + (int) bp->b_blkno, (int) wlabel)); biodone(bp); return; } - - s = splbio(); /* XXX Needed? */ + s = splbio(); /* XXX Needed? 
*/ db1_printf(("Beginning strategy...\n")); - + bp->b_resid = 0; - bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp, - NULL, NULL, NULL); + bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp, + NULL, NULL, NULL); if (bp->b_error) { bp->b_flags |= B_ERROR; db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n", - bp->b_error)); + bp->b_error)); } splx(s); #if 0 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n", - bp,bp->b_data, - (int)bp->b_bcount,(int)bp->b_resid)); + bp, bp->b_data, + (int) bp->b_bcount, (int) bp->b_resid)); #endif } - /* ARGSUSED */ int raidread(dev, uio, flags) - dev_t dev; + dev_t dev; struct uio *uio; - int flags; + int flags; { - int unit = raidunit(dev); + int unit = raidunit(dev); struct raid_softc *rs; - int result; - int part; + int result; + int part; if (unit >= numraid) return (ENXIO); @@ -804,26 +604,25 @@ raidread(dev, uio, flags) return (ENXIO); part = DISKPART(dev); - db1_printf(("raidread: unit: %d partition: %d\n",unit,part)); + db1_printf(("raidread: unit: %d partition: %d\n", unit, part)); #if 0 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); #endif - result=physio(raidstrategy, NULL, dev, B_READ, minphys, uio); + result = physio(raidstrategy, NULL, dev, B_READ, minphys, uio); db1_printf(("raidread done. 
Result is %d %d\n", - result,uio->uio_resid)); - return(result); + result, uio->uio_resid)); + return (result); } - /* ARGSUSED */ int raidwrite(dev, uio, flags) - dev_t dev; + dev_t dev; struct uio *uio; - int flags; + int flags; { - int unit = raidunit(dev); + int unit = raidunit(dev); struct raid_softc *rs; if (unit >= numraid) @@ -840,31 +639,31 @@ raidwrite(dev, uio, flags) int raidioctl(dev, cmd, data, flag, p) - dev_t dev; - u_long cmd; + dev_t dev; + u_long cmd; caddr_t data; - int flag; + int flag; struct proc *p; { - int unit = raidunit(dev); - int error = 0; - int part, pmask; + int unit = raidunit(dev); + int error = 0; + int part, pmask; struct raid_softc *rs; #if 0 - int r,c; + int r, c; #endif - /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */ + /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */ - /* struct ccdbuf *cbp; */ - /* struct raidbuf *raidbp; */ + /* struct ccdbuf *cbp; */ + /* struct raidbuf *raidbp; */ RF_Config_t *k_cfg, *u_cfg; u_char *specific_buf; - int retcode = 0; + int retcode = 0; - int row; + int row; struct rf_recon_req *rrcopy, *rr; #if 0 - int nbytes, spl, rw, row; + int nbytes, spl, rw, row; struct rf_test_acc *ta; struct buf *bp; RF_SparetWait_t *waitreq; @@ -875,8 +674,8 @@ raidioctl(dev, cmd, data, flag, p) return (ENXIO); rs = &raid_softc[unit]; - db1_printf(("raidioctl: %d %d %d %d\n",(int)dev, - (int)DISKPART(dev),(int)unit,(int)cmd)); + db1_printf(("raidioctl: %d %d %d %d\n", (int) dev, + (int) DISKPART(dev), (int) unit, (int) cmd)); /* Must be open for writes for these commands... 
*/ switch (cmd) { @@ -908,7 +707,7 @@ raidioctl(dev, cmd, data, flag, p) if ((rs->sc_flags & RAIDF_INITED) == 0) return (ENXIO); } - + switch (cmd) { @@ -918,57 +717,56 @@ raidioctl(dev, cmd, data, flag, p) db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n")); /* copy-in the configuration information */ /* data points to a pointer to the configuration structure */ - u_cfg = *((RF_Config_t **) data); - RF_Malloc(k_cfg,sizeof(RF_Config_t),(RF_Config_t *)); + u_cfg = *((RF_Config_t **) data); + RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); if (k_cfg == NULL) { db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode)); - return(ENOMEM); + return (ENOMEM); } - retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg, - sizeof(RF_Config_t)); + retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg, + sizeof(RF_Config_t)); if (retcode) { - db3_printf(("rf_ioctl: retcode=%d copyin.1\n", - retcode)); - return(retcode); + db3_printf(("rf_ioctl: retcode=%d copyin.1\n", + retcode)); + return (retcode); } - - /* allocate a buffer for the layout-specific data, - and copy it in */ + /* allocate a buffer for the layout-specific data, and copy it + * in */ if (k_cfg->layoutSpecificSize) { - if (k_cfg->layoutSpecificSize > 10000) { + if (k_cfg->layoutSpecificSize > 10000) { /* sanity check */ db3_printf(("rf_ioctl: EINVAL %d\n", retcode)); - return(EINVAL); + return (EINVAL); } - RF_Malloc(specific_buf,k_cfg->layoutSpecificSize, - (u_char *)); + RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, + (u_char *)); if (specific_buf == NULL) { - RF_Free(k_cfg,sizeof(RF_Config_t)); + RF_Free(k_cfg, sizeof(RF_Config_t)); db3_printf(("rf_ioctl: ENOMEM %d\n", retcode)); - return(ENOMEM); + return (ENOMEM); } - retcode = copyin(k_cfg->layoutSpecific, - (caddr_t) specific_buf, - k_cfg->layoutSpecificSize); + retcode = copyin(k_cfg->layoutSpecific, + (caddr_t) specific_buf, + k_cfg->layoutSpecificSize); if (retcode) { db3_printf(("rf_ioctl: retcode=%d copyin.2\n", - retcode)); - 
return(retcode); + retcode)); + return (retcode); } - } else specific_buf = NULL; + } else + specific_buf = NULL; k_cfg->layoutSpecific = specific_buf; - - /* should do some kind of sanity check on the configuration. - Store the sum of all the bytes in the last byte? - */ + + /* should do some kind of sanity check on the configuration. + * Store the sum of all the bytes in the last byte? */ #if 0 db1_printf(("Considering configuring the system.:%d 0x%x\n", - unit,p)); + unit, p)); #endif - /* We need the pointer to this a little deeper, so - stash it here... */ + /* We need the pointer to this a little deeper, so stash it + * here... */ raidPtrs[unit]->proc = p; @@ -979,26 +777,25 @@ raidioctl(dev, cmd, data, flag, p) raidPtrs[unit]->raidid = unit; retcode = rf_Configure(raidPtrs[unit], k_cfg); - - if (retcode == 0) { - retcode = raidinit(dev, raidPtrs[unit],unit); - } + if (retcode == 0) { + retcode = raidinit(dev, raidPtrs[unit], unit); + } /* free the buffers. No return code here. */ if (k_cfg->layoutSpecificSize) { - RF_Free(specific_buf,k_cfg->layoutSpecificSize); + RF_Free(specific_buf, k_cfg->layoutSpecificSize); } - RF_Free(k_cfg,sizeof(RF_Config_t)); - - db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n", - retcode)); - return(retcode); - - /* shutdown the system */ + RF_Free(k_cfg, sizeof(RF_Config_t)); + + db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n", + retcode)); + return (retcode); + + /* shutdown the system */ case RAIDFRAME_SHUTDOWN: - - if ((error = raidlock(rs)) != 0) - return(error); + + if ((error = raidlock(rs)) != 0) + return (error); /* * If somebody has a partition mounted, we shouldn't @@ -1007,26 +804,24 @@ raidioctl(dev, cmd, data, flag, p) part = DISKPART(dev); pmask = (1 << part); - if ((rs->sc_dkdev.dk_openmask & ~pmask) || - ((rs->sc_dkdev.dk_bopenmask & pmask) && - (rs->sc_dkdev.dk_copenmask & pmask))) { - raidunlock(rs); - return (EBUSY); - } - + if ((rs->sc_dkdev.dk_openmask & ~pmask) || + 
((rs->sc_dkdev.dk_bopenmask & pmask) && + (rs->sc_dkdev.dk_copenmask & pmask))) { + raidunlock(rs); + return (EBUSY); + } /* the intention here was to disallow shutdowns while - raidframe is mounted, but it doesn't work because the - shutdown ioctl calls rf_open - */ + * raidframe is mounted, but it doesn't work because the + * shutdown ioctl calls rf_open */ if (rf_pending_testaccs > 0) { printf("RAIDFRAME: Can't shutdown because there are %d pending test accs\n", - rf_pending_testaccs); - return(EINVAL); + rf_pending_testaccs); + return (EINVAL); } if (rf_debugKernelAccess) { printf("call shutdown\n"); } - raidPtrs[unit]->proc = p; /* XXX necessary evil */ + raidPtrs[unit]->proc = p; /* XXX necessary evil */ retcode = rf_Shutdown(raidPtrs[unit]); db1_printf(("Done main shutdown\n")); @@ -1037,112 +832,112 @@ raidioctl(dev, cmd, data, flag, p) /* It's no longer initialized... */ rs->sc_flags &= ~RAIDF_INITED; - /* Detach the disk. */ - disk_detach(&rs->sc_dkdev); + /* Detach the disk. */ + disk_detach(&rs->sc_dkdev); raidunlock(rs); - return(retcode); - + return (retcode); + /* initialize all parity */ case RAIDFRAME_REWRITEPARITY: - if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) - return(EINVAL); + if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) + return (EINVAL); /* borrow the thread of the requesting process */ - raidPtrs[unit]->proc = p; /* Blah... :-p GO */ + raidPtrs[unit]->proc = p; /* Blah... :-p GO */ retcode = rf_RewriteParity(raidPtrs[unit]); - /* return I/O Error if the parity rewrite fails */ + /* return I/O Error if the parity rewrite fails */ - if (retcode) - retcode = EIO; - return(retcode); + if (retcode) + retcode = EIO; + return (retcode); - /* issue a test-unit-ready through raidframe to the - indicated device */ -#if 0 /* XXX not supported yet (ever?) */ + /* issue a test-unit-ready through raidframe to the indicated + * device */ +#if 0 /* XXX not supported yet (ever?) 
*/ case RAIDFRAME_TUR: /* debug only */ - retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data); - return(retcode); + retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data); + return (retcode); #endif case RAIDFRAME_GET_INFO: { RF_Raid_t *raid = raidPtrs[unit]; RF_DeviceConfig_t *cfg, **ucfgp; - int i, j, d; - + int i, j, d; + if (!raid->valid) - return(ENODEV); - ucfgp = (RF_DeviceConfig_t **)data; - RF_Malloc(cfg,sizeof(RF_DeviceConfig_t), - (RF_DeviceConfig_t *)); + return (ENODEV); + ucfgp = (RF_DeviceConfig_t **) data; + RF_Malloc(cfg, sizeof(RF_DeviceConfig_t), + (RF_DeviceConfig_t *)); if (cfg == NULL) - return(ENOMEM); - bzero((char *)cfg, sizeof(RF_DeviceConfig_t)); + return (ENOMEM); + bzero((char *) cfg, sizeof(RF_DeviceConfig_t)); cfg->rows = raid->numRow; cfg->cols = raid->numCol; cfg->ndevs = raid->numRow * raid->numCol; if (cfg->ndevs >= RF_MAX_DISKS) { cfg->ndevs = 0; - return(ENOMEM); + return (ENOMEM); } cfg->nspares = raid->numSpare; if (cfg->nspares >= RF_MAX_DISKS) { cfg->nspares = 0; - return(ENOMEM); + return (ENOMEM); } cfg->maxqdepth = raid->maxQueueDepth; d = 0; - for(i=0;i<cfg->rows;i++) { - for(j=0;j<cfg->cols;j++) { + for (i = 0; i < cfg->rows; i++) { + for (j = 0; j < cfg->cols; j++) { cfg->devs[d] = raid->Disks[i][j]; d++; } } - for(j=cfg->cols,i=0;i<cfg->nspares;i++,j++) { + for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) { cfg->spares[i] = raid->Disks[0][j]; } - retcode = copyout((caddr_t)cfg, (caddr_t)*ucfgp, - sizeof(RF_DeviceConfig_t)); - RF_Free(cfg,sizeof(RF_DeviceConfig_t)); - - return(retcode); + retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp, + sizeof(RF_DeviceConfig_t)); + RF_Free(cfg, sizeof(RF_DeviceConfig_t)); + + return (retcode); } - break; - + break; + case RAIDFRAME_RESET_ACCTOTALS: { RF_Raid_t *raid = raidPtrs[unit]; - + bzero(&raid->acc_totals, sizeof(raid->acc_totals)); - return(0); + return (0); } - break; - + break; + case RAIDFRAME_GET_ACCTOTALS: { - RF_AccTotals_t *totals = (RF_AccTotals_t *)data; + 
RF_AccTotals_t *totals = (RF_AccTotals_t *) data; RF_Raid_t *raid = raidPtrs[unit]; - + *totals = raid->acc_totals; - return(0); + return (0); } - break; - + break; + case RAIDFRAME_KEEP_ACCTOTALS: { RF_Raid_t *raid = raidPtrs[unit]; - int *keep = (int *)data; - + int *keep = (int *) data; + raid->keep_acc_totals = *keep; - return(0); + return (0); } - break; - + break; + case RAIDFRAME_GET_SIZE: *(int *) data = raidPtrs[unit]->totalSectors; - return(0); + return (0); #define RAIDFRAME_RECON 1 /* XXX The above should probably be set somewhere else!! GO */ @@ -1151,15 +946,15 @@ raidioctl(dev, cmd, data, flag, p) /* fail a disk & optionally start reconstruction */ case RAIDFRAME_FAIL_DISK: rr = (struct rf_recon_req *) data; - - if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow + + if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol) - return(EINVAL); + return (EINVAL); + + printf("Failing the disk: row: %d col: %d\n", rr->row, rr->col); - printf("Failing the disk: row: %d col: %d\n",rr->row,rr->col); - - /* make a copy of the recon request so that we don't - rely on the user's buffer */ + /* make a copy of the recon request so that we don't rely on + * the user's buffer */ RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); bcopy(rr, rrcopy, sizeof(*rr)); rrcopy->raidPtr = (void *) raidPtrs[unit]; @@ -1169,54 +964,52 @@ raidioctl(dev, cmd, data, flag, p) recon_queue = rrcopy; wakeup(&recon_queue); UNLOCK_RECON_Q_MUTEX(); - - return(0); - - /* invoke a copyback operation after recon on whatever - disk needs it, if any */ - case RAIDFRAME_COPYBACK: + + return (0); + + /* invoke a copyback operation after recon on whatever disk + * needs it, if any */ + case RAIDFRAME_COPYBACK: /* borrow the current thread to get this done */ - raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */ + raidPtrs[unit]->proc = p; /* ICK.. 
but needed :-p GO */ rf_CopybackReconstructedData(raidPtrs[unit]); - return(0); - + return (0); + /* return the percentage completion of reconstruction */ case RAIDFRAME_CHECKRECON: row = *(int *) data; if (row < 0 || row >= raidPtrs[unit]->numRow) - return(EINVAL); - if (raidPtrs[unit]->status[row] != rf_rs_reconstructing) + return (EINVAL); + if (raidPtrs[unit]->status[row] != rf_rs_reconstructing) *(int *) data = 100; - else + else *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete; - return(0); - - /* the sparetable daemon calls this to wait for the - kernel to need a spare table. - * this ioctl does not return until a spare table is needed. - * XXX -- calling mpsleep here in the ioctl code is almost - certainly wrong and evil. -- XXX - * XXX -- I should either compute the spare table in the - kernel, or have a different -- XXX - * XXX -- interface (a different character device) for - delivering the table -- XXX - */ + return (0); + + /* the sparetable daemon calls this to wait for the kernel to + * need a spare table. this ioctl does not return until a + * spare table is needed. XXX -- calling mpsleep here in the + * ioctl code is almost certainly wrong and evil. 
-- XXX XXX + * -- I should either compute the spare table in the kernel, + * or have a different -- XXX XXX -- interface (a different + * character device) for delivering the table -- XXX */ #if 0 case RAIDFRAME_SPARET_WAIT: RF_LOCK_MUTEX(rf_sparet_wait_mutex); - while (!rf_sparet_wait_queue) mpsleep(&rf_sparet_wait_queue, (PZERO+1)|PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); + while (!rf_sparet_wait_queue) + mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); waitreq = rf_sparet_wait_queue; rf_sparet_wait_queue = rf_sparet_wait_queue->next; RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - - *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */ - + + *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */ + RF_Free(waitreq, sizeof(*waitreq)); - return(0); - - - /* wakes up a process waiting on SPARET_WAIT and puts an - error code in it that will cause the dameon to exit */ + return (0); + + + /* wakes up a process waiting on SPARET_WAIT and puts an error + * code in it that will cause the dameon to exit */ case RAIDFRAME_ABORT_SPARET_WAIT: RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); waitreq->fcol = -1; @@ -1225,17 +1018,17 @@ raidioctl(dev, cmd, data, flag, p) rf_sparet_wait_queue = waitreq; RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); wakeup(&rf_sparet_wait_queue); - return(0); + return (0); - /* used by the spare table daemon to deliver a spare table - into the kernel */ + /* used by the spare table daemon to deliver a spare table + * into the kernel */ case RAIDFRAME_SEND_SPARET: - + /* install the spare table */ - retcode = rf_SetSpareTable(raidPtrs[unit],*(void **) data); - - /* respond to the requestor. the return status of the - spare table installation is passed in the "fcol" field */ + retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data); + + /* respond to the requestor. 
the return status of the spare + * table installation is passed in the "fcol" field */ RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); waitreq->fcol = retcode; RF_LOCK_MUTEX(rf_sparet_wait_mutex); @@ -1243,34 +1036,35 @@ raidioctl(dev, cmd, data, flag, p) rf_sparet_resp_queue = waitreq; wakeup(&rf_sparet_resp_queue); RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - - return(retcode); + + return (retcode); #endif -#endif /* RAIDFRAME_RECON > 0 */ +#endif /* RAIDFRAME_RECON > 0 */ + + default: + break; /* fall through to the os-specific code below */ - default: break; /* fall through to the os-specific code below */ - } - + if (!raidPtrs[unit]->valid) - return(EINVAL); - + return (EINVAL); + /* * Add support for "regular" device ioctls here. */ - + switch (cmd) { case DIOCGDINFO: - db1_printf(("DIOCGDINFO %d %d\n",(int)dev,(int)DISKPART(dev))); - *(struct disklabel *)data = *(rs->sc_dkdev.dk_label); + db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev))); + *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); break; case DIOCGPART: - db1_printf(("DIOCGPART: %d %d\n",(int)dev,(int)DISKPART(dev))); - ((struct partinfo *)data)->disklab = rs->sc_dkdev.dk_label; - ((struct partinfo *)data)->part = + db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev))); + ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; + ((struct partinfo *) data)->part = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; break; @@ -1284,14 +1078,13 @@ raidioctl(dev, cmd, data, flag, p) rs->sc_flags |= RAIDF_LABELLING; error = setdisklabel(rs->sc_dkdev.dk_label, - (struct disklabel *)data, 0, rs->sc_dkdev.dk_cpulabel); + (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel); if (error == 0) { if (cmd == DIOCWDINFO) error = writedisklabel(RAIDLABELDEV(dev), raidstrategy, rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); } - rs->sc_flags &= ~RAIDF_LABELLING; raidunlock(rs); @@ -1302,7 +1095,7 @@ raidioctl(dev, cmd, data, flag, p) case DIOCWLABEL: 
db1_printf(("DIOCWLABEL\n")); - if (*(int *)data != 0) + if (*(int *) data != 0) rs->sc_flags |= RAIDF_WLABEL; else rs->sc_flags &= ~RAIDF_WLABEL; @@ -1311,55 +1104,55 @@ raidioctl(dev, cmd, data, flag, p) case DIOCGDEFLABEL: db1_printf(("DIOCGDEFLABEL\n")); raidgetdefaultlabel(raidPtrs[unit], rs, - (struct disklabel *)data); + (struct disklabel *) data); break; default: - retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */ + retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */ } - return(retcode); + return (retcode); } -/* raidinit -- complete the rest of the initialization for the +/* raidinit -- complete the rest of the initialization for the RAIDframe device. */ static int -raidinit(dev, raidPtr,unit) - dev_t dev; +raidinit(dev, raidPtr, unit) + dev_t dev; RF_Raid_t *raidPtr; - int unit; + int unit; { - int retcode; - /* int ix; */ - /* struct raidbuf *raidbp; */ + int retcode; + /* int ix; */ + /* struct raidbuf *raidbp; */ struct raid_softc *rs; retcode = 0; rs = &raid_softc[unit]; pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0, - 0, 0, "raidpl", 0, NULL, NULL, M_DEVBUF); + 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME); + - /* XXX should check return code first... */ rs->sc_flags |= RAIDF_INITED; - sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds.*/ + sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */ - rs->sc_dkdev.dk_name = rs->sc_xname; + rs->sc_dkdev.dk_name = rs->sc_xname; /* disk_attach actually creates space for the CPU disklabel, among - other things, so it's critical to call this *BEFORE* we - try putzing with disklabels. */ + * other things, so it's critical to call this *BEFORE* we try putzing + * with disklabels. */ disk_attach(&rs->sc_dkdev); /* XXX There may be a weird interaction here between this, and - protectedSectors, as used in RAIDframe. */ - rs->sc_size = raidPtr->totalSectors; + * protectedSectors, as used in RAIDframe. 
*/ + rs->sc_size = raidPtr->totalSectors; rs->sc_dev = dev; - return(retcode); + return (retcode); } @@ -1368,118 +1161,123 @@ raidinit(dev, raidPtr,unit) * initialization code called at boot time (startup.c) * ********************************************************/ -int rf_boot() +int +rf_boot() { - int i, rc; - - rc = rf_mutex_init(&rf_sparet_wait_mutex); - if (rc) { - RF_PANIC(); - } - rc = rf_mutex_init(&rf_async_done_q_mutex); - if (rc) { - RF_PANIC(); - } - rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; - recon_queue = NULL; - rf_async_done_qh = rf_async_done_qt = NULL; - for (i=0; i<numraid; i++) - raidPtrs[i] = NULL; - rc = rf_BootRaidframe(); - if (rc == 0) - printf("Kernelized RAIDframe activated\n"); - else - rf_kbooted = RFK_BOOT_BAD; - return(rc); -} + int i, rc; + rc = rf_mutex_init(&rf_sparet_wait_mutex); + if (rc) { + RF_PANIC(); + } + rc = rf_mutex_init(&rf_async_done_q_mutex); + if (rc) { + RF_PANIC(); + } + rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; + recon_queue = NULL; + rf_async_done_qh = rf_async_done_qt = NULL; + for (i = 0; i < numraid; i++) + raidPtrs[i] = NULL; + rc = rf_BootRaidframe(); + if (rc == 0) + printf("Kernelized RAIDframe activated\n"); + else + rf_kbooted = RFK_BOOT_BAD; + return (rc); +} /* * This kernel thread never exits. It is created once, and persists * until the system reboots. */ -void rf_ReconKernelThread() +void +rf_ReconKernelThread() { - struct rf_recon_req *req; - int s; - - /* XXX not sure what spl() level we should be at here... 
probably splbio() */ - s=splbio(); - - while (1) { - /* grab the next reconstruction request from the queue */ - LOCK_RECON_Q_MUTEX(); - while (!recon_queue) { - UNLOCK_RECON_Q_MUTEX(); - tsleep(&recon_queue, PRIBIO | PCATCH, "raidframe recon", 0); - LOCK_RECON_Q_MUTEX(); - } - req = recon_queue; - recon_queue = recon_queue->next; - UNLOCK_RECON_Q_MUTEX(); - - /* - * If flags specifies that we should start recon, this call - * will not return until reconstruction completes, fails, or is aborted. - */ - rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col, - ((req->flags&RF_FDFLAGS_RECON) ? 1 : 0)); - - RF_Free(req, sizeof(*req)); - } + struct rf_recon_req *req; + int s; + + /* XXX not sure what spl() level we should be at here... probably + * splbio() */ + s = splbio(); + + while (1) { + /* grab the next reconstruction request from the queue */ + LOCK_RECON_Q_MUTEX(); + while (!recon_queue) { + UNLOCK_RECON_Q_MUTEX(); + tsleep(&recon_queue, PRIBIO | PCATCH, "raidframe recon", 0); + LOCK_RECON_Q_MUTEX(); + } + req = recon_queue; + recon_queue = recon_queue->next; + UNLOCK_RECON_Q_MUTEX(); + + /* + * If flags specifies that we should start recon, this call + * will not return until reconstruction completes, fails, or is aborted. + */ + rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col, + ((req->flags & RF_FDFLAGS_RECON) ? 
1 : 0)); + + RF_Free(req, sizeof(*req)); + } } /* wake up the daemon & tell it to get us a spare table * XXX - * the entries in the queues should be tagged with the raidPtr + * the entries in the queues should be tagged with the raidPtr * so that in the extremely rare case that two recons happen at once, we know for * which device were requesting a spare table * XXX */ -int rf_GetSpareTableFromDaemon(req) - RF_SparetWait_t *req; +int +rf_GetSpareTableFromDaemon(req) + RF_SparetWait_t *req; { - int retcode; + int retcode; - RF_LOCK_MUTEX(rf_sparet_wait_mutex); - req->next = rf_sparet_wait_queue; - rf_sparet_wait_queue = req; - wakeup(&rf_sparet_wait_queue); + RF_LOCK_MUTEX(rf_sparet_wait_mutex); + req->next = rf_sparet_wait_queue; + rf_sparet_wait_queue = req; + wakeup(&rf_sparet_wait_queue); - /* mpsleep unlocks the mutex */ - while (!rf_sparet_resp_queue) { - tsleep(&rf_sparet_resp_queue, PRIBIO | PCATCH, - "raidframe getsparetable", 0); + /* mpsleep unlocks the mutex */ + while (!rf_sparet_resp_queue) { + tsleep(&rf_sparet_resp_queue, PRIBIO | PCATCH, + "raidframe getsparetable", 0); #if 0 - mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); + mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); #endif - } - req = rf_sparet_resp_queue; - rf_sparet_resp_queue = req->next; - RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - - retcode = req->fcol; - RF_Free(req, sizeof(*req)); /* this is not the same req as we alloc'd */ - return(retcode); + } + req = rf_sparet_resp_queue; + rf_sparet_resp_queue = req->next; + RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); + + retcode = req->fcol; + RF_Free(req, sizeof(*req)); /* this is not the same req as we + * alloc'd */ + return (retcode); } - /* a wrapper around rf_DoAccess that extracts appropriate info from the bp & passes it down. 
* any calls originating in the kernel must use non-blocking I/O * do some extra sanity checking to return "appropriate" error values for * certain conditions (to make some standard utilities work) */ -int rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg) - RF_Raid_t *raidPtr; - struct buf *bp; - RF_RaidAccessFlags_t flags; - void (*cbFunc)(struct buf *); - void *cbArg; +int +rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg) + RF_Raid_t *raidPtr; + struct buf *bp; + RF_RaidAccessFlags_t flags; + void (*cbFunc) (struct buf *); + void *cbArg; { RF_SectorCount_t num_blocks, pb, sum; RF_RaidAddr_t raid_addr; - int retcode; + int retcode; struct partition *pp; - daddr_t blocknum; - int unit; + daddr_t blocknum; + int unit; struct raid_softc *rs; + int do_async; /* XXX The dev_t used here should be for /dev/[r]raid* !!! */ @@ -1487,47 +1285,42 @@ int rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg) rs = &raid_softc[unit]; /* Ok, for the bp we have here, bp->b_blkno is relative to the - partition.. Need to make it absolute to the underlying - device.. */ + * partition.. Need to make it absolute to the underlying device.. */ blocknum = bp->b_blkno; if (DISKPART(bp->b_dev) != RAW_PART) { pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; blocknum += pp->p_offset; - db1_printf(("updated: %d %d\n",DISKPART(bp->b_dev), - pp->p_offset)); + db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev), + pp->p_offset)); } else { db1_printf(("Is raw..\n")); } db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum)); - db1_printf(("bp->b_bcount = %d\n",(int)bp->b_bcount)); - db1_printf(("bp->b_resid = %d\n",(int)bp->b_resid)); + db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); + db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); - /* *THIS* is where we adjust what block we're going to... but - DO NOT TOUCH bp->b_blkno!!! */ + /* *THIS* is where we adjust what block we're going to... but DO NOT + * TOUCH bp->b_blkno!!! 
*/ raid_addr = blocknum; - + num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; - pb = (bp->b_bcount&raidPtr->sectorMask) ? 1 : 0; + pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0; sum = raid_addr + num_blocks + pb; if (1 || rf_debugKernelAccess) { - db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", - (int)raid_addr, (int)sum,(int)num_blocks, - (int)pb,(int)bp->b_resid)); + db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", + (int) raid_addr, (int) sum, (int) num_blocks, + (int) pb, (int) bp->b_resid)); } - - if ((sum > raidPtr->totalSectors) || (sum < raid_addr) - || (sum < num_blocks) || (sum < pb)) - { + || (sum < num_blocks) || (sum < pb)) { bp->b_error = ENOSPC; bp->b_flags |= B_ERROR; bp->b_resid = bp->b_bcount; biodone(bp); - return(bp->b_error); + return (bp->b_error); } - /* * XXX rf_DoAccess() should do this, not just DoAccessKernel() */ @@ -1537,75 +1330,85 @@ int rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg) bp->b_flags |= B_ERROR; bp->b_resid = bp->b_bcount; biodone(bp); - return(bp->b_error); + return (bp->b_error); } db1_printf(("Calling DoAccess..\n")); - /* don't ever condition on bp->b_flags & B_WRITE. - always condition on B_READ instead */ - retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? - RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, - 0, raid_addr, num_blocks, bp->b_un.b_addr, - bp, NULL, NULL, RF_DAG_NONBLOCKING_IO|flags, - NULL, cbFunc, cbArg); + /* + * XXX For now, all writes are sync + */ + do_async = 1; + if ((bp->b_flags & B_READ) == 0) + do_async = 0; + + /* don't ever condition on bp->b_flags & B_WRITE. always condition on + * B_READ instead */ + retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 
+ RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, + do_async, raid_addr, num_blocks, + bp->b_un.b_addr, + bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags, + NULL, cbFunc, cbArg); #if 0 - db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n",bp, - bp->b_data,(int)bp->b_resid)); + db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp, + bp->b_data, (int) bp->b_resid)); #endif - return(retcode); -} + /* + * If we requested sync I/O, sleep here. + */ + if ((retcode == 0) && (do_async == 0)) + tsleep(bp, PRIBIO, "raidsyncio", 0); + + return (retcode); +} /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ -int rf_DispatchKernelIO(queue, req) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; +int +rf_DispatchKernelIO(queue, req) + RF_DiskQueue_t *queue; + RF_DiskQueueData_t *req; { - int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; + int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; struct buf *bp; - struct raidbuf *raidbp=NULL; + struct raidbuf *raidbp = NULL; struct raid_softc *rs; - int unit; - + int unit; + /* XXX along with the vnode, we also need the softc associated with - this device.. */ - + * this device.. */ + req->queue = queue; - + unit = queue->raidPtr->raidid; - db1_printf(("DispatchKernelIO unit: %d\n",unit)); + db1_printf(("DispatchKernelIO unit: %d\n", unit)); - if (unit >= numraid) { - printf("Invalid unit number: %d %d\n",unit,numraid); + if (unit >= numraid) { + printf("Invalid unit number: %d %d\n", unit, numraid); panic("Invalid Unit number in rf_DispatchKernelIO\n"); } - rs = &raid_softc[unit]; /* XXX is this the right place? */ - disk_busy(&rs->sc_dkdev); + disk_busy(&rs->sc_dkdev); bp = req->bp; - /* - XXX when there is a physical disk failure, someone is passing - us a buffer that contains old stuff!! Attempt to deal with - this problem without taking a performance hit... - (not sure where the real bug is. 
It's buried in RAIDframe - somewhere) :-( GO ) - */ + /* XXX when there is a physical disk failure, someone is passing us a + * buffer that contains old stuff!! Attempt to deal with this problem + * without taking a performance hit... (not sure where the real bug + * is. It's buried in RAIDframe somewhere) :-( GO ) */ if (bp->b_flags & B_ERROR) { bp->b_flags &= ~B_ERROR; } - if (bp->b_error!=0) { + if (bp->b_error != 0) { bp->b_error = 0; } - raidbp = RAIDGETBUF(rs); - raidbp->rf_flags = 0; /* XXX not really used anywhere... */ + raidbp->rf_flags = 0; /* XXX not really used anywhere... */ /* * context for raidiodone @@ -1614,14 +1417,12 @@ int rf_DispatchKernelIO(queue, req) raidbp->req = req; switch (req->type) { - case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ - /* - Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n", - queue->row, queue->col); - */ + case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ + /* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n", + * queue->row, queue->col); */ /* XXX need to do something extra here.. */ - /* I'm leaving this in, as I've never actually seen it - used, and I'd like folks to report it... GO */ + /* I'm leaving this in, as I've never actually seen it used, + * and I'd like folks to report it... 
GO */ printf(("WAKEUP CALLED\n")); queue->numOutstanding++; @@ -1629,157 +1430,150 @@ int rf_DispatchKernelIO(queue, req) KernelWakeupFunc(&raidbp->rf_buf); break; - + case RF_IO_TYPE_READ: case RF_IO_TYPE_WRITE: - + if (req->tracerec) { RF_ETIMER_START(req->tracerec->timer); } - - - InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp, - op | bp->b_flags, queue->rf_cinfo->ci_dev, - req->sectorOffset, req->numSector, - req->buf, KernelWakeupFunc, (void *) req, - queue->raidPtr->logBytesPerSector, req->b_proc); + InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp, + op | bp->b_flags, queue->rf_cinfo->ci_dev, + req->sectorOffset, req->numSector, + req->buf, KernelWakeupFunc, (void *) req, + queue->raidPtr->logBytesPerSector, req->b_proc); if (rf_debugKernelAccess) { - db1_printf(("dispatch: bp->b_blkno = %ld\n", - (long) bp->b_blkno)); + db1_printf(("dispatch: bp->b_blkno = %ld\n", + (long) bp->b_blkno)); } queue->numOutstanding++; queue->last_deq_sector = req->sectorOffset; - /* acc wouldn't have been let in if there were any - pending reqs at any other priority */ + /* acc wouldn't have been let in if there were any pending + * reqs at any other priority */ queue->curPriority = req->priority; - /* - Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n", - req->type, queue->row, queue->col); - */ + /* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n", + * req->type, queue->row, queue->col); */ db1_printf(("Going for %c to unit %d row %d col %d\n", - req->type, unit, queue->row, queue->col)); + req->type, unit, queue->row, queue->col)); db1_printf(("sector %d count %d (%d bytes) %d\n", - (int) req->sectorOffset, (int) req->numSector, - (int) (req->numSector << - queue->raidPtr->logBytesPerSector), - (int) queue->raidPtr->logBytesPerSector)); + (int) req->sectorOffset, (int) req->numSector, + (int) (req->numSector << + queue->raidPtr->logBytesPerSector), + (int) queue->raidPtr->logBytesPerSector)); if ((raidbp->rf_buf.b_flags & B_READ) == 0) { 
raidbp->rf_buf.b_vp->v_numoutput++; } - VOP_STRATEGY(&raidbp->rf_buf); - + break; - + default: panic("bad req->type in rf_DispatchKernelIO"); } db1_printf(("Exiting from DispatchKernelIO\n")); - return(0); + return (0); } - -/* this is the callback function associated with a I/O invoked from +/* this is the callback function associated with a I/O invoked from kernel code. */ -static void KernelWakeupFunc(vbp) - struct buf *vbp; +static void +KernelWakeupFunc(vbp) + struct buf *vbp; { - RF_DiskQueueData_t *req = NULL; - RF_DiskQueue_t *queue; - struct raidbuf *raidbp = (struct raidbuf *)vbp; - struct buf *bp; - struct raid_softc *rs; - int unit; - register int s; - - s=splbio(); /* XXX */ - db1_printf(("recovering the request queue:\n")); - req = raidbp->req; - - bp = raidbp->rf_obp; + RF_DiskQueueData_t *req = NULL; + RF_DiskQueue_t *queue; + struct raidbuf *raidbp = (struct raidbuf *) vbp; + struct buf *bp; + struct raid_softc *rs; + int unit; + register int s; + + s = splbio(); /* XXX */ + db1_printf(("recovering the request queue:\n")); + req = raidbp->req; + + bp = raidbp->rf_obp; #if 0 - db1_printf(("bp=0x%x\n",bp)); + db1_printf(("bp=0x%x\n", bp)); #endif - queue = (RF_DiskQueue_t *) req->queue; + queue = (RF_DiskQueue_t *) req->queue; - if (raidbp->rf_buf.b_flags & B_ERROR) { + if (raidbp->rf_buf.b_flags & B_ERROR) { #if 0 - printf("Setting bp->b_flags!!! %d\n",raidbp->rf_buf.b_error); + printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error); #endif - bp->b_flags |= B_ERROR; - bp->b_error = raidbp->rf_buf.b_error ? - raidbp->rf_buf.b_error : EIO; - } - + bp->b_flags |= B_ERROR; + bp->b_error = raidbp->rf_buf.b_error ? 
+ raidbp->rf_buf.b_error : EIO; + } #if 0 - db1_printf(("raidbp->rf_buf.b_bcount=%d\n",(int)raidbp->rf_buf.b_bcount)); - db1_printf(("raidbp->rf_buf.b_bufsize=%d\n",(int)raidbp->rf_buf.b_bufsize)); - db1_printf(("raidbp->rf_buf.b_resid=%d\n",(int)raidbp->rf_buf.b_resid)); - db1_printf(("raidbp->rf_buf.b_data=0x%x\n",raidbp->rf_buf.b_data)); + db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount)); + db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize)); + db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid)); + db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data)); #endif - /* XXX methinks this could be wrong... */ + /* XXX methinks this could be wrong... */ #if 1 - bp->b_resid = raidbp->rf_buf.b_resid; + bp->b_resid = raidbp->rf_buf.b_resid; #endif - if (req->tracerec) { - RF_ETIMER_STOP(req->tracerec->timer); - RF_ETIMER_EVAL(req->tracerec->timer); - RF_LOCK_MUTEX(rf_tracing_mutex); - req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); - req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); - req->tracerec->num_phys_ios++; - RF_UNLOCK_MUTEX(rf_tracing_mutex); - } - - bp->b_bcount = raidbp->rf_buf.b_bcount;/* XXXX ?? */ + if (req->tracerec) { + RF_ETIMER_STOP(req->tracerec->timer); + RF_ETIMER_EVAL(req->tracerec->timer); + RF_LOCK_MUTEX(rf_tracing_mutex); + req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); + req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); + req->tracerec->num_phys_ios++; + RF_UNLOCK_MUTEX(rf_tracing_mutex); + } + bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */ - unit = queue->raidPtr->raidid; /* *Much* simpler :-> */ + unit = queue->raidPtr->raidid; /* *Much* simpler :-> */ - /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go ballistic, - and mark the component as hosed... */ + /* XXX Ok, let's get aggressive... 
If B_ERROR is set, let's go + * ballistic, and mark the component as hosed... */ #if 1 - if (bp->b_flags&B_ERROR) { - /* Mark the disk as dead */ - /* but only mark it once... */ - if (queue->raidPtr->Disks[queue->row][queue->col].status == - rf_ds_optimal) { - printf("raid%d: IO Error. Marking %s as failed.\n", - unit, queue->raidPtr->Disks[queue->row][queue->col].devname ); - queue->raidPtr->Disks[queue->row][queue->col].status = - rf_ds_failed; - queue->raidPtr->status[queue->row] = rf_rs_degraded; - queue->raidPtr->numFailures++; - } else { /* Disk is already dead... */ - /* printf("Disk already marked as dead!\n"); */ - } - - } + if (bp->b_flags & B_ERROR) { + /* Mark the disk as dead */ + /* but only mark it once... */ + if (queue->raidPtr->Disks[queue->row][queue->col].status == + rf_ds_optimal) { + printf("raid%d: IO Error. Marking %s as failed.\n", + unit, queue->raidPtr->Disks[queue->row][queue->col].devname); + queue->raidPtr->Disks[queue->row][queue->col].status = + rf_ds_failed; + queue->raidPtr->status[queue->row] = rf_rs_degraded; + queue->raidPtr->numFailures++; + } else { /* Disk is already dead... */ + /* printf("Disk already marked as dead!\n"); */ + } + + } #endif - rs = &raid_softc[unit]; - RAIDPUTBUF(rs,raidbp); + rs = &raid_softc[unit]; + RAIDPUTBUF(rs, raidbp); - if (bp->b_resid==0) { - db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n", - unit, bp->b_resid, bp->b_bcount)); - /* XXX is this the right place for a disk_unbusy()??!??!?!? */ - disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid)); - } else { - db1_printf(("b_resid is still %ld\n",bp->b_resid)); - } + if (bp->b_resid == 0) { + db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n", + unit, bp->b_resid, bp->b_bcount)); + /* XXX is this the right place for a disk_unbusy()??!??!?!? 
*/ + disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid)); + } else { + db1_printf(("b_resid is still %ld\n", bp->b_resid)); + } - rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0); - (req->CompleteFunc)(req->argument, (bp->b_flags & B_ERROR) ? 1 : 0); - /* printf("Exiting KernelWakeupFunc\n"); */ + rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0); + (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0); + /* printf("Exiting KernelWakeupFunc\n"); */ - splx(s); /* XXX */ + splx(s); /* XXX */ } @@ -1787,62 +1581,62 @@ static void KernelWakeupFunc(vbp) /* * initialize a buf structure for doing an I/O in the kernel. */ -static void InitBP( - struct buf *bp, - struct vnode *b_vp, - unsigned rw_flag, - dev_t dev, - RF_SectorNum_t startSect, - RF_SectorCount_t numSect, - caddr_t buf, - void (*cbFunc)(struct buf *), - void *cbArg, - int logBytesPerSector, - struct proc *b_proc) +static void +InitBP( + struct buf * bp, + struct vnode * b_vp, + unsigned rw_flag, + dev_t dev, + RF_SectorNum_t startSect, + RF_SectorCount_t numSect, + caddr_t buf, + void (*cbFunc) (struct buf *), + void *cbArg, + int logBytesPerSector, + struct proc * b_proc) { - /* bp->b_flags = B_PHYS | rw_flag; */ - bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ - bp->b_bcount = numSect << logBytesPerSector; - bp->b_bufsize = bp->b_bcount; - bp->b_error = 0; - bp->b_dev = dev; + /* bp->b_flags = B_PHYS | rw_flag; */ + bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ + bp->b_bcount = numSect << logBytesPerSector; + bp->b_bufsize = bp->b_bcount; + bp->b_error = 0; + bp->b_dev = dev; db1_printf(("bp->b_dev is %d\n", dev)); - bp->b_un.b_addr = buf; + bp->b_un.b_addr = buf; #if 0 - db1_printf(("bp->b_data=0x%x\n",bp->b_data)); + db1_printf(("bp->b_data=0x%x\n", bp->b_data)); #endif - bp->b_blkno = startSect; - bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! 
*/ - db1_printf(("b_bcount is: %d\n",(int)bp->b_bcount)); + bp->b_blkno = startSect; + bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ + db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount)); if (bp->b_bcount == 0) { panic("bp->b_bcount is zero in InitBP!!\n"); } - bp->b_proc = b_proc; - bp->b_iodone = cbFunc; - bp->b_vp = b_vp; - -} -#endif /* KERNEL */ + bp->b_proc = b_proc; + bp->b_iodone = cbFunc; + bp->b_vp = b_vp; +} /* Extras... */ -unsigned int rpcc() +unsigned int +rpcc() { - /* XXX no clue what this is supposed to do.. my guess is - that it's supposed to read the CPU cycle counter... */ - /* db1_printf("this is supposed to do something useful too!??\n"); */ - return(0); + /* XXX no clue what this is supposed to do.. my guess is that it's + * supposed to read the CPU cycle counter... */ + /* db1_printf("this is supposed to do something useful too!??\n"); */ + return (0); } - #if 0 -int rf_GetSpareTableFromDaemon(req) - RF_SparetWait_t *req; +int +rf_GetSpareTableFromDaemon(req) + RF_SparetWait_t *req; { - int retcode=1; - printf("This is supposed to do something useful!!\n"); /* XXX */ - - return(retcode); + int retcode = 1; + printf("This is supposed to do something useful!!\n"); /* XXX */ + + return (retcode); } #endif @@ -1865,7 +1659,7 @@ raidgetdefaultlabel(raidPtr, rs, lp) lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); - lp->d_type = DTYPE_RAID; + lp->d_type = DTYPE_RAID; strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); lp->d_rpm = 3600; lp->d_interleave = 1; @@ -1881,18 +1675,17 @@ raidgetdefaultlabel(raidPtr, rs, lp) lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); } - /* * Read the disklabel from the raid device. If one is not present, fake one * up. 
*/ static void raidgetdisklabel(dev) - dev_t dev; + dev_t dev; { - int unit = raidunit(dev); + int unit = raidunit(dev); struct raid_softc *rs = &raid_softc[unit]; - char *errstring; + char *errstring; struct disklabel *lp = rs->sc_dkdev.dk_label; struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; RF_Raid_t *raidPtr; @@ -1910,10 +1703,10 @@ raidgetdisklabel(dev) */ errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); - if (errstring) + if (errstring) raidmakedisklabel(rs); else { - int i; + int i; struct partition *pp; /* @@ -1939,7 +1732,6 @@ raidgetdisklabel(dev) } } - /* * Take care of things one might want to take care of in the event * that a disklabel isn't present. @@ -1962,53 +1754,51 @@ raidmakedisklabel(rs) lp->d_checksum = dkcksum(lp); } - /* * Lookup the provided name in the filesystem. If the file exists, * is a valid block device, and isn't being used by anyone else, * set *vpp to the file's vnode. - * You'll find the original of this in ccd.c + * You'll find the original of this in ccd.c */ int raidlookup(path, p, vpp) - char *path; + char *path; struct proc *p; struct vnode **vpp; /* result */ { struct nameidata nd; struct vnode *vp; struct vattr va; - int error; + int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p); - if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { + if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) { #ifdef DEBUG - printf("RAIDframe: vn_open returned %d\n",error); + printf("RAIDframe: vn_open returned %d\n", error); #endif return (error); } vp = nd.ni_vp; if (vp->v_usecount > 1) { VOP_UNLOCK(vp, 0); - (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); + (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); return (EBUSY); } if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) { VOP_UNLOCK(vp, 0); - (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); + (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); return (error); } /* XXX: eventually we should handle VREG, 
too. */ if (va.va_type != VBLK) { VOP_UNLOCK(vp, 0); - (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); + (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); return (ENOTBLK); } VOP_UNLOCK(vp, 0); *vpp = vp; return (0); } - /* * Wait interruptibly for an exclusive lock. * @@ -2020,18 +1810,17 @@ static int raidlock(rs) struct raid_softc *rs; { - int error; + int error; while ((rs->sc_flags & RAIDF_LOCKED) != 0) { rs->sc_flags |= RAIDF_WANTED; - if ((error = - tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) + if ((error = + tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) return (error); } rs->sc_flags |= RAIDF_LOCKED; return (0); } - /* * Unlock and wake up any waiters. */ diff --git a/sys/dev/raidframe/rf_nwayxor.c b/sys/dev/raidframe/rf_nwayxor.c index c319aa04c52..465827aeabc 100644 --- a/sys/dev/raidframe/rf_nwayxor.c +++ b/sys/dev/raidframe/rf_nwayxor.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_nwayxor.c,v 1.1 1999/01/11 14:29:31 niklas Exp $ */ -/* $NetBSD: rf_nwayxor.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_nwayxor.c,v 1.2 1999/02/16 00:03:00 niklas Exp $ */ +/* $NetBSD: rf_nwayxor.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -36,149 +36,137 @@ * ************************************************************/ -/* : - * Log: rf_nwayxor.c,v - * Revision 1.6 1996/06/12 03:31:18 jimz - * only print call counts if rf_showXorCallCounts != 0 - * - * Revision 1.5 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.4 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1995/12/01 19:29:14 root - * added copyright info - * - */ - #include "rf_nwayxor.h" #include "rf_shutdown.h" static int callcount[10]; static void rf_ShutdownNWayXor(void *); -static void rf_ShutdownNWayXor(ignored) - void *ignored; +static void +rf_ShutdownNWayXor(ignored) + void *ignored; { - int i; - - if (rf_showXorCallCounts == 0) - return; - printf("Call counts for n-way xor routines: "); - for (i=0; i<10; i++) - printf("%d ",callcount[i]); - printf("\n"); + int i; + + if (rf_showXorCallCounts == 0) + return; + printf("Call counts for n-way xor routines: "); + for (i = 0; i < 10; i++) + printf("%d ", callcount[i]); + printf("\n"); } -int rf_ConfigureNWayXor(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureNWayXor(listp) + RF_ShutdownList_t **listp; { - int i, rc; + int i, rc; - for (i=0; i<10; i++) - callcount[i] = 0; - rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL); - return(rc); + for (i = 0; i < 10; i++) + callcount[i] = 0; + rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL); + return (rc); } -void rf_nWayXor1(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor1(src_rbs, dest_rb, len) + RF_ReconBuffer_t **src_rbs; + RF_ReconBuffer_t *dest_rb; + int len; { - register unsigned long *src = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *dest= (unsigned long *) dest_rb->buffer; - register unsigned long *end = src+len; - register unsigned long d0, d1, d2, d3, s0, s1, s2, s3; - - callcount[1]++; - while (len >= 4 ) - { - d0 = dest[0]; - d1 = dest[1]; - d2 = dest[2]; - d3 = dest[3]; - s0 = src[0]; - s1 = src[1]; - s2 = src[2]; - s3 = src[3]; - dest[0] = d0 ^ s0; - dest[1] = d1 ^ s1; - dest[2] = d2 ^ s2; - dest[3] = d3 ^ s3; - src += 4; - dest += 4; - len -= 4; - } - while (src < end) 
{*dest++ ^= *src++;} + register unsigned long *src = (unsigned long *) src_rbs[0]->buffer; + register unsigned long *dest = (unsigned long *) dest_rb->buffer; + register unsigned long *end = src + len; + register unsigned long d0, d1, d2, d3, s0, s1, s2, s3; + + callcount[1]++; + while (len >= 4) { + d0 = dest[0]; + d1 = dest[1]; + d2 = dest[2]; + d3 = dest[3]; + s0 = src[0]; + s1 = src[1]; + s2 = src[2]; + s3 = src[3]; + dest[0] = d0 ^ s0; + dest[1] = d1 ^ s1; + dest[2] = d2 ^ s2; + dest[3] = d3 ^ s3; + src += 4; + dest += 4; + len -= 4; + } + while (src < end) { + *dest++ ^= *src++; + } } -void rf_nWayXor2(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor2(src_rbs, dest_rb, len) + RF_ReconBuffer_t **src_rbs; + RF_ReconBuffer_t *dest_rb; + int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *a = dst; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - unsigned long a0,a1,a2,a3, b0,b1,b2,b3; - - callcount[2]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) - { - *dst++ = *a++ ^ *b++ ^ *c++; - len--; - } - while (len > 4 ) - { - a0 = a[0]; len -= 4; - - a1 = a[1]; - a2 = a[2]; - - a3 = a[3]; a += 4; - - b0 = b[0]; - b1 = b[1]; - - b2 = b[2]; - b3 = b[3]; - /* start dual issue */ - a0 ^= b0; b0 = c[0]; - - b += 4; a1 ^= b1; - - a2 ^= b2; a3 ^= b3; - - b1 = c[1]; a0 ^= b0; - - b2 = c[2]; a1 ^= b1; - - b3 = c[3]; a2 ^= b2; - - dst[0] = a0; a3 ^= b3; - dst[1] = a1; c += 4; - dst[2] = a2; - dst[3] = a3; dst += 4; - } - while (len) - { - *dst++ = *a++ ^ *b++ ^ *c++; - len--; - } + register unsigned long *dst = (unsigned long *) dest_rb->buffer; + register unsigned long *a = dst; + register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + unsigned long a0, a1, a2, a3, b0, 
b1, b2, b3; + + callcount[2]++; + /* align dest to cache line */ + while ((((unsigned long) dst) & 0x1f)) { + *dst++ = *a++ ^ *b++ ^ *c++; + len--; + } + while (len > 4) { + a0 = a[0]; + len -= 4; + + a1 = a[1]; + a2 = a[2]; + + a3 = a[3]; + a += 4; + + b0 = b[0]; + b1 = b[1]; + + b2 = b[2]; + b3 = b[3]; + /* start dual issue */ + a0 ^= b0; + b0 = c[0]; + + b += 4; + a1 ^= b1; + + a2 ^= b2; + a3 ^= b3; + + b1 = c[1]; + a0 ^= b0; + + b2 = c[2]; + a1 ^= b1; + + b3 = c[3]; + a2 ^= b2; + + dst[0] = a0; + a3 ^= b3; + dst[1] = a1; + c += 4; + dst[2] = a2; + dst[3] = a3; + dst += 4; + } + while (len) { + *dst++ = *a++ ^ *b++ ^ *c++; + len--; + } } - /* note that first arg is not incremented but 2nd arg is */ #define LOAD_FIRST(_dst,_b) \ a0 = _dst[0]; len -= 4; \ @@ -207,248 +195,255 @@ void rf_nWayXor2(src_rbs, dest_rb, len) _dst += 4; -void rf_nWayXor3(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor3(src_rbs, dest_rb, len) + RF_ReconBuffer_t **src_rbs; + RF_ReconBuffer_t *dest_rb; + int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - unsigned long a0,a1,a2,a3, b0,b1,b2,b3; - - callcount[3]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++; - len--; - } - while (len > 4 ) { - LOAD_FIRST(dst,b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++; - len--; - } + register unsigned long *dst = (unsigned long *) dest_rb->buffer; + register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + unsigned long a0, a1, a2, 
a3, b0, b1, b2, b3; + + callcount[3]++; + /* align dest to cache line */ + while ((((unsigned long) dst) & 0x1f)) { + *dst++ ^= *b++ ^ *c++ ^ *d++; + len--; + } + while (len > 4) { + LOAD_FIRST(dst, b); + XOR_AND_LOAD_NEXT(c); + XOR_AND_LOAD_NEXT(d); + XOR_AND_STORE(dst); + } + while (len) { + *dst++ ^= *b++ ^ *c++ ^ *d++; + len--; + } } -void rf_nWayXor4(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor4(src_rbs, dest_rb, len) + RF_ReconBuffer_t **src_rbs; + RF_ReconBuffer_t *dest_rb; + int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - unsigned long a0,a1,a2,a3, b0,b1,b2,b3; - - callcount[4]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; - len--; - } - while (len > 4 ) { - LOAD_FIRST(dst,b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; - len--; - } + register unsigned long *dst = (unsigned long *) dest_rb->buffer; + register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + unsigned long a0, a1, a2, a3, b0, b1, b2, b3; + + callcount[4]++; + /* align dest to cache line */ + while ((((unsigned long) dst) & 0x1f)) { + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; + len--; + } + while (len > 4) { + LOAD_FIRST(dst, b); + XOR_AND_LOAD_NEXT(c); + XOR_AND_LOAD_NEXT(d); + XOR_AND_LOAD_NEXT(e); + XOR_AND_STORE(dst); + } + while (len) { + 
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; + len--; + } } -void rf_nWayXor5(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor5(src_rbs, dest_rb, len) + RF_ReconBuffer_t **src_rbs; + RF_ReconBuffer_t *dest_rb; + int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - unsigned long a0,a1,a2,a3, b0,b1,b2,b3; - - callcount[5]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; - len--; - } - while (len > 4 ) { - LOAD_FIRST(dst,b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; - len--; - } + register unsigned long *dst = (unsigned long *) dest_rb->buffer; + register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; + unsigned long a0, a1, a2, a3, b0, b1, b2, b3; + + callcount[5]++; + /* align dest to cache line */ + while ((((unsigned long) dst) & 0x1f)) { + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; + len--; + } + while (len > 4) { + LOAD_FIRST(dst, b); + XOR_AND_LOAD_NEXT(c); + XOR_AND_LOAD_NEXT(d); + XOR_AND_LOAD_NEXT(e); + XOR_AND_LOAD_NEXT(f); + XOR_AND_STORE(dst); + } + while (len) { + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; + len--; + } } -void rf_nWayXor6(src_rbs, 
dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor6(src_rbs, dest_rb, len) + RF_ReconBuffer_t **src_rbs; + RF_ReconBuffer_t *dest_rb; + int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - unsigned long a0,a1,a2,a3, b0,b1,b2,b3; - - callcount[6]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; - len--; - } - while (len > 4 ) { - LOAD_FIRST(dst,b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; - len--; - } + register unsigned long *dst = (unsigned long *) dest_rb->buffer; + register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; + register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; + unsigned long a0, a1, a2, a3, b0, b1, b2, b3; + + callcount[6]++; + /* align dest to cache line */ + while ((((unsigned long) dst) & 0x1f)) { + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; + len--; + } + while (len > 4) { + LOAD_FIRST(dst, b); + XOR_AND_LOAD_NEXT(c); + XOR_AND_LOAD_NEXT(d); + XOR_AND_LOAD_NEXT(e); + XOR_AND_LOAD_NEXT(f); + XOR_AND_LOAD_NEXT(g); + 
XOR_AND_STORE(dst); + } + while (len) { + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; + len--; + } } -void rf_nWayXor7(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor7(src_rbs, dest_rb, len) + RF_ReconBuffer_t **src_rbs; + RF_ReconBuffer_t *dest_rb; + int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - register unsigned long *h = (unsigned long *) src_rbs[6]->buffer; - unsigned long a0,a1,a2,a3, b0,b1,b2,b3; - - callcount[7]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; - len--; - } - while (len > 4 ) { - LOAD_FIRST(dst,b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_LOAD_NEXT(h); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; - len--; - } + register unsigned long *dst = (unsigned long *) dest_rb->buffer; + register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; + register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; + register unsigned long *h = (unsigned long *) src_rbs[6]->buffer; + unsigned long a0, a1, a2, a3, b0, b1, b2, b3; + + callcount[7]++; 
+ /* align dest to cache line */ + while ((((unsigned long) dst) & 0x1f)) { + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; + len--; + } + while (len > 4) { + LOAD_FIRST(dst, b); + XOR_AND_LOAD_NEXT(c); + XOR_AND_LOAD_NEXT(d); + XOR_AND_LOAD_NEXT(e); + XOR_AND_LOAD_NEXT(f); + XOR_AND_LOAD_NEXT(g); + XOR_AND_LOAD_NEXT(h); + XOR_AND_STORE(dst); + } + while (len) { + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; + len--; + } } -void rf_nWayXor8(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor8(src_rbs, dest_rb, len) + RF_ReconBuffer_t **src_rbs; + RF_ReconBuffer_t *dest_rb; + int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - register unsigned long *h = (unsigned long *) src_rbs[6]->buffer; - register unsigned long *i = (unsigned long *) src_rbs[7]->buffer; - unsigned long a0,a1,a2,a3, b0,b1,b2,b3; - - callcount[8]++; - /* align dest to cache line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; - len--; - } - while (len > 4 ) { - LOAD_FIRST(dst,b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_LOAD_NEXT(h); - XOR_AND_LOAD_NEXT(i); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; - len--; - } + register unsigned long *dst = (unsigned long *) dest_rb->buffer; + register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + register unsigned long 
*c = (unsigned long *) src_rbs[1]->buffer; + register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; + register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; + register unsigned long *h = (unsigned long *) src_rbs[6]->buffer; + register unsigned long *i = (unsigned long *) src_rbs[7]->buffer; + unsigned long a0, a1, a2, a3, b0, b1, b2, b3; + + callcount[8]++; + /* align dest to cache line */ + while ((((unsigned long) dst) & 0x1f)) { + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; + len--; + } + while (len > 4) { + LOAD_FIRST(dst, b); + XOR_AND_LOAD_NEXT(c); + XOR_AND_LOAD_NEXT(d); + XOR_AND_LOAD_NEXT(e); + XOR_AND_LOAD_NEXT(f); + XOR_AND_LOAD_NEXT(g); + XOR_AND_LOAD_NEXT(h); + XOR_AND_LOAD_NEXT(i); + XOR_AND_STORE(dst); + } + while (len) { + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; + len--; + } } -void rf_nWayXor9(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor9(src_rbs, dest_rb, len) + RF_ReconBuffer_t **src_rbs; + RF_ReconBuffer_t *dest_rb; + int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - register unsigned long *h = (unsigned long *) src_rbs[6]->buffer; - register unsigned long *i = (unsigned long *) src_rbs[7]->buffer; - register unsigned long *j = (unsigned long *) src_rbs[8]->buffer; - unsigned long a0,a1,a2,a3, b0,b1,b2,b3; - - callcount[9]++; - /* align dest to cache 
line */ - while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; - len--; - } - while (len > 4 ) { - LOAD_FIRST(dst,b); - XOR_AND_LOAD_NEXT(c); - XOR_AND_LOAD_NEXT(d); - XOR_AND_LOAD_NEXT(e); - XOR_AND_LOAD_NEXT(f); - XOR_AND_LOAD_NEXT(g); - XOR_AND_LOAD_NEXT(h); - XOR_AND_LOAD_NEXT(i); - XOR_AND_LOAD_NEXT(j); - XOR_AND_STORE(dst); - } - while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; - len--; - } + register unsigned long *dst = (unsigned long *) dest_rb->buffer; + register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; + register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; + register unsigned long *h = (unsigned long *) src_rbs[6]->buffer; + register unsigned long *i = (unsigned long *) src_rbs[7]->buffer; + register unsigned long *j = (unsigned long *) src_rbs[8]->buffer; + unsigned long a0, a1, a2, a3, b0, b1, b2, b3; + + callcount[9]++; + /* align dest to cache line */ + while ((((unsigned long) dst) & 0x1f)) { + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; + len--; + } + while (len > 4) { + LOAD_FIRST(dst, b); + XOR_AND_LOAD_NEXT(c); + XOR_AND_LOAD_NEXT(d); + XOR_AND_LOAD_NEXT(e); + XOR_AND_LOAD_NEXT(f); + XOR_AND_LOAD_NEXT(g); + XOR_AND_LOAD_NEXT(h); + XOR_AND_LOAD_NEXT(i); + XOR_AND_LOAD_NEXT(j); + XOR_AND_STORE(dst); + } + while (len) { + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; + len--; + } } diff --git a/sys/dev/raidframe/rf_nwayxor.h b/sys/dev/raidframe/rf_nwayxor.h index f474dff9908..e328696220c 100644 --- a/sys/dev/raidframe/rf_nwayxor.h +++ b/sys/dev/raidframe/rf_nwayxor.h @@ -1,5 +1,5 @@ -/* $OpenBSD: 
rf_nwayxor.h,v 1.1 1999/01/11 14:29:31 niklas Exp $ */ -/* $NetBSD: rf_nwayxor.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_nwayxor.h,v 1.2 1999/02/16 00:03:00 niklas Exp $ */ +/* $NetBSD: rf_nwayxor.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ /* * rf_nwayxor.h */ @@ -32,27 +32,6 @@ /* * rf_nwayxor.h -- types and prototypes for nwayxor module */ -/* - * : - * Log: rf_nwayxor.h,v - * Revision 1.4 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.3 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.2 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.1 1996/05/18 19:56:47 jimz - * Initial revision - * - */ #ifndef _RF__RF_NWAYXOR_H_ #define _RF__RF_NWAYXOR_H_ @@ -61,15 +40,15 @@ #include "rf_raid.h" #include "rf_reconstruct.h" -int rf_ConfigureNWayXor(RF_ShutdownList_t **listp); -void rf_nWayXor1(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len); -void rf_nWayXor2(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len); -void rf_nWayXor3(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len); -void rf_nWayXor4(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len); -void rf_nWayXor5(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len); -void rf_nWayXor6(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len); -void rf_nWayXor7(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len); -void rf_nWayXor8(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len); -void rf_nWayXor9(RF_ReconBuffer_t **src_rbs, 
RF_ReconBuffer_t *dest_rb, int len); +int rf_ConfigureNWayXor(RF_ShutdownList_t ** listp); +void rf_nWayXor1(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); +void rf_nWayXor2(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); +void rf_nWayXor3(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); +void rf_nWayXor4(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); +void rf_nWayXor5(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); +void rf_nWayXor6(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); +void rf_nWayXor7(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); +void rf_nWayXor8(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); +void rf_nWayXor9(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -#endif /* !_RF__RF_NWAYXOR_H_ */ +#endif /* !_RF__RF_NWAYXOR_H_ */ diff --git a/sys/dev/raidframe/rf_openbsd.h b/sys/dev/raidframe/rf_openbsd.h index 5e34e977c91..6909775613e 100644 --- a/sys/dev/raidframe/rf_openbsd.h +++ b/sys/dev/raidframe/rf_openbsd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rf_openbsd.h,v 1.1 1999/01/11 14:29:32 niklas Exp $ */ +/* $OpenBSD: rf_openbsd.h,v 1.2 1999/02/16 00:03:01 niklas Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 
@@ -76,19 +76,21 @@ #ifndef _RF__RF_OPENBSD_H_ #define _RF__RF_OPENBSD_H_ +#ifdef _KERNEL #include <sys/fcntl.h> #include <sys/systm.h> #include <sys/namei.h> #include <sys/vnode.h> struct raidcinfo { - struct vnode *ci_vp; /* device's vnode */ - dev_t ci_dev; /* XXX: device's dev_t */ + struct vnode *ci_vp; /* device's vnode */ + dev_t ci_dev; /* XXX: device's dev_t */ #if 0 - size_t ci_size; /* size */ - char *ci_path; /* path to component */ - size_t ci_pathlen; /* length of component path */ + size_t ci_size; /* size */ + char *ci_path; /* path to component */ + size_t ci_pathlen; /* length of component path */ #endif }; -#endif /* _RF__RF_OPENBSD_H_ */ +#endif /* _KERNEL */ +#endif /* _RF__RF_OPENBSD_H_ */ diff --git a/sys/dev/raidframe/rf_openbsdkintf.c b/sys/dev/raidframe/rf_openbsdkintf.c index 55b7cfbcca4..64dd4ed1759 100644 --- a/sys/dev/raidframe/rf_openbsdkintf.c +++ b/sys/dev/raidframe/rf_openbsdkintf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rf_openbsdkintf.c,v 1.1 1999/01/11 14:29:32 niklas Exp $ */ +/* $OpenBSD: rf_openbsdkintf.c,v 1.2 1999/02/16 00:03:01 niklas Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 
@@ -113,183 +113,9 @@ * rf_kintf.c -- the kernel interface routines for RAIDframe * ***********************************************************/ -/* - * : - * Log: rf_kintf.c,v - * Revision 1.57 1996/07/19 16:12:20 jimz - * remove addition of protectedSectors in InitBP- it's already - * done in the diskqueue code - * - * Revision 1.56 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.55 1996/06/17 03:00:54 jimz - * Change RAIDFRAME_GET_INFO interface to do its own copyout() - * (because size of device config structure now exceeds 8k) - * - * Revision 1.54 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.53 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.52 1996/06/06 17:28:08 jimz - * track sector number of last I/O dequeued - * - * Revision 1.51 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.50 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.49 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.48 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.47 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.46 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.45 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.44 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.43 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.42 1996/05/23 22:17:54 jimz - * fix sector size hardcoding problems - * - * Revision 1.41 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.40 1996/05/23 13:18:07 jimz - * tracing_mutex -> rf_tracing_mutex - * - * Revision 1.39 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.38 1996/05/20 16:15:32 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.37 1996/05/10 16:23:47 jimz - * RF_offset -> RF_Offset - * - * Revision 1.36 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function 
names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.35 1996/05/03 19:10:48 jimz - * change sanity checking for bogus I/Os to return more appropriate - * values (to make some user-level utilities happer with RAIDframe) - * - * Revision 1.34 1996/05/02 22:17:00 jimz - * When using DKUSAGE, send a bogus IO after configuring to let DKUSAGE know - * that we exist. This will let user-level programs doing group stats on the - * RF device function without error before RF gets its first IO - * - * Changed rf_device_config devs and spares fields to RF_RaidDisk_t - * - * Inc numOutstanding for the disk queue in rf_DispatchKernelIO if - * type is IO_TYPE_NOP. I'm not sure this is right, but it seems to be, - * because the disk IO completion routine wants to dec it, and doesn't - * care if there was no such IO. - * - * Revision 1.33 1996/05/02 15:05:44 jimz - * for now, rf_DoAccessKernel will reject non-sector-sized I/Os - * eventually, it should do something more clever... - * (and do it in DoAccess(), not just DoAccessKernel()) - * - * Revision 1.32 1996/05/01 16:28:39 jimz - * get rid of uses of ccmn_ functions - * - * Revision 1.31 1996/05/01 15:42:17 jimz - * ccmn_* memory management is on the way out. This is an archival checkpoint- - * both the old and new code are in place (all the ccmn_ calls are #if 0). After - * this, the ccmn_ code will no longer appear. 
- * - * Revision 1.30 1996/04/22 15:53:13 jimz - * MAX_RAIDS -> NRAIDFRAME - * - * Revision 1.29 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.28 1995/12/01 19:11:01 root - * added copyright info - * - * Revision 1.27 1995/11/28 18:56:40 wvcii - * disabled buffer copy in rf_write - * - * Revision 1.26 1995/10/06 16:37:08 jimz - * get struct bufs from ubc, not cam - * copy all write data, and operate on copy - * (temporary hack to get around dags in PQ that want - * to Xor into user write buffers) - * - * Revision 1.25 1995/09/30 22:23:08 jimz - * do not require raid to be active to perform ACCTOTAL ioctl - * - * Revision 1.24 1995/09/30 20:39:08 jimz - * added new ioctls: - * RAIDFRAME_RESET_ACCTOTALS - * RAIDFRAME_GET_ACCTOTALS - * RAIDFRAME_KEEP_ACCTOTALS - * - * Revision 1.23 1995/09/20 21:11:59 jimz - * include dfstrace.h in KERNEL block - * (even though it's a kernel-only file, this makes the depend process - * at user-level happy. Why the user-level Makefile wants to depend - * kintf.c is less clear, but this is a workaround). 
- * - * Revision 1.22 1995/09/19 23:19:03 jimz - * added DKUSAGE support - * - */ - -#ifdef _KERNEL -#define KERNEL -#endif - -#ifdef KERNEL #include <sys/errno.h> -#include "raid.h" #include <sys/param.h> #include <sys/malloc.h> #include <sys/queue.h> @@ -305,6 +131,7 @@ #include <sys/buf.h> #include <sys/user.h> +#include "raid.h" #include "rf_raid.h" #include "rf_raidframe.h" #include "rf_dag.h" @@ -321,7 +148,7 @@ #include "rf_debugprint.h" #include "rf_threadstuff.h" -int rf_kdebug_level = 0; +int rf_kdebug_level = 0; #define RFK_BOOT_NONE 0 #define RFK_BOOT_GOOD 1 @@ -336,16 +163,16 @@ static int rf_kbooted = RFK_BOOT_NONE; #define db3_printf(a) do if (rf_kdebug_level > 2) printf a; while(0) #define db4_printf(a) do if (rf_kdebug_level > 3) printf a; while(0) #define db5_printf(a) do if (rf_kdebug_level > 4) printf a; while(0) -#else /* RAIDDEBUG */ +#else /* RAIDDEBUG */ #define db0_printf(a) printf a #define db1_printf(a) (void)0 #define db2_printf(a) (void)0 #define db3_printf(a) (void)0 #define db4_printf(a) (void)0 #define db5_printf(a) (void)0 -#endif /* RAIDDEBUG */ +#endif /* RAIDDEBUG */ -static RF_Raid_t **raidPtrs; /* global raid device descriptors */ +static RF_Raid_t **raidPtrs; /* global raid device descriptors */ static int rf_pending_testaccs; @@ -362,7 +189,7 @@ static struct rf_test_acc *rf_async_done_qh, *rf_async_done_qt; /* used to communicate reconstruction requests */ static struct rf_recon_req *recon_queue = NULL; -decl_simple_lock_data(,recon_queue_mutex) +decl_simple_lock_data(, recon_queue_mutex) #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex) #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex) @@ -376,7 +203,7 @@ void rf_InitBP __P((struct buf *, struct vnode *, unsigned, dev_t, /* this is so that we can compile under 2.0 as well as 3.2 */ #ifndef proc_to_task #define proc_to_task(x) ((x)->task) -#endif /* !proc_to_task */ +#endif /* !proc_to_task */ void raidattach __P((int)); int raidsize __P((dev_t)); 
@@ -397,27 +224,27 @@ int raiddump __P((dev_t, daddr_t, caddr_t, size_t)); * Pilfered from ccd.c */ struct raidbuf { - struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */ - struct buf *rf_obp; /* ptr. to original I/O buf */ - int rf_flags; /* misc. flags */ - RF_DiskQueueData_t *req; /* the request that this was part of.. */ + struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */ + struct buf *rf_obp; /* ptr. to original I/O buf */ + int rf_flags; /* misc. flags */ + RF_DiskQueueData_t *req;/* the request that this was part of.. */ }; -#define RAIDGETBUF() malloc(sizeof (struct raidbuf), M_DEVBUF, M_NOWAIT) -#define RAIDPUTBUF(buf) free(buf, M_DEVBUF) +#define RAIDGETBUF() malloc(sizeof (struct raidbuf), M_RAIDFRAME, M_NOWAIT) +#define RAIDPUTBUF(buf) free(buf, M_RAIDFRAME) /* * XXX Not sure if the following should be replacing the raidPtrs above, * or if it should be used in conjunction with that... */ struct raid_softc { - int sc_unit; /* logical unit number */ - int sc_flags; /* flags */ - int sc_cflags; /* configuration flags */ - size_t sc_size; /* size of the raid device */ - dev_t sc_dev; /* our device..*/ - char sc_xname[20]; /* XXX external name */ - struct disk sc_dkdev; /* generic disk device info */ + int sc_unit; /* logical unit number */ + int sc_flags; /* flags */ + int sc_cflags; /* configuration flags */ + size_t sc_size; /* size of the raid device */ + dev_t sc_dev; /* our device..*/ + char sc_xname[20]; /* XXX external name */ + struct disk sc_dkdev; /* generic disk device info */ }; /* sc_flags */ @@ -428,7 +255,7 @@ struct raid_softc { #define RAIDF_LOCKED 0x80 /* unit is locked */ #define raidunit(x) DISKUNIT(x) -static int numraid=0; +static int numraid = 0; #define RAIDLABELDEV(dev) \ (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) @@ -461,9 +288,7 @@ raidattach(num) return; } - /* - * This is where all the initialization stuff gets done. - */ + /* This is where all the initialization stuff gets done. 
*/ /* Make some space for requested number of units... */ RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **)); @@ -479,25 +304,24 @@ raidattach(num) rf_kbooted = RFK_BOOT_GOOD; /* - put together some datastructures like the CCD device does.. - This lets us lock the device and what-not when it gets opened. - */ + * Put together some datastructures like the CCD device does.. + * This lets us lock the device and what-not when it gets opened. + */ raid_softc = (struct raid_softc *) - malloc(num * sizeof(struct raid_softc), - M_DEVBUF, M_NOWAIT); + malloc(num * sizeof (struct raid_softc), M_RAIDFRAME, M_NOWAIT); if (raid_softc == NULL) { printf("WARNING: no memory for RAIDframe driver\n"); return; } numraid = num; - bzero(raid_softc, num * sizeof(struct raid_softc)); + bzero(raid_softc, num * sizeof (struct raid_softc)); - for(raidID=0;raidID < num;raidID++) { - RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t), - (RF_Raid_t *)); - if (raidPtrs[raidID]==NULL) { - printf("raidPtrs[%d] is NULL\n",raidID); + for (raidID = 0; raidID < num; raidID++) { + RF_Calloc(raidPtrs[raidID], 1, sizeof (RF_Raid_t), + (RF_Raid_t *)); + if (raidPtrs[raidID] == NULL) { + printf("raidPtrs[%d] is NULL\n", raidID); } } } @@ -720,12 +544,12 @@ raidstrategy(bp) * Do bounds checking and adjust transfer. If there's an * error, the bounds check will flag that for us. 
*/ - wlabel = rs->sc_flags & (RAIDF_WLABEL|RAIDF_LABELLING); + wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); if (DISKPART(bp->b_dev) != RAW_PART) if (bounds_check_with_label(bp, lp, rs->sc_dkdev.dk_cpulabel, wlabel) <= 0) { db1_printf(("Bounds check failed!!:%d %d\n", - (int)bp->b_blkno, (int)wlabel)); + (int)bp->b_blkno, (int)wlabel)); biodone(bp); return; } @@ -772,7 +596,7 @@ raidread(dev, uio, flags) #if 0 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); #endif - result=physio(raidstrategy, NULL, dev, B_READ, minphys, uio); + result = physio(raidstrategy, NULL, dev, B_READ, minphys, uio); db1_printf(("raidread done. Result is %d %d\n", result, uio->uio_resid)); return (result); @@ -1041,8 +865,8 @@ raidioctl(dev, cmd, data, flag, p) if (!raid->valid) return (ENODEV); ucfgp = (RF_DeviceConfig_t **)data; - RF_Malloc(cfg,sizeof(RF_DeviceConfig_t), - (RF_DeviceConfig_t *)); + RF_Malloc(cfg, sizeof (RF_DeviceConfig_t), + (RF_DeviceConfig_t *)); if (cfg == NULL) return (ENOMEM); bzero((char *)cfg, sizeof(RF_DeviceConfig_t)); @@ -1060,18 +884,18 @@ raidioctl(dev, cmd, data, flag, p) } cfg->maxqdepth = raid->maxQueueDepth; d = 0; - for(i=0;i<cfg->rows;i++) { - for(j=0;j<cfg->cols;j++) { + for(i = 0; i < cfg->rows; i++) { + for(j = 0; j < cfg->cols; j++) { cfg->devs[d] = raid->Disks[i][j]; d++; } } - for(j=cfg->cols,i=0;i<cfg->nspares;i++,j++) { + for(j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) { cfg->spares[i] = raid->Disks[0][j]; } retcode = copyout((caddr_t)cfg, (caddr_t)*ucfgp, - sizeof(RF_DeviceConfig_t)); - RF_Free(cfg,sizeof(RF_DeviceConfig_t)); + sizeof (RF_DeviceConfig_t)); + RF_Free(cfg, sizeof (RF_DeviceConfig_t)); return (retcode); } @@ -1116,10 +940,10 @@ raidioctl(dev, cmd, data, flag, p) /* fail a disk & optionally start reconstruction */ case RAIDFRAME_FAIL_DISK: - rr = (struct rf_recon_req *) data; + rr = (struct rf_recon_req *)data; - if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow - || rr->col < 0 || rr->col >= 
raidPtrs[unit]->numCol) + if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow || + rr->col < 0 || rr->col >= raidPtrs[unit]->numCol) return (EINVAL); printf("Failing the disk: row: %d col: %d\n",rr->row,rr->col); @@ -1130,7 +954,7 @@ raidioctl(dev, cmd, data, flag, p) */ RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); bcopy(rr, rrcopy, sizeof(*rr)); - rrcopy->raidPtr = (void *) raidPtrs[unit]; + rrcopy->raidPtr = (void *)raidPtrs[unit]; LOCK_RECON_Q_MUTEX(); rrcopy->next = recon_queue; @@ -1177,7 +1001,7 @@ raidioctl(dev, cmd, data, flag, p) */ RF_LOCK_MUTEX(rf_sparet_wait_mutex); while (!rf_sparet_wait_queue) - mpsleep(&rf_sparet_wait_queue, (PZERO+1)|PCATCH, + mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *)simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); @@ -1195,7 +1019,7 @@ raidioctl(dev, cmd, data, flag, p) * Wakes up a process waiting on SPARET_WAIT and puts an * error code in it that will cause the dameon to exit. */ - RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); + RF_Malloc(waitreq, sizeof (*waitreq), (RF_SparetWait_t *)); waitreq->fcol = -1; RF_LOCK_MUTEX(rf_sparet_wait_mutex); waitreq->next = rf_sparet_wait_queue; @@ -1211,7 +1035,7 @@ raidioctl(dev, cmd, data, flag, p) */ /* Install the spare table */ - retcode = rf_SetSpareTable(raidPtrs[unit],*(void **) data); + retcode = rf_SetSpareTable(raidPtrs[unit],*(void **)data); /* * Respond to the requestor. 
the return status of the @@ -1359,7 +1183,7 @@ rf_boot() rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; recon_queue = NULL; rf_async_done_qh = rf_async_done_qt = NULL; - for (i=0; i<numraid; i++) + for (i = 0; i < numraid; i++) raidPtrs[i] = NULL; rc = rf_BootRaidframe(); if (rc == 0) @@ -1389,9 +1213,10 @@ rf_ReconKernelThread() /* grab the next reconstruction request from the queue */ LOCK_RECON_Q_MUTEX(); while (!recon_queue) { - UNLOCK_RECON_Q_MUTEX(); - tsleep(&recon_queue, PRIBIO | PCATCH, "raidframe recon", 0); - LOCK_RECON_Q_MUTEX(); + UNLOCK_RECON_Q_MUTEX(); + tsleep(&recon_queue, PRIBIO | PCATCH, + "raidframe recon", 0); + LOCK_RECON_Q_MUTEX(); } req = recon_queue; recon_queue = recon_queue->next; @@ -1402,10 +1227,10 @@ rf_ReconKernelThread() * will not return until reconstruction completes, fails, or * is aborted. */ - rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col, + rf_FailDisk((RF_Raid_t *)req->raidPtr, req->row, req->col, ((req->flags&RF_FDFLAGS_RECON) ? 
1 : 0)); - RF_Free(req, sizeof(*req)); + RF_Free(req, sizeof *req); } } @@ -1434,7 +1259,7 @@ rf_GetSpareTableFromDaemon(req) "raidframe getsparetable", 0); #if 0 mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0, - (void *) simple_lock_addr(rf_sparet_wait_mutex), + (void *)simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); #endif } @@ -1444,7 +1269,7 @@ rf_GetSpareTableFromDaemon(req) retcode = req->fcol; /* this is not the same req as we alloc'd */ - RF_Free(req, sizeof(*req)); + RF_Free(req, sizeof *req); return (retcode); } @@ -1457,11 +1282,11 @@ rf_GetSpareTableFromDaemon(req) */ int rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg) - RF_Raid_t *raidPtr; - struct buf *bp; - RF_RaidAccessFlags_t flags; - void (*cbFunc)(struct buf *); - void *cbArg; + RF_Raid_t *raidPtr; + struct buf *bp; + RF_RaidAccessFlags_t flags; + void (*cbFunc)(struct buf *); + void *cbArg; { RF_SectorCount_t num_blocks, pb, sum; RF_RaidAddr_t raid_addr; @@ -1547,8 +1372,8 @@ rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg) int rf_DispatchKernelIO(queue, req) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; + RF_DiskQueue_t *queue; + RF_DiskQueueData_t *req; { int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; struct buf *bp; @@ -1812,7 +1637,6 @@ rf_InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg, bp->b_iodone = cbFunc; bp->b_vp = b_vp; } -#endif /* KERNEL */ /* Extras... */ diff --git a/sys/dev/raidframe/rf_options.c b/sys/dev/raidframe/rf_options.c index c9af8105ba7..dd7a9957c8a 100644 --- a/sys/dev/raidframe/rf_options.c +++ b/sys/dev/raidframe/rf_options.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_options.c,v 1.1 1999/01/11 14:29:33 niklas Exp $ */ -/* $NetBSD: rf_options.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_options.c,v 1.2 1999/02/16 00:03:02 niklas Exp $ */ +/* $NetBSD: rf_options.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */ /* * rf_options.c */ @@ -30,15 +30,6 @@ * rights to redistribute these changes. 
*/ -#ifdef _KERNEL -#define KERNEL -#endif - -#ifdef KERNEL -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#include <dfstrace.h> -#endif /* !__NetBSD__ && !__OpenBSD__ */ -#endif /* KERNEL */ #include "rf_threadstuff.h" #include "rf_types.h" @@ -48,13 +39,13 @@ #ifdef RF_DBG_OPTION #undef RF_DBG_OPTION -#endif /* RF_DBG_OPTION */ +#endif /* RF_DBG_OPTION */ #ifdef __STDC__ #define RF_DBG_OPTION(_option_,_defval_) long rf_##_option_ = _defval_; -#else /* __STDC__ */ +#else /* __STDC__ */ #define RF_DBG_OPTION(_option_,_defval_) long rf_/**/_option_ = _defval_; -#endif /* __STDC__ */ +#endif /* __STDC__ */ #include "rf_optnames.h" @@ -62,24 +53,24 @@ #ifdef __STDC__ #define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_##_option_ }, -#else /* __STDC__ */ +#else /* __STDC__ */ #define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_/**/_option_ }, -#endif /* __STDC__ */ +#endif /* __STDC__ */ RF_DebugName_t rf_debugNames[] = { #include "rf_optnames.h" {NULL, NULL} }; - #undef RF_DBG_OPTION #ifdef __STDC__ #define RF_DBG_OPTION(_option_,_defval_) rf_##_option_ = _defval_ ; -#else /* __STDC__ */ +#else /* __STDC__ */ #define RF_DBG_OPTION(_option_,_defval_) rf_/**/_option_ = _defval_ ; -#endif /* __STDC__ */ +#endif /* __STDC__ */ -void rf_ResetDebugOptions() +void +rf_ResetDebugOptions() { #include "rf_optnames.h" } diff --git a/sys/dev/raidframe/rf_options.h b/sys/dev/raidframe/rf_options.h index 2b5499cb672..fa603b61419 100644 --- a/sys/dev/raidframe/rf_options.h +++ b/sys/dev/raidframe/rf_options.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_options.h,v 1.1 1999/01/11 14:29:33 niklas Exp $ */ -/* $NetBSD: rf_options.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_options.h,v 1.2 1999/02/16 00:03:02 niklas Exp $ */ +/* $NetBSD: rf_options.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ /* * rf_options.h */ @@ -33,36 +33,26 @@ #ifndef _RF__RF_OPTIONS_H_ #define _RF__RF_OPTIONS_H_ -#ifdef _KERNEL -#define KERNEL -#endif - -#ifdef KERNEL -#if 
!defined(__NetBSD__) && !defined(__OpenBSD__) -#include <dfstrace.h> -#endif /* !__NetBSD__ && !__OpenBSD__ */ -#endif /* KERNEL */ - #define RF_DEFAULT_LOCK_TABLE_SIZE 256 typedef struct RF_DebugNames_s { - char *name; - long *ptr; -} RF_DebugName_t; + char *name; + long *ptr; +} RF_DebugName_t; extern RF_DebugName_t rf_debugNames[]; #ifdef RF_DBG_OPTION #undef RF_DBG_OPTION -#endif /* RF_DBG_OPTION */ +#endif /* RF_DBG_OPTION */ #ifdef __STDC__ #define RF_DBG_OPTION(_option_,_defval_) extern long rf_##_option_; -#else /* __STDC__ */ +#else /* __STDC__ */ #define RF_DBG_OPTION(_option_,_defval_) extern long rf_/**/_option_; -#endif /* __STDC__ */ +#endif /* __STDC__ */ #include "rf_optnames.h" -void rf_ResetDebugOptions(void); +void rf_ResetDebugOptions(void); -#endif /* !_RF__RF_OPTIONS_H_ */ +#endif /* !_RF__RF_OPTIONS_H_ */ diff --git a/sys/dev/raidframe/rf_optnames.h b/sys/dev/raidframe/rf_optnames.h index 064b2da76f2..dc7c67d558c 100644 --- a/sys/dev/raidframe/rf_optnames.h +++ b/sys/dev/raidframe/rf_optnames.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_optnames.h,v 1.1 1999/01/11 14:29:33 niklas Exp $ */ -/* $NetBSD: rf_optnames.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_optnames.h,v 1.2 1999/02/16 00:03:03 niklas Exp $ */ +/* $NetBSD: rf_optnames.h,v 1.4 1999/02/05 00:06:13 oster Exp $ */ /* * rf_optnames.h */ @@ -34,111 +34,98 @@ * Don't protect against multiple inclusion here- we actually want this. 
*/ -#ifdef _KERNEL -#define KERNEL -#endif - -RF_DBG_OPTION(accSizeKB,0) /* if nonzero, the fixed access size to run */ -RF_DBG_OPTION(accessDebug,0) -RF_DBG_OPTION(accessTraceBufSize,0) -RF_DBG_OPTION(alignAccesses,0) /* whether accs should be aligned to their size */ -RF_DBG_OPTION(camlayerIOs,0) -RF_DBG_OPTION(camlayerDebug,0) /* debug CAM activity */ -RF_DBG_OPTION(cscanDebug,0) /* debug CSCAN sorting */ -RF_DBG_OPTION(dagDebug,0) -RF_DBG_OPTION(debugPrintUseBuffer,0) -RF_DBG_OPTION(degDagDebug,0) -RF_DBG_OPTION(disableAsyncAccs,0) -RF_DBG_OPTION(diskDebug,0) -RF_DBG_OPTION(doDebug,0) -RF_DBG_OPTION(dtDebug,0) -RF_DBG_OPTION(enableAtomicRMW,0) /* this debug var enables locking of the disk - * arm during small-write operations. Setting - * this variable to anything other than 0 will - * result in deadlock. (wvcii) - */ -RF_DBG_OPTION(engineDebug,0) -RF_DBG_OPTION(fifoDebug,0) /* debug fifo queueing */ -RF_DBG_OPTION(floatingRbufDebug,0) -RF_DBG_OPTION(forceHeadSepLimit,-1) -RF_DBG_OPTION(forceNumFloatingReconBufs,-1) /* wire down number of extra recon buffers to use */ -RF_DBG_OPTION(keepAccTotals,0) /* turn on keep_acc_totals */ -RF_DBG_OPTION(lockTableSize,RF_DEFAULT_LOCK_TABLE_SIZE) -RF_DBG_OPTION(mapDebug,0) -RF_DBG_OPTION(maxNumTraces,-1) -RF_DBG_OPTION(maxRandomSizeKB,128) /* if rf_accSizeKB==0, acc sizes are uniform in [ (1/2)..maxRandomSizeKB ] */ -RF_DBG_OPTION(maxTraceRunTimeSec,0) -RF_DBG_OPTION(memAmtDebug,0) /* trace amount of memory allocated */ -RF_DBG_OPTION(memChunkDebug,0) -RF_DBG_OPTION(memDebug,0) -RF_DBG_OPTION(memDebugAddress,0) -RF_DBG_OPTION(numBufsToAccumulate,1) /* number of buffers to accumulate before doing XOR */ -RF_DBG_OPTION(prReconSched,0) -RF_DBG_OPTION(printDAGsDebug,0) -RF_DBG_OPTION(printStatesDebug,0) -RF_DBG_OPTION(protectedSectors,64L) /* # of sectors at start of disk to - exclude from RAID address space */ -RF_DBG_OPTION(pssDebug,0) -RF_DBG_OPTION(queueDebug,0) -RF_DBG_OPTION(quiesceDebug,0) 
-RF_DBG_OPTION(raidSectorOffset,0) /* added to all incoming sectors to - debug alignment problems */ -RF_DBG_OPTION(reconDebug,0) -RF_DBG_OPTION(reconbufferDebug,0) -RF_DBG_OPTION(rewriteParityStripes,0) /* debug flag that causes parity rewrite at startup */ -RF_DBG_OPTION(scanDebug,0) /* debug SCAN sorting */ -RF_DBG_OPTION(showXorCallCounts,0) /* show n-way Xor call counts */ -RF_DBG_OPTION(shutdownDebug,0) /* show shutdown calls */ -RF_DBG_OPTION(sizePercentage,100) -RF_DBG_OPTION(sstfDebug,0) /* turn on debugging info for sstf queueing */ -RF_DBG_OPTION(stripeLockDebug,0) -RF_DBG_OPTION(suppressLocksAndLargeWrites,0) -RF_DBG_OPTION(suppressTraceDelays,0) -RF_DBG_OPTION(testDebug,0) -RF_DBG_OPTION(useMemChunks,1) -RF_DBG_OPTION(validateDAGDebug,0) -RF_DBG_OPTION(validateVisitedDebug,1) /* XXX turn to zero by default? */ -RF_DBG_OPTION(verifyParityDebug,0) -RF_DBG_OPTION(warnLongIOs,0) - -#ifdef KERNEL -RF_DBG_OPTION(debugKernelAccess,0) /* DoAccessKernel debugging */ -#endif /* KERNEL */ - -#ifndef KERNEL -RF_DBG_OPTION(disableParityVerify,0) /* supress verification of parity */ -RF_DBG_OPTION(interactiveScript,0) /* set as a debug option for now */ -RF_DBG_OPTION(looptestShowWrites,0) /* user-level loop test write debugging */ -RF_DBG_OPTION(traceDebug,0) -#endif /* !KERNEL */ - -#ifdef SIMULATE -RF_DBG_OPTION(addrSizePercentage,100) -RF_DBG_OPTION(diskTrace,0) /* ised to turn the timing traces on and of */ -RF_DBG_OPTION(eventDebug,0) -RF_DBG_OPTION(mWactive,1500) -RF_DBG_OPTION(mWidle,625) -RF_DBG_OPTION(mWsleep,15) -RF_DBG_OPTION(mWspinup,3500) -#endif /* SIMULATE */ +RF_DBG_OPTION(accSizeKB, 0) /* if nonzero, the fixed access size to run */ +RF_DBG_OPTION(accessDebug, 0) +RF_DBG_OPTION(accessTraceBufSize, 0) +RF_DBG_OPTION(alignAccesses, 0) /* whether accs should be aligned to + * their size */ +RF_DBG_OPTION(camlayerIOs, 0) +RF_DBG_OPTION(camlayerDebug, 0) /* debug CAM activity */ +RF_DBG_OPTION(cscanDebug, 0) /* debug CSCAN sorting */ 
+RF_DBG_OPTION(dagDebug, 0) +RF_DBG_OPTION(debugPrintUseBuffer, 0) +RF_DBG_OPTION(degDagDebug, 0) +RF_DBG_OPTION(disableAsyncAccs, 0) +RF_DBG_OPTION(diskDebug, 0) +RF_DBG_OPTION(doDebug, 0) +RF_DBG_OPTION(dtDebug, 0) +RF_DBG_OPTION(enableAtomicRMW, 0) /* this debug var enables locking of + * the disk arm during small-write + * operations. Setting this variable + * to anything other than 0 will + * result in deadlock. (wvcii) */ +RF_DBG_OPTION(engineDebug, 0) +RF_DBG_OPTION(fifoDebug, 0) /* debug fifo queueing */ +RF_DBG_OPTION(floatingRbufDebug, 0) +RF_DBG_OPTION(forceHeadSepLimit, -1) +RF_DBG_OPTION(forceNumFloatingReconBufs, -1) /* wire down number of + * extra recon buffers + * to use */ +RF_DBG_OPTION(keepAccTotals, 0) /* turn on keep_acc_totals */ +RF_DBG_OPTION(lockTableSize, RF_DEFAULT_LOCK_TABLE_SIZE) +RF_DBG_OPTION(mapDebug, 0) +RF_DBG_OPTION(maxNumTraces, -1) +RF_DBG_OPTION(maxRandomSizeKB, 128) /* if rf_accSizeKB==0, acc sizes are + * uniform in [ (1/2)..maxRandomSizeKB + * ] */ +RF_DBG_OPTION(maxTraceRunTimeSec, 0) +RF_DBG_OPTION(memAmtDebug, 0) /* trace amount of memory allocated */ +RF_DBG_OPTION(memChunkDebug, 0) +RF_DBG_OPTION(memDebug, 0) +RF_DBG_OPTION(memDebugAddress, 0) +RF_DBG_OPTION(numBufsToAccumulate, 1) /* number of buffers to + * accumulate before doing XOR */ +RF_DBG_OPTION(prReconSched, 0) +RF_DBG_OPTION(printDAGsDebug, 0) +RF_DBG_OPTION(printStatesDebug, 0) +RF_DBG_OPTION(protectedSectors, 64L) /* # of sectors at start of + * disk to exclude from RAID + * address space */ +RF_DBG_OPTION(pssDebug, 0) +RF_DBG_OPTION(queueDebug, 0) +RF_DBG_OPTION(quiesceDebug, 0) +RF_DBG_OPTION(raidSectorOffset, 0) /* added to all incoming sectors to + * debug alignment problems */ +RF_DBG_OPTION(reconDebug, 0) +RF_DBG_OPTION(reconbufferDebug, 0) +RF_DBG_OPTION(rewriteParityStripes, 0) /* debug flag that causes + * parity rewrite at startup */ +RF_DBG_OPTION(scanDebug, 0) /* debug SCAN sorting */ +RF_DBG_OPTION(showXorCallCounts, 0) /* show n-way Xor call 
counts */ +RF_DBG_OPTION(shutdownDebug, 0) /* show shutdown calls */ +RF_DBG_OPTION(sizePercentage, 100) +RF_DBG_OPTION(sstfDebug, 0) /* turn on debugging info for sstf queueing */ +RF_DBG_OPTION(stripeLockDebug, 0) +RF_DBG_OPTION(suppressLocksAndLargeWrites, 0) +RF_DBG_OPTION(suppressTraceDelays, 0) +RF_DBG_OPTION(testDebug, 0) +RF_DBG_OPTION(useMemChunks, 1) +RF_DBG_OPTION(validateDAGDebug, 0) +RF_DBG_OPTION(validateVisitedDebug, 1) /* XXX turn to zero by + * default? */ +RF_DBG_OPTION(verifyParityDebug, 0) +RF_DBG_OPTION(warnLongIOs, 0) +RF_DBG_OPTION(debugKernelAccess, 0) /* DoAccessKernel debugging */ +#ifndef _KERNEL +RF_DBG_OPTION(disableParityVerify, 0) /* supress verification of + * parity */ +RF_DBG_OPTION(interactiveScript, 0) /* set as a debug option for now */ +RF_DBG_OPTION(looptestShowWrites, 0) /* user-level loop test write + * debugging */ +RF_DBG_OPTION(traceDebug, 0) +#endif /* !KERNEL */ #if RF_INCLUDE_PARITYLOGGING > 0 -RF_DBG_OPTION(forceParityLogReint,0) -RF_DBG_OPTION(numParityRegions,0) /* number of regions in the array */ -RF_DBG_OPTION(numReintegrationThreads,1) -RF_DBG_OPTION(parityLogDebug,0) /* if nonzero, enables debugging of parity logging */ -RF_DBG_OPTION(totalInCoreLogCapacity,1024*1024) /* target bytes available for in-core logs */ -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +RF_DBG_OPTION(forceParityLogReint, 0) +RF_DBG_OPTION(numParityRegions, 0) /* number of regions in the array */ +RF_DBG_OPTION(numReintegrationThreads, 1) +RF_DBG_OPTION(parityLogDebug, 0) /* if nonzero, enables debugging of + * parity logging */ +RF_DBG_OPTION(totalInCoreLogCapacity, 1024 * 1024) /* target bytes + * available for in-core + * logs */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ #if DFSTRACE > 0 -RF_DBG_OPTION(DFSTraceAccesses,0) -#endif /* DFSTRACE > 0 */ - -#if RF_DEMO > 0 -RF_DBG_OPTION(demoMeterHpos,0) /* horizontal position of meters for demo mode */ -RF_DBG_OPTION(demoMeterTag,0) -RF_DBG_OPTION(demoMeterVpos,0) /* vertical position of 
meters for demo mode */ -RF_DBG_OPTION(demoMode,0) -RF_DBG_OPTION(demoSMM,0) -RF_DBG_OPTION(demoSuppressReconInitVerify,0) /* supress initialization & verify for recon */ -#endif /* RF_DEMO > 0 */ +RF_DBG_OPTION(DFSTraceAccesses, 0) +#endif /* DFSTRACE > 0 */ diff --git a/sys/dev/raidframe/rf_owner.h b/sys/dev/raidframe/rf_owner.h deleted file mode 100644 index 5b741bf3a5d..00000000000 --- a/sys/dev/raidframe/rf_owner.h +++ /dev/null @@ -1,75 +0,0 @@ -/* $OpenBSD: rf_owner.h,v 1.1 1999/01/11 14:29:33 niklas Exp $ */ -/* $NetBSD: rf_owner.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* : - * Log: rf_owner.h,v - * Revision 1.8 1996/08/20 14:36:51 jimz - * add bufLen to RF_EventCreate_t to be able to include buffer length - * when freeing buffer - * - * Revision 1.7 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. 
Removed lots of dead code, some dead files. - * - * Revision 1.6 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.5 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.4 1995/12/01 19:44:30 root - * added copyright info - * - */ - -#ifndef _RF__RF_OWNER_H_ -#define _RF__RF_OWNER_H_ - -#include "rf_types.h" - -struct RF_OwnerInfo_s { - RF_RaidAccessDesc_t *desc; - int owner; - double last_start; - int done; - int notFirst; -}; - -struct RF_EventCreate_s { - RF_Raid_t *raidPtr; - RF_Script_t *script; - RF_OwnerInfo_t *ownerInfo; - char *bufPtr; - int bufLen; -}; - -#endif /* !_RF__RF_OWNER_H_ */ diff --git a/sys/dev/raidframe/rf_paritylog.c b/sys/dev/raidframe/rf_paritylog.c index 84bf2107d99..ea5fa86d94a 100644 --- a/sys/dev/raidframe/rf_paritylog.c +++ b/sys/dev/raidframe/rf_paritylog.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_paritylog.c,v 1.1 1999/01/11 14:29:34 niklas Exp $ */ -/* $NetBSD: rf_paritylog.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_paritylog.c,v 1.2 1999/02/16 00:03:04 niklas Exp $ */ +/* $NetBSD: rf_paritylog.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,107 +29,6 @@ /* Code for manipulating in-core parity logs * - * : - * Log: rf_paritylog.c,v - * Revision 1.27 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.26 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.25 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.24 1996/06/11 10:18:59 jimz - * AllocParityLogCommonData() was freeing the common pointer immediately - * after allocating this. It appeared that this free really belonged - * inside one of the failure cases (for backing out), so I moved it - * in there. 
- * - * Revision 1.23 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.22 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.21 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.20 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.19 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.18 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.17 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.16 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.15 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.14 1996/05/20 16:16:59 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.13 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.12 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.11 1995/12/06 20:54:44 wvcii - * added prototyping - * - * Revision 1.10 1995/11/30 16:05:37 wvcii - * added copyright info - * - * Revision 1.9 1995/10/08 20:41:28 wvcii - * fixed bug in 
allocation of CommonLogData (was allocating incorrect size) - * - * Revision 1.8 1995/09/07 15:52:12 jimz - * noop compile when INCLUDE_PARITYLOGGING not defined - * - * Revision 1.7 1995/09/06 19:17:36 wvcii - * moved code for reintegration to rf_paritylogDiskMgr.c - * - * Revision 1.6 95/07/07 00:16:06 wvcii - * this version free from deadlock, fails parity verification - * - * Revision 1.5 1995/06/09 13:14:24 wvcii - * code is now nonblocking - * - * Revision 1.4 95/06/01 17:01:59 wvcii - * code debug - * - * Revision 1.3 95/05/31 13:08:23 wvcii - * code debug - * - * Revision 1.2 95/05/21 15:42:15 wvcii - * code debug - * - * Revision 1.1 95/05/18 10:43:54 wvcii - * Initial revision - * */ #include "rf_archs.h" @@ -158,865 +57,815 @@ #include "rf_paritylogDiskMgr.h" #include "rf_sys.h" -static RF_CommonLogData_t *AllocParityLogCommonData(RF_Raid_t *raidPtr) +static RF_CommonLogData_t * +AllocParityLogCommonData(RF_Raid_t * raidPtr) { - RF_CommonLogData_t *common = NULL; - int rc; - - /* Return a struct for holding common parity log information from the free - list (rf_parityLogDiskQueue.freeCommonList). If the free list is empty, call - RF_Malloc to create a new structure. 
- NON-BLOCKING */ - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (raidPtr->parityLogDiskQueue.freeCommonList) - { - common = raidPtr->parityLogDiskQueue.freeCommonList; - raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } - else - { - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); - rc = rf_mutex_init(&common->mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_Free(common, sizeof(RF_CommonLogData_t)); - common = NULL; - } - } - common->next = NULL; - return(common); + RF_CommonLogData_t *common = NULL; + int rc; + + /* Return a struct for holding common parity log information from the + * free list (rf_parityLogDiskQueue.freeCommonList). If the free list + * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */ + + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + if (raidPtr->parityLogDiskQueue.freeCommonList) { + common = raidPtr->parityLogDiskQueue.freeCommonList; + raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + } else { + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); + rc = rf_mutex_init(&common->mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + RF_Free(common, sizeof(RF_CommonLogData_t)); + common = NULL; + } + } + common->next = NULL; + return (common); } -static void FreeParityLogCommonData(RF_CommonLogData_t *common) +static void +FreeParityLogCommonData(RF_CommonLogData_t * common) { - RF_Raid_t *raidPtr; + RF_Raid_t *raidPtr; - /* Insert a single struct for holding parity log information - (data) into the free list 
(rf_parityLogDiskQueue.freeCommonList). - NON-BLOCKING */ + /* Insert a single struct for holding parity log information (data) + * into the free list (rf_parityLogDiskQueue.freeCommonList). + * NON-BLOCKING */ - raidPtr = common->raidPtr; - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - common->next = raidPtr->parityLogDiskQueue.freeCommonList; - raidPtr->parityLogDiskQueue.freeCommonList = common; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + raidPtr = common->raidPtr; + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + common->next = raidPtr->parityLogDiskQueue.freeCommonList; + raidPtr->parityLogDiskQueue.freeCommonList = common; + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } -static RF_ParityLogData_t *AllocParityLogData(RF_Raid_t *raidPtr) +static RF_ParityLogData_t * +AllocParityLogData(RF_Raid_t * raidPtr) { - RF_ParityLogData_t *data = NULL; - - /* Return a struct for holding parity log information from the free - list (rf_parityLogDiskQueue.freeList). If the free list is empty, call - RF_Malloc to create a new structure. - NON-BLOCKING */ - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (raidPtr->parityLogDiskQueue.freeDataList) - { - data = raidPtr->parityLogDiskQueue.freeDataList; - raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } - else - { - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); - } - data->next = NULL; - data->prev = NULL; - return(data); + RF_ParityLogData_t *data = NULL; + + /* Return a struct for holding parity log information from the free + * list (rf_parityLogDiskQueue.freeList). If the free list is empty, + * call RF_Malloc to create a new structure. 
NON-BLOCKING */ + + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + if (raidPtr->parityLogDiskQueue.freeDataList) { + data = raidPtr->parityLogDiskQueue.freeDataList; + raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + } else { + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); + } + data->next = NULL; + data->prev = NULL; + return (data); } -static void FreeParityLogData(RF_ParityLogData_t *data) +static void +FreeParityLogData(RF_ParityLogData_t * data) { - RF_ParityLogData_t *nextItem; - RF_Raid_t *raidPtr; - - /* Insert a linked list of structs for holding parity log - information (data) into the free list (parityLogDiskQueue.freeList). - NON-BLOCKING */ - - raidPtr = data->common->raidPtr; - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - while (data) - { - nextItem = data->next; - data->next = raidPtr->parityLogDiskQueue.freeDataList; - raidPtr->parityLogDiskQueue.freeDataList = data; - data = nextItem; - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + RF_ParityLogData_t *nextItem; + RF_Raid_t *raidPtr; + + /* Insert a linked list of structs for holding parity log information + * (data) into the free list (parityLogDiskQueue.freeList). 
+ * NON-BLOCKING */ + + raidPtr = data->common->raidPtr; + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + while (data) { + nextItem = data->next; + data->next = raidPtr->parityLogDiskQueue.freeDataList; + raidPtr->parityLogDiskQueue.freeDataList = data; + data = nextItem; + } + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } -static void EnqueueParityLogData( - RF_ParityLogData_t *data, - RF_ParityLogData_t **head, - RF_ParityLogData_t **tail) +static void +EnqueueParityLogData( + RF_ParityLogData_t * data, + RF_ParityLogData_t ** head, + RF_ParityLogData_t ** tail) { - RF_Raid_t *raidPtr; - - /* Insert an in-core parity log (*data) into the head of - a disk queue (*head, *tail). - NON-BLOCKING */ - - raidPtr = data->common->raidPtr; - if (rf_parityLogDebug) - printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector); - RF_ASSERT(data->prev == NULL); - RF_ASSERT(data->next == NULL); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (*head) - { - /* insert into head of queue */ - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - data->next = *head; - (*head)->prev = data; - *head = data; - } - else - { - /* insert into empty list */ - RF_ASSERT(*head == NULL); - RF_ASSERT(*tail == NULL); - *head = data; - *tail = data; - } - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + RF_Raid_t *raidPtr; + + /* Insert an in-core parity log (*data) into the head of a disk queue + * (*head, *tail). 
NON-BLOCKING */ + + raidPtr = data->common->raidPtr; + if (rf_parityLogDebug) + printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); + RF_ASSERT(data->prev == NULL); + RF_ASSERT(data->next == NULL); + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + if (*head) { + /* insert into head of queue */ + RF_ASSERT((*head)->prev == NULL); + RF_ASSERT((*tail)->next == NULL); + data->next = *head; + (*head)->prev = data; + *head = data; + } else { + /* insert into empty list */ + RF_ASSERT(*head == NULL); + RF_ASSERT(*tail == NULL); + *head = data; + *tail = data; + } + RF_ASSERT((*head)->prev == NULL); + RF_ASSERT((*tail)->next == NULL); + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } -static RF_ParityLogData_t *DequeueParityLogData( - RF_Raid_t *raidPtr, - RF_ParityLogData_t **head, - RF_ParityLogData_t **tail, - int ignoreLocks) +static RF_ParityLogData_t * +DequeueParityLogData( + RF_Raid_t * raidPtr, + RF_ParityLogData_t ** head, + RF_ParityLogData_t ** tail, + int ignoreLocks) { - RF_ParityLogData_t *data; - - /* Remove and return an in-core parity log from the tail of - a disk queue (*head, *tail). - NON-BLOCKING */ - - /* remove from tail, preserving FIFO order */ - if (!ignoreLocks) - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - data = *tail; - if (data) - { - if (*head == *tail) - { - /* removing last item from queue */ - *head = NULL; - *tail = NULL; + RF_ParityLogData_t *data; + + /* Remove and return an in-core parity log from the tail of a disk + * queue (*head, *tail). 
NON-BLOCKING */ + + /* remove from tail, preserving FIFO order */ + if (!ignoreLocks) + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + data = *tail; + if (data) { + if (*head == *tail) { + /* removing last item from queue */ + *head = NULL; + *tail = NULL; + } else { + *tail = (*tail)->prev; + (*tail)->next = NULL; + RF_ASSERT((*head)->prev == NULL); + RF_ASSERT((*tail)->next == NULL); + } + data->next = NULL; + data->prev = NULL; + if (rf_parityLogDebug) + printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); } - else - { - *tail = (*tail)->prev; - (*tail)->next = NULL; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); + if (*head) { + RF_ASSERT((*head)->prev == NULL); + RF_ASSERT((*tail)->next == NULL); } - data->next = NULL; - data->prev = NULL; - if (rf_parityLogDebug) - printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector); - } - if (*head) - { - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - if (!ignoreLocks) - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - return(data); + if (!ignoreLocks) + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + return (data); } -static void RequeueParityLogData( - RF_ParityLogData_t *data, - RF_ParityLogData_t **head, - RF_ParityLogData_t **tail) +static void +RequeueParityLogData( + RF_ParityLogData_t * data, + RF_ParityLogData_t ** head, + RF_ParityLogData_t ** tail) { - RF_Raid_t *raidPtr; - - /* Insert an in-core parity log (*data) into the tail of - a disk queue (*head, *tail). 
- NON-BLOCKING */ - - raidPtr = data->common->raidPtr; - RF_ASSERT(data); - if (rf_parityLogDebug) - printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int) data->diskAddress.numSector); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (*tail) - { - /* append to tail of list */ - data->prev = *tail; - data->next = NULL; - (*tail)->next = data; - *tail = data; - } - else - { - /* inserting into an empty list */ - *head = data; - *tail = data; - (*head)->prev = NULL; - (*tail)->next = NULL; - } - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + RF_Raid_t *raidPtr; + + /* Insert an in-core parity log (*data) into the tail of a disk queue + * (*head, *tail). NON-BLOCKING */ + + raidPtr = data->common->raidPtr; + RF_ASSERT(data); + if (rf_parityLogDebug) + printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + if (*tail) { + /* append to tail of list */ + data->prev = *tail; + data->next = NULL; + (*tail)->next = data; + *tail = data; + } else { + /* inserting into an empty list */ + *head = data; + *tail = data; + (*head)->prev = NULL; + (*tail)->next = NULL; + } + RF_ASSERT((*head)->prev == NULL); + RF_ASSERT((*tail)->next == NULL); + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } -RF_ParityLogData_t *rf_CreateParityLogData( - RF_ParityRecordType_t operation, - RF_PhysDiskAddr_t *pda, - caddr_t bufPtr, - RF_Raid_t *raidPtr, - int (*wakeFunc)(RF_DagNode_t *node, int status), - void *wakeArg, - RF_AccTraceEntry_t *tracerec, - RF_Etimer_t startTime) +RF_ParityLogData_t * +rf_CreateParityLogData( + RF_ParityRecordType_t operation, + RF_PhysDiskAddr_t * pda, + caddr_t bufPtr, + RF_Raid_t * raidPtr, + int (*wakeFunc) (RF_DagNode_t * 
node, int status), + void *wakeArg, + RF_AccTraceEntry_t * tracerec, + RF_Etimer_t startTime) { - RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; - RF_CommonLogData_t *common; - RF_PhysDiskAddr_t *diskAddress; - int boundary, offset = 0; - - /* Return an initialized struct of info to be logged. - Build one item per physical disk address, one item per region. - - NON-BLOCKING */ - - diskAddress = pda; - common = AllocParityLogCommonData(raidPtr); - RF_ASSERT(common); - - common->operation = operation; - common->bufPtr = bufPtr; - common->raidPtr = raidPtr; - common->wakeFunc = wakeFunc; - common->wakeArg = wakeArg; - common->tracerec = tracerec; - common->startTime = startTime; - common->cnt = 0; - - if (rf_parityLogDebug) - printf("[entering CreateParityLogData]\n"); - while (diskAddress) - { - common->cnt++; - data = AllocParityLogData(raidPtr); - RF_ASSERT(data); - data->common = common; - data->next = NULL; - data->prev = NULL; - data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); - if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) - { - /* disk address does not cross a region boundary */ - data->diskAddress = *diskAddress; - data->bufOffset = offset; - offset = offset + diskAddress->numSector; - EnqueueParityLogData(data, &resultHead, &resultTail); - /* adjust disk address */ - diskAddress = diskAddress->next; - } - else - { - /* disk address crosses a region boundary */ - /* find address where region is crossed */ - boundary = 0; - while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) - boundary++; - - /* enter data before the boundary */ - data->diskAddress = *diskAddress; - data->diskAddress.numSector = boundary; - data->bufOffset = offset; - offset += boundary; - EnqueueParityLogData(data, &resultHead, &resultTail); - /* adjust disk address */ - diskAddress->startSector += boundary; - 
diskAddress->numSector -= boundary; + RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; + RF_CommonLogData_t *common; + RF_PhysDiskAddr_t *diskAddress; + int boundary, offset = 0; + + /* Return an initialized struct of info to be logged. Build one item + * per physical disk address, one item per region. + * + * NON-BLOCKING */ + + diskAddress = pda; + common = AllocParityLogCommonData(raidPtr); + RF_ASSERT(common); + + common->operation = operation; + common->bufPtr = bufPtr; + common->raidPtr = raidPtr; + common->wakeFunc = wakeFunc; + common->wakeArg = wakeArg; + common->tracerec = tracerec; + common->startTime = startTime; + common->cnt = 0; + + if (rf_parityLogDebug) + printf("[entering CreateParityLogData]\n"); + while (diskAddress) { + common->cnt++; + data = AllocParityLogData(raidPtr); + RF_ASSERT(data); + data->common = common; + data->next = NULL; + data->prev = NULL; + data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); + if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { + /* disk address does not cross a region boundary */ + data->diskAddress = *diskAddress; + data->bufOffset = offset; + offset = offset + diskAddress->numSector; + EnqueueParityLogData(data, &resultHead, &resultTail); + /* adjust disk address */ + diskAddress = diskAddress->next; + } else { + /* disk address crosses a region boundary */ + /* find address where region is crossed */ + boundary = 0; + while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) + boundary++; + + /* enter data before the boundary */ + data->diskAddress = *diskAddress; + data->diskAddress.numSector = boundary; + data->bufOffset = offset; + offset += boundary; + EnqueueParityLogData(data, &resultHead, &resultTail); + /* adjust disk address */ + diskAddress->startSector += boundary; + diskAddress->numSector -= boundary; + } } - } - if (rf_parityLogDebug) - 
printf("[leaving CreateParityLogData]\n"); - return(resultHead); + if (rf_parityLogDebug) + printf("[leaving CreateParityLogData]\n"); + return (resultHead); } -RF_ParityLogData_t *rf_SearchAndDequeueParityLogData( - RF_Raid_t *raidPtr, - int regionID, - RF_ParityLogData_t **head, - RF_ParityLogData_t **tail, - int ignoreLocks) +RF_ParityLogData_t * +rf_SearchAndDequeueParityLogData( + RF_Raid_t * raidPtr, + int regionID, + RF_ParityLogData_t ** head, + RF_ParityLogData_t ** tail, + int ignoreLocks) { - RF_ParityLogData_t *w; - - /* Remove and return an in-core parity log from a specified region (regionID). - If a matching log is not found, return NULL. - - NON-BLOCKING. - */ - - /* walk backward through a list, looking for an entry with a matching region ID */ - if (!ignoreLocks) - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - w = (*tail); - while (w) - { - if (w->regionID == regionID) - { - /* remove an element from the list */ - if (w == *tail) - { - if (*head == *tail) - { - /* removing only element in the list */ - *head = NULL; - *tail = NULL; - } - else - { - /* removing last item in the list */ - *tail = (*tail)->prev; - (*tail)->next = NULL; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - } - else - { - if (w == *head) - { - /* removing first item in the list */ - *head = (*head)->next; - (*head)->prev = NULL; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - else - { - /* removing an item from the middle of the list */ - w->prev->next = w->next; - w->next->prev = w->prev; - RF_ASSERT((*head)->prev == NULL); - RF_ASSERT((*tail)->next == NULL); - } - } - w->prev = NULL; - w->next = NULL; - if (rf_parityLogDebug) - printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",w->regionID,(int)w->diskAddress.raidAddress,(int) w->diskAddress.numSector); - return(w); + RF_ParityLogData_t *w; + + /* Remove and return an in-core parity log from a specified region + * (regionID). 
If a matching log is not found, return NULL. + * + * NON-BLOCKING. */ + + /* walk backward through a list, looking for an entry with a matching + * region ID */ + if (!ignoreLocks) + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + w = (*tail); + while (w) { + if (w->regionID == regionID) { + /* remove an element from the list */ + if (w == *tail) { + if (*head == *tail) { + /* removing only element in the list */ + *head = NULL; + *tail = NULL; + } else { + /* removing last item in the list */ + *tail = (*tail)->prev; + (*tail)->next = NULL; + RF_ASSERT((*head)->prev == NULL); + RF_ASSERT((*tail)->next == NULL); + } + } else { + if (w == *head) { + /* removing first item in the list */ + *head = (*head)->next; + (*head)->prev = NULL; + RF_ASSERT((*head)->prev == NULL); + RF_ASSERT((*tail)->next == NULL); + } else { + /* removing an item from the middle of + * the list */ + w->prev->next = w->next; + w->next->prev = w->prev; + RF_ASSERT((*head)->prev == NULL); + RF_ASSERT((*tail)->next == NULL); + } + } + w->prev = NULL; + w->next = NULL; + if (rf_parityLogDebug) + printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); + return (w); + } else + w = w->prev; } - else - w = w->prev; - } - if (!ignoreLocks) - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - return(NULL); + if (!ignoreLocks) + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + return (NULL); } -static RF_ParityLogData_t *DequeueMatchingLogData( - RF_Raid_t *raidPtr, - RF_ParityLogData_t **head, - RF_ParityLogData_t **tail) +static RF_ParityLogData_t * +DequeueMatchingLogData( + RF_Raid_t * raidPtr, + RF_ParityLogData_t ** head, + RF_ParityLogData_t ** tail) { - RF_ParityLogData_t *logDataList, *logData; - int regionID; - - /* Remove and return an in-core parity log from the tail of - a disk queue (*head, *tail). 
Then remove all matching - (identical regionIDs) logData and return as a linked list. - - NON-BLOCKING - */ - - logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); - if (logDataList) - { - regionID = logDataList->regionID; - logData = logDataList; - logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); - while (logData->next) - { - logData = logData->next; - logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); + RF_ParityLogData_t *logDataList, *logData; + int regionID; + + /* Remove and return an in-core parity log from the tail of a disk + * queue (*head, *tail). Then remove all matching (identical + * regionIDs) logData and return as a linked list. + * + * NON-BLOCKING */ + + logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); + if (logDataList) { + regionID = logDataList->regionID; + logData = logDataList; + logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); + while (logData->next) { + logData = logData->next; + logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); + } } - } - return(logDataList); + return (logDataList); } -static RF_ParityLog_t *AcquireParityLog( - RF_ParityLogData_t *logData, - int finish) +static RF_ParityLog_t * +AcquireParityLog( + RF_ParityLogData_t * logData, + int finish) { - RF_ParityLog_t *log = NULL; - RF_Raid_t *raidPtr; - - /* Grab a log buffer from the pool and return it. - If no buffers are available, return NULL. 
- NON-BLOCKING - */ - raidPtr = logData->common->raidPtr; - RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); - if (raidPtr->parityLogPool.parityLogs) - { - log = raidPtr->parityLogPool.parityLogs; - raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; - log->regionID = logData->regionID; - log->numRecords = 0; - log->next = NULL; - raidPtr->logsInUse++; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); - } - else - { - /* no logs available, so place ourselves on the queue of work waiting on log buffers - this is done while parityLogPool.mutex is held, to ensure synchronization - with ReleaseParityLogs. - */ - if (rf_parityLogDebug) - printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); - if (finish) - RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); - else - EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); - return(log); + RF_ParityLog_t *log = NULL; + RF_Raid_t *raidPtr; + + /* Grab a log buffer from the pool and return it. If no buffers are + * available, return NULL. NON-BLOCKING */ + raidPtr = logData->common->raidPtr; + RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); + if (raidPtr->parityLogPool.parityLogs) { + log = raidPtr->parityLogPool.parityLogs; + raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; + log->regionID = logData->regionID; + log->numRecords = 0; + log->next = NULL; + raidPtr->logsInUse++; + RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); + } else { + /* no logs available, so place ourselves on the queue of work + * waiting on log buffers this is done while + * parityLogPool.mutex is held, to ensure synchronization with + * ReleaseParityLogs. 
*/ + if (rf_parityLogDebug) + printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); + if (finish) + RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); + else + EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); + } + RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); + return (log); } -void rf_ReleaseParityLogs( - RF_Raid_t *raidPtr, - RF_ParityLog_t *firstLog) +void +rf_ReleaseParityLogs( + RF_Raid_t * raidPtr, + RF_ParityLog_t * firstLog) { - RF_ParityLogData_t *logDataList; - RF_ParityLog_t *log, *lastLog; - int cnt; - - /* Insert a linked list of parity logs (firstLog) to - the free list (parityLogPool.parityLogPool) - - NON-BLOCKING. - */ - - RF_ASSERT(firstLog); - - /* Before returning logs to global free list, service all - requests which are blocked on logs. Holding mutexes for parityLogPool and parityLogDiskQueue - forces synchronization with AcquireParityLog(). 
- */ - RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); - log = firstLog; - if (firstLog) - firstLog = firstLog->next; - log->numRecords = 0; - log->next = NULL; - while (logDataList && log) - { - RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); - if (rf_parityLogDebug) - printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); - if (log == NULL) - { - log = firstLog; - if (firstLog) - { - firstLog = firstLog->next; - log->numRecords = 0; - log->next = NULL; - } - } - RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - if (log) + RF_ParityLogData_t *logDataList; + RF_ParityLog_t *log, *lastLog; + int cnt; + + /* Insert a linked list of parity logs (firstLog) to the free list + * (parityLogPool.parityLogPool) + * + * NON-BLOCKING. */ + + RF_ASSERT(firstLog); + + /* Before returning logs to global free list, service all requests + * which are blocked on logs. Holding mutexes for parityLogPool and + * parityLogDiskQueue forces synchronization with AcquireParityLog(). 
*/ + RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); - } - /* return remaining logs to pool */ - if (log) - { - log->next = firstLog; - firstLog = log; - } - if (firstLog) - { - lastLog = firstLog; - raidPtr->logsInUse--; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); - while (lastLog->next) - { - lastLog = lastLog->next; - raidPtr->logsInUse--; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); + log = firstLog; + if (firstLog) + firstLog = firstLog->next; + log->numRecords = 0; + log->next = NULL; + while (logDataList && log) { + RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); + if (rf_parityLogDebug) + printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); + if (log == NULL) { + log = firstLog; + if (firstLog) { + firstLog = firstLog->next; + log->numRecords = 0; + log->next = NULL; + } + } + RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + if (log) + logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); } - lastLog->next = raidPtr->parityLogPool.parityLogs; - raidPtr->parityLogPool.parityLogs = firstLog; - cnt = 0; - log = raidPtr->parityLogPool.parityLogs; - while (log) - { - cnt++; - log = log->next; + /* return remaining logs to pool */ + if (log) { + log->next = firstLog; + firstLog = log; } - RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + if (firstLog) { + lastLog = firstLog; + raidPtr->logsInUse--; + 
RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); + while (lastLog->next) { + lastLog = lastLog->next; + raidPtr->logsInUse--; + RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); + } + lastLog->next = raidPtr->parityLogPool.parityLogs; + raidPtr->parityLogPool.parityLogs = firstLog; + cnt = 0; + log = raidPtr->parityLogPool.parityLogs; + while (log) { + cnt++; + log = log->next; + } + RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); + } + RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } -static void ReintLog( - RF_Raid_t *raidPtr, - int regionID, - RF_ParityLog_t *log) +static void +ReintLog( + RF_Raid_t * raidPtr, + int regionID, + RF_ParityLog_t * log) { - RF_ASSERT(log); - - /* Insert an in-core parity log (log) into the disk queue of reintegration - work. Set the flag (reintInProgress) for the specified region (regionID) - to indicate that reintegration is in progress for this region. - NON-BLOCKING - */ - - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint complete */ - - if (rf_parityLogDebug) - printf("[requesting reintegration of region %d]\n", log->regionID); - /* move record to reintegration queue */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - log->next = raidPtr->parityLogDiskQueue.reintQueue; - raidPtr->parityLogDiskQueue.reintQueue = log; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); + RF_ASSERT(log); + + /* Insert an in-core parity log (log) into the disk queue of + * reintegration work. Set the flag (reintInProgress) for the + * specified region (regionID) to indicate that reintegration is in + * progress for this region. 
NON-BLOCKING */ + + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); + raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint + * complete */ + + if (rf_parityLogDebug) + printf("[requesting reintegration of region %d]\n", log->regionID); + /* move record to reintegration queue */ + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + log->next = raidPtr->parityLogDiskQueue.reintQueue; + raidPtr->parityLogDiskQueue.reintQueue = log; + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); } -static void FlushLog( - RF_Raid_t *raidPtr, - RF_ParityLog_t *log) +static void +FlushLog( + RF_Raid_t * raidPtr, + RF_ParityLog_t * log) { - /* insert a core log (log) into a list of logs (parityLogDiskQueue.flushQueue) - waiting to be written to disk. - NON-BLOCKING - */ - - RF_ASSERT(log); - RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); - RF_ASSERT(log->next == NULL); - /* move log to flush queue */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - log->next = raidPtr->parityLogDiskQueue.flushQueue; - raidPtr->parityLogDiskQueue.flushQueue = log; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); + /* insert a core log (log) into a list of logs + * (parityLogDiskQueue.flushQueue) waiting to be written to disk. 
+ * NON-BLOCKING */ + + RF_ASSERT(log); + RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); + RF_ASSERT(log->next == NULL); + /* move log to flush queue */ + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + log->next = raidPtr->parityLogDiskQueue.flushQueue; + raidPtr->parityLogDiskQueue.flushQueue = log; + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); } -static int DumpParityLogToDisk( - int finish, - RF_ParityLogData_t *logData) +static int +DumpParityLogToDisk( + int finish, + RF_ParityLogData_t * logData) { - int i, diskCount, regionID = logData->regionID; - RF_ParityLog_t *log; - RF_Raid_t *raidPtr; - - raidPtr = logData->common->raidPtr; - - /* Move a core log to disk. If the log disk is full, initiate - reintegration. - - Return (0) if we can enqueue the dump immediately, otherwise - return (1) to indicate we are blocked on reintegration and - control of the thread should be relinquished. - - Caller must hold regionInfo[regionID].mutex - - NON-BLOCKING - */ - - if (rf_parityLogDebug) - printf("[dumping parity log to disk, region %d]\n", regionID); - log = raidPtr->regionInfo[regionID].coreLog; - RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); - RF_ASSERT(log->next == NULL); - - /* if reintegration is in progress, must queue work */ - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - if (raidPtr->regionInfo[regionID].reintInProgress) - { - /* Can not proceed since this region is currently being reintegrated. - We can not block, so queue remaining work and return */ - if (rf_parityLogDebug) - printf("[region %d waiting on reintegration]\n",regionID); - /* XXX not sure about the use of finish - shouldn't this always be "Enqueue"? 
*/ - if (finish) - RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); - else - EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - return(1); /* relenquish control of this thread */ - } - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - raidPtr->regionInfo[regionID].coreLog = NULL; - if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) - /* IMPORTANT!! this loop bound assumes region disk holds an integral number of core logs */ - { - /* update disk map for this region */ - diskCount = raidPtr->regionInfo[regionID].diskCount; - for (i = 0; i < raidPtr->numSectorsPerLog; i++) + int i, diskCount, regionID = logData->regionID; + RF_ParityLog_t *log; + RF_Raid_t *raidPtr; + + raidPtr = logData->common->raidPtr; + + /* Move a core log to disk. If the log disk is full, initiate + * reintegration. + * + * Return (0) if we can enqueue the dump immediately, otherwise return + * (1) to indicate we are blocked on reintegration and control of the + * thread should be relinquished. + * + * Caller must hold regionInfo[regionID].mutex + * + * NON-BLOCKING */ + + if (rf_parityLogDebug) + printf("[dumping parity log to disk, region %d]\n", regionID); + log = raidPtr->regionInfo[regionID].coreLog; + RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); + RF_ASSERT(log->next == NULL); + + /* if reintegration is in progress, must queue work */ + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); + if (raidPtr->regionInfo[regionID].reintInProgress) { + /* Can not proceed since this region is currently being + * reintegrated. 
We can not block, so queue remaining work and + * return */ + if (rf_parityLogDebug) + printf("[region %d waiting on reintegration]\n", regionID); + /* XXX not sure about the use of finish - shouldn't this + * always be "Enqueue"? */ + if (finish) + RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); + else + EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); + return (1); /* relenquish control of this thread */ + } + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); + raidPtr->regionInfo[regionID].coreLog = NULL; + if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) + /* IMPORTANT!! this loop bound assumes region disk holds an + * integral number of core logs */ { - raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; - raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; + /* update disk map for this region */ + diskCount = raidPtr->regionInfo[regionID].diskCount; + for (i = 0; i < raidPtr->numSectorsPerLog; i++) { + raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; + raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; + } + log->diskOffset = diskCount; + raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; + FlushLog(raidPtr, log); + } else { + /* no room for log on disk, send it to disk manager and + * request reintegration */ + RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); + ReintLog(raidPtr, regionID, log); } - log->diskOffset = diskCount; - raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; - FlushLog(raidPtr, log); - } - else - { - /* no room for log on disk, send it to 
disk manager and request reintegration */ - RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); - ReintLog(raidPtr, regionID, log); - } - if (rf_parityLogDebug) - printf("[finished dumping parity log to disk, region %d]\n", regionID); - return(0); + if (rf_parityLogDebug) + printf("[finished dumping parity log to disk, region %d]\n", regionID); + return (0); } -int rf_ParityLogAppend( - RF_ParityLogData_t *logData, - int finish, - RF_ParityLog_t **incomingLog, - int clearReintFlag) +int +rf_ParityLogAppend( + RF_ParityLogData_t * logData, + int finish, + RF_ParityLog_t ** incomingLog, + int clearReintFlag) { - int regionID, logItem, itemDone; - RF_ParityLogData_t *item; - int punt, done = RF_FALSE; - RF_ParityLog_t *log; - RF_Raid_t *raidPtr; - RF_Etimer_t timer; - int (*wakeFunc)(RF_DagNode_t *node, int status); - void *wakeArg; - - /* Add parity to the appropriate log, one sector at a time. - This routine is called is called by dag functions ParityLogUpdateFunc - and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. - - Parity to be logged is contained in a linked-list (logData). When - this routine returns, every sector in the list will be in one of - three places: - 1) entered into the parity log - 2) queued, waiting on reintegration - 3) queued, waiting on a core log - - Blocked work is passed to the ParityLoggingDiskManager for completion. - Later, as conditions which required the block are removed, the work - reenters this routine with the "finish" parameter set to "RF_TRUE." - - NON-BLOCKING - */ - - raidPtr = logData->common->raidPtr; - /* lock the region for the first item in logData */ - RF_ASSERT(logData != NULL); - regionID = logData->regionID; - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); - - if (clearReintFlag) - { - /* Enable flushing for this region. 
Holding both locks provides - a synchronization barrier with DumpParityLogToDisk - */ - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); - raidPtr->regionInfo[regionID].diskCount = 0; - raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */ - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } - - /* process each item in logData */ - while (logData) - { - /* remove an item from logData */ - item = logData; - logData = logData->next; - item->next = NULL; - item->prev = NULL; - - if (rf_parityLogDebug) - printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n",item->regionID,(int)item->diskAddress.raidAddress, (int)item->diskAddress.numSector); - - /* see if we moved to a new region */ - if (regionID != item->regionID) - { - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - regionID = item->regionID; - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); + int regionID, logItem, itemDone; + RF_ParityLogData_t *item; + int punt, done = RF_FALSE; + RF_ParityLog_t *log; + RF_Raid_t *raidPtr; + RF_Etimer_t timer; + int (*wakeFunc) (RF_DagNode_t * node, int status); + void *wakeArg; + + /* Add parity to the appropriate log, one sector at a time. This + * routine is called is called by dag functions ParityLogUpdateFunc + * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. + * + * Parity to be logged is contained in a linked-list (logData). When + * this routine returns, every sector in the list will be in one of + * three places: 1) entered into the parity log 2) queued, waiting on + * reintegration 3) queued, waiting on a core log + * + * Blocked work is passed to the ParityLoggingDiskManager for completion. 
+ * Later, as conditions which required the block are removed, the work + * reenters this routine with the "finish" parameter set to "RF_TRUE." + * + * NON-BLOCKING */ + + raidPtr = logData->common->raidPtr; + /* lock the region for the first item in logData */ + RF_ASSERT(logData != NULL); + regionID = logData->regionID; + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); + + if (clearReintFlag) { + /* Enable flushing for this region. Holding both locks + * provides a synchronization barrier with DumpParityLogToDisk */ + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); + raidPtr->regionInfo[regionID].diskCount = 0; + raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now + * enabled */ + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } - - punt = RF_FALSE; /* Set to RF_TRUE if work is blocked. This can happen in one of two ways: - 1) no core log (AcquireParityLog) - 2) waiting on reintegration (DumpParityLogToDisk) - If punt is RF_TRUE, the dataItem was queued, so skip to next item. - */ - - /* process item, one sector at a time, until all sectors processed or we punt */ - if (item->diskAddress.numSector > 0) - done = RF_FALSE; - else - RF_ASSERT(0); - while (!punt && !done) - { - /* verify that a core log exists for this region */ - if (!raidPtr->regionInfo[regionID].coreLog) - { - /* Attempt to acquire a parity log. - If acquisition fails, queue remaining work in data item and move to nextItem. 
- */ - if (incomingLog) { - if (*incomingLog) - { - RF_ASSERT((*incomingLog)->next == NULL); - raidPtr->regionInfo[regionID].coreLog = *incomingLog; - raidPtr->regionInfo[regionID].coreLog->regionID = regionID; - *incomingLog = NULL; - } + /* process each item in logData */ + while (logData) { + /* remove an item from logData */ + item = logData; + logData = logData->next; + item->next = NULL; + item->prev = NULL; + + if (rf_parityLogDebug) + printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); + + /* see if we moved to a new region */ + if (regionID != item->regionID) { + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + regionID = item->regionID; + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); + } + punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This + * can happen in one of two ways: 1) no core + * log (AcquireParityLog) 2) waiting on + * reintegration (DumpParityLogToDisk) If punt + * is RF_TRUE, the dataItem was queued, so + * skip to next item. 
*/ + + /* process item, one sector at a time, until all sectors + * processed or we punt */ + if (item->diskAddress.numSector > 0) + done = RF_FALSE; else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - } else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - /* Note: AcquireParityLog either returns a log or enqueues currentItem */ - } - if (!raidPtr->regionInfo[regionID].coreLog) - punt = RF_TRUE; /* failed to find a core log */ - else - { - RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); - /* verify that the log has room for new entries */ - /* if log is full, dump it to disk and grab a new log */ - if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) - { - /* log is full, dump it to disk */ - if (DumpParityLogToDisk(finish, item)) - punt = RF_TRUE; /* dump unsuccessful, blocked on reintegration */ - else - { - /* dump was successful */ - if (incomingLog) { - if (*incomingLog) - { - RF_ASSERT((*incomingLog)->next == NULL); - raidPtr->regionInfo[regionID].coreLog = *incomingLog; - raidPtr->regionInfo[regionID].coreLog->regionID = regionID; - *incomingLog = NULL; - } - else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - } else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - /* if a core log is not available, must queue work and return */ - if (!raidPtr->regionInfo[regionID].coreLog) - punt = RF_TRUE; /* blocked on log availability */ - } + RF_ASSERT(0); + while (!punt && !done) { + /* verify that a core log exists for this region */ + if (!raidPtr->regionInfo[regionID].coreLog) { + /* Attempt to acquire a parity log. If + * acquisition fails, queue remaining work in + * data item and move to nextItem. 
*/ + if (incomingLog) { + if (*incomingLog) { + RF_ASSERT((*incomingLog)->next == NULL); + raidPtr->regionInfo[regionID].coreLog = *incomingLog; + raidPtr->regionInfo[regionID].coreLog->regionID = regionID; + *incomingLog = NULL; + } else + raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); + } else + raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); + /* Note: AcquireParityLog either returns a log + * or enqueues currentItem */ + } + if (!raidPtr->regionInfo[regionID].coreLog) + punt = RF_TRUE; /* failed to find a core log */ + else { + RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); + /* verify that the log has room for new + * entries */ + /* if log is full, dump it to disk and grab a + * new log */ + if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { + /* log is full, dump it to disk */ + if (DumpParityLogToDisk(finish, item)) + punt = RF_TRUE; /* dump unsuccessful, + * blocked on + * reintegration */ + else { + /* dump was successful */ + if (incomingLog) { + if (*incomingLog) { + RF_ASSERT((*incomingLog)->next == NULL); + raidPtr->regionInfo[regionID].coreLog = *incomingLog; + raidPtr->regionInfo[regionID].coreLog->regionID = regionID; + *incomingLog = NULL; + } else + raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); + } else + raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); + /* if a core log is not + * available, must queue work + * and return */ + if (!raidPtr->regionInfo[regionID].coreLog) + punt = RF_TRUE; /* blocked on log + * availability */ + } + } + } + /* if we didn't punt on this item, attempt to add a + * sector to the core log */ + if (!punt) { + RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); + /* at this point, we have a core log with + * enough room for a sector */ + /* copy a sector into the log */ + log = raidPtr->regionInfo[regionID].coreLog; + RF_ASSERT(log->numRecords < 
raidPtr->numSectorsPerLog); + logItem = log->numRecords++; + log->records[logItem].parityAddr = item->diskAddress; + RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); + RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); + log->records[logItem].parityAddr.numSector = 1; + log->records[logItem].operation = item->common->operation; + bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector)); + item->diskAddress.numSector--; + item->diskAddress.startSector++; + if (item->diskAddress.numSector == 0) + done = RF_TRUE; + } } - } - /* if we didn't punt on this item, attempt to add a sector to the core log */ - if (!punt) - { - RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); - /* at this point, we have a core log with enough room for a sector */ - /* copy a sector into the log */ - log = raidPtr->regionInfo[regionID].coreLog; - RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); - logItem = log->numRecords++; - log->records[logItem].parityAddr = item->diskAddress; - RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); - RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); - log->records[logItem].parityAddr.numSector = 1; - log->records[logItem].operation = item->common->operation; - bcopy((item->common->bufPtr + (item->bufOffset++ * (1<<item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1<<item->common->raidPtr->logBytesPerSector)), (1<<item->common->raidPtr->logBytesPerSector)); - item->diskAddress.numSector--; - item->diskAddress.startSector++; - if 
(item->diskAddress.numSector == 0) - done = RF_TRUE; - } - } - if (!punt) - { - /* Processed this item completely, decrement count of items - to be processed. - */ - RF_ASSERT(item->diskAddress.numSector == 0); - RF_LOCK_MUTEX(item->common->mutex); - item->common->cnt--; - if (item->common->cnt == 0) - itemDone = RF_TRUE; - else - itemDone = RF_FALSE; - RF_UNLOCK_MUTEX(item->common->mutex); - if (itemDone) - { - /* Finished processing all log data for this IO - Return structs to free list and invoke wakeup function. - */ - timer = item->common->startTime; /* grab initial value of timer */ - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); - if (rf_parityLogDebug) - printf("[waking process for region %d]\n", item->regionID); - wakeFunc = item->common->wakeFunc; - wakeArg = item->common->wakeArg; - FreeParityLogCommonData(item->common); - FreeParityLogData(item); - (wakeFunc)(wakeArg, 0); - } - else - FreeParityLogData(item); + if (!punt) { + /* Processed this item completely, decrement count of + * items to be processed. */ + RF_ASSERT(item->diskAddress.numSector == 0); + RF_LOCK_MUTEX(item->common->mutex); + item->common->cnt--; + if (item->common->cnt == 0) + itemDone = RF_TRUE; + else + itemDone = RF_FALSE; + RF_UNLOCK_MUTEX(item->common->mutex); + if (itemDone) { + /* Finished processing all log data for this + * IO Return structs to free list and invoke + * wakeup function. 
*/ + timer = item->common->startTime; /* grab initial value of + * timer */ + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); + if (rf_parityLogDebug) + printf("[waking process for region %d]\n", item->regionID); + wakeFunc = item->common->wakeFunc; + wakeArg = item->common->wakeArg; + FreeParityLogCommonData(item->common); + FreeParityLogData(item); + (wakeFunc) (wakeArg, 0); + } else + FreeParityLogData(item); + } } - } - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - if (rf_parityLogDebug) - printf("[exiting ParityLogAppend]\n"); - return(0); + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + if (rf_parityLogDebug) + printf("[exiting ParityLogAppend]\n"); + return (0); } -void rf_EnableParityLogging(RF_Raid_t *raidPtr) +void +rf_EnableParityLogging(RF_Raid_t * raidPtr) { - int regionID; - - for (regionID = 0; regionID < rf_numParityRegions; regionID++) { - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - } - if (rf_parityLogDebug) - printf("[parity logging enabled]\n"); -} + int regionID; -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ + for (regionID = 0; regionID < rf_numParityRegions; regionID++) { + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + } + if (rf_parityLogDebug) + printf("[parity logging enabled]\n"); +} +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylog.h b/sys/dev/raidframe/rf_paritylog.h index fd6128174e1..43c5711c666 100644 --- a/sys/dev/raidframe/rf_paritylog.h +++ b/sys/dev/raidframe/rf_paritylog.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_paritylog.h,v 1.1 1999/01/11 14:29:34 niklas Exp $ */ -/* $NetBSD: rf_paritylog.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_paritylog.h,v 1.2 1999/02/16 
00:03:05 niklas Exp $ */ +/* $NetBSD: rf_paritylog.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,70 +29,6 @@ /* header file for parity log * - * : - * Log: rf_paritylog.h,v - * Revision 1.21 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.20 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.19 1996/06/11 10:17:57 jimz - * definitions and run state for parity logging thread - * - * Revision 1.18 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.17 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.16 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.15 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.14 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.13 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.12 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.11 1995/12/06 20:54:58 wvcii - * added prototyping - * - * Revision 1.10 1995/11/30 16:05:50 wvcii - * added copyright info - * - * Revision 1.9 1995/10/07 05:09:27 wvcii - * removed #define BYTESPERSECTOR 512 - * - * Revision 1.8 1995/09/06 19:27:52 wvcii - * added startTime to commonLogData - * - * Revision 1.7 1995/07/07 00:13:42 wvcii - * this version free from deadlock, fails parity verification - * */ #ifndef _RF__RF_PARITYLOG_H_ @@ -105,121 +41,141 @@ typedef int RF_RegionId_t; typedef enum RF_ParityRecordType_e { - RF_STOP, - RF_UPDATE, - RF_OVERWRITE -} RF_ParityRecordType_t; + RF_STOP, + RF_UPDATE, + RF_OVERWRITE +} RF_ParityRecordType_t; struct RF_CommonLogData_s { - RF_DECLARE_MUTEX(mutex) /* protects cnt */ - int cnt; /* when 0, time to call wakeFunc */ - RF_Raid_t *raidPtr; + RF_DECLARE_MUTEX(mutex) /* protects cnt */ + int cnt; /* when 0, time to call wakeFunc */ + RF_Raid_t *raidPtr; /* int (*wakeFunc)(struct buf *); */ - int (*wakeFunc)(RF_DagNode_t *node, int status); - void *wakeArg; - RF_AccTraceEntry_t *tracerec; - RF_Etimer_t startTime; - caddr_t bufPtr; - RF_ParityRecordType_t operation; - RF_CommonLogData_t *next; + int (*wakeFunc) (RF_DagNode_t * node, int status); + void *wakeArg; + RF_AccTraceEntry_t *tracerec; + RF_Etimer_t startTime; + caddr_t bufPtr; + RF_ParityRecordType_t operation; + RF_CommonLogData_t *next; }; struct RF_ParityLogData_s { - RF_RegionId_t regionID; /* this struct guaranteed to span a single region */ - int bufOffset; /* offset from common->bufPtr */ - RF_PhysDiskAddr_t diskAddress; - RF_CommonLogData_t *common; /* info shared by one or more parityLogData structs 
*/ - RF_ParityLogData_t *next; - RF_ParityLogData_t *prev; + RF_RegionId_t regionID; /* this struct guaranteed to span a single + * region */ + int bufOffset; /* offset from common->bufPtr */ + RF_PhysDiskAddr_t diskAddress; + RF_CommonLogData_t *common; /* info shared by one or more + * parityLogData structs */ + RF_ParityLogData_t *next; + RF_ParityLogData_t *prev; }; struct RF_ParityLogAppendQueue_s { - RF_DECLARE_MUTEX(mutex) + RF_DECLARE_MUTEX(mutex) }; struct RF_ParityLogRecord_s { - RF_PhysDiskAddr_t parityAddr; - RF_ParityRecordType_t operation; + RF_PhysDiskAddr_t parityAddr; + RF_ParityRecordType_t operation; }; struct RF_ParityLog_s { - RF_RegionId_t regionID; - int numRecords; - int diskOffset; - RF_ParityLogRecord_t *records; - caddr_t bufPtr; - RF_ParityLog_t *next; + RF_RegionId_t regionID; + int numRecords; + int diskOffset; + RF_ParityLogRecord_t *records; + caddr_t bufPtr; + RF_ParityLog_t *next; }; struct RF_ParityLogQueue_s { - RF_DECLARE_MUTEX(mutex) - RF_ParityLog_t *parityLogs; + RF_DECLARE_MUTEX(mutex) + RF_ParityLog_t *parityLogs; }; struct RF_RegionBufferQueue_s { - RF_DECLARE_MUTEX(mutex) - RF_DECLARE_COND(cond) - int bufferSize; - int totalBuffers; /* size of array 'buffers' */ - int availableBuffers; /* num available 'buffers' */ - int emptyBuffersIndex; /* stick next freed buffer here */ - int availBuffersIndex; /* grab next buffer from here */ - caddr_t *buffers; /* array buffers used to hold parity */ + RF_DECLARE_MUTEX(mutex) + RF_DECLARE_COND(cond) + int bufferSize; + int totalBuffers; /* size of array 'buffers' */ + int availableBuffers; /* num available 'buffers' */ + int emptyBuffersIndex; /* stick next freed buffer here */ + int availBuffersIndex; /* grab next buffer from here */ + caddr_t *buffers; /* array buffers used to hold parity */ }; - -#define RF_PLOG_CREATED (1<<0) /* thread is created */ -#define RF_PLOG_RUNNING (1<<1) /* thread is running */ -#define RF_PLOG_TERMINATE (1<<2) /* thread is terminated (should exit) */ 
-#define RF_PLOG_SHUTDOWN (1<<3) /* thread is aware and exiting/exited */ +#define RF_PLOG_CREATED (1<<0)/* thread is created */ +#define RF_PLOG_RUNNING (1<<1)/* thread is running */ +#define RF_PLOG_TERMINATE (1<<2)/* thread is terminated (should exit) */ +#define RF_PLOG_SHUTDOWN (1<<3)/* thread is aware and exiting/exited */ struct RF_ParityLogDiskQueue_s { - RF_DECLARE_MUTEX(mutex) /* protects all vars in this struct */ - RF_DECLARE_COND(cond) - int threadState; /* is thread running, should it shutdown (see above) */ - RF_ParityLog_t *flushQueue; /* list of parity logs to be flushed to log disk */ - RF_ParityLog_t *reintQueue; /* list of parity logs waiting to be reintegrated */ - RF_ParityLogData_t *bufHead; /* head of FIFO list of log data, waiting on a buffer */ - RF_ParityLogData_t *bufTail; /* tail of FIFO list of log data, waiting on a buffer */ - RF_ParityLogData_t *reintHead; /* head of FIFO list of log data, waiting on reintegration */ - RF_ParityLogData_t *reintTail; /* tail of FIFO list of log data, waiting on reintegration */ - RF_ParityLogData_t *logBlockHead; /* queue of work, blocked until a log is available */ - RF_ParityLogData_t *logBlockTail; - RF_ParityLogData_t *reintBlockHead; /* queue of work, blocked until reintegration is complete */ - RF_ParityLogData_t *reintBlockTail; - RF_CommonLogData_t *freeCommonList; /* list of unused common data structs */ - RF_ParityLogData_t *freeDataList; /* list of unused log data structs */ + RF_DECLARE_MUTEX(mutex) /* protects all vars in this struct */ + RF_DECLARE_COND(cond) + int threadState; /* is thread running, should it shutdown (see + * above) */ + RF_ParityLog_t *flushQueue; /* list of parity logs to be flushed + * to log disk */ + RF_ParityLog_t *reintQueue; /* list of parity logs waiting to be + * reintegrated */ + RF_ParityLogData_t *bufHead; /* head of FIFO list of log data, + * waiting on a buffer */ + RF_ParityLogData_t *bufTail; /* tail of FIFO list of log data, + * waiting on a buffer */ 
+ RF_ParityLogData_t *reintHead; /* head of FIFO list of log data, + * waiting on reintegration */ + RF_ParityLogData_t *reintTail; /* tail of FIFO list of log data, + * waiting on reintegration */ + RF_ParityLogData_t *logBlockHead; /* queue of work, blocked + * until a log is available */ + RF_ParityLogData_t *logBlockTail; + RF_ParityLogData_t *reintBlockHead; /* queue of work, blocked + * until reintegration is + * complete */ + RF_ParityLogData_t *reintBlockTail; + RF_CommonLogData_t *freeCommonList; /* list of unused common data + * structs */ + RF_ParityLogData_t *freeDataList; /* list of unused log data + * structs */ }; struct RF_DiskMap_s { - RF_PhysDiskAddr_t parityAddr; - RF_ParityRecordType_t operation; + RF_PhysDiskAddr_t parityAddr; + RF_ParityRecordType_t operation; }; struct RF_RegionInfo_s { - RF_DECLARE_MUTEX(mutex) /* protects: diskCount, diskMap, loggingEnabled, coreLog */ - RF_DECLARE_MUTEX(reintMutex) /* protects: reintInProgress */ - int reintInProgress; /* flag used to suspend flushing operations */ - RF_SectorCount_t capacity; /* capacity of this region in sectors */ - RF_SectorNum_t regionStartAddr; /* starting disk address for this region */ - RF_SectorNum_t parityStartAddr; /* starting disk address for this region */ - RF_SectorCount_t numSectorsParity; /* number of parity sectors protected by this region */ - RF_SectorCount_t diskCount; /* num of sectors written to this region's disk log */ - RF_DiskMap_t *diskMap; /* in-core map of what's in this region's disk log */ - int loggingEnabled; /* logging enable for this region */ - RF_ParityLog_t *coreLog; /* in-core log for this region */ + RF_DECLARE_MUTEX(mutex) /* protects: diskCount, diskMap, + * loggingEnabled, coreLog */ + RF_DECLARE_MUTEX(reintMutex) /* protects: reintInProgress */ + int reintInProgress;/* flag used to suspend flushing operations */ + RF_SectorCount_t capacity; /* capacity of this region in sectors */ + RF_SectorNum_t regionStartAddr; /* starting disk address for 
this + * region */ + RF_SectorNum_t parityStartAddr; /* starting disk address for this + * region */ + RF_SectorCount_t numSectorsParity; /* number of parity sectors + * protected by this region */ + RF_SectorCount_t diskCount; /* num of sectors written to this + * region's disk log */ + RF_DiskMap_t *diskMap; /* in-core map of what's in this region's disk + * log */ + int loggingEnabled; /* logging enable for this region */ + RF_ParityLog_t *coreLog;/* in-core log for this region */ }; -RF_ParityLogData_t *rf_CreateParityLogData(RF_ParityRecordType_t operation, - RF_PhysDiskAddr_t *pda, caddr_t bufPtr, RF_Raid_t *raidPtr, - int (*wakeFunc)(RF_DagNode_t *node, int status), - void *wakeArg, RF_AccTraceEntry_t *tracerec, - RF_Etimer_t startTime); -RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(RF_Raid_t *raidPtr, - RF_RegionId_t regionID, RF_ParityLogData_t **head, - RF_ParityLogData_t **tail, int ignoreLocks); -void rf_ReleaseParityLogs(RF_Raid_t *raidPtr, RF_ParityLog_t *firstLog); -int rf_ParityLogAppend(RF_ParityLogData_t *logData, int finish, - RF_ParityLog_t **incomingLog, int clearReintFlag); -void rf_EnableParityLogging(RF_Raid_t *raidPtr); - -#endif /* !_RF__RF_PARITYLOG_H_ */ +RF_ParityLogData_t * +rf_CreateParityLogData(RF_ParityRecordType_t operation, + RF_PhysDiskAddr_t * pda, caddr_t bufPtr, RF_Raid_t * raidPtr, + int (*wakeFunc) (RF_DagNode_t * node, int status), + void *wakeArg, RF_AccTraceEntry_t * tracerec, + RF_Etimer_t startTime); + RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(RF_Raid_t * raidPtr, + RF_RegionId_t regionID, RF_ParityLogData_t ** head, + RF_ParityLogData_t ** tail, int ignoreLocks); + void rf_ReleaseParityLogs(RF_Raid_t * raidPtr, RF_ParityLog_t * firstLog); + int rf_ParityLogAppend(RF_ParityLogData_t * logData, int finish, + RF_ParityLog_t ** incomingLog, int clearReintFlag); + void rf_EnableParityLogging(RF_Raid_t * raidPtr); + +#endif /* !_RF__RF_PARITYLOG_H_ */ diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.c 
b/sys/dev/raidframe/rf_paritylogDiskMgr.c index 92079d5ec26..624b1b3609f 100644 --- a/sys/dev/raidframe/rf_paritylogDiskMgr.c +++ b/sys/dev/raidframe/rf_paritylogDiskMgr.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_paritylogDiskMgr.c,v 1.1 1999/01/11 14:29:34 niklas Exp $ */ -/* $NetBSD: rf_paritylogDiskMgr.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */ +/* $OpenBSD: rf_paritylogDiskMgr.c,v 1.2 1999/02/16 00:03:05 niklas Exp $ */ +/* $NetBSD: rf_paritylogDiskMgr.c,v 1.3 1999/02/05 00:06:14 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,113 +28,6 @@ */ /* Code for flushing and reintegration operations related to parity logging. * - * : - * Log: rf_paritylogDiskMgr.c,v - * Revision 1.25 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.24 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.23 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.22 1996/06/11 10:17:33 jimz - * Put in thread startup/shutdown mechanism for proper synchronization - * with start and end of day routines. - * - * Revision 1.21 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.20 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.19 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. 
- * - * Revision 1.18 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.17 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.16 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.15 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.14 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.13 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.12 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.11 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.10 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.9 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.8 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.7 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.6 1995/12/06 20:58:27 wvcii - * added prototypes - * - * Revision 1.5 1995/11/30 16:06:05 wvcii - * added copyright info - * - * Revision 1.4 1995/10/09 22:41:10 wvcii - * minor bug fix - * - * Revision 1.3 1995/10/08 20:43:47 wvcii - * lots of random debugging - debugging still incomplete - * - * Revision 1.2 1995/09/07 15:52:19 jimz - * noop compile when INCLUDE_PARITYLOGGING not defined - * - * Revision 1.1 1995/09/06 19:24:44 wvcii - * Initial revision - * */ #include "rf_archs.h" @@ -165,626 +58,601 @@ static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *); -static caddr_t AcquireReintBuffer(pool) - RF_RegionBufferQueue_t *pool; +static caddr_t +AcquireReintBuffer(pool) + RF_RegionBufferQueue_t *pool; { - caddr_t bufPtr = NULL; - - /* Return a region buffer from the free list (pool). - If the free list is empty, WAIT. 
- BLOCKING */ - - RF_LOCK_MUTEX(pool->mutex); - if (pool->availableBuffers > 0) { - bufPtr = pool->buffers[pool->availBuffersIndex]; - pool->availableBuffers--; - pool->availBuffersIndex++; - if (pool->availBuffersIndex == pool->totalBuffers) - pool->availBuffersIndex = 0; - RF_UNLOCK_MUTEX(pool->mutex); - } - else { - RF_PANIC(); /* should never happen in currect config, single reint */ - RF_WAIT_COND(pool->cond, pool->mutex); - } - return(bufPtr); + caddr_t bufPtr = NULL; + + /* Return a region buffer from the free list (pool). If the free list + * is empty, WAIT. BLOCKING */ + + RF_LOCK_MUTEX(pool->mutex); + if (pool->availableBuffers > 0) { + bufPtr = pool->buffers[pool->availBuffersIndex]; + pool->availableBuffers--; + pool->availBuffersIndex++; + if (pool->availBuffersIndex == pool->totalBuffers) + pool->availBuffersIndex = 0; + RF_UNLOCK_MUTEX(pool->mutex); + } else { + RF_PANIC(); /* should never happen in currect config, + * single reint */ + RF_WAIT_COND(pool->cond, pool->mutex); + } + return (bufPtr); } -static void ReleaseReintBuffer( - RF_RegionBufferQueue_t *pool, - caddr_t bufPtr) +static void +ReleaseReintBuffer( + RF_RegionBufferQueue_t * pool, + caddr_t bufPtr) { - /* Insert a region buffer (bufPtr) into the free list (pool). - NON-BLOCKING */ - - RF_LOCK_MUTEX(pool->mutex); - pool->availableBuffers++; - pool->buffers[pool->emptyBuffersIndex] = bufPtr; - pool->emptyBuffersIndex++; - if (pool->emptyBuffersIndex == pool->totalBuffers) - pool->emptyBuffersIndex = 0; - RF_ASSERT(pool->availableBuffers <= pool->totalBuffers); - RF_UNLOCK_MUTEX(pool->mutex); - RF_SIGNAL_COND(pool->cond); + /* Insert a region buffer (bufPtr) into the free list (pool). 
+ * NON-BLOCKING */ + + RF_LOCK_MUTEX(pool->mutex); + pool->availableBuffers++; + pool->buffers[pool->emptyBuffersIndex] = bufPtr; + pool->emptyBuffersIndex++; + if (pool->emptyBuffersIndex == pool->totalBuffers) + pool->emptyBuffersIndex = 0; + RF_ASSERT(pool->availableBuffers <= pool->totalBuffers); + RF_UNLOCK_MUTEX(pool->mutex); + RF_SIGNAL_COND(pool->cond); } -static void ReadRegionLog( - RF_RegionId_t regionID, - RF_MCPair_t *rrd_mcpair, - caddr_t regionBuffer, - RF_Raid_t *raidPtr, - RF_DagHeader_t **rrd_dag_h, - RF_AllocListElem_t **rrd_alloclist, - RF_PhysDiskAddr_t **rrd_pda) +static void +ReadRegionLog( + RF_RegionId_t regionID, + RF_MCPair_t * rrd_mcpair, + caddr_t regionBuffer, + RF_Raid_t * raidPtr, + RF_DagHeader_t ** rrd_dag_h, + RF_AllocListElem_t ** rrd_alloclist, + RF_PhysDiskAddr_t ** rrd_pda) { - /* Initiate the read a region log from disk. Once initiated, return - to the calling routine. - - NON-BLOCKING - */ - - RF_AccTraceEntry_t tracerec; - RF_DagNode_t *rrd_rdNode; - - /* create DAG to read region log from disk */ - rf_MakeAllocList(*rrd_alloclist); - *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for the core log */ - /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */ - *rrd_pda = rf_AllocPDAList(1); - rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector)); - (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity; - - if ((*rrd_pda)->next) { - (*rrd_pda)->next = NULL; - printf("set rrd_pda->next to NULL\n"); - } - - /* initialize DAG parameters */ - bzero((char *)&tracerec,sizeof(tracerec)); - (*rrd_dag_h)->tracerec = &tracerec; - rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; - rrd_rdNode->params[0].p = *rrd_pda; + /* Initiate the read a region log from disk. 
Once initiated, return + * to the calling routine. + * + * NON-BLOCKING */ + + RF_AccTraceEntry_t tracerec; + RF_DagNode_t *rrd_rdNode; + + /* create DAG to read region log from disk */ + rf_MakeAllocList(*rrd_alloclist); + *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc, + "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); + + /* create and initialize PDA for the core log */ + /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t + * *)); */ + *rrd_pda = rf_AllocPDAList(1); + rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector)); + (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity; + + if ((*rrd_pda)->next) { + (*rrd_pda)->next = NULL; + printf("set rrd_pda->next to NULL\n"); + } + /* initialize DAG parameters */ + bzero((char *) &tracerec, sizeof(tracerec)); + (*rrd_dag_h)->tracerec = &tracerec; + rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; + rrd_rdNode->params[0].p = *rrd_pda; /* rrd_rdNode->params[1] = regionBuffer; */ - rrd_rdNode->params[2].v = 0; - rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + rrd_rdNode->params[2].v = 0; + rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - /* launch region log read dag */ - rf_DispatchDAG(*rrd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc, - (void *) rrd_mcpair); + /* launch region log read dag */ + rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) rrd_mcpair); } -static void WriteCoreLog( - RF_ParityLog_t *log, - RF_MCPair_t *fwr_mcpair, - RF_Raid_t *raidPtr, - RF_DagHeader_t **fwr_dag_h, - RF_AllocListElem_t **fwr_alloclist, - RF_PhysDiskAddr_t **fwr_pda) +static void +WriteCoreLog( + RF_ParityLog_t * log, + RF_MCPair_t * fwr_mcpair, + RF_Raid_t * raidPtr, + RF_DagHeader_t ** fwr_dag_h, + RF_AllocListElem_t ** fwr_alloclist, + RF_PhysDiskAddr_t ** fwr_pda) { - RF_RegionId_t 
regionID = log->regionID; - RF_AccTraceEntry_t tracerec; - RF_SectorNum_t regionOffset; - RF_DagNode_t *fwr_wrNode; - - /* Initiate the write of a core log to a region log disk. - Once initiated, return to the calling routine. - - NON-BLOCKING - */ - - /* create DAG to write a core log to a region log disk */ - rf_MakeAllocList(*fwr_alloclist); - *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for the region log */ - /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */ - *fwr_pda = rf_AllocPDAList(1); - regionOffset = log->diskOffset; - rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector)); - (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; - - /* initialize DAG parameters */ - bzero((char *)&tracerec,sizeof(tracerec)); - (*fwr_dag_h)->tracerec = &tracerec; - fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; - fwr_wrNode->params[0].p = *fwr_pda; + RF_RegionId_t regionID = log->regionID; + RF_AccTraceEntry_t tracerec; + RF_SectorNum_t regionOffset; + RF_DagNode_t *fwr_wrNode; + + /* Initiate the write of a core log to a region log disk. Once + * initiated, return to the calling routine. 
+ * + * NON-BLOCKING */ + + /* create DAG to write a core log to a region log disk */ + rf_MakeAllocList(*fwr_alloclist); + *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); + + /* create and initialize PDA for the region log */ + /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t + * *)); */ + *fwr_pda = rf_AllocPDAList(1); + regionOffset = log->diskOffset; + rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector)); + (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; + + /* initialize DAG parameters */ + bzero((char *) &tracerec, sizeof(tracerec)); + (*fwr_dag_h)->tracerec = &tracerec; + fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; + fwr_wrNode->params[0].p = *fwr_pda; /* fwr_wrNode->params[1] = log->bufPtr; */ - fwr_wrNode->params[2].v = 0; - fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - - /* launch the dag to write the core log to disk */ - rf_DispatchDAG(*fwr_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc, - (void *) fwr_mcpair); + fwr_wrNode->params[2].v = 0; + fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + + /* launch the dag to write the core log to disk */ + rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) fwr_mcpair); } -static void ReadRegionParity( - RF_RegionId_t regionID, - RF_MCPair_t *prd_mcpair, - caddr_t parityBuffer, - RF_Raid_t *raidPtr, - RF_DagHeader_t **prd_dag_h, - RF_AllocListElem_t **prd_alloclist, - RF_PhysDiskAddr_t **prd_pda) +static void +ReadRegionParity( + RF_RegionId_t regionID, + RF_MCPair_t * prd_mcpair, + caddr_t parityBuffer, + RF_Raid_t * raidPtr, + RF_DagHeader_t ** prd_dag_h, + RF_AllocListElem_t ** prd_alloclist, + RF_PhysDiskAddr_t ** prd_pda) { - /* Initiate the read region parity from disk. 
- Once initiated, return to the calling routine. - - NON-BLOCKING - */ - - RF_AccTraceEntry_t tracerec; - RF_DagNode_t *prd_rdNode; - - /* create DAG to read region parity from disk */ - rf_MakeAllocList(*prd_alloclist); - *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for region parity */ - /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */ - *prd_pda = rf_AllocPDAList(1); - rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector)); - if (rf_parityLogDebug) - printf("[reading %d sectors of parity from region %d]\n", - (int)(*prd_pda)->numSector, regionID); - if ((*prd_pda)->next) { - (*prd_pda)->next = NULL; - printf("set prd_pda->next to NULL\n"); - } - - /* initialize DAG parameters */ - bzero((char *)&tracerec,sizeof(tracerec)); - (*prd_dag_h)->tracerec = &tracerec; - prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; - prd_rdNode->params[0].p = *prd_pda; - prd_rdNode->params[1].p = parityBuffer; - prd_rdNode->params[2].v = 0; - prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - if (rf_validateDAGDebug) - rf_ValidateDAG(*prd_dag_h); - /* launch region parity read dag */ - rf_DispatchDAG(*prd_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc, - (void *) prd_mcpair); + /* Initiate the read region parity from disk. Once initiated, return + * to the calling routine. 
+ * + * NON-BLOCKING */ + + RF_AccTraceEntry_t tracerec; + RF_DagNode_t *prd_rdNode; + + /* create DAG to read region parity from disk */ + rf_MakeAllocList(*prd_alloclist); + *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc, + "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); + + /* create and initialize PDA for region parity */ + /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t + * *)); */ + *prd_pda = rf_AllocPDAList(1); + rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector)); + if (rf_parityLogDebug) + printf("[reading %d sectors of parity from region %d]\n", + (int) (*prd_pda)->numSector, regionID); + if ((*prd_pda)->next) { + (*prd_pda)->next = NULL; + printf("set prd_pda->next to NULL\n"); + } + /* initialize DAG parameters */ + bzero((char *) &tracerec, sizeof(tracerec)); + (*prd_dag_h)->tracerec = &tracerec; + prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; + prd_rdNode->params[0].p = *prd_pda; + prd_rdNode->params[1].p = parityBuffer; + prd_rdNode->params[2].v = 0; + prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + if (rf_validateDAGDebug) + rf_ValidateDAG(*prd_dag_h); + /* launch region parity read dag */ + rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) prd_mcpair); } -static void WriteRegionParity( - RF_RegionId_t regionID, - RF_MCPair_t *pwr_mcpair, - caddr_t parityBuffer, - RF_Raid_t *raidPtr, - RF_DagHeader_t **pwr_dag_h, - RF_AllocListElem_t **pwr_alloclist, - RF_PhysDiskAddr_t **pwr_pda) +static void +WriteRegionParity( + RF_RegionId_t regionID, + RF_MCPair_t * pwr_mcpair, + caddr_t parityBuffer, + RF_Raid_t * raidPtr, + RF_DagHeader_t ** pwr_dag_h, + RF_AllocListElem_t ** pwr_alloclist, + RF_PhysDiskAddr_t ** pwr_pda) { - /* Initiate the write of region parity to disk. - Once initiated, return to the calling routine. 
- - NON-BLOCKING - */ - - RF_AccTraceEntry_t tracerec; - RF_DagNode_t *pwr_wrNode; - - /* create DAG to write region log from disk */ - rf_MakeAllocList(*pwr_alloclist); - *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for region parity */ - /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */ - *pwr_pda = rf_AllocPDAList(1); - rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector)); - - /* initialize DAG parameters */ - bzero((char *)&tracerec,sizeof(tracerec)); - (*pwr_dag_h)->tracerec = &tracerec; - pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; - pwr_wrNode->params[0].p = *pwr_pda; + /* Initiate the write of region parity to disk. Once initiated, return + * to the calling routine. + * + * NON-BLOCKING */ + + RF_AccTraceEntry_t tracerec; + RF_DagNode_t *pwr_wrNode; + + /* create DAG to write region log from disk */ + rf_MakeAllocList(*pwr_alloclist); + *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); + + /* create and initialize PDA for region parity */ + /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t + * *)); */ + *pwr_pda = rf_AllocPDAList(1); + rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector)); + + /* initialize DAG parameters */ + bzero((char *) &tracerec, sizeof(tracerec)); + (*pwr_dag_h)->tracerec = &tracerec; + pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; + pwr_wrNode->params[0].p = *pwr_pda; /* pwr_wrNode->params[1] = parityBuffer; */ - pwr_wrNode->params[2].v = 0; - pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + pwr_wrNode->params[2].v 
= 0; + pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - /* launch the dag to write region parity to disk */ - rf_DispatchDAG(*pwr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc, - (void *) pwr_mcpair); + /* launch the dag to write region parity to disk */ + rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) pwr_mcpair); } -static void FlushLogsToDisk( - RF_Raid_t *raidPtr, - RF_ParityLog_t *logList) +static void +FlushLogsToDisk( + RF_Raid_t * raidPtr, + RF_ParityLog_t * logList) { - /* Flush a linked list of core logs to the log disk. - Logs contain the disk location where they should be - written. Logs were written in FIFO order and that - order must be preserved. - - Recommended optimizations: - 1) allow multiple flushes to occur simultaneously - 2) coalesce contiguous flush operations - - BLOCKING - */ - - RF_ParityLog_t *log; - RF_RegionId_t regionID; - RF_MCPair_t *fwr_mcpair; - RF_DagHeader_t *fwr_dag_h; - RF_AllocListElem_t *fwr_alloclist; - RF_PhysDiskAddr_t *fwr_pda; - - fwr_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(fwr_mcpair->mutex); - - RF_ASSERT(logList); - log = logList; - while (log) - { - regionID = log->regionID; - - /* create and launch a DAG to write the core log */ - if (rf_parityLogDebug) - printf("[initiating write of core log for region %d]\n", regionID); - fwr_mcpair->flag = RF_FALSE; - WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda); - - /* wait for the DAG to complete */ -#ifndef SIMULATE - while (!fwr_mcpair->flag) - RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex); -#endif /* !SIMULATE */ - if (fwr_dag_h->status != rf_enable) - { - RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID); - RF_ASSERT(0); + /* Flush a linked list of core logs to the log disk. Logs contain the + * disk location where they should be written. Logs were written in + * FIFO order and that order must be preserved. 
+ * + * Recommended optimizations: 1) allow multiple flushes to occur + * simultaneously 2) coalesce contiguous flush operations + * + * BLOCKING */ + + RF_ParityLog_t *log; + RF_RegionId_t regionID; + RF_MCPair_t *fwr_mcpair; + RF_DagHeader_t *fwr_dag_h; + RF_AllocListElem_t *fwr_alloclist; + RF_PhysDiskAddr_t *fwr_pda; + + fwr_mcpair = rf_AllocMCPair(); + RF_LOCK_MUTEX(fwr_mcpair->mutex); + + RF_ASSERT(logList); + log = logList; + while (log) { + regionID = log->regionID; + + /* create and launch a DAG to write the core log */ + if (rf_parityLogDebug) + printf("[initiating write of core log for region %d]\n", regionID); + fwr_mcpair->flag = RF_FALSE; + WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda); + + /* wait for the DAG to complete */ + while (!fwr_mcpair->flag) + RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex); + if (fwr_dag_h->status != rf_enable) { + RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID); + RF_ASSERT(0); + } + /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */ + rf_FreePhysDiskAddr(fwr_pda); + rf_FreeDAG(fwr_dag_h); + rf_FreeAllocList(fwr_alloclist); + + log = log->next; } - - /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */ - rf_FreePhysDiskAddr(fwr_pda); - rf_FreeDAG(fwr_dag_h); - rf_FreeAllocList(fwr_alloclist); - - log = log->next; - } - RF_UNLOCK_MUTEX(fwr_mcpair->mutex); - rf_FreeMCPair(fwr_mcpair); - rf_ReleaseParityLogs(raidPtr, logList); + RF_UNLOCK_MUTEX(fwr_mcpair->mutex); + rf_FreeMCPair(fwr_mcpair); + rf_ReleaseParityLogs(raidPtr, logList); } -static void ReintegrateRegion( - RF_Raid_t *raidPtr, - RF_RegionId_t regionID, - RF_ParityLog_t *coreLog) +static void +ReintegrateRegion( + RF_Raid_t * raidPtr, + RF_RegionId_t regionID, + RF_ParityLog_t * coreLog) { - RF_MCPair_t *rrd_mcpair=NULL, *prd_mcpair, *pwr_mcpair; - RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h; - RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist; - RF_PhysDiskAddr_t *rrd_pda, *prd_pda, 
*pwr_pda; - caddr_t parityBuffer, regionBuffer=NULL; - - /* Reintegrate a region (regionID). - 1. acquire region and parity buffers - 2. read log from disk - 3. read parity from disk - 4. apply log to parity - 5. apply core log to parity - 6. write new parity to disk - - BLOCKING - */ - - if (rf_parityLogDebug) - printf("[reintegrating region %d]\n", regionID); - - /* initiate read of region parity */ - if (rf_parityLogDebug) - printf("[initiating read of parity for region %d]\n", regionID); - parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); - prd_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(prd_mcpair->mutex); - prd_mcpair->flag = RF_FALSE; - ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda); - - /* if region log nonempty, initiate read */ - if (raidPtr->regionInfo[regionID].diskCount > 0) - { - if (rf_parityLogDebug) - printf("[initiating read of disk log for region %d]\n", regionID); - regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); - rrd_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(rrd_mcpair->mutex); - rrd_mcpair->flag = RF_FALSE; - ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda); - } - - /* wait on read of region parity to complete */ -#ifndef SIMULATE - while (!prd_mcpair->flag) { - RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex); - } -#endif /* !SIMULATE */ - RF_UNLOCK_MUTEX(prd_mcpair->mutex); - if (prd_dag_h->status != rf_enable) - { - RF_ERRORMSG("Unable to read parity from disk\n"); - /* add code to fail the parity disk */ - RF_ASSERT(0); - } - - /* apply core log to parity */ - /* if (coreLog) - ApplyLogsToParity(coreLog, parityBuffer); */ - - if (raidPtr->regionInfo[regionID].diskCount > 0) - { - /* wait on read of region log to complete */ -#ifndef SIMULATE - while (!rrd_mcpair->flag) - RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex); -#endif /* !SIMULATE */ - RF_UNLOCK_MUTEX(rrd_mcpair->mutex); - if (rrd_dag_h->status != 
rf_enable) - { - RF_ERRORMSG("Unable to read region log from disk\n"); - /* add code to fail the log disk */ - RF_ASSERT(0); + RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair; + RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h; + RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist; + RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda; + caddr_t parityBuffer, regionBuffer = NULL; + + /* Reintegrate a region (regionID). 1. acquire region and parity + * buffers 2. read log from disk 3. read parity from disk 4. apply log + * to parity 5. apply core log to parity 6. write new parity to disk + * + * BLOCKING */ + + if (rf_parityLogDebug) + printf("[reintegrating region %d]\n", regionID); + + /* initiate read of region parity */ + if (rf_parityLogDebug) + printf("[initiating read of parity for region %d]\n", regionID); + parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); + prd_mcpair = rf_AllocMCPair(); + RF_LOCK_MUTEX(prd_mcpair->mutex); + prd_mcpair->flag = RF_FALSE; + ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda); + + /* if region log nonempty, initiate read */ + if (raidPtr->regionInfo[regionID].diskCount > 0) { + if (rf_parityLogDebug) + printf("[initiating read of disk log for region %d]\n", regionID); + regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); + rrd_mcpair = rf_AllocMCPair(); + RF_LOCK_MUTEX(rrd_mcpair->mutex); + rrd_mcpair->flag = RF_FALSE; + ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda); + } + /* wait on read of region parity to complete */ + while (!prd_mcpair->flag) { + RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex); } - /* apply region log to parity */ - /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */ - /* release resources associated with region log */ - /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */ - rf_FreePhysDiskAddr(rrd_pda); - rf_FreeDAG(rrd_dag_h); - 
rf_FreeAllocList(rrd_alloclist); - rf_FreeMCPair(rrd_mcpair); - ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); - } - - /* write reintegrated parity to disk */ - if (rf_parityLogDebug) - printf("[initiating write of parity for region %d]\n", regionID); - pwr_mcpair = rf_AllocMCPair(); - RF_LOCK_MUTEX(pwr_mcpair->mutex); - pwr_mcpair->flag = RF_FALSE; - WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda); -#ifndef SIMULATE - while (!pwr_mcpair->flag) - RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex); -#endif /* !SIMULATE */ - RF_UNLOCK_MUTEX(pwr_mcpair->mutex); - if (pwr_dag_h->status != rf_enable) - { - RF_ERRORMSG("Unable to write parity to disk\n"); - /* add code to fail the parity disk */ - RF_ASSERT(0); - } - - /* release resources associated with read of old parity */ - /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */ - rf_FreePhysDiskAddr(prd_pda); - rf_FreeDAG(prd_dag_h); - rf_FreeAllocList(prd_alloclist); - rf_FreeMCPair(prd_mcpair); - - /* release resources associated with write of new parity */ - ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); - /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */ - rf_FreePhysDiskAddr(pwr_pda); - rf_FreeDAG(pwr_dag_h); - rf_FreeAllocList(pwr_alloclist); - rf_FreeMCPair(pwr_mcpair); - - if (rf_parityLogDebug) - printf("[finished reintegrating region %d]\n", regionID); + RF_UNLOCK_MUTEX(prd_mcpair->mutex); + if (prd_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to read parity from disk\n"); + /* add code to fail the parity disk */ + RF_ASSERT(0); + } + /* apply core log to parity */ + /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */ + + if (raidPtr->regionInfo[regionID].diskCount > 0) { + /* wait on read of region log to complete */ + while (!rrd_mcpair->flag) + RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex); + RF_UNLOCK_MUTEX(rrd_mcpair->mutex); + if (rrd_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to read region 
log from disk\n"); + /* add code to fail the log disk */ + RF_ASSERT(0); + } + /* apply region log to parity */ + /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */ + /* release resources associated with region log */ + /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */ + rf_FreePhysDiskAddr(rrd_pda); + rf_FreeDAG(rrd_dag_h); + rf_FreeAllocList(rrd_alloclist); + rf_FreeMCPair(rrd_mcpair); + ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); + } + /* write reintegrated parity to disk */ + if (rf_parityLogDebug) + printf("[initiating write of parity for region %d]\n", regionID); + pwr_mcpair = rf_AllocMCPair(); + RF_LOCK_MUTEX(pwr_mcpair->mutex); + pwr_mcpair->flag = RF_FALSE; + WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda); + while (!pwr_mcpair->flag) + RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex); + RF_UNLOCK_MUTEX(pwr_mcpair->mutex); + if (pwr_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to write parity to disk\n"); + /* add code to fail the parity disk */ + RF_ASSERT(0); + } + /* release resources associated with read of old parity */ + /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */ + rf_FreePhysDiskAddr(prd_pda); + rf_FreeDAG(prd_dag_h); + rf_FreeAllocList(prd_alloclist); + rf_FreeMCPair(prd_mcpair); + + /* release resources associated with write of new parity */ + ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); + /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */ + rf_FreePhysDiskAddr(pwr_pda); + rf_FreeDAG(pwr_dag_h); + rf_FreeAllocList(pwr_alloclist); + rf_FreeMCPair(pwr_mcpair); + + if (rf_parityLogDebug) + printf("[finished reintegrating region %d]\n", regionID); } -static void ReintegrateLogs( - RF_Raid_t *raidPtr, - RF_ParityLog_t *logList) +static void +ReintegrateLogs( + RF_Raid_t * raidPtr, + RF_ParityLog_t * logList) { - RF_ParityLog_t *log, *freeLogList = NULL; - RF_ParityLogData_t *logData, *logDataList; - RF_RegionId_t regionID; - - 
RF_ASSERT(logList); - while (logList) - { - log = logList; - logList = logList->next; - log->next = NULL; - regionID = log->regionID; - ReintegrateRegion(raidPtr, regionID, log); - log->numRecords = 0; - - /* remove all items which are blocked on reintegration of this region */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE); - logDataList = logData; - while (logData) - { - logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE); - logData = logData->next; - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - /* process blocked log data and clear reintInProgress flag for this region */ - if (logDataList) - rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); - else - { - /* Enable flushing for this region. 
Holding both locks provides - a synchronization barrier with DumpParityLogToDisk - */ - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - raidPtr->regionInfo[regionID].diskCount = 0; - raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */ - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - } - /* if log wasn't used, attach it to the list of logs to be returned */ - if (log) - { - log->next = freeLogList; - freeLogList = log; + RF_ParityLog_t *log, *freeLogList = NULL; + RF_ParityLogData_t *logData, *logDataList; + RF_RegionId_t regionID; + + RF_ASSERT(logList); + while (logList) { + log = logList; + logList = logList->next; + log->next = NULL; + regionID = log->regionID; + ReintegrateRegion(raidPtr, regionID, log); + log->numRecords = 0; + + /* remove all items which are blocked on reintegration of this + * region */ + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE); + logDataList = logData; + while (logData) { + logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE); + logData = logData->next; + } + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + + /* process blocked log data and clear reintInProgress flag for + * this region */ + if (logDataList) + rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); + else { + /* Enable flushing for this region. 
Holding both + * locks provides a synchronization barrier with + * DumpParityLogToDisk */ + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + raidPtr->regionInfo[regionID].diskCount = 0; + raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now + * enabled */ + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + } + /* if log wasn't used, attach it to the list of logs to be + * returned */ + if (log) { + log->next = freeLogList; + freeLogList = log; + } } - } - if (freeLogList) - rf_ReleaseParityLogs(raidPtr, freeLogList); + if (freeLogList) + rf_ReleaseParityLogs(raidPtr, freeLogList); } -int rf_ShutdownLogging(RF_Raid_t *raidPtr) +int +rf_ShutdownLogging(RF_Raid_t * raidPtr) { - /* shutdown parity logging - 1) disable parity logging in all regions - 2) reintegrate all regions - */ - - RF_SectorCount_t diskCount; - RF_RegionId_t regionID; - RF_ParityLog_t *log; - - if (rf_parityLogDebug) - printf("[shutting down parity logging]\n"); - /* Since parity log maps are volatile, we must reintegrate all regions. 
*/ - if (rf_forceParityLogReint) { - for (regionID = 0; regionID < rf_numParityRegions; regionID++) - { - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE; - log = raidPtr->regionInfo[regionID].coreLog; - raidPtr->regionInfo[regionID].coreLog = NULL; - diskCount = raidPtr->regionInfo[regionID].diskCount; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - if (diskCount > 0 || log != NULL) - ReintegrateRegion(raidPtr, regionID, log); - if (log != NULL) - rf_ReleaseParityLogs(raidPtr, log); - } - } - if (rf_parityLogDebug) - { - printf("[parity logging disabled]\n"); - printf("[should be done!]\n"); - } - return(0); + /* shutdown parity logging 1) disable parity logging in all regions 2) + * reintegrate all regions */ + + RF_SectorCount_t diskCount; + RF_RegionId_t regionID; + RF_ParityLog_t *log; + + if (rf_parityLogDebug) + printf("[shutting down parity logging]\n"); + /* Since parity log maps are volatile, we must reintegrate all + * regions. 
*/ + if (rf_forceParityLogReint) { + for (regionID = 0; regionID < rf_numParityRegions; regionID++) { + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE; + log = raidPtr->regionInfo[regionID].coreLog; + raidPtr->regionInfo[regionID].coreLog = NULL; + diskCount = raidPtr->regionInfo[regionID].diskCount; + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + if (diskCount > 0 || log != NULL) + ReintegrateRegion(raidPtr, regionID, log); + if (log != NULL) + rf_ReleaseParityLogs(raidPtr, log); + } + } + if (rf_parityLogDebug) { + printf("[parity logging disabled]\n"); + printf("[should be done!]\n"); + } + return (0); } -int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr) +int +rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) { - RF_ParityLog_t *reintQueue, *flushQueue; - int workNeeded, done = RF_FALSE; - - rf_assign_threadid(); /* don't remove this line */ - - /* Main program for parity logging disk thread. This routine waits - for work to appear in either the flush or reintegration queues - and is responsible for flushing core logs to the log disk as - well as reintegrating parity regions. - - BLOCKING - */ - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - /* - * Inform our creator that we're running. Don't bother doing the - * mutex lock/unlock dance- we locked above, and we'll unlock - * below with nothing to do, yet. - */ - raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING; - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); - - /* empty the work queues */ - flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL; - reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL; - workNeeded = (flushQueue || reintQueue); - - while (!done) - { - while (workNeeded) - { - /* First, flush all logs in the flush queue, freeing buffers - Second, reintegrate all regions which are reported as full. 
- Third, append queued log data until blocked. - - Note: Incoming appends (ParityLogAppend) can block on either - 1. empty buffer pool - 2. region under reintegration - To preserve a global FIFO ordering of appends, buffers are not - released to the world until those appends blocked on buffers are - removed from the append queue. Similarly, regions which are - reintegrated are not opened for general use until the append - queue has been emptied. - */ - - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - /* empty flushQueue, using free'd log buffers to process bufTail */ - if (flushQueue) - FlushLogsToDisk(raidPtr, flushQueue); - - /* empty reintQueue, flushing from reintTail as we go */ - if (reintQueue) - ReintegrateLogs(raidPtr, reintQueue); - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL; - reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL; - workNeeded = (flushQueue || reintQueue); - } - /* no work is needed at this point */ - if (raidPtr->parityLogDiskQueue.threadState&RF_PLOG_TERMINATE) - { - /* shutdown parity logging - 1. disable parity logging in all regions - 2. reintegrate all regions - */ - done = RF_TRUE; /* thread disabled, no work needed */ - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - rf_ShutdownLogging(raidPtr); + RF_ParityLog_t *reintQueue, *flushQueue; + int workNeeded, done = RF_FALSE; + + rf_assign_threadid(); /* don't remove this line */ + + /* Main program for parity logging disk thread. This routine waits + * for work to appear in either the flush or reintegration queues and + * is responsible for flushing core logs to the log disk as well as + * reintegrating parity regions. + * + * BLOCKING */ + + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + + /* + * Inform our creator that we're running. 
Don't bother doing the + * mutex lock/unlock dance- we locked above, and we'll unlock + * below with nothing to do, yet. + */ + raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING; + RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); + + /* empty the work queues */ + flushQueue = raidPtr->parityLogDiskQueue.flushQueue; + raidPtr->parityLogDiskQueue.flushQueue = NULL; + reintQueue = raidPtr->parityLogDiskQueue.reintQueue; + raidPtr->parityLogDiskQueue.reintQueue = NULL; + workNeeded = (flushQueue || reintQueue); + + while (!done) { + while (workNeeded) { + /* First, flush all logs in the flush queue, freeing + * buffers Second, reintegrate all regions which are + * reported as full. Third, append queued log data + * until blocked. + * + * Note: Incoming appends (ParityLogAppend) can block on + * either 1. empty buffer pool 2. region under + * reintegration To preserve a global FIFO ordering of + * appends, buffers are not released to the world + * until those appends blocked on buffers are removed + * from the append queue. Similarly, regions which + * are reintegrated are not opened for general use + * until the append queue has been emptied. */ + + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + + /* empty flushQueue, using free'd log buffers to + * process bufTail */ + if (flushQueue) + FlushLogsToDisk(raidPtr, flushQueue); + + /* empty reintQueue, flushing from reintTail as we go */ + if (reintQueue) + ReintegrateLogs(raidPtr, reintQueue); + + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + flushQueue = raidPtr->parityLogDiskQueue.flushQueue; + raidPtr->parityLogDiskQueue.flushQueue = NULL; + reintQueue = raidPtr->parityLogDiskQueue.reintQueue; + raidPtr->parityLogDiskQueue.reintQueue = NULL; + workNeeded = (flushQueue || reintQueue); + } + /* no work is needed at this point */ + if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) { + /* shutdown parity logging 1. disable parity logging + * in all regions 2. 
reintegrate all regions */ + done = RF_TRUE; /* thread disabled, no work needed */ + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + rf_ShutdownLogging(raidPtr); + } + if (!done) { + /* thread enabled, no work needed, so sleep */ + if (rf_parityLogDebug) + printf("[parity logging disk manager sleeping]\n"); + RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex); + if (rf_parityLogDebug) + printf("[parity logging disk manager just woke up]\n"); + flushQueue = raidPtr->parityLogDiskQueue.flushQueue; + raidPtr->parityLogDiskQueue.flushQueue = NULL; + reintQueue = raidPtr->parityLogDiskQueue.reintQueue; + raidPtr->parityLogDiskQueue.reintQueue = NULL; + workNeeded = (flushQueue || reintQueue); + } } - if (!done) - { - /* thread enabled, no work needed, so sleep */ - if (rf_parityLogDebug) - printf("[parity logging disk manager sleeping]\n"); - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex); - if (rf_parityLogDebug) - printf("[parity logging disk manager just woke up]\n"); - flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL; - reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL; - workNeeded = (flushQueue || reintQueue); - } - } - /* - * Announce that we're done. - */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); + /* + * Announce that we're done. 
+ */ + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN; + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - /* - * In the Net- and OpenBSD kernels, the thread must exit; returning would - * cause the proc trampoline to attempt to return to userspace. - */ - kthread_exit(0); /* does not return */ + /* + * In the Net- & OpenBSD kernel, the thread must exit; returning would + * cause the proc trampoline to attempt to return to userspace. + */ + kthread_exit(0); /* does not return */ #else - return(0); + return (0); #endif } - -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.h b/sys/dev/raidframe/rf_paritylogDiskMgr.h index c20558d9897..96e0ac7485f 100644 --- a/sys/dev/raidframe/rf_paritylogDiskMgr.h +++ b/sys/dev/raidframe/rf_paritylogDiskMgr.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_paritylogDiskMgr.h,v 1.1 1999/01/11 14:29:35 niklas Exp $ */ -/* $NetBSD: rf_paritylogDiskMgr.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_paritylogDiskMgr.h,v 1.2 1999/02/16 00:03:06 niklas Exp $ */ +/* $NetBSD: rf_paritylogDiskMgr.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,27 +29,6 @@ /* header file for parity log disk mgr code * - * : - * Log: rf_paritylogDiskMgr.h,v - * Revision 1.5 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.4 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.3 1995/12/06 20:56:39 wvcii - * added prototypes - * - * Revision 1.2 1995/11/30 16:06:21 wvcii - * added copyright info - * - * Revision 1.1 1995/09/06 19:25:29 wvcii - * Initial revision - * - * */ #ifndef _RF__RF_PARITYLOGDISKMGR_H_ @@ -57,7 +36,7 @@ #include "rf_types.h" -int rf_ShutdownLogging(RF_Raid_t *raidPtr); -int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr); +int rf_ShutdownLogging(RF_Raid_t * raidPtr); +int rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr); -#endif /* !_RF__RF_PARITYLOGDISKMGR_H_ */ +#endif /* !_RF__RF_PARITYLOGDISKMGR_H_ */ diff --git a/sys/dev/raidframe/rf_paritylogging.c b/sys/dev/raidframe/rf_paritylogging.c index 595612b3718..4cd95744629 100644 --- a/sys/dev/raidframe/rf_paritylogging.c +++ b/sys/dev/raidframe/rf_paritylogging.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_paritylogging.c,v 1.1 1999/01/11 14:29:35 niklas Exp $ */ -/* $NetBSD: rf_paritylogging.c,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_paritylogging.c,v 1.2 1999/02/16 00:03:06 niklas Exp $ */ +/* $NetBSD: rf_paritylogging.c,v 1.3 1999/02/05 00:06:14 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,165 +27,6 @@ * rights to redistribute these changes. */ -/* : - * Log: rf_paritylogging.c,v - * Revision 1.42 1996/11/05 21:10:40 jimz - * failed pda generalization - * - * Revision 1.41 1996/07/31 16:56:18 jimz - * dataBytesPerStripe, sectorsPerDisk init arch-indep. 
- * - * Revision 1.40 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.39 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.38 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.37 1996/06/17 03:24:14 jimz - * switch to new shutdown function typing - * - * Revision 1.36 1996/06/14 23:15:38 jimz - * attempt to deal with thread GC problem - * - * Revision 1.35 1996/06/11 13:48:30 jimz - * get it to compile in-kernel - * - * Revision 1.34 1996/06/11 10:16:35 jimz - * Check return values on array configuration- back out if failed. - * Reorder shutdown to avoid using deallocated resources. - * Get rid of bogus join op in shutdown. - * - * Revision 1.33 1996/06/10 18:29:17 wvcii - * fixed bug in rf_IdentifyStripeParityLogging - * - added array initialization - * - * Revision 1.32 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.31 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.30 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.29 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. 
- * - * Revision 1.28 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.27 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.26 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.25 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.24 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.23 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.22 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.21 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.20 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.19 1996/05/20 16:16:30 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.18 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.17 1996/05/03 19:47:11 wvcii - * added includes of new dag library - * - * Revision 1.16 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - 
* - * Revision 1.15 1995/12/06 20:57:43 wvcii - * added prototypes - * reintegration of logs on shutdown now conditional on forceParityLogReint - * - * Revision 1.14 1995/11/30 16:06:42 wvcii - * added copyright info - * - * Revision 1.13 1995/11/17 19:01:29 wvcii - * added prototyping to MapParity - * - * Revision 1.12 1995/11/07 15:36:03 wvcii - * changed ParityLoggingDagSelect prototype - * function no longer returns numHdrSucc, numTermAnt - * - * Revision 1.11 1995/10/08 20:42:54 wvcii - * lots of random debugging - debugging incomplete - * - * Revision 1.10 1995/09/07 01:26:55 jimz - * Achive basic compilation in kernel. Kernel functionality - * is not guaranteed at all, but it'll compile. Mostly. I hope. - * - * Revision 1.9 1995/09/06 19:21:17 wvcii - * explicit shutdown (forced reintegration) for simulator version - * - * Revision 1.8 1995/07/08 18:19:16 rachad - * Parity verifies can not be done in the simulator. - * - * Revision 1.7 1995/07/07 00:17:20 wvcii - * this version free from deadlock, fails parity verification - * - * Revision 1.6 1995/06/23 13:39:59 robby - * updeated to prototypes in rf_layout.h - * - * Revision 1.5 1995/06/09 13:14:56 wvcii - * code is now nonblocking - * - * Revision 1.4 95/06/01 17:02:23 wvcii - * code debug - * - * Revision 1.3 95/05/31 13:08:57 wvcii - * code debug - * - * Revision 1.2 95/05/21 15:35:00 wvcii - * code debug - * - * - * - */ /* parity logging configuration, dag selection, and mapping is implemented here @@ -215,10 +56,11 @@ #include "rf_shutdown.h" typedef struct RF_ParityLoggingConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by IdentifyStripe */ -} RF_ParityLoggingConfigInfo_t; + RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by + * IdentifyStripe */ +} RF_ParityLoggingConfigInfo_t; -static void FreeRegionInfo(RF_Raid_t *raidPtr, RF_RegionId_t regionID); +static void FreeRegionInfo(RF_Raid_t * raidPtr, RF_RegionId_t regionID); static void 
rf_ShutdownParityLogging(RF_ThreadArg_t arg); static void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg); static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg); @@ -226,729 +68,724 @@ static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg); static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg); static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg); -int rf_ConfigureParityLogging( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureParityLogging( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - int i, j, startdisk, rc; - RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity; - RF_SectorCount_t parityBufferCapacity, maxRegionParityRange; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ParityLoggingConfigInfo_t *info; - RF_ParityLog_t *l=NULL, *next; - caddr_t lHeapPtr; - - /* - * We create multiple entries on the shutdown list here, since - * this configuration routine is fairly complicated in and of - * itself, and this makes backing out of a failed configuration - * much simpler. - */ - - raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG; - - /* create a parity logging configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t), (RF_ParityLoggingConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return(ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - - /* the stripe identifier must identify the disks in each stripe, - * IN THE ORDER THAT THEY APPEAR IN THE STRIPE. 
- */ - info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol), (raidPtr->numCol), raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return(ENOMEM); - - startdisk = 0; - for (i=0; i<(raidPtr->numCol); i++) - { - for (j=0; j<(raidPtr->numCol); j++) - { - info->stripeIdentifier[i][j] = (startdisk + j) % (raidPtr->numCol - 1); + int i, j, startdisk, rc; + RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity; + RF_SectorCount_t parityBufferCapacity, maxRegionParityRange; + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_ParityLoggingConfigInfo_t *info; + RF_ParityLog_t *l = NULL, *next; + caddr_t lHeapPtr; + + /* + * We create multiple entries on the shutdown list here, since + * this configuration routine is fairly complicated in and of + * itself, and this makes backing out of a failed configuration + * much simpler. + */ + + raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG; + + /* create a parity logging configuration structure */ + RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t), (RF_ParityLoggingConfigInfo_t *), raidPtr->cleanupList); + if (info == NULL) + return (ENOMEM); + layoutPtr->layoutSpecificInfo = (void *) info; + + RF_ASSERT(raidPtr->numRow == 1); + + /* the stripe identifier must identify the disks in each stripe, IN + * THE ORDER THAT THEY APPEAR IN THE STRIPE. 
*/ + info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol), (raidPtr->numCol), raidPtr->cleanupList); + if (info->stripeIdentifier == NULL) + return (ENOMEM); + + startdisk = 0; + for (i = 0; i < (raidPtr->numCol); i++) { + for (j = 0; j < (raidPtr->numCol); j++) { + info->stripeIdentifier[i][j] = (startdisk + j) % (raidPtr->numCol - 1); + } + if ((--startdisk) < 0) + startdisk = raidPtr->numCol - 1 - 1; } - if ((--startdisk) < 0) - startdisk = raidPtr->numCol-1-1; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numParityCol = 1; - layoutPtr->numParityLogCol = 1; - layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol - layoutPtr->numParityLogCol; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - /* configure parity log parameters - - parameter comment/constraints - ---------------- ------------------- - * numParityRegions all regions (except possibly last) of equal size - * totalInCoreLogCapacity amount of memory in bytes available for in-core logs (default 1 MB) - # numSectorsPerLog capacity of an in-core log in sectors (1 disk track) - numParityLogs total number of in-core logs, should be at least numParityRegions - regionLogCapacity size of a region log (except possibly last one) in sectors - totalLogCapacity total amount of log space in sectors - - * denotes a user settable parameter. 
- # logs are fixed to be the size of a disk track, value #defined in rf_paritylog.h - - */ - - totalLogCapacity = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol; - raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; - if (rf_parityLogDebug) - printf("bytes per sector %d\n", raidPtr->bytesPerSector); - - /* reduce fragmentation within a disk region by adjusting the number of regions - in an attempt to allow an integral number of logs to fit into a disk region */ - fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; - if (fragmentation > 0) - for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) - { - if (((totalLogCapacity / (rf_numParityRegions + i)) % raidPtr->numSectorsPerLog) < fragmentation) - { - rf_numParityRegions++; - raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; - fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; - } - if (((totalLogCapacity / (rf_numParityRegions - i)) % raidPtr->numSectorsPerLog) < fragmentation) - { - rf_numParityRegions--; - raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; - fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; - } - } - /* ensure integral number of regions per log */ - raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity / raidPtr->numSectorsPerLog) * raidPtr->numSectorsPerLog; - - raidPtr->numParityLogs = rf_totalInCoreLogCapacity / (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog); - /* to avoid deadlock, must ensure that enough logs exist for each region to have one simultaneously */ - if (raidPtr->numParityLogs < rf_numParityRegions) - raidPtr->numParityLogs = rf_numParityRegions; - - /* create region information structs */ - RF_Malloc(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t)), (RF_RegionInfo_t *)); - if (raidPtr->regionInfo == NULL) - return(ENOMEM); - - /* last region may not be full capacity */ - 
lastRegionCapacity = raidPtr->regionLogCapacity; - while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity + lastRegionCapacity > totalLogCapacity) - lastRegionCapacity = lastRegionCapacity - raidPtr->numSectorsPerLog; - - raidPtr->regionParityRange = raidPtr->sectorsPerDisk / rf_numParityRegions; - maxRegionParityRange = raidPtr->regionParityRange; + + /* fill in the remaining layout parameters */ + layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->numParityCol = 1; + layoutPtr->numParityLogCol = 1; + layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol - layoutPtr->numParityLogCol; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; + raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + + /* configure parity log parameters + * + * parameter comment/constraints ---------------- + * ------------------- numParityRegions all regions (except + * possibly last) of equal size totalInCoreLogCapacity amount of + * memory in bytes available for in-core logs (default 1 MB) # + * numSectorsPerLog capacity of an in-core log in sectors (1 + * disk track) numParityLogs total number of in-core logs, + * should be at least numParityRegions regionLogCapacity size of + * a region log (except possibly last one) in sectors totalLogCapacity + * total amount of log space in sectors + * + * denotes a user settable parameter. 
# logs are fixed to be the size of + * a disk track, value #defined in rf_paritylog.h + * + */ + + totalLogCapacity = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol; + raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; + if (rf_parityLogDebug) + printf("bytes per sector %d\n", raidPtr->bytesPerSector); + + /* reduce fragmentation within a disk region by adjusting the number + * of regions in an attempt to allow an integral number of logs to fit + * into a disk region */ + fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; + if (fragmentation > 0) + for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) { + if (((totalLogCapacity / (rf_numParityRegions + i)) % raidPtr->numSectorsPerLog) < fragmentation) { + rf_numParityRegions++; + raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; + fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; + } + if (((totalLogCapacity / (rf_numParityRegions - i)) % raidPtr->numSectorsPerLog) < fragmentation) { + rf_numParityRegions--; + raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; + fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; + } + } + /* ensure integral number of regions per log */ + raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity / raidPtr->numSectorsPerLog) * raidPtr->numSectorsPerLog; + + raidPtr->numParityLogs = rf_totalInCoreLogCapacity / (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog); + /* to avoid deadlock, must ensure that enough logs exist for each + * region to have one simultaneously */ + if (raidPtr->numParityLogs < rf_numParityRegions) + raidPtr->numParityLogs = rf_numParityRegions; + + /* create region information structs */ + RF_Malloc(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t)), (RF_RegionInfo_t *)); + if (raidPtr->regionInfo == NULL) + return (ENOMEM); + + /* last region may not be full capacity */ + 
lastRegionCapacity = raidPtr->regionLogCapacity; + while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity + lastRegionCapacity > totalLogCapacity) + lastRegionCapacity = lastRegionCapacity - raidPtr->numSectorsPerLog; + + raidPtr->regionParityRange = raidPtr->sectorsPerDisk / rf_numParityRegions; + maxRegionParityRange = raidPtr->regionParityRange; /* i can't remember why this line is in the code -wvcii 6/30/95 */ /* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0) regionParityRange++; */ - /* build pool of unused parity logs */ - RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector, (caddr_t)); - if (raidPtr->parityLogBufferHeap == NULL) - return(ENOMEM); - lHeapPtr = raidPtr->parityLogBufferHeap; - rc = rf_mutex_init(&raidPtr->parityLogPool.mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); - return(ENOMEM); - } - for (i = 0; i < raidPtr->numParityLogs; i++) - { - if (i == 0) - { - RF_Calloc(raidPtr->parityLogPool.parityLogs, 1, sizeof(RF_ParityLog_t), (RF_ParityLog_t *)); - if (raidPtr->parityLogPool.parityLogs == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); - return(ENOMEM); - } - l = raidPtr->parityLogPool.parityLogs; + /* build pool of unused parity logs */ + RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector, (caddr_t)); + if (raidPtr->parityLogBufferHeap == NULL) + return (ENOMEM); + lHeapPtr = raidPtr->parityLogBufferHeap; + rc = rf_mutex_init(&raidPtr->parityLogPool.mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * 
raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); + return (ENOMEM); + } + for (i = 0; i < raidPtr->numParityLogs; i++) { + if (i == 0) { + RF_Calloc(raidPtr->parityLogPool.parityLogs, 1, sizeof(RF_ParityLog_t), (RF_ParityLog_t *)); + if (raidPtr->parityLogPool.parityLogs == NULL) { + RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); + return (ENOMEM); + } + l = raidPtr->parityLogPool.parityLogs; + } else { + RF_Calloc(l->next, 1, sizeof(RF_ParityLog_t), (RF_ParityLog_t *)); + if (l->next == NULL) { + RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); + for (l = raidPtr->parityLogPool.parityLogs; l; l = next) { + next = l->next; + if (l->records) + RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); + RF_Free(l, sizeof(RF_ParityLog_t)); + } + return (ENOMEM); + } + l = l->next; + } + l->bufPtr = lHeapPtr; + lHeapPtr += raidPtr->numSectorsPerLog * raidPtr->bytesPerSector; + RF_Malloc(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t)), (RF_ParityLogRecord_t *)); + if (l->records == NULL) { + RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); + for (l = raidPtr->parityLogPool.parityLogs; l; l = next) { + next = l->next; + if (l->records) + RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); + RF_Free(l, sizeof(RF_ParityLog_t)); + } + return (ENOMEM); + } } - else - { - RF_Calloc(l->next, 1, sizeof(RF_ParityLog_t), (RF_ParityLog_t *)); - if (l->next == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); - for(l=raidPtr->parityLogPool.parityLogs;l;l=next) { - next = l->next; - if (l->records) - RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); - RF_Free(l, sizeof(RF_ParityLog_t)); - } - 
return(ENOMEM); - } - l = l->next; + rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr); + if (rc) { + RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_ShutdownParityLoggingPool(raidPtr); + return (rc); } - l->bufPtr = lHeapPtr; - lHeapPtr += raidPtr->numSectorsPerLog * raidPtr->bytesPerSector; - RF_Malloc(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t)), (RF_ParityLogRecord_t *)); - if (l->records == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); - for(l=raidPtr->parityLogPool.parityLogs;l;l=next) { - next = l->next; - if (l->records) - RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); - RF_Free(l, sizeof(RF_ParityLog_t)); - } - return(ENOMEM); - } - } - rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingPool(raidPtr); - return(rc); - } - - /* build pool of region buffers */ - rc = rf_mutex_init(&raidPtr->regionBufferPool.mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(ENOMEM); - } - rc = rf_cond_init(&raidPtr->regionBufferPool.cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); - return(ENOMEM); - } - raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity * raidPtr->bytesPerSector; - printf("regionBufferPool.bufferSize %d\n",raidPtr->regionBufferPool.bufferSize); - raidPtr->regionBufferPool.totalBuffers = 1; /* for now, only one region at a time may be reintegrated */ - raidPtr->regionBufferPool.availableBuffers = raidPtr->regionBufferPool.totalBuffers; - raidPtr->regionBufferPool.availBuffersIndex = 0; - 
raidPtr->regionBufferPool.emptyBuffersIndex = 0; - RF_Malloc(raidPtr->regionBufferPool.buffers, raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t), (caddr_t *)); - if (raidPtr->regionBufferPool.buffers == NULL) { - rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); - rf_cond_destroy(&raidPtr->regionBufferPool.cond); - return(ENOMEM); - } - for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) { - RF_Malloc(raidPtr->regionBufferPool.buffers[i], raidPtr->regionBufferPool.bufferSize * sizeof(char), (caddr_t)); - if (raidPtr->regionBufferPool.buffers == NULL) { - rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); - rf_cond_destroy(&raidPtr->regionBufferPool.cond); - for(j=0;j<i;j++) { - RF_Free(raidPtr->regionBufferPool.buffers[i], raidPtr->regionBufferPool.bufferSize * sizeof(char)); - } - RF_Free(raidPtr->regionBufferPool.buffers, raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t)); - return(ENOMEM); - } - printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i, - (long)raidPtr->regionBufferPool.buffers[i]); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingRegionBufferPool, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingRegionBufferPool(raidPtr); - return(rc); - } - - /* build pool of parity buffers */ - parityBufferCapacity = maxRegionParityRange; - rc = rf_mutex_init(&raidPtr->parityBufferPool.mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); - } - rc = rf_cond_init(&raidPtr->parityBufferPool.cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); - return(ENOMEM); - } - raidPtr->parityBufferPool.bufferSize = parityBufferCapacity * raidPtr->bytesPerSector; - printf("parityBufferPool.bufferSize %d\n",raidPtr->parityBufferPool.bufferSize); - 
raidPtr->parityBufferPool.totalBuffers = 1; /* for now, only one region at a time may be reintegrated */ - raidPtr->parityBufferPool.availableBuffers = raidPtr->parityBufferPool.totalBuffers; - raidPtr->parityBufferPool.availBuffersIndex = 0; - raidPtr->parityBufferPool.emptyBuffersIndex = 0; - RF_Malloc(raidPtr->parityBufferPool.buffers, raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t), (caddr_t *)); - if (raidPtr->parityBufferPool.buffers == NULL) { - rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); - rf_cond_destroy(&raidPtr->parityBufferPool.cond); - return(ENOMEM); - } - for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) { - RF_Malloc(raidPtr->parityBufferPool.buffers[i], raidPtr->parityBufferPool.bufferSize * sizeof(char), (caddr_t)); - if (raidPtr->parityBufferPool.buffers == NULL) { - rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); - rf_cond_destroy(&raidPtr->parityBufferPool.cond); - for(j=0;j<i;j++) { - RF_Free(raidPtr->parityBufferPool.buffers[i], raidPtr->regionBufferPool.bufferSize * sizeof(char)); - } - RF_Free(raidPtr->parityBufferPool.buffers, raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t)); - return(ENOMEM); - } - printf("parityBufferPool.buffers[%d] = %lx\n", i, - (long)raidPtr->parityBufferPool.buffers[i]); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingParityBufferPool, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingParityBufferPool(raidPtr); - return(rc); - } - - /* initialize parityLogDiskQueue */ - rc = rf_create_managed_mutex(listp, &raidPtr->parityLogDiskQueue.mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); - } - rc = rf_create_managed_cond(listp, &raidPtr->parityLogDiskQueue.cond); - if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); - } - 
raidPtr->parityLogDiskQueue.flushQueue = NULL; - raidPtr->parityLogDiskQueue.reintQueue = NULL; - raidPtr->parityLogDiskQueue.bufHead = NULL; - raidPtr->parityLogDiskQueue.bufTail = NULL; - raidPtr->parityLogDiskQueue.reintHead = NULL; - raidPtr->parityLogDiskQueue.reintTail = NULL; - raidPtr->parityLogDiskQueue.logBlockHead = NULL; - raidPtr->parityLogDiskQueue.logBlockTail = NULL; - raidPtr->parityLogDiskQueue.reintBlockHead = NULL; - raidPtr->parityLogDiskQueue.reintBlockTail = NULL; - raidPtr->parityLogDiskQueue.freeDataList = NULL; - raidPtr->parityLogDiskQueue.freeCommonList = NULL; - - rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingDiskQueue, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(rc); - } - - for (i = 0; i < rf_numParityRegions; i++) - { - rc = rf_mutex_init(&raidPtr->regionInfo[i].mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - for(j=0;j<i;j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t))); - return(ENOMEM); - } - rc = rf_mutex_init(&raidPtr->regionInfo[i].reintMutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_mutex_destroy(&raidPtr->regionInfo[i].mutex); - for(j=0;j<i;j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t))); - return(ENOMEM); - } - raidPtr->regionInfo[i].reintInProgress = RF_FALSE; - raidPtr->regionInfo[i].regionStartAddr = raidPtr->regionLogCapacity * i; - raidPtr->regionInfo[i].parityStartAddr = raidPtr->regionParityRange * i; - if (i < rf_numParityRegions - 1) - { - raidPtr->regionInfo[i].capacity = raidPtr->regionLogCapacity; - raidPtr->regionInfo[i].numSectorsParity = raidPtr->regionParityRange; + /* build pool of region buffers */ + rc = rf_mutex_init(&raidPtr->regionBufferPool.mutex); + 
if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (ENOMEM); } - else - { - raidPtr->regionInfo[i].capacity = lastRegionCapacity; - raidPtr->regionInfo[i].numSectorsParity = raidPtr->sectorsPerDisk - raidPtr->regionParityRange * i; - if (raidPtr->regionInfo[i].numSectorsParity > maxRegionParityRange) - maxRegionParityRange = raidPtr->regionInfo[i].numSectorsParity; + rc = rf_cond_init(&raidPtr->regionBufferPool.cond); + if (rc) { + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); + return (ENOMEM); } - raidPtr->regionInfo[i].diskCount = 0; - RF_ASSERT(raidPtr->regionInfo[i].capacity + raidPtr->regionInfo[i].regionStartAddr <= totalLogCapacity); - RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + raidPtr->regionInfo[i].numSectorsParity <= raidPtr->sectorsPerDisk); - RF_Malloc(raidPtr->regionInfo[i].diskMap, (raidPtr->regionInfo[i].capacity * sizeof(RF_DiskMap_t)), (RF_DiskMap_t *)); - if (raidPtr->regionInfo[i].diskMap == NULL) { - rf_mutex_destroy(&raidPtr->regionInfo[i].mutex); - rf_mutex_destroy(&raidPtr->regionInfo[i].reintMutex); - for(j=0;j<i;j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t))); - return(ENOMEM); - } - raidPtr->regionInfo[i].loggingEnabled = RF_FALSE; - raidPtr->regionInfo[i].coreLog = NULL; - } - rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingRegionInfo, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownParityLoggingRegionInfo(raidPtr); - return(rc); - } - - RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0); - raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED; - rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle, rf_ParityLoggingDiskManager, raidPtr); - if (rc) { - raidPtr->parityLogDiskQueue.threadState = 0; - 
RF_ERRORMSG3("Unable to create parity logging disk thread file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - return(ENOMEM); - } - /* wait for thread to start */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - while(!(raidPtr->parityLogDiskQueue.threadState&RF_PLOG_RUNNING)) { - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - - rc = rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr); - if (rc) { - RF_ERRORMSG1("Got rc=%d adding parity logging shutdown event\n", rc); - rf_ShutdownParityLogging(raidPtr); - return(rc); - } - - if (rf_parityLogDebug) - { - printf(" size of disk log in sectors: %d\n", - (int)totalLogCapacity); - printf(" total number of parity regions is %d\n", (int)rf_numParityRegions); - printf(" nominal sectors of log per parity region is %d\n", (int)raidPtr->regionLogCapacity); - printf(" nominal region fragmentation is %d sectors\n",(int)fragmentation); - printf(" total number of parity logs is %d\n", raidPtr->numParityLogs); - printf(" parity log size is %d sectors\n", raidPtr->numSectorsPerLog); - printf(" total in-core log space is %d bytes\n", (int) rf_totalInCoreLogCapacity); - } - - rf_EnableParityLogging(raidPtr); - - return(0); + raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity * raidPtr->bytesPerSector; + printf("regionBufferPool.bufferSize %d\n", raidPtr->regionBufferPool.bufferSize); + raidPtr->regionBufferPool.totalBuffers = 1; /* for now, only one + * region at a time may + * be reintegrated */ + raidPtr->regionBufferPool.availableBuffers = raidPtr->regionBufferPool.totalBuffers; + raidPtr->regionBufferPool.availBuffersIndex = 0; + raidPtr->regionBufferPool.emptyBuffersIndex = 0; + RF_Malloc(raidPtr->regionBufferPool.buffers, raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t), (caddr_t *)); + if (raidPtr->regionBufferPool.buffers == NULL) { + rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); + 
rf_cond_destroy(&raidPtr->regionBufferPool.cond); + return (ENOMEM); + } + for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) { + RF_Malloc(raidPtr->regionBufferPool.buffers[i], raidPtr->regionBufferPool.bufferSize * sizeof(char), (caddr_t)); + if (raidPtr->regionBufferPool.buffers == NULL) { + rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); + rf_cond_destroy(&raidPtr->regionBufferPool.cond); + for (j = 0; j < i; j++) { + RF_Free(raidPtr->regionBufferPool.buffers[i], raidPtr->regionBufferPool.bufferSize * sizeof(char)); + } + RF_Free(raidPtr->regionBufferPool.buffers, raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t)); + return (ENOMEM); + } + printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i, + (long) raidPtr->regionBufferPool.buffers[i]); + } + rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingRegionBufferPool, raidPtr); + if (rc) { + RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_ShutdownParityLoggingRegionBufferPool(raidPtr); + return (rc); + } + /* build pool of parity buffers */ + parityBufferCapacity = maxRegionParityRange; + rc = rf_mutex_init(&raidPtr->parityBufferPool.mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (rc); + } + rc = rf_cond_init(&raidPtr->parityBufferPool.cond); + if (rc) { + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); + return (ENOMEM); + } + raidPtr->parityBufferPool.bufferSize = parityBufferCapacity * raidPtr->bytesPerSector; + printf("parityBufferPool.bufferSize %d\n", raidPtr->parityBufferPool.bufferSize); + raidPtr->parityBufferPool.totalBuffers = 1; /* for now, only one + * region at a time may + * be reintegrated */ + raidPtr->parityBufferPool.availableBuffers = raidPtr->parityBufferPool.totalBuffers; + raidPtr->parityBufferPool.availBuffersIndex = 0; + 
raidPtr->parityBufferPool.emptyBuffersIndex = 0; + RF_Malloc(raidPtr->parityBufferPool.buffers, raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t), (caddr_t *)); + if (raidPtr->parityBufferPool.buffers == NULL) { + rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); + rf_cond_destroy(&raidPtr->parityBufferPool.cond); + return (ENOMEM); + } + for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) { + RF_Malloc(raidPtr->parityBufferPool.buffers[i], raidPtr->parityBufferPool.bufferSize * sizeof(char), (caddr_t)); + if (raidPtr->parityBufferPool.buffers == NULL) { + rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); + rf_cond_destroy(&raidPtr->parityBufferPool.cond); + for (j = 0; j < i; j++) { + RF_Free(raidPtr->parityBufferPool.buffers[i], raidPtr->regionBufferPool.bufferSize * sizeof(char)); + } + RF_Free(raidPtr->parityBufferPool.buffers, raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t)); + return (ENOMEM); + } + printf("parityBufferPool.buffers[%d] = %lx\n", i, + (long) raidPtr->parityBufferPool.buffers[i]); + } + rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingParityBufferPool, raidPtr); + if (rc) { + RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_ShutdownParityLoggingParityBufferPool(raidPtr); + return (rc); + } + /* initialize parityLogDiskQueue */ + rc = rf_create_managed_mutex(listp, &raidPtr->parityLogDiskQueue.mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (rc); + } + rc = rf_create_managed_cond(listp, &raidPtr->parityLogDiskQueue.cond); + if (rc) { + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (rc); + } + raidPtr->parityLogDiskQueue.flushQueue = NULL; + raidPtr->parityLogDiskQueue.reintQueue = NULL; + raidPtr->parityLogDiskQueue.bufHead = NULL; + raidPtr->parityLogDiskQueue.bufTail = NULL; + raidPtr->parityLogDiskQueue.reintHead = NULL; + 
raidPtr->parityLogDiskQueue.reintTail = NULL; + raidPtr->parityLogDiskQueue.logBlockHead = NULL; + raidPtr->parityLogDiskQueue.logBlockTail = NULL; + raidPtr->parityLogDiskQueue.reintBlockHead = NULL; + raidPtr->parityLogDiskQueue.reintBlockTail = NULL; + raidPtr->parityLogDiskQueue.freeDataList = NULL; + raidPtr->parityLogDiskQueue.freeCommonList = NULL; + + rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingDiskQueue, raidPtr); + if (rc) { + RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (rc); + } + for (i = 0; i < rf_numParityRegions; i++) { + rc = rf_mutex_init(&raidPtr->regionInfo[i].mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + for (j = 0; j < i; j++) + FreeRegionInfo(raidPtr, j); + RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t))); + return (ENOMEM); + } + rc = rf_mutex_init(&raidPtr->regionInfo[i].reintMutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_mutex_destroy(&raidPtr->regionInfo[i].mutex); + for (j = 0; j < i; j++) + FreeRegionInfo(raidPtr, j); + RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t))); + return (ENOMEM); + } + raidPtr->regionInfo[i].reintInProgress = RF_FALSE; + raidPtr->regionInfo[i].regionStartAddr = raidPtr->regionLogCapacity * i; + raidPtr->regionInfo[i].parityStartAddr = raidPtr->regionParityRange * i; + if (i < rf_numParityRegions - 1) { + raidPtr->regionInfo[i].capacity = raidPtr->regionLogCapacity; + raidPtr->regionInfo[i].numSectorsParity = raidPtr->regionParityRange; + } else { + raidPtr->regionInfo[i].capacity = lastRegionCapacity; + raidPtr->regionInfo[i].numSectorsParity = raidPtr->sectorsPerDisk - raidPtr->regionParityRange * i; + if (raidPtr->regionInfo[i].numSectorsParity > maxRegionParityRange) + maxRegionParityRange = raidPtr->regionInfo[i].numSectorsParity; + } + 
raidPtr->regionInfo[i].diskCount = 0; + RF_ASSERT(raidPtr->regionInfo[i].capacity + raidPtr->regionInfo[i].regionStartAddr <= totalLogCapacity); + RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + raidPtr->regionInfo[i].numSectorsParity <= raidPtr->sectorsPerDisk); + RF_Malloc(raidPtr->regionInfo[i].diskMap, (raidPtr->regionInfo[i].capacity * sizeof(RF_DiskMap_t)), (RF_DiskMap_t *)); + if (raidPtr->regionInfo[i].diskMap == NULL) { + rf_mutex_destroy(&raidPtr->regionInfo[i].mutex); + rf_mutex_destroy(&raidPtr->regionInfo[i].reintMutex); + for (j = 0; j < i; j++) + FreeRegionInfo(raidPtr, j); + RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t))); + return (ENOMEM); + } + raidPtr->regionInfo[i].loggingEnabled = RF_FALSE; + raidPtr->regionInfo[i].coreLog = NULL; + } + rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingRegionInfo, raidPtr); + if (rc) { + RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_ShutdownParityLoggingRegionInfo(raidPtr); + return (rc); + } + RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0); + raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED; + rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle, rf_ParityLoggingDiskManager, raidPtr); + if (rc) { + raidPtr->parityLogDiskQueue.threadState = 0; + RF_ERRORMSG3("Unable to create parity logging disk thread file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); + return (ENOMEM); + } + /* wait for thread to start */ + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) { + RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex); + } + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + + rc = rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr); + if (rc) { + RF_ERRORMSG1("Got rc=%d adding parity logging shutdown event\n", rc); + rf_ShutdownParityLogging(raidPtr); + return (rc); + } + if 
(rf_parityLogDebug) { + printf(" size of disk log in sectors: %d\n", + (int) totalLogCapacity); + printf(" total number of parity regions is %d\n", (int) rf_numParityRegions); + printf(" nominal sectors of log per parity region is %d\n", (int) raidPtr->regionLogCapacity); + printf(" nominal region fragmentation is %d sectors\n", (int) fragmentation); + printf(" total number of parity logs is %d\n", raidPtr->numParityLogs); + printf(" parity log size is %d sectors\n", raidPtr->numSectorsPerLog); + printf(" total in-core log space is %d bytes\n", (int) rf_totalInCoreLogCapacity); + } + rf_EnableParityLogging(raidPtr); + + return (0); } -static void FreeRegionInfo( - RF_Raid_t *raidPtr, - RF_RegionId_t regionID) +static void +FreeRegionInfo( + RF_Raid_t * raidPtr, + RF_RegionId_t regionID) { - RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_Free(raidPtr->regionInfo[regionID].diskMap, (raidPtr->regionInfo[regionID].capacity * sizeof(RF_DiskMap_t))); - if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) { - rf_ReleaseParityLogs(raidPtr, raidPtr->regionInfo[regionID].coreLog); - raidPtr->regionInfo[regionID].coreLog = NULL; - } - else { - RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL); - RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0); - } - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - rf_mutex_destroy(&raidPtr->regionInfo[regionID].mutex); - rf_mutex_destroy(&raidPtr->regionInfo[regionID].reintMutex); + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + RF_Free(raidPtr->regionInfo[regionID].diskMap, (raidPtr->regionInfo[regionID].capacity * sizeof(RF_DiskMap_t))); + if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) { + rf_ReleaseParityLogs(raidPtr, raidPtr->regionInfo[regionID].coreLog); + raidPtr->regionInfo[regionID].coreLog = NULL; + } else { + RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL); + RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0); + } + 
RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + rf_mutex_destroy(&raidPtr->regionInfo[regionID].mutex); + rf_mutex_destroy(&raidPtr->regionInfo[regionID].reintMutex); } -static void FreeParityLogQueue( - RF_Raid_t *raidPtr, - RF_ParityLogQueue_t *queue) +static void +FreeParityLogQueue( + RF_Raid_t * raidPtr, + RF_ParityLogQueue_t * queue) { - RF_ParityLog_t *l1, *l2; - - RF_LOCK_MUTEX(queue->mutex); - l1 = queue->parityLogs; - while (l1) - { - l2 = l1; - l1 = l2->next; - RF_Free(l2->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); - RF_Free(l2, sizeof(RF_ParityLog_t)); - } - RF_UNLOCK_MUTEX(queue->mutex); - rf_mutex_destroy(&queue->mutex); + RF_ParityLog_t *l1, *l2; + + RF_LOCK_MUTEX(queue->mutex); + l1 = queue->parityLogs; + while (l1) { + l2 = l1; + l1 = l2->next; + RF_Free(l2->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); + RF_Free(l2, sizeof(RF_ParityLog_t)); + } + RF_UNLOCK_MUTEX(queue->mutex); + rf_mutex_destroy(&queue->mutex); } -static void FreeRegionBufferQueue(RF_RegionBufferQueue_t *queue) +static void +FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue) { - int i; - - RF_LOCK_MUTEX(queue->mutex); - if (queue->availableBuffers != queue->totalBuffers) - { - printf("Attempt to free region queue which is still in use!\n"); - RF_ASSERT(0); - } - for (i = 0; i < queue->totalBuffers; i++) - RF_Free(queue->buffers[i], queue->bufferSize); - RF_Free(queue->buffers, queue->totalBuffers * sizeof(caddr_t)); - RF_UNLOCK_MUTEX(queue->mutex); - rf_mutex_destroy(&queue->mutex); + int i; + + RF_LOCK_MUTEX(queue->mutex); + if (queue->availableBuffers != queue->totalBuffers) { + printf("Attempt to free region queue which is still in use!\n"); + RF_ASSERT(0); + } + for (i = 0; i < queue->totalBuffers; i++) + RF_Free(queue->buffers[i], queue->bufferSize); + RF_Free(queue->buffers, queue->totalBuffers * sizeof(caddr_t)); + RF_UNLOCK_MUTEX(queue->mutex); + rf_mutex_destroy(&queue->mutex); } -static void 
rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg) +static void +rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg) { - RF_Raid_t *raidPtr; - RF_RegionId_t i; - - raidPtr = (RF_Raid_t *)arg; - if (rf_parityLogDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] ShutdownParityLoggingRegionInfo\n", tid); - } - /* free region information structs */ - for (i = 0; i < rf_numParityRegions; i++) - FreeRegionInfo(raidPtr, i); - RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(raidPtr->regionInfo))); - raidPtr->regionInfo = NULL; + RF_Raid_t *raidPtr; + RF_RegionId_t i; + + raidPtr = (RF_Raid_t *) arg; + if (rf_parityLogDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] ShutdownParityLoggingRegionInfo\n", tid); + } + /* free region information structs */ + for (i = 0; i < rf_numParityRegions; i++) + FreeRegionInfo(raidPtr, i); + RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(raidPtr->regionInfo))); + raidPtr->regionInfo = NULL; } -static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg) +static void +rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg) { - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *)arg; - if (rf_parityLogDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] ShutdownParityLoggingPool\n", tid); - } - /* free contents of parityLogPool */ - FreeParityLogQueue(raidPtr, &raidPtr->parityLogPool); - RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); + RF_Raid_t *raidPtr; + + raidPtr = (RF_Raid_t *) arg; + if (rf_parityLogDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] ShutdownParityLoggingPool\n", tid); + } + /* free contents of parityLogPool */ + FreeParityLogQueue(raidPtr, &raidPtr->parityLogPool); + RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); } -static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg) +static void 
+rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg) { - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *)arg; - if (rf_parityLogDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] ShutdownParityLoggingRegionBufferPool\n", tid); - } - FreeRegionBufferQueue(&raidPtr->regionBufferPool); + RF_Raid_t *raidPtr; + + raidPtr = (RF_Raid_t *) arg; + if (rf_parityLogDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] ShutdownParityLoggingRegionBufferPool\n", tid); + } + FreeRegionBufferQueue(&raidPtr->regionBufferPool); } -static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg) +static void +rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg) { - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *)arg; - if (rf_parityLogDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] ShutdownParityLoggingParityBufferPool\n", tid); - } - FreeRegionBufferQueue(&raidPtr->parityBufferPool); + RF_Raid_t *raidPtr; + + raidPtr = (RF_Raid_t *) arg; + if (rf_parityLogDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] ShutdownParityLoggingParityBufferPool\n", tid); + } + FreeRegionBufferQueue(&raidPtr->parityBufferPool); } -static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg) +static void +rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg) { - RF_ParityLogData_t *d; - RF_CommonLogData_t *c; - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *)arg; - if (rf_parityLogDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] ShutdownParityLoggingDiskQueue\n", tid); - } - /* free disk manager stuff */ - RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL); - RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL); - RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL); - RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL); - while (raidPtr->parityLogDiskQueue.freeDataList) - { - d = raidPtr->parityLogDiskQueue.freeDataList; - raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; - 
RF_Free(d, sizeof(RF_ParityLogData_t)); - } - while (raidPtr->parityLogDiskQueue.freeCommonList) - { - c = raidPtr->parityLogDiskQueue.freeCommonList; - rf_mutex_destroy(&c->mutex); - raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; - RF_Free(c, sizeof(RF_CommonLogData_t)); - } + RF_ParityLogData_t *d; + RF_CommonLogData_t *c; + RF_Raid_t *raidPtr; + + raidPtr = (RF_Raid_t *) arg; + if (rf_parityLogDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] ShutdownParityLoggingDiskQueue\n", tid); + } + /* free disk manager stuff */ + RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL); + RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL); + RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL); + RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL); + while (raidPtr->parityLogDiskQueue.freeDataList) { + d = raidPtr->parityLogDiskQueue.freeDataList; + raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; + RF_Free(d, sizeof(RF_ParityLogData_t)); + } + while (raidPtr->parityLogDiskQueue.freeCommonList) { + c = raidPtr->parityLogDiskQueue.freeCommonList; + rf_mutex_destroy(&c->mutex); + raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; + RF_Free(c, sizeof(RF_CommonLogData_t)); + } } -static void rf_ShutdownParityLogging(RF_ThreadArg_t arg) +static void +rf_ShutdownParityLogging(RF_ThreadArg_t arg) { - RF_Raid_t *raidPtr; - - raidPtr = (RF_Raid_t *)arg; - if (rf_parityLogDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] ShutdownParityLogging\n", tid); - } -#ifndef SIMULATE - /* shutdown disk thread */ - /* This has the desirable side-effect of forcing all regions to be - reintegrated. This is necessary since all parity log maps are - currently held in volatile memory. 
*/ - - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE; - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); - /* - * pLogDiskThread will now terminate when queues are cleared - * now wait for it to be done - */ - RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - while(!(raidPtr->parityLogDiskQueue.threadState&RF_PLOG_SHUTDOWN)) { - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex); - } - RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); -#else /* !SIMULATE */ - /* explicitly call shutdown routines which force reintegration */ - rf_ShutdownLogging(raidPtr); -#endif /* !SIMULATE */ - if (rf_parityLogDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] ShutdownParityLogging done (thread completed)\n", tid); - } + RF_Raid_t *raidPtr; + + raidPtr = (RF_Raid_t *) arg; + if (rf_parityLogDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] ShutdownParityLogging\n", tid); + } + /* shutdown disk thread */ + /* This has the desirable side-effect of forcing all regions to be + * reintegrated. This is necessary since all parity log maps are + * currently held in volatile memory. 
*/ + + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE; + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); + /* + * pLogDiskThread will now terminate when queues are cleared + * now wait for it to be done + */ + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) { + RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex); + } + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + if (rf_parityLogDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] ShutdownParityLogging done (thread completed)\n", tid); + } } -int rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t *raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr) { - return(20); + return (20); } -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t *raidPtr) +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr) { - return(10); + return (10); } - /* return the region ID for a given RAID address */ -RF_RegionId_t rf_MapRegionIDParityLogging( - RF_Raid_t *raidPtr, - RF_SectorNum_t address) +RF_RegionId_t +rf_MapRegionIDParityLogging( + RF_Raid_t * raidPtr, + RF_SectorNum_t address) { - RF_RegionId_t regionID; + RF_RegionId_t regionID; /* regionID = address / (raidPtr->regionParityRange * raidPtr->Layout.numDataCol); */ - regionID = address / raidPtr->regionParityRange; - if (regionID == rf_numParityRegions) - { - /* last region may be larger than other regions */ - regionID--; - } - RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr); - RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); - RF_ASSERT(regionID < rf_numParityRegions); - return(regionID); + regionID = address / raidPtr->regionParityRange; + if (regionID == 
rf_numParityRegions) { + /* last region may be larger than other regions */ + regionID--; + } + RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr); + RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); + RF_ASSERT(regionID < rf_numParityRegions); + return (regionID); } /* given a logical RAID sector, determine physical disk address of data */ -void rf_MapSectorParityLogging( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapSectorParityLogging( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - /* *col = (SUID % (raidPtr->numCol - raidPtr->Layout.numParityLogCol)); */ - *col = SUID % raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + *row = 0; + /* *col = (SUID % (raidPtr->numCol - + * raidPtr->Layout.numParityLogCol)); */ + *col = SUID % raidPtr->Layout.numDataCol; + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } /* given a logical RAID sector, determine physical disk address of parity */ -void rf_MapParityParityLogging( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapParityParityLogging( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - /* 
*col = raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%(raidPtr->numCol - raidPtr->Layout.numParityLogCol); */ - *col = raidPtr->Layout.numDataCol; - *diskSector =(SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + + *row = 0; + /* *col = + * raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%(raidPt + * r->numCol - raidPtr->Layout.numParityLogCol); */ + *col = raidPtr->Layout.numDataCol; + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } /* given a regionID and sector offset, determine the physical disk address of the parity log */ -void rf_MapLogParityLogging( - RF_Raid_t *raidPtr, - RF_RegionId_t regionID, - RF_SectorNum_t regionOffset, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *startSector) +void +rf_MapLogParityLogging( + RF_Raid_t * raidPtr, + RF_RegionId_t regionID, + RF_SectorNum_t regionOffset, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * startSector) { - *row = 0; - *col = raidPtr->numCol - 1; - *startSector = raidPtr->regionInfo[regionID].regionStartAddr + regionOffset; + *row = 0; + *col = raidPtr->numCol - 1; + *startSector = raidPtr->regionInfo[regionID].regionStartAddr + regionOffset; } /* given a regionID, determine the physical disk address of the logged parity for that region */ -void rf_MapRegionParity( - RF_Raid_t *raidPtr, - RF_RegionId_t regionID, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *startSector, - RF_SectorCount_t *numSector) +void +rf_MapRegionParity( + RF_Raid_t * raidPtr, + RF_RegionId_t regionID, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * startSector, + RF_SectorCount_t * numSector) { - *row = 0; - *col = raidPtr->numCol - 2; - *startSector = raidPtr->regionInfo[regionID].parityStartAddr; - 
*numSector = raidPtr->regionInfo[regionID].numSectorsParity; + *row = 0; + *col = raidPtr->numCol - 2; + *startSector = raidPtr->regionInfo[regionID].parityStartAddr; + *numSector = raidPtr->regionInfo[regionID].numSectorsParity; } /* given a logical RAID address, determine the participating disks in the stripe */ -void rf_IdentifyStripeParityLogging( - RF_Raid_t *raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t **diskids, - RF_RowCol_t *outRow) +void +rf_IdentifyStripeParityLogging( + RF_Raid_t * raidPtr, + RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, + RF_RowCol_t * outRow) { - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - *outRow = 0; - *diskids = info->stripeIdentifier[ stripeID % raidPtr->numCol ]; + RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); + RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + *outRow = 0; + *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; } -void rf_MapSIDToPSIDParityLogging( - RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru) +void +rf_MapSIDToPSIDParityLogging( + RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, + RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru) { - *which_ru = 0; - *psID = stripeID; + *which_ru = 0; + *psID = stripeID; } @@ -956,133 +793,139 @@ void rf_MapSIDToPSIDParityLogging( * one to a function that will return information about the DAG, and * another to a function that will create the dag. 
*/ -void rf_ParityLoggingDagSelect( - RF_Raid_t *raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t *asmp, - RF_VoidFuncPtr *createFunc) +void +rf_ParityLoggingDagSelect( + RF_Raid_t * raidPtr, + RF_IoType_t type, + RF_AccessStripeMap_t * asmp, + RF_VoidFuncPtr * createFunc) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_PhysDiskAddr_t *failedPDA=NULL; - RF_RowCol_t frow, fcol; - RF_RowStatus_t rstat; - int prior_recon; - int tid; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (asmp->numDataFailed + asmp->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } else if (asmp->numDataFailed + asmp->numParityFailed == 1) { - - /* if under recon & already reconstructed, redirect the access to the spare drive - * and eliminate the failure indication - */ - failedPDA = asmp->failedPDAs[0]; - frow = failedPDA->row; fcol = failedPDA->col; - rstat = raidPtr->status[failedPDA->row]; - prior_recon = (rstat == rf_rs_reconfigured) || ( - (rstat == rf_rs_reconstructing) ? 
- rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); - if (prior_recon) { - RF_RowCol_t or = failedPDA->row,oc=failedPDA->col; - RF_SectorNum_t oo=failedPDA->startSector; - if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { /* redirect to dist spare space */ - - if (failedPDA == asmp->parityInfo) { - - /* parity has failed */ - (layoutPtr->map->MapParity)(raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - if (asmp->parityInfo->next) { /* redir 2nd component, if any */ - RF_PhysDiskAddr_t *p = asmp->parityInfo->next; - RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; - p->row = failedPDA->row; - p->col = failedPDA->col; - p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + - SUoffs; /* cheating: startSector is not really a RAID address */ - } - - } else if (asmp->parityInfo->next && failedPDA == asmp->parityInfo->next) { - RF_ASSERT(0); /* should not ever happen */ - } else { - - /* data has failed */ - (layoutPtr->map->MapSector)(raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - } - - } else { /* redirect to dedicated spare space */ - - failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; - failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; - - /* the parity may have two distinct components, both of which may need to be redirected */ - if (asmp->parityInfo->next) { - if (failedPDA == asmp->parityInfo) { - failedPDA->next->row = failedPDA->row; - failedPDA->next->col = failedPDA->col; - } else if (failedPDA == asmp->parityInfo->next) { /* paranoid: should never occur */ - asmp->parityInfo->row = failedPDA->row; - asmp->parityInfo->col = failedPDA->col; - } - } - } - - RF_ASSERT(failedPDA->col != -1); - - if (rf_dagDebug || rf_mapDebug) { - rf_get_threadid(tid); - printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - 
tid,type,or,oc,(long)oo,failedPDA->row,failedPDA->col,(long)failedPDA->startSector); - } - - asmp->numDataFailed = asmp->numParityFailed = 0; - } - - } - - - if (type == RF_IO_TYPE_READ) { - - if (asmp->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG; - else - *createFunc = (RF_VoidFuncPtr)rf_CreateRaidFiveDegradedReadDAG; - - } - else { - - - /* if mirroring, always use large writes. If the access requires two distinct parity updates, - * always do a small write. If the stripe contains a failure but the access does not, do a - * small write. - * The first conditional (numStripeUnitsAccessed <= numDataCol/2) uses a less-than-or-equal - * rather than just a less-than because when G is 3 or 4, numDataCol/2 is 1, and I want - * single-stripe-unit updates to use just one disk. - */ - if ( (asmp->numDataFailed + asmp->numParityFailed) == 0) { - if (((asmp->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol!=1)) || - (asmp->parityInfo->next!=NULL) || rf_CheckStripeForFailures(raidPtr, asmp)) { - *createFunc = (RF_VoidFuncPtr)rf_CreateParityLoggingSmallWriteDAG; - } - else - *createFunc = (RF_VoidFuncPtr)rf_CreateParityLoggingLargeWriteDAG; - } - else - if (asmp->numParityFailed == 1) - *createFunc = (RF_VoidFuncPtr)rf_CreateNonRedundantWriteDAG; - else - if (asmp->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; - else - *createFunc = (RF_VoidFuncPtr)rf_CreateDegradedWriteDAG; - } + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_PhysDiskAddr_t *failedPDA = NULL; + RF_RowCol_t frow, fcol; + RF_RowStatus_t rstat; + int prior_recon; + int tid; + + RF_ASSERT(RF_IO_IS_R_OR_W(type)); + + if (asmp->numDataFailed + asmp->numParityFailed > 1) { + RF_ERRORMSG("Multiple disks failed in a single group! 
Aborting I/O operation.\n"); + /* *infoFunc = */ *createFunc = NULL; + return; + } else + if (asmp->numDataFailed + asmp->numParityFailed == 1) { + + /* if under recon & already reconstructed, redirect + * the access to the spare drive and eliminate the + * failure indication */ + failedPDA = asmp->failedPDAs[0]; + frow = failedPDA->row; + fcol = failedPDA->col; + rstat = raidPtr->status[failedPDA->row]; + prior_recon = (rstat == rf_rs_reconfigured) || ( + (rstat == rf_rs_reconstructing) ? + rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 + ); + if (prior_recon) { + RF_RowCol_t or = failedPDA->row, oc = failedPDA->col; + RF_SectorNum_t oo = failedPDA->startSector; + if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { /* redirect to dist + * spare space */ + + if (failedPDA == asmp->parityInfo) { + + /* parity has failed */ + (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, &failedPDA->row, + &failedPDA->col, &failedPDA->startSector, RF_REMAP); + + if (asmp->parityInfo->next) { /* redir 2nd component, + * if any */ + RF_PhysDiskAddr_t *p = asmp->parityInfo->next; + RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; + p->row = failedPDA->row; + p->col = failedPDA->col; + p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + + SUoffs; /* cheating: + * startSector is not + * really a RAID address */ + } + } else + if (asmp->parityInfo->next && failedPDA == asmp->parityInfo->next) { + RF_ASSERT(0); /* should not ever + * happen */ + } else { + + /* data has failed */ + (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, &failedPDA->row, + &failedPDA->col, &failedPDA->startSector, RF_REMAP); + + } + + } else { /* redirect to dedicated spare + * space */ + + failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; + failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; + + /* the parity may have two distinct + * components, both of which may need + * 
to be redirected */ + if (asmp->parityInfo->next) { + if (failedPDA == asmp->parityInfo) { + failedPDA->next->row = failedPDA->row; + failedPDA->next->col = failedPDA->col; + } else + if (failedPDA == asmp->parityInfo->next) { /* paranoid: should + * never occur */ + asmp->parityInfo->row = failedPDA->row; + asmp->parityInfo->col = failedPDA->col; + } + } + } + + RF_ASSERT(failedPDA->col != -1); + + if (rf_dagDebug || rf_mapDebug) { + rf_get_threadid(tid); + printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", + tid, type, or, oc, (long) oo, failedPDA->row, failedPDA->col, (long) failedPDA->startSector); + } + asmp->numDataFailed = asmp->numParityFailed = 0; + } + } + if (type == RF_IO_TYPE_READ) { + + if (asmp->numDataFailed == 0) + *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; + else + *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; + + } else { + + + /* if mirroring, always use large writes. If the access + * requires two distinct parity updates, always do a small + * write. If the stripe contains a failure but the access + * does not, do a small write. The first conditional + * (numStripeUnitsAccessed <= numDataCol/2) uses a + * less-than-or-equal rather than just a less-than because + * when G is 3 or 4, numDataCol/2 is 1, and I want + * single-stripe-unit updates to use just one disk. 
*/ + if ((asmp->numDataFailed + asmp->numParityFailed) == 0) { + if (((asmp->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || + (asmp->parityInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmp)) { + *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingSmallWriteDAG; + } else + *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingLargeWriteDAG; + } else + if (asmp->numParityFailed == 1) + *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; + else + if (asmp->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) + *createFunc = NULL; + else + *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; + } } - -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylogging.h b/sys/dev/raidframe/rf_paritylogging.h index 3a2db063c28..532da664940 100644 --- a/sys/dev/raidframe/rf_paritylogging.h +++ b/sys/dev/raidframe/rf_paritylogging.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_paritylogging.h,v 1.1 1999/01/11 14:29:36 niklas Exp $ */ -/* $NetBSD: rf_paritylogging.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_paritylogging.h,v 1.2 1999/02/16 00:03:07 niklas Exp $ */ +/* $NetBSD: rf_paritylogging.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,109 +29,42 @@ /* header file for Parity Logging */ -/* - * : - * Log: rf_paritylogging.h,v - * Revision 1.22 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.21 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.20 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.19 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.18 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.17 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.16 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.15 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.14 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.13 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.12 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.11 1995/12/06 20:56:25 wvcii - * added prototypes - * - * Revision 1.10 1995/11/30 16:06:58 wvcii - * added copyright info - * - * Revision 1.9 1995/11/17 19:53:08 wvcii - * fixed bug in MapParityRegion prototype - * - * Revision 1.8 1995/11/17 19:09:24 wvcii - * added prototypint to MapParity - * - * Revision 1.7 1995/11/07 15:28:17 wvcii - * changed ParityLoggingDagSelect prototype - * function no longer generates numHdrSucc, numTermAnt - * - * Revision 1.6 1995/07/07 00:16:50 wvcii - * this version free from deadlock, fails parity verification - * - * Revision 1.5 1995/06/23 13:39:44 robby - * 
updeated to prototypes in rf_layout.h - * - */ - #ifndef _RF__RF_PARITYLOGGING_H_ #define _RF__RF_PARITYLOGGING_H_ -int rf_ConfigureParityLogging(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t *raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t *raidPtr); -RF_RegionId_t rf_MapRegionIDParityLogging(RF_Raid_t *raidPtr, - RF_SectorNum_t address); -void rf_MapSectorParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, - int remap); -void rf_MapParityParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, - int remap); -void rf_MapLogParityLogging(RF_Raid_t *raidPtr, RF_RegionId_t regionID, - RF_SectorNum_t regionOffset, RF_RowCol_t *row, RF_RowCol_t *col, - RF_SectorNum_t *startSector); -void rf_MapRegionParity(RF_Raid_t *raidPtr, RF_RegionId_t regionID, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *startSector, - RF_SectorCount_t *numSector); -void rf_IdentifyStripeParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, RF_RowCol_t *outRow); -void rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru); -void rf_ParityLoggingDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc); +int +rf_ConfigureParityLogging(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr); +RF_RegionId_t +rf_MapRegionIDParityLogging(RF_Raid_t * raidPtr, + RF_SectorNum_t address); +void +rf_MapSectorParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, 
RF_SectorNum_t * diskSector, + int remap); +void +rf_MapParityParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, + int remap); +void +rf_MapLogParityLogging(RF_Raid_t * raidPtr, RF_RegionId_t regionID, + RF_SectorNum_t regionOffset, RF_RowCol_t * row, RF_RowCol_t * col, + RF_SectorNum_t * startSector); +void +rf_MapRegionParity(RF_Raid_t * raidPtr, RF_RegionId_t regionID, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * startSector, + RF_SectorCount_t * numSector); +void +rf_IdentifyStripeParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outRow); +void +rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru); +void +rf_ParityLoggingDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -#endif /* !_RF__RF_PARITYLOGGING_H_ */ +#endif /* !_RF__RF_PARITYLOGGING_H_ */ diff --git a/sys/dev/raidframe/rf_parityloggingdags.c b/sys/dev/raidframe/rf_parityloggingdags.c index 1cc51d0a7e3..921a80e03f3 100644 --- a/sys/dev/raidframe/rf_parityloggingdags.c +++ b/sys/dev/raidframe/rf_parityloggingdags.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_parityloggingdags.c,v 1.1 1999/01/11 14:29:37 niklas Exp $ */ -/* $NetBSD: rf_parityloggingdags.c,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_parityloggingdags.c,v 1.2 1999/02/16 00:03:08 niklas Exp $ */ +/* $NetBSD: rf_parityloggingdags.c,v 1.3 1999/02/05 00:06:14 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,121 +27,6 @@ * rights to redistribute these changes. 
*/ -/* - * Log: rf_parityloggingdags.c,v - * Revision 1.27 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.26 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.25 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.24 1996/06/11 13:47:21 jimz - * fix up for in-kernel compilation - * - * Revision 1.23 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.22 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.21 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.20 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.19 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.18 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.17 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.16 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.15 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.14 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.13 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.12 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.11 1996/05/03 19:42:02 wvcii - * added includes for dag library - * - * Revision 1.10 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.9 1995/12/06 20:55:24 wvcii - * added prototyping - * fixed bug in dag header numSuccedents count for both small and large dags - * - * Revision 1.8 1995/11/30 16:08:01 wvcii - * added copyright info - * - * Revision 1.7 1995/11/07 15:29:05 wvcii - * reorganized code, adding comments and asserts - * dag creation routines now generate term node - * encoded commit point, barrier, and antecedence types into dags - * - * Revision 1.6 1995/09/07 15:52:06 jimz - * noop compile when INCLUDE_PARITYLOGGING not defined - * - * Revision 1.5 1995/06/15 13:51:53 robby - * updated some wrong prototypes (after prototyping rf_dagutils.h) - * - * Revision 1.4 1995/06/09 13:15:05 wvcii - * code is now nonblocking - * - * Revision 
1.3 95/05/31 13:09:14 wvcii - * code debug - * - * Revision 1.2 1995/05/21 15:34:14 wvcii - * code debug - * - * Revision 1.1 95/05/16 14:36:53 wvcii - * Initial revision - * - * - */ - #include "rf_archs.h" #if RF_INCLUDE_PARITYLOGGING > 0 @@ -183,193 +68,205 @@ * *****************************************************************************/ -void rf_CommonCreateParityLoggingLargeWriteDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - int nfaults, - int (*redFunc)(RF_DagNode_t *)) +void +rf_CommonCreateParityLoggingLargeWriteDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + int nfaults, + int (*redFunc) (RF_DagNode_t *)) { - RF_DagNode_t *nodes, *wndNodes, *rodNodes=NULL, *syncNode, *xorNode, *lpoNode, *blockNode, *unblockNode, *termNode; - int nWndNodes, nRodNodes, i; - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_AccessStripeMapHeader_t *new_asm_h[2]; - int nodeNum, asmNum; - RF_ReconUnitNum_t which_ru; - char *sosBuffer, *eosBuffer; - RF_PhysDiskAddr_t *pda; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - - if (rf_dagDebug) - printf("[Creating parity-logging large-write DAG]\n"); - RF_ASSERT(nfaults == 1); /* this arch only single fault tolerant */ - dag_h->creator = "ParityLoggingLargeWriteDAG"; - - /* alloc the Wnd nodes, the xor node, and the Lpo node */ - nWndNodes = asmap->numStripeUnitsAccessed; - RF_CallocAndAdd(nodes, nWndNodes + 6, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - wndNodes = &nodes[i]; i += nWndNodes; - xorNode = &nodes[i]; i += 1; - lpoNode = &nodes[i]; i += 1; - blockNode = &nodes[i]; i += 1; - syncNode = &nodes[i]; i += 1; - unblockNode = &nodes[i]; i += 1; - termNode = &nodes[i]; i += 1; - - dag_h->numCommitNodes = 
nWndNodes + 1; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); - if (nRodNodes > 0) - RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - - /* begin node initialization */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWndNodes + 1, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes + 1, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the Rod nodes */ - for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { - if (new_asm_h[asmNum]) { - pda = new_asm_h[asmNum]->stripeMap->physInfo; - while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc,rf_DiskReadUndoFunc,rf_GenericWakeupFunc,1,1,4,0, dag_h, "Rod", allocList); - rodNodes[nodeNum].params[0].p = pda; - rodNodes[nodeNum].params[1].p = pda->bufPtr; - rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - nodeNum++; - pda=pda->next; - } - } - } - RF_ASSERT(nodeNum == nRodNodes); - - /* initialize the wnd nodes */ - pda = asmap->physInfo; - for (i=0; i < nWndNodes; i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - RF_ASSERT(pda != NULL); - wndNodes[i].params[0].p = pda; - wndNodes[i].params[1].p = pda->bufPtr; - wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - 
pda = pda->next; - } - - /* initialize the redundancy node */ - rf_InitNode(xorNode, rf_wait, RF_TRUE, redFunc, rf_NullNodeUndoFunc, NULL, 1, 1, 2*(nWndNodes+nRodNodes)+1, 1, dag_h, "Xr ", allocList); - xorNode->flags |= RF_DAGNODE_FLAG_YIELD; - for (i=0; i < nWndNodes; i++) { - xorNode->params[2*i+0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2*i+1] = wndNodes[i].params[1]; /* buf ptr */ - } - for (i=0; i < nRodNodes; i++) { - xorNode->params[2*(nWndNodes+i)+0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2*(nWndNodes+i)+1] = rodNodes[i].params[1]; /* buf ptr */ - } - xorNode->params[2*(nWndNodes+nRodNodes)].p = raidPtr; /* xor node needs to get at RAID information */ - - /* look for an Rod node that reads a complete SU. If none, alloc a buffer to receive the parity info. - * Note that we can't use a new data buffer because it will not have gotten written when the xor occurs. - */ - for (i = 0; i < nRodNodes; i++) - if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) - break; - if (i == nRodNodes) { - RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); - } - else { - xorNode->results[0] = rodNodes[i].params[1].p; - } - - /* initialize the Lpo node */ - rf_InitNode(lpoNode, rf_wait, RF_FALSE, rf_ParityLogOverwriteFunc, rf_ParityLogOverwriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpo", allocList); - - lpoNode->params[0].p = asmap->parityInfo; - lpoNode->params[1].p = xorNode->results[0]; - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must describe entire parity unit */ - - /* connect nodes to form graph */ - - /* connect dag header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect the block node to the Rod nodes */ - RF_ASSERT(blockNode->numSuccedents == nRodNodes + 1); - for (i = 0; i < 
nRodNodes; i++) { - RF_ASSERT(rodNodes[i].numAntecedents == 1); - blockNode->succedents[i] = &rodNodes[i]; - rodNodes[i].antecedents[0] = blockNode; - rodNodes[i].antType[0] = rf_control; - } - - /* connect the block node to the sync node */ - /* necessary if nRodNodes == 0 */ - RF_ASSERT(syncNode->numAntecedents == nRodNodes + 1); - blockNode->succedents[nRodNodes] = syncNode; - syncNode->antecedents[0] = blockNode; - syncNode->antType[0] = rf_control; - - /* connect the Rod nodes to the syncNode */ - for (i = 0; i < nRodNodes; i++) { - rodNodes[i].succedents[0] = syncNode; - syncNode->antecedents[1 + i] = &rodNodes[i]; - syncNode->antType[1 + i] = rf_control; - } - - /* connect the sync node to the xor node */ - RF_ASSERT(syncNode->numSuccedents == nWndNodes + 1); - RF_ASSERT(xorNode->numAntecedents == 1); - syncNode->succedents[0] = xorNode; - xorNode->antecedents[0] = syncNode; - xorNode->antType[0] = rf_trueData; /* carry forward from sync */ - - /* connect the sync node to the Wnd nodes */ - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numAntecedents == 1); - syncNode->succedents[1 + i] = &wndNodes[i]; - wndNodes[i].antecedents[0] = syncNode; - wndNodes[i].antType[0] = rf_control; - } - - /* connect the xor node to the Lpo node */ - RF_ASSERT(xorNode->numSuccedents == 1); - RF_ASSERT(lpoNode->numAntecedents == 1); - xorNode->succedents[0] = lpoNode; - lpoNode->antecedents[0]= xorNode; - lpoNode->antType[0] = rf_trueData; - - /* connect the Wnd nodes to the unblock node */ - RF_ASSERT(unblockNode->numAntecedents == nWndNodes + 1); - for (i = 0; i < nWndNodes; i++) { - RF_ASSERT(wndNodes->numSuccedents == 1); - wndNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &wndNodes[i]; - unblockNode->antType[i] = rf_control; - } - - /* connect the Lpo node to the unblock node */ - RF_ASSERT(lpoNode->numSuccedents == 1); - lpoNode->succedents[0] = unblockNode; - unblockNode->antecedents[nWndNodes] = lpoNode; - 
unblockNode->antType[nWndNodes] = rf_control; - - /* connect unblock node to terminator */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; + RF_DagNode_t *nodes, *wndNodes, *rodNodes = NULL, *syncNode, *xorNode, + *lpoNode, *blockNode, *unblockNode, *termNode; + int nWndNodes, nRodNodes, i; + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_AccessStripeMapHeader_t *new_asm_h[2]; + int nodeNum, asmNum; + RF_ReconUnitNum_t which_ru; + char *sosBuffer, *eosBuffer; + RF_PhysDiskAddr_t *pda; + RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); + + if (rf_dagDebug) + printf("[Creating parity-logging large-write DAG]\n"); + RF_ASSERT(nfaults == 1);/* this arch only single fault tolerant */ + dag_h->creator = "ParityLoggingLargeWriteDAG"; + + /* alloc the Wnd nodes, the xor node, and the Lpo node */ + nWndNodes = asmap->numStripeUnitsAccessed; + RF_CallocAndAdd(nodes, nWndNodes + 6, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + i = 0; + wndNodes = &nodes[i]; + i += nWndNodes; + xorNode = &nodes[i]; + i += 1; + lpoNode = &nodes[i]; + i += 1; + blockNode = &nodes[i]; + i += 1; + syncNode = &nodes[i]; + i += 1; + unblockNode = &nodes[i]; + i += 1; + termNode = &nodes[i]; + i += 1; + + dag_h->numCommitNodes = nWndNodes + 1; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); + if (nRodNodes > 0) + RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + + /* begin node initialization */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); + 
rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWndNodes + 1, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes + 1, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + + /* initialize the Rod nodes */ + for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { + if (new_asm_h[asmNum]) { + pda = new_asm_h[asmNum]->stripeMap->physInfo; + while (pda) { + rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList); + rodNodes[nodeNum].params[0].p = pda; + rodNodes[nodeNum].params[1].p = pda->bufPtr; + rodNodes[nodeNum].params[2].v = parityStripeID; + rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + nodeNum++; + pda = pda->next; + } + } + } + RF_ASSERT(nodeNum == nRodNodes); + + /* initialize the wnd nodes */ + pda = asmap->physInfo; + for (i = 0; i < nWndNodes; i++) { + rf_InitNode(&wndNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); + RF_ASSERT(pda != NULL); + wndNodes[i].params[0].p = pda; + wndNodes[i].params[1].p = pda->bufPtr; + wndNodes[i].params[2].v = parityStripeID; + wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + pda = pda->next; + } + + /* initialize the redundancy node */ + rf_InitNode(xorNode, rf_wait, RF_TRUE, redFunc, rf_NullNodeUndoFunc, NULL, 1, 1, 2 * (nWndNodes + nRodNodes) + 1, 1, dag_h, "Xr ", allocList); + xorNode->flags |= RF_DAGNODE_FLAG_YIELD; + for (i = 0; i < nWndNodes; i++) { + xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ + xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ + } + for (i = 0; i < nRodNodes; i++) { + 
xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ + xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ + } + xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; /* xor node needs to get + * at RAID information */ + + /* look for an Rod node that reads a complete SU. If none, alloc a + * buffer to receive the parity info. Note that we can't use a new + * data buffer because it will not have gotten written when the xor + * occurs. */ + for (i = 0; i < nRodNodes; i++) + if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) + break; + if (i == nRodNodes) { + RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); + } else { + xorNode->results[0] = rodNodes[i].params[1].p; + } + + /* initialize the Lpo node */ + rf_InitNode(lpoNode, rf_wait, RF_FALSE, rf_ParityLogOverwriteFunc, rf_ParityLogOverwriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpo", allocList); + + lpoNode->params[0].p = asmap->parityInfo; + lpoNode->params[1].p = xorNode->results[0]; + RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must + * describe entire + * parity unit */ + + /* connect nodes to form graph */ + + /* connect dag header to block node */ + RF_ASSERT(dag_h->numSuccedents == 1); + RF_ASSERT(blockNode->numAntecedents == 0); + dag_h->succedents[0] = blockNode; + + /* connect the block node to the Rod nodes */ + RF_ASSERT(blockNode->numSuccedents == nRodNodes + 1); + for (i = 0; i < nRodNodes; i++) { + RF_ASSERT(rodNodes[i].numAntecedents == 1); + blockNode->succedents[i] = &rodNodes[i]; + rodNodes[i].antecedents[0] = blockNode; + rodNodes[i].antType[0] = rf_control; + } + + /* connect the block node to the sync node */ + /* necessary if nRodNodes == 0 */ + RF_ASSERT(syncNode->numAntecedents == nRodNodes + 1); + blockNode->succedents[nRodNodes] = syncNode; + syncNode->antecedents[0] = 
blockNode; + syncNode->antType[0] = rf_control; + + /* connect the Rod nodes to the syncNode */ + for (i = 0; i < nRodNodes; i++) { + rodNodes[i].succedents[0] = syncNode; + syncNode->antecedents[1 + i] = &rodNodes[i]; + syncNode->antType[1 + i] = rf_control; + } + + /* connect the sync node to the xor node */ + RF_ASSERT(syncNode->numSuccedents == nWndNodes + 1); + RF_ASSERT(xorNode->numAntecedents == 1); + syncNode->succedents[0] = xorNode; + xorNode->antecedents[0] = syncNode; + xorNode->antType[0] = rf_trueData; /* carry forward from sync */ + + /* connect the sync node to the Wnd nodes */ + for (i = 0; i < nWndNodes; i++) { + RF_ASSERT(wndNodes->numAntecedents == 1); + syncNode->succedents[1 + i] = &wndNodes[i]; + wndNodes[i].antecedents[0] = syncNode; + wndNodes[i].antType[0] = rf_control; + } + + /* connect the xor node to the Lpo node */ + RF_ASSERT(xorNode->numSuccedents == 1); + RF_ASSERT(lpoNode->numAntecedents == 1); + xorNode->succedents[0] = lpoNode; + lpoNode->antecedents[0] = xorNode; + lpoNode->antType[0] = rf_trueData; + + /* connect the Wnd nodes to the unblock node */ + RF_ASSERT(unblockNode->numAntecedents == nWndNodes + 1); + for (i = 0; i < nWndNodes; i++) { + RF_ASSERT(wndNodes->numSuccedents == 1); + wndNodes[i].succedents[0] = unblockNode; + unblockNode->antecedents[i] = &wndNodes[i]; + unblockNode->antType[i] = rf_control; + } + + /* connect the Lpo node to the unblock node */ + RF_ASSERT(lpoNode->numSuccedents == 1); + lpoNode->succedents[0] = unblockNode; + unblockNode->antecedents[nWndNodes] = lpoNode; + unblockNode->antType[nWndNodes] = rf_control; + + /* connect unblock node to terminator */ + RF_ASSERT(unblockNode->numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == 1); + RF_ASSERT(termNode->numSuccedents == 0); + unblockNode->succedents[0] = termNode; + termNode->antecedents[0] = unblockNode; + termNode->antType[0] = rf_control; } @@ -382,20 +279,20 @@ void rf_CommonCreateParityLoggingLargeWriteDAG( * Header * | * Block - 
* / | ... \ \ - * / | \ \ + * / | ... \ \ + * / | \ \ * Rod Rod Rod Rop - * | \ /| \ / | \/ | - * | | | /\ | - * Wnd Wnd Wnd X - * | \ / | - * | \ / | + * | \ /| \ / | \/ | + * | | | /\ | + * Wnd Wnd Wnd X + * | \ / | + * | \ / | * \ \ / Lpo - * \ \ / / - * +-> Unblock <-+ + * \ \ / / + * +-> Unblock <-+ * | * T - * + * * * R = Read, W = Write, X = Xor, o = old, n = new, d = data, p = parity. * When the access spans a stripe unit boundary and is less than one SU in size, there will @@ -413,340 +310,365 @@ void rf_CommonCreateParityLoggingLargeWriteDAG( * A null qfuncs indicates single fault tolerant *****************************************************************************/ -void rf_CommonCreateParityLoggingSmallWriteDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - RF_RedFuncs_t *pfuncs, - RF_RedFuncs_t *qfuncs) +void +rf_CommonCreateParityLoggingSmallWriteDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + RF_RedFuncs_t * pfuncs, + RF_RedFuncs_t * qfuncs) { - RF_DagNode_t *xorNodes, *blockNode, *unblockNode, *nodes; - RF_DagNode_t *readDataNodes, *readParityNodes; - RF_DagNode_t *writeDataNodes, *lpuNodes; - RF_DagNode_t *unlockDataNodes=NULL, *termNode; - RF_PhysDiskAddr_t *pda = asmap->physInfo; - int numDataNodes = asmap->numStripeUnitsAccessed; - int numParityNodes = (asmap->parityInfo->next) ? 2 : 1; - int i, j, nNodes, totalNumNodes; - RF_ReconUnitNum_t which_ru; - int (*func)(RF_DagNode_t *node), (*undoFunc)(RF_DagNode_t *node); - int (*qfunc)(RF_DagNode_t *node); - char *name, *qname; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - long nfaults = qfuncs ? 2 : 1; - int lu_flag = (rf_enableAtomicRMW) ? 
1 : 0; /* lock/unlock flag */ - - if (rf_dagDebug) printf("[Creating parity-logging small-write DAG]\n"); - RF_ASSERT(numDataNodes > 0); - RF_ASSERT(nfaults == 1); - dag_h->creator = "ParityLoggingSmallWriteDAG"; - - /* DAG creation occurs in three steps: - 1. count the number of nodes in the DAG - 2. create the nodes - 3. initialize the nodes - 4. connect the nodes - */ - - /* Step 1. compute number of nodes in the graph */ - - /* number of nodes: - a read and write for each data unit - a redundancy computation node for each parity node - a read and Lpu for each parity unit - a block and unblock node (2) - a terminator node - if atomic RMW - an unlock node for each data unit, redundancy unit - */ - totalNumNodes = (2 * numDataNodes) + numParityNodes + (2 * numParityNodes) + 3; - if (lu_flag) - totalNumNodes += numDataNodes; - - nNodes = numDataNodes + numParityNodes; - - dag_h->numCommitNodes = numDataNodes + numParityNodes; - dag_h->numCommits = 0; - dag_h->numSuccedents = 1; - - /* Step 2. create the nodes */ - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - i = 0; - blockNode = &nodes[i]; i += 1; - unblockNode = &nodes[i]; i += 1; - readDataNodes = &nodes[i]; i += numDataNodes; - readParityNodes = &nodes[i]; i += numParityNodes; - writeDataNodes = &nodes[i]; i += numDataNodes; - lpuNodes = &nodes[i]; i += numParityNodes; - xorNodes = &nodes[i]; i += numParityNodes; - termNode = &nodes[i]; i += 1; - if (lu_flag) { - unlockDataNodes = &nodes[i]; i += numDataNodes; - } - RF_ASSERT(i == totalNumNodes); - - /* Step 3. 
initialize the nodes */ - /* initialize block node (Nil) */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); - - /* initialize unblock node (Nil) */ - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", allocList); - - /* initialize terminatory node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize nodes which read old data (Rod) */ - for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rod", allocList); - RF_ASSERT(pda != NULL); - readDataNodes[i].params[0].p = pda; /* physical disk addr desc */ - readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old data */ - readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); - pda=pda->next; - readDataNodes[i].propList[0] = NULL; - readDataNodes[i].propList[1] = NULL; - } - - /* initialize nodes which read old parity (Rop) */ - pda = asmap->parityInfo; i = 0; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rop", allocList); - readParityNodes[i].params[0].p = pda; - readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old parity */ - readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - readParityNodes[i].propList[0] = NULL; - pda=pda->next; - } - - /* initialize nodes which write new data (Wnd) */ - pda = asmap->physInfo; - for (i=0; i < 
numDataNodes; i++) { - RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, nNodes, 4, 0, dag_h, "Wnd", allocList); - writeDataNodes[i].params[0].p = pda; /* physical disk addr desc */ - writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new data to be written */ - writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - - if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList); - unlockDataNodes[i].params[0].p = pda; /* physical disk addr desc */ - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); - } - pda = pda->next; - } - - - /* initialize nodes which compute new parity */ - /* we use the simple XOR func in the double-XOR case, and when we're accessing only a portion of one stripe unit. - * the distinction between the two is that the regular XOR func assumes that the targbuf is a full SU in size, - * and examines the pda associated with the buffer to decide where within the buffer to XOR the data, whereas - * the simple XOR func just XORs the data into the start of the buffer. 
- */ - if ((numParityNodes==2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { - func = pfuncs->simple; undoFunc = rf_NullNodeUndoFunc; name = pfuncs->SimpleName; - if (qfuncs) - { qfunc = qfuncs->simple; qname = qfuncs->SimpleName;} - } else { - func = pfuncs->regular; undoFunc = rf_NullNodeUndoFunc; name = pfuncs->RegularName; - if (qfuncs) { qfunc = qfuncs->regular; qname = qfuncs->RegularName;} - } - /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} nodes, and raidPtr */ - if (numParityNodes==2) { /* double-xor case */ - for (i=0; i < numParityNodes; i++) { - rf_InitNode(&xorNodes[i], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for xor */ - xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; - xorNodes[i].params[0] = readDataNodes[i].params[0]; - xorNodes[i].params[1] = readDataNodes[i].params[1]; - xorNodes[i].params[2] = readParityNodes[i].params[0]; - xorNodes[i].params[3] = readParityNodes[i].params[1]; - xorNodes[i].params[4] = writeDataNodes[i].params[0]; - xorNodes[i].params[5] = writeDataNodes[i].params[1]; - xorNodes[i].params[6].p = raidPtr; - xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as target buf */ - } - } - else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); - xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; - for (i=0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2*i+0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2*i+1] = readDataNodes[i].params[1]; /* buffer pointer */ - } - for (i=0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2*(numDataNodes+1+i)+0] = writeDataNodes[i].params[0]; /* pda */ - 
xorNodes[0].params[2*(numDataNodes+1+i)+1] = writeDataNodes[i].params[1]; /* buffer pointer */ - } - xorNodes[0].params[2*(numDataNodes+numDataNodes+1)].p = raidPtr; /* xor node needs to get at RAID information */ - xorNodes[0].results[0] = readParityNodes[0].params[1].p; - } - - /* initialize the log node(s) */ - pda = asmap->parityInfo; - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(pda); - rf_InitNode(&lpuNodes[i], rf_wait, RF_FALSE, rf_ParityLogUpdateFunc, rf_ParityLogUpdateUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpu", allocList); - lpuNodes[i].params[0].p = pda; /* PhysDiskAddr of parity */ - lpuNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer to parity */ - pda = pda->next; - } - - - /* Step 4. connect the nodes */ - - /* connect header to block node */ - RF_ASSERT(dag_h->numSuccedents == 1); - RF_ASSERT(blockNode->numAntecedents == 0); - dag_h->succedents[0] = blockNode; - - /* connect block node to read old data nodes */ - RF_ASSERT(blockNode->numSuccedents == (numDataNodes + numParityNodes)); - for (i = 0; i < numDataNodes; i++) { - blockNode->succedents[i] = &readDataNodes[i]; - RF_ASSERT(readDataNodes[i].numAntecedents == 1); - readDataNodes[i].antecedents[0]= blockNode; - readDataNodes[i].antType[0] = rf_control; - } - - /* connect block node to read old parity nodes */ - for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; - RF_ASSERT(readParityNodes[i].numAntecedents == 1); - readParityNodes[i].antecedents[0] = blockNode; - readParityNodes[i].antType[0] = rf_control; - } - - /* connect read old data nodes to write new data nodes */ - for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == numDataNodes + numParityNodes); - for (j = 0; j < numDataNodes; j++) { - RF_ASSERT(writeDataNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[j] = &writeDataNodes[j]; - writeDataNodes[j].antecedents[i] = 
&readDataNodes[i]; - if (i == j) - writeDataNodes[j].antType[i] = rf_antiData; - else - writeDataNodes[j].antType[i] = rf_control; - } - } - - /* connect read old data nodes to xor nodes */ - for (i = 0; i < numDataNodes; i++) - for (j = 0; j < numParityNodes; j++){ - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[numDataNodes + j] = &xorNodes[j]; - xorNodes[j].antecedents[i] = &readDataNodes[i]; - xorNodes[j].antType[i] = rf_trueData; - } - - /* connect read old parity nodes to write new data nodes */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numDataNodes + numParityNodes); - for (j = 0; j < numDataNodes; j++) { - readParityNodes[i].succedents[j] = &writeDataNodes[j]; - writeDataNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - writeDataNodes[j].antType[numDataNodes + i] = rf_control; - } - } - - /* connect read old parity nodes to xor nodes */ - for (i = 0; i < numParityNodes; i++) - for (j = 0; j < numParityNodes; j++) { - readParityNodes[i].succedents[numDataNodes + j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - xorNodes[j].antType[numDataNodes + i] = rf_trueData; - } - - /* connect xor nodes to write new parity nodes */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(xorNodes[i].numSuccedents == 1); - RF_ASSERT(lpuNodes[i].numAntecedents == 1); - xorNodes[i].succedents[0] = &lpuNodes[i]; - lpuNodes[i].antecedents[0] = &xorNodes[i]; - lpuNodes[i].antType[0] = rf_trueData; - } - - for (i = 0; i < numDataNodes; i++) { - if (lu_flag) { - /* connect write new data nodes to unlock nodes */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); - writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; - unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; - unlockDataNodes[i].antType[0] = rf_control; - - /* connect unlock nodes to unblock node */ - 
RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); - RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - unlockDataNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &unlockDataNodes[i]; - unblockNode->antType[i] = rf_control; - } - else { - /* connect write new data nodes to unblock node */ - RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); - writeDataNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[i] = &writeDataNodes[i]; - unblockNode->antType[i] = rf_control; - } - } - - /* connect write new parity nodes to unblock node */ - for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(lpuNodes[i].numSuccedents == 1); - lpuNodes[i].succedents[0] = unblockNode; - unblockNode->antecedents[numDataNodes + i] = &lpuNodes[i]; - unblockNode->antType[numDataNodes + i] = rf_control; - } - - /* connect unblock node to terminator */ - RF_ASSERT(unblockNode->numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == 1); - RF_ASSERT(termNode->numSuccedents == 0); - unblockNode->succedents[0] = termNode; - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; + RF_DagNode_t *xorNodes, *blockNode, *unblockNode, *nodes; + RF_DagNode_t *readDataNodes, *readParityNodes; + RF_DagNode_t *writeDataNodes, *lpuNodes; + RF_DagNode_t *unlockDataNodes = NULL, *termNode; + RF_PhysDiskAddr_t *pda = asmap->physInfo; + int numDataNodes = asmap->numStripeUnitsAccessed; + int numParityNodes = (asmap->parityInfo->next) ? 2 : 1; + int i, j, nNodes, totalNumNodes; + RF_ReconUnitNum_t which_ru; + int (*func) (RF_DagNode_t * node), (*undoFunc) (RF_DagNode_t * node); + int (*qfunc) (RF_DagNode_t * node); + char *name, *qname; + RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); + long nfaults = qfuncs ? 2 : 1; + int lu_flag = (rf_enableAtomicRMW) ? 
1 : 0; /* lock/unlock flag */ + + if (rf_dagDebug) + printf("[Creating parity-logging small-write DAG]\n"); + RF_ASSERT(numDataNodes > 0); + RF_ASSERT(nfaults == 1); + dag_h->creator = "ParityLoggingSmallWriteDAG"; + + /* DAG creation occurs in three steps: 1. count the number of nodes in + * the DAG 2. create the nodes 3. initialize the nodes 4. connect the + * nodes */ + + /* Step 1. compute number of nodes in the graph */ + + /* number of nodes: a read and write for each data unit a redundancy + * computation node for each parity node a read and Lpu for each + * parity unit a block and unblock node (2) a terminator node if + * atomic RMW an unlock node for each data unit, redundancy unit */ + totalNumNodes = (2 * numDataNodes) + numParityNodes + (2 * numParityNodes) + 3; + if (lu_flag) + totalNumNodes += numDataNodes; + + nNodes = numDataNodes + numParityNodes; + + dag_h->numCommitNodes = numDataNodes + numParityNodes; + dag_h->numCommits = 0; + dag_h->numSuccedents = 1; + + /* Step 2. create the nodes */ + RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + i = 0; + blockNode = &nodes[i]; + i += 1; + unblockNode = &nodes[i]; + i += 1; + readDataNodes = &nodes[i]; + i += numDataNodes; + readParityNodes = &nodes[i]; + i += numParityNodes; + writeDataNodes = &nodes[i]; + i += numDataNodes; + lpuNodes = &nodes[i]; + i += numParityNodes; + xorNodes = &nodes[i]; + i += numParityNodes; + termNode = &nodes[i]; + i += 1; + if (lu_flag) { + unlockDataNodes = &nodes[i]; + i += numDataNodes; + } + RF_ASSERT(i == totalNumNodes); + + /* Step 3. 
initialize the nodes */ + /* initialize block node (Nil) */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); + + /* initialize unblock node (Nil) */ + rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", allocList); + + /* initialize terminatory node (Trm) */ + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + + /* initialize nodes which read old data (Rod) */ + for (i = 0; i < numDataNodes; i++) { + rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rod", allocList); + RF_ASSERT(pda != NULL); + readDataNodes[i].params[0].p = pda; /* physical disk addr + * desc */ + readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old + * data */ + readDataNodes[i].params[2].v = parityStripeID; + readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); + pda = pda->next; + readDataNodes[i].propList[0] = NULL; + readDataNodes[i].propList[1] = NULL; + } + + /* initialize nodes which read old parity (Rop) */ + pda = asmap->parityInfo; + i = 0; + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(pda != NULL); + rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rop", allocList); + readParityNodes[i].params[0].p = pda; + readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old + * parity */ + readParityNodes[i].params[2].v = parityStripeID; + readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + readParityNodes[i].propList[0] = NULL; + pda = pda->next; + } + + /* initialize nodes which write new data (Wnd) */ + pda = 
asmap->physInfo; + for (i = 0; i < numDataNodes; i++) { + RF_ASSERT(pda != NULL); + rf_InitNode(&writeDataNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, nNodes, 4, 0, dag_h, "Wnd", allocList); + writeDataNodes[i].params[0].p = pda; /* physical disk addr + * desc */ + writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new + * data to be written */ + writeDataNodes[i].params[2].v = parityStripeID; + writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + + if (lu_flag) { + /* initialize node to unlock the disk queue */ + rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList); + unlockDataNodes[i].params[0].p = pda; /* physical disk addr + * desc */ + unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); + } + pda = pda->next; + } + + + /* initialize nodes which compute new parity */ + /* we use the simple XOR func in the double-XOR case, and when we're + * accessing only a portion of one stripe unit. the distinction + * between the two is that the regular XOR func assumes that the + * targbuf is a full SU in size, and examines the pda associated with + * the buffer to decide where within the buffer to XOR the data, + * whereas the simple XOR func just XORs the data into the start of + * the buffer. 
*/ + if ((numParityNodes == 2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { + func = pfuncs->simple; + undoFunc = rf_NullNodeUndoFunc; + name = pfuncs->SimpleName; + if (qfuncs) { + qfunc = qfuncs->simple; + qname = qfuncs->SimpleName; + } + } else { + func = pfuncs->regular; + undoFunc = rf_NullNodeUndoFunc; + name = pfuncs->RegularName; + if (qfuncs) { + qfunc = qfuncs->regular; + qname = qfuncs->RegularName; + } + } + /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} + * nodes, and raidPtr */ + if (numParityNodes == 2) { /* double-xor case */ + for (i = 0; i < numParityNodes; i++) { + rf_InitNode(&xorNodes[i], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for + * xor */ + xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; + xorNodes[i].params[0] = readDataNodes[i].params[0]; + xorNodes[i].params[1] = readDataNodes[i].params[1]; + xorNodes[i].params[2] = readParityNodes[i].params[0]; + xorNodes[i].params[3] = readParityNodes[i].params[1]; + xorNodes[i].params[4] = writeDataNodes[i].params[0]; + xorNodes[i].params[5] = writeDataNodes[i].params[1]; + xorNodes[i].params[6].p = raidPtr; + xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as + * target buf */ + } + } else { + /* there is only one xor node in this case */ + rf_InitNode(&xorNodes[0], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); + xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; + for (i = 0; i < numDataNodes + 1; i++) { + /* set up params related to Rod and Rop nodes */ + xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ + xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ + } + for (i = 0; i < numDataNodes; i++) { + /* set up params related to Wnd and Wnp nodes */ + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = 
writeDataNodes[i].params[0]; /* pda */ + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ + } + xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get + * at RAID information */ + xorNodes[0].results[0] = readParityNodes[0].params[1].p; + } + + /* initialize the log node(s) */ + pda = asmap->parityInfo; + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(pda); + rf_InitNode(&lpuNodes[i], rf_wait, RF_FALSE, rf_ParityLogUpdateFunc, rf_ParityLogUpdateUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpu", allocList); + lpuNodes[i].params[0].p = pda; /* PhysDiskAddr of parity */ + lpuNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer to + * parity */ + pda = pda->next; + } + + + /* Step 4. connect the nodes */ + + /* connect header to block node */ + RF_ASSERT(dag_h->numSuccedents == 1); + RF_ASSERT(blockNode->numAntecedents == 0); + dag_h->succedents[0] = blockNode; + + /* connect block node to read old data nodes */ + RF_ASSERT(blockNode->numSuccedents == (numDataNodes + numParityNodes)); + for (i = 0; i < numDataNodes; i++) { + blockNode->succedents[i] = &readDataNodes[i]; + RF_ASSERT(readDataNodes[i].numAntecedents == 1); + readDataNodes[i].antecedents[0] = blockNode; + readDataNodes[i].antType[0] = rf_control; + } + + /* connect block node to read old parity nodes */ + for (i = 0; i < numParityNodes; i++) { + blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; + RF_ASSERT(readParityNodes[i].numAntecedents == 1); + readParityNodes[i].antecedents[0] = blockNode; + readParityNodes[i].antType[0] = rf_control; + } + + /* connect read old data nodes to write new data nodes */ + for (i = 0; i < numDataNodes; i++) { + RF_ASSERT(readDataNodes[i].numSuccedents == numDataNodes + numParityNodes); + for (j = 0; j < numDataNodes; j++) { + RF_ASSERT(writeDataNodes[j].numAntecedents == numDataNodes + numParityNodes); + readDataNodes[i].succedents[j] = 
&writeDataNodes[j]; + writeDataNodes[j].antecedents[i] = &readDataNodes[i]; + if (i == j) + writeDataNodes[j].antType[i] = rf_antiData; + else + writeDataNodes[j].antType[i] = rf_control; + } + } + + /* connect read old data nodes to xor nodes */ + for (i = 0; i < numDataNodes; i++) + for (j = 0; j < numParityNodes; j++) { + RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); + readDataNodes[i].succedents[numDataNodes + j] = &xorNodes[j]; + xorNodes[j].antecedents[i] = &readDataNodes[i]; + xorNodes[j].antType[i] = rf_trueData; + } + + /* connect read old parity nodes to write new data nodes */ + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(readParityNodes[i].numSuccedents == numDataNodes + numParityNodes); + for (j = 0; j < numDataNodes; j++) { + readParityNodes[i].succedents[j] = &writeDataNodes[j]; + writeDataNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; + writeDataNodes[j].antType[numDataNodes + i] = rf_control; + } + } + + /* connect read old parity nodes to xor nodes */ + for (i = 0; i < numParityNodes; i++) + for (j = 0; j < numParityNodes; j++) { + readParityNodes[i].succedents[numDataNodes + j] = &xorNodes[j]; + xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; + xorNodes[j].antType[numDataNodes + i] = rf_trueData; + } + + /* connect xor nodes to write new parity nodes */ + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(xorNodes[i].numSuccedents == 1); + RF_ASSERT(lpuNodes[i].numAntecedents == 1); + xorNodes[i].succedents[0] = &lpuNodes[i]; + lpuNodes[i].antecedents[0] = &xorNodes[i]; + lpuNodes[i].antType[0] = rf_trueData; + } + + for (i = 0; i < numDataNodes; i++) { + if (lu_flag) { + /* connect write new data nodes to unlock nodes */ + RF_ASSERT(writeDataNodes[i].numSuccedents == 1); + RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); + writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; + unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; + unlockDataNodes[i].antType[0] = rf_control; 
+ + /* connect unlock nodes to unblock node */ + RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); + RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); + unlockDataNodes[i].succedents[0] = unblockNode; + unblockNode->antecedents[i] = &unlockDataNodes[i]; + unblockNode->antType[i] = rf_control; + } else { + /* connect write new data nodes to unblock node */ + RF_ASSERT(writeDataNodes[i].numSuccedents == 1); + RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); + writeDataNodes[i].succedents[0] = unblockNode; + unblockNode->antecedents[i] = &writeDataNodes[i]; + unblockNode->antType[i] = rf_control; + } + } + + /* connect write new parity nodes to unblock node */ + for (i = 0; i < numParityNodes; i++) { + RF_ASSERT(lpuNodes[i].numSuccedents == 1); + lpuNodes[i].succedents[0] = unblockNode; + unblockNode->antecedents[numDataNodes + i] = &lpuNodes[i]; + unblockNode->antType[numDataNodes + i] = rf_control; + } + + /* connect unblock node to terminator */ + RF_ASSERT(unblockNode->numSuccedents == 1); + RF_ASSERT(termNode->numAntecedents == 1); + RF_ASSERT(termNode->numSuccedents == 0); + unblockNode->succedents[0] = termNode; + termNode->antecedents[0] = unblockNode; + termNode->antType[0] = rf_control; } -void rf_CreateParityLoggingSmallWriteDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - RF_RedFuncs_t *pfuncs, - RF_RedFuncs_t *qfuncs) +void +rf_CreateParityLoggingSmallWriteDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + RF_RedFuncs_t * pfuncs, + RF_RedFuncs_t * qfuncs) { - dag_h->creator = "ParityLoggingSmallWriteDAG"; - rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_xorFuncs, NULL); + dag_h->creator = "ParityLoggingSmallWriteDAG"; 
+ rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_xorFuncs, NULL); } -void rf_CreateParityLoggingLargeWriteDAG( - RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, - RF_DagHeader_t *dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t *allocList, - int nfaults, - int (*redFunc)(RF_DagNode_t *)) +void +rf_CreateParityLoggingLargeWriteDAG( + RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, + RF_DagHeader_t * dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t * allocList, + int nfaults, + int (*redFunc) (RF_DagNode_t *)) { - dag_h->creator = "ParityLoggingSmallWriteDAG"; - rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularXorFunc); + dag_h->creator = "ParityLoggingSmallWriteDAG"; + rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularXorFunc); } - -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_parityloggingdags.h b/sys/dev/raidframe/rf_parityloggingdags.h index 1eecfc7fe08..91ee70a9487 100644 --- a/sys/dev/raidframe/rf_parityloggingdags.h +++ b/sys/dev/raidframe/rf_parityloggingdags.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_parityloggingdags.h,v 1.1 1999/01/11 14:29:37 niklas Exp $ */ -/* $NetBSD: rf_parityloggingdags.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_parityloggingdags.h,v 1.2 1999/02/16 00:03:08 niklas Exp $ */ +/* $NetBSD: rf_parityloggingdags.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,62 +33,27 @@ * * ****************************************************************************/ -/* : - * Log: rf_parityloggingdags.h,v - * Revision 1.10 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.9 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. 
- * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.8 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.7 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.6 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.5 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.4 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.3 1995/12/06 20:55:08 wvcii - * added prototyping - * - */ - #ifndef _RF__RF_PARITYLOGGINGDAGS_H_ #define _RF__RF_PARITYLOGGINGDAGS_H_ /* routines that create DAGs */ -void rf_CommonCreateParityLoggingLargeWriteDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, - int nfaults, int (*redFunc)(RF_DagNode_t *)); -void rf_CommonCreateParityLoggingSmallWriteDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, - RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs); +void +rf_CommonCreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, + void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, + int nfaults, int (*redFunc) (RF_DagNode_t *)); + void rf_CommonCreateParityLoggingSmallWriteDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, + void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, + RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); -void rf_CreateParityLoggingLargeWriteDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t 
*asmap, RF_DagHeader_t *dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, - int nfaults, int (*redFunc)(RF_DagNode_t *)); -void rf_CreateParityLoggingSmallWriteDAG(RF_Raid_t *raidPtr, - RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, - RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs); + void rf_CreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, + void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, + int nfaults, int (*redFunc) (RF_DagNode_t *)); + void rf_CreateParityLoggingSmallWriteDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, + void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, + RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); -#endif /* !_RF__RF_PARITYLOGGINGDAGS_H_ */ +#endif /* !_RF__RF_PARITYLOGGINGDAGS_H_ */ diff --git a/sys/dev/raidframe/rf_parityscan.c b/sys/dev/raidframe/rf_parityscan.c index 3e6086873be..2bba8d31b28 100644 --- a/sys/dev/raidframe/rf_parityscan.c +++ b/sys/dev/raidframe/rf_parityscan.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_parityscan.c,v 1.1 1999/01/11 14:29:37 niklas Exp $ */ -/* $NetBSD: rf_parityscan.c,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_parityscan.c,v 1.2 1999/02/16 00:03:09 niklas Exp $ */ +/* $NetBSD: rf_parityscan.c,v 1.3 1999/02/05 00:06:14 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,141 +33,6 @@ * *****************************************************************************/ -/* - * : - * Log: rf_parityscan.c,v - * Revision 1.47 1996/08/20 20:35:01 jimz - * change diagnostic string in rewrite - * - * Revision 1.46 1996/08/20 20:03:19 jimz - * fixed parity rewrite to actually use arch-specific parity stuff - * (this ever worked... how?) 
- * - * Revision 1.45 1996/08/16 17:41:25 jimz - * allow rewrite parity on any fault-tolerant arch - * - * Revision 1.44 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.43 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.42 1996/07/22 21:12:01 jimz - * clean up parity scan status printing - * - * Revision 1.41 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.40 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.39 1996/07/09 21:44:26 jimz - * fix bogus return code in VerifyParityBasic when a stripe can't be corrected - * - * Revision 1.38 1996/06/20 17:56:57 jimz - * update VerifyParity to check complete AccessStripeMaps - * - * Revision 1.37 1996/06/19 22:23:01 jimz - * parity verification is now a layout-configurable thing - * not all layouts currently support it (correctly, anyway) - * - * Revision 1.36 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.35 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.34 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.33 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.32 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. 
- * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.31 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.30 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.29 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.28 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.27 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.26 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.25 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.24 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.23 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.22 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.21 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.20 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.19 1995/11/30 16:16:49 wvcii - * added copyright info - * - * Revision 1.18 1995/11/19 16:32:19 wvcii - * eliminated initialization of dag header fields which no longer exist - * (numDags, numDagsDone, firstHdr) - * - * Revision 1.17 1995/11/07 16:23:36 wvcii - * added comments, asserts, and prototypes - * encoded commit point nodes, barrier, and antecedents types into dags - * - */ - #include "rf_types.h" #include "rf_raid.h" #include "rf_dag.h" @@ -193,63 +58,55 @@ * ****************************************************************************************/ -int rf_RewriteParity(raidPtr) - RF_Raid_t *raidPtr; +int +rf_RewriteParity(raidPtr) + RF_Raid_t *raidPtr; { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_AccessStripeMapHeader_t *asm_h; - int old_pctg, new_pctg, rc; - 
RF_PhysDiskAddr_t pda; - RF_SectorNum_t i; + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_AccessStripeMapHeader_t *asm_h; + int old_pctg, new_pctg, rc; + RF_PhysDiskAddr_t pda; + RF_SectorNum_t i; - pda.startSector = 0; - pda.numSector = raidPtr->Layout.sectorsPerStripeUnit; - old_pctg = -1; + pda.startSector = 0; + pda.numSector = raidPtr->Layout.sectorsPerStripeUnit; + old_pctg = -1; /* rf_verifyParityDebug=1; */ - for (i=0; i<raidPtr->totalSectors; i+=layoutPtr->dataSectorsPerStripe) { - asm_h = rf_MapAccess(raidPtr, i, layoutPtr->dataSectorsPerStripe, NULL, RF_DONT_REMAP); - rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0); - /* printf("Parity verified: rc=%d\n",rc); */ - switch (rc) { - case RF_PARITY_OKAY: - case RF_PARITY_CORRECTED: - break; - case RF_PARITY_BAD: - printf("Parity bad during correction\n"); - RF_PANIC(); - break; - case RF_PARITY_COULD_NOT_CORRECT: - printf("Could not correct bad parity\n"); - RF_PANIC(); - break; - case RF_PARITY_COULD_NOT_VERIFY: - printf("Could not verify parity\n"); - RF_PANIC(); - break; - default: - printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc); - RF_PANIC(); - } - rf_FreeAccessStripeMap(asm_h); - new_pctg = i*1000/raidPtr->totalSectors; - if (new_pctg != old_pctg) { -#ifndef KERNEL - fprintf(stderr,"\rParity rewrite: %d.%d%% complete", - new_pctg/10, new_pctg%10); - fflush(stderr); -#endif /* !KERNEL */ - } - old_pctg = new_pctg; - } -#ifndef KERNEL - fprintf(stderr,"\rParity rewrite: 100.0%% complete\n"); -#endif /* !KERNEL */ + for (i = 0; i < raidPtr->totalSectors; i += layoutPtr->dataSectorsPerStripe) { + asm_h = rf_MapAccess(raidPtr, i, layoutPtr->dataSectorsPerStripe, NULL, RF_DONT_REMAP); + rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0); + /* printf("Parity verified: rc=%d\n",rc); */ + switch (rc) { + case RF_PARITY_OKAY: + case RF_PARITY_CORRECTED: + break; + case RF_PARITY_BAD: + printf("Parity bad during correction\n"); + RF_PANIC(); + break; + case 
RF_PARITY_COULD_NOT_CORRECT: + printf("Could not correct bad parity\n"); + RF_PANIC(); + break; + case RF_PARITY_COULD_NOT_VERIFY: + printf("Could not verify parity\n"); + RF_PANIC(); + break; + default: + printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc); + RF_PANIC(); + } + rf_FreeAccessStripeMap(asm_h); + new_pctg = i * 1000 / raidPtr->totalSectors; + if (new_pctg != old_pctg) { + } + old_pctg = new_pctg; + } #if 1 - return(0); /* XXX nothing was here.. GO */ + return (0); /* XXX nothing was here.. GO */ #endif } - /***************************************************************************************** * * verify that the parity in a particular stripe is correct. @@ -259,211 +116,217 @@ int rf_RewriteParity(raidPtr) * region defined by the parityPDA. * ****************************************************************************************/ -int rf_VerifyParity(raidPtr, aasm, correct_it, flags) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *aasm; - int correct_it; - RF_RaidAccessFlags_t flags; +int +rf_VerifyParity(raidPtr, aasm, correct_it, flags) + RF_Raid_t *raidPtr; + RF_AccessStripeMap_t *aasm; + int correct_it; + RF_RaidAccessFlags_t flags; { - RF_PhysDiskAddr_t *parityPDA; - RF_AccessStripeMap_t *doasm; - RF_LayoutSW_t *lp; - int lrc, rc; + RF_PhysDiskAddr_t *parityPDA; + RF_AccessStripeMap_t *doasm; + RF_LayoutSW_t *lp; + int lrc, rc; - lp = raidPtr->Layout.map; - if (lp->faultsTolerated == 0) { - /* - * There isn't any parity. Call it "okay." 
- */ - return(RF_PARITY_OKAY); - } - rc = RF_PARITY_OKAY; - if (lp->VerifyParity) { - for(doasm=aasm;doasm;doasm=doasm->next) { - for(parityPDA=doasm->parityInfo;parityPDA;parityPDA=parityPDA->next) { - lrc = lp->VerifyParity(raidPtr, doasm->raidAddress, parityPDA, - correct_it, flags); - if (lrc > rc) { - /* see rf_parityscan.h for why this works */ - rc = lrc; - } - } - } - } - else { - rc = RF_PARITY_COULD_NOT_VERIFY; - } - return(rc); + lp = raidPtr->Layout.map; + if (lp->faultsTolerated == 0) { + /* + * There isn't any parity. Call it "okay." + */ + return (RF_PARITY_OKAY); + } + rc = RF_PARITY_OKAY; + if (lp->VerifyParity) { + for (doasm = aasm; doasm; doasm = doasm->next) { + for (parityPDA = doasm->parityInfo; parityPDA; parityPDA = parityPDA->next) { + lrc = lp->VerifyParity(raidPtr, doasm->raidAddress, parityPDA, + correct_it, flags); + if (lrc > rc) { + /* see rf_parityscan.h for why this + * works */ + rc = lrc; + } + } + } + } else { + rc = RF_PARITY_COULD_NOT_VERIFY; + } + return (rc); } -int rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_PhysDiskAddr_t *parityPDA; - int correct_it; - RF_RaidAccessFlags_t flags; +int +rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) + RF_Raid_t *raidPtr; + RF_RaidAddr_t raidAddr; + RF_PhysDiskAddr_t *parityPDA; + int correct_it; + RF_RaidAccessFlags_t flags; { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); - RF_SectorCount_t numsector = parityPDA->numSector; - int numbytes = rf_RaidAddressToByte(raidPtr, numsector); - int bytesPerStripe = numbytes * layoutPtr->numDataCol; - RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ - RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; - RF_AccessStripeMapHeader_t *asm_h; - RF_AccessStripeMap_t *asmap; - RF_AllocListElem_t *alloclist; - RF_PhysDiskAddr_t *pda; - char *pbuf, 
*buf, *end_p, *p; - int i, retcode; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); - int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - RF_AccTraceEntry_t tracerec; - RF_MCPair_t *mcpair; + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); + RF_SectorCount_t numsector = parityPDA->numSector; + int numbytes = rf_RaidAddressToByte(raidPtr, numsector); + int bytesPerStripe = numbytes * layoutPtr->numDataCol; + RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ + RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; + RF_AccessStripeMapHeader_t *asm_h; + RF_AccessStripeMap_t *asmap; + RF_AllocListElem_t *alloclist; + RF_PhysDiskAddr_t *pda; + char *pbuf, *buf, *end_p, *p; + int i, retcode; + RF_ReconUnitNum_t which_ru; + RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); + int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; + RF_AccTraceEntry_t tracerec; + RF_MCPair_t *mcpair; - retcode = RF_PARITY_OKAY; + retcode = RF_PARITY_OKAY; - mcpair = rf_AllocMCPair(); - rf_MakeAllocList(alloclist); - RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); - RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make sure buffer is zeroed */ - end_p = buf + bytesPerStripe; + mcpair = rf_AllocMCPair(); + rf_MakeAllocList(alloclist); + RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); + RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make + * sure buffer is zeroed */ + end_p = buf + bytesPerStripe; - rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); - blockNode = rd_dag_h->succedents[0]; - 
unblockNode = blockNode->succedents[0]->succedents[0]; + rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, + "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); + blockNode = rd_dag_h->succedents[0]; + unblockNode = blockNode->succedents[0]->succedents[0]; - /* map the stripe and fill in the PDAs in the dag */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); - asmap = asm_h->stripeMap; - - for (pda=asmap->physInfo,i=0; i<layoutPtr->numDataCol; i++,pda=pda->next) { - RF_ASSERT(pda); - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) goto out; /* no way to verify parity if disk is dead. return w/ good status */ - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } + /* map the stripe and fill in the PDAs in the dag */ + asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); + asmap = asm_h->stripeMap; - RF_ASSERT(!asmap->parityInfo->next); - rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); - RF_ASSERT(asmap->parityInfo->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) - goto out; - blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; - - /* fire off the DAG */ - bzero((char *)&tracerec,sizeof(tracerec)); - rd_dag_h->tracerec = &tracerec; + for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { + RF_ASSERT(pda); + rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); + RF_ASSERT(pda->numSector != 0); + if (rf_TryToRedirectPDA(raidPtr, pda, 0)) + goto out; /* no way to verify parity if disk is + * dead. 
return w/ good status */ + blockNode->succedents[i]->params[0].p = pda; + blockNode->succedents[i]->params[2].v = psID; + blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } - if (rf_verifyParityDebug) { - printf("Parity verify read dag:\n"); - rf_PrintDAGList(rd_dag_h); - } + RF_ASSERT(!asmap->parityInfo->next); + rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); + RF_ASSERT(asmap->parityInfo->numSector != 0); + if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) + goto out; + blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(rd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); - retcode = RF_PARITY_COULD_NOT_VERIFY; - goto out; - } + /* fire off the DAG */ + bzero((char *) &tracerec, sizeof(tracerec)); + rd_dag_h->tracerec = &tracerec; - for (p=buf; p<end_p; p+=numbytes) { - rf_bxor(p, pbuf, numbytes, NULL); - } - for (i=0; i<numbytes; i++) { + if (rf_verifyParityDebug) { + printf("Parity verify read dag:\n"); + rf_PrintDAGList(rd_dag_h); + } + RF_LOCK_MUTEX(mcpair->mutex); + mcpair->flag = 0; + rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) mcpair); + while (!mcpair->flag) + RF_WAIT_COND(mcpair->cond, mcpair->mutex); + RF_UNLOCK_MUTEX(mcpair->mutex); + if (rd_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); + retcode = RF_PARITY_COULD_NOT_VERIFY; + goto out; + } + for (p = buf; p < end_p; p += numbytes) { + rf_bxor(p, pbuf, numbytes, NULL); + } + for (i = 0; i < numbytes; i++) { #if 0 - if (pbuf[i]!=0 || buf[bytesPerStripe+i]!=0) { - printf("Bytes: %d %d %d\n",i,pbuf[i],buf[bytesPerStripe+i]); - } + 
if (pbuf[i] != 0 || buf[bytesPerStripe + i] != 0) { + printf("Bytes: %d %d %d\n", i, pbuf[i], buf[bytesPerStripe + i]); + } #endif - if (pbuf[i] != buf[bytesPerStripe+i]) { - if (!correct_it) - RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", - i,(u_char) buf[bytesPerStripe+i],(u_char) pbuf[i]); - retcode = RF_PARITY_BAD; - break; - } - } - - if (retcode && correct_it) { - wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); - wrBlock = wr_dag_h->succedents[0]; wrUnblock = wrBlock->succedents[0]->succedents[0]; - wrBlock->succedents[0]->params[0].p = asmap->parityInfo; - wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - bzero((char *)&tracerec,sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug) { - printf("Parity verify write dag:\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc, - (void *) mcpair); - while (!mcpair->flag) - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); - retcode = RF_PARITY_COULD_NOT_CORRECT; - } - rf_FreeDAG(wr_dag_h); - if (retcode == RF_PARITY_BAD) - retcode = RF_PARITY_CORRECTED; - } + if (pbuf[i] != buf[bytesPerStripe + i]) { + if (!correct_it) + RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", + i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]); + retcode = RF_PARITY_BAD; + break; + } + } + if (retcode && correct_it) { + wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); + wrBlock = wr_dag_h->succedents[0]; + 
wrUnblock = wrBlock->succedents[0]->succedents[0]; + wrBlock->succedents[0]->params[0].p = asmap->parityInfo; + wrBlock->succedents[0]->params[2].v = psID; + wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + bzero((char *) &tracerec, sizeof(tracerec)); + wr_dag_h->tracerec = &tracerec; + if (rf_verifyParityDebug) { + printf("Parity verify write dag:\n"); + rf_PrintDAGList(wr_dag_h); + } + RF_LOCK_MUTEX(mcpair->mutex); + mcpair->flag = 0; + rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) mcpair); + while (!mcpair->flag) + RF_WAIT_COND(mcpair->cond, mcpair->mutex); + RF_UNLOCK_MUTEX(mcpair->mutex); + if (wr_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); + retcode = RF_PARITY_COULD_NOT_CORRECT; + } + rf_FreeDAG(wr_dag_h); + if (retcode == RF_PARITY_BAD) + retcode = RF_PARITY_CORRECTED; + } out: - rf_FreeAccessStripeMap(asm_h); - rf_FreeAllocList(alloclist); - rf_FreeDAG(rd_dag_h); - rf_FreeMCPair(mcpair); - return(retcode); + rf_FreeAccessStripeMap(asm_h); + rf_FreeAllocList(alloclist); + rf_FreeDAG(rd_dag_h); + rf_FreeMCPair(mcpair); + return (retcode); } -int rf_TryToRedirectPDA(raidPtr, pda, parity) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - int parity; +int +rf_TryToRedirectPDA(raidPtr, pda, parity) + RF_Raid_t *raidPtr; + RF_PhysDiskAddr_t *pda; + int parity; { - if (raidPtr->Disks[pda->row][pda->col].status == rf_ds_reconstructing) { - if (rf_CheckRUReconstructed(raidPtr->reconControl[pda->row]->reconMap, pda->startSector)) { - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - RF_RowCol_t or = pda->row, oc = pda->col; - RF_SectorNum_t os = pda->startSector; - if (parity) { - (raidPtr->Layout.map->MapParity)(raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); - if (rf_verifyParityDebug) printf("VerifyParity: Redir P r %d c %d sect %ld -> r %d c %d sect %ld\n", - 
or,oc,(long)os,pda->row,pda->col,(long)pda->startSector); - } else { - (raidPtr->Layout.map->MapSector)(raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); - if (rf_verifyParityDebug) printf("VerifyParity: Redir D r %d c %d sect %ld -> r %d c %d sect %ld\n", - or,oc,(long)os,pda->row,pda->col,(long)pda->startSector); + if (raidPtr->Disks[pda->row][pda->col].status == rf_ds_reconstructing) { + if (rf_CheckRUReconstructed(raidPtr->reconControl[pda->row]->reconMap, pda->startSector)) { + if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { + RF_RowCol_t or = pda->row, oc = pda->col; + RF_SectorNum_t os = pda->startSector; + if (parity) { + (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); + if (rf_verifyParityDebug) + printf("VerifyParity: Redir P r %d c %d sect %ld -> r %d c %d sect %ld\n", + or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); + } else { + (raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); + if (rf_verifyParityDebug) + printf("VerifyParity: Redir D r %d c %d sect %ld -> r %d c %d sect %ld\n", + or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); + } + } else { + RF_RowCol_t spRow = raidPtr->Disks[pda->row][pda->col].spareRow; + RF_RowCol_t spCol = raidPtr->Disks[pda->row][pda->col].spareCol; + pda->row = spRow; + pda->col = spCol; + } + } } - } else { - RF_RowCol_t spRow = raidPtr->Disks[pda->row][pda->col].spareRow; - RF_RowCol_t spCol = raidPtr->Disks[pda->row][pda->col].spareCol; - pda->row = spRow; - pda->col = spCol; - } - } - } - if (RF_DEAD_DISK(raidPtr->Disks[pda->row][pda->col].status)) return(1); - return(0); + if (RF_DEAD_DISK(raidPtr->Disks[pda->row][pda->col].status)) + return (1); + return (0); } - /***************************************************************************************** * * currently a stub. 
@@ -482,13 +345,13 @@ int rf_TryToRedirectPDA(raidPtr, pda, parity) * course not the case for the new parity. * ****************************************************************************************/ -int rf_VerifyDegrModeWrite(raidPtr, asmh) - RF_Raid_t *raidPtr; - RF_AccessStripeMapHeader_t *asmh; +int +rf_VerifyDegrModeWrite(raidPtr, asmh) + RF_Raid_t *raidPtr; + RF_AccessStripeMapHeader_t *asmh; { - return(0); + return (0); } - /* creates a simple DAG with a header, a block-recon node at level 1, * nNodes nodes at level 2, an unblock-recon node at level 3, and * a terminator node at level 4. The stripe address field in @@ -498,56 +361,57 @@ int rf_VerifyDegrModeWrite(raidPtr, asmh) * commit point is established at unblock node - this means that any * failure during dag execution causes the dag to fail */ -RF_DagHeader_t *rf_MakeSimpleDAG(raidPtr, nNodes, bytesPerSU, databuf, doFunc, undoFunc, name, alloclist, flags, priority) - RF_Raid_t *raidPtr; - int nNodes; - int bytesPerSU; - char *databuf; - int (*doFunc)(RF_DagNode_t *node); - int (*undoFunc)(RF_DagNode_t *node); - char *name; /* node names at the second level */ - RF_AllocListElem_t *alloclist; - RF_RaidAccessFlags_t flags; - int priority; +RF_DagHeader_t * +rf_MakeSimpleDAG(raidPtr, nNodes, bytesPerSU, databuf, doFunc, undoFunc, name, alloclist, flags, priority) + RF_Raid_t *raidPtr; + int nNodes; + int bytesPerSU; + char *databuf; + int (*doFunc) (RF_DagNode_t * node); + int (*undoFunc) (RF_DagNode_t * node); + char *name; /* node names at the second level */ + RF_AllocListElem_t *alloclist; + RF_RaidAccessFlags_t flags; + int priority; { - RF_DagHeader_t *dag_h; - RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode; - int i; - - /* create the nodes, the block & unblock nodes, and the terminator node */ - RF_CallocAndAdd(nodes, nNodes+3, sizeof(RF_DagNode_t), (RF_DagNode_t *), alloclist); - blockNode = &nodes[nNodes]; - unblockNode = blockNode+1; - termNode = unblockNode+1; + RF_DagHeader_t 
*dag_h; + RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode; + int i; + + /* create the nodes, the block & unblock nodes, and the terminator + * node */ + RF_CallocAndAdd(nodes, nNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), alloclist); + blockNode = &nodes[nNodes]; + unblockNode = blockNode + 1; + termNode = unblockNode + 1; - dag_h = rf_AllocDAGHeader(); - dag_h->raidPtr = (void *) raidPtr; - dag_h->allocList = NULL; /* we won't use this alloc list */ - dag_h->status = rf_enable; - dag_h->numSuccedents = 1; - dag_h->creator = "SimpleDAG"; + dag_h = rf_AllocDAGHeader(); + dag_h->raidPtr = (void *) raidPtr; + dag_h->allocList = NULL;/* we won't use this alloc list */ + dag_h->status = rf_enable; + dag_h->numSuccedents = 1; + dag_h->creator = "SimpleDAG"; - /* this dag can not commit until the unblock node is reached - * errors prior to the commit point imply the dag has failed - */ - dag_h->numCommitNodes = 1; - dag_h->numCommits = 0; + /* this dag can not commit until the unblock node is reached errors + * prior to the commit point imply the dag has failed */ + dag_h->numCommitNodes = 1; + dag_h->numCommits = 0; - dag_h->succedents[0] = blockNode; - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", alloclist); - rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist); - unblockNode->succedents[0] = termNode; - for (i=0; i<nNodes; i++) { - blockNode->succedents[i] = unblockNode->antecedents[i] = &nodes[i]; - unblockNode->antType[i] = rf_control; - rf_InitNode(&nodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); - nodes[i].succedents[0] = unblockNode; - nodes[i].antecedents[0] = blockNode; - nodes[i].antType[0] = rf_control; - nodes[i].params[1].p = (databuf + (i*bytesPerSU)); - } - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, 
NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist); - termNode->antecedents[0] = unblockNode; - termNode->antType[0] = rf_control; - return(dag_h); + dag_h->succedents[0] = blockNode; + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", alloclist); + rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist); + unblockNode->succedents[0] = termNode; + for (i = 0; i < nNodes; i++) { + blockNode->succedents[i] = unblockNode->antecedents[i] = &nodes[i]; + unblockNode->antType[i] = rf_control; + rf_InitNode(&nodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); + nodes[i].succedents[0] = unblockNode; + nodes[i].antecedents[0] = blockNode; + nodes[i].antType[0] = rf_control; + nodes[i].params[1].p = (databuf + (i * bytesPerSU)); + } + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist); + termNode->antecedents[0] = unblockNode; + termNode->antType[0] = rf_control; + return (dag_h); } diff --git a/sys/dev/raidframe/rf_parityscan.h b/sys/dev/raidframe/rf_parityscan.h index 44aec7e2ca6..66324207abc 100644 --- a/sys/dev/raidframe/rf_parityscan.h +++ b/sys/dev/raidframe/rf_parityscan.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_parityscan.h,v 1.1 1999/01/11 14:29:38 niklas Exp $ */ -/* $NetBSD: rf_parityscan.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_parityscan.h,v 1.2 1999/02/16 00:03:09 niklas Exp $ */ +/* $NetBSD: rf_parityscan.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,79 +27,28 @@ * rights to redistribute these changes. 
*/ -/* : - * Log: rf_parityscan.h,v - * Revision 1.14 1996/07/05 18:01:12 jimz - * don't make parity protos ndef KERNEL - * - * Revision 1.13 1996/06/20 17:41:43 jimz - * change decl for VerifyParity - * - * Revision 1.12 1996/06/20 15:38:39 jimz - * renumber parityscan return codes - * - * Revision 1.11 1996/06/19 22:23:01 jimz - * parity verification is now a layout-configurable thing - * not all layouts currently support it (correctly, anyway) - * - * Revision 1.10 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.9 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.8 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.7 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.6 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.5 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.4 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1995/11/30 16:20:46 wvcii - * added copyright info - * - */ - #ifndef _RF__RF_PARITYSCAN_H_ #define _RF__RF_PARITYSCAN_H_ #include "rf_types.h" #include "rf_alloclist.h" -int rf_RewriteParity(RF_Raid_t *raidPtr); -int rf_VerifyParityBasic(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags); -int rf_VerifyParity(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *stripeMap, - int correct_it, RF_RaidAccessFlags_t flags); -int rf_TryToRedirectPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, int parity); -int rf_VerifyDegrModeWrite(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asmh); -RF_DagHeader_t *rf_MakeSimpleDAG(RF_Raid_t *raidPtr, int nNodes, - int bytesPerSU, char *databuf, - int (*doFunc)(RF_DagNode_t *), - int (*undoFunc)(RF_DagNode_t *), - char *name, RF_AllocListElem_t *alloclist, - RF_RaidAccessFlags_t flags, int priority); +int rf_RewriteParity(RF_Raid_t * raidPtr); +int +rf_VerifyParityBasic(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, + RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); +int +rf_VerifyParity(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * stripeMap, + int correct_it, RF_RaidAccessFlags_t flags); +int rf_TryToRedirectPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, int parity); +int 
rf_VerifyDegrModeWrite(RF_Raid_t * raidPtr, RF_AccessStripeMapHeader_t * asmh); +RF_DagHeader_t * +rf_MakeSimpleDAG(RF_Raid_t * raidPtr, int nNodes, + int bytesPerSU, char *databuf, + int (*doFunc) (RF_DagNode_t *), + int (*undoFunc) (RF_DagNode_t *), + char *name, RF_AllocListElem_t * alloclist, + RF_RaidAccessFlags_t flags, int priority); #define RF_DO_CORRECT_PARITY 1 #define RF_DONT_CORRECT_PARITY 0 @@ -109,10 +58,10 @@ RF_DagHeader_t *rf_MakeSimpleDAG(RF_Raid_t *raidPtr, int nNodes, * * Ordering is important here. */ -#define RF_PARITY_OKAY 0 /* or no parity information */ +#define RF_PARITY_OKAY 0 /* or no parity information */ #define RF_PARITY_CORRECTED 1 #define RF_PARITY_BAD 2 #define RF_PARITY_COULD_NOT_CORRECT 3 #define RF_PARITY_COULD_NOT_VERIFY 4 -#endif /* !_RF__RF_PARITYSCAN_H_ */ +#endif /* !_RF__RF_PARITYSCAN_H_ */ diff --git a/sys/dev/raidframe/rf_pq.c b/sys/dev/raidframe/rf_pq.c index ebbc7917b26..654930901bd 100644 --- a/sys/dev/raidframe/rf_pq.c +++ b/sys/dev/raidframe/rf_pq.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_pq.c,v 1.1 1999/01/11 14:29:38 niklas Exp $ */ -/* $NetBSD: rf_pq.c,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_pq.c,v 1.2 1999/02/16 00:03:09 niklas Exp $ */ +/* $NetBSD: rf_pq.c,v 1.3 1999/02/05 00:06:14 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,106 +29,6 @@ /* * Code for RAID level 6 (P + Q) disk array architecture. 
- * - * : - * Log: rf_pq.c,v - * Revision 1.33 1996/11/05 21:10:40 jimz - * failed pda generalization - * - * Revision 1.32 1996/07/31 16:29:50 jimz - * "fix" math on 32-bit machines using RF_LONGSHIFT - * (may be incorrect) - * - * Revision 1.31 1996/07/31 15:35:01 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.30 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.29 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.28 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.27 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.26 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.25 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.24 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.23 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.22 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.21 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.20 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.19 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.18 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.17 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.16 1996/05/17 14:52:04 wvcii - * added prototyping to QDelta() - * - changed buf params from volatile unsigned long * to char * - * changed QDelta for kernel - * - just bzero the buf since kernel doesn't include pq decode table - * - * Revision 1.15 1996/05/03 19:40:20 wvcii - * added includes for dag library - * - * Revision 1.14 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.13 1995/11/30 16:19:55 wvcii - * added copyright info - * - * Revision 1.12 1995/11/07 16:13:47 wvcii - * changed PQDagSelect prototype - * function no longer returns numHdrSucc, numTermAnt - * note: this file contains node functions which should be - * moved to rf_dagfuncs.c so that all node funcs are bundled together - * - * Revision 1.11 
1995/10/04 03:50:33 wvcii - * removed panics, minor code cleanup in dag selection - * - * */ #include "rf_archs.h" @@ -149,266 +49,252 @@ #include "rf_pq.h" #include "rf_sys.h" -RF_RedFuncs_t rf_pFuncs = { rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P" }; -RF_RedFuncs_t rf_pRecoveryFuncs = { rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func" }; +RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"}; +RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"}; -int rf_RegularONPFunc(node) - RF_DagNode_t *node; +int +rf_RegularONPFunc(node) + RF_DagNode_t *node; { - return(rf_RegularXorFunc(node)); + return (rf_RegularXorFunc(node)); } - /* - same as simpleONQ func, but the coefficient is always 1 + same as simpleONQ func, but the coefficient is always 1 */ -int rf_SimpleONPFunc(node) - RF_DagNode_t *node; +int +rf_SimpleONPFunc(node) + RF_DagNode_t *node; { - return(rf_SimpleXorFunc(node)); + return (rf_SimpleXorFunc(node)); } -int rf_RecoveryPFunc(node) -RF_DagNode_t *node; +int +rf_RecoveryPFunc(node) + RF_DagNode_t *node; { - return(rf_RecoveryXorFunc(node)); + return (rf_RecoveryXorFunc(node)); } -int rf_RegularPFunc(node) - RF_DagNode_t *node; +int +rf_RegularPFunc(node) + RF_DagNode_t *node; { - return(rf_RegularXorFunc(node)); + return (rf_RegularXorFunc(node)); } - #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) -static void QDelta(char *dest, char *obuf, char *nbuf, unsigned length, - unsigned char coeff); -static void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, - unsigned length, unsigned coeff); - -RF_RedFuncs_t rf_qFuncs = { rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q" }; -RF_RedFuncs_t rf_qRecoveryFuncs = { rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func" }; -RF_RedFuncs_t rf_pqRecoveryFuncs = { rf_RecoveryPQFunc, "Recovery PQ 
Func", rf_RecoveryPQFunc, "Recovery PQ Func" }; - -void rf_PQDagSelect( - RF_Raid_t *raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t *asmap, - RF_VoidFuncPtr *createFunc) +static void +QDelta(char *dest, char *obuf, char *nbuf, unsigned length, + unsigned char coeff); +static void +rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, + unsigned length, unsigned coeff); + +RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"}; +RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"}; +RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"}; + +void +rf_PQDagSelect( + RF_Raid_t * raidPtr, + RF_IoType_t type, + RF_AccessStripeMap_t * asmap, + RF_VoidFuncPtr * createFunc) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - unsigned ndfail = asmap->numDataFailed; - unsigned npfail = asmap->numParityFailed; - unsigned ntfail = npfail + ndfail; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - if (ntfail > 2) - { - RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } - - /* ok, we can do this I/O */ - if (type == RF_IO_TYPE_READ) - { - switch (ndfail) - { - case 0: - /* fault free read */ - *createFunc = rf_CreateFaultFreeReadDAG; /* same as raid 5 */ - break; - case 1: - /* lost a single data unit */ - /* two cases: - (1) parity is not lost. - do a normal raid 5 reconstruct read. - (2) parity is lost. - do a reconstruct read using "q". - */ - if (ntfail == 2) /* also lost redundancy */ - { - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) - *createFunc = rf_PQ_110_CreateReadDAG; - else - *createFunc = rf_PQ_101_CreateReadDAG; - } - else - { - /* P and Q are ok. But is there a failure - in some unaccessed data unit? 
- */ - if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2) - *createFunc = rf_PQ_200_CreateReadDAG; - else - *createFunc = rf_PQ_100_CreateReadDAG; - } - break; - case 2: - /* lost two data units */ - /* *infoFunc = PQOneTwo; */ - *createFunc = rf_PQ_200_CreateReadDAG; - break; + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + unsigned ndfail = asmap->numDataFailed; + unsigned npfail = asmap->numParityFailed; + unsigned ntfail = npfail + ndfail; + + RF_ASSERT(RF_IO_IS_R_OR_W(type)); + if (ntfail > 2) { + RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); + /* *infoFunc = */ *createFunc = NULL; + return; } - return; - } - - /* a write */ - switch (ntfail) - { - case 0: /* fault free */ - if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { - - *createFunc = rf_PQCreateSmallWriteDAG; - } - else { - *createFunc = rf_PQCreateLargeWriteDAG; - } - break; - - case 1: /* single disk fault */ - if (npfail==1) - { - RF_ASSERT ((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) - { /* q died, treat like normal mode raid5 write.*/ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || rf_NumFailedDataUnitsInStripe(raidPtr,asmap)) - *createFunc = rf_PQ_001_CreateSmallWriteDAG; - else - *createFunc = rf_PQ_001_CreateLargeWriteDAG; - } - else - { /* parity died, small write only updating Q */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || rf_NumFailedDataUnitsInStripe(raidPtr,asmap)) - *createFunc = rf_PQ_010_CreateSmallWriteDAG; - else - *createFunc = rf_PQ_010_CreateLargeWriteDAG; - } + /* ok, we can do this 
I/O */ + if (type == RF_IO_TYPE_READ) { + switch (ndfail) { + case 0: + /* fault free read */ + *createFunc = rf_CreateFaultFreeReadDAG; /* same as raid 5 */ + break; + case 1: + /* lost a single data unit */ + /* two cases: (1) parity is not lost. do a normal raid + * 5 reconstruct read. (2) parity is lost. do a + * reconstruct read using "q". */ + if (ntfail == 2) { /* also lost redundancy */ + if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) + *createFunc = rf_PQ_110_CreateReadDAG; + else + *createFunc = rf_PQ_101_CreateReadDAG; + } else { + /* P and Q are ok. But is there a failure in + * some unaccessed data unit? */ + if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) + *createFunc = rf_PQ_200_CreateReadDAG; + else + *createFunc = rf_PQ_100_CreateReadDAG; + } + break; + case 2: + /* lost two data units */ + /* *infoFunc = PQOneTwo; */ + *createFunc = rf_PQ_200_CreateReadDAG; + break; + } + return; } - else - { /* data missing. - Do a P reconstruct write if only a single data unit - is lost in the stripe, otherwise a PQ reconstruct - write. */ - if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2) - *createFunc = rf_PQ_200_CreateWriteDAG; - else - *createFunc = rf_PQ_100_CreateWriteDAG; + /* a write */ + switch (ntfail) { + case 0: /* fault free */ + if (rf_suppressLocksAndLargeWrites || + (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || + (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { + + *createFunc = rf_PQCreateSmallWriteDAG; + } else { + *createFunc = rf_PQCreateLargeWriteDAG; + } + break; + + case 1: /* single disk fault */ + if (npfail == 1) { + RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); + if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like + * normal mode raid5 + * write. 
*/ + if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) + || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) + *createFunc = rf_PQ_001_CreateSmallWriteDAG; + else + *createFunc = rf_PQ_001_CreateLargeWriteDAG; + } else {/* parity died, small write only updating Q */ + if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) + || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) + *createFunc = rf_PQ_010_CreateSmallWriteDAG; + else + *createFunc = rf_PQ_010_CreateLargeWriteDAG; + } + } else { /* data missing. Do a P reconstruct write if + * only a single data unit is lost in the + * stripe, otherwise a PQ reconstruct write. */ + if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) + *createFunc = rf_PQ_200_CreateWriteDAG; + else + *createFunc = rf_PQ_100_CreateWriteDAG; + } + break; + + case 2: /* two disk faults */ + switch (npfail) { + case 2: /* both p and q dead */ + *createFunc = rf_PQ_011_CreateWriteDAG; + break; + case 1: /* either p or q and dead data */ + RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); + RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); + if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) + *createFunc = rf_PQ_101_CreateWriteDAG; + else + *createFunc = rf_PQ_110_CreateWriteDAG; + break; + case 0: /* double data loss */ + *createFunc = rf_PQ_200_CreateWriteDAG; + break; + } + break; + + default: /* more than 2 disk faults */ + *createFunc = NULL; + RF_PANIC(); } - break; - - case 2: /* two disk faults */ - switch (npfail) - { - case 2: /* both p and q dead */ - *createFunc = rf_PQ_011_CreateWriteDAG; - break; - case 1: /* either p or q and dead data */ - RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); - RF_ASSERT ((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[1]->type == 
RF_PDA_TYPE_Q) - *createFunc = rf_PQ_101_CreateWriteDAG; - else - *createFunc = rf_PQ_110_CreateWriteDAG; - break; - case 0: /* double data loss */ - *createFunc = rf_PQ_200_CreateWriteDAG; - break; - } - break; - - default: /* more than 2 disk faults */ - *createFunc = NULL; - RF_PANIC(); - } - return; + return; } - /* - Used as a stop gap info function + Used as a stop gap info function */ -static void PQOne(raidPtr, nSucc, nAnte, asmap) - RF_Raid_t *raidPtr; - int *nSucc; - int *nAnte; - RF_AccessStripeMap_t *asmap; +static void +PQOne(raidPtr, nSucc, nAnte, asmap) + RF_Raid_t *raidPtr; + int *nSucc; + int *nAnte; + RF_AccessStripeMap_t *asmap; { - *nSucc = *nAnte = 1; + *nSucc = *nAnte = 1; } -static void PQOneTwo(raidPtr, nSucc, nAnte, asmap) - RF_Raid_t *raidPtr; - int *nSucc; - int *nAnte; - RF_AccessStripeMap_t *asmap; +static void +PQOneTwo(raidPtr, nSucc, nAnte, asmap) + RF_Raid_t *raidPtr; + int *nSucc; + int *nAnte; + RF_AccessStripeMap_t *asmap; { - *nSucc = 1; - *nAnte = 2; + *nSucc = 1; + *nAnte = 2; } - RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG) { - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, - rf_RegularPQFunc, RF_FALSE); + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, + rf_RegularPQFunc, RF_FALSE); } -int rf_RegularONQFunc(node) - RF_DagNode_t *node; +int +rf_RegularONQFunc(node) + RF_DagNode_t *node; { - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *)node->params[np-1].p; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf, *qpbuf; - char *obuf, *nbuf; - RF_PhysDiskAddr_t *old, *new; - unsigned long coeff; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - - RF_ETIMER_START(timer); - - d = (np-3)/4; - RF_ASSERT (4*d+3 == np); - qbuf = (char *) node->params[2*d+1].p; /* q buffer*/ - for (i=0; i < d; i++) - { - old = (RF_PhysDiskAddr_t *) node->params[2*i].p; - obuf = (char *) 
node->params[2*i+1].p; - new = (RF_PhysDiskAddr_t *) node->params[2*(d+1+i)].p; - nbuf = (char *) node->params[2*(d+1+i)+1].p; - RF_ASSERT (new->numSector == old->numSector); - RF_ASSERT (new->raidAddress == old->raidAddress); - /* the stripe unit within the stripe tells us the coefficient to use - for the multiply. */ - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),new->raidAddress); - /* compute the data unit offset within the column, then add one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,old->startSector % secPerSU); - QDelta(qpbuf,obuf,nbuf, rf_RaidAddressToByte(raidPtr, old->numSector),coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no I/O in this node */ - return(0); -} + int np = node->numParams; + int d; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; + int i; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + char *qbuf, *qpbuf; + char *obuf, *nbuf; + RF_PhysDiskAddr_t *old, *new; + unsigned long coeff; + unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; + + RF_ETIMER_START(timer); + + d = (np - 3) / 4; + RF_ASSERT(4 * d + 3 == np); + qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ + for (i = 0; i < d; i++) { + old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; + obuf = (char *) node->params[2 * i + 1].p; + new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p; + nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; + RF_ASSERT(new->numSector == old->numSector); + RF_ASSERT(new->raidAddress == old->raidAddress); + /* the stripe unit within the stripe tells us the coefficient + * to use for the multiply. 
*/ + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); + /* compute the data unit offset within the column, then add + * one */ + coeff = (coeff % raidPtr->Layout.numDataCol); + qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); + QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); + rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no + * I/O in this node */ + return (0); +} /* See the SimpleXORFunc for the difference between a simple and regular func. - These Q functions should be used for + These Q functions should be used for + + new q = Q(data,old data,old q) - new q = Q(data,old data,old q) + style updates and not for - style updates and not for - q = ( new data, new data, .... ) computations. @@ -421,145 +307,148 @@ int rf_RegularONQFunc(node) raidPtr */ -int rf_SimpleONQFunc(node) - RF_DagNode_t *node; +int +rf_SimpleONQFunc(node) + RF_DagNode_t *node; { - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf; - char *obuf, *nbuf; - RF_PhysDiskAddr_t *old, *new; - unsigned long coeff; - - RF_ETIMER_START(timer); - - d = (np-3)/4; - RF_ASSERT (4*d+3 == np); - qbuf = (char *) node->params[2*d+1].p; /* q buffer*/ - for (i=0; i < d; i++) - { - old = (RF_PhysDiskAddr_t *) node->params[2*i].p; - obuf = (char *) node->params[2*i+1].p; - new = (RF_PhysDiskAddr_t *) node->params[2*(d+1+i)].p; - nbuf = (char *) node->params[2*(d+1+i)+1].p; - RF_ASSERT (new->numSector == old->numSector); - RF_ASSERT (new->raidAddress == old->raidAddress); - /* the stripe unit within the stripe tells us the coefficient to use - for the multiply. 
*/ - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),new->raidAddress); - /* compute the data unit offset within the column, then add one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - QDelta(qbuf,obuf,nbuf, rf_RaidAddressToByte(raidPtr, old->numSector),coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no I/O in this node */ - return(0); -} + int np = node->numParams; + int d; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; + int i; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + char *qbuf; + char *obuf, *nbuf; + RF_PhysDiskAddr_t *old, *new; + unsigned long coeff; + + RF_ETIMER_START(timer); + + d = (np - 3) / 4; + RF_ASSERT(4 * d + 3 == np); + qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ + for (i = 0; i < d; i++) { + old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; + obuf = (char *) node->params[2 * i + 1].p; + new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p; + nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; + RF_ASSERT(new->numSector == old->numSector); + RF_ASSERT(new->raidAddress == old->raidAddress); + /* the stripe unit within the stripe tells us the coefficient + * to use for the multiply. 
*/ + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); + /* compute the data unit offset within the column, then add + * one */ + coeff = (coeff % raidPtr->Layout.numDataCol); + QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); + rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no + * I/O in this node */ + return (0); +} RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG) { - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs); + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs); } -static void RegularQSubr(node,qbuf) - RF_DagNode_t *node; - char *qbuf; +static void +RegularQSubr(node, qbuf) + RF_DagNode_t *node; + char *qbuf; { - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *obuf, *qpbuf; - RF_PhysDiskAddr_t *old; - unsigned long coeff; - - RF_ETIMER_START(timer); - - d = (np-1)/2; - RF_ASSERT (2*d+1 == np); - for (i=0; i < d; i++) - { - old = (RF_PhysDiskAddr_t *) node->params[2*i].p; - obuf = (char *) node->params[2*i+1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress); - /* compute the data unit offset within the column, then add one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - /* the input buffers may not all be aligned with the start of the - stripe. 
so shift by their sector offset within the stripe unit */ - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,old->startSector % secPerSU); - rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); -} + int np = node->numParams; + int d; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; + unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; + int i; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + char *obuf, *qpbuf; + RF_PhysDiskAddr_t *old; + unsigned long coeff; + + RF_ETIMER_START(timer); + + d = (np - 1) / 2; + RF_ASSERT(2 * d + 1 == np); + for (i = 0; i < d; i++) { + old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; + obuf = (char *) node->params[2 * i + 1].p; + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); + /* compute the data unit offset within the column, then add + * one */ + coeff = (coeff % raidPtr->Layout.numDataCol); + /* the input buffers may not all be aligned with the start of + * the stripe. so shift by their sector offset within the + * stripe unit */ + qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); + rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); +} /* used in degraded writes. 
*/ -static void DegrQSubr(node) - RF_DagNode_t *node; +static void +DegrQSubr(node) + RF_DagNode_t *node; { - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf = node->results[1]; - char *obuf, *qpbuf; - RF_PhysDiskAddr_t *old; - unsigned long coeff; - unsigned fail_start; - int j; - - old = (RF_PhysDiskAddr_t *)node->params[np-2].p; - fail_start = old->startSector % secPerSU; - - RF_ETIMER_START(timer); - - d = (np-2)/2; - RF_ASSERT (2*d+2 == np); - for (i=0; i < d; i++) - { - old = (RF_PhysDiskAddr_t *) node->params[2*i].p; - obuf = (char *) node->params[2*i+1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress); - /* compute the data unit offset within the column, then add one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - /* the input buffers may not all be aligned with the start of the - stripe. 
so shift by their sector offset within the stripe unit */ - j = old->startSector % secPerSU; - RF_ASSERT(j >= fail_start); - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,j - fail_start); - rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff); - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); -} + int np = node->numParams; + int d; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; + unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; + int i; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + char *qbuf = node->results[1]; + char *obuf, *qpbuf; + RF_PhysDiskAddr_t *old; + unsigned long coeff; + unsigned fail_start; + int j; + + old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; + fail_start = old->startSector % secPerSU; + + RF_ETIMER_START(timer); + + d = (np - 2) / 2; + RF_ASSERT(2 * d + 2 == np); + for (i = 0; i < d; i++) { + old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; + obuf = (char *) node->params[2 * i + 1].p; + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); + /* compute the data unit offset within the column, then add + * one */ + coeff = (coeff % raidPtr->Layout.numDataCol); + /* the input buffers may not all be aligned with the start of + * the stripe. so shift by their sector offset within the + * stripe unit */ + j = old->startSector % secPerSU; + RF_ASSERT(j >= fail_start); + qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); + rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); +} /* Called by large write code to compute the new parity and the new q. - + structure of the params: pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol - raidPtr + raidPtr for a total of 2d+1 arguments. 
The result buffers results[0], results[1] are the buffers for the p and q, @@ -570,29 +459,31 @@ static void DegrQSubr(node) corrupt the input for the q calculation. */ -int rf_RegularPQFunc(node) - RF_DagNode_t *node; +int +rf_RegularPQFunc(node) + RF_DagNode_t *node; { - RegularQSubr(node,node->results[1]); - return(rf_RegularXorFunc(node)); /* does the wakeup */ + RegularQSubr(node, node->results[1]); + return (rf_RegularXorFunc(node)); /* does the wakeup */ } -int rf_RegularQFunc(node) - RF_DagNode_t *node; +int +rf_RegularQFunc(node) + RF_DagNode_t *node; { - /* Almost ... adjust Qsubr args */ - RegularQSubr(node, node->results[0]); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no I/O in this node */ - return(0); + /* Almost ... adjust Qsubr args */ + RegularQSubr(node, node->results[0]); + rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no + * I/O in this node */ + return (0); } - /* Called by singly degraded write code to compute the new parity and the new q. - + structure of the params: - pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d - failedPDA raidPtr + pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d + failedPDA raidPtr for a total of 2d+2 arguments. The result buffers results[0], results[1] are the buffers for the parity and q, @@ -605,14 +496,15 @@ int rf_RegularQFunc(node) We treat this identically to the regularPQ case, ignoring the failedPDA extra argument. */ -void rf_Degraded_100_PQFunc(node) - RF_DagNode_t *node; +void +rf_Degraded_100_PQFunc(node) + RF_DagNode_t *node; { - int np = node->numParams; + int np = node->numParams; - RF_ASSERT (np >= 2); - DegrQSubr(node); - rf_RecoveryXorFunc(node); + RF_ASSERT(np >= 2); + DegrQSubr(node); + rf_RecoveryXorFunc(node); } @@ -623,7 +515,7 @@ void rf_Degraded_100_PQFunc(node) pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr and results[0] contains the data buffer. Which is originally zero-filled. 
- + */ /* this Q func is used by the degraded-mode dag functions to recover lost data. @@ -633,8 +525,8 @@ void rf_Degraded_100_PQFunc(node) * the other PDAs in the parameter list to determine where within the target * buffer the corresponding data should be xored. * - * Recall the basic equation is - * + * Recall the basic equation is + * * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256 * * so to recover data_j we need @@ -644,60 +536,60 @@ void rf_Degraded_100_PQFunc(node) * So the coefficient for each buffer is (255 - data_col), and j should be initialized by * copying Q into it. Then we need to do a table lookup to convert to solve * data_j /= J - * - * + * + * */ -int rf_RecoveryQFunc(node) - RF_DagNode_t *node; +int +rf_RecoveryQFunc(node) + RF_DagNode_t *node; { - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p; - int i; - RF_PhysDiskAddr_t *pda; - RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr,failedPDA->startSector); - char *srcbuf, *destbuf; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - unsigned long coeff; - - RF_ETIMER_START(timer); - /* start by copying Q into the buffer */ - bcopy(node->params[node->numParams-3].p,node->results[0], - rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); - for (i=0; i<node->numParams-4; i+=2) - { - RF_ASSERT (node->params[i+1].p != node->results[0]); - pda = (RF_PhysDiskAddr_t *) node->params[i].p; - srcbuf = (char *) node->params[i+1].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset-failedSUOffset); - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - 
rf_IncQ((unsigned long *)destbuf, (unsigned long *)srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); - } - /* Do the nasty inversion now */ - coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),failedPDA->startSector) % raidPtr->Layout.numDataCol); - rf_InvertQ(node->results[0],node->results[0],rf_RaidAddressToByte(raidPtr,pda->numSector),coeff); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); - return(0); + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; + RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; + int i; + RF_PhysDiskAddr_t *pda; + RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); + char *srcbuf, *destbuf; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + unsigned long coeff; + + RF_ETIMER_START(timer); + /* start by copying Q into the buffer */ + bcopy(node->params[node->numParams - 3].p, node->results[0], + rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); + for (i = 0; i < node->numParams - 4; i += 2) { + RF_ASSERT(node->params[i + 1].p != node->results[0]); + pda = (RF_PhysDiskAddr_t *) node->params[i].p; + srcbuf = (char *) node->params[i + 1].p; + suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); + destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress); + /* compute the data unit offset within the column */ + coeff = (coeff % raidPtr->Layout.numDataCol); + rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); + } + /* Do the nasty inversion now */ + coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % 
raidPtr->Layout.numDataCol); + rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); + rf_GenericWakeupFunc(node, 0); + return (0); } -int rf_RecoveryPQFunc(node) - RF_DagNode_t *node; +int +rf_RecoveryPQFunc(node) + RF_DagNode_t *node; { - RF_PANIC(); - return(1); + RF_PANIC(); + return (1); } - /* - Degraded write Q subroutine. + Degraded write Q subroutine. Used when P is dead. - Large-write style Q computation. + Large-write style Q computation. Parameters (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr. @@ -707,47 +599,48 @@ int rf_RecoveryPQFunc(node) This is a "simple style" recovery func. */ -void rf_PQ_DegradedWriteQFunc(node) - RF_DagNode_t *node; +void +rf_PQ_DegradedWriteQFunc(node) + RF_DagNode_t *node; { - int np = node->numParams; - int d; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p; - unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_Etimer_t timer; - char *qbuf = node->results[0]; - char *obuf, *qpbuf; - RF_PhysDiskAddr_t *old; - unsigned long coeff; - int fail_start,j; - - old = (RF_PhysDiskAddr_t *) node->params[np-2].p; - fail_start = old->startSector % secPerSU; - - RF_ETIMER_START(timer); - - d = (np-2)/2; - RF_ASSERT (2*d+2 == np); - - for (i=0; i < d; i++) - { - old = (RF_PhysDiskAddr_t *) node->params[2*i].p; - obuf = (char *) node->params[2*i+1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress); - /* compute the data unit offset within the column, then add one */ - coeff = (coeff % raidPtr->Layout.numDataCol); - j = old->startSector % secPerSU; - RF_ASSERT(j >= fail_start); - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,j - fail_start); - rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff); - } - - 
RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); + int np = node->numParams; + int d; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; + unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; + int i; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + char *qbuf = node->results[0]; + char *obuf, *qpbuf; + RF_PhysDiskAddr_t *old; + unsigned long coeff; + int fail_start, j; + + old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; + fail_start = old->startSector % secPerSU; + + RF_ETIMER_START(timer); + + d = (np - 2) / 2; + RF_ASSERT(2 * d + 2 == np); + + for (i = 0; i < d; i++) { + old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; + obuf = (char *) node->params[2 * i + 1].p; + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); + /* compute the data unit offset within the column, then add + * one */ + coeff = (coeff % raidPtr->Layout.numDataCol); + j = old->startSector % secPerSU; + RF_ASSERT(j >= fail_start); + qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); + rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); + } + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); + rf_GenericWakeupFunc(node, 0); } @@ -764,70 +657,69 @@ void rf_PQ_DegradedWriteQFunc(node) length in bytes; */ -void rf_IncQ(dest,buf,length,coeff) - unsigned long *dest; - unsigned long *buf; - unsigned length; - unsigned coeff; +void +rf_IncQ(dest, buf, length, coeff) + unsigned long *dest; + unsigned long *buf; + unsigned length; + unsigned coeff; { - unsigned long a, d, new; - unsigned long a1, a2; - unsigned int *q = &(rf_qfor[28-coeff][0]); - unsigned r = rf_rn[coeff+1]; + unsigned long a, d, new; + unsigned long a1, a2; + unsigned int *q = &(rf_qfor[28 - coeff][0]); + unsigned r = rf_rn[coeff + 1]; #define EXTRACT(a,i) ((a >> (5L*i)) & 
0x1f) #define INSERT(a,i) (a << (5L*i)) - length /= 8; - /* 13 5 bit quants in a 64 bit word */ - while (length) - { - a = *buf++; - d = *dest; - a1 = EXTRACT(a,0) ^ r; - a2 = EXTRACT(a,1) ^ r; - new = INSERT(a2,1) | a1 ; - a1 = EXTRACT(a,2) ^ r; - a2 = EXTRACT(a,3) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,2) | INSERT (a2,3); - a1 = EXTRACT(a,4) ^ r; - a2 = EXTRACT(a,5) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,4) | INSERT (a2,5); - a1 = EXTRACT(a,5) ^ r; - a2 = EXTRACT(a,6) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,5) | INSERT (a2,6); + length /= 8; + /* 13 5 bit quants in a 64 bit word */ + while (length) { + a = *buf++; + d = *dest; + a1 = EXTRACT(a, 0) ^ r; + a2 = EXTRACT(a, 1) ^ r; + new = INSERT(a2, 1) | a1; + a1 = EXTRACT(a, 2) ^ r; + a2 = EXTRACT(a, 3) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 2) | INSERT(a2, 3); + a1 = EXTRACT(a, 4) ^ r; + a2 = EXTRACT(a, 5) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 4) | INSERT(a2, 5); + a1 = EXTRACT(a, 5) ^ r; + a2 = EXTRACT(a, 6) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 5) | INSERT(a2, 6); #if RF_LONGSHIFT > 2 - a1 = EXTRACT(a,7) ^ r; - a2 = EXTRACT(a,8) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,7) | INSERT (a2,8); - a1 = EXTRACT(a,9) ^ r; - a2 = EXTRACT(a,10) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,9) | INSERT (a2,10); - a1 = EXTRACT(a,11) ^ r; - a2 = EXTRACT(a,12) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,11) | INSERT (a2,12); -#endif /* RF_LONGSHIFT > 2 */ - d ^= new; - *dest++ = d; - length--; - } + a1 = EXTRACT(a, 7) ^ r; + a2 = EXTRACT(a, 8) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 7) | INSERT(a2, 8); + a1 = EXTRACT(a, 9) ^ r; + a2 = EXTRACT(a, 10) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 9) | INSERT(a2, 10); + a1 = EXTRACT(a, 11) ^ r; + a2 = EXTRACT(a, 12) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 11) | INSERT(a2, 12); 
+#endif /* RF_LONGSHIFT > 2 */ + d ^= new; + *dest++ = d; + length--; + } } - /* - compute - + compute + dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ] on a five bit basis. @@ -836,133 +728,132 @@ void rf_IncQ(dest,buf,length,coeff) length in bytes. */ -static void QDelta( - char *dest, - char *obuf, - char *nbuf, - unsigned length, - unsigned char coeff) +static void +QDelta( + char *dest, + char *obuf, + char *nbuf, + unsigned length, + unsigned char coeff) { - unsigned long a, d, new; - unsigned long a1, a2; - unsigned int *q = &(rf_qfor[28-coeff][0]); - unsigned r = rf_rn[coeff+1]; - -#ifdef KERNEL - /* PQ in kernel currently not supported because the encoding/decoding table is not present */ - bzero(dest, length); -#else /* KERNEL */ - /* this code probably doesn't work and should be rewritten -wvcii */ - /* 13 5 bit quants in a 64 bit word */ - length /= 8; - while (length) - { - a = *obuf++; /* XXX need to reorg to avoid cache conflicts */ - a ^= *nbuf++; - d = *dest; - a1 = EXTRACT(a,0) ^ r; - a2 = EXTRACT(a,1) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = INSERT(a2,1) | a1 ; - a1 = EXTRACT(a,2) ^ r; - a2 = EXTRACT(a,3) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,2) | INSERT (a2,3); - a1 = EXTRACT(a,4) ^ r; - a2 = EXTRACT(a,5) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,4) | INSERT (a2,5); - a1 = EXTRACT(a,5) ^ r; - a2 = EXTRACT(a,6) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,5) | INSERT (a2,6); + unsigned long a, d, new; + unsigned long a1, a2; + unsigned int *q = &(rf_qfor[28 - coeff][0]); + unsigned r = rf_rn[coeff + 1]; + +#ifdef _KERNEL + /* PQ in kernel currently not supported because the encoding/decoding + * table is not present */ + bzero(dest, length); +#else /* KERNEL */ + /* this code probably doesn't work and should be rewritten -wvcii */ + /* 13 5 bit quants in a 64 bit word */ + length /= 8; + while (length) { + a = *obuf++; /* XXX need to reorg to avoid cache conflicts */ + a ^= *nbuf++; + d = *dest; 
+ a1 = EXTRACT(a, 0) ^ r; + a2 = EXTRACT(a, 1) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = INSERT(a2, 1) | a1; + a1 = EXTRACT(a, 2) ^ r; + a2 = EXTRACT(a, 3) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 2) | INSERT(a2, 3); + a1 = EXTRACT(a, 4) ^ r; + a2 = EXTRACT(a, 5) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 4) | INSERT(a2, 5); + a1 = EXTRACT(a, 5) ^ r; + a2 = EXTRACT(a, 6) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 5) | INSERT(a2, 6); #if RF_LONGSHIFT > 2 - a1 = EXTRACT(a,7) ^ r; - a2 = EXTRACT(a,8) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,7) | INSERT (a2,8); - a1 = EXTRACT(a,9) ^ r; - a2 = EXTRACT(a,10) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,9) | INSERT (a2,10); - a1 = EXTRACT(a,11) ^ r; - a2 = EXTRACT(a,12) ^ r; - a1 = q[a1]; - a2 = q[a2]; - new = new | INSERT(a1,11) | INSERT (a2,12); -#endif /* RF_LONGSHIFT > 2 */ - d ^= new; - *dest++ = d; - length--; - } -#endif /* KERNEL */ + a1 = EXTRACT(a, 7) ^ r; + a2 = EXTRACT(a, 8) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 7) | INSERT(a2, 8); + a1 = EXTRACT(a, 9) ^ r; + a2 = EXTRACT(a, 10) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 9) | INSERT(a2, 10); + a1 = EXTRACT(a, 11) ^ r; + a2 = EXTRACT(a, 12) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1, 11) | INSERT(a2, 12); +#endif /* RF_LONGSHIFT > 2 */ + d ^= new; + *dest++ = d; + length--; + } +#endif /* _KERNEL */ } - /* recover columns a and b from the given p and q into bufs abuf and bbuf. All bufs are word aligned. Length is in bytes. */ - + /* * XXX * * Everything about this seems wrong. 
*/ -void rf_PQ_recover(pbuf,qbuf,abuf,bbuf,length,coeff_a,coeff_b) - unsigned long *pbuf; - unsigned long *qbuf; - unsigned long *abuf; - unsigned long *bbuf; - unsigned length; - unsigned coeff_a; - unsigned coeff_b; +void +rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b) + unsigned long *pbuf; + unsigned long *qbuf; + unsigned long *abuf; + unsigned long *bbuf; + unsigned length; + unsigned coeff_a; + unsigned coeff_b; { - unsigned long p, q, a, a0, a1; - int col = (29 * coeff_a) + coeff_b; - unsigned char *q0 = & (rf_qinv[col][0]); - - length /= 8; - while (length) - { - p = *pbuf++; - q = *qbuf++; - a0 = EXTRACT(p,0); - a1 = EXTRACT(q,0); - a = q0[a0<<5 | a1]; + unsigned long p, q, a, a0, a1; + int col = (29 * coeff_a) + coeff_b; + unsigned char *q0 = &(rf_qinv[col][0]); + + length /= 8; + while (length) { + p = *pbuf++; + q = *qbuf++; + a0 = EXTRACT(p, 0); + a1 = EXTRACT(q, 0); + a = q0[a0 << 5 | a1]; #define MF(i) \ a0 = EXTRACT(p,i); \ a1 = EXTRACT(q,i); \ a = a | INSERT(q0[a0<<5 | a1],i) - MF(1); - MF(2); - MF(3); - MF(4); - MF(5); - MF(6); + MF(1); + MF(2); + MF(3); + MF(4); + MF(5); + MF(6); #if 0 - MF(7); - MF(8); - MF(9); - MF(10); - MF(11); - MF(12); -#endif /* 0 */ - *abuf++ = a; - *bbuf++ = a ^ p; - length--; - } + MF(7); + MF(8); + MF(9); + MF(10); + MF(11); + MF(12); +#endif /* 0 */ + *abuf++ = a; + *bbuf++ = a ^ p; + length--; + } } - -/* +/* Lost parity and a data column. Recover that data column. Assume col coeff is lost. Let q the contents of Q after all surviving data columns have been q-xored out of it. @@ -970,7 +861,7 @@ void rf_PQ_recover(pbuf,qbuf,abuf,bbuf,length,coeff_a,coeff_b) q[28-coeff][a_i ^ r_i+1] = q - but q is cyclic with period 31. + but q is cyclic with period 31. So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] = q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} . 
@@ -981,28 +872,28 @@ void rf_PQ_recover(pbuf,qbuf,abuf,bbuf,length,coeff_a,coeff_b) */ - -static void rf_InvertQ( - unsigned long *qbuf, - unsigned long *abuf, - unsigned length, - unsigned coeff) + +static void +rf_InvertQ( + unsigned long *qbuf, + unsigned long *abuf, + unsigned length, + unsigned coeff) { - unsigned long a, new; - unsigned long a1, a2; - unsigned int *q = &(rf_qfor[3+coeff][0]); - unsigned r = rf_rn[coeff+1]; - - /* 13 5 bit quants in a 64 bit word */ - length /= 8; - while (length) - { - a = *qbuf++; - a1 = EXTRACT(a,0); - a2 = EXTRACT(a,1); - a1 = r ^ q[a1]; - a2 = r ^ q[a2]; - new = INSERT(a2,1) | a1; + unsigned long a, new; + unsigned long a1, a2; + unsigned int *q = &(rf_qfor[3 + coeff][0]); + unsigned r = rf_rn[coeff + 1]; + + /* 13 5 bit quants in a 64 bit word */ + length /= 8; + while (length) { + a = *qbuf++; + a1 = EXTRACT(a, 0); + a2 = EXTRACT(a, 1); + a1 = r ^ q[a1]; + a2 = r ^ q[a2]; + new = INSERT(a2, 1) | a1; #define M(i,j) \ a1 = EXTRACT(a,i); \ a2 = EXTRACT(a,j); \ @@ -1010,17 +901,17 @@ static void rf_InvertQ( a2 = r ^ q[a2]; \ new = new | INSERT(a1,i) | INSERT(a2,j) - M(2,3); - M(4,5); - M(5,6); + M(2, 3); + M(4, 5); + M(5, 6); #if RF_LONGSHIFT > 2 - M(7,8); - M(9,10); - M(11,12); -#endif /* RF_LONGSHIFT > 2 */ - *abuf++ = new; - length--; - } + M(7, 8); + M(9, 10); + M(11, 12); +#endif /* RF_LONGSHIFT > 2 */ + *abuf++ = new; + length--; + } } - -#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */ +#endif /* (RF_INCLUDE_DECL_PQ > 0) || + * (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pq.h b/sys/dev/raidframe/rf_pq.h index 52f816354fa..70472786c85 100644 --- a/sys/dev/raidframe/rf_pq.h +++ b/sys/dev/raidframe/rf_pq.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_pq.h,v 1.1 1999/01/11 14:29:39 niklas Exp $ */ -/* $NetBSD: rf_pq.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_pq.h,v 1.2 1999/02/16 00:03:10 niklas Exp $ */ +/* $NetBSD: rf_pq.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ /* * rf_pq.h */ @@ 
-29,50 +29,6 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ -/* - * : - * Log: rf_pq.h,v - * Revision 1.9 1996/07/31 15:35:05 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.8 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.7 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.6 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.5 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.4 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.3 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.2 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.1 1996/05/18 19:56:21 jimz - * Initial revision - * - */ #ifndef _RF__RF_PQ_H_ #define _RF__RF_PQ_H_ @@ -82,10 +38,10 @@ extern RF_RedFuncs_t rf_pFuncs; extern RF_RedFuncs_t rf_pRecoveryFuncs; -int rf_RegularONPFunc(RF_DagNode_t *node); -int rf_SimpleONPFunc(RF_DagNode_t *node); -int rf_RecoveryPFunc(RF_DagNode_t *node); -int rf_RegularPFunc(RF_DagNode_t *node); +int rf_RegularONPFunc(RF_DagNode_t * node); +int rf_SimpleONPFunc(RF_DagNode_t * node); +int rf_RecoveryPFunc(RF_DagNode_t * node); +int 
rf_RegularPFunc(RF_DagNode_t * node); #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) @@ -93,23 +49,27 @@ extern RF_RedFuncs_t rf_qFuncs; extern RF_RedFuncs_t rf_qRecoveryFuncs; extern RF_RedFuncs_t rf_pqRecoveryFuncs; -void rf_PQDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc); +void +rf_PQDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG); -int rf_RegularONQFunc(RF_DagNode_t *node); -int rf_SimpleONQFunc(RF_DagNode_t *node); +int rf_RegularONQFunc(RF_DagNode_t * node); +int rf_SimpleONQFunc(RF_DagNode_t * node); RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG); -int rf_RegularPQFunc(RF_DagNode_t *node); -int rf_RegularQFunc(RF_DagNode_t *node); -void rf_Degraded_100_PQFunc(RF_DagNode_t *node); -int rf_RecoveryQFunc(RF_DagNode_t *node); -int rf_RecoveryPQFunc(RF_DagNode_t *node); -void rf_PQ_DegradedWriteQFunc(RF_DagNode_t *node); -void rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length, - unsigned coeff); -void rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf, - unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b); +int rf_RegularPQFunc(RF_DagNode_t * node); +int rf_RegularQFunc(RF_DagNode_t * node); +void rf_Degraded_100_PQFunc(RF_DagNode_t * node); +int rf_RecoveryQFunc(RF_DagNode_t * node); +int rf_RecoveryPQFunc(RF_DagNode_t * node); +void rf_PQ_DegradedWriteQFunc(RF_DagNode_t * node); +void +rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length, + unsigned coeff); +void +rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf, + unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b); -#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */ +#endif /* (RF_INCLUDE_DECL_PQ > 0) || + * (RF_INCLUDE_RAID6 > 0) */ -#endif /* !_RF__RF_PQ_H_ */ +#endif /* !_RF__RF_PQ_H_ */ 
diff --git a/sys/dev/raidframe/rf_pqdeg.c b/sys/dev/raidframe/rf_pqdeg.c index 6376201b6c3..82c3e5de08d 100644 --- a/sys/dev/raidframe/rf_pqdeg.c +++ b/sys/dev/raidframe/rf_pqdeg.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_pqdeg.c,v 1.1 1999/01/11 14:29:39 niklas Exp $ */ -/* $NetBSD: rf_pqdeg.c,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_pqdeg.c,v 1.2 1999/02/16 00:03:10 niklas Exp $ */ +/* $NetBSD: rf_pqdeg.c,v 1.3 1999/02/05 00:06:15 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,80 +27,6 @@ * rights to redistribute these changes. */ -/* - * Log: rf_pqdeg.c,v - * Revision 1.19 1996/11/05 21:10:40 jimz - * failed pda generalization - * - * Revision 1.18 1996/07/31 16:30:01 jimz - * asm/asmap fix - * - * Revision 1.17 1996/07/31 15:35:09 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.16 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.15 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.14 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.13 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.12 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.11 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.10 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.9 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.8 1996/05/03 19:41:07 wvcii - * added includes for dag library - * - * Revision 1.7 1995/11/30 16:19:36 wvcii - * added copyright info - * - * Revision 1.6 1995/11/07 16:15:08 wvcii - * updated/added prototyping for dag creation - * - * Revision 1.5 1995/03/01 20:25:48 holland - * kernelization changes - * - * Revision 1.4 1995/02/03 22:31:36 holland - * many changes related to kernelization - * - * Revision 1.3 1995/02/01 15:13:05 holland - * moved #include of general.h out of raid.h and into each file - * - * Revision 1.2 1994/12/05 04:50:26 danner - * additional pq support - * - * Revision 1.1 1994/11/29 20:36:02 danner - * Initial revision - * - */ - #include "rf_archs.h" #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) @@ -124,7 +50,7 @@ /* Degraded mode dag functions for P+Q calculations. - The following nomenclature is used. + The following nomenclature is used. PQ_<D><P><Q>_Create{Large,Small}<Write|Read>DAG @@ -132,12 +58,12 @@ data units <D> (0,1,2), parity units <P> (0,1), and Q units <Q>, effecting the I/O. The reads have only PQ_<D><P><Q>_CreateReadDAG variants, while the single fault writes have both large and small write versions. (Single fault - PQ is equivalent to normal mode raid 5 in many aspects. + PQ is equivalent to normal mode raid 5 in many aspects. 
Some versions degenerate into the same case, and are grouped together below. */ -/* Reads, single failure +/* Reads, single failure we have parity, so we can do a raid 5 reconstruct read. @@ -145,9 +71,8 @@ RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateReadDAG) { - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs); + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs); } - /* Reads double failure */ /* @@ -157,9 +82,8 @@ RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateReadDAG) RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateReadDAG) { - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs); + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs); } - /* parity is lost, so we need to do a reconstruct read and recompute @@ -168,66 +92,65 @@ RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateReadDAG) RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateReadDAG) { - RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_qRecoveryFuncs); + RF_PhysDiskAddr_t *temp; + /* swap P and Q pointers to fake out the DegradedReadDAG code */ + temp = asmap->parityInfo; + asmap->parityInfo = asmap->qInfo; + asmap->qInfo = temp; + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_qRecoveryFuncs); } - /* Two data units are dead in this stripe, so we will need read - both P and Q to reconstruct the data. Note that only - one data unit we are reading may actually be missing. + both P and Q to reconstruct the data. Note that only + one data unit we are reading may actually be missing. 
*/ RF_CREATE_DAG_FUNC_DECL(rf_CreateDoubleDegradedReadDAG) { - rf_PQ_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList); + rf_PQ_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList); } - RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG) { - rf_CreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList); + rf_CreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList); } - /* Writes, single failure */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG) { - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 2, (int (*)())rf_Degraded_100_PQFunc, RF_FALSE); + if (asmap->numStripeUnitsAccessed != 1 && + asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) + RF_PANIC(); + rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, 2, (int (*) ()) rf_Degraded_100_PQFunc, RF_FALSE); } - /* Dead P - act like a RAID 5 small write with parity = Q */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateSmallWriteDAG) { - RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the DegradedReadDAG code */ - temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_qFuncs, NULL); + RF_PhysDiskAddr_t *temp; + /* swap P and Q pointers to fake out the DegradedReadDAG code */ + temp = asmap->parityInfo; + asmap->parityInfo = asmap->qInfo; + asmap->qInfo = temp; + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_qFuncs, NULL); } - /* Dead Q - act like a RAID 5 small write */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateSmallWriteDAG) { - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, NULL); + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, 
NULL); } - /* Dead P - act like a RAID 5 large write but for Q */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateLargeWriteDAG) { - RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the code */ - temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularQFunc, RF_FALSE); + RF_PhysDiskAddr_t *temp; + /* swap P and Q pointers to fake out the code */ + temp = asmap->parityInfo; + asmap->parityInfo = asmap->qInfo; + asmap->qInfo = temp; + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularQFunc, RF_FALSE); } - /* Dead Q - act like a RAID 5 large write */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateLargeWriteDAG) { - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularPFunc, RF_FALSE); + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularPFunc, RF_FALSE); } @@ -236,14 +159,13 @@ RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateLargeWriteDAG) */ /* - * Lost P & Q - do a nonredundant write + * Lost P & Q - do a nonredundant write */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_011_CreateWriteDAG) { - rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); + rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + RF_IO_TYPE_WRITE); } - /* In the two cases below, A nasty case arises when the write a (strict) portion of a failed stripe unit @@ -255,32 +177,30 @@ RF_CREATE_DAG_FUNC_DECL(rf_PQ_011_CreateWriteDAG) */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateWriteDAG) { - RF_PhysDiskAddr_t *temp; - - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - { - RF_PANIC(); - } - /* swap P and Q to fake out parity code */ - temp = asmap->parityInfo; - asmap->parityInfo = asmap->qInfo; - asmap->qInfo = temp; - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, 
dag_h, bp, flags, - allocList,1, (int (*)())rf_PQ_DegradedWriteQFunc, RF_FALSE); - /* is the regular Q func the right one to call? */ + RF_PhysDiskAddr_t *temp; + + if (asmap->numStripeUnitsAccessed != 1 && + asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) { + RF_PANIC(); + } + /* swap P and Q to fake out parity code */ + temp = asmap->parityInfo; + asmap->parityInfo = asmap->qInfo; + asmap->qInfo = temp; + rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, 1, (int (*) ()) rf_PQ_DegradedWriteQFunc, RF_FALSE); + /* is the regular Q func the right one to call? */ } - /* Lost Data and Q - do degraded mode P write */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateWriteDAG) { - if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) - RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList,1, rf_RecoveryXorFunc, RF_FALSE); + if (asmap->numStripeUnitsAccessed != 1 && + asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) + RF_PANIC(); + rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, 1, rf_RecoveryXorFunc, RF_FALSE); } - -#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */ +#endif /* (RF_INCLUDE_DECL_PQ > 0) || + * (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pqdeg.h b/sys/dev/raidframe/rf_pqdeg.h index dc34a7970f7..5841231c014 100644 --- a/sys/dev/raidframe/rf_pqdeg.h +++ b/sys/dev/raidframe/rf_pqdeg.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_pqdeg.h,v 1.1 1999/01/11 14:29:39 niklas Exp $ */ -/* $NetBSD: rf_pqdeg.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_pqdeg.h,v 1.2 1999/02/16 00:03:11 niklas Exp $ */ +/* $NetBSD: rf_pqdeg.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,25 +27,6 @@ * rights to redistribute these changes. 
*/ -/* : - * Log: rf_pqdeg.h,v - * Revision 1.7 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.6 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.5 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.4 1995/11/30 16:19:11 wvcii - * added copyright info - * - */ - #ifndef _RF__RF_PQDEG_H_ #define _RF__RF_PQDEG_H_ @@ -77,17 +58,18 @@ RF_CREATE_DAG_FUNC_DECL(rf_PQ_011_CreateWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG); -#endif /* RF_UTILITY == 0 */ +#endif /* RF_UTILITY == 0 */ -typedef RF_uint32 RF_ua32_t[32]; -typedef RF_uint8 RF_ua1024_t[1024]; +typedef RF_uint32 RF_ua32_t[32]; +typedef RF_uint8 RF_ua1024_t[1024]; extern RF_ua32_t rf_rn; extern RF_ua32_t rf_qfor[32]; -#ifndef KERNEL /* we don't support PQ in the kernel yet, so don't link in this monster table */ -extern RF_ua1024_t rf_qinv[29*29]; -#else /* !KERNEL */ +#ifndef _KERNEL /* we don't support PQ in the kernel yet, so + * don't link in this monster table */ +extern RF_ua1024_t rf_qinv[29 * 29]; +#else /* !_KERNEL */ extern RF_ua1024_t rf_qinv[1]; -#endif /* !KERNEL */ +#endif /* !_KERNEL */ -#endif /* !_RF__RF_PQDEG_H_ */ +#endif /* !_RF__RF_PQDEG_H_ */ diff --git a/sys/dev/raidframe/rf_pqdegdags.c b/sys/dev/raidframe/rf_pqdegdags.c index e8346b4f941..ef41bf3f9a0 100644 --- a/sys/dev/raidframe/rf_pqdegdags.c +++ b/sys/dev/raidframe/rf_pqdegdags.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_pqdegdags.c,v 1.1 1999/01/11 14:29:40 niklas Exp $ */ -/* $NetBSD: rf_pqdegdags.c,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_pqdegdags.c,v 1.2 1999/02/16 00:03:11 niklas Exp $ */ +/* $NetBSD: 
rf_pqdegdags.c,v 1.3 1999/02/05 00:06:15 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,118 +29,9 @@ /* * rf_pqdegdags.c - * Degraded mode dags for double fault cases. + * Degraded mode dags for double fault cases. */ -/* - * : - * Log: rf_pqdegdags.c,v - * Revision 1.31 1996/11/05 21:10:40 jimz - * failed pda generalization - * - * Revision 1.30 1996/07/31 16:30:05 jimz - * asm/asmap fix - * - * Revision 1.29 1996/07/31 15:35:15 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.28 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.27 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.26 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.25 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.24 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.23 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.22 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.21 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.20 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.19 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.18 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.17 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.16 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.15 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.14 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.13 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.12 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.11 1996/05/03 19:47:50 wvcii - * removed include of rf_redstripe.h - * - * Revision 1.10 1995/12/12 
18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.9 1995/11/30 16:17:57 wvcii - * added copyright info - * - * Revision 1.8 1995/11/07 15:33:25 wvcii - * dag creation routines now generate term node - * added asserts - * encoded commit point nodes, antecedence types into dags - * didn't add commit barrier - the code is a mess and needs to - * be cleand up first - * - */ #include "rf_archs.h" @@ -158,13 +49,14 @@ #include "rf_pq.h" #include "rf_sys.h" -static void applyPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, RF_PhysDiskAddr_t *ppda, - RF_PhysDiskAddr_t *qpda, void *bp); +static void +applyPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, RF_PhysDiskAddr_t * ppda, + RF_PhysDiskAddr_t * qpda, void *bp); /* - Two data drives have failed, and we are doing a read that covers one of them. - We may also be reading some of the surviving drives. - + Two data drives have failed, and we are doing a read that covers one of them. + We may also be reading some of the surviving drives. 
+ ***************************************************************************************** * @@ -220,58 +112,56 @@ rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_Ge RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead) { - rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList, - "Rq", "PQ Recovery", rf_PQDoubleRecoveryFunc); + rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList, + "Rq", "PQ Recovery", rf_PQDoubleRecoveryFunc); } - -static void applyPDA(raidPtr,pda,ppda,qpda, bp) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - RF_PhysDiskAddr_t *ppda; - RF_PhysDiskAddr_t *qpda; - void *bp; + +static void +applyPDA(raidPtr, pda, ppda, qpda, bp) + RF_Raid_t *raidPtr; + RF_PhysDiskAddr_t *pda; + RF_PhysDiskAddr_t *ppda; + RF_PhysDiskAddr_t *qpda; + void *bp; { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_RaidAddr_t s0off = rf_StripeUnitOffset(layoutPtr, ppda->startSector); - RF_SectorCount_t s0len = ppda->numSector, len; - RF_SectorNum_t suoffset; - unsigned coeff; - char *pbuf = ppda->bufPtr; - char *qbuf = qpda->bufPtr; - char *buf; - int delta; - - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - len = pda->numSector; - /* see if pda intersects a recovery pda */ - if ((suoffset < s0off+s0len) && ( suoffset+len > s0off)) - { - buf = pda->bufPtr; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),pda->raidAddress); - coeff = (coeff % raidPtr->Layout.numDataCol); - - if (suoffset < s0off) - { - delta = s0off - suoffset; - buf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),delta); - suoffset = s0off; - len -= delta; - } - if (suoffset > s0off) - { - delta = suoffset - s0off; - pbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),delta); - qbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),delta); + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_RaidAddr_t s0off = rf_StripeUnitOffset(layoutPtr, ppda->startSector); + RF_SectorCount_t s0len = ppda->numSector, len; + RF_SectorNum_t 
suoffset; + unsigned coeff; + char *pbuf = ppda->bufPtr; + char *qbuf = qpda->bufPtr; + char *buf; + int delta; + + suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); + len = pda->numSector; + /* see if pda intersects a recovery pda */ + if ((suoffset < s0off + s0len) && (suoffset + len > s0off)) { + buf = pda->bufPtr; + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress); + coeff = (coeff % raidPtr->Layout.numDataCol); + + if (suoffset < s0off) { + delta = s0off - suoffset; + buf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); + suoffset = s0off; + len -= delta; + } + if (suoffset > s0off) { + delta = suoffset - s0off; + pbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); + qbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); + } + if ((suoffset + len) > (s0len + s0off)) + len = s0len + s0off - suoffset; + + /* src, dest, len */ + rf_bxor(buf, pbuf, rf_RaidAddressToByte(raidPtr, len), bp); + + /* dest, src, len, coeff */ + rf_IncQ((unsigned long *) qbuf, (unsigned long *) buf, rf_RaidAddressToByte(raidPtr, len), coeff); } - if ((suoffset + len) > (s0len + s0off)) - len = s0len + s0off - suoffset; - - /* src, dest, len */ - rf_bxor(buf,pbuf,rf_RaidAddressToByte(raidPtr,len), bp); - - /* dest, src, len, coeff */ - rf_IncQ((unsigned long *)qbuf,(unsigned long *)buf,rf_RaidAddressToByte(raidPtr,len),coeff); - } } /* Recover data in the case of a double failure. There can be two @@ -281,7 +171,7 @@ static void applyPDA(raidPtr,pda,ppda,qpda, bp) pdas of P and Q, followed by the raidPtr. The list can look like pda, pda, ... , p pda, q pda, raidptr, asm - + or pda, pda, ... 
, p_1 pda, p_2 pda, q_1 pda, q_2 pda, raidptr, asm @@ -293,211 +183,201 @@ static void applyPDA(raidPtr,pda,ppda,qpda, bp) */ -int rf_PQDoubleRecoveryFunc(node) - RF_DagNode_t *node; +int +rf_PQDoubleRecoveryFunc(node) + RF_DagNode_t *node; { - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout); - int d, i; - unsigned coeff; - RF_RaidAddr_t sosAddr, suoffset; - RF_SectorCount_t len, secPerSU = layoutPtr->sectorsPerStripeUnit; - int two = 0; - RF_PhysDiskAddr_t *ppda,*ppda2,*qpda,*qpda2,*pda,npda; - char *buf; - int numDataCol = layoutPtr->numDataCol; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ETIMER_START(timer); - - if (asmap->failedPDAs[1] && - (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) - { - RF_ASSERT(0); - ppda = node->params[np-6].p; - ppda2 = node->params[np-5].p; - qpda = node->params[np-4].p; - qpda2 = node->params[np-3].p; - d = (np-6); - two = 1; - } - else - { - ppda = node->params[np-4].p; - qpda = node->params[np-3].p; - d = (np-4); - } - - for (i=0; i < d; i++) - { - pda = node->params[i].p; - buf = pda->bufPtr; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - len = pda->numSector; - coeff = rf_RaidAddressToStripeUnitID(layoutPtr,pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - /* see if pda intersects a recovery pda */ - applyPDA(raidPtr,pda,ppda,qpda,node->dagHdr->bp); - if (two) - applyPDA(raidPtr,pda,ppda,qpda,node->dagHdr->bp); - } - - /* ok, we got the parity back to the point where we can recover. - We now need to determine the coeff of the columns that need to be - recovered. We can also only need to recover a single stripe unit. 
- */ - - if (asmap->failedPDAs[1] == NULL) - { /* only a single stripe unit to recover. */ - pda = asmap->failedPDAs[0]; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - /* need to determine the column of the other failed disk */ - coeff = rf_RaidAddressToStripeUnitID(layoutPtr,pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - for (i=0; i < numDataCol; i++) - { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector)(raidPtr,npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != coeff) break; + int np = node->numParams; + RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); + int d, i; + unsigned coeff; + RF_RaidAddr_t sosAddr, suoffset; + RF_SectorCount_t len, secPerSU = layoutPtr->sectorsPerStripeUnit; + int two = 0; + RF_PhysDiskAddr_t *ppda, *ppda2, *qpda, *qpda2, *pda, npda; + char *buf; + int numDataCol = layoutPtr->numDataCol; + RF_Etimer_t timer; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + + RF_ETIMER_START(timer); + + if (asmap->failedPDAs[1] && + (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) { + RF_ASSERT(0); + ppda = node->params[np - 6].p; + ppda2 = node->params[np - 5].p; + qpda = node->params[np - 4].p; + qpda2 = node->params[np - 3].p; + d = (np - 6); + two = 1; + } else { + ppda = node->params[np - 4].p; + qpda = node->params[np - 3].p; + d = (np - 4); + } + + for (i = 0; i < d; i++) { + pda = node->params[i].p; + buf = pda->bufPtr; + suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); + len = pda->numSector; + coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); + /* compute the 
data unit offset within the column */ + coeff = (coeff % raidPtr->Layout.numDataCol); + /* see if pda intersects a recovery pda */ + applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp); + if (two) + applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp); } - RF_ASSERT (i < numDataCol); - RF_ASSERT (two==0); - /* recover the data. Since we need only want to recover one column, we overwrite the - parity with the other one. */ - if (coeff < i) /* recovering 'a' */ - rf_PQ_recover((unsigned long *)ppda->bufPtr,(unsigned long *)qpda->bufPtr,(unsigned long *)pda->bufPtr,(unsigned long *)ppda->bufPtr,rf_RaidAddressToByte(raidPtr,pda->numSector), coeff, i); - else /* recovering 'b' */ - rf_PQ_recover((unsigned long *)ppda->bufPtr,(unsigned long *)qpda->bufPtr,(unsigned long *)ppda->bufPtr,(unsigned long *)pda->bufPtr,rf_RaidAddressToByte(raidPtr,pda->numSector), i, coeff); - } - else - RF_PANIC(); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) - tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node,0); - return(0); + + /* ok, we got the parity back to the point where we can recover. We + * now need to determine the coeff of the columns that need to be + * recovered. We can also only need to recover a single stripe unit. */ + + if (asmap->failedPDAs[1] == NULL) { /* only a single stripe unit + * to recover. 
*/ + pda = asmap->failedPDAs[0]; + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + /* need to determine the column of the other failed disk */ + coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); + /* compute the data unit offset within the column */ + coeff = (coeff % raidPtr->Layout.numDataCol); + for (i = 0; i < numDataCol; i++) { + npda.raidAddress = sosAddr + (i * secPerSU); + (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); + /* skip over dead disks */ + if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) + if (i != coeff) + break; + } + RF_ASSERT(i < numDataCol); + RF_ASSERT(two == 0); + /* recover the data. Since we need only want to recover one + * column, we overwrite the parity with the other one. */ + if (coeff < i) /* recovering 'a' */ + rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) pda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i); + else /* recovering 'b' */ + rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, (unsigned long *) pda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff); + } else + RF_PANIC(); + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + if (tracerec) + tracerec->q_us += RF_ETIMER_VAL_US(timer); + rf_GenericWakeupFunc(node, 0); + return (0); } -int rf_PQWriteDoubleRecoveryFunc(node) - RF_DagNode_t *node; +int +rf_PQWriteDoubleRecoveryFunc(node) + RF_DagNode_t *node; { - /* The situation: - - We are doing a write that hits only one - failed data unit. - The other failed data unit is not being overwritten, so - we need to generate it. - - For the moment, we assume all the nonfailed data being - written is in the shadow of the failed data unit. - (i.e,, either a single data unit write or the entire - failed stripe unit is being overwritten. 
) - - Recovery strategy: - apply the recovery data to the parity and q. - Use P & Q to recover the second failed data unit in P. - Zero fill Q, then apply the recovered data to p. - Then apply the data being written to the failed drive. - Then walk through the surviving drives, applying new data - when it exists, othewise the recovery data. Quite a mess. - - - The params - - read pda0, read pda1, ... read pda (numDataCol-3), - write pda0, ... , write pda (numStripeUnitAccess - numDataFailed), - failed pda, raidPtr, asmap - */ - - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p; - RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout); - int i; - RF_RaidAddr_t sosAddr; - unsigned coeff; - RF_StripeCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - RF_PhysDiskAddr_t *ppda,*qpda,*pda,npda; - int numDataCol = layoutPtr->numDataCol; - RF_Etimer_t timer; - RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - - RF_ASSERT(node->numResults == 2); - RF_ASSERT(asmap->failedPDAs[1] == NULL); - RF_ETIMER_START(timer); - ppda = node->results[0]; - qpda = node->results[1]; - /* apply the recovery data */ - for (i=0; i < numDataCol-2; i++) - applyPDA(raidPtr,node->params[i].p,ppda,qpda, node->dagHdr->bp); - - /* determine the other failed data unit */ - pda = asmap->failedPDAs[0]; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - /* need to determine the column of the other failed disk */ - coeff = rf_RaidAddressToStripeUnitID(layoutPtr,pda->raidAddress); - /* compute the data unit offset within the column */ - coeff = (coeff % raidPtr->Layout.numDataCol); - for (i=0; i < numDataCol; i++) - { - npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector)(raidPtr,npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if 
(RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) - if (i != coeff) break; - } - RF_ASSERT (i < numDataCol); - /* recover the data. The column we want to recover we write over the parity. - The column we don't care about we dump in q. */ - if (coeff < i) /* recovering 'a' */ - rf_PQ_recover((unsigned long *)ppda->bufPtr,(unsigned long *)qpda->bufPtr,(unsigned long *)ppda->bufPtr,(unsigned long *)qpda->bufPtr,rf_RaidAddressToByte(raidPtr,pda->numSector), coeff, i); - else /* recovering 'b' */ - rf_PQ_recover((unsigned long *)ppda->bufPtr,(unsigned long *)qpda->bufPtr,(unsigned long *)qpda->bufPtr,(unsigned long *)ppda->bufPtr,rf_RaidAddressToByte(raidPtr,pda->numSector), i, coeff); - - /* OK. The valid data is in P. Zero fill Q, then inc it into it. */ - bzero(qpda->bufPtr,rf_RaidAddressToByte(raidPtr,qpda->numSector)); - rf_IncQ((unsigned long *)qpda->bufPtr,(unsigned long *)ppda->bufPtr,rf_RaidAddressToByte(raidPtr,qpda->numSector),i); - - /* now apply all the write data to the buffer */ - /* single stripe unit write case: the failed data is only thing we are writing. */ - RF_ASSERT(asmap->numStripeUnitsAccessed == 1); - /* dest, src, len, coeff */ - rf_IncQ((unsigned long *)qpda->bufPtr,(unsigned long *)asmap->failedPDAs[0]->bufPtr,rf_RaidAddressToByte(raidPtr,qpda->numSector),coeff); - rf_bxor(asmap->failedPDAs[0]->bufPtr,ppda->bufPtr,rf_RaidAddressToByte(raidPtr,ppda->numSector),node->dagHdr->bp); - - /* now apply all the recovery data */ - for (i=0; i < numDataCol-2; i++) - applyPDA(raidPtr,node->params[i].p,ppda,qpda, node->dagHdr->bp); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - if (tracerec) - tracerec->q_us += RF_ETIMER_VAL_US(timer); - - rf_GenericWakeupFunc(node,0); - return(0); + /* The situation: + * + * We are doing a write that hits only one failed data unit. The other + * failed data unit is not being overwritten, so we need to generate + * it. 
+ * + * For the moment, we assume all the nonfailed data being written is in + * the shadow of the failed data unit. (i.e,, either a single data + * unit write or the entire failed stripe unit is being overwritten. ) + * + * Recovery strategy: apply the recovery data to the parity and q. Use P + * & Q to recover the second failed data unit in P. Zero fill Q, then + * apply the recovered data to p. Then apply the data being written to + * the failed drive. Then walk through the surviving drives, applying + * new data when it exists, othewise the recovery data. Quite a mess. + * + * + * The params + * + * read pda0, read pda1, ... read pda (numDataCol-3), write pda0, ... , + * write pda (numStripeUnitAccess - numDataFailed), failed pda, + * raidPtr, asmap */ + + int np = node->numParams; + RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); + int i; + RF_RaidAddr_t sosAddr; + unsigned coeff; + RF_StripeCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; + RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda; + int numDataCol = layoutPtr->numDataCol; + RF_Etimer_t timer; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + + RF_ASSERT(node->numResults == 2); + RF_ASSERT(asmap->failedPDAs[1] == NULL); + RF_ETIMER_START(timer); + ppda = node->results[0]; + qpda = node->results[1]; + /* apply the recovery data */ + for (i = 0; i < numDataCol - 2; i++) + applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp); + + /* determine the other failed data unit */ + pda = asmap->failedPDAs[0]; + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + /* need to determine the column of the other failed disk */ + coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); + /* compute the data unit offset within the column */ + coeff = (coeff % raidPtr->Layout.numDataCol); + for (i = 0; i 
< numDataCol; i++) { + npda.raidAddress = sosAddr + (i * secPerSU); + (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); + /* skip over dead disks */ + if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) + if (i != coeff) + break; + } + RF_ASSERT(i < numDataCol); + /* recover the data. The column we want to recover we write over the + * parity. The column we don't care about we dump in q. */ + if (coeff < i) /* recovering 'a' */ + rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i); + else /* recovering 'b' */ + rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff); + + /* OK. The valid data is in P. Zero fill Q, then inc it into it. */ + bzero(qpda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector)); + rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), i); + + /* now apply all the write data to the buffer */ + /* single stripe unit write case: the failed data is only thing we are + * writing. 
*/ + RF_ASSERT(asmap->numStripeUnitsAccessed == 1); + /* dest, src, len, coeff */ + rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) asmap->failedPDAs[0]->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), coeff); + rf_bxor(asmap->failedPDAs[0]->bufPtr, ppda->bufPtr, rf_RaidAddressToByte(raidPtr, ppda->numSector), node->dagHdr->bp); + + /* now apply all the recovery data */ + for (i = 0; i < numDataCol - 2; i++) + applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp); + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + if (tracerec) + tracerec->q_us += RF_ETIMER_VAL_US(timer); + + rf_GenericWakeupFunc(node, 0); + return (0); } - RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite) { - RF_PANIC(); + RF_PANIC(); } - /* Two lost data unit write case. There are really two cases here: - (1) The write completely covers the two lost data units. + (1) The write completely covers the two lost data units. In that case, a reconstruct write that doesn't write the failed data units will do the correct thing. So in this case, the dag looks like @@ -508,10 +388,10 @@ RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite) (2) The write does not completely cover both failed data units - (but touches at least one of them). Then we need to do the + (but touches at least one of them). Then we need to do the equivalent of a reconstruct read to recover the missing data - unit from the other stripe. - + unit from the other stripe. + For any data we are writing that is not in the "shadow" of the failed units, we need to do a four cycle update. PANIC on this case. 
for now @@ -520,35 +400,30 @@ RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite) RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_SectorCount_t sectorsPerSU = layoutPtr->sectorsPerStripeUnit; - int sum; - int nf = asmap->numDataFailed; - - sum = asmap->failedPDAs[0]->numSector; - if (nf == 2) - sum += asmap->failedPDAs[1]->numSector; - - if ((nf == 2) && ( sum == (2*sectorsPerSU))) - { - /* large write case */ - rf_PQ_DDLargeWrite(raidPtr, asmap, dag_h, bp, flags, allocList); - return; - } - - - if ((nf == asmap->numStripeUnitsAccessed) || (sum >= sectorsPerSU)) - { - /* small write case, no user data not in shadow */ - rf_PQ_DDSimpleSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList); - return; - } - RF_PANIC(); + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_SectorCount_t sectorsPerSU = layoutPtr->sectorsPerStripeUnit; + int sum; + int nf = asmap->numDataFailed; + + sum = asmap->failedPDAs[0]->numSector; + if (nf == 2) + sum += asmap->failedPDAs[1]->numSector; + + if ((nf == 2) && (sum == (2 * sectorsPerSU))) { + /* large write case */ + rf_PQ_DDLargeWrite(raidPtr, asmap, dag_h, bp, flags, allocList); + return; + } + if ((nf == asmap->numStripeUnitsAccessed) || (sum >= sectorsPerSU)) { + /* small write case, no user data not in shadow */ + rf_PQ_DDSimpleSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList); + return; + } + RF_PANIC(); } - RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite) { - rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Rq", "Wq", "PQ Recovery", rf_PQWriteDoubleRecoveryFunc); + rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Rq", "Wq", "PQ Recovery", rf_PQWriteDoubleRecoveryFunc); } - -#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */ +#endif /* (RF_INCLUDE_DECL_PQ > 0) || + * (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pqdegdags.h b/sys/dev/raidframe/rf_pqdegdags.h index e860ffe0183..8fc1f3dbf97 100644 --- 
a/sys/dev/raidframe/rf_pqdegdags.h +++ b/sys/dev/raidframe/rf_pqdegdags.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_pqdegdags.h,v 1.1 1999/01/11 14:29:40 niklas Exp $ */ -/* $NetBSD: rf_pqdegdags.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_pqdegdags.h,v 1.2 1999/02/16 00:03:12 niklas Exp $ */ +/* $NetBSD: rf_pqdegdags.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ /* * rf_pqdegdags.h */ @@ -31,35 +31,7 @@ */ /* * rf_pqdegdags.c - * Degraded mode dags for double fault cases. - */ -/* - * : - * Log: rf_pqdegdags.h,v - * Revision 1.6 1996/07/31 15:35:20 jimz - * evenodd changes; bugfixes for double-degraded archs, generalize - * some formerly PQ-only functions - * - * Revision 1.5 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.4 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.3 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.2 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.1 1996/05/18 19:56:30 jimz - * Initial revision - * + * Degraded mode dags for double fault cases. 
*/ #ifndef _RF__RF_PQDEGDAGS_H_ @@ -68,10 +40,10 @@ #include "rf_dag.h" RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead); -int rf_PQDoubleRecoveryFunc(RF_DagNode_t *node); -int rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t *node); +int rf_PQDoubleRecoveryFunc(RF_DagNode_t * node); +int rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t * node); RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite); RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite); RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG); -#endif /* !_RF__RF_PQDEGDAGS_H_ */ +#endif /* !_RF__RF_PQDEGDAGS_H_ */ diff --git a/sys/dev/raidframe/rf_psstatus.c b/sys/dev/raidframe/rf_psstatus.c index a1396d150bd..d8874e74146 100644 --- a/sys/dev/raidframe/rf_psstatus.c +++ b/sys/dev/raidframe/rf_psstatus.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_psstatus.c,v 1.1 1999/01/11 14:29:40 niklas Exp $ */ -/* $NetBSD: rf_psstatus.c,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_psstatus.c,v 1.2 1999/02/16 00:03:12 niklas Exp $ */ +/* $NetBSD: rf_psstatus.c,v 1.3 1999/02/05 00:06:15 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -37,79 +37,6 @@ * *****************************************************************************/ -/* : - * Log: rf_psstatus.c,v - * Revision 1.29 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.28 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.27 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.26 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.25 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.24 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.23 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.22 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.21 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.20 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.19 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.18 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.17 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.16 1996/05/20 16:15:27 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.15 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.14 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.13 1995/11/30 16:17:18 wvcii - * added copyright info - * - */ - #include "rf_types.h" #include "rf_raid.h" #include "rf_threadid.h" @@ -124,109 +51,115 @@ #define 
Dprintf2(s,a,b) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) #define Dprintf3(s,a,b,c) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) -static void RealPrintPSStatusTable(RF_Raid_t *raidPtr, - RF_PSStatusHeader_t *pssTable); +static void +RealPrintPSStatusTable(RF_Raid_t * raidPtr, + RF_PSStatusHeader_t * pssTable); #define RF_MAX_FREE_PSS 32 #define RF_PSS_INC 8 #define RF_PSS_INITIAL 4 -static int init_pss( RF_ReconParityStripeStatus_t *, RF_Raid_t *); +static int init_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *); static void clean_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *); static void rf_ShutdownPSStatus(void *); -static int init_pss(p, raidPtr) - RF_ReconParityStripeStatus_t *p; - RF_Raid_t *raidPtr; +static int +init_pss(p, raidPtr) + RF_ReconParityStripeStatus_t *p; + RF_Raid_t *raidPtr; { - RF_Calloc(p->issued, raidPtr->numCol, sizeof(char), (char *)); - if (p->issued == NULL) - return(ENOMEM); - return(0); + RF_Calloc(p->issued, raidPtr->numCol, sizeof(char), (char *)); + if (p->issued == NULL) + return (ENOMEM); + return (0); } -static void clean_pss(p, raidPtr) - RF_ReconParityStripeStatus_t *p; - RF_Raid_t *raidPtr; +static void +clean_pss(p, raidPtr) + RF_ReconParityStripeStatus_t *p; + RF_Raid_t *raidPtr; { - RF_Free(p->issued, raidPtr->numCol*sizeof(char)); + RF_Free(p->issued, raidPtr->numCol * sizeof(char)); } -static void rf_ShutdownPSStatus(arg) - void *arg; +static void +rf_ShutdownPSStatus(arg) + void *arg; { - RF_Raid_t *raidPtr = (RF_Raid_t *)arg; + RF_Raid_t *raidPtr = (RF_Raid_t *) arg; - RF_FREELIST_DESTROY_CLEAN_ARG(raidPtr->pss_freelist,next,(RF_ReconParityStripeStatus_t *),clean_pss,raidPtr); + RF_FREELIST_DESTROY_CLEAN_ARG(raidPtr->pss_freelist, next, (RF_ReconParityStripeStatus_t *), clean_pss, raidPtr); } -int rf_ConfigurePSStatus( - RF_ShutdownList_t 
**listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigurePSStatus( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - int rc; - - raidPtr->pssTableSize = RF_PSS_DEFAULT_TABLESIZE; - RF_FREELIST_CREATE(raidPtr->pss_freelist, RF_MAX_FREE_PSS, - RF_PSS_INC, sizeof(RF_ReconParityStripeStatus_t)); - if (raidPtr->pss_freelist == NULL) - return(ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownPSStatus, raidPtr); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownPSStatus(raidPtr); - return(rc); - } - RF_FREELIST_PRIME_INIT_ARG(raidPtr->pss_freelist, RF_PSS_INITIAL,next, - (RF_ReconParityStripeStatus_t *),init_pss,raidPtr); - return(0); + int rc; + + raidPtr->pssTableSize = RF_PSS_DEFAULT_TABLESIZE; + RF_FREELIST_CREATE(raidPtr->pss_freelist, RF_MAX_FREE_PSS, + RF_PSS_INC, sizeof(RF_ReconParityStripeStatus_t)); + if (raidPtr->pss_freelist == NULL) + return (ENOMEM); + rc = rf_ShutdownCreate(listp, rf_ShutdownPSStatus, raidPtr); + if (rc) { + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); + rf_ShutdownPSStatus(raidPtr); + return (rc); + } + RF_FREELIST_PRIME_INIT_ARG(raidPtr->pss_freelist, RF_PSS_INITIAL, next, + (RF_ReconParityStripeStatus_t *), init_pss, raidPtr); + return (0); } - /***************************************************************************************** * sets up the pss table * We pre-allocate a bunch of entries to avoid as much as possible having to * malloc up hash chain entries. 
****************************************************************************************/ -RF_PSStatusHeader_t *rf_MakeParityStripeStatusTable(raidPtr) - RF_Raid_t *raidPtr; +RF_PSStatusHeader_t * +rf_MakeParityStripeStatusTable(raidPtr) + RF_Raid_t *raidPtr; { - RF_PSStatusHeader_t *pssTable; - int i, j, rc; - - RF_Calloc(pssTable, raidPtr->pssTableSize, sizeof(RF_PSStatusHeader_t), (RF_PSStatusHeader_t *)); - for (i=0; i<raidPtr->pssTableSize; i++) { - rc = rf_mutex_init(&pssTable[i].mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - /* fail and deallocate */ - for(j=0;j<i;j++) { - rf_mutex_destroy(&pssTable[i].mutex); - } - RF_Free(pssTable, raidPtr->pssTableSize*sizeof(RF_PSStatusHeader_t)); - return(NULL); - } - } - return(pssTable); + RF_PSStatusHeader_t *pssTable; + int i, j, rc; + + RF_Calloc(pssTable, raidPtr->pssTableSize, sizeof(RF_PSStatusHeader_t), (RF_PSStatusHeader_t *)); + for (i = 0; i < raidPtr->pssTableSize; i++) { + rc = rf_mutex_init(&pssTable[i].mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + /* fail and deallocate */ + for (j = 0; j < i; j++) { + rf_mutex_destroy(&pssTable[i].mutex); + } + RF_Free(pssTable, raidPtr->pssTableSize * sizeof(RF_PSStatusHeader_t)); + return (NULL); + } + } + return (pssTable); } -void rf_FreeParityStripeStatusTable(raidPtr, pssTable) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; +void +rf_FreeParityStripeStatusTable(raidPtr, pssTable) + RF_Raid_t *raidPtr; + RF_PSStatusHeader_t *pssTable; { - int i; - - if (rf_pssDebug) - RealPrintPSStatusTable(raidPtr, pssTable); - for (i=0; i<raidPtr->pssTableSize; i++) { - if (pssTable[i].chain) { - printf("ERROR: pss hash chain not null at recon shutdown\n"); - } - rf_mutex_destroy(&pssTable[i].mutex); - } - RF_Free(pssTable, raidPtr->pssTableSize * sizeof(RF_PSStatusHeader_t)); + int i; + + if (rf_pssDebug) + RealPrintPSStatusTable(raidPtr, 
pssTable); + for (i = 0; i < raidPtr->pssTableSize; i++) { + if (pssTable[i].chain) { + printf("ERROR: pss hash chain not null at recon shutdown\n"); + } + rf_mutex_destroy(&pssTable[i].mutex); + } + RF_Free(pssTable, raidPtr->pssTableSize * sizeof(RF_PSStatusHeader_t)); } @@ -236,182 +169,211 @@ void rf_FreeParityStripeStatusTable(raidPtr, pssTable) * * ASSUMES THE PSS DESCRIPTOR IS LOCKED UPON ENTRY */ -RF_ReconParityStripeStatus_t *rf_LookupRUStatus( - RF_Raid_t *raidPtr, - RF_PSStatusHeader_t *pssTable, - RF_StripeNum_t psID, - RF_ReconUnitNum_t which_ru, - RF_PSSFlags_t flags, /* whether or not to create it if it doesn't exist + what flags to set initially */ - int *created) +RF_ReconParityStripeStatus_t * +rf_LookupRUStatus( + RF_Raid_t * raidPtr, + RF_PSStatusHeader_t * pssTable, + RF_StripeNum_t psID, + RF_ReconUnitNum_t which_ru, + RF_PSSFlags_t flags, /* whether or not to create it if it doesn't + * exist + what flags to set initially */ + int *created) { - RF_PSStatusHeader_t *hdr = &pssTable[ RF_HASH_PSID(raidPtr,psID) ]; - RF_ReconParityStripeStatus_t *p, *pssPtr = hdr->chain; - - *created = 0; - for (p = pssPtr; p; p=p->next) { - if (p->parityStripeID == psID && p->which_ru == which_ru) - break; - } - - if (!p && (flags&RF_PSS_CREATE)) { - Dprintf2("PSS: creating pss for psid %ld ru %d\n",psID,which_ru); - p = rf_AllocPSStatus(raidPtr); - p->next = hdr->chain; hdr->chain = p; - - p->parityStripeID = psID; - p->which_ru = which_ru; - p->flags = flags; - p->rbuf = NULL; - p->writeRbuf = NULL; - p->blockCount = 0; - p->procWaitList = NULL; - p->blockWaitList = NULL; - p->bufWaitList = NULL; - *created = 1; - } else if (p) { /* we didn't create, but we want to specify some new status */ - p->flags |= flags; /* add in whatever flags we're specifying */ - } - if (p && (flags & RF_PSS_RECON_BLOCKED)) { - int tid; - rf_get_threadid(tid); - p->blockCount++; /* if we're asking to block recon, bump the count */ - Dprintf3("[%d] Blocked recon on psid %ld. 
count now %d\n",tid,psID,p->blockCount); - } - return(p); + RF_PSStatusHeader_t *hdr = &pssTable[RF_HASH_PSID(raidPtr, psID)]; + RF_ReconParityStripeStatus_t *p, *pssPtr = hdr->chain; + + *created = 0; + for (p = pssPtr; p; p = p->next) { + if (p->parityStripeID == psID && p->which_ru == which_ru) + break; + } + + if (!p && (flags & RF_PSS_CREATE)) { + Dprintf2("PSS: creating pss for psid %ld ru %d\n", psID, which_ru); + p = rf_AllocPSStatus(raidPtr); + p->next = hdr->chain; + hdr->chain = p; + + p->parityStripeID = psID; + p->which_ru = which_ru; + p->flags = flags; + p->rbuf = NULL; + p->writeRbuf = NULL; + p->blockCount = 0; + p->procWaitList = NULL; + p->blockWaitList = NULL; + p->bufWaitList = NULL; + *created = 1; + } else + if (p) { /* we didn't create, but we want to specify + * some new status */ + p->flags |= flags; /* add in whatever flags we're + * specifying */ + } + if (p && (flags & RF_PSS_RECON_BLOCKED)) { + int tid; + rf_get_threadid(tid); + p->blockCount++;/* if we're asking to block recon, bump the + * count */ + Dprintf3("[%d] Blocked recon on psid %ld. count now %d\n", tid, psID, p->blockCount); + } + return (p); } - /* deletes an entry from the parity stripe status table. typically used * when an entry has been allocated solely to block reconstruction, and * no recon was requested while recon was blocked. Assumes the hash * chain is ALREADY LOCKED. 
*/ -void rf_PSStatusDelete(raidPtr, pssTable, pssPtr) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; - RF_ReconParityStripeStatus_t *pssPtr; +void +rf_PSStatusDelete(raidPtr, pssTable, pssPtr) + RF_Raid_t *raidPtr; + RF_PSStatusHeader_t *pssTable; + RF_ReconParityStripeStatus_t *pssPtr; { - RF_PSStatusHeader_t *hdr = &(pssTable[ RF_HASH_PSID(raidPtr,pssPtr->parityStripeID) ] ); - RF_ReconParityStripeStatus_t *p = hdr->chain, *pt = NULL; - - while (p) { - if (p == pssPtr) { - if (pt) pt->next = p->next; else hdr->chain = p->next; - p->next = NULL; - rf_FreePSStatus(raidPtr, p); - return; - } - pt = p; p=p->next; - } - RF_ASSERT(0); /* we must find it here */ + RF_PSStatusHeader_t *hdr = &(pssTable[RF_HASH_PSID(raidPtr, pssPtr->parityStripeID)]); + RF_ReconParityStripeStatus_t *p = hdr->chain, *pt = NULL; + + while (p) { + if (p == pssPtr) { + if (pt) + pt->next = p->next; + else + hdr->chain = p->next; + p->next = NULL; + rf_FreePSStatus(raidPtr, p); + return; + } + pt = p; + p = p->next; + } + RF_ASSERT(0); /* we must find it here */ } - /* deletes an entry from the ps status table after reconstruction has completed */ -void rf_RemoveFromActiveReconTable(raidPtr, row, psid, which_ru) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psid; +void +rf_RemoveFromActiveReconTable(raidPtr, row, psid, which_ru) + RF_Raid_t *raidPtr; + RF_RowCol_t row; + RF_ReconUnitNum_t which_ru; + RF_StripeNum_t psid; { - RF_PSStatusHeader_t *hdr = &(raidPtr->reconControl[row]->pssTable[ RF_HASH_PSID(raidPtr,psid) ]); - RF_ReconParityStripeStatus_t *p, *pt; - RF_CallbackDesc_t *cb, *cb1; - - RF_LOCK_MUTEX( hdr->mutex ); - for (pt=NULL, p = hdr->chain; p; pt=p,p=p->next) { - if ((p->parityStripeID == psid) && (p->which_ru == which_ru)) - break; - } - if (p == NULL) { - rf_PrintPSStatusTable(raidPtr, row); - } - RF_ASSERT(p); /* it must be there */ - - Dprintf2("PSS: deleting pss for psid %ld ru %d\n",psid,which_ru); - - /* delete this entry 
from the hash chain */ - if (pt) pt->next = p->next; - else hdr->chain = p->next; - p->next = NULL; - - RF_UNLOCK_MUTEX( hdr->mutex ); - - /* wakup anyone waiting on the parity stripe ID */ - cb = p->procWaitList; - p->procWaitList = NULL; - while (cb) { - Dprintf1("Waking up access waiting on parity stripe ID %ld\n",p->parityStripeID); - cb1 = cb->next; - (cb->callbackFunc)(cb->callbackArg); - - /* THIS IS WHAT THE ORIGINAL CODE HAD... the extra 0 is bogus, IMHO */ - /* (cb->callbackFunc)(cb->callbackArg, 0); */ - rf_FreeCallbackDesc(cb); - cb = cb1; - } - - rf_FreePSStatus(raidPtr, p); + RF_PSStatusHeader_t *hdr = &(raidPtr->reconControl[row]->pssTable[RF_HASH_PSID(raidPtr, psid)]); + RF_ReconParityStripeStatus_t *p, *pt; + RF_CallbackDesc_t *cb, *cb1; + + RF_LOCK_MUTEX(hdr->mutex); + for (pt = NULL, p = hdr->chain; p; pt = p, p = p->next) { + if ((p->parityStripeID == psid) && (p->which_ru == which_ru)) + break; + } + if (p == NULL) { + rf_PrintPSStatusTable(raidPtr, row); + } + RF_ASSERT(p); /* it must be there */ + + Dprintf2("PSS: deleting pss for psid %ld ru %d\n", psid, which_ru); + + /* delete this entry from the hash chain */ + if (pt) + pt->next = p->next; + else + hdr->chain = p->next; + p->next = NULL; + + RF_UNLOCK_MUTEX(hdr->mutex); + + /* wakup anyone waiting on the parity stripe ID */ + cb = p->procWaitList; + p->procWaitList = NULL; + while (cb) { + Dprintf1("Waking up access waiting on parity stripe ID %ld\n", p->parityStripeID); + cb1 = cb->next; + (cb->callbackFunc) (cb->callbackArg); + + /* THIS IS WHAT THE ORIGINAL CODE HAD... 
the extra 0 is bogus, + * IMHO */ + /* (cb->callbackFunc)(cb->callbackArg, 0); */ + rf_FreeCallbackDesc(cb); + cb = cb1; + } + + rf_FreePSStatus(raidPtr, p); } -RF_ReconParityStripeStatus_t *rf_AllocPSStatus(raidPtr) - RF_Raid_t *raidPtr; +RF_ReconParityStripeStatus_t * +rf_AllocPSStatus(raidPtr) + RF_Raid_t *raidPtr; { - RF_ReconParityStripeStatus_t *p; - - RF_FREELIST_GET_INIT_ARG(raidPtr->pss_freelist,p,next,(RF_ReconParityStripeStatus_t *),init_pss,raidPtr); - if (p) { - bzero(p->issued, raidPtr->numCol); - } - p->next = NULL; - /* no need to initialize here b/c the only place we're called from is the above Lookup */ - return(p); + RF_ReconParityStripeStatus_t *p; + + RF_FREELIST_GET_INIT_ARG(raidPtr->pss_freelist, p, next, (RF_ReconParityStripeStatus_t *), init_pss, raidPtr); + if (p) { + bzero(p->issued, raidPtr->numCol); + } + p->next = NULL; + /* no need to initialize here b/c the only place we're called from is + * the above Lookup */ + return (p); } -void rf_FreePSStatus(raidPtr, p) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *p; +void +rf_FreePSStatus(raidPtr, p) + RF_Raid_t *raidPtr; + RF_ReconParityStripeStatus_t *p; { - RF_ASSERT(p->procWaitList == NULL); - RF_ASSERT(p->blockWaitList == NULL); - RF_ASSERT(p->bufWaitList == NULL); + RF_ASSERT(p->procWaitList == NULL); + RF_ASSERT(p->blockWaitList == NULL); + RF_ASSERT(p->bufWaitList == NULL); - RF_FREELIST_FREE_CLEAN_ARG(raidPtr->pss_freelist,p,next,clean_pss,raidPtr); + RF_FREELIST_FREE_CLEAN_ARG(raidPtr->pss_freelist, p, next, clean_pss, raidPtr); } -static void RealPrintPSStatusTable(raidPtr, pssTable) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; +static void +RealPrintPSStatusTable(raidPtr, pssTable) + RF_Raid_t *raidPtr; + RF_PSStatusHeader_t *pssTable; { - int i, j, procsWaiting, blocksWaiting, bufsWaiting; - RF_ReconParityStripeStatus_t *p; - RF_CallbackDesc_t *cb; - - printf("\nParity Stripe Status Table\n"); - for (i=0; i< raidPtr->pssTableSize; i++) { - for (p = 
pssTable[i].chain; p; p=p->next) { - procsWaiting = blocksWaiting = bufsWaiting = 0; - for (cb = p->procWaitList; cb; cb=cb->next) procsWaiting++; - for (cb = p->blockWaitList; cb; cb=cb->next) blocksWaiting++; - for (cb = p->bufWaitList; cb; cb=cb->next) bufsWaiting++; - printf("PSID %ld RU %d : blockCount %d %d/%d/%d proc/block/buf waiting, issued ", - (long)p->parityStripeID, p->which_ru, p->blockCount, procsWaiting, blocksWaiting, bufsWaiting); - for (j=0;j<raidPtr->numCol; j++) printf("%c", (p->issued[j]) ? '1' : '0'); - if (!p->flags) printf(" flags: (none)"); - else { - if (p->flags & RF_PSS_UNDER_RECON) printf(" under-recon"); - if (p->flags & RF_PSS_FORCED_ON_WRITE) printf(" forced-w"); - if (p->flags & RF_PSS_FORCED_ON_READ) printf(" forced-r"); - if (p->flags & RF_PSS_RECON_BLOCKED) printf(" blocked"); - if (p->flags & RF_PSS_BUFFERWAIT) printf(" bufwait"); - } - printf("\n"); - } - } + int i, j, procsWaiting, blocksWaiting, bufsWaiting; + RF_ReconParityStripeStatus_t *p; + RF_CallbackDesc_t *cb; + + printf("\nParity Stripe Status Table\n"); + for (i = 0; i < raidPtr->pssTableSize; i++) { + for (p = pssTable[i].chain; p; p = p->next) { + procsWaiting = blocksWaiting = bufsWaiting = 0; + for (cb = p->procWaitList; cb; cb = cb->next) + procsWaiting++; + for (cb = p->blockWaitList; cb; cb = cb->next) + blocksWaiting++; + for (cb = p->bufWaitList; cb; cb = cb->next) + bufsWaiting++; + printf("PSID %ld RU %d : blockCount %d %d/%d/%d proc/block/buf waiting, issued ", + (long) p->parityStripeID, p->which_ru, p->blockCount, procsWaiting, blocksWaiting, bufsWaiting); + for (j = 0; j < raidPtr->numCol; j++) + printf("%c", (p->issued[j]) ? 
'1' : '0'); + if (!p->flags) + printf(" flags: (none)"); + else { + if (p->flags & RF_PSS_UNDER_RECON) + printf(" under-recon"); + if (p->flags & RF_PSS_FORCED_ON_WRITE) + printf(" forced-w"); + if (p->flags & RF_PSS_FORCED_ON_READ) + printf(" forced-r"); + if (p->flags & RF_PSS_RECON_BLOCKED) + printf(" blocked"); + if (p->flags & RF_PSS_BUFFERWAIT) + printf(" bufwait"); + } + printf("\n"); + } + } } -void rf_PrintPSStatusTable(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; +void +rf_PrintPSStatusTable(raidPtr, row) + RF_Raid_t *raidPtr; + RF_RowCol_t row; { - RF_PSStatusHeader_t *pssTable = raidPtr->reconControl[row]->pssTable; - RealPrintPSStatusTable(raidPtr, pssTable); + RF_PSStatusHeader_t *pssTable = raidPtr->reconControl[row]->pssTable; + RealPrintPSStatusTable(raidPtr, pssTable); } diff --git a/sys/dev/raidframe/rf_psstatus.h b/sys/dev/raidframe/rf_psstatus.h index eaca5822094..76fbb6999a0 100644 --- a/sys/dev/raidframe/rf_psstatus.h +++ b/sys/dev/raidframe/rf_psstatus.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_psstatus.h,v 1.1 1999/01/11 14:29:41 niklas Exp $ */ -/* $NetBSD: rf_psstatus.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_psstatus.h,v 1.2 1999/02/16 00:03:13 niklas Exp $ */ +/* $NetBSD: rf_psstatus.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -37,52 +37,6 @@ * *****************************************************************************/ -/* : - * Log: rf_psstatus.h,v - * Revision 1.16 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.15 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.14 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.13 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.12 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.11 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.10 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.9 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.8 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.7 1995/11/30 16:17:28 wvcii - * added copyright info - * - */ - #ifndef _RF__RF_PSSTATUS_H_ #define _RF__RF_PSSTATUS_H_ @@ -90,7 +44,8 @@ #include "rf_threadstuff.h" #include "rf_callback.h" -#define RF_PS_MAX_BUFS 10 /* max number of bufs we'll accumulate before we do an XOR */ +#define RF_PS_MAX_BUFS 10 /* max number of bufs we'll accumulate before + * we do an XOR */ #define RF_PSS_DEFAULT_TABLESIZE 200 @@ -98,57 +53,80 @@ * Macros to acquire/release the mutex lock on a parity stripe status * descriptor. Note that we use just one lock for the whole hash chain. 
*/ -#define RF_HASH_PSID(_raid_,_psid_) ( (_psid_) % ((_raid_)->pssTableSize) ) /* simple hash function */ +#define RF_HASH_PSID(_raid_,_psid_) ( (_psid_) % ((_raid_)->pssTableSize) ) /* simple hash function */ #define RF_LOCK_PSS_MUTEX(_raidPtr, _row, _psid) \ RF_LOCK_MUTEX((_raidPtr)->reconControl[_row]->pssTable[ RF_HASH_PSID(_raidPtr,_psid) ].mutex) #define RF_UNLOCK_PSS_MUTEX(_raidPtr, _row, _psid) \ RF_UNLOCK_MUTEX((_raidPtr)->reconControl[_row]->pssTable[ RF_HASH_PSID(_raidPtr,_psid) ].mutex) struct RF_ReconParityStripeStatus_s { - RF_StripeNum_t parityStripeID; /* the parity stripe ID */ - RF_ReconUnitNum_t which_ru; /* which reconstruction unit with the indicated parity stripe */ - RF_PSSFlags_t flags; /* flags indicating various conditions */ - void *rbuf; /* this is the accumulating xor sum */ - void *writeRbuf; /* DEBUG ONLY: a pointer to the rbuf after it has filled & been sent to disk */ - void *rbufsForXor[RF_PS_MAX_BUFS]; /* these are buffers still to be xored into the accumulating sum */ - int xorBufCount; /* num buffers waiting to be xored */ - int blockCount; /* count of # proc that have blocked recon on this parity stripe */ - char *issued; /* issued[i]==1 <=> column i has already issued a read request for the indicated RU */ - RF_CallbackDesc_t *procWaitList; /* list of user procs waiting for recon to be done */ - RF_CallbackDesc_t *blockWaitList;/* list of disks blocked waiting for user write to complete */ - RF_CallbackDesc_t *bufWaitList; /* list of disks blocked waiting to acquire a buffer for this RU */ - RF_ReconParityStripeStatus_t *next; + RF_StripeNum_t parityStripeID; /* the parity stripe ID */ + RF_ReconUnitNum_t which_ru; /* which reconstruction unit with the + * indicated parity stripe */ + RF_PSSFlags_t flags; /* flags indicating various conditions */ + void *rbuf; /* this is the accumulating xor sum */ + void *writeRbuf; /* DEBUG ONLY: a pointer to the rbuf after it + * has filled & been sent to disk */ + void 
*rbufsForXor[RF_PS_MAX_BUFS]; /* these are buffers still to + * be xored into the + * accumulating sum */ + int xorBufCount; /* num buffers waiting to be xored */ + int blockCount; /* count of # proc that have blocked recon on + * this parity stripe */ + char *issued; /* issued[i]==1 <=> column i has already + * issued a read request for the indicated RU */ + RF_CallbackDesc_t *procWaitList; /* list of user procs waiting + * for recon to be done */ + RF_CallbackDesc_t *blockWaitList; /* list of disks blocked + * waiting for user write to + * complete */ + RF_CallbackDesc_t *bufWaitList; /* list of disks blocked waiting to + * acquire a buffer for this RU */ + RF_ReconParityStripeStatus_t *next; }; struct RF_PSStatusHeader_s { - RF_DECLARE_MUTEX(mutex) /* mutex for this hash chain */ - RF_ReconParityStripeStatus_t *chain; /* the hash chain */ + RF_DECLARE_MUTEX(mutex) /* mutex for this hash chain */ + RF_ReconParityStripeStatus_t *chain; /* the hash chain */ }; - /* masks for the "flags" field above */ -#define RF_PSS_NONE 0x00000000 /* no flags */ -#define RF_PSS_UNDER_RECON 0x00000001 /* this parity stripe is currently under reconstruction */ -#define RF_PSS_FORCED_ON_WRITE 0x00000002 /* indicates a recon was forced due to a user-write operation */ -#define RF_PSS_FORCED_ON_READ 0x00000004 /* ditto for read, but not currently implemented */ -#define RF_PSS_RECON_BLOCKED 0x00000008 /* reconstruction is currently blocked due to a pending user I/O */ -#define RF_PSS_CREATE 0x00000010 /* tells LookupRUStatus to create the entry */ -#define RF_PSS_BUFFERWAIT 0x00000020 /* someone is waiting for a buffer for this RU */ +#define RF_PSS_NONE 0x00000000 /* no flags */ +#define RF_PSS_UNDER_RECON 0x00000001 /* this parity stripe is + * currently under + * reconstruction */ +#define RF_PSS_FORCED_ON_WRITE 0x00000002 /* indicates a recon was + * forced due to a user-write + * operation */ +#define RF_PSS_FORCED_ON_READ 0x00000004 /* ditto for read, but not + * currently 
implemented */ +#define RF_PSS_RECON_BLOCKED 0x00000008 /* reconstruction is currently + * blocked due to a pending + * user I/O */ +#define RF_PSS_CREATE 0x00000010 /* tells LookupRUStatus to + * create the entry */ +#define RF_PSS_BUFFERWAIT 0x00000020 /* someone is waiting for a + * buffer for this RU */ -int rf_ConfigurePSStatus(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); +int +rf_ConfigurePSStatus(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); -RF_PSStatusHeader_t *rf_MakeParityStripeStatusTable(RF_Raid_t *raidPtr); -void rf_FreeParityStripeStatusTable(RF_Raid_t *raidPtr, - RF_PSStatusHeader_t *pssTable); -RF_ReconParityStripeStatus_t *rf_LookupRUStatus(RF_Raid_t *raidPtr, - RF_PSStatusHeader_t *pssTable, RF_StripeNum_t psID, - RF_ReconUnitNum_t which_ru, RF_PSSFlags_t flags, int *created); -void rf_PSStatusDelete(RF_Raid_t *raidPtr, RF_PSStatusHeader_t *pssTable, - RF_ReconParityStripeStatus_t *pssPtr); -void rf_RemoveFromActiveReconTable(RF_Raid_t *raidPtr, RF_RowCol_t row, - RF_StripeNum_t psid, RF_ReconUnitNum_t which_ru); -RF_ReconParityStripeStatus_t *rf_AllocPSStatus(RF_Raid_t *raidPtr); -void rf_FreePSStatus(RF_Raid_t *raidPtr, RF_ReconParityStripeStatus_t *p); -void rf_PrintPSStatusTable(RF_Raid_t *raidPtr, RF_RowCol_t row); +RF_PSStatusHeader_t *rf_MakeParityStripeStatusTable(RF_Raid_t * raidPtr); +void +rf_FreeParityStripeStatusTable(RF_Raid_t * raidPtr, + RF_PSStatusHeader_t * pssTable); +RF_ReconParityStripeStatus_t * +rf_LookupRUStatus(RF_Raid_t * raidPtr, + RF_PSStatusHeader_t * pssTable, RF_StripeNum_t psID, + RF_ReconUnitNum_t which_ru, RF_PSSFlags_t flags, int *created); +void +rf_PSStatusDelete(RF_Raid_t * raidPtr, RF_PSStatusHeader_t * pssTable, + RF_ReconParityStripeStatus_t * pssPtr); +void +rf_RemoveFromActiveReconTable(RF_Raid_t * raidPtr, RF_RowCol_t row, + RF_StripeNum_t psid, RF_ReconUnitNum_t which_ru); +RF_ReconParityStripeStatus_t *rf_AllocPSStatus(RF_Raid_t * raidPtr); +void 
rf_FreePSStatus(RF_Raid_t * raidPtr, RF_ReconParityStripeStatus_t * p); +void rf_PrintPSStatusTable(RF_Raid_t * raidPtr, RF_RowCol_t row); -#endif /* !_RF__RF_PSSTATUS_H_ */ +#endif /* !_RF__RF_PSSTATUS_H_ */ diff --git a/sys/dev/raidframe/rf_raid.h b/sys/dev/raidframe/rf_raid.h index 278cc9f507a..798dcdd1c8f 100644 --- a/sys/dev/raidframe/rf_raid.h +++ b/sys/dev/raidframe/rf_raid.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid.h,v 1.1 1999/01/11 14:29:41 niklas Exp $ */ -/* $NetBSD: rf_raid.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* $OpenBSD: rf_raid.h,v 1.2 1999/02/16 00:03:13 niklas Exp $ */ +/* $NetBSD: rf_raid.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -31,180 +31,21 @@ * rf_raid.h -- main header file for RAID driver **********************************************/ -/* - * : - * Log: rf_raid.h,v - * Revision 1.48 1996/08/20 22:33:54 jimz - * make hist_diskreq a doubly-indexed array - * - * Revision 1.47 1996/07/15 05:40:41 jimz - * some recon datastructure cleanup - * better handling of multiple failures - * added undocumented double-recon test - * - * Revision 1.46 1996/07/10 22:28:51 jimz - * get rid of obsolete row statuses (dead,degraded2) - * - * Revision 1.45 1996/06/14 14:56:29 jimz - * make engine threading stuff ifndef SIMULATE - * - * Revision 1.44 1996/06/14 14:16:54 jimz - * move in engine node queue, atomicity control - * - * Revision 1.43 1996/06/12 04:41:26 jimz - * tweaks to make genplot work with user-level driver - * (mainly change stat collection) - * - * Revision 1.42 1996/06/11 10:57:17 jimz - * add recon_done_procs, recon_done_proc_mutex - * - * Revision 1.41 1996/06/11 01:26:48 jimz - * added mechanism for user-level to sync diskthread startup, - * shutdown - * - * Revision 1.40 1996/06/10 14:18:58 jimz - * move user, throughput stats into per-array structure - * - * Revision 1.39 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array 
distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.38 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.37 1996/06/05 19:38:32 jimz - * fixed up disk queueing types config - * added sstf disk queueing - * fixed exit bug on diskthreads (ref-ing bad mem) - * - * Revision 1.36 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.35 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.34 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.33 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.32 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.31 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.30 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.29 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.28 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.27 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.26 1996/05/08 21:01:24 jimz - * fixed up enum type names that were conflicting with other - * enums and function names (ie, "panic") - * future naming trends will be towards RF_ and rf_ for - * everything raidframe-related - * - * Revision 1.25 1996/05/02 14:57:55 jimz - * add sectorMask - * - * Revision 1.24 1996/04/22 15:53:13 jimz - * MAX_RAIDS -> NRAIDFRAME - * - * Revision 1.23 1995/12/14 18:39:46 jimz - * convert to rf_types.h types - * - * Revision 1.22 1995/12/06 15:02:26 root - * added copyright info - * - * Revision 1.21 1995/10/09 17:39:24 jimz - * added info for tracking number of outstanding accesses - * at user-level - * - * Revision 1.20 1995/09/30 20:37:46 jimz - * added acc_totals to Raid for kernel - * - * Revision 1.19 1995/09/19 22:57:14 jimz - * add cache of raidid for kernel - * - * Revision 1.18 1995/09/18 16:50:04 jimz - * added RF_MAX_DISKS (for config ioctls) - * - * Revision 1.17 1995/09/07 19:02:31 jimz - * mods to get raidframe to compile and link - * in kernel environment - * - * Revision 1.16 1995/07/21 19:29:51 robby - * added some info for the idler to the Raid - * - * Revision 1.15 1995/07/16 03:19:14 cfb - * added cachePtr to *raidPtr - * - * Revision 1.14 1995/06/23 13:39:36 robby - * updeated to prototypes 
in rf_layout.h - * - */ #ifndef _RF__RF_RAID_H_ #define _RF__RF_RAID_H_ -#ifdef _KERNEL -#define KERNEL -#endif - #include "rf_archs.h" #include "rf_types.h" #include "rf_threadstuff.h" -#ifdef _KERNEL #if defined(__NetBSD__) #include "rf_netbsd.h" #elif defined(__OpenBSD__) #include "rf_openbsd.h" #endif -#endif -#ifdef KERNEL -/* XXX Needs to be added. GO -#include <raidframe.h> -*/ #include <sys/disklabel.h> -#else /* KERNEL */ -#include <stdio.h> -#include <assert.h> -#endif /* KERNEL */ #include <sys/types.h> #include "rf_alloclist.h" @@ -218,220 +59,194 @@ #if RF_INCLUDE_PARITYLOGGING > 0 #include "rf_paritylog.h" -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ -#define RF_MAX_DISKS 128 /* max disks per array */ -#if defined(__NetBSD__) || defined(__OpenBSD__) +#define RF_MAX_DISKS 128 /* max disks per array */ #define RF_DEV2RAIDID(_dev) (DISKUNIT(_dev)) -#else -#define RF_DEV2RAIDID(_dev) (minor(_dev)>>6) /* convert dev_t to raid id */ -#endif /* * Each row in the array is a distinct parity group, so * each has it's own status, which is one of the following. 
*/ typedef enum RF_RowStatus_e { - rf_rs_optimal, - rf_rs_degraded, - rf_rs_reconstructing, - rf_rs_reconfigured -} RF_RowStatus_t; + rf_rs_optimal, + rf_rs_degraded, + rf_rs_reconstructing, + rf_rs_reconfigured +} RF_RowStatus_t; struct RF_CumulativeStats_s { - struct timeval start; /* the time when the stats were last started*/ - struct timeval stop; /* the time when the stats were last stopped */ - long sum_io_us; /* sum of all user response times (us) */ - long num_ios; /* total number of I/Os serviced */ - long num_sect_moved; /* total number of sectors read or written */ + struct timeval start; /* the time when the stats were last started */ + struct timeval stop; /* the time when the stats were last stopped */ + long sum_io_us; /* sum of all user response times (us) */ + long num_ios; /* total number of I/Os serviced */ + long num_sect_moved; /* total number of sectors read or written */ }; struct RF_ThroughputStats_s { - RF_DECLARE_MUTEX(mutex)/* a mutex used to lock the configuration stuff */ - struct timeval start; /* timer started when numOutstandingRequests moves from 0 to 1 */ - struct timeval stop; /* timer stopped when numOutstandingRequests moves from 1 to 0 */ - RF_uint64 sum_io_us; /* total time timer is enabled */ - RF_uint64 num_ios; /* total number of ios processed by RAIDframe */ - long num_out_ios; /* number of outstanding ios */ -}; - -#ifdef SIMULATE -typedef struct RF_PendingRecon_s RF_PendingRecon_t; -struct RF_PendingRecon_s { - RF_RowCol_t row; - RF_RowCol_t col; - RF_PendingRecon_t *next; + RF_DECLARE_MUTEX(mutex) /* a mutex used to lock the configuration + * stuff */ + struct timeval start; /* timer started when numOutstandingRequests + * moves from 0 to 1 */ + struct timeval stop; /* timer stopped when numOutstandingRequests + * moves from 1 to 0 */ + RF_uint64 sum_io_us; /* total time timer is enabled */ + RF_uint64 num_ios; /* total number of ios processed by RAIDframe */ + long num_out_ios; /* number of outstanding ios */ }; 
-#endif /* SIMULATE */ struct RF_Raid_s { - /* This portion never changes, and can be accessed without locking */ - /* an exception is Disks[][].status, which requires locking when it is changed */ - u_int numRow; /* number of rows of disks, typically == # of ranks */ - u_int numCol; /* number of columns of disks, typically == # of disks/rank */ - u_int numSpare; /* number of spare disks */ - int maxQueueDepth; /* max disk queue depth */ - RF_SectorCount_t totalSectors; /* total number of sectors in the array */ - RF_SectorCount_t sectorsPerDisk; /* number of sectors on each disk */ - u_int logBytesPerSector; /* base-2 log of the number of bytes in a sector */ - u_int bytesPerSector; /* bytes in a sector */ - RF_int32 sectorMask; /* mask of bytes-per-sector */ - - RF_RaidLayout_t Layout; /* all information related to layout */ - RF_RaidDisk_t **Disks; /* all information related to physical disks */ - RF_DiskQueue_t **Queues; /* all information related to disk queues */ - /* NOTE: This is an anchor point via which the queues can be accessed, - * but the enqueue/dequeue routines in diskqueue.c use a local copy of - * this pointer for the actual accesses. 
- */ - /* The remainder of the structure can change, and therefore requires locking on reads and updates */ - RF_DECLARE_MUTEX(mutex) /* mutex used to serialize access to the fields below */ - RF_RowStatus_t *status; /* the status of each row in the array */ - int valid; /* indicates successful configuration */ - RF_LockTableEntry_t *lockTable; /* stripe-lock table */ - RF_LockTableEntry_t *quiesceLock; /* quiesnce table */ - int numFailures; /* total number of failures in the array */ - - /* - * Cleanup stuff - */ - RF_ShutdownList_t *shutdownList; /* shutdown activities */ - RF_AllocListElem_t *cleanupList; /* memory to be freed at shutdown time */ - - /* - * Recon stuff - */ - RF_HeadSepLimit_t headSepLimit; - int numFloatingReconBufs; - int reconInProgress; -#ifdef SIMULATE - RF_PendingRecon_t *pendingRecon; -#endif /* SIMULATE */ - RF_DECLARE_COND(waitForReconCond) - RF_RaidReconDesc_t *reconDesc; /* reconstruction descriptor */ - RF_ReconCtrl_t **reconControl; /* reconstruction control structure pointers for each row in the array */ - -#if !defined(KERNEL) && !defined(SIMULATE) - /* - * Disk thread stuff - */ - int diskthreads_created; - int diskthreads_running; - int diskthreads_shutdown; - RF_DECLARE_MUTEX(diskthread_count_mutex) - RF_DECLARE_COND(diskthread_count_cond) -#endif /* !KERNEL && !SIMULATE */ - - /* - * Array-quiescence stuff - */ - RF_DECLARE_MUTEX(access_suspend_mutex) - RF_DECLARE_COND(quiescent_cond) - RF_IoCount_t accesses_suspended; - RF_IoCount_t accs_in_flight; - int access_suspend_release; - int waiting_for_quiescence; - RF_CallbackDesc_t *quiesce_wait_list; - - /* - * Statistics - */ -#if !defined(KERNEL) && !defined(SIMULATE) - RF_ThroughputStats_t throughputstats; -#endif /* !KERNEL && !SIMULATE */ - RF_CumulativeStats_t userstats; - - /* - * Engine thread control - */ - RF_DECLARE_MUTEX(node_queue_mutex) - RF_DECLARE_COND(node_queue_cond) - RF_DagNode_t *node_queue; -#ifndef SIMULATE - RF_Thread_t engine_thread; - RF_ThreadGroup_t 
engine_tg; -#endif /* !SIMULATE */ - int shutdown_engine; - int dags_in_flight; /* debug */ - - /* - * PSS (Parity Stripe Status) stuff - */ - RF_FreeList_t *pss_freelist; - long pssTableSize; - - /* - * Reconstruction stuff - */ - int procsInBufWait; - int numFullReconBuffers; - RF_AccTraceEntry_t *recon_tracerecs; - unsigned long accumXorTimeUs; - RF_ReconDoneProc_t *recon_done_procs; - RF_DECLARE_MUTEX(recon_done_proc_mutex) - -#if !defined(KERNEL) && !defined(SIMULATE) - RF_Thread_t **diskthreads, *sparediskthreads; /* thread descriptors for disk threads in user-level version */ -#endif /* !KERNEL && !SIMULATE */ - - /* - * nAccOutstanding, waitShutdown protected by desc freelist lock - * (This may seem strange, since that's a central serialization point - * for a per-array piece of data, but otherwise, it'd be an extra - * per-array lock, and that'd only be less efficient...) - */ - RF_DECLARE_COND(outstandingCond) - int waitShutdown; - int nAccOutstanding; - - RF_DiskId_t **diskids; - RF_DiskId_t *sparediskids; - -#ifdef KERNEL - int raidid; -#endif /* KERNEL */ - RF_AccTotals_t acc_totals; - int keep_acc_totals; - -#ifdef _KERNEL - struct raidcinfo **raid_cinfo; /* array of component info */ - struct proc *proc; /* XXX shouldn't be needed here.. 
:-p */ -#endif - - int terminate_disk_queues; - - /* - * XXX - * - * config-specific information should be moved - * somewhere else, or at least hung off this - * in some generic way - */ - - /* used by rf_compute_workload_shift */ - RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL]; - - /* used by declustering */ - int noRotate; + /* This portion never changes, and can be accessed without locking */ + /* an exception is Disks[][].status, which requires locking when it is + * changed */ + u_int numRow; /* number of rows of disks, typically == # of + * ranks */ + u_int numCol; /* number of columns of disks, typically == # + * of disks/rank */ + u_int numSpare; /* number of spare disks */ + int maxQueueDepth; /* max disk queue depth */ + RF_SectorCount_t totalSectors; /* total number of sectors in the + * array */ + RF_SectorCount_t sectorsPerDisk; /* number of sectors on each + * disk */ + u_int logBytesPerSector; /* base-2 log of the number of bytes + * in a sector */ + u_int bytesPerSector; /* bytes in a sector */ + RF_int32 sectorMask; /* mask of bytes-per-sector */ + + RF_RaidLayout_t Layout; /* all information related to layout */ + RF_RaidDisk_t **Disks; /* all information related to physical disks */ + RF_DiskQueue_t **Queues;/* all information related to disk queues */ + /* NOTE: This is an anchor point via which the queues can be + * accessed, but the enqueue/dequeue routines in diskqueue.c use a + * local copy of this pointer for the actual accesses. 
*/ + /* The remainder of the structure can change, and therefore requires + * locking on reads and updates */ + RF_DECLARE_MUTEX(mutex) /* mutex used to serialize access to + * the fields below */ + RF_RowStatus_t *status; /* the status of each row in the array */ + int valid; /* indicates successful configuration */ + RF_LockTableEntry_t *lockTable; /* stripe-lock table */ + RF_LockTableEntry_t *quiesceLock; /* quiesnce table */ + int numFailures; /* total number of failures in the array */ + + /* + * Cleanup stuff + */ + RF_ShutdownList_t *shutdownList; /* shutdown activities */ + RF_AllocListElem_t *cleanupList; /* memory to be freed at + * shutdown time */ + + /* + * Recon stuff + */ + RF_HeadSepLimit_t headSepLimit; + int numFloatingReconBufs; + int reconInProgress; + RF_DECLARE_COND(waitForReconCond) + RF_RaidReconDesc_t *reconDesc; /* reconstruction descriptor */ + RF_ReconCtrl_t **reconControl; /* reconstruction control structure + * pointers for each row in the array */ + + /* + * Array-quiescence stuff + */ + RF_DECLARE_MUTEX(access_suspend_mutex) + RF_DECLARE_COND(quiescent_cond) + RF_IoCount_t accesses_suspended; + RF_IoCount_t accs_in_flight; + int access_suspend_release; + int waiting_for_quiescence; + RF_CallbackDesc_t *quiesce_wait_list; + + /* + * Statistics + */ +#if !defined(_KERNEL) && !defined(SIMULATE) + RF_ThroughputStats_t throughputstats; +#endif /* !KERNEL && !SIMULATE */ + RF_CumulativeStats_t userstats; + + /* + * Engine thread control + */ + RF_DECLARE_MUTEX(node_queue_mutex) + RF_DECLARE_COND(node_queue_cond) + RF_DagNode_t *node_queue; + RF_Thread_t engine_thread; + RF_ThreadGroup_t engine_tg; + int shutdown_engine; + int dags_in_flight; /* debug */ + + /* + * PSS (Parity Stripe Status) stuff + */ + RF_FreeList_t *pss_freelist; + long pssTableSize; + + /* + * Reconstruction stuff + */ + int procsInBufWait; + int numFullReconBuffers; + RF_AccTraceEntry_t *recon_tracerecs; + unsigned long accumXorTimeUs; + RF_ReconDoneProc_t 
*recon_done_procs; + RF_DECLARE_MUTEX(recon_done_proc_mutex) + /* + * nAccOutstanding, waitShutdown protected by desc freelist lock + * (This may seem strange, since that's a central serialization point + * for a per-array piece of data, but otherwise, it'd be an extra + * per-array lock, and that'd only be less efficient...) + */ + RF_DECLARE_COND(outstandingCond) + int waitShutdown; + int nAccOutstanding; + + RF_DiskId_t **diskids; + RF_DiskId_t *sparediskids; + + int raidid; + RF_AccTotals_t acc_totals; + int keep_acc_totals; + + struct raidcinfo **raid_cinfo; /* array of component info */ + struct proc *proc; /* XXX shouldn't be needed here.. :-p */ + + int terminate_disk_queues; + + /* + * XXX + * + * config-specific information should be moved + * somewhere else, or at least hung off this + * in some generic way + */ + + /* used by rf_compute_workload_shift */ + RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL]; + + /* used by declustering */ + int noRotate; #if RF_INCLUDE_PARITYLOGGING > 0 - /* used by parity logging */ - RF_SectorCount_t regionLogCapacity; - RF_ParityLogQueue_t parityLogPool; /* pool of unused parity logs */ - RF_RegionInfo_t *regionInfo; /* array of region state */ - int numParityLogs; - int numSectorsPerLog; - int regionParityRange; - int logsInUse; /* debugging */ - RF_ParityLogDiskQueue_t parityLogDiskQueue; /* state of parity logging disk work */ - RF_RegionBufferQueue_t regionBufferPool; /* buffers for holding region log */ - RF_RegionBufferQueue_t parityBufferPool; /* buffers for holding parity */ - caddr_t parityLogBufferHeap; /* pool of unused parity logs */ -#ifndef SIMULATE - RF_Thread_t pLogDiskThreadHandle; -#endif /* !SIMULATE */ - -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ + /* used by parity logging */ + RF_SectorCount_t regionLogCapacity; + RF_ParityLogQueue_t parityLogPool; /* pool of unused parity logs */ + RF_RegionInfo_t *regionInfo; /* array of region state */ + int numParityLogs; + int numSectorsPerLog; + int 
regionParityRange; + int logsInUse; /* debugging */ + RF_ParityLogDiskQueue_t parityLogDiskQueue; /* state of parity + * logging disk work */ + RF_RegionBufferQueue_t regionBufferPool; /* buffers for holding + * region log */ + RF_RegionBufferQueue_t parityBufferPool; /* buffers for holding + * parity */ + caddr_t parityLogBufferHeap; /* pool of unused parity logs */ + RF_Thread_t pLogDiskThreadHandle; + +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ }; - -#endif /* !_RF__RF_RAID_H_ */ +#endif /* !_RF__RF_RAID_H_ */ diff --git a/sys/dev/raidframe/rf_raid0.c b/sys/dev/raidframe/rf_raid0.c index c81068affd9..c26ae1509cf 100644 --- a/sys/dev/raidframe/rf_raid0.c +++ b/sys/dev/raidframe/rf_raid0.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid0.c,v 1.1 1999/01/11 14:29:41 niklas Exp $ */ -/* $NetBSD: rf_raid0.c,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_raid0.c,v 1.2 1999/02/16 00:03:13 niklas Exp $ */ +/* $NetBSD: rf_raid0.c,v 1.3 1999/02/05 00:06:15 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,93 +33,6 @@ * ***************************************/ -/* - * : - * Log: rf_raid0.c,v - * Revision 1.24 1996/07/31 16:56:18 jimz - * dataBytesPerStripe, sectorsPerDisk init arch-indep. - * - * Revision 1.23 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.22 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.21 1996/06/19 22:07:34 jimz - * added parity verify - * - * Revision 1.20 1996/06/17 14:38:33 jimz - * properly #if out RF_DEMO code - * fix bug in MakeConfig that was causing weird behavior - * in configuration routines (config was not zeroed at start) - * clean up genplot handling of stacks - * - * Revision 1.19 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.18 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.17 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.16 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.15 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.14 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.13 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.12 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.11 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.10 1996/05/03 19:37:32 wvcii - * moved dag creation routines to dag library - * - * Revision 1.9 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.8 1995/12/06 15:06:36 root - * added copyright info - * - * Revision 1.7 1995/11/17 18:57:15 wvcii - * added prototypint to MapParity - * - * Revision 1.6 1995/11/16 13:53:51 wvcii - * fixed bug in CreateRAID0WriteDAG prototype - * - * Revision 1.5 1995/11/07 15:22:01 wvcii - * changed RAID0DagSelect prototype - * function no longer generates numHdrSucc, numTermAnt - * - * Revision 1.4 1995/06/23 13:39:17 robby - * updeated to prototypes in rf_layout.h - * - */ - #include "rf_types.h" #include 
"rf_raid.h" #include "rf_raid0.h" @@ -134,109 +47,116 @@ #include "rf_parityscan.h" typedef struct RF_Raid0ConfigInfo_s { - RF_RowCol_t *stripeIdentifier; -} RF_Raid0ConfigInfo_t; + RF_RowCol_t *stripeIdentifier; +} RF_Raid0ConfigInfo_t; -int rf_ConfigureRAID0( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureRAID0( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid0ConfigInfo_t *info; - RF_RowCol_t i; + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_Raid0ConfigInfo_t *info; + RF_RowCol_t i; - /* create a RAID level 0 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid0ConfigInfo_t), (RF_Raid0ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return(ENOMEM); - layoutPtr->layoutSpecificInfo = (void *)info; + /* create a RAID level 0 configuration structure */ + RF_MallocAndAdd(info, sizeof(RF_Raid0ConfigInfo_t), (RF_Raid0ConfigInfo_t *), raidPtr->cleanupList); + if (info == NULL) + return (ENOMEM); + layoutPtr->layoutSpecificInfo = (void *) info; - RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return(ENOMEM); - for (i=0; i<raidPtr->numCol; i++) - info->stripeIdentifier[i] = i; + RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); + if (info->stripeIdentifier == NULL) + return (ENOMEM); + for (i = 0; i < raidPtr->numCol; i++) + info->stripeIdentifier[i] = i; - RF_ASSERT(raidPtr->numRow == 1); - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * raidPtr->numCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->dataSectorsPerStripe = raidPtr->numCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << 
raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol; - layoutPtr->numParityCol = 0; - return(0); + RF_ASSERT(raidPtr->numRow == 1); + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * raidPtr->numCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; + layoutPtr->dataSectorsPerStripe = raidPtr->numCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->numDataCol = raidPtr->numCol; + layoutPtr->numParityCol = 0; + return (0); } -void rf_MapSectorRAID0( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapSectorRAID0( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - *col = SUID % raidPtr->numCol; - *diskSector = (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + *row = 0; + *col = SUID % raidPtr->numCol; + *diskSector = (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void rf_MapParityRAID0( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapParityRAID0( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - *row = *col = 0; - *diskSector = 0; + *row = *col = 0; + *diskSector = 0; } -void rf_IdentifyStripeRAID0( - RF_Raid_t *raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t **diskids, - RF_RowCol_t *outRow) +void +rf_IdentifyStripeRAID0( + RF_Raid_t * 
raidPtr, + RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, + RF_RowCol_t * outRow) { - RF_Raid0ConfigInfo_t *info; + RF_Raid0ConfigInfo_t *info; - info = raidPtr->Layout.layoutSpecificInfo; - *diskids = info->stripeIdentifier; - *outRow = 0; + info = raidPtr->Layout.layoutSpecificInfo; + *diskids = info->stripeIdentifier; + *outRow = 0; } -void rf_MapSIDToPSIDRAID0( - RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru) +void +rf_MapSIDToPSIDRAID0( + RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, + RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru) { - *which_ru = 0; - *psID = stripeID; + *which_ru = 0; + *psID = stripeID; } -void rf_RAID0DagSelect( - RF_Raid_t *raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t *asmap, - RF_VoidFuncPtr *createFunc) +void +rf_RAID0DagSelect( + RF_Raid_t * raidPtr, + RF_IoType_t type, + RF_AccessStripeMap_t * asmap, + RF_VoidFuncPtr * createFunc) { - *createFunc = ((type == RF_IO_TYPE_READ) ? - (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr)rf_CreateRAID0WriteDAG); + *createFunc = ((type == RF_IO_TYPE_READ) ? + (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRAID0WriteDAG); } -int rf_VerifyParityRAID0( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t *parityPDA, - int correct_it, - RF_RaidAccessFlags_t flags) +int +rf_VerifyParityRAID0( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidAddr, + RF_PhysDiskAddr_t * parityPDA, + int correct_it, + RF_RaidAccessFlags_t flags) { - /* - * No parity is always okay. - */ - return(RF_PARITY_OKAY); + /* + * No parity is always okay. 
+ */ + return (RF_PARITY_OKAY); } diff --git a/sys/dev/raidframe/rf_raid0.h b/sys/dev/raidframe/rf_raid0.h index fe90ff49c73..fbc74006cc7 100644 --- a/sys/dev/raidframe/rf_raid0.h +++ b/sys/dev/raidframe/rf_raid0.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid0.h,v 1.1 1999/01/11 14:29:41 niklas Exp $ */ -/* $NetBSD: rf_raid0.h,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_raid0.h,v 1.2 1999/02/16 00:03:14 niklas Exp $ */ +/* $NetBSD: rf_raid0.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,83 +29,30 @@ /* rf_raid0.h - header file for RAID Level 0 */ -/* - * : - * Log: rf_raid0.h,v - * Revision 1.15 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.14 1996/06/19 22:07:42 jimz - * added parity verify - * - * Revision 1.13 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.12 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.11 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.10 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.9 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.8 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.7 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.6 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.5 1995/12/06 15:02:36 root - * added copyright info - * - * Revision 1.4 1995/11/17 18:58:33 wvcii - * added prototyping to MapParity - * - * Revision 1.3 1995/11/07 15:21:00 wvcii - * changed RAID0DagSelect prototype - * - * Revision 1.2 1995/06/23 13:39:10 robby - * updeated to prototypes in rf_layout.h - * - */ - #ifndef _RF__RF_RAID0_H_ #define _RF__RF_RAID0_H_ -int rf_ConfigureRAID0(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -void rf_MapSectorRAID0(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_MapParityRAID0(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_IdentifyStripeRAID0(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, 
RF_RowCol_t *outRow); -void rf_MapSIDToPSIDRAID0(RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru); -void rf_RAID0DagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc); -int rf_VerifyParityRAID0(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags); +int +rf_ConfigureRAID0(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +void +rf_MapSectorRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_MapParityRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_IdentifyStripeRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outRow); +void +rf_MapSIDToPSIDRAID0(RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru); +void +rf_RAID0DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); +int +rf_VerifyParityRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, + RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); -#endif /* !_RF__RF_RAID0_H_ */ +#endif /* !_RF__RF_RAID0_H_ */ diff --git a/sys/dev/raidframe/rf_raid1.c b/sys/dev/raidframe/rf_raid1.c index e941bf384b2..6ad2645a0fa 100644 --- a/sys/dev/raidframe/rf_raid1.c +++ b/sys/dev/raidframe/rf_raid1.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid1.c,v 1.1 1999/01/11 14:29:42 niklas Exp $ */ -/* $NetBSD: rf_raid1.c,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_raid1.c,v 1.2 1999/02/16 00:03:15 niklas Exp $ */ +/* $NetBSD: rf_raid1.c,v 1.3 1999/02/05 00:06:15 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -33,196 +33,6 @@ * *****************************************************************************/ -/* - * : - * Log: rf_raid1.c,v - * Revision 1.46 1996/11/05 21:10:40 jimz - * failed pda generalization - * - * Revision 1.45 1996/07/31 16:56:18 jimz - * dataBytesPerStripe, sectorsPerDisk init arch-indep. - * - * Revision 1.44 1996/07/30 03:06:43 jimz - * get rid of extra rf_threadid.h include - * - * Revision 1.43 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.42 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.41 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.40 1996/07/17 14:31:19 jimz - * minor cleanup for readability - * - * Revision 1.39 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.38 1996/07/15 02:56:31 jimz - * fixed dag selection to deal with failed + recon to spare disks - * enhanced recon, parity check debugging - * - * Revision 1.37 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.36 1996/07/11 19:08:00 jimz - * generalize reconstruction mechanism - * allow raid1 reconstructs via copyback (done with array - * quiesced, not online, therefore not disk-directed) - * - * Revision 1.35 1996/07/10 23:01:24 jimz - * Better commenting of VerifyParity (for posterity) - * - * Revision 1.34 1996/07/10 22:29:45 jimz - * VerifyParityRAID1: corrected return values for stripes in degraded mode - * - * Revision 1.33 1996/07/10 16:05:39 jimz - * fixed a couple minor bugs in VerifyParityRAID1 - * added code to correct bad RAID1 parity - * - * Revision 1.32 1996/06/20 18:47:04 jimz - * fix up verification bugs - * - * Revision 1.31 1996/06/20 
15:38:59 jimz - * added parity verification - * can't correct bad parity yet, but can return pass/fail - * - * Revision 1.30 1996/06/19 22:23:01 jimz - * parity verification is now a layout-configurable thing - * not all layouts currently support it (correctly, anyway) - * - * Revision 1.29 1996/06/11 08:54:27 jimz - * improved error-checking at configuration time - * - * Revision 1.28 1996/06/10 18:25:24 wvcii - * fixed bug in rf_IdentifyStripeRAID1 - added array initialization - * - * Revision 1.27 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.26 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.25 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.24 1996/06/06 17:29:43 jimz - * use CreateMirrorIdleReadDAG for mirrored read - * - * Revision 1.23 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.22 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.21 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.20 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.19 1996/05/30 11:29:41 jimz - * Numerous bug fixes. 
Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.18 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.17 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.16 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.15 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.14 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.13 1996/05/03 19:36:22 wvcii - * moved dag creation routines to dag library - * - * Revision 1.12 1996/02/23 01:38:16 amiri - * removed chained declustering special case in SelectIdleDisk - * - * Revision 1.11 1996/02/22 16:47:18 amiri - * disabled shortest queue optimization for chained declustering - * - * Revision 1.10 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.9 1995/12/04 19:21:28 wvcii - * modified SelectIdleDisk to take a mirror node as a parameter and - * conditionally swap params 0 (data pda) and 4 (mirror pda). - * modified CreateRaidOneReadDAG so that it creates the DAG itself - * as opposed to reusing code in CreateNonredundantDAG. 
- * - * Revision 1.8 1995/11/30 16:07:45 wvcii - * added copyright info - * - * Revision 1.7 1995/11/16 14:46:18 wvcii - * fixed bugs in mapping and degraded dag creation, added comments - * - * Revision 1.6 1995/11/14 22:29:16 wvcii - * fixed bugs in dag creation - * - * Revision 1.5 1995/11/07 15:23:33 wvcii - * changed RAID1DagSelect prototype - * function no longer generates numHdrSucc, numTermAnt - * changed dag creation routines: - * term node generated during dag creation - * encoded commit nodes, barrier, antecedent types - * - * Revision 1.4 1995/10/10 19:09:21 wvcii - * write dag now handles non-aligned accesses - * - * Revision 1.3 1995/10/05 02:32:56 jimz - * ifdef'd out queue locking for load balancing - * - * Revision 1.2 1995/10/04 07:04:40 wvcii - * reads are now scheduled according to disk queue length. - * queue length is the sum of number of ios queued in raidframe as well as those at the disk. - * reads are sent to the disk with the shortest queue. - * testing against user disks successful, sim & kernel untested. - * - * Revision 1.1 1995/10/04 03:53:23 wvcii - * Initial revision - * - * - */ - #include "rf_raid.h" #include "rf_raid1.h" #include "rf_dag.h" @@ -244,65 +54,66 @@ #include "rf_sys.h" typedef struct RF_Raid1ConfigInfo_s { - RF_RowCol_t **stripeIdentifier; -} RF_Raid1ConfigInfo_t; - + RF_RowCol_t **stripeIdentifier; +} RF_Raid1ConfigInfo_t; /* start of day code specific to RAID level 1 */ -int rf_ConfigureRAID1( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureRAID1( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid1ConfigInfo_t *info; - RF_RowCol_t i; - - /* create a RAID level 1 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t), (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return(ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* ... 
and fill it in. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return(ENOMEM); - for (i = 0; i < (raidPtr->numCol / 2); i ++) { - info->stripeIdentifier[i][0] = (2 * i); - info->stripeIdentifier[i][1] = (2 * i) + 1; - } - - RF_ASSERT(raidPtr->numRow == 1); - - /* this implementation of RAID level 1 uses one row of numCol disks and allows multiple (numCol / 2) - * stripes per row. A stripe consists of a single data unit and a single parity (mirror) unit. - * stripe id = raidAddr / stripeUnitSize - */ - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2); - layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = 1; - layoutPtr->numParityCol = 1; - return(0); + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_Raid1ConfigInfo_t *info; + RF_RowCol_t i; + + /* create a RAID level 1 configuration structure */ + RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t), (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList); + if (info == NULL) + return (ENOMEM); + layoutPtr->layoutSpecificInfo = (void *) info; + + /* ... and fill it in. */ + info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList); + if (info->stripeIdentifier == NULL) + return (ENOMEM); + for (i = 0; i < (raidPtr->numCol / 2); i++) { + info->stripeIdentifier[i][0] = (2 * i); + info->stripeIdentifier[i][1] = (2 * i) + 1; + } + + RF_ASSERT(raidPtr->numRow == 1); + + /* this implementation of RAID level 1 uses one row of numCol disks + * and allows multiple (numCol / 2) stripes per row. A stripe + * consists of a single data unit and a single parity (mirror) unit. 
+ * stripe id = raidAddr / stripeUnitSize */ + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit; + layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2); + layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->numDataCol = 1; + layoutPtr->numParityCol = 1; + return (0); } /* returns the physical disk location of the primary copy in the mirror pair */ -void rf_MapSectorRAID1( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapSectorRAID1( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); - *row = 0; - *col = 2 * mirrorPair; - *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *row = 0; + *col = 2 * mirrorPair; + *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } @@ -311,21 +122,22 @@ void rf_MapSectorRAID1( * returns the physical disk location of the secondary copy in the mirror * pair */ -void rf_MapParityRAID1( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapParityRAID1( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / 
raidPtr->Layout.sectorsPerStripeUnit; - RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); - *row = 0; - *col = (2 * mirrorPair) + 1; + *row = 0; + *col = (2 * mirrorPair) + 1; - *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } @@ -333,19 +145,20 @@ void rf_MapParityRAID1( * * returns a list of disks for a given redundancy group */ -void rf_IdentifyStripeRAID1( - RF_Raid_t *raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t **diskids, - RF_RowCol_t *outRow) +void +rf_IdentifyStripeRAID1( + RF_Raid_t * raidPtr, + RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, + RF_RowCol_t * outRow) { - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo; - RF_ASSERT(stripeID >= 0); - RF_ASSERT(addr >= 0); - *outRow = 0; - *diskids = info->stripeIdentifier[ stripeID % (raidPtr->numCol/2)]; - RF_ASSERT(*diskids); + RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); + RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo; + RF_ASSERT(stripeID >= 0); + RF_ASSERT(addr >= 0); + *outRow = 0; + *diskids = info->stripeIdentifier[stripeID % (raidPtr->numCol / 2)]; + RF_ASSERT(*diskids); } @@ -353,14 +166,15 @@ void rf_IdentifyStripeRAID1( * * maps a logical stripe to a stripe in the redundant array */ -void rf_MapSIDToPSIDRAID1( - RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru) +void +rf_MapSIDToPSIDRAID1( + RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, + RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru) { - *which_ru = 0; - *psID = 
stripeID; + *which_ru = 0; + *psID = stripeID; } @@ -374,508 +188,499 @@ void rf_MapSIDToPSIDRAID1( * createFunc - name of function to use to create the graph *****************************************************************************/ -void rf_RAID1DagSelect( - RF_Raid_t *raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t *asmap, - RF_VoidFuncPtr *createFunc) +void +rf_RAID1DagSelect( + RF_Raid_t * raidPtr, + RF_IoType_t type, + RF_AccessStripeMap_t * asmap, + RF_VoidFuncPtr * createFunc) { - RF_RowCol_t frow, fcol, or, oc; - RF_PhysDiskAddr_t *failedPDA; - int prior_recon, tid; - RF_RowStatus_t rstat; - RF_SectorNum_t oo; - - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - *createFunc = NULL; - return; - } - - if (asmap->numDataFailed + asmap->numParityFailed) { - /* - * We've got a fault. Re-map to spare space, iff applicable. - * Shouldn't the arch-independent code do this for us? - * Anyway, it turns out if we don't do this here, then when - * we're reconstructing, writes go only to the surviving - * original disk, and aren't reflected on the reconstructed - * spare. Oops. --jimz - */ - failedPDA = asmap->failedPDAs[0]; - frow = failedPDA->row; - fcol = failedPDA->col; - rstat = raidPtr->status[frow]; - prior_recon = (rstat == rf_rs_reconfigured) || ( - (rstat == rf_rs_reconstructing) ? - rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); - if (prior_recon) { - or = frow; - oc = fcol; - oo = failedPDA->startSector; - /* - * If we did distributed sparing, we'd monkey with that here. - * But we don't, so we'll - */ - failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; - failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; - /* - * Redirect other components, iff necessary. This looks - * pretty suspicious to me, but it's what the raid5 - * DAG select does. 
- */ - if (asmap->parityInfo->next) { - if (failedPDA == asmap->parityInfo) { - failedPDA->next->row = failedPDA->row; - failedPDA->next->col = failedPDA->col; - } - else { - if (failedPDA == asmap->parityInfo->next) { - asmap->parityInfo->row = failedPDA->row; - asmap->parityInfo->col = failedPDA->col; - } - } - } - if (rf_dagDebug || rf_mapDebug) { - rf_get_threadid(tid); - printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - tid, type, or, oc, (long)oo, failedPDA->row, failedPDA->col, - (long)failedPDA->startSector); - } - asmap->numDataFailed = asmap->numParityFailed = 0; - } - } - if (type == RF_IO_TYPE_READ) { - if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr)rf_CreateMirrorIdleReadDAG; - else - *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneDegradedReadDAG; - } - else { - *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG; - } + RF_RowCol_t frow, fcol, or, oc; + RF_PhysDiskAddr_t *failedPDA; + int prior_recon, tid; + RF_RowStatus_t rstat; + RF_SectorNum_t oo; + + + RF_ASSERT(RF_IO_IS_R_OR_W(type)); + + if (asmap->numDataFailed + asmap->numParityFailed > 1) { + RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); + *createFunc = NULL; + return; + } + if (asmap->numDataFailed + asmap->numParityFailed) { + /* + * We've got a fault. Re-map to spare space, iff applicable. + * Shouldn't the arch-independent code do this for us? + * Anyway, it turns out if we don't do this here, then when + * we're reconstructing, writes go only to the surviving + * original disk, and aren't reflected on the reconstructed + * spare. Oops. --jimz + */ + failedPDA = asmap->failedPDAs[0]; + frow = failedPDA->row; + fcol = failedPDA->col; + rstat = raidPtr->status[frow]; + prior_recon = (rstat == rf_rs_reconfigured) || ( + (rstat == rf_rs_reconstructing) ? 
+ rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 + ); + if (prior_recon) { + or = frow; + oc = fcol; + oo = failedPDA->startSector; + /* + * If we did distributed sparing, we'd monkey with that here. + * But we don't, so we'll + */ + failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; + failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; + /* + * Redirect other components, iff necessary. This looks + * pretty suspicious to me, but it's what the raid5 + * DAG select does. + */ + if (asmap->parityInfo->next) { + if (failedPDA == asmap->parityInfo) { + failedPDA->next->row = failedPDA->row; + failedPDA->next->col = failedPDA->col; + } else { + if (failedPDA == asmap->parityInfo->next) { + asmap->parityInfo->row = failedPDA->row; + asmap->parityInfo->col = failedPDA->col; + } + } + } + if (rf_dagDebug || rf_mapDebug) { + rf_get_threadid(tid); + printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", + tid, type, or, oc, (long) oo, failedPDA->row, failedPDA->col, + (long) failedPDA->startSector); + } + asmap->numDataFailed = asmap->numParityFailed = 0; + } + } + if (type == RF_IO_TYPE_READ) { + if (asmap->numDataFailed == 0) + *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorIdleReadDAG; + else + *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG; + } else { + *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; + } } -int rf_VerifyParityRAID1( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t *parityPDA, - int correct_it, - RF_RaidAccessFlags_t flags) +int +rf_VerifyParityRAID1( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidAddr, + RF_PhysDiskAddr_t * parityPDA, + int correct_it, + RF_RaidAccessFlags_t flags) { - int nbytes, bcount, stripeWidth, ret, i, j, tid=0, nbad, *bbufs; - RF_DagNode_t *blockNode, *unblockNode, *wrBlock; - RF_DagHeader_t *rd_dag_h, *wr_dag_h; - RF_AccessStripeMapHeader_t *asm_h; - RF_AllocListElem_t *allocList; - RF_AccTraceEntry_t tracerec; - 
RF_ReconUnitNum_t which_ru; - RF_RaidLayout_t *layoutPtr; - RF_AccessStripeMap_t *aasm; - RF_SectorCount_t nsector; - RF_RaidAddr_t startAddr; - char *buf, *buf1, *buf2; - RF_PhysDiskAddr_t *pda; - RF_StripeNum_t psID; - RF_MCPair_t *mcpair; - - if (rf_verifyParityDebug) { - rf_get_threadid(tid); - } - - layoutPtr = &raidPtr->Layout; - startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); - nsector = parityPDA->numSector; - nbytes = rf_RaidAddressToByte(raidPtr, nsector); - psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); - - asm_h = NULL; - rd_dag_h = wr_dag_h = NULL; - mcpair = NULL; - - ret = RF_PARITY_COULD_NOT_VERIFY; - - rf_MakeAllocList(allocList); - if (allocList == NULL) - return(RF_PARITY_COULD_NOT_VERIFY); - mcpair = rf_AllocMCPair(); - if (mcpair == NULL) - goto done; - RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol); - stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - bcount = nbytes*(layoutPtr->numDataCol + layoutPtr->numParityCol); - RF_MallocAndAdd(buf, bcount, (char *), allocList); - if (buf == NULL) - goto done; - if (rf_verifyParityDebug) { - printf("[%d] RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n", - tid, (long)buf, bcount, (long)buf, (long)buf+bcount); - } - - /* - * Generate a DAG which will read the entire stripe- then we can - * just compare data chunks versus "parity" chunks. - */ - - rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf, - rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags, - RF_IO_NORMAL_PRIORITY); - if (rd_dag_h == NULL) - goto done; - blockNode = rd_dag_h->succedents[0]; - unblockNode = blockNode->succedents[0]->succedents[0]; - - /* - * Map the access to physical disk addresses (PDAs)- this will - * get us both a list of data addresses, and "parity" addresses - * (which are really mirror copies). 
- */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, - buf, RF_DONT_REMAP); - aasm = asm_h->stripeMap; - - buf1 = buf; - /* - * Loop through the data blocks, setting up read nodes for each. - */ - for(pda=aasm->physInfo,i=0;i<layoutPtr->numDataCol;i++,pda=pda->next) - { - RF_ASSERT(pda); - - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) { - /* cannot verify parity with dead disk */ - goto done; - } - pda->bufPtr = buf1; - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[1].p = buf1; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - buf1 += nbytes; - } - RF_ASSERT(pda == NULL); - /* - * keep i, buf1 running - * - * Loop through parity blocks, setting up read nodes for each. - */ - for(pda=aasm->parityInfo;i<layoutPtr->numDataCol+layoutPtr->numParityCol;i++,pda=pda->next) - { - RF_ASSERT(pda); - rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); - RF_ASSERT(pda->numSector != 0); - if (rf_TryToRedirectPDA(raidPtr, pda, 0)) { - /* cannot verify parity with dead disk */ - goto done; - } - pda->bufPtr = buf1; - blockNode->succedents[i]->params[0].p = pda; - blockNode->succedents[i]->params[1].p = buf1; - blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - buf1 += nbytes; - } - RF_ASSERT(pda == NULL); - - bzero((char *)&tracerec, sizeof(tracerec)); - rd_dag_h->tracerec = &tracerec; - - if (rf_verifyParityDebug > 1) { - printf("[%d] RAID1 parity verify read dag:\n", tid); - rf_PrintDAGList(rd_dag_h); - } - - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - rf_DispatchDAG(rd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc, - (void *)mcpair); - while (mcpair->flag == 0) { - RF_WAIT_MCPAIR(mcpair); - } - RF_UNLOCK_MUTEX(mcpair->mutex); - - if 
(rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n"); - ret = RF_PARITY_COULD_NOT_VERIFY; - goto done; - } - - /* - * buf1 is the beginning of the data blocks chunk - * buf2 is the beginning of the parity blocks chunk - */ - buf1 = buf; - buf2 = buf + (nbytes * layoutPtr->numDataCol); - ret = RF_PARITY_OKAY; - /* - * bbufs is "bad bufs"- an array whose entries are the data - * column numbers where we had miscompares. (That is, column 0 - * and column 1 of the array are mirror copies, and are considered - * "data column 0" for this purpose). - */ - RF_MallocAndAdd(bbufs, layoutPtr->numParityCol*sizeof(int), (int *), - allocList); - nbad = 0; - /* - * Check data vs "parity" (mirror copy). - */ - for(i=0;i<layoutPtr->numDataCol;i++) { - if (rf_verifyParityDebug) { - printf("[%d] RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n", - tid, nbytes, i, (long)buf1, (long)buf2, (long)buf); - } - ret = bcmp(buf1, buf2, nbytes); - if (ret) { - if (rf_verifyParityDebug > 1) { - for(j=0;j<nbytes;j++) { - if (buf1[j] != buf2[j]) - break; - } - printf("psid=%ld j=%d\n", (long)psID, j); - printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0]&0xff, - buf1[1]&0xff, buf1[2]&0xff, buf1[3]&0xff, buf1[4]&0xff); - printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0]&0xff, - buf2[1]&0xff, buf2[2]&0xff, buf2[3]&0xff, buf2[4]&0xff); - } - if (rf_verifyParityDebug) { - printf("[%d] RAID1: found bad parity, i=%d\n", tid, i); - } - /* - * Parity is bad. Keep track of which columns were bad. - */ - if (bbufs) - bbufs[nbad] = i; - nbad++; - ret = RF_PARITY_BAD; - } - buf1 += nbytes; - buf2 += nbytes; - } - - if ((ret != RF_PARITY_OKAY) && correct_it) { - ret = RF_PARITY_COULD_NOT_CORRECT; - if (rf_verifyParityDebug) { - printf("[%d] RAID1 parity verify: parity not correct\n", tid); - } - if (bbufs == NULL) - goto done; - /* - * Make a DAG with one write node for each bad unit. 
We'll simply - * write the contents of the data unit onto the parity unit for - * correction. (It's possible that the mirror copy was the correct - * copy, and that we're spooging good data by writing bad over it, - * but there's no way we can know that. - */ - wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf, - rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags, - RF_IO_NORMAL_PRIORITY); - if (wr_dag_h == NULL) - goto done; - wrBlock = wr_dag_h->succedents[0]; - /* - * Fill in a write node for each bad compare. - */ - for(i=0;i<nbad;i++) { - j = i+layoutPtr->numDataCol; - pda = blockNode->succedents[j]->params[0].p; - pda->bufPtr = blockNode->succedents[i]->params[1].p; - wrBlock->succedents[i]->params[0].p = pda; - wrBlock->succedents[i]->params[1].p = pda->bufPtr; - wrBlock->succedents[i]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - } - bzero((char *)&tracerec, sizeof(tracerec)); - wr_dag_h->tracerec = &tracerec; - if (rf_verifyParityDebug > 1) { - printf("Parity verify write dag:\n"); - rf_PrintDAGList(wr_dag_h); - } - RF_LOCK_MUTEX(mcpair->mutex); - mcpair->flag = 0; - /* fire off the write DAG */ - rf_DispatchDAG(wr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc, - (void *)mcpair); - while (!mcpair->flag) { - RF_WAIT_COND(mcpair->cond, mcpair->mutex); - } - RF_UNLOCK_MUTEX(mcpair->mutex); - if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n"); - goto done; - } - ret = RF_PARITY_CORRECTED; - } - + int nbytes, bcount, stripeWidth, ret, i, j, tid = 0, nbad, *bbufs; + RF_DagNode_t *blockNode, *unblockNode, *wrBlock; + RF_DagHeader_t *rd_dag_h, *wr_dag_h; + RF_AccessStripeMapHeader_t *asm_h; + RF_AllocListElem_t *allocList; + RF_AccTraceEntry_t tracerec; + RF_ReconUnitNum_t which_ru; + RF_RaidLayout_t *layoutPtr; + RF_AccessStripeMap_t *aasm; + RF_SectorCount_t nsector; + RF_RaidAddr_t startAddr; + char *buf, *buf1, *buf2; + 
RF_PhysDiskAddr_t *pda; + RF_StripeNum_t psID; + RF_MCPair_t *mcpair; + + if (rf_verifyParityDebug) { + rf_get_threadid(tid); + } + layoutPtr = &raidPtr->Layout; + startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); + nsector = parityPDA->numSector; + nbytes = rf_RaidAddressToByte(raidPtr, nsector); + psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); + + asm_h = NULL; + rd_dag_h = wr_dag_h = NULL; + mcpair = NULL; + + ret = RF_PARITY_COULD_NOT_VERIFY; + + rf_MakeAllocList(allocList); + if (allocList == NULL) + return (RF_PARITY_COULD_NOT_VERIFY); + mcpair = rf_AllocMCPair(); + if (mcpair == NULL) + goto done; + RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol); + stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; + bcount = nbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol); + RF_MallocAndAdd(buf, bcount, (char *), allocList); + if (buf == NULL) + goto done; + if (rf_verifyParityDebug) { + printf("[%d] RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n", + tid, (long) buf, bcount, (long) buf, (long) buf + bcount); + } + /* + * Generate a DAG which will read the entire stripe- then we can + * just compare data chunks versus "parity" chunks. + */ + + rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf, + rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags, + RF_IO_NORMAL_PRIORITY); + if (rd_dag_h == NULL) + goto done; + blockNode = rd_dag_h->succedents[0]; + unblockNode = blockNode->succedents[0]->succedents[0]; + + /* + * Map the access to physical disk addresses (PDAs)- this will + * get us both a list of data addresses, and "parity" addresses + * (which are really mirror copies). + */ + asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, + buf, RF_DONT_REMAP); + aasm = asm_h->stripeMap; + + buf1 = buf; + /* + * Loop through the data blocks, setting up read nodes for each. 
+ */ + for (pda = aasm->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { + RF_ASSERT(pda); + + rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); + + RF_ASSERT(pda->numSector != 0); + if (rf_TryToRedirectPDA(raidPtr, pda, 0)) { + /* cannot verify parity with dead disk */ + goto done; + } + pda->bufPtr = buf1; + blockNode->succedents[i]->params[0].p = pda; + blockNode->succedents[i]->params[1].p = buf1; + blockNode->succedents[i]->params[2].v = psID; + blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + buf1 += nbytes; + } + RF_ASSERT(pda == NULL); + /* + * keep i, buf1 running + * + * Loop through parity blocks, setting up read nodes for each. + */ + for (pda = aasm->parityInfo; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++, pda = pda->next) { + RF_ASSERT(pda); + rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); + RF_ASSERT(pda->numSector != 0); + if (rf_TryToRedirectPDA(raidPtr, pda, 0)) { + /* cannot verify parity with dead disk */ + goto done; + } + pda->bufPtr = buf1; + blockNode->succedents[i]->params[0].p = pda; + blockNode->succedents[i]->params[1].p = buf1; + blockNode->succedents[i]->params[2].v = psID; + blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + buf1 += nbytes; + } + RF_ASSERT(pda == NULL); + + bzero((char *) &tracerec, sizeof(tracerec)); + rd_dag_h->tracerec = &tracerec; + + if (rf_verifyParityDebug > 1) { + printf("[%d] RAID1 parity verify read dag:\n", tid); + rf_PrintDAGList(rd_dag_h); + } + RF_LOCK_MUTEX(mcpair->mutex); + mcpair->flag = 0; + rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) mcpair); + while (mcpair->flag == 0) { + RF_WAIT_MCPAIR(mcpair); + } + RF_UNLOCK_MUTEX(mcpair->mutex); + + if (rd_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n"); + ret = RF_PARITY_COULD_NOT_VERIFY; + goto done; + } + /* + * buf1 is the beginning of the 
data blocks chunk + * buf2 is the beginning of the parity blocks chunk + */ + buf1 = buf; + buf2 = buf + (nbytes * layoutPtr->numDataCol); + ret = RF_PARITY_OKAY; + /* + * bbufs is "bad bufs"- an array whose entries are the data + * column numbers where we had miscompares. (That is, column 0 + * and column 1 of the array are mirror copies, and are considered + * "data column 0" for this purpose). + */ + RF_MallocAndAdd(bbufs, layoutPtr->numParityCol * sizeof(int), (int *), + allocList); + nbad = 0; + /* + * Check data vs "parity" (mirror copy). + */ + for (i = 0; i < layoutPtr->numDataCol; i++) { + if (rf_verifyParityDebug) { + printf("[%d] RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n", + tid, nbytes, i, (long) buf1, (long) buf2, (long) buf); + } + ret = bcmp(buf1, buf2, nbytes); + if (ret) { + if (rf_verifyParityDebug > 1) { + for (j = 0; j < nbytes; j++) { + if (buf1[j] != buf2[j]) + break; + } + printf("psid=%ld j=%d\n", (long) psID, j); + printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0] & 0xff, + buf1[1] & 0xff, buf1[2] & 0xff, buf1[3] & 0xff, buf1[4] & 0xff); + printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0] & 0xff, + buf2[1] & 0xff, buf2[2] & 0xff, buf2[3] & 0xff, buf2[4] & 0xff); + } + if (rf_verifyParityDebug) { + printf("[%d] RAID1: found bad parity, i=%d\n", tid, i); + } + /* + * Parity is bad. Keep track of which columns were bad. + */ + if (bbufs) + bbufs[nbad] = i; + nbad++; + ret = RF_PARITY_BAD; + } + buf1 += nbytes; + buf2 += nbytes; + } + + if ((ret != RF_PARITY_OKAY) && correct_it) { + ret = RF_PARITY_COULD_NOT_CORRECT; + if (rf_verifyParityDebug) { + printf("[%d] RAID1 parity verify: parity not correct\n", tid); + } + if (bbufs == NULL) + goto done; + /* + * Make a DAG with one write node for each bad unit. We'll simply + * write the contents of the data unit onto the parity unit for + * correction. 
(It's possible that the mirror copy was the correct + * copy, and that we're spooging good data by writing bad over it, + * but there's no way we can know that. + */ + wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags, + RF_IO_NORMAL_PRIORITY); + if (wr_dag_h == NULL) + goto done; + wrBlock = wr_dag_h->succedents[0]; + /* + * Fill in a write node for each bad compare. + */ + for (i = 0; i < nbad; i++) { + j = i + layoutPtr->numDataCol; + pda = blockNode->succedents[j]->params[0].p; + pda->bufPtr = blockNode->succedents[i]->params[1].p; + wrBlock->succedents[i]->params[0].p = pda; + wrBlock->succedents[i]->params[1].p = pda->bufPtr; + wrBlock->succedents[i]->params[2].v = psID; + wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + } + bzero((char *) &tracerec, sizeof(tracerec)); + wr_dag_h->tracerec = &tracerec; + if (rf_verifyParityDebug > 1) { + printf("Parity verify write dag:\n"); + rf_PrintDAGList(wr_dag_h); + } + RF_LOCK_MUTEX(mcpair->mutex); + mcpair->flag = 0; + /* fire off the write DAG */ + rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) mcpair); + while (!mcpair->flag) { + RF_WAIT_COND(mcpair->cond, mcpair->mutex); + } + RF_UNLOCK_MUTEX(mcpair->mutex); + if (wr_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n"); + goto done; + } + ret = RF_PARITY_CORRECTED; + } done: - /* - * All done. We might've gotten here without doing part of the function, - * so cleanup what we have to and return our running status. - */ - if (asm_h) - rf_FreeAccessStripeMap(asm_h); - if (rd_dag_h) - rf_FreeDAG(rd_dag_h); - if (wr_dag_h) - rf_FreeDAG(wr_dag_h); - if (mcpair) - rf_FreeMCPair(mcpair); - rf_FreeAllocList(allocList); - if (rf_verifyParityDebug) { - printf("[%d] RAID1 parity verify, returning %d\n", tid, ret); - } - return(ret); + /* + * All done. 
We might've gotten here without doing part of the function, + * so cleanup what we have to and return our running status. + */ + if (asm_h) + rf_FreeAccessStripeMap(asm_h); + if (rd_dag_h) + rf_FreeDAG(rd_dag_h); + if (wr_dag_h) + rf_FreeDAG(wr_dag_h); + if (mcpair) + rf_FreeMCPair(mcpair); + rf_FreeAllocList(allocList); + if (rf_verifyParityDebug) { + printf("[%d] RAID1 parity verify, returning %d\n", tid, ret); + } + return (ret); } -int rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have to return it */ - int use_committed; /* whether to use a committed or an available recon buffer */ +int +rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) + RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ + int keep_it; /* whether we can keep this buffer or we have + * to return it */ + int use_committed; /* whether to use a committed or an available + * recon buffer */ { - RF_ReconParityStripeStatus_t *pssPtr; - RF_ReconCtrl_t *reconCtrlPtr; - RF_RaidLayout_t *layoutPtr; - int tid=0, retcode, created; - RF_CallbackDesc_t *cb, *p; - RF_ReconBuffer_t *t; - RF_Raid_t *raidPtr; - caddr_t ta; - - retcode = 0; - created = 0; - - raidPtr = rbuf->raidPtr; - layoutPtr = &raidPtr->Layout; - reconCtrlPtr = raidPtr->reconControl[rbuf->row]; - - RF_ASSERT(rbuf); - RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); - - if (rf_reconbufferDebug) { - rf_get_threadid(tid); - printf("[%d] RAID1 reconbuffer submission r%d c%d psid %ld ru%d (failed offset %ld)\n", - tid, rbuf->row, rbuf->col, (long)rbuf->parityStripeID, rbuf->which_ru, - (long)rbuf->failedDiskSectorOffset); - } - - if (rf_reconDebug) { - printf("RAID1 reconbuffer submit psid %ld buf %lx\n", - (long)rbuf->parityStripeID, (long)rbuf->buffer); - printf("RAID1 psid %ld %02x %02x %02x %02x %02x\n", - (long)rbuf->parityStripeID, - rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2], rbuf->buffer[3], - 
rbuf->buffer[4]); - } - - RF_LOCK_PSS_MUTEX(raidPtr,rbuf->row,rbuf->parityStripeID); - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, - rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); - RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten an rbuf for it */ - - /* - * Since this is simple mirroring, the first submission for a stripe is also - * treated as the last. - */ - - t = NULL; - if (keep_it) { - if (rf_reconbufferDebug) { - printf("[%d] RAID1 rbuf submission: keeping rbuf\n", tid); - } - t = rbuf; - } - else { - if (use_committed) { - if (rf_reconbufferDebug) { - printf("[%d] RAID1 rbuf submission: using committed rbuf\n", tid); - } - t = reconCtrlPtr->committedRbufs; - RF_ASSERT(t); - reconCtrlPtr->committedRbufs = t->next; - t->next = NULL; - } - else if (reconCtrlPtr->floatingRbufs) { - if (rf_reconbufferDebug) { - printf("[%d] RAID1 rbuf submission: using floating rbuf\n", tid); - } - t = reconCtrlPtr->floatingRbufs; - reconCtrlPtr->floatingRbufs = t->next; - t->next = NULL; - } - } - if (t == NULL) { - if (rf_reconbufferDebug) { - printf("[%d] RAID1 rbuf submission: waiting for rbuf\n", tid); - } - RF_ASSERT((keep_it == 0) && (use_committed == 0)); - raidPtr->procsInBufWait++; - if ((raidPtr->procsInBufWait == (raidPtr->numCol-1)) - && (raidPtr->numFullReconBuffers == 0)) - { - /* ruh-ro */ - RF_ERRORMSG("Buffer wait deadlock\n"); - rf_PrintPSStatusTable(raidPtr, rbuf->row); - RF_PANIC(); - } - pssPtr->flags |= RF_PSS_BUFFERWAIT; - cb = rf_AllocCallbackDesc(); - cb->row = rbuf->row; - cb->col = rbuf->col; - cb->callbackArg.v = rbuf->parityStripeID; - cb->callbackArg2.v = rbuf->which_ru; - cb->next = NULL; - if (reconCtrlPtr->bufferWaitList == NULL) { - /* we are the wait list- lucky us */ - reconCtrlPtr->bufferWaitList = cb; - } - else { - /* append to wait list */ - for(p=reconCtrlPtr->bufferWaitList;p->next;p=p->next); - p->next = cb; - } - retcode = 1; - goto out; - } 
- if (t != rbuf) { - t->row = rbuf->row; - t->col = reconCtrlPtr->fcol; - t->parityStripeID = rbuf->parityStripeID; - t->which_ru = rbuf->which_ru; - t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; - t->spRow = rbuf->spRow; - t->spCol = rbuf->spCol; - t->spOffset = rbuf->spOffset; - /* Swap buffers. DANCE! */ - ta = t->buffer; - t->buffer = rbuf->buffer; - rbuf->buffer = ta; - } - /* - * Use the rbuf we've been given as the target. - */ - RF_ASSERT(pssPtr->rbuf == NULL); - pssPtr->rbuf = t; - - t->count = 1; - /* - * Below, we use 1 for numDataCol (which is equal to the count in the - * previous line), so we'll always be done. - */ - rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1); + RF_ReconParityStripeStatus_t *pssPtr; + RF_ReconCtrl_t *reconCtrlPtr; + RF_RaidLayout_t *layoutPtr; + int tid = 0, retcode, created; + RF_CallbackDesc_t *cb, *p; + RF_ReconBuffer_t *t; + RF_Raid_t *raidPtr; + caddr_t ta; + + retcode = 0; + created = 0; + + raidPtr = rbuf->raidPtr; + layoutPtr = &raidPtr->Layout; + reconCtrlPtr = raidPtr->reconControl[rbuf->row]; + + RF_ASSERT(rbuf); + RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); + + if (rf_reconbufferDebug) { + rf_get_threadid(tid); + printf("[%d] RAID1 reconbuffer submission r%d c%d psid %ld ru%d (failed offset %ld)\n", + tid, rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, + (long) rbuf->failedDiskSectorOffset); + } + if (rf_reconDebug) { + printf("RAID1 reconbuffer submit psid %ld buf %lx\n", + (long) rbuf->parityStripeID, (long) rbuf->buffer); + printf("RAID1 psid %ld %02x %02x %02x %02x %02x\n", + (long) rbuf->parityStripeID, + rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2], rbuf->buffer[3], + rbuf->buffer[4]); + } + RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); + + RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); + + pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, + rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); + RF_ASSERT(pssPtr); /* if it didn't exist, we 
wouldn't have gotten + * an rbuf for it */ + + /* + * Since this is simple mirroring, the first submission for a stripe is also + * treated as the last. + */ + + t = NULL; + if (keep_it) { + if (rf_reconbufferDebug) { + printf("[%d] RAID1 rbuf submission: keeping rbuf\n", tid); + } + t = rbuf; + } else { + if (use_committed) { + if (rf_reconbufferDebug) { + printf("[%d] RAID1 rbuf submission: using committed rbuf\n", tid); + } + t = reconCtrlPtr->committedRbufs; + RF_ASSERT(t); + reconCtrlPtr->committedRbufs = t->next; + t->next = NULL; + } else + if (reconCtrlPtr->floatingRbufs) { + if (rf_reconbufferDebug) { + printf("[%d] RAID1 rbuf submission: using floating rbuf\n", tid); + } + t = reconCtrlPtr->floatingRbufs; + reconCtrlPtr->floatingRbufs = t->next; + t->next = NULL; + } + } + if (t == NULL) { + if (rf_reconbufferDebug) { + printf("[%d] RAID1 rbuf submission: waiting for rbuf\n", tid); + } + RF_ASSERT((keep_it == 0) && (use_committed == 0)); + raidPtr->procsInBufWait++; + if ((raidPtr->procsInBufWait == (raidPtr->numCol - 1)) + && (raidPtr->numFullReconBuffers == 0)) { + /* ruh-ro */ + RF_ERRORMSG("Buffer wait deadlock\n"); + rf_PrintPSStatusTable(raidPtr, rbuf->row); + RF_PANIC(); + } + pssPtr->flags |= RF_PSS_BUFFERWAIT; + cb = rf_AllocCallbackDesc(); + cb->row = rbuf->row; + cb->col = rbuf->col; + cb->callbackArg.v = rbuf->parityStripeID; + cb->callbackArg2.v = rbuf->which_ru; + cb->next = NULL; + if (reconCtrlPtr->bufferWaitList == NULL) { + /* we are the wait list- lucky us */ + reconCtrlPtr->bufferWaitList = cb; + } else { + /* append to wait list */ + for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); + p->next = cb; + } + retcode = 1; + goto out; + } + if (t != rbuf) { + t->row = rbuf->row; + t->col = reconCtrlPtr->fcol; + t->parityStripeID = rbuf->parityStripeID; + t->which_ru = rbuf->which_ru; + t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; + t->spRow = rbuf->spRow; + t->spCol = rbuf->spCol; + t->spOffset = rbuf->spOffset; + 
/* Swap buffers. DANCE! */ + ta = t->buffer; + t->buffer = rbuf->buffer; + rbuf->buffer = ta; + } + /* + * Use the rbuf we've been given as the target. + */ + RF_ASSERT(pssPtr->rbuf == NULL); + pssPtr->rbuf = t; + + t->count = 1; + /* + * Below, we use 1 for numDataCol (which is equal to the count in the + * previous line), so we'll always be done. + */ + rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1); out: - RF_UNLOCK_PSS_MUTEX( raidPtr,rbuf->row,rbuf->parityStripeID); - RF_UNLOCK_MUTEX( reconCtrlPtr->rb_mutex ); - if (rf_reconbufferDebug) { - printf("[%d] RAID1 rbuf submission: returning %d\n", tid, retcode); - } - return(retcode); + RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); + RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); + if (rf_reconbufferDebug) { + printf("[%d] RAID1 rbuf submission: returning %d\n", tid, retcode); + } + return (retcode); } diff --git a/sys/dev/raidframe/rf_raid1.h b/sys/dev/raidframe/rf_raid1.h index 9ce0cb64067..ef2201cacfa 100644 --- a/sys/dev/raidframe/rf_raid1.h +++ b/sys/dev/raidframe/rf_raid1.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid1.h,v 1.1 1999/01/11 14:29:42 niklas Exp $ */ -/* $NetBSD: rf_raid1.h,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_raid1.h,v 1.2 1999/02/16 00:03:16 niklas Exp $ */ +/* $NetBSD: rf_raid1.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -29,102 +29,35 @@ /* header file for RAID Level 1 */ -/* - * : - * Log: rf_raid1.h,v - * Revision 1.17 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.16 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.15 1996/07/11 19:08:00 jimz - * generalize reconstruction mechanism - * allow raid1 reconstructs via copyback (done with array - * quiesced, not online, therefore not disk-directed) - * - * Revision 1.14 1996/06/19 22:23:01 jimz - * parity verification is now a layout-configurable thing - * not all layouts currently support it (correctly, anyway) - * - * Revision 1.13 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.12 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.11 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.10 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.9 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.8 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.7 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.6 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add 
prototypes, clean out dead code, et cetera - * - * Revision 1.5 1996/05/03 19:35:34 wvcii - * moved dags to dag library - * - * Revision 1.4 1995/11/30 16:07:26 wvcii - * added copyright info - * - * Revision 1.3 1995/11/16 14:56:41 wvcii - * updated prototypes - * - * Revision 1.2 1995/11/07 15:23:01 wvcii - * changed RAID1DagSelect prototype - * function no longer generates numHdrSucc, numTermAnt - * - * Revision 1.1 1995/10/04 03:52:59 wvcii - * Initial revision - * - * - */ - #ifndef _RF__RF_RAID1_H_ #define _RF__RF_RAID1_H_ #include "rf_types.h" -int rf_ConfigureRAID1(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -void rf_MapSectorRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_MapParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_IdentifyStripeRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, RF_RowCol_t *outRow); -void rf_MapSIDToPSIDRAID1(RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru); -void rf_RAID1DagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc); -int rf_VerifyParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags); -int rf_SubmitReconBufferRAID1(RF_ReconBuffer_t *rbuf, int keep_int, - int use_committed); +int +rf_ConfigureRAID1(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +void +rf_MapSectorRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_MapParityRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void 
+rf_IdentifyStripeRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outRow); +void +rf_MapSIDToPSIDRAID1(RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru); +void +rf_RAID1DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); +int +rf_VerifyParityRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, + RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); +int +rf_SubmitReconBufferRAID1(RF_ReconBuffer_t * rbuf, int keep_int, + int use_committed); -#endif /* !_RF__RF_RAID1_H_ */ +#endif /* !_RF__RF_RAID1_H_ */ diff --git a/sys/dev/raidframe/rf_raid4.c b/sys/dev/raidframe/rf_raid4.c index 5a2c0da50bf..5c8c963d73c 100644 --- a/sys/dev/raidframe/rf_raid4.c +++ b/sys/dev/raidframe/rf_raid4.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid4.c,v 1.1 1999/01/11 14:29:43 niklas Exp $ */ -/* $NetBSD: rf_raid4.c,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_raid4.c,v 1.2 1999/02/16 00:03:16 niklas Exp $ */ +/* $NetBSD: rf_raid4.c,v 1.3 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,81 +33,6 @@ * ***************************************/ -/* - * : - * Log: rf_raid4.c,v - * Revision 1.24 1996/07/31 16:56:18 jimz - * dataBytesPerStripe, sectorsPerDisk init arch-indep. - * - * Revision 1.23 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.22 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.21 1996/06/11 08:54:27 jimz - * improved error-checking at configuration time - * - * Revision 1.20 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.19 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.18 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.17 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.16 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.15 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.14 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.13 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.12 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.11 1996/05/03 19:39:41 wvcii - * added includes for dag library - * - * Revision 1.10 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.9 1995/12/06 15:02:46 root - * added copyright info - * - * Revision 1.8 1995/11/17 18:57:32 wvcii - * added prototyping to MapParity - * - * Revision 1.7 1995/06/23 13:38:58 robby - * updeated to prototypes in rf_layout.h - * - */ - #include "rf_raid.h" #include "rf_dag.h" #include "rf_dagutils.h" @@ -121,105 +46,113 @@ #include "rf_general.h" typedef struct RF_Raid4ConfigInfo_s { - RF_RowCol_t *stripeIdentifier; /* filled in at config time & 
used by IdentifyStripe */ -} RF_Raid4ConfigInfo_t; + RF_RowCol_t *stripeIdentifier; /* filled in at config time & used by + * IdentifyStripe */ +} RF_Raid4ConfigInfo_t; -int rf_ConfigureRAID4( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureRAID4( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid4ConfigInfo_t *info; - int i; - - /* create a RAID level 4 configuration structure ... */ - RF_MallocAndAdd(info, sizeof(RF_Raid4ConfigInfo_t), (RF_Raid4ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return(ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - /* ... and fill it in. */ - RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return(ENOMEM); - for (i=0; i<raidPtr->numCol; i++) - info->stripeIdentifier[i] = i; - - RF_ASSERT(raidPtr->numRow == 1); - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol-1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - return(0); + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_Raid4ConfigInfo_t *info; + int i; + + /* create a RAID level 4 configuration structure ... */ + RF_MallocAndAdd(info, sizeof(RF_Raid4ConfigInfo_t), (RF_Raid4ConfigInfo_t *), raidPtr->cleanupList); + if (info == NULL) + return (ENOMEM); + layoutPtr->layoutSpecificInfo = (void *) info; + + /* ... and fill it in. 
*/ + RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); + if (info->stripeIdentifier == NULL) + return (ENOMEM); + for (i = 0; i < raidPtr->numCol; i++) + info->stripeIdentifier[i] = i; + + RF_ASSERT(raidPtr->numRow == 1); + + /* fill in the remaining layout parameters */ + layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->numDataCol = raidPtr->numCol - 1; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->numParityCol = 1; + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + + return (0); } -int rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t *raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t * raidPtr) { - return(20); + return (20); } -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t *raidPtr) +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t * raidPtr) { - return(20); + return (20); } -void rf_MapSectorRAID4( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapSectorRAID4( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - *col = SUID % raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + *row = 0; + *col = SUID % raidPtr->Layout.numDataCol; + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + (raidSector % 
raidPtr->Layout.sectorsPerStripeUnit); } -void rf_MapParityRAID4( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapParityRAID4( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - *col = raidPtr->Layout.numDataCol; - *diskSector =(SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + + *row = 0; + *col = raidPtr->Layout.numDataCol; + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void rf_IdentifyStripeRAID4( - RF_Raid_t *raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t **diskids, - RF_RowCol_t *outRow) +void +rf_IdentifyStripeRAID4( + RF_Raid_t * raidPtr, + RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, + RF_RowCol_t * outRow) { - RF_Raid4ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo; - - *outRow = 0; - *diskids = info->stripeIdentifier; + RF_Raid4ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo; + + *outRow = 0; + *diskids = info->stripeIdentifier; } -void rf_MapSIDToPSIDRAID4( - RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru) +void +rf_MapSIDToPSIDRAID4( + RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, + RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru) { - *which_ru = 0; - *psID = stripeID; + *which_ru = 0; + *psID = stripeID; } diff --git a/sys/dev/raidframe/rf_raid4.h b/sys/dev/raidframe/rf_raid4.h index 81f8e5375d3..9d84a594961 100644 --- a/sys/dev/raidframe/rf_raid4.h +++ b/sys/dev/raidframe/rf_raid4.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid4.h,v 
1.1 1999/01/11 14:29:43 niklas Exp $ */ -/* $NetBSD: rf_raid4.h,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_raid4.h,v 1.2 1999/02/16 00:03:17 niklas Exp $ */ +/* $NetBSD: rf_raid4.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,81 +29,29 @@ /* rf_raid4.h header file for RAID Level 4 */ -/* - * : - * Log: rf_raid4.h,v - * Revision 1.15 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.14 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.13 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.12 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.11 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.10 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.9 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.8 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.7 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.6 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * 
- * Revision 1.5 1995/12/06 15:07:03 root - * added copyright info - * - * Revision 1.4 1995/11/17 18:58:46 wvcii - * added prototyping to MapParity - * - * Revision 1.3 1995/06/23 13:38:46 robby - * updeated to prototypes in rf_layout.h - * - */ - #ifndef _RF__RF_RAID4_H_ #define _RF__RF_RAID4_H_ -int rf_ConfigureRAID4(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t *raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t *raidPtr); -void rf_MapSectorRAID4(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_MapParityRAID4(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_IdentifyStripeRAID4(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, RF_RowCol_t *outRow); -void rf_MapSIDToPSIDRAID4(RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru); -void rf_RAID4DagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc); +int +rf_ConfigureRAID4(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t * raidPtr); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t * raidPtr); +void +rf_MapSectorRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_MapParityRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_IdentifyStripeRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outRow); +void +rf_MapSIDToPSIDRAID4(RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t * psID, + RF_ReconUnitNum_t * 
which_ru); +void +rf_RAID4DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -#endif /* !_RF__RF_RAID4_H_ */ +#endif /* !_RF__RF_RAID4_H_ */ diff --git a/sys/dev/raidframe/rf_raid5.c b/sys/dev/raidframe/rf_raid5.c index febb9f51f44..8e00cfc271f 100644 --- a/sys/dev/raidframe/rf_raid5.c +++ b/sys/dev/raidframe/rf_raid5.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid5.c,v 1.1 1999/01/11 14:29:43 niklas Exp $ */ -/* $NetBSD: rf_raid5.c,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_raid5.c,v 1.2 1999/02/16 00:03:17 niklas Exp $ */ +/* $NetBSD: rf_raid5.c,v 1.3 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,103 +33,6 @@ * *****************************************************************************/ -/* - * : - * Log: rf_raid5.c,v - * Revision 1.26 1996/11/05 21:10:40 jimz - * failed pda generalization - * - * Revision 1.25 1996/07/31 16:56:18 jimz - * dataBytesPerStripe, sectorsPerDisk init arch-indep. - * - * Revision 1.24 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.23 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.22 1996/06/11 08:54:27 jimz - * improved error-checking at configuration time - * - * Revision 1.21 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.20 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.19 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.18 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. 
Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.17 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.16 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.15 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.14 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.13 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.12 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.11 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.10 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.9 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.8 1996/05/03 19:38:58 wvcii - * moved dag creation routines to dag library - * - * Revision 1.7 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.6 1995/12/06 15:04:28 root - * added copyright info - * - * Revision 1.5 1995/11/17 18:59:41 
wvcii - * added prototyping to MapParity - * - * Revision 1.4 1995/06/23 13:38:21 robby - * updeated to prototypes in rf_layout.h - * - */ - #include "rf_types.h" #include "rf_raid.h" #include "rf_raid5.h" @@ -145,259 +48,274 @@ #include "rf_utils.h" typedef struct RF_Raid5ConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time and used by IdentifyStripe */ -} RF_Raid5ConfigInfo_t; - -int rf_ConfigureRAID5( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) + RF_RowCol_t **stripeIdentifier; /* filled in at config time and used + * by IdentifyStripe */ +} RF_Raid5ConfigInfo_t; + +int +rf_ConfigureRAID5( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid5ConfigInfo_t *info; - RF_RowCol_t i, j, startdisk; - - /* create a RAID level 5 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t), (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return(ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - - /* the stripe identifier must identify the disks in each stripe, - * IN THE ORDER THAT THEY APPEAR IN THE STRIPE. 
- */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return(ENOMEM); - startdisk = 0; - for (i=0; i<raidPtr->numCol; i++) { - for (j=0; j<raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; - } - if ((--startdisk) < 0) startdisk = raidPtr->numCol-1; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol-1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - return(0); + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_Raid5ConfigInfo_t *info; + RF_RowCol_t i, j, startdisk; + + /* create a RAID level 5 configuration structure */ + RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t), (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList); + if (info == NULL) + return (ENOMEM); + layoutPtr->layoutSpecificInfo = (void *) info; + + RF_ASSERT(raidPtr->numRow == 1); + + /* the stripe identifier must identify the disks in each stripe, IN + * THE ORDER THAT THEY APPEAR IN THE STRIPE. 
*/ + info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); + if (info->stripeIdentifier == NULL) + return (ENOMEM); + startdisk = 0; + for (i = 0; i < raidPtr->numCol; i++) { + for (j = 0; j < raidPtr->numCol; j++) { + info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; + } + if ((--startdisk) < 0) + startdisk = raidPtr->numCol - 1; + } + + /* fill in the remaining layout parameters */ + layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->numDataCol = raidPtr->numCol - 1; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->numParityCol = 1; + layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; + + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + + return (0); } -int rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t *raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t * raidPtr) { - return(20); + return (20); } -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t *raidPtr) +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t * raidPtr) { - return(10); + return (10); } - #if !defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(_KERNEL) /* not currently used */ -int rf_ShutdownRAID5(RF_Raid_t *raidPtr) +int +rf_ShutdownRAID5(RF_Raid_t * raidPtr) { - return(0); + return (0); } #endif -void rf_MapSectorRAID5( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapSectorRAID5( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - *row = 0; - *col = (SUID % raidPtr->numCol); - 
*diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + *row = 0; + *col = (SUID % raidPtr->numCol); + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void rf_MapParityRAID5( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapParityRAID5( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - *col = raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%raidPtr->numCol; - *diskSector =(SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + + *row = 0; + *col = raidPtr->Layout.numDataCol - (SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void rf_IdentifyStripeRAID5( - RF_Raid_t *raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t **diskids, - RF_RowCol_t *outRow) +void +rf_IdentifyStripeRAID5( + RF_Raid_t * raidPtr, + RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, + RF_RowCol_t * outRow) { - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_Raid5ConfigInfo_t *info = (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); + RF_Raid5ConfigInfo_t *info = (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - 
*outRow = 0; - *diskids = info->stripeIdentifier[ stripeID % raidPtr->numCol ]; + *outRow = 0; + *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; } -void rf_MapSIDToPSIDRAID5( - RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru) +void +rf_MapSIDToPSIDRAID5( + RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, + RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru) { - *which_ru = 0; - *psID = stripeID; + *which_ru = 0; + *psID = stripeID; } - /* select an algorithm for performing an access. Returns two pointers, * one to a function that will return information about the DAG, and * another to a function that will create the dag. */ -void rf_RaidFiveDagSelect( - RF_Raid_t *raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t *asmap, - RF_VoidFuncPtr *createFunc) +void +rf_RaidFiveDagSelect( + RF_Raid_t * raidPtr, + RF_IoType_t type, + RF_AccessStripeMap_t * asmap, + RF_VoidFuncPtr * createFunc) { - RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_PhysDiskAddr_t *failedPDA=NULL; - RF_RowCol_t frow, fcol; - RF_RowStatus_t rstat; - int prior_recon; - int tid; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); - /* *infoFunc = */ *createFunc = NULL; - return; - } else if (asmap->numDataFailed + asmap->numParityFailed == 1) { - - /* if under recon & already reconstructed, redirect the access to the spare drive - * and eliminate the failure indication - */ - failedPDA = asmap->failedPDAs[0]; - frow = failedPDA->row; fcol = failedPDA->col; - rstat = raidPtr->status[failedPDA->row]; - prior_recon = (rstat == rf_rs_reconfigured) || ( - (rstat == rf_rs_reconstructing) ? 
- rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); - if (prior_recon) { - RF_RowCol_t or = failedPDA->row,oc=failedPDA->col; - RF_SectorNum_t oo=failedPDA->startSector; - - if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { /* redirect to dist spare space */ - - if (failedPDA == asmap->parityInfo) { - - /* parity has failed */ - (layoutPtr->map->MapParity)(raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - if (asmap->parityInfo->next) { /* redir 2nd component, if any */ - RF_PhysDiskAddr_t *p = asmap->parityInfo->next; - RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; - p->row = failedPDA->row; - p->col = failedPDA->col; - p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + - SUoffs; /* cheating: startSector is not really a RAID address */ - } - - } else if (asmap->parityInfo->next && failedPDA == asmap->parityInfo->next) { - RF_ASSERT(0); /* should not ever happen */ + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + RF_PhysDiskAddr_t *failedPDA = NULL; + RF_RowCol_t frow, fcol; + RF_RowStatus_t rstat; + int prior_recon; + int tid; + + RF_ASSERT(RF_IO_IS_R_OR_W(type)); + + if (asmap->numDataFailed + asmap->numParityFailed > 1) { + RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); + /* *infoFunc = */ *createFunc = NULL; + return; + } else + if (asmap->numDataFailed + asmap->numParityFailed == 1) { + + /* if under recon & already reconstructed, redirect + * the access to the spare drive and eliminate the + * failure indication */ + failedPDA = asmap->failedPDAs[0]; + frow = failedPDA->row; + fcol = failedPDA->col; + rstat = raidPtr->status[failedPDA->row]; + prior_recon = (rstat == rf_rs_reconfigured) || ( + (rstat == rf_rs_reconstructing) ? 
+ rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 + ); + if (prior_recon) { + RF_RowCol_t or = failedPDA->row, oc = failedPDA->col; + RF_SectorNum_t oo = failedPDA->startSector; + + if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { /* redirect to dist + * spare space */ + + if (failedPDA == asmap->parityInfo) { + + /* parity has failed */ + (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, &failedPDA->row, + &failedPDA->col, &failedPDA->startSector, RF_REMAP); + + if (asmap->parityInfo->next) { /* redir 2nd component, + * if any */ + RF_PhysDiskAddr_t *p = asmap->parityInfo->next; + RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; + p->row = failedPDA->row; + p->col = failedPDA->col; + p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + + SUoffs; /* cheating: + * startSector is not + * really a RAID address */ + } + } else + if (asmap->parityInfo->next && failedPDA == asmap->parityInfo->next) { + RF_ASSERT(0); /* should not ever + * happen */ + } else { + + /* data has failed */ + (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, &failedPDA->row, + &failedPDA->col, &failedPDA->startSector, RF_REMAP); + + } + + } else { /* redirect to dedicated spare + * space */ + + failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; + failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; + + /* the parity may have two distinct + * components, both of which may need + * to be redirected */ + if (asmap->parityInfo->next) { + if (failedPDA == asmap->parityInfo) { + failedPDA->next->row = failedPDA->row; + failedPDA->next->col = failedPDA->col; + } else + if (failedPDA == asmap->parityInfo->next) { /* paranoid: should + * never occur */ + asmap->parityInfo->row = failedPDA->row; + asmap->parityInfo->col = failedPDA->col; + } + } + } + + RF_ASSERT(failedPDA->col != -1); + + if (rf_dagDebug || rf_mapDebug) { + rf_get_threadid(tid); + printf("[%d] 
Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", + tid, type, or, oc, (long) oo, failedPDA->row, failedPDA->col, + (long) failedPDA->startSector); + } + asmap->numDataFailed = asmap->numParityFailed = 0; + } + } + /* all dags begin/end with block/unblock node therefore, hdrSucc & + * termAnt counts should always be 1 also, these counts should not be + * visible outside dag creation routines - manipulating the counts + * here should be removed */ + if (type == RF_IO_TYPE_READ) { + if (asmap->numDataFailed == 0) + *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; + else + *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; } else { - /* data has failed */ - (layoutPtr->map->MapSector)(raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - } - } else { /* redirect to dedicated spare space */ - - failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; - failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; - - /* the parity may have two distinct components, both of which may need to be redirected */ - if (asmap->parityInfo->next) { - if (failedPDA == asmap->parityInfo) { - failedPDA->next->row = failedPDA->row; - failedPDA->next->col = failedPDA->col; - } else if (failedPDA == asmap->parityInfo->next) { /* paranoid: should never occur */ - asmap->parityInfo->row = failedPDA->row; - asmap->parityInfo->col = failedPDA->col; - } + /* if mirroring, always use large writes. If the access + * requires two distinct parity updates, always do a small + * write. If the stripe contains a failure but the access + * does not, do a small write. The first conditional + * (numStripeUnitsAccessed <= numDataCol/2) uses a + * less-than-or-equal rather than just a less-than because + * when G is 3 or 4, numDataCol/2 is 1, and I want + * single-stripe-unit updates to use just one disk. 
*/ + if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { + if (rf_suppressLocksAndLargeWrites || + (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || + (asmap->parityInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { + *createFunc = (RF_VoidFuncPtr) rf_CreateSmallWriteDAG; + } else + *createFunc = (RF_VoidFuncPtr) rf_CreateLargeWriteDAG; + } else { + if (asmap->numParityFailed == 1) + *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; + else + if (asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) + *createFunc = NULL; + else + *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; + } } - } - - RF_ASSERT(failedPDA->col != -1); - - if (rf_dagDebug || rf_mapDebug) { - rf_get_threadid(tid); - printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - tid,type,or,oc,(long)oo,failedPDA->row,failedPDA->col, - (long)failedPDA->startSector); - } - - asmap->numDataFailed = asmap->numParityFailed = 0; - } - - } - - /* all dags begin/end with block/unblock node - * therefore, hdrSucc & termAnt counts should always be 1 - * also, these counts should not be visible outside dag creation routines - - * manipulating the counts here should be removed */ - if (type == RF_IO_TYPE_READ) { - if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG; - else - *createFunc = (RF_VoidFuncPtr)rf_CreateRaidFiveDegradedReadDAG; - } else { - - - /* if mirroring, always use large writes. If the access requires two - * distinct parity updates, always do a small write. If the stripe - * contains a failure but the access does not, do a small write. - * The first conditional (numStripeUnitsAccessed <= numDataCol/2) uses a - * less-than-or-equal rather than just a less-than because when G is 3 - * or 4, numDataCol/2 is 1, and I want single-stripe-unit updates to use - * just one disk. 
- */ - if ( (asmap->numDataFailed + asmap->numParityFailed) == 0) { - if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol!=1)) || - (asmap->parityInfo->next!=NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { - *createFunc = (RF_VoidFuncPtr)rf_CreateSmallWriteDAG; - } - else - *createFunc = (RF_VoidFuncPtr)rf_CreateLargeWriteDAG; - } - else { - if (asmap->numParityFailed == 1) - *createFunc = (RF_VoidFuncPtr)rf_CreateNonRedundantWriteDAG; - else - if (asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; - else - *createFunc = (RF_VoidFuncPtr)rf_CreateDegradedWriteDAG; - } - } } diff --git a/sys/dev/raidframe/rf_raid5.h b/sys/dev/raidframe/rf_raid5.h index a6ffc971ca4..06eecda486e 100644 --- a/sys/dev/raidframe/rf_raid5.h +++ b/sys/dev/raidframe/rf_raid5.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid5.h,v 1.1 1999/01/11 14:29:43 niklas Exp $ */ -/* $NetBSD: rf_raid5.h,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_raid5.h,v 1.2 1999/02/16 00:03:17 niklas Exp $ */ +/* $NetBSD: rf_raid5.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,85 +29,29 @@ /* rf_raid5.h - header file for RAID Level 5 */ -/* - * : - * Log: rf_raid5.h,v - * Revision 1.15 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.14 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.13 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.12 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.11 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.10 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.9 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.8 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.7 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.6 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.5 1995/12/06 15:04:35 root - * added copyright info - * - * Revision 1.4 1995/11/17 19:09:08 wvcii - * added prototyping to MapParity - * - * Revision 1.3 1995/11/07 15:25:40 wvcii - * changed RAIDFiveDagSelect prototype - * function no longer generates numHdrSucc, numTermAnt - * - * Revision 1.2 1995/06/23 13:37:53 robby - * updeated to prototypes in rf_layout.h - * - */ - #ifndef _RF__RF_RAID5_H_ #define _RF__RF_RAID5_H_ -int rf_ConfigureRAID5(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t *raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t *raidPtr); -void rf_MapSectorRAID5(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_MapParityRAID5(RF_Raid_t *raidPtr, 
RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_IdentifyStripeRAID5(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, RF_RowCol_t *outRow); -void rf_MapSIDToPSIDRAID5(RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru); -void rf_RaidFiveDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc); +int +rf_ConfigureRAID5(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t * raidPtr); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t * raidPtr); +void +rf_MapSectorRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_MapParityRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_IdentifyStripeRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outRow); +void +rf_MapSIDToPSIDRAID5(RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru); +void +rf_RaidFiveDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -#endif /* !_RF__RF_RAID5_H_ */ +#endif /* !_RF__RF_RAID5_H_ */ diff --git a/sys/dev/raidframe/rf_raid5_rotatedspare.c b/sys/dev/raidframe/rf_raid5_rotatedspare.c index ca103f2116a..5995d61ea15 100644 --- a/sys/dev/raidframe/rf_raid5_rotatedspare.c +++ b/sys/dev/raidframe/rf_raid5_rotatedspare.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid5_rotatedspare.c,v 1.1 1999/01/11 14:29:44 niklas Exp $ */ -/* $NetBSD: rf_raid5_rotatedspare.c,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_raid5_rotatedspare.c,v 1.2 1999/02/16 00:03:18 niklas Exp $ */ +/* $NetBSD: 
rf_raid5_rotatedspare.c,v 1.3 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,92 +33,6 @@ * **************************************************************************/ -/* : - * Log: rf_raid5_rotatedspare.c,v - * Revision 1.22 1996/07/31 16:56:18 jimz - * dataBytesPerStripe, sectorsPerDisk init arch-indep. - * - * Revision 1.21 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.20 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.19 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.18 1996/06/19 17:53:48 jimz - * move GetNumSparePUs, InstallSpareTable ops into layout switch - * - * Revision 1.17 1996/06/11 08:54:27 jimz - * improved error-checking at configuration time - * - * Revision 1.16 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.15 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.14 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.13 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.12 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.11 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.10 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.9 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.8 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.7 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.6 1996/05/03 19:48:36 wvcii - * removed include of rf_redstripe.h - * - * Revision 1.5 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.4 1995/12/06 15:05:53 root - * added copyright info - * - * Revision 1.3 1995/11/19 21:26:29 amiri - * Added an assert to make sure numCol >= 3 - * - * Revision 1.2 1995/11/17 19:03:18 wvcii - * added prototyping to MapParity - * - */ - #include "rf_raid.h" #include "rf_raid5.h" #include "rf_dag.h" @@ -129,122 +43,129 @@ #include "rf_utils.h" #include "rf_raid5_rotatedspare.h" -typedef struct RF_Raid5RSConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by IdentifyStripe */ -} RF_Raid5RSConfigInfo_t; +typedef struct RF_Raid5RSConfigInfo_s { + RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by + * IdentifyStripe */ +} RF_Raid5RSConfigInfo_t; -int rf_ConfigureRAID5_RS( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureRAID5_RS( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_Raid5RSConfigInfo_t *info; - 
RF_RowCol_t i, j, startdisk; - - /* create a RAID level 5 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid5RSConfigInfo_t), (RF_Raid5RSConfigInfo_t *), raidPtr->cleanupList); - if (info == NULL) - return(ENOMEM); - layoutPtr->layoutSpecificInfo = (void *) info; - - RF_ASSERT(raidPtr->numRow == 1); - RF_ASSERT(raidPtr->numCol >= 3); - - /* the stripe identifier must identify the disks in each stripe, - * IN THE ORDER THAT THEY APPEAR IN THE STRIPE. - */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); - if (info->stripeIdentifier == NULL) - return(ENOMEM); - startdisk = 0; - for (i=0; i<raidPtr->numCol; i++) { - for (j=0; j<raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; - } - if ((--startdisk) < 0) startdisk = raidPtr->numCol-1; - } - - /* fill in the remaining layout parameters */ - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol-2; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numParityCol = 1; - layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - return(0); + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_Raid5RSConfigInfo_t *info; + RF_RowCol_t i, j, startdisk; + + /* create a RAID level 5 configuration structure */ + RF_MallocAndAdd(info, sizeof(RF_Raid5RSConfigInfo_t), (RF_Raid5RSConfigInfo_t *), raidPtr->cleanupList); + if (info == NULL) + return (ENOMEM); + layoutPtr->layoutSpecificInfo = (void *) info; + + RF_ASSERT(raidPtr->numRow == 1); + RF_ASSERT(raidPtr->numCol >= 3); + + /* the stripe identifier must identify 
the disks in each stripe, IN + * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ + info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); + if (info->stripeIdentifier == NULL) + return (ENOMEM); + startdisk = 0; + for (i = 0; i < raidPtr->numCol; i++) { + for (j = 0; j < raidPtr->numCol; j++) { + info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; + } + if ((--startdisk) < 0) + startdisk = raidPtr->numCol - 1; + } + + /* fill in the remaining layout parameters */ + layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->numDataCol = raidPtr->numCol - 2; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->numParityCol = 1; + layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; + raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + + return (0); } -RF_ReconUnitCount_t rf_GetNumSpareRUsRAID5_RS(raidPtr) - RF_Raid_t *raidPtr; +RF_ReconUnitCount_t +rf_GetNumSpareRUsRAID5_RS(raidPtr) + RF_Raid_t *raidPtr; { - return ( raidPtr->Layout.stripeUnitsPerDisk / raidPtr->numCol ); + return (raidPtr->Layout.stripeUnitsPerDisk / raidPtr->numCol); } -void rf_MapSectorRAID5_RS( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapSectorRAID5_RS( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - if (remap) { - *col = raidPtr->numCol-1-(1+SUID/raidPtr->Layout.numDataCol)%raidPtr->numCol; - *col = (*col+1)%raidPtr->numCol; /*spare unit 
is rotated with parity; line above maps to parity */ - } - else { - *col = ( SUID + (SUID/raidPtr->Layout.numDataCol) ) % raidPtr->numCol; - } - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + + *row = 0; + if (remap) { + *col = raidPtr->numCol - 1 - (1 + SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; + *col = (*col + 1) % raidPtr->numCol; /* spare unit is rotated + * with parity; line + * above maps to parity */ + } else { + *col = (SUID + (SUID / raidPtr->Layout.numDataCol)) % raidPtr->numCol; + } + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void rf_MapParityRAID5_RS( - RF_Raid_t *raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t *row, - RF_RowCol_t *col, - RF_SectorNum_t *diskSector, - int remap) +void +rf_MapParityRAID5_RS( + RF_Raid_t * raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t * row, + RF_RowCol_t * col, + RF_SectorNum_t * diskSector, + int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - - *row = 0; - *col = raidPtr->numCol-1-(1+SUID/raidPtr->Layout.numDataCol)%raidPtr->numCol; - *diskSector =(SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + - (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - if (remap) - *col = (*col+1)%raidPtr->numCol; + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + + *row = 0; + *col = raidPtr->numCol - 1 - (1 + SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + if (remap) + *col = (*col + 1) % raidPtr->numCol; } -void rf_IdentifyStripeRAID5_RS( - RF_Raid_t *raidPtr, - RF_RaidAddr_t addr, - 
RF_RowCol_t **diskids, - RF_RowCol_t *outRow) +void +rf_IdentifyStripeRAID5_RS( + RF_Raid_t * raidPtr, + RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, + RF_RowCol_t * outRow) { - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_Raid5RSConfigInfo_t *info = (RF_Raid5RSConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - *outRow = 0; - *diskids = info->stripeIdentifier[ stripeID % raidPtr->numCol ]; + RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); + RF_Raid5RSConfigInfo_t *info = (RF_Raid5RSConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + *outRow = 0; + *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; } -void rf_MapSIDToPSIDRAID5_RS( - RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru) +void +rf_MapSIDToPSIDRAID5_RS( + RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, + RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru) { - *which_ru = 0; - *psID = stripeID; + *which_ru = 0; + *psID = stripeID; } - diff --git a/sys/dev/raidframe/rf_raid5_rotatedspare.h b/sys/dev/raidframe/rf_raid5_rotatedspare.h index e144b00f6d0..d3d13cb57c4 100644 --- a/sys/dev/raidframe/rf_raid5_rotatedspare.h +++ b/sys/dev/raidframe/rf_raid5_rotatedspare.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid5_rotatedspare.h,v 1.1 1999/01/11 14:29:44 niklas Exp $ */ -/* $NetBSD: rf_raid5_rotatedspare.h,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_raid5_rotatedspare.h,v 1.2 1999/02/16 00:03:18 niklas Exp $ */ +/* $NetBSD: rf_raid5_rotatedspare.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -29,77 +29,25 @@ /* rf_raid5_rotatedspare.h - header file for RAID Level 5 with rotated sparing */ -/* : - * Log: rf_raid5_rotatedspare.h,v - * Revision 1.13 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.12 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.11 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.10 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.9 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. 
- * - * Revision 1.8 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.7 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.6 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.5 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.4 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.3 1995/12/06 15:06:00 root - * added copyright info - * - * Revision 1.2 1995/11/17 19:09:54 wvcii - * added prototyping to MapParity - * - */ - #ifndef _RF__RF_RAID5_ROTATEDSPARE_H_ #define _RF__RF_RAID5_ROTATEDSPARE_H_ -int rf_ConfigureRAID5_RS(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsRAID5_RS(RF_Raid_t *raidPtr); -void rf_MapSectorRAID5_RS(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_MapParityRAID5_RS(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap); -void rf_IdentifyStripeRAID5_RS(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t **diskids, RF_RowCol_t *outRow); -void rf_MapSIDToPSIDRAID5_RS(RF_RaidLayout_t *layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t *psID, - RF_ReconUnitNum_t *which_ru); +int +rf_ConfigureRAID5_RS(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +RF_ReconUnitCount_t rf_GetNumSpareRUsRAID5_RS(RF_Raid_t 
* raidPtr); +void +rf_MapSectorRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_MapParityRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); +void +rf_IdentifyStripeRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t ** diskids, RF_RowCol_t * outRow); +void +rf_MapSIDToPSIDRAID5_RS(RF_RaidLayout_t * layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t * psID, + RF_ReconUnitNum_t * which_ru); -#endif /* !_RF__RF_RAID5_ROTATEDSPARE_H_ */ +#endif /* !_RF__RF_RAID5_ROTATEDSPARE_H_ */ diff --git a/sys/dev/raidframe/rf_raidframe.h b/sys/dev/raidframe/rf_raidframe.h index e316dd09eb4..3c39f0c09d5 100644 --- a/sys/dev/raidframe/rf_raidframe.h +++ b/sys/dev/raidframe/rf_raidframe.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raidframe.h,v 1.1 1999/01/11 14:29:44 niklas Exp $ */ -/* $NetBSD: rf_raidframe.h,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_raidframe.h,v 1.2 1999/02/16 00:03:19 niklas Exp $ */ +/* $NetBSD: rf_raidframe.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,62 +29,12 @@ /***************************************************** * - * rf_raidframe.h + * rf_raidframe.h * * main header file for using raidframe in the kernel. * *****************************************************/ -/* - * : - * - * Log: rf_raidframe.h,v - * Revision 1.21 1996/06/17 03:00:15 jimz - * Change RAIDFRAME_GET_INFO interface to work around ioctl - * size limitation problem. This operation now takes a pointer - * to a pointer, and does its own copyout() (so it can transfer - * more than 8k at a time). 
- * - * Revision 1.20 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.19 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.18 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.17 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.16 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.15 1996/05/02 22:09:48 jimz - * change devs and spares in device_config to RF_RaidDisk_t - * - * Revision 1.14 1995/12/06 15:03:33 root - * added copyright info - * - * Revision 1.13 1995/09/30 20:39:54 jimz - * added new ioctls: - * RAIDFRAME_RESET_ACCTOTALS - * RAIDFRAME_GET_ACCTOTALS - * RAIDFRAME_KEEP_ACCTOTALS - * - * Revision 1.12 1995/09/25 20:11:51 wvcii - * Added #include "rf_raid.h" - * - * - */ #ifndef _RF__RF_RAIDFRAME_H_ #define _RF__RF_RAIDFRAME_H_ @@ -94,72 +44,98 @@ #include "rf_disks.h" #include "rf_raid.h" -struct rf_test_acc { /* used by RAIDFRAME_TEST_ACC ioctl */ - RF_SectorNum_t startSector; /* raidAddress */ - RF_SectorCount_t numSector; /* number of sectors to xfer */ - char *buf; /* data buffer */ - void *returnBufs[10]; /* for async accs only, completed I/Os returned */ - struct rf_test_acc *next; /* for making lists */ - RF_IoType_t type; /* (see rf_types.h for RF_IO_TYPE_*) */ - struct rf_test_acc *myaddr; /* user-address of this struct */ - void *bp; /* used in-kernel: need not be set by user */ +struct rf_test_acc { /* used by RAIDFRAME_TEST_ACC 
ioctl */ + RF_SectorNum_t startSector; /* raidAddress */ + RF_SectorCount_t numSector; /* number of sectors to xfer */ + char *buf; /* data buffer */ + void *returnBufs[10]; /* for async accs only, completed I/Os + * returned */ + struct rf_test_acc *next; /* for making lists */ + RF_IoType_t type; /* (see rf_types.h for RF_IO_TYPE_*) */ + struct rf_test_acc *myaddr; /* user-address of this struct */ + void *bp; /* used in-kernel: need not be set by user */ }; typedef RF_uint32 RF_ReconReqFlags_t; -struct rf_recon_req { /* used to tell the kernel to fail a disk */ - RF_RowCol_t row, col; - RF_ReconReqFlags_t flags; - void *raidPtr; /* used internally; need not be set at ioctl time */ - struct rf_recon_req *next; /* used internally; need not be set at ioctl time */ +struct rf_recon_req { /* used to tell the kernel to fail a disk */ + RF_RowCol_t row, col; + RF_ReconReqFlags_t flags; + void *raidPtr; /* used internally; need not be set at ioctl + * time */ + struct rf_recon_req *next; /* used internally; need not be set at + * ioctl time */ }; struct RF_SparetWait_s { - int C, G, fcol; /* C = # disks in row, G = # units in stripe, fcol = which disk has failed */ - - RF_StripeCount_t SUsPerPU; /* this stuff is the info required to create a spare table */ - int TablesPerSpareRegion; - int BlocksPerTable; - RF_StripeCount_t TableDepthInPUs; - RF_StripeCount_t SpareSpaceDepthPerRegionInSUs; - - RF_SparetWait_t *next; /* used internally; need not be set at ioctl time */ + int C, G, fcol; /* C = # disks in row, G = # units in stripe, + * fcol = which disk has failed */ + + RF_StripeCount_t SUsPerPU; /* this stuff is the info required to + * create a spare table */ + int TablesPerSpareRegion; + int BlocksPerTable; + RF_StripeCount_t TableDepthInPUs; + RF_StripeCount_t SpareSpaceDepthPerRegionInSUs; + + RF_SparetWait_t *next; /* used internally; need not be set at ioctl + * time */ }; typedef struct RF_DeviceConfig_s { - u_int rows; - u_int cols; - u_int maxqdepth; - int 
ndevs; + u_int rows; + u_int cols; + u_int maxqdepth; + int ndevs; RF_RaidDisk_t devs[RF_MAX_DISKS]; - int nspares; + int nspares; RF_RaidDisk_t spares[RF_MAX_DISKS]; -} RF_DeviceConfig_t; +} RF_DeviceConfig_t; /* flags that can be put in the rf_recon_req structure */ -#define RF_FDFLAGS_NONE 0x0 /* just fail the disk */ -#define RF_FDFLAGS_RECON 0x1 /* fail and initiate recon */ - -#define RF_SCSI_DISK_MAJOR 8 /* the device major number for disks in the system */ - -#define RAIDFRAME_CONFIGURE _IOW ('r', 1, void *) /* configure the driver */ -#define RAIDFRAME_SHUTDOWN _IO ('r', 2) /* shutdown the driver */ -#define RAIDFRAME_TUR _IOW ('r', 3, dev_t) /* debug only: test unit ready */ -#define RAIDFRAME_TEST_ACC _IOWR('r', 4, struct rf_test_acc) /* run a test access */ -#define RAIDFRAME_FAIL_DISK _IOW ('r', 5, struct rf_recon_req) /* fail a disk & optionally start recon */ -#define RAIDFRAME_CHECKRECON _IOWR('r', 6, int) /* get reconstruction % complete on indicated row */ -#define RAIDFRAME_REWRITEPARITY _IO ('r', 7) /* rewrite (initialize) all parity */ -#define RAIDFRAME_COPYBACK _IO ('r', 8) /* copy reconstructed data back to replaced disk */ -#define RAIDFRAME_SPARET_WAIT _IOR ('r', 9, RF_SparetWait_t) /* does not return until kernel needs a spare table */ -#define RAIDFRAME_SEND_SPARET _IOW ('r', 10, void *) /* used to send a spare table down into the kernel */ -#define RAIDFRAME_ABORT_SPARET_WAIT _IO ('r', 11) /* used to wake up the sparemap daemon & tell it to exit */ -#define RAIDFRAME_START_ATRACE _IO ('r', 12) /* start tracing accesses */ -#define RAIDFRAME_STOP_ATRACE _IO ('r', 13) /* stop tracing accesses */ -#define RAIDFRAME_GET_SIZE _IOR ('r', 14, int) /* get size (# sectors) in raid device */ -#define RAIDFRAME_GET_INFO _IOWR('r', 15, RF_DeviceConfig_t *) /* get configuration */ -#define RAIDFRAME_RESET_ACCTOTALS _IO ('r', 16) /* reset AccTotals for device */ -#define RAIDFRAME_GET_ACCTOTALS _IOR ('r', 17, RF_AccTotals_t) /* retrieve AccTotals for 
device */ -#define RAIDFRAME_KEEP_ACCTOTALS _IOW ('r', 18, int) /* turn AccTotals on or off for device */ - -#endif /* !_RF__RF_RAIDFRAME_H_ */ +#define RF_FDFLAGS_NONE 0x0 /* just fail the disk */ +#define RF_FDFLAGS_RECON 0x1 /* fail and initiate recon */ + +#define RF_SCSI_DISK_MAJOR 8 /* the device major number for disks in the + * system */ + +#define RAIDFRAME_CONFIGURE _IOW ('r', 1, void *) /* configure the driver */ +#define RAIDFRAME_SHUTDOWN _IO ('r', 2) /* shutdown the driver */ +#define RAIDFRAME_TUR _IOW ('r', 3, dev_t) /* debug only: test unit + * ready */ +#define RAIDFRAME_TEST_ACC _IOWR('r', 4, struct rf_test_acc) /* run a test access */ +#define RAIDFRAME_FAIL_DISK _IOW ('r', 5, struct rf_recon_req) /* fail a disk & + * optionally start + * recon */ +#define RAIDFRAME_CHECKRECON _IOWR('r', 6, int) /* get reconstruction % + * complete on indicated + * row */ +#define RAIDFRAME_REWRITEPARITY _IO ('r', 7) /* rewrite (initialize) + * all parity */ +#define RAIDFRAME_COPYBACK _IO ('r', 8) /* copy reconstructed + * data back to replaced + * disk */ +#define RAIDFRAME_SPARET_WAIT _IOR ('r', 9, RF_SparetWait_t) /* does not return until + * kernel needs a spare + * table */ +#define RAIDFRAME_SEND_SPARET _IOW ('r', 10, void *) /* used to send a spare + * table down into the + * kernel */ +#define RAIDFRAME_ABORT_SPARET_WAIT _IO ('r', 11) /* used to wake up the + * sparemap daemon & + * tell it to exit */ +#define RAIDFRAME_START_ATRACE _IO ('r', 12) /* start tracing + * accesses */ +#define RAIDFRAME_STOP_ATRACE _IO ('r', 13) /* stop tracing accesses */ +#define RAIDFRAME_GET_SIZE _IOR ('r', 14, int) /* get size (# sectors) + * in raid device */ +#define RAIDFRAME_GET_INFO _IOWR('r', 15, RF_DeviceConfig_t *) /* get configuration */ +#define RAIDFRAME_RESET_ACCTOTALS _IO ('r', 16) /* reset AccTotals for + * device */ +#define RAIDFRAME_GET_ACCTOTALS _IOR ('r', 17, RF_AccTotals_t) /* retrieve AccTotals + * for device */ +#define RAIDFRAME_KEEP_ACCTOTALS _IOW 
('r', 18, int) /* turn AccTotals on or + * off for device */ + +#endif /* !_RF__RF_RAIDFRAME_H_ */ diff --git a/sys/dev/raidframe/rf_randmacros.h b/sys/dev/raidframe/rf_randmacros.h deleted file mode 100644 index c3536e0c613..00000000000 --- a/sys/dev/raidframe/rf_randmacros.h +++ /dev/null @@ -1,228 +0,0 @@ -/* $OpenBSD: rf_randmacros.h,v 1.1 1999/01/11 14:29:45 niklas Exp $ */ -/* $NetBSD: rf_randmacros.h,v 1.1 1998/11/13 04:20:33 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/* rf_randmacros.h - * some macros to simplify using random in a multithreaded environment - */ - -/* : - * Log: rf_randmacros.h,v - * Revision 1.17 1996/08/12 22:37:57 jimz - * use regular random() stuff for AIX - * - * Revision 1.16 1996/08/11 00:41:03 jimz - * fix up for aix4 - * - * Revision 1.15 1996/07/29 05:22:34 jimz - * use rand/srand on hpux - * - * Revision 1.14 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.13 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.12 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.11 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.10 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.9 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.8 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.7 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.6 1996/05/21 18:52:56 jimz - * mask out highest bit from RANDOM (was causing angst) - * - * Revision 1.5 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.4 1995/12/06 15:05:41 root - * added copyright info - * - */ - -#ifndef _RF__RF_RANDMACROS_H_ -#define 
_RF__RF_RANDMACROS_H_ - -#ifndef KERNEL - -#ifdef __osf__ -/* - * Okay, here's the deal. The DEC man page for initstate_r() sez: - * - * int initstate_r(unsigned seed, char *state, int size, char **retval, - * struct random_data *rand_data); - * - * That wouldn't bug me so much, if /usr/include/random.h on the alpha - * didn't say: - * - * int initstate_r(unsigned, char *, int, RANDMOD *); - * - * Most of the other random functions have similar problems (docs - * don't match random.h). This is the case for random_r(), for - * instance. Generally, I'm inclined to trust the code over the - * documentation. Problem is, I have no clue what the arguments for - * the prototyped versions are, since they don't have descriptive names - * comma the bastards. - * - * Update: I looked at the DU sources to get this straightened out. - * The docs are correct. and everything in random.h is wrong. Uh, that's - * really cool or something. Not. I'm going to try slapping in prototypes - * that match my view of the universe, here. - * - * Okay, now let's have some more fun. /usr/include/stdlib.h also defines - * all this stuff, only differently. I mean differently from random.h, - * _and_ differently from the source. How cool is _that_? 
- * - * --jimz - */ -#ifndef _NO_PROTO -#define _NO_PROTO -#define _RF_SPANKME -#endif /* !_NO_PROTO */ -#include <random.h> -#ifdef _RF_SPANKME -#undef _NO_PROTO -#undef _RF_SPANKME -#endif /* _RF_SPANKME */ - -extern int initstate_r(unsigned seed, char *arg_state, int n, char **retval, - struct random_data *rand_data); -extern int random_r(int *retval, struct random_data *rand_data); -#endif /* __osf__ */ -#ifdef SIMULATE -#if defined(DEC_OSF) || defined(hpux) -extern int random(void); -extern int srandom(unsigned); -#endif /* DEC_OSF || hpux */ -#if defined(AIX) && RF_AIXVERS == 3 -extern int random(void); -extern int srandom(unsigned); -#endif /* AIX && RF_AIXVERS == 3 */ -#endif /* SIMULATE */ - -#define RF_FASTRANDOM 0 /* when >0 make RANDOM a macro instead of a function */ - -#ifdef __osf__ -long rf_do_random(long *rval, struct random_data *rdata); /* in utils.c */ -#endif /* __osf__ */ - -#ifndef SIMULATE - -#ifdef __osf__ -/* - * Mark's original comment about this rigamarole was, "What a pile of crap." 
- */ -#define RF_DECLARE_RANDOM \ - struct random_data randdata; \ - long randstate[64+1]; \ - char *stptr = ((char *) randstate)+4; \ - char *randst; \ - long randval - -#define RF_DECLARE_STATIC_RANDOM \ - static struct random_data randdata_st; \ - static long randstate_st[64+1]; \ - static char *stptr_st = ((char *) randstate_st)+4; \ - static char *randst_st; \ - long randval_st; - -#define RF_INIT_RANDOM(_s_) \ - randdata.state = NULL; \ - initstate_r((unsigned) (_s_), stptr, 64, &randst, &randdata); - -#define RF_INIT_STATIC_RANDOM(_s_) \ - randdata_st.state = NULL; \ - initstate_r((unsigned) (_s_), stptr_st, 64, &randst_st, &randdata_st); - -#if RF_FASTRANDOM > 0 -#define RF_RANDOM() (random_r(&randval, &randdata),randval) -#define RF_STATIC_RANDOM() (random_r(&randval_st, &randdata_st),randval_st) -#else /* RF_FASTRANDOM > 0 */ -#define RF_RANDOM() (rf_do_random(&randval, &randdata)&0x7fffffffffffffff) -#define RF_STATIC_RANDOM() rf_do_random(&randval_st, &randdata_st) -#endif /* RF_FASTRANDOM > 0 */ - -#define RF_SRANDOM(_s_) srandom_r((_s_), &randdata) -#define RF_STATIC_SRANDOM(_s_) srandom_r((_s_), &randdata_st) -#endif /* __osf__ */ - -#ifdef AIX -#define RF_INIT_STATIC_RANDOM(_s_) -#define RF_DECLARE_STATIC_RANDOM static int rf_rand_decl##__LINE__ -#define RF_DECLARE_RANDOM int rf_rand_decl##__LINE__ -#define RF_RANDOM() random() -#define RF_STATIC_RANDOM() random() -#define RF_INIT_RANDOM(_n_) srandom(_n_) -#endif /* AIX */ - -#else /* !SIMULATE */ - -#define RF_INIT_STATIC_RANDOM(_s_) -#define RF_DECLARE_STATIC_RANDOM static int rf_rand_decl##__LINE__ -#define RF_DECLARE_RANDOM int rf_rand_decl##__LINE__ -#if defined(sun) || defined(hpux) -#define RF_RANDOM() rand() -#define RF_STATIC_RANDOM() rand() -#define RF_INIT_RANDOM(_n_) srand(_n_) -#else /* sun || hpux */ -#define RF_RANDOM() random() -#define RF_STATIC_RANDOM() random() -#define RF_INIT_RANDOM(_n_) srandom(_n_) -#endif /* sun || hpux */ - -#endif /* !SIMULATE */ - -#endif /* !KERNEL */ - 
-#endif /* !_RF__RF_RANDMACROS_H_ */ diff --git a/sys/dev/raidframe/rf_reconbuffer.c b/sys/dev/raidframe/rf_reconbuffer.c index 2c24e47c111..011060ef6db 100644 --- a/sys/dev/raidframe/rf_reconbuffer.c +++ b/sys/dev/raidframe/rf_reconbuffer.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconbuffer.c,v 1.1 1999/01/11 14:29:45 niklas Exp $ */ -/* $NetBSD: rf_reconbuffer.c,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_reconbuffer.c,v 1.2 1999/02/16 00:03:20 niklas Exp $ */ +/* $NetBSD: rf_reconbuffer.c,v 1.3 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,85 +33,6 @@ * ***************************************************/ -/* : - * Log: rf_reconbuffer.c,v - * Revision 1.33 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.32 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.31 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.30 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.29 1996/06/06 01:23:58 jimz - * don't free reconCtrlPtr until after all fields have been used out of it - * - * Revision 1.28 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.27 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.26 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. 
- * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.25 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.24 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.23 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.22 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.21 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.20 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.19 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.18 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.17 1995/12/06 15:03:24 root - * added copyright info - * - */ - -#ifdef _KERNEL -#define KERNEL -#endif - #include "rf_raid.h" #include "rf_reconbuffer.h" #include "rf_acctrace.h" @@ -122,33 +43,11 @@ #include "rf_reconutil.h" #include "rf_nwayxor.h" -#ifdef KERNEL #define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a) #define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b) #define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c) #define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d) #define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e) -#else /* KERNEL */ -#define Dprintf1(s,a) if (rf_reconbufferDebug) rf_debug_printf(s,(void *)((unsigned 
long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) if (rf_reconbufferDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) -#define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL) -#define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL) -#endif /* KERNEL */ - -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - -/* XXX XXX XXX This is wrong, for a number of reasons: - a) thread_block doesn't exist with UVM - b) The prototype begin used here is wrong for the regular VM - (regular VM expects a (char *) as an argument. I don't put - that in here as this code uses thread_block with no arguments.. :-/ - -*/ -#if 0 -void thread_block(void); -#endif -#endif /***************************************************************************************** * @@ -174,246 +73,271 @@ void thread_block(void); * bufs into the accumulating sum. 
*/ static RF_VoidFuncPtr nWayXorFuncs[] = { - NULL, - (RF_VoidFuncPtr)rf_nWayXor1, - (RF_VoidFuncPtr)rf_nWayXor2, - (RF_VoidFuncPtr)rf_nWayXor3, - (RF_VoidFuncPtr)rf_nWayXor4, - (RF_VoidFuncPtr)rf_nWayXor5, - (RF_VoidFuncPtr)rf_nWayXor6, - (RF_VoidFuncPtr)rf_nWayXor7, - (RF_VoidFuncPtr)rf_nWayXor8, - (RF_VoidFuncPtr)rf_nWayXor9 + NULL, + (RF_VoidFuncPtr) rf_nWayXor1, + (RF_VoidFuncPtr) rf_nWayXor2, + (RF_VoidFuncPtr) rf_nWayXor3, + (RF_VoidFuncPtr) rf_nWayXor4, + (RF_VoidFuncPtr) rf_nWayXor5, + (RF_VoidFuncPtr) rf_nWayXor6, + (RF_VoidFuncPtr) rf_nWayXor7, + (RF_VoidFuncPtr) rf_nWayXor8, + (RF_VoidFuncPtr) rf_nWayXor9 }; - -int rf_SubmitReconBuffer(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have to return it */ - int use_committed; /* whether to use a committed or an available recon buffer */ + +int +rf_SubmitReconBuffer(rbuf, keep_it, use_committed) + RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ + int keep_it; /* whether we can keep this buffer or we have + * to return it */ + int use_committed; /* whether to use a committed or an available + * recon buffer */ { - RF_LayoutSW_t *lp; - int rc; + RF_LayoutSW_t *lp; + int rc; - lp = rbuf->raidPtr->Layout.map; - rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed); - return(rc); + lp = rbuf->raidPtr->Layout.map; + rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed); + return (rc); } -int rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have to return it */ - int use_committed; /* whether to use a committed or an available recon buffer */ +int +rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed) + RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ + int keep_it; /* whether we can keep this buffer or we have + * to return it */ + int use_committed; /* whether to use a 
committed or an available + * recon buffer */ { - RF_Raid_t *raidPtr = rbuf->raidPtr; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row]; - RF_ReconParityStripeStatus_t *pssPtr; - RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf pointers */ - caddr_t ta; /* temporary data buffer pointer */ - RF_CallbackDesc_t *cb, *p; - int retcode = 0, created = 0; - - RF_Etimer_t timer; - - /* makes no sense to have a submission from the failed disk */ - RF_ASSERT(rbuf); - RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); - - Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n", - rbuf->row, rbuf->col, (long)rbuf->parityStripeID, rbuf->which_ru, (long)rbuf->failedDiskSectorOffset); - - RF_LOCK_PSS_MUTEX(raidPtr,rbuf->row,rbuf->parityStripeID); - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); - RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten an rbuf for it */ - - /* check to see if enough buffers have accumulated to do an XOR. If so, there's no need to - * acquire a floating rbuf. Before we can do any XORing, we must have acquired a destination - * buffer. If we have, then we can go ahead and do the XOR if (1) including this buffer, enough - * bufs have accumulated, or (2) this is the last submission for this stripe. - * Otherwise, we have to go acquire a floating rbuf. 
- */ - - targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; - if ( (targetRbuf != NULL) && - ((pssPtr->xorBufCount == rf_numBufsToAccumulate-1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol)) ) { - pssPtr->rbufsForXor[ pssPtr->xorBufCount++ ] = rbuf; /* install this buffer */ - Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n",rbuf->row, rbuf->col,pssPtr->xorBufCount); - RF_ETIMER_START(timer); - rf_MultiWayReconXor(raidPtr, pssPtr); - RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); - raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer); - if (!keep_it) { - raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer); - RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - - rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); - } - rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); - - /* if use_committed is on, we _must_ consume a buffer off the committed list. 
*/ - if (use_committed) { - t = reconCtrlPtr->committedRbufs; - RF_ASSERT(t); - reconCtrlPtr->committedRbufs = t->next; - rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t); - } - if (keep_it) { - RF_UNLOCK_PSS_MUTEX( raidPtr,rbuf->row,rbuf->parityStripeID); - RF_UNLOCK_MUTEX( reconCtrlPtr->rb_mutex ); - rf_FreeReconBuffer(rbuf); - return(retcode); - } - goto out; - } - - /* set the value of "t", which we'll use as the rbuf from here on */ - if (keep_it) { - t = rbuf; - } - else { - if (use_committed) { /* if a buffer has been committed to us, use it */ - t = reconCtrlPtr->committedRbufs; - RF_ASSERT(t); - reconCtrlPtr->committedRbufs = t->next; - t->next = NULL; - } else if (reconCtrlPtr->floatingRbufs) { - t = reconCtrlPtr->floatingRbufs; - reconCtrlPtr->floatingRbufs = t->next; - t->next = NULL; - } - } - - /* If we weren't able to acquire a buffer, - * append to the end of the buf list in the recon ctrl struct. - */ - if (!t) { - RF_ASSERT(!keep_it && !use_committed); - Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n",rbuf->row, rbuf->col); - - raidPtr->procsInBufWait++; - if ( (raidPtr->procsInBufWait == raidPtr->numCol -1) && (raidPtr->numFullReconBuffers == 0)) { - printf("Buffer wait deadlock detected. 
Exiting.\n"); - rf_PrintPSStatusTable(raidPtr, rbuf->row); - RF_PANIC(); - } - pssPtr->flags |= RF_PSS_BUFFERWAIT; - cb = rf_AllocCallbackDesc(); /* append to buf wait list in recon ctrl structure */ - cb->row = rbuf->row; cb->col = rbuf->col; - cb->callbackArg.v = rbuf->parityStripeID; - cb->callbackArg2.v = rbuf->which_ru; - cb->next = NULL; - if (!reconCtrlPtr->bufferWaitList) reconCtrlPtr->bufferWaitList = cb; - else { /* might want to maintain head/tail pointers here rather than search for end of list */ - for (p = reconCtrlPtr->bufferWaitList; p->next; p=p->next); - p->next = cb; - } - retcode = 1; - goto out; - } - Dprintf2("RECON: row %d col %d acquired rbuf\n",rbuf->row, rbuf->col); - RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - - rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); - - /* initialize the buffer */ - if (t!=rbuf) { - t->row = rbuf->row; t->col = reconCtrlPtr->fcol; - t->parityStripeID = rbuf->parityStripeID; - t->which_ru = rbuf->which_ru; - t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; - t->spRow=rbuf->spRow; - t->spCol=rbuf->spCol; - t->spOffset=rbuf->spOffset; - - ta = t->buffer; t->buffer = rbuf->buffer; rbuf->buffer = ta; /* swap buffers */ - } - - /* the first installation always gets installed as the destination buffer. 
- * subsequent installations get stacked up to allow for multi-way XOR - */ - if (!pssPtr->rbuf) {pssPtr->rbuf = t; t->count = 1;} - else pssPtr->rbufsForXor[ pssPtr->xorBufCount++ ] = t; /* install this buffer */ - - rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if G=2 */ - -out: - RF_UNLOCK_PSS_MUTEX( raidPtr,rbuf->row,rbuf->parityStripeID); - RF_UNLOCK_MUTEX( reconCtrlPtr->rb_mutex ); - return(retcode); + RF_Raid_t *raidPtr = rbuf->raidPtr; + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row]; + RF_ReconParityStripeStatus_t *pssPtr; + RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf + * pointers */ + caddr_t ta; /* temporary data buffer pointer */ + RF_CallbackDesc_t *cb, *p; + int retcode = 0, created = 0; + + RF_Etimer_t timer; + + /* makes no sense to have a submission from the failed disk */ + RF_ASSERT(rbuf); + RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); + + Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n", + rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset); + + RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); + + RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); + + pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); + RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten + * an rbuf for it */ + + /* check to see if enough buffers have accumulated to do an XOR. If + * so, there's no need to acquire a floating rbuf. Before we can do + * any XORing, we must have acquired a destination buffer. If we + * have, then we can go ahead and do the XOR if (1) including this + * buffer, enough bufs have accumulated, or (2) this is the last + * submission for this stripe. Otherwise, we have to go acquire a + * floating rbuf. 
*/ + + targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; + if ((targetRbuf != NULL) && + ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) { + pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf; /* install this buffer */ + Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount); + RF_ETIMER_START(timer); + rf_MultiWayReconXor(raidPtr, pssPtr); + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer); + if (!keep_it) { + raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer); + RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); + RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); + raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += + RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); + RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); + + rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); + } + rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); + + /* if use_committed is on, we _must_ consume a buffer off the + * committed list. 
*/ + if (use_committed) { + t = reconCtrlPtr->committedRbufs; + RF_ASSERT(t); + reconCtrlPtr->committedRbufs = t->next; + rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t); + } + if (keep_it) { + RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); + RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); + rf_FreeReconBuffer(rbuf); + return (retcode); + } + goto out; + } + /* set the value of "t", which we'll use as the rbuf from here on */ + if (keep_it) { + t = rbuf; + } else { + if (use_committed) { /* if a buffer has been committed to + * us, use it */ + t = reconCtrlPtr->committedRbufs; + RF_ASSERT(t); + reconCtrlPtr->committedRbufs = t->next; + t->next = NULL; + } else + if (reconCtrlPtr->floatingRbufs) { + t = reconCtrlPtr->floatingRbufs; + reconCtrlPtr->floatingRbufs = t->next; + t->next = NULL; + } + } + + /* If we weren't able to acquire a buffer, append to the end of the + * buf list in the recon ctrl struct. */ + if (!t) { + RF_ASSERT(!keep_it && !use_committed); + Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col); + + raidPtr->procsInBufWait++; + if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) { + printf("Buffer wait deadlock detected. 
Exiting.\n"); + rf_PrintPSStatusTable(raidPtr, rbuf->row); + RF_PANIC(); + } + pssPtr->flags |= RF_PSS_BUFFERWAIT; + cb = rf_AllocCallbackDesc(); /* append to buf wait list in + * recon ctrl structure */ + cb->row = rbuf->row; + cb->col = rbuf->col; + cb->callbackArg.v = rbuf->parityStripeID; + cb->callbackArg2.v = rbuf->which_ru; + cb->next = NULL; + if (!reconCtrlPtr->bufferWaitList) + reconCtrlPtr->bufferWaitList = cb; + else { /* might want to maintain head/tail pointers + * here rather than search for end of list */ + for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); + p->next = cb; + } + retcode = 1; + goto out; + } + Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col); + RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); + RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); + raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += + RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); + RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); + + rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); + + /* initialize the buffer */ + if (t != rbuf) { + t->row = rbuf->row; + t->col = reconCtrlPtr->fcol; + t->parityStripeID = rbuf->parityStripeID; + t->which_ru = rbuf->which_ru; + t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; + t->spRow = rbuf->spRow; + t->spCol = rbuf->spCol; + t->spOffset = rbuf->spOffset; + + ta = t->buffer; + t->buffer = rbuf->buffer; + rbuf->buffer = ta; /* swap buffers */ + } + /* the first installation always gets installed as the destination + * buffer. 
subsequent installations get stacked up to allow for + * multi-way XOR */ + if (!pssPtr->rbuf) { + pssPtr->rbuf = t; + t->count = 1; + } else + pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t; /* install this buffer */ + + rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if + * G=2 */ + +out: + RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); + RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); + return (retcode); } -int rf_MultiWayReconXor(raidPtr, pssPtr) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *pssPtr; /* the pss descriptor for this parity stripe */ +int +rf_MultiWayReconXor(raidPtr, pssPtr) + RF_Raid_t *raidPtr; + RF_ReconParityStripeStatus_t *pssPtr; /* the pss descriptor for this + * parity stripe */ { - int i, numBufs = pssPtr->xorBufCount; - int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU); - RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor; - RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; - - RF_ASSERT(pssPtr->rbuf != NULL); - RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS); -#ifdef KERNEL + int i, numBufs = pssPtr->xorBufCount; + int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU); + RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor; + RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; + + RF_ASSERT(pssPtr->rbuf != NULL); + RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS); +#ifdef _KERNEL #if !defined(__NetBSD__) && !defined(__OpenBSD__) - thread_block(); /* yield the processor before doing a big XOR */ + thread_block(); /* yield the processor before doing a big XOR */ #endif -#endif /* KERNEL */ - /* - * XXX - * - * What if more than 9 bufs? 
- */ - nWayXorFuncs[numBufs](pssPtr->rbufsForXor, targetRbuf, numBytes/sizeof(long)); - - /* release all the reconstruction buffers except the last one, which belongs to the - * the disk who's submission caused this XOR to take place - */ - for (i=0; i < numBufs-1; i++) { - if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]); - else if (rbufs[i]->type == RF_RBUF_TYPE_FORCED) rf_FreeReconBuffer(rbufs[i]); - else RF_ASSERT(0); - } - targetRbuf->count += pssPtr->xorBufCount; - pssPtr->xorBufCount = 0; - return(0); +#endif /* _KERNEL */ + /* + * XXX + * + * What if more than 9 bufs? + */ + nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long)); + + /* release all the reconstruction buffers except the last one, which + * belongs to the the disk who's submission caused this XOR to take + * place */ + for (i = 0; i < numBufs - 1; i++) { + if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) + rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]); + else + if (rbufs[i]->type == RF_RBUF_TYPE_FORCED) + rf_FreeReconBuffer(rbufs[i]); + else + RF_ASSERT(0); + } + targetRbuf->count += pssPtr->xorBufCount; + pssPtr->xorBufCount = 0; + return (0); } - /* removes one full buffer from one of the full-buffer lists and returns it. * * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY. 
*/ -RF_ReconBuffer_t *rf_GetFullReconBuffer(reconCtrlPtr) - RF_ReconCtrl_t *reconCtrlPtr; +RF_ReconBuffer_t * +rf_GetFullReconBuffer(reconCtrlPtr) + RF_ReconCtrl_t *reconCtrlPtr; { - RF_ReconBuffer_t *p; - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - if ( (p=reconCtrlPtr->priorityList) != NULL) { - reconCtrlPtr->priorityList = p->next; - p->next = NULL; - goto out; - } - if ( (p=reconCtrlPtr->fullBufferList) != NULL) { - reconCtrlPtr->fullBufferList = p->next; - p->next = NULL; - goto out; - } - + RF_ReconBuffer_t *p; + + RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); + + if ((p = reconCtrlPtr->priorityList) != NULL) { + reconCtrlPtr->priorityList = p->next; + p->next = NULL; + goto out; + } + if ((p = reconCtrlPtr->fullBufferList) != NULL) { + reconCtrlPtr->fullBufferList = p->next; + p->next = NULL; + goto out; + } out: - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - return(p); + RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); + return (p); } @@ -422,96 +346,100 @@ out: * * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. 
*/ -int rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol) - RF_Raid_t *raidPtr; - RF_ReconCtrl_t *reconCtrl; - RF_ReconParityStripeStatus_t *pssPtr; - int numDataCol; +int +rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol) + RF_Raid_t *raidPtr; + RF_ReconCtrl_t *reconCtrl; + RF_ReconParityStripeStatus_t *pssPtr; + int numDataCol; { - RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; - - if (rbuf->count == numDataCol) { - raidPtr->numFullReconBuffers++; - Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n", - (long)rbuf->parityStripeID, rbuf->which_ru); - if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) { - Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n", - (long)rbuf->parityStripeID, rbuf->which_ru); - rbuf->next = reconCtrl->fullBufferList; - reconCtrl->fullBufferList = rbuf; - } - else { - for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt=p, p=p->next); - rbuf->next = p; - pt->next = rbuf; - Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n", - (long)rbuf->parityStripeID, rbuf->which_ru); - } + RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; + + if (rbuf->count == numDataCol) { + raidPtr->numFullReconBuffers++; + Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n", + (long) rbuf->parityStripeID, rbuf->which_ru); + if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) { + Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n", + (long) rbuf->parityStripeID, rbuf->which_ru); + rbuf->next = reconCtrl->fullBufferList; + reconCtrl->fullBufferList = rbuf; + } else { + for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next); + rbuf->next = p; + pt->next = rbuf; + Dprintf2("RECON: rbuf for psid %ld ru %d is in 
list\n", + (long) rbuf->parityStripeID, rbuf->which_ru); + } #if 0 - pssPtr->writeRbuf = pssPtr->rbuf; /* DEBUG ONLY: we like to be able to find this rbuf while it's awaiting write */ + pssPtr->writeRbuf = pssPtr->rbuf; /* DEBUG ONLY: we like + * to be able to find + * this rbuf while it's + * awaiting write */ #else - rbuf->pssPtr = pssPtr; + rbuf->pssPtr = pssPtr; #endif - pssPtr->rbuf = NULL; - rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY); - } - return(0); + pssPtr->rbuf = NULL; + rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY); + } + return (0); } /* release a floating recon buffer for someone else to use. * assumes the rb_mutex is LOCKED at entry */ -void rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_ReconBuffer_t *rbuf; +void +rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf) + RF_Raid_t *raidPtr; + RF_RowCol_t row; + RF_ReconBuffer_t *rbuf; { - RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row]; - RF_CallbackDesc_t *cb; - - Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n", - (long)rbuf->parityStripeID, rbuf->which_ru); - - /* if anyone is waiting on buffers, wake one of them up. They will subsequently wake up anyone - * else waiting on their RU - */ - if (rcPtr->bufferWaitList) { - rbuf->next = rcPtr->committedRbufs; - rcPtr->committedRbufs = rbuf; - cb = rcPtr->bufferWaitList; - rcPtr->bufferWaitList = cb->next; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've committed a buffer */ - rf_FreeCallbackDesc(cb); - raidPtr->procsInBufWait--; - } else { - rbuf->next = rcPtr->floatingRbufs; - rcPtr->floatingRbufs = rbuf; - } + RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row]; + RF_CallbackDesc_t *cb; + + Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n", + (long) rbuf->parityStripeID, rbuf->which_ru); + + /* if anyone is waiting on buffers, wake one of them up. 
They will + * subsequently wake up anyone else waiting on their RU */ + if (rcPtr->bufferWaitList) { + rbuf->next = rcPtr->committedRbufs; + rcPtr->committedRbufs = rbuf; + cb = rcPtr->bufferWaitList; + rcPtr->bufferWaitList = cb->next; + rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've + * committed a buffer */ + rf_FreeCallbackDesc(cb); + raidPtr->procsInBufWait--; + } else { + rbuf->next = rcPtr->floatingRbufs; + rcPtr->floatingRbufs = rbuf; + } } - /* release any disk that is waiting on a buffer for the indicated RU. * assumes the rb_mutex is LOCKED at entry */ -void rf_ReleaseBufferWaiters(raidPtr, pssPtr) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *pssPtr; +void +rf_ReleaseBufferWaiters(raidPtr, pssPtr) + RF_Raid_t *raidPtr; + RF_ReconParityStripeStatus_t *pssPtr; { - RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList; - - Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n", - (long)pssPtr->parityStripeID, pssPtr->which_ru); - pssPtr->flags &= ~RF_PSS_BUFFERWAIT; - while (cb) { - cb1 = cb->next; - cb->next = NULL; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't committed a buffer */ - rf_FreeCallbackDesc(cb); - cb = cb1; - } - pssPtr->bufWaitList = NULL; + RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList; + + Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n", + (long) pssPtr->parityStripeID, pssPtr->which_ru); + pssPtr->flags &= ~RF_PSS_BUFFERWAIT; + while (cb) { + cb1 = cb->next; + cb->next = NULL; + rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't + * committed a buffer */ + rf_FreeCallbackDesc(cb); + cb = cb1; + } + pssPtr->bufWaitList = NULL; } - /* when reconstruction is forced on an RU, there may be some disks waiting to * acquire a buffer for that RU. 
Since we allocate a new buffer as part of * the forced-reconstruction process, we no longer have to wait for any @@ -519,20 +447,24 @@ void rf_ReleaseBufferWaiters(raidPtr, pssPtr) * * assumes the rb_mutex is LOCKED at entry */ -void rf_ReleaseBufferWaiter(rcPtr, rbuf) - RF_ReconCtrl_t *rcPtr; - RF_ReconBuffer_t *rbuf; +void +rf_ReleaseBufferWaiter(rcPtr, rbuf) + RF_ReconCtrl_t *rcPtr; + RF_ReconBuffer_t *rbuf; { - RF_CallbackDesc_t *cb, *cbt; - - for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb=cb->next) { - if ( (cb->callbackArg.v == rbuf->parityStripeID) && ( cb->callbackArg2.v == rbuf->which_ru)) { - Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col); - if (cbt) cbt->next = cb->next; - else rcPtr->bufferWaitList = cb->next; - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no committed buffer */ - rf_FreeCallbackDesc(cb); - return; - } - } + RF_CallbackDesc_t *cb, *cbt; + + for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) { + if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) { + Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col); + if (cbt) + cbt->next = cb->next; + else + rcPtr->bufferWaitList = cb->next; + rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no + * committed buffer */ + rf_FreeCallbackDesc(cb); + return; + } + } } diff --git a/sys/dev/raidframe/rf_reconbuffer.h b/sys/dev/raidframe/rf_reconbuffer.h index 61ec9c1c4ff..84921fc5393 100644 --- a/sys/dev/raidframe/rf_reconbuffer.h +++ b/sys/dev/raidframe/rf_reconbuffer.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconbuffer.h,v 1.1 1999/01/11 14:29:45 niklas Exp $ */ -/* $NetBSD: rf_reconbuffer.h,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_reconbuffer.h,v 1.2 1999/02/16 00:03:21 niklas Exp $ */ +/* $NetBSD: rf_reconbuffer.h,v 1.3 
1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,66 +33,31 @@ * *******************************************************************/ -/* : - * Log: rf_reconbuffer.h,v - * Revision 1.9 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.8 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.7 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.6 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.5 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.4 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1995/12/06 15:04:47 root - * added copyright info - * - */ - #ifndef _RF__RF_RECONBUFFER_H_ #define _RF__RF_RECONBUFFER_H_ #include "rf_types.h" #include "rf_reconstruct.h" -int rf_SubmitReconBuffer(RF_ReconBuffer_t *rbuf, int keep_int, - int use_committed); -int rf_SubmitReconBufferBasic(RF_ReconBuffer_t *rbuf, int keep_int, - int use_committed); -int rf_MultiWayReconXor(RF_Raid_t *raidPtr, - RF_ReconParityStripeStatus_t *pssPtr); -RF_ReconBuffer_t *rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr); -int rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl, - RF_ReconParityStripeStatus_t *pssPtr, int numDataCol); -void rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_RowCol_t row, - RF_ReconBuffer_t *rbuf); -void rf_ReleaseBufferWaiters(RF_Raid_t *raidPtr, - RF_ReconParityStripeStatus_t *pssPtr); -void rf_ReleaseBufferWaiter(RF_ReconCtrl_t *rcPtr, RF_ReconBuffer_t *rbuf); +int +rf_SubmitReconBuffer(RF_ReconBuffer_t * rbuf, int keep_int, + int use_committed); +int +rf_SubmitReconBufferBasic(RF_ReconBuffer_t * rbuf, int keep_int, + int use_committed); +int +rf_MultiWayReconXor(RF_Raid_t * raidPtr, + RF_ReconParityStripeStatus_t * pssPtr); +RF_ReconBuffer_t *rf_GetFullReconBuffer(RF_ReconCtrl_t * reconCtrlPtr); +int +rf_CheckForFullRbuf(RF_Raid_t * raidPtr, RF_ReconCtrl_t * reconCtrl, + RF_ReconParityStripeStatus_t * pssPtr, int numDataCol); +void +rf_ReleaseFloatingReconBuffer(RF_Raid_t * raidPtr, RF_RowCol_t row, + RF_ReconBuffer_t * rbuf); +void +rf_ReleaseBufferWaiters(RF_Raid_t * raidPtr, + RF_ReconParityStripeStatus_t * pssPtr); +void rf_ReleaseBufferWaiter(RF_ReconCtrl_t * rcPtr, RF_ReconBuffer_t * rbuf); -#endif /* 
!_RF__RF_RECONBUFFER_H_ */ +#endif /* !_RF__RF_RECONBUFFER_H_ */ diff --git a/sys/dev/raidframe/rf_reconmap.c b/sys/dev/raidframe/rf_reconmap.c index 565a4ca616c..496c8f22d73 100644 --- a/sys/dev/raidframe/rf_reconmap.c +++ b/sys/dev/raidframe/rf_reconmap.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconmap.c,v 1.1 1999/01/11 14:29:46 niklas Exp $ */ -/* $NetBSD: rf_reconmap.c,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_reconmap.c,v 1.2 1999/02/16 00:03:21 niklas Exp $ */ +/* $NetBSD: rf_reconmap.c,v 1.4 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -34,84 +34,10 @@ * *************************************************************************/ -/* : - * Log: rf_reconmap.c,v - * Revision 1.23 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.22 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.21 1996/06/17 14:38:33 jimz - * properly #if out RF_DEMO code - * fix bug in MakeConfig that was causing weird behavior - * in configuration routines (config was not zeroed at start) - * clean up genplot handling of stacks - * - * Revision 1.20 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.19 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.18 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. 
- * - * Revision 1.17 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.16 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.15 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.14 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.13 1996/05/24 04:40:57 jimz - * don't do recon meter demo stuff in kernel - * - * Revision 1.12 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.11 1996/05/20 16:14:50 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.10 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.9 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.8 1995/12/06 15:05:23 root - * added copyright info - * - */ - #include "rf_raid.h" #include <sys/time.h> #include "rf_general.h" #include "rf_utils.h" -#if RF_DEMO > 0 -#include "rf_demo.h" -#endif /* RF_DEMO > 0 */ #include "rf_sys.h" /* special pointer values indicating that a reconstruction unit @@ -128,58 +54,64 @@ #define RU_NIL ((RF_ReconMapListElem_t *) 0) -static void compact_stat_entry(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr, - int i); -static void crunch_list(RF_ReconMap_t *mapPtr, RF_ReconMapListElem_t *listPtr); -static RF_ReconMapListElem_t *MakeReconMapListElem(RF_SectorNum_t startSector, - RF_SectorNum_t stopSector, RF_ReconMapListElem_t *next); -static void FreeReconMapListElem(RF_ReconMap_t *mapPtr, - RF_ReconMapListElem_t *p); -static void 
update_size(RF_ReconMap_t *mapPtr, int size); -static void PrintList(RF_ReconMapListElem_t *listPtr); +static void +compact_stat_entry(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, + int i); +static void crunch_list(RF_ReconMap_t * mapPtr, RF_ReconMapListElem_t * listPtr); +static RF_ReconMapListElem_t * +MakeReconMapListElem(RF_SectorNum_t startSector, + RF_SectorNum_t stopSector, RF_ReconMapListElem_t * next); +static void +FreeReconMapListElem(RF_ReconMap_t * mapPtr, + RF_ReconMapListElem_t * p); +static void update_size(RF_ReconMap_t * mapPtr, int size); +static void PrintList(RF_ReconMapListElem_t * listPtr); /*----------------------------------------------------------------------------- * - * Creates and initializes new Reconstruction map + * Creates and initializes new Reconstruction map * *-----------------------------------------------------------------------------*/ -RF_ReconMap_t *rf_MakeReconMap(raidPtr, ru_sectors, disk_sectors, spareUnitsPerDisk) - RF_Raid_t *raidPtr; - RF_SectorCount_t ru_sectors; /* size of reconstruction unit in sectors */ - RF_SectorCount_t disk_sectors; /* size of disk in sectors */ - RF_ReconUnitCount_t spareUnitsPerDisk; /* zero unless distributed sparing */ +RF_ReconMap_t * +rf_MakeReconMap(raidPtr, ru_sectors, disk_sectors, spareUnitsPerDisk) + RF_Raid_t *raidPtr; + RF_SectorCount_t ru_sectors; /* size of reconstruction unit in + * sectors */ + RF_SectorCount_t disk_sectors; /* size of disk in sectors */ + RF_ReconUnitCount_t spareUnitsPerDisk; /* zero unless distributed + * sparing */ { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconUnitCount_t num_rus = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerRU; - RF_ReconMap_t *p; - int rc; - - RF_Malloc(p, sizeof(RF_ReconMap_t), (RF_ReconMap_t *)); - p->sectorsPerReconUnit = ru_sectors; - p->sectorsInDisk = disk_sectors; - - p->totalRUs = num_rus; - p->spareRUs = spareUnitsPerDisk; - p->unitsLeft = num_rus - spareUnitsPerDisk; - - RF_Malloc(p->status, num_rus * 
sizeof(RF_ReconMapListElem_t *), (RF_ReconMapListElem_t **)); - RF_ASSERT(p->status != (RF_ReconMapListElem_t **) NULL); - - (void) bzero((char *) p->status, num_rus * sizeof(RF_ReconMapListElem_t *)); - - p->size = sizeof(RF_ReconMap_t) + num_rus * sizeof(RF_ReconMapListElem_t *); - p->maxSize = p->size; - - rc = rf_mutex_init(&p->mutex); - if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - RF_Free(p->status, num_rus * sizeof(RF_ReconMapListElem_t *)); - RF_Free(p, sizeof(RF_ReconMap_t)); - return(NULL); - } - return(p); + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_ReconUnitCount_t num_rus = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerRU; + RF_ReconMap_t *p; + int rc; + + RF_Malloc(p, sizeof(RF_ReconMap_t), (RF_ReconMap_t *)); + p->sectorsPerReconUnit = ru_sectors; + p->sectorsInDisk = disk_sectors; + + p->totalRUs = num_rus; + p->spareRUs = spareUnitsPerDisk; + p->unitsLeft = num_rus - spareUnitsPerDisk; + + RF_Malloc(p->status, num_rus * sizeof(RF_ReconMapListElem_t *), (RF_ReconMapListElem_t **)); + RF_ASSERT(p->status != (RF_ReconMapListElem_t **) NULL); + + (void) bzero((char *) p->status, num_rus * sizeof(RF_ReconMapListElem_t *)); + + p->size = sizeof(RF_ReconMap_t) + num_rus * sizeof(RF_ReconMapListElem_t *); + p->maxSize = p->size; + + rc = rf_mutex_init(&p->mutex); + if (rc) { + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + RF_Free(p->status, num_rus * sizeof(RF_ReconMapListElem_t *)); + RF_Free(p, sizeof(RF_ReconMap_t)); + return (NULL); + } + return (p); } @@ -197,42 +129,45 @@ RF_ReconMap_t *rf_MakeReconMap(raidPtr, ru_sectors, disk_sectors, spareUnitsPerD * *-----------------------------------------------------------------------------*/ -void rf_ReconMapUpdate(raidPtr, mapPtr, startSector, stopSector) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - RF_SectorNum_t startSector; - RF_SectorNum_t stopSector; +void +rf_ReconMapUpdate(raidPtr, 
mapPtr, startSector, stopSector) + RF_Raid_t *raidPtr; + RF_ReconMap_t *mapPtr; + RF_SectorNum_t startSector; + RF_SectorNum_t stopSector; { - RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit; - RF_SectorNum_t i, first_in_RU, last_in_RU; - RF_ReconMapListElem_t *p, *pt; - - RF_LOCK_MUTEX(mapPtr->mutex); - RF_ASSERT(startSector >=0 && stopSector < mapPtr->sectorsInDisk && stopSector > startSector); - - while (startSector <= stopSector) { - i = startSector/mapPtr->sectorsPerReconUnit; - first_in_RU = i*sectorsPerReconUnit; - last_in_RU = first_in_RU + sectorsPerReconUnit -1 ; - p = mapPtr->status[i]; - if (p!=RU_ALL) { - if (p==RU_NOTHING || p->startSector > startSector ) { /* insert at front of list */ - - mapPtr->status[i] = MakeReconMapListElem(startSector, RF_MIN(stopSector,last_in_RU), (p==RU_NOTHING) ? NULL : p); - update_size(mapPtr, sizeof(RF_ReconMapListElem_t)); - - } else { /* general case */ - do { /* search for place to insert */ - pt = p; p = p->next; - } while (p && (p->startSector < startSector)); - pt->next = MakeReconMapListElem(startSector,RF_MIN(stopSector,last_in_RU),p); - update_size(mapPtr, sizeof(RF_ReconMapListElem_t)); - } - compact_stat_entry(raidPtr, mapPtr, i); - } - startSector = RF_MIN(stopSector, last_in_RU) +1; - } - RF_UNLOCK_MUTEX(mapPtr->mutex); + RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit; + RF_SectorNum_t i, first_in_RU, last_in_RU; + RF_ReconMapListElem_t *p, *pt; + + RF_LOCK_MUTEX(mapPtr->mutex); + RF_ASSERT(startSector >= 0 && stopSector < mapPtr->sectorsInDisk && stopSector > startSector); + + while (startSector <= stopSector) { + i = startSector / mapPtr->sectorsPerReconUnit; + first_in_RU = i * sectorsPerReconUnit; + last_in_RU = first_in_RU + sectorsPerReconUnit - 1; + p = mapPtr->status[i]; + if (p != RU_ALL) { + if (p == RU_NOTHING || p->startSector > startSector) { /* insert at front of + * list */ + + mapPtr->status[i] = MakeReconMapListElem(startSector, RF_MIN(stopSector, 
last_in_RU), (p == RU_NOTHING) ? NULL : p); + update_size(mapPtr, sizeof(RF_ReconMapListElem_t)); + + } else {/* general case */ + do { /* search for place to insert */ + pt = p; + p = p->next; + } while (p && (p->startSector < startSector)); + pt->next = MakeReconMapListElem(startSector, RF_MIN(stopSector, last_in_RU), p); + update_size(mapPtr, sizeof(RF_ReconMapListElem_t)); + } + compact_stat_entry(raidPtr, mapPtr, i); + } + startSector = RF_MIN(stopSector, last_in_RU) + 1; + } + RF_UNLOCK_MUTEX(mapPtr->mutex); } @@ -253,207 +188,208 @@ void rf_ReconMapUpdate(raidPtr, mapPtr, startSector, stopSector) * *-----------------------------------------------------------------------------*/ -static void compact_stat_entry(raidPtr, mapPtr, i) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - int i; +static void +compact_stat_entry(raidPtr, mapPtr, i) + RF_Raid_t *raidPtr; + RF_ReconMap_t *mapPtr; + int i; { - RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit; - RF_ReconMapListElem_t *p = mapPtr->status[i]; + RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit; + RF_ReconMapListElem_t *p = mapPtr->status[i]; - crunch_list(mapPtr, p); + crunch_list(mapPtr, p); - if ((p->startSector == i*sectorsPerReconUnit) && - (p->stopSector == i*sectorsPerReconUnit +sectorsPerReconUnit -1)) { - mapPtr->status[i] = RU_ALL; - mapPtr->unitsLeft--; - FreeReconMapListElem(mapPtr,p); - } + if ((p->startSector == i * sectorsPerReconUnit) && + (p->stopSector == i * sectorsPerReconUnit + sectorsPerReconUnit - 1)) { + mapPtr->status[i] = RU_ALL; + mapPtr->unitsLeft--; + FreeReconMapListElem(mapPtr, p); + } } -static void crunch_list(mapPtr, listPtr) - RF_ReconMap_t *mapPtr; - RF_ReconMapListElem_t *listPtr; +static void +crunch_list(mapPtr, listPtr) + RF_ReconMap_t *mapPtr; + RF_ReconMapListElem_t *listPtr; { - RF_ReconMapListElem_t *pt, *p = listPtr; - - if (!p) return; - pt = p; p = p->next; - while (p) { - if (pt->stopSector >= p->startSector-1) { - pt->stopSector 
= RF_MAX(pt->stopSector, p->stopSector); - pt->next = p->next; - FreeReconMapListElem(mapPtr, p); - p = pt->next; - } - else { - pt = p; - p = p->next; - } - } + RF_ReconMapListElem_t *pt, *p = listPtr; + + if (!p) + return; + pt = p; + p = p->next; + while (p) { + if (pt->stopSector >= p->startSector - 1) { + pt->stopSector = RF_MAX(pt->stopSector, p->stopSector); + pt->next = p->next; + FreeReconMapListElem(mapPtr, p); + p = pt->next; + } else { + pt = p; + p = p->next; + } + } } - /*----------------------------------------------------------------------------- - * + * * Allocate and fill a new list element * *-----------------------------------------------------------------------------*/ -static RF_ReconMapListElem_t *MakeReconMapListElem( - RF_SectorNum_t startSector, - RF_SectorNum_t stopSector, - RF_ReconMapListElem_t *next) +static RF_ReconMapListElem_t * +MakeReconMapListElem( + RF_SectorNum_t startSector, + RF_SectorNum_t stopSector, + RF_ReconMapListElem_t * next) { - RF_ReconMapListElem_t *p; - - RF_Malloc(p, sizeof(RF_ReconMapListElem_t), (RF_ReconMapListElem_t *)); - if (p == NULL) - return(NULL); - p->startSector = startSector; - p->stopSector = stopSector; - p->next = next; - return(p); + RF_ReconMapListElem_t *p; + + RF_Malloc(p, sizeof(RF_ReconMapListElem_t), (RF_ReconMapListElem_t *)); + if (p == NULL) + return (NULL); + p->startSector = startSector; + p->stopSector = stopSector; + p->next = next; + return (p); } - /*----------------------------------------------------------------------------- - * + * * Free a list element * *-----------------------------------------------------------------------------*/ -static void FreeReconMapListElem(mapPtr,p) - RF_ReconMap_t *mapPtr; - RF_ReconMapListElem_t *p; +static void +FreeReconMapListElem(mapPtr, p) + RF_ReconMap_t *mapPtr; + RF_ReconMapListElem_t *p; { - int delta; + int delta; - if (mapPtr) { - delta = 0 - (int)sizeof(RF_ReconMapListElem_t); - update_size(mapPtr, delta); - } - RF_Free(p, sizeof(*p)); 
+ if (mapPtr) { + delta = 0 - (int) sizeof(RF_ReconMapListElem_t); + update_size(mapPtr, delta); + } + RF_Free(p, sizeof(*p)); } - /*----------------------------------------------------------------------------- - * + * * Free an entire status structure. Inefficient, but can be called at any time. * *-----------------------------------------------------------------------------*/ -void rf_FreeReconMap(mapPtr) - RF_ReconMap_t *mapPtr; +void +rf_FreeReconMap(mapPtr) + RF_ReconMap_t *mapPtr; { - RF_ReconMapListElem_t *p, *q; - RF_ReconUnitCount_t numRUs; - RF_ReconUnitNum_t i; - - numRUs = mapPtr->sectorsInDisk / mapPtr->sectorsPerReconUnit; - if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit) - numRUs++; - - for (i=0; i<numRUs; i++) { - p = mapPtr->status[i]; - while (p != RU_NOTHING && p != RU_ALL) { - q = p; p = p->next; - RF_Free(q, sizeof(*q)); - } - } - rf_mutex_destroy(&mapPtr->mutex); - RF_Free(mapPtr->status, mapPtr->totalRUs * sizeof(RF_ReconMapListElem_t *)); - RF_Free(mapPtr, sizeof(RF_ReconMap_t)); + RF_ReconMapListElem_t *p, *q; + RF_ReconUnitCount_t numRUs; + RF_ReconUnitNum_t i; + + numRUs = mapPtr->sectorsInDisk / mapPtr->sectorsPerReconUnit; + if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit) + numRUs++; + + for (i = 0; i < numRUs; i++) { + p = mapPtr->status[i]; + while (p != RU_NOTHING && p != RU_ALL) { + q = p; + p = p->next; + RF_Free(q, sizeof(*q)); + } + } + rf_mutex_destroy(&mapPtr->mutex); + RF_Free(mapPtr->status, mapPtr->totalRUs * sizeof(RF_ReconMapListElem_t *)); + RF_Free(mapPtr, sizeof(RF_ReconMap_t)); } - /*----------------------------------------------------------------------------- * * returns nonzero if the indicated RU has been reconstructed already * *---------------------------------------------------------------------------*/ -int rf_CheckRUReconstructed(mapPtr, startSector) - RF_ReconMap_t *mapPtr; - RF_SectorNum_t startSector; +int +rf_CheckRUReconstructed(mapPtr, startSector) + RF_ReconMap_t *mapPtr; + 
RF_SectorNum_t startSector; { - RF_ReconMapListElem_t *l; /* used for searching */ - RF_ReconUnitNum_t i; + RF_ReconMapListElem_t *l; /* used for searching */ + RF_ReconUnitNum_t i; - i = startSector / mapPtr->sectorsPerReconUnit; - l = mapPtr->status[i]; - return( (l == RU_ALL) ? 1 : 0 ); + i = startSector / mapPtr->sectorsPerReconUnit; + l = mapPtr->status[i]; + return ((l == RU_ALL) ? 1 : 0); } -RF_ReconUnitCount_t rf_UnitsLeftToReconstruct(mapPtr) - RF_ReconMap_t *mapPtr; +RF_ReconUnitCount_t +rf_UnitsLeftToReconstruct(mapPtr) + RF_ReconMap_t *mapPtr; { - RF_ASSERT(mapPtr != NULL); - return( mapPtr->unitsLeft ); + RF_ASSERT(mapPtr != NULL); + return (mapPtr->unitsLeft); } - /* updates the size fields of a status descriptor */ -static void update_size(mapPtr, size) - RF_ReconMap_t *mapPtr; - int size; +static void +update_size(mapPtr, size) + RF_ReconMap_t *mapPtr; + int size; { - mapPtr->size += size; - mapPtr->maxSize = RF_MAX(mapPtr->size, mapPtr->maxSize); + mapPtr->size += size; + mapPtr->maxSize = RF_MAX(mapPtr->size, mapPtr->maxSize); } -static void PrintList(listPtr) - RF_ReconMapListElem_t *listPtr; +static void +PrintList(listPtr) + RF_ReconMapListElem_t *listPtr; { - while (listPtr) { - printf("%d,%d -> ",(int)listPtr->startSector,(int)listPtr->stopSector); - listPtr = listPtr->next; - } - printf("\n"); + while (listPtr) { + printf("%d,%d -> ", (int) listPtr->startSector, (int) listPtr->stopSector); + listPtr = listPtr->next; + } + printf("\n"); } - -void rf_PrintReconMap(raidPtr, mapPtr, frow, fcol) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - RF_RowCol_t frow; - RF_RowCol_t fcol; + +void +rf_PrintReconMap(raidPtr, mapPtr, frow, fcol) + RF_Raid_t *raidPtr; + RF_ReconMap_t *mapPtr; + RF_RowCol_t frow; + RF_RowCol_t fcol; { - RF_ReconUnitCount_t numRUs; - RF_ReconMapListElem_t *p; - RF_ReconUnitNum_t i; - - numRUs = mapPtr->totalRUs; - if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit) - numRUs++; - - for (i=0; i<numRUs; i++) { - p = 
mapPtr->status[i]; - if (p==RU_ALL) /*printf("[%d] ALL\n",i)*/; - else if (p == RU_NOTHING) { - printf("%d: Unreconstructed\n",i); - } else { - printf("%d: ", i); - PrintList(p); - } - } + RF_ReconUnitCount_t numRUs; + RF_ReconMapListElem_t *p; + RF_ReconUnitNum_t i; + + numRUs = mapPtr->totalRUs; + if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit) + numRUs++; + + for (i = 0; i < numRUs; i++) { + p = mapPtr->status[i]; + if (p == RU_ALL)/* printf("[%d] ALL\n",i) */ + ; + else + if (p == RU_NOTHING) { + printf("%d: Unreconstructed\n", i); + } else { + printf("%d: ", i); + PrintList(p); + } + } } -void rf_PrintReconSchedule(mapPtr, starttime) - RF_ReconMap_t *mapPtr; - struct timeval *starttime; +void +rf_PrintReconSchedule(mapPtr, starttime) + RF_ReconMap_t *mapPtr; + struct timeval *starttime; { - static int old_pctg = -1; - struct timeval tv, diff; - int new_pctg; - - new_pctg = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); - if (new_pctg != old_pctg) { - RF_GETTIME(tv); - RF_TIMEVAL_DIFF(starttime, &tv, &diff); -#if RF_DEMO > 0 - if (rf_demoMode) { - rf_update_recon_meter(new_pctg); - } - else { - printf("%d %d.%06d\n",new_pctg, diff.tv_sec, diff.tv_usec); - } -#else /* RF_DEMO > 0 */ - printf("%d %d.%06d\n",(int)new_pctg, (int)diff.tv_sec, (int)diff.tv_usec); -#endif /* RF_DEMO > 0 */ - old_pctg = new_pctg; - } + static int old_pctg = -1; + struct timeval tv, diff; + int new_pctg; + + new_pctg = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); + if (new_pctg != old_pctg) { + RF_GETTIME(tv); + RF_TIMEVAL_DIFF(starttime, &tv, &diff); + printf("%d %d.%06d\n", (int) new_pctg, (int) diff.tv_sec, (int) diff.tv_usec); + old_pctg = new_pctg; + } } diff --git a/sys/dev/raidframe/rf_reconmap.h b/sys/dev/raidframe/rf_reconmap.h index 5d03baefb1b..476acab9cb2 100644 --- a/sys/dev/raidframe/rf_reconmap.h +++ b/sys/dev/raidframe/rf_reconmap.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconmap.h,v 1.1 1999/01/11 14:29:46 niklas Exp $ */ 
-/* $NetBSD: rf_reconmap.h,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_reconmap.h,v 1.2 1999/02/16 00:03:21 niklas Exp $ */ +/* $NetBSD: rf_reconmap.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -31,39 +31,6 @@ * rf_reconMap.h -- Header file describing reconstruction status data structure ******************************************************************************/ -/* : - * Log: rf_reconmap.h,v - * Revision 1.10 1996/08/01 15:59:25 jimz - * minor cleanup - * - * Revision 1.9 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.8 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.7 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.6 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.5 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.4 1995/12/06 15:04:01 root - * added copyright info - * - */ - #ifndef _RF__RF_RECONMAP_H_ #define _RF__RF_RECONMAP_H_ @@ -75,40 +42,45 @@ * monitoring only: they have no function for reconstruction. 
*/ struct RF_ReconMap_s { - RF_SectorCount_t sectorsPerReconUnit; /* sectors per reconstruct unit */ - RF_SectorCount_t sectorsInDisk; /* total sectors in disk */ - RF_SectorCount_t unitsLeft; /* recon units left to recon */ - RF_ReconUnitCount_t totalRUs; /* total recon units on disk */ - RF_ReconUnitCount_t spareRUs; /* total number of spare RUs on failed disk */ - RF_StripeCount_t totalParityStripes; /* total number of parity stripes in array */ - u_int size; /* overall size of this structure */ - u_int maxSize; /* maximum size so far */ - RF_ReconMapListElem_t **status; /* array of ptrs to list elements */ - RF_DECLARE_MUTEX(mutex) + RF_SectorCount_t sectorsPerReconUnit; /* sectors per reconstruct + * unit */ + RF_SectorCount_t sectorsInDisk; /* total sectors in disk */ + RF_SectorCount_t unitsLeft; /* recon units left to recon */ + RF_ReconUnitCount_t totalRUs; /* total recon units on disk */ + RF_ReconUnitCount_t spareRUs; /* total number of spare RUs on failed + * disk */ + RF_StripeCount_t totalParityStripes; /* total number of parity + * stripes in array */ + u_int size; /* overall size of this structure */ + u_int maxSize; /* maximum size so far */ + RF_ReconMapListElem_t **status; /* array of ptrs to list elements */ + RF_DECLARE_MUTEX(mutex) }; - /* a list element */ struct RF_ReconMapListElem_s { - RF_SectorNum_t startSector; /* bounding sect nums on this block */ - RF_SectorNum_t stopSector; - RF_ReconMapListElem_t *next; /* next element in list */ + RF_SectorNum_t startSector; /* bounding sect nums on this block */ + RF_SectorNum_t stopSector; + RF_ReconMapListElem_t *next; /* next element in list */ }; -RF_ReconMap_t *rf_MakeReconMap(RF_Raid_t *raidPtr, RF_SectorCount_t ru_sectors, - RF_SectorCount_t disk_sectors, RF_ReconUnitCount_t spareUnitsPerDisk); +RF_ReconMap_t * +rf_MakeReconMap(RF_Raid_t * raidPtr, RF_SectorCount_t ru_sectors, + RF_SectorCount_t disk_sectors, RF_ReconUnitCount_t spareUnitsPerDisk); -void rf_ReconMapUpdate(RF_Raid_t 
*raidPtr, RF_ReconMap_t *mapPtr, - RF_SectorNum_t startSector, RF_SectorNum_t stopSector); +void +rf_ReconMapUpdate(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, + RF_SectorNum_t startSector, RF_SectorNum_t stopSector); -void rf_FreeReconMap(RF_ReconMap_t *mapPtr); +void rf_FreeReconMap(RF_ReconMap_t * mapPtr); -int rf_CheckRUReconstructed(RF_ReconMap_t *mapPtr, RF_SectorNum_t startSector); +int rf_CheckRUReconstructed(RF_ReconMap_t * mapPtr, RF_SectorNum_t startSector); -RF_ReconUnitCount_t rf_UnitsLeftToReconstruct(RF_ReconMap_t *mapPtr); +RF_ReconUnitCount_t rf_UnitsLeftToReconstruct(RF_ReconMap_t * mapPtr); -void rf_PrintReconMap(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr, - RF_RowCol_t frow, RF_RowCol_t fcol); +void +rf_PrintReconMap(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, + RF_RowCol_t frow, RF_RowCol_t fcol); -void rf_PrintReconSchedule(RF_ReconMap_t *mapPtr, struct timeval *starttime); +void rf_PrintReconSchedule(RF_ReconMap_t * mapPtr, struct timeval * starttime); -#endif /* !_RF__RF_RECONMAP_H_ */ +#endif /* !_RF__RF_RECONMAP_H_ */ diff --git a/sys/dev/raidframe/rf_reconstruct.c b/sys/dev/raidframe/rf_reconstruct.c index 7df351a7ec0..a3f7085241f 100644 --- a/sys/dev/raidframe/rf_reconstruct.c +++ b/sys/dev/raidframe/rf_reconstruct.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconstruct.c,v 1.1 1999/01/11 14:29:46 niklas Exp $ */ -/* $NetBSD: rf_reconstruct.c,v 1.1 1998/11/13 04:20:33 oster Exp $ */ +/* $OpenBSD: rf_reconstruct.c,v 1.2 1999/02/16 00:03:22 niklas Exp $ */ +/* $NetBSD: rf_reconstruct.c,v 1.4 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -33,194 +33,9 @@ * ************************************************************/ -/* - * : - * Log: rf_reconstruct.c,v - * Revision 1.65 1996/08/06 22:24:56 jimz - * get rid of sys/buf.h on linux - * - * Revision 1.64 1996/07/30 04:28:53 jimz - * include rf_types.h first - * - * Revision 1.63 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.62 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.61 1996/07/15 05:40:41 jimz - * some recon datastructure cleanup - * better handling of multiple failures - * added undocumented double-recon test - * - * Revision 1.60 1996/07/15 02:57:18 jimz - * added debugging (peek at first couple bytes of recon buffers - * as they go by) - * - * Revision 1.59 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.58 1996/07/11 19:08:00 jimz - * generalize reconstruction mechanism - * allow raid1 reconstructs via copyback (done with array - * quiesced, not online, therefore not disk-directed) - * - * Revision 1.57 1996/06/17 14:38:33 jimz - * properly #if out RF_DEMO code - * fix bug in MakeConfig that was causing weird behavior - * in configuration routines (config was not zeroed at start) - * clean up genplot handling of stacks - * - * Revision 1.56 1996/06/17 03:24:59 jimz - * include shutdown.h for define of now-macroized ShutdownCreate - * - * Revision 1.55 1996/06/11 10:58:36 jimz - * get rid of simulator-testcode artifacts - * add generic ReconDoneProc mechanism instead - * - * Revision 1.54 1996/06/10 14:18:58 jimz - * move user, throughput stats into per-array structure - * - * Revision 1.53 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). 
- * - * Revision 1.52 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.51 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.50 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.49 1996/06/06 01:24:36 jimz - * don't get rid of reconCtrlPtr until we're done with it - * - * Revision 1.48 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.47 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.46 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.45 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.44 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.43 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. 
- * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.42 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.41 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.40 1996/05/24 04:40:40 jimz - * don't do demoMode stuff in kernel - * - * Revision 1.39 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.38 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.37 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.36 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.35 1996/05/01 16:28:16 jimz - * don't include ccmn.h - * - * Revision 1.34 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.33 1995/12/06 15:05:09 root - * added copyright info - * - * Revision 1.32 1995/11/17 19:04:11 wvcii - * added prototyping to ComputePSDiskOffsets - * prow and pcol now type int (were u_int) - * - * Revision 1.31 1995/11/17 01:39:35 amiri - * isolated some demo related stuff - * - * Revision 1.30 1995/10/18 19:33:14 amiri - * removed fflush (stdin/stdout) calls from ReconstructFailedDisk - * - * Revision 1.29 1995/10/11 10:20:33 jimz - * #if 0'd problem code for sigmetrics - * - * Revision 1.28 1995/10/10 23:18:15 amiri - * added fflushes to stdin/stdout before requesting - * input in demo mode. 
- * - * Revision 1.27 1995/10/10 19:24:47 amiri - * took out update_mode (for demo) from - * KERNEL source. - * - * Revision 1.26 1995/10/09 23:35:48 amiri - * added support for more meters in recon. demo - * - * Revision 1.25 1995/07/03 18:14:30 holland - * changed the way the number of floating recon bufs & - * the head sep limit get set - * - * Revision 1.24 1995/07/02 15:07:42 holland - * bug fixes related to getting distributed sparing numbers - * - * Revision 1.23 1995/06/23 13:36:36 robby - * updeated to prototypes in rf_layout.h - * -*/ - -#ifdef _KERNEL -#define KERNEL -#endif - #include "rf_types.h" #include <sys/time.h> -#ifndef LINUX #include <sys/buf.h> -#endif /* !LINUX */ #include <sys/errno.h> #include "rf_raid.h" #include "rf_reconutil.h" @@ -240,13 +55,7 @@ #include "rf_shutdown.h" #include "rf_sys.h" -#if RF_DEMO > 0 -#include "rf_demo.h" -#endif /* RF_DEMO > 0 */ - -#ifdef KERNEL #include "rf_kintf.h" -#endif /* KERNEL */ /* setting these to -1 causes them to be set to their default values if not set by debug options */ @@ -269,88 +78,94 @@ #define DDprintf7(s,a,b,c,d,e,f,g) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),NULL) #define DDprintf8(s,a,b,c,d,e,f,g,h) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),(void *)((unsigned long)h)) -#ifdef KERNEL -static RF_Thread_t recon_thr_handle; -static int recon_thread_initialized = 0; -#endif /* KERNEL */ +static RF_Thread_t recon_thr_handle; +static int recon_thread_initialized = 0; static RF_FreeList_t *rf_recond_freelist; #define RF_MAX_FREE_RECOND 4 #define RF_RECOND_INC 1 -static RF_RaidReconDesc_t *AllocRaidReconDesc(RF_Raid_t *raidPtr, - 
RF_RowCol_t row, RF_RowCol_t col, RF_RaidDisk_t *spareDiskPtr, - int numDisksDone, RF_RowCol_t srow, RF_RowCol_t scol); -static void FreeReconDesc(RF_RaidReconDesc_t *reconDesc); -static int ProcessReconEvent(RF_Raid_t *raidPtr, RF_RowCol_t frow, - RF_ReconEvent_t *event); -static int IssueNextReadRequest(RF_Raid_t *raidPtr, RF_RowCol_t row, - RF_RowCol_t col); -static int TryToRead(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col); -static int ComputePSDiskOffsets(RF_Raid_t *raidPtr, RF_StripeNum_t psid, - RF_RowCol_t row, RF_RowCol_t col, RF_SectorNum_t *outDiskOffset, - RF_SectorNum_t *outFailedDiskSectorOffset, RF_RowCol_t *spRow, - RF_RowCol_t *spCol, RF_SectorNum_t *spOffset); -static int IssueNextWriteRequest(RF_Raid_t *raidPtr, RF_RowCol_t row); +static RF_RaidReconDesc_t * +AllocRaidReconDesc(RF_Raid_t * raidPtr, + RF_RowCol_t row, RF_RowCol_t col, RF_RaidDisk_t * spareDiskPtr, + int numDisksDone, RF_RowCol_t srow, RF_RowCol_t scol); +static void FreeReconDesc(RF_RaidReconDesc_t * reconDesc); +static int +ProcessReconEvent(RF_Raid_t * raidPtr, RF_RowCol_t frow, + RF_ReconEvent_t * event); +static int +IssueNextReadRequest(RF_Raid_t * raidPtr, RF_RowCol_t row, + RF_RowCol_t col); +static int TryToRead(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col); +static int +ComputePSDiskOffsets(RF_Raid_t * raidPtr, RF_StripeNum_t psid, + RF_RowCol_t row, RF_RowCol_t col, RF_SectorNum_t * outDiskOffset, + RF_SectorNum_t * outFailedDiskSectorOffset, RF_RowCol_t * spRow, + RF_RowCol_t * spCol, RF_SectorNum_t * spOffset); +static int IssueNextWriteRequest(RF_Raid_t * raidPtr, RF_RowCol_t row); static int ReconReadDoneProc(void *arg, int status); static int ReconWriteDoneProc(void *arg, int status); -static void CheckForNewMinHeadSep(RF_Raid_t *raidPtr, RF_RowCol_t row, - RF_HeadSepLimit_t hsCtr); -static int CheckHeadSeparation(RF_Raid_t *raidPtr, RF_PerDiskReconCtrl_t *ctrl, - RF_RowCol_t row, RF_RowCol_t col, RF_HeadSepLimit_t hsCtr, - RF_ReconUnitNum_t 
which_ru); -static int CheckForcedOrBlockedReconstruction(RF_Raid_t *raidPtr, - RF_ReconParityStripeStatus_t *pssPtr, RF_PerDiskReconCtrl_t *ctrl, - RF_RowCol_t row, RF_RowCol_t col, RF_StripeNum_t psid, - RF_ReconUnitNum_t which_ru); +static void +CheckForNewMinHeadSep(RF_Raid_t * raidPtr, RF_RowCol_t row, + RF_HeadSepLimit_t hsCtr); +static int +CheckHeadSeparation(RF_Raid_t * raidPtr, RF_PerDiskReconCtrl_t * ctrl, + RF_RowCol_t row, RF_RowCol_t col, RF_HeadSepLimit_t hsCtr, + RF_ReconUnitNum_t which_ru); +static int +CheckForcedOrBlockedReconstruction(RF_Raid_t * raidPtr, + RF_ReconParityStripeStatus_t * pssPtr, RF_PerDiskReconCtrl_t * ctrl, + RF_RowCol_t row, RF_RowCol_t col, RF_StripeNum_t psid, + RF_ReconUnitNum_t which_ru); static void ForceReconReadDoneProc(void *arg, int status); static void rf_ShutdownReconstruction(void *); struct RF_ReconDoneProc_s { - void (*proc)(RF_Raid_t *, void *); - void *arg; - RF_ReconDoneProc_t *next; + void (*proc) (RF_Raid_t *, void *); + void *arg; + RF_ReconDoneProc_t *next; }; static RF_FreeList_t *rf_rdp_freelist; #define RF_MAX_FREE_RDP 4 #define RF_RDP_INC 1 -static void SignalReconDone(RF_Raid_t *raidPtr) +static void +SignalReconDone(RF_Raid_t * raidPtr) { - RF_ReconDoneProc_t *p; + RF_ReconDoneProc_t *p; - RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex); - for(p=raidPtr->recon_done_procs;p;p=p->next) { - p->proc(raidPtr, p->arg); - } - RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex); + RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex); + for (p = raidPtr->recon_done_procs; p; p = p->next) { + p->proc(raidPtr, p->arg); + } + RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex); } -int rf_RegisterReconDoneProc( - RF_Raid_t *raidPtr, - void (*proc)(RF_Raid_t *, void *), - void *arg, - RF_ReconDoneProc_t **handlep) +int +rf_RegisterReconDoneProc( + RF_Raid_t * raidPtr, + void (*proc) (RF_Raid_t *, void *), + void *arg, + RF_ReconDoneProc_t ** handlep) { - RF_ReconDoneProc_t *p; - - 
RF_FREELIST_GET(rf_rdp_freelist,p,next,(RF_ReconDoneProc_t *)); - if (p == NULL) - return(ENOMEM); - p->proc = proc; - p->arg = arg; - RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex); - p->next = raidPtr->recon_done_procs; - raidPtr->recon_done_procs = p; - RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex); - if (handlep) - *handlep = p; - return(0); + RF_ReconDoneProc_t *p; + + RF_FREELIST_GET(rf_rdp_freelist, p, next, (RF_ReconDoneProc_t *)); + if (p == NULL) + return (ENOMEM); + p->proc = proc; + p->arg = arg; + RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex); + p->next = raidPtr->recon_done_procs; + raidPtr->recon_done_procs = p; + RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex); + if (handlep) + *handlep = p; + return (0); } - /***************************************************************************************** * * sets up the parameters that will be used by the reconstruction process @@ -360,88 +175,86 @@ int rf_RegisterReconDoneProc( * in the kernel, we fire off the recon thread. * ****************************************************************************************/ -static void rf_ShutdownReconstruction(ignored) - void *ignored; +static void +rf_ShutdownReconstruction(ignored) + void *ignored; { - RF_FREELIST_DESTROY(rf_recond_freelist,next,(RF_RaidReconDesc_t *)); - RF_FREELIST_DESTROY(rf_rdp_freelist,next,(RF_ReconDoneProc_t *)); + RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *)); + RF_FREELIST_DESTROY(rf_rdp_freelist, next, (RF_ReconDoneProc_t *)); } -int rf_ConfigureReconstruction(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureReconstruction(listp) + RF_ShutdownList_t **listp; { - int rc; - - RF_FREELIST_CREATE(rf_recond_freelist, RF_MAX_FREE_RECOND, - RF_RECOND_INC, sizeof(RF_RaidReconDesc_t)); - if (rf_recond_freelist == NULL) - return(ENOMEM); - RF_FREELIST_CREATE(rf_rdp_freelist, RF_MAX_FREE_RDP, - RF_RDP_INC, sizeof(RF_ReconDoneProc_t)); - if (rf_rdp_freelist == NULL) { - 
RF_FREELIST_DESTROY(rf_recond_freelist,next,(RF_RaidReconDesc_t *)); - return(ENOMEM); - } - rc = rf_ShutdownCreate(listp, rf_ShutdownReconstruction, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); - rf_ShutdownReconstruction(NULL); - return(rc); - } - -#ifdef KERNEL - if (!recon_thread_initialized) { - RF_CREATE_THREAD(recon_thr_handle, rf_ReconKernelThread, NULL); - recon_thread_initialized = 1; - } -#endif /* KERNEL */ - - return(0); + int rc; + + RF_FREELIST_CREATE(rf_recond_freelist, RF_MAX_FREE_RECOND, + RF_RECOND_INC, sizeof(RF_RaidReconDesc_t)); + if (rf_recond_freelist == NULL) + return (ENOMEM); + RF_FREELIST_CREATE(rf_rdp_freelist, RF_MAX_FREE_RDP, + RF_RDP_INC, sizeof(RF_ReconDoneProc_t)); + if (rf_rdp_freelist == NULL) { + RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *)); + return (ENOMEM); + } + rc = rf_ShutdownCreate(listp, rf_ShutdownReconstruction, NULL); + if (rc) { + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); + rf_ShutdownReconstruction(NULL); + return (rc); + } + if (!recon_thread_initialized) { + RF_CREATE_THREAD(recon_thr_handle, rf_ReconKernelThread, NULL); + recon_thread_initialized = 1; + } + return (0); } -static RF_RaidReconDesc_t *AllocRaidReconDesc(raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; - RF_RaidDisk_t *spareDiskPtr; - int numDisksDone; - RF_RowCol_t srow; - RF_RowCol_t scol; +static RF_RaidReconDesc_t * +AllocRaidReconDesc(raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol) + RF_Raid_t *raidPtr; + RF_RowCol_t row; + RF_RowCol_t col; + RF_RaidDisk_t *spareDiskPtr; + int numDisksDone; + RF_RowCol_t srow; + RF_RowCol_t scol; { - - RF_RaidReconDesc_t *reconDesc; - - RF_FREELIST_GET(rf_recond_freelist,reconDesc,next,(RF_RaidReconDesc_t *)); - - reconDesc->raidPtr = raidPtr; - reconDesc->row = row; - 
reconDesc->col = col; - reconDesc->spareDiskPtr=spareDiskPtr; - reconDesc->numDisksDone=numDisksDone; - reconDesc->srow=srow; - reconDesc->scol=scol; - reconDesc->state = 0; - reconDesc->next = NULL; - - return(reconDesc); + + RF_RaidReconDesc_t *reconDesc; + + RF_FREELIST_GET(rf_recond_freelist, reconDesc, next, (RF_RaidReconDesc_t *)); + + reconDesc->raidPtr = raidPtr; + reconDesc->row = row; + reconDesc->col = col; + reconDesc->spareDiskPtr = spareDiskPtr; + reconDesc->numDisksDone = numDisksDone; + reconDesc->srow = srow; + reconDesc->scol = scol; + reconDesc->state = 0; + reconDesc->next = NULL; + + return (reconDesc); } -static void FreeReconDesc(reconDesc) - RF_RaidReconDesc_t *reconDesc; +static void +FreeReconDesc(reconDesc) + RF_RaidReconDesc_t *reconDesc; { #if RF_RECON_STATS > 0 - printf("RAIDframe: %lu recon event waits, %lu recon delays\n", - (long)reconDesc->numReconEventWaits, (long)reconDesc->numReconExecDelays); -#endif /* RF_RECON_STATS > 0 */ -#ifdef KERNEL - printf("RAIDframe: %lu max exec ticks\n", - (long)reconDesc->maxReconExecTicks); -#endif /* KERNEL */ + printf("RAIDframe: %lu recon event waits, %lu recon delays\n", + (long) reconDesc->numReconEventWaits, (long) reconDesc->numReconExecDelays); +#endif /* RF_RECON_STATS > 0 */ + printf("RAIDframe: %lu max exec ticks\n", + (long) reconDesc->maxReconExecTicks); #if (RF_RECON_STATS > 0) || defined(KERNEL) - printf("\n"); -#endif /* (RF_RECON_STATS > 0) || KERNEL */ - RF_FREELIST_FREE(rf_recond_freelist,reconDesc,next); + printf("\n"); +#endif /* (RF_RECON_STATS > 0) || KERNEL */ + RF_FREELIST_FREE(rf_recond_freelist, reconDesc, next); } @@ -451,476 +264,402 @@ static void FreeReconDesc(reconDesc) * within its own thread. It won't return until reconstruction completes, * fails, or is aborted. 
****************************************************************************************/ -int rf_ReconstructFailedDisk(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; +int +rf_ReconstructFailedDisk(raidPtr, row, col) + RF_Raid_t *raidPtr; + RF_RowCol_t row; + RF_RowCol_t col; { -#ifdef SIMULATE - RF_PendingRecon_t *pend; - RF_RowCol_t r, c; -#endif /* SIMULATE */ - RF_LayoutSW_t *lp; - int rc; - - lp = raidPtr->Layout.map; - if (lp->SubmitReconBuffer) { - /* - * The current infrastructure only supports reconstructing one - * disk at a time for each array. - */ -#ifdef SIMULATE - if (raidPtr->reconInProgress) { - RF_Malloc(pend, sizeof(RF_PendingRecon_t), (RF_PendingRecon_t *)); - pend->row = row; - pend->col = col; - pend->next = raidPtr->pendingRecon; - raidPtr->pendingRecon = pend; - /* defer until current recon completes */ - return(0); - } - raidPtr->reconInProgress++; -#else /* SIMULATE */ - RF_LOCK_MUTEX(raidPtr->mutex); - while (raidPtr->reconInProgress) { - RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex); - } - raidPtr->reconInProgress++; - RF_UNLOCK_MUTEX(raidPtr->mutex); -#endif /* SIMULATE */ - rc = rf_ReconstructFailedDiskBasic(raidPtr, row, col); - } - else { - RF_ERRORMSG1("RECON: no way to reconstruct failed disk for arch %c\n", - lp->parityConfig); - rc = EIO; - } -#ifdef SIMULATE - pend = raidPtr->pendingRecon; - if (pend) { - /* launch next recon */ - raidPtr->pendingRecon = pend->next; - r = pend->row; - c = pend->col; - RF_Free(pend, sizeof(RF_PendingRecon_t)); - return(rf_ReconstructFailedDisk(raidPtr, r, c)); - } -#else /* SIMULATE */ - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->reconInProgress--; - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_SIGNAL_COND(raidPtr->waitForReconCond); -#if 1 -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - wakeup(&raidPtr->waitForReconCond); /* XXX Methinks this will be needed - at some point... 
GO*/ -#endif -#endif -#endif /* SIMULATE */ - return(rc); + RF_LayoutSW_t *lp; + int rc; + + lp = raidPtr->Layout.map; + if (lp->SubmitReconBuffer) { + /* + * The current infrastructure only supports reconstructing one + * disk at a time for each array. + */ + RF_LOCK_MUTEX(raidPtr->mutex); + while (raidPtr->reconInProgress) { + RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex); + } + raidPtr->reconInProgress++; + RF_UNLOCK_MUTEX(raidPtr->mutex); + rc = rf_ReconstructFailedDiskBasic(raidPtr, row, col); + } else { + RF_ERRORMSG1("RECON: no way to reconstruct failed disk for arch %c\n", + lp->parityConfig); + rc = EIO; + } + RF_LOCK_MUTEX(raidPtr->mutex); + raidPtr->reconInProgress--; + RF_UNLOCK_MUTEX(raidPtr->mutex); + RF_SIGNAL_COND(raidPtr->waitForReconCond); + wakeup(&raidPtr->waitForReconCond); /* XXX Methinks this will be + * needed at some point... GO */ + return (rc); } -int rf_ReconstructFailedDiskBasic(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; +int +rf_ReconstructFailedDiskBasic(raidPtr, row, col) + RF_Raid_t *raidPtr; + RF_RowCol_t row; + RF_RowCol_t col; { - RF_RaidDisk_t *spareDiskPtr = NULL; - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t srow, scol; - int numDisksDone=0, rc; - - /* first look for a spare drive onto which to reconstruct the data */ - /* spare disk descriptors are stored in row 0. 
This may have to change eventually */ - - RF_LOCK_MUTEX(raidPtr->mutex); - RF_ASSERT (raidPtr->Disks[row][col].status == rf_ds_failed); - - if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - if (raidPtr->status[row] != rf_rs_degraded) { - RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because status not degraded\n",row,col); - RF_UNLOCK_MUTEX(raidPtr->mutex); - return(EINVAL); - } - srow = row; - scol = (-1); - } - else { - srow = 0; - for (scol=raidPtr->numCol; scol<raidPtr->numCol + raidPtr->numSpare; scol++) { - if (raidPtr->Disks[srow][scol].status == rf_ds_spare) { - spareDiskPtr = &raidPtr->Disks[srow][scol]; - spareDiskPtr->status = rf_ds_used_spare; - break; - } - } - if (!spareDiskPtr) { - RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because no spares are available\n",row,col); - RF_UNLOCK_MUTEX(raidPtr->mutex); - return(ENOSPC); - } - -#if RF_DEMO > 0 - if (!rf_demoMode) { -#endif /* RF_DEMO > 0 */ - printf("RECON: initiating reconstruction on row %d col %d -> spare at row %d col %d\n",row, col, srow, scol); -#if RF_DEMO > 0 - } -#endif /* RF_DEMO > 0 */ - } - RF_UNLOCK_MUTEX(raidPtr->mutex); - - reconDesc = AllocRaidReconDesc((void *) raidPtr, row, col,spareDiskPtr, numDisksDone, srow , scol); - raidPtr->reconDesc = (void *) reconDesc; + RF_RaidDisk_t *spareDiskPtr = NULL; + RF_RaidReconDesc_t *reconDesc; + RF_RowCol_t srow, scol; + int numDisksDone = 0, rc; + + /* first look for a spare drive onto which to reconstruct the data */ + /* spare disk descriptors are stored in row 0. 
This may have to + * change eventually */ + + RF_LOCK_MUTEX(raidPtr->mutex); + RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed); + + if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { + if (raidPtr->status[row] != rf_rs_degraded) { + RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because status not degraded\n", row, col); + RF_UNLOCK_MUTEX(raidPtr->mutex); + return (EINVAL); + } + srow = row; + scol = (-1); + } else { + srow = 0; + for (scol = raidPtr->numCol; scol < raidPtr->numCol + raidPtr->numSpare; scol++) { + if (raidPtr->Disks[srow][scol].status == rf_ds_spare) { + spareDiskPtr = &raidPtr->Disks[srow][scol]; + spareDiskPtr->status = rf_ds_used_spare; + break; + } + } + if (!spareDiskPtr) { + RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because no spares are available\n", row, col); + RF_UNLOCK_MUTEX(raidPtr->mutex); + return (ENOSPC); + } + printf("RECON: initiating reconstruction on row %d col %d -> spare at row %d col %d\n", row, col, srow, scol); + } + RF_UNLOCK_MUTEX(raidPtr->mutex); + + reconDesc = AllocRaidReconDesc((void *) raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol); + raidPtr->reconDesc = (void *) reconDesc; #if RF_RECON_STATS > 0 - reconDesc->hsStallCount = 0; - reconDesc->numReconExecDelays = 0; - reconDesc->numReconEventWaits = 0; -#endif /* RF_RECON_STATS > 0 */ -#ifdef KERNEL - reconDesc->reconExecTimerRunning = 0; - reconDesc->reconExecTicks = 0; - reconDesc->maxReconExecTicks = 0; -#endif /* KERNEL */ -#if RF_DEMO > 0 && !defined(SIMULATE) - if (rf_demoMode) { - char cbuf[10]; - printf("About to start reconstruction, hit return to continue:"); - gets(cbuf); - } -#endif /* RF_DEMO > 0 && !SIMULATE */ - rc = rf_ContinueReconstructFailedDisk(reconDesc); - return(rc); + reconDesc->hsStallCount = 0; + reconDesc->numReconExecDelays = 0; + reconDesc->numReconEventWaits = 0; +#endif /* RF_RECON_STATS > 0 */ + reconDesc->reconExecTimerRunning = 0; + reconDesc->reconExecTicks = 0; + 
reconDesc->maxReconExecTicks = 0; + rc = rf_ContinueReconstructFailedDisk(reconDesc); + return (rc); } -int rf_ContinueReconstructFailedDisk(reconDesc) - RF_RaidReconDesc_t *reconDesc; +int +rf_ContinueReconstructFailedDisk(reconDesc) + RF_RaidReconDesc_t *reconDesc; { - RF_Raid_t *raidPtr=reconDesc->raidPtr; - RF_RowCol_t row=reconDesc->row; - RF_RowCol_t col=reconDesc->col; - RF_RowCol_t srow=reconDesc->srow; - RF_RowCol_t scol=reconDesc->scol; - RF_ReconMap_t *mapPtr; - - RF_ReconEvent_t *event; - struct timeval etime, elpsd; - unsigned long xor_s, xor_resid_us; - int retcode,i, ds; - - switch (reconDesc->state) - { - - - case 0: - - raidPtr->accumXorTimeUs = 0; - - /* create one trace record per physical disk */ - RF_Malloc(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - - /* quiesce the array prior to starting recon. this is needed to assure no nasty interactions - * with pending user writes. We need to do this before we change the disk or row status. 
- */ - reconDesc->state=1; - - Dprintf("RECON: begin request suspend\n"); - retcode = rf_SuspendNewRequestsAndWait(raidPtr); - Dprintf("RECON: end request suspend\n"); - rf_StartUserStats(raidPtr); /* zero out the stats kept on user accs */ - -#ifdef SIMULATE - if (retcode) return(0); -#endif /* SIMULATE */ - - /* fall through to state 1 */ - - case 1: - - RF_LOCK_MUTEX(raidPtr->mutex); - - /* create the reconstruction control pointer and install it in the right slot */ - raidPtr->reconControl[row] = rf_MakeReconControl(reconDesc, row, col, srow, scol); - mapPtr=raidPtr->reconControl[row]->reconMap; - raidPtr->status[row] = rf_rs_reconstructing; - raidPtr->Disks[row][col].status = rf_ds_reconstructing; - raidPtr->Disks[row][col].spareRow = srow; - raidPtr->Disks[row][col].spareCol = scol; - - RF_UNLOCK_MUTEX(raidPtr->mutex); - - RF_GETTIME(raidPtr->reconControl[row]->starttime); -#if RF_DEMO > 0 - if (rf_demoMode) { - rf_demo_update_mode(RF_DEMO_RECON); - rf_startup_recon_demo(rf_demoMeterVpos, raidPtr->numCol, - raidPtr->Layout.numDataCol+raidPtr->Layout.numParityCol, 0); - } -#endif /* RF_DEMO > 0 */ - - /* now start up the actual reconstruction: issue a read for each surviving disk */ - rf_start_cpu_monitor(); - reconDesc->numDisksDone = 0; - for (i=0; i<raidPtr->numCol; i++) { - if (i != col) { - /* find and issue the next I/O on the indicated disk */ - if (IssueNextReadRequest(raidPtr, row, i)) { - Dprintf2("RECON: done issuing for r%d c%d\n", row, i); - reconDesc->numDisksDone++; - } - } - } - - case 2: - Dprintf("RECON: resume requests\n"); - rf_ResumeNewRequests(raidPtr); - - - reconDesc->state=3; - - case 3: - - /* process reconstruction events until all disks report that they've completed all work */ - mapPtr=raidPtr->reconControl[row]->reconMap; - - - - while (reconDesc->numDisksDone < raidPtr->numCol-1) { - - event = rf_GetNextReconEvent(reconDesc, row, (void (*)(void *))rf_ContinueReconstructFailedDisk,reconDesc); -#ifdef SIMULATE - if (event==NULL) 
{return(0);} -#else /* SIMULATE */ - RF_ASSERT(event); -#endif /* SIMULATE */ - - if (ProcessReconEvent(raidPtr, row, event)) reconDesc->numDisksDone++; - raidPtr->reconControl[row]->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); -#if RF_DEMO > 0 - if (rf_prReconSched || rf_demoMode) -#else /* RF_DEMO > 0 */ - if (rf_prReconSched) -#endif /* RF_DEMO > 0 */ - { - rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime)); - } - } - - - - reconDesc->state=4; - - - case 4: - mapPtr=raidPtr->reconControl[row]->reconMap; - if (rf_reconDebug) { - printf("RECON: all reads completed\n"); - } - - - - /* at this point all the reads have completed. We now wait for any pending writes - * to complete, and then we're done - */ - - while (rf_UnitsLeftToReconstruct(raidPtr->reconControl[row]->reconMap) > 0) { - - event = rf_GetNextReconEvent(reconDesc, row, (void (*)(void *))rf_ContinueReconstructFailedDisk,reconDesc); -#ifdef SIMULATE - if (event==NULL) {return(0);} -#else /* SIMULATE */ - RF_ASSERT(event); -#endif /* SIMULATE */ - - (void) ProcessReconEvent(raidPtr, row, event); /* ignore return code */ - raidPtr->reconControl[row]->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); -#if RF_DEMO > 0 - if (rf_prReconSched || rf_demoMode) -#else /* RF_DEMO > 0 */ - if (rf_prReconSched) -#endif /* RF_DEMO > 0 */ - { - rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime)); - } - } - reconDesc->state=5; - - case 5: - rf_stop_cpu_monitor(); - - /* Success: mark the dead disk as reconstructed. 
We quiesce the array here to assure no - * nasty interactions with pending user accesses when we free up the psstatus structure - * as part of FreeReconControl() - */ - - - - reconDesc->state=6; - - retcode = rf_SuspendNewRequestsAndWait(raidPtr); - rf_StopUserStats(raidPtr); - rf_PrintUserStats(raidPtr); /* print out the stats on user accs accumulated during recon */ - -#ifdef SIMULATE - if (retcode) return(0); -#endif /* SIMULATE */ - - /* fall through to state 6 */ - case 6: - - - - RF_LOCK_MUTEX(raidPtr->mutex); - raidPtr->numFailures--; - ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE); - raidPtr->Disks[row][col].status = (ds) ? rf_ds_dist_spared : rf_ds_spared; - raidPtr->status[row] = (ds) ? rf_rs_reconfigured : rf_rs_optimal; - RF_UNLOCK_MUTEX(raidPtr->mutex); - RF_GETTIME(etime); - RF_TIMEVAL_DIFF(&(raidPtr->reconControl[row]->starttime), &etime, &elpsd); - - /* XXX -- why is state 7 different from state 6 if there is no return() here? -- XXX - * Note that I set elpsd above & use it below, so if you put a return - * here you'll have to fix this. 
(also, FreeReconControl is called below) - */ - - case 7: - - rf_ResumeNewRequests(raidPtr); - -#if RF_DEMO > 0 - if (rf_demoMode) { - rf_finish_recon_demo(&elpsd); - } - else { -#endif /* RF_DEMO > 0 */ - printf("Reconstruction of disk at row %d col %d completed and spare disk reassigned\n", row, col); - xor_s = raidPtr->accumXorTimeUs/1000000; - xor_resid_us = raidPtr->accumXorTimeUs%1000000; - printf("Recon time was %d.%06d seconds, accumulated XOR time was %ld us (%ld.%06ld)\n", - (int)elpsd.tv_sec,(int)elpsd.tv_usec,raidPtr->accumXorTimeUs,xor_s,xor_resid_us); - printf(" (start time %d sec %d usec, end time %d sec %d usec)\n", - (int)raidPtr->reconControl[row]->starttime.tv_sec, - (int)raidPtr->reconControl[row]->starttime.tv_usec, - (int)etime.tv_sec, (int)etime.tv_usec); - rf_print_cpu_util("reconstruction"); + RF_Raid_t *raidPtr = reconDesc->raidPtr; + RF_RowCol_t row = reconDesc->row; + RF_RowCol_t col = reconDesc->col; + RF_RowCol_t srow = reconDesc->srow; + RF_RowCol_t scol = reconDesc->scol; + RF_ReconMap_t *mapPtr; + + RF_ReconEvent_t *event; + struct timeval etime, elpsd; + unsigned long xor_s, xor_resid_us; + int retcode, i, ds; + + switch (reconDesc->state) { + + + case 0: + + raidPtr->accumXorTimeUs = 0; + + /* create one trace record per physical disk */ + RF_Malloc(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + + /* quiesce the array prior to starting recon. this is needed + * to assure no nasty interactions with pending user writes. + * We need to do this before we change the disk or row status. 
*/ + reconDesc->state = 1; + + Dprintf("RECON: begin request suspend\n"); + retcode = rf_SuspendNewRequestsAndWait(raidPtr); + Dprintf("RECON: end request suspend\n"); + rf_StartUserStats(raidPtr); /* zero out the stats kept on + * user accs */ + + /* fall through to state 1 */ + + case 1: + + RF_LOCK_MUTEX(raidPtr->mutex); + + /* create the reconstruction control pointer and install it in + * the right slot */ + raidPtr->reconControl[row] = rf_MakeReconControl(reconDesc, row, col, srow, scol); + mapPtr = raidPtr->reconControl[row]->reconMap; + raidPtr->status[row] = rf_rs_reconstructing; + raidPtr->Disks[row][col].status = rf_ds_reconstructing; + raidPtr->Disks[row][col].spareRow = srow; + raidPtr->Disks[row][col].spareCol = scol; + + RF_UNLOCK_MUTEX(raidPtr->mutex); + + RF_GETTIME(raidPtr->reconControl[row]->starttime); + + /* now start up the actual reconstruction: issue a read for + * each surviving disk */ + rf_start_cpu_monitor(); + reconDesc->numDisksDone = 0; + for (i = 0; i < raidPtr->numCol; i++) { + if (i != col) { + /* find and issue the next I/O on the + * indicated disk */ + if (IssueNextReadRequest(raidPtr, row, i)) { + Dprintf2("RECON: done issuing for r%d c%d\n", row, i); + reconDesc->numDisksDone++; + } + } + } + + case 2: + Dprintf("RECON: resume requests\n"); + rf_ResumeNewRequests(raidPtr); + + + reconDesc->state = 3; + + case 3: + + /* process reconstruction events until all disks report that + * they've completed all work */ + mapPtr = raidPtr->reconControl[row]->reconMap; + + + + while (reconDesc->numDisksDone < raidPtr->numCol - 1) { + + event = rf_GetNextReconEvent(reconDesc, row, (void (*) (void *)) rf_ContinueReconstructFailedDisk, reconDesc); + RF_ASSERT(event); + + if (ProcessReconEvent(raidPtr, row, event)) + reconDesc->numDisksDone++; + raidPtr->reconControl[row]->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); + if (rf_prReconSched) { + rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, 
&(raidPtr->reconControl[row]->starttime)); + } + } + + + + reconDesc->state = 4; + + + case 4: + mapPtr = raidPtr->reconControl[row]->reconMap; + if (rf_reconDebug) { + printf("RECON: all reads completed\n"); + } + /* at this point all the reads have completed. We now wait + * for any pending writes to complete, and then we're done */ + + while (rf_UnitsLeftToReconstruct(raidPtr->reconControl[row]->reconMap) > 0) { + + event = rf_GetNextReconEvent(reconDesc, row, (void (*) (void *)) rf_ContinueReconstructFailedDisk, reconDesc); + RF_ASSERT(event); + + (void) ProcessReconEvent(raidPtr, row, event); /* ignore return code */ + raidPtr->reconControl[row]->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); + if (rf_prReconSched) { + rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime)); + } + } + reconDesc->state = 5; + + case 5: + rf_stop_cpu_monitor(); + + /* Success: mark the dead disk as reconstructed. We quiesce + * the array here to assure no nasty interactions with pending + * user accesses when we free up the psstatus structure as + * part of FreeReconControl() */ + + + + reconDesc->state = 6; + + retcode = rf_SuspendNewRequestsAndWait(raidPtr); + rf_StopUserStats(raidPtr); + rf_PrintUserStats(raidPtr); /* print out the stats on user + * accs accumulated during + * recon */ + + /* fall through to state 6 */ + case 6: + + + + RF_LOCK_MUTEX(raidPtr->mutex); + raidPtr->numFailures--; + ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE); + raidPtr->Disks[row][col].status = (ds) ? rf_ds_dist_spared : rf_ds_spared; + raidPtr->status[row] = (ds) ? rf_rs_reconfigured : rf_rs_optimal; + RF_UNLOCK_MUTEX(raidPtr->mutex); + RF_GETTIME(etime); + RF_TIMEVAL_DIFF(&(raidPtr->reconControl[row]->starttime), &etime, &elpsd); + + /* XXX -- why is state 7 different from state 6 if there is no + * return() here? 
-- XXX Note that I set elpsd above & use it + * below, so if you put a return here you'll have to fix this. + * (also, FreeReconControl is called below) */ + + case 7: + + rf_ResumeNewRequests(raidPtr); + + printf("Reconstruction of disk at row %d col %d completed and spare disk reassigned\n", row, col); + xor_s = raidPtr->accumXorTimeUs / 1000000; + xor_resid_us = raidPtr->accumXorTimeUs % 1000000; + printf("Recon time was %d.%06d seconds, accumulated XOR time was %ld us (%ld.%06ld)\n", + (int) elpsd.tv_sec, (int) elpsd.tv_usec, raidPtr->accumXorTimeUs, xor_s, xor_resid_us); + printf(" (start time %d sec %d usec, end time %d sec %d usec)\n", + (int) raidPtr->reconControl[row]->starttime.tv_sec, + (int) raidPtr->reconControl[row]->starttime.tv_usec, + (int) etime.tv_sec, (int) etime.tv_usec); + rf_print_cpu_util("reconstruction"); #if RF_RECON_STATS > 0 - printf("Total head-sep stall count was %d\n", - (int)reconDesc->hsStallCount); -#endif /* RF_RECON_STATS > 0 */ -#if RF_DEMO > 0 - } -#endif /* RF_DEMO > 0 */ - rf_FreeReconControl(raidPtr, row); - RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t)); - FreeReconDesc(reconDesc); - - } - - SignalReconDone(raidPtr); - return (0); -} + printf("Total head-sep stall count was %d\n", + (int) reconDesc->hsStallCount); +#endif /* RF_RECON_STATS > 0 */ + rf_FreeReconControl(raidPtr, row); + RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t)); + FreeReconDesc(reconDesc); + } + + SignalReconDone(raidPtr); + return (0); +} /***************************************************************************************** * do the right thing upon each reconstruction event. 
* returns nonzero if and only if there is nothing left unread on the indicated disk ****************************************************************************************/ -static int ProcessReconEvent(raidPtr, frow, event) - RF_Raid_t *raidPtr; - RF_RowCol_t frow; - RF_ReconEvent_t *event; +static int +ProcessReconEvent(raidPtr, frow, event) + RF_Raid_t *raidPtr; + RF_RowCol_t frow; + RF_ReconEvent_t *event; { - int retcode = 0, submitblocked; - RF_ReconBuffer_t *rbuf; - RF_SectorCount_t sectorsPerRU; - - Dprintf1("RECON: ProcessReconEvent type %d\n", event->type); - switch(event->type) { - - /* a read I/O has completed */ - case RF_REVENT_READDONE: - rbuf = raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf; - Dprintf3("RECON: READDONE EVENT: row %d col %d psid %ld\n", - frow, event->col, rbuf->parityStripeID); - Dprintf7("RECON: done read psid %ld buf %lx %02x %02x %02x %02x %02x\n", - rbuf->parityStripeID, rbuf->buffer, rbuf->buffer[0]&0xff, rbuf->buffer[1]&0xff, - rbuf->buffer[2]&0xff, rbuf->buffer[3]&0xff, rbuf->buffer[4]&0xff); - rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); - submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0); - Dprintf1("RECON: submitblocked=%d\n", submitblocked); - if (!submitblocked) retcode = IssueNextReadRequest(raidPtr, frow, event->col); - break; - - /* a write I/O has completed */ - case RF_REVENT_WRITEDONE: - if (rf_floatingRbufDebug) { - rf_CheckFloatingRbufCount(raidPtr, 1); - } - sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; - rbuf = (RF_ReconBuffer_t *) event->arg; - rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); - Dprintf3("RECON: WRITEDONE EVENT: psid %d ru %d (%d %% complete)\n", - rbuf->parityStripeID, rbuf->which_ru, raidPtr->reconControl[frow]->percentComplete); - rf_ReconMapUpdate(raidPtr, raidPtr->reconControl[frow]->reconMap, - rbuf->failedDiskSectorOffset, rbuf->failedDiskSectorOffset + sectorsPerRU -1); - rf_RemoveFromActiveReconTable(raidPtr, frow, 
rbuf->parityStripeID, rbuf->which_ru); - - if (rbuf->type == RF_RBUF_TYPE_FLOATING) { - RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); - raidPtr->numFullReconBuffers--; - rf_ReleaseFloatingReconBuffer(raidPtr, frow, rbuf); - RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); - } else if (rbuf->type == RF_RBUF_TYPE_FORCED) rf_FreeReconBuffer(rbuf); - else RF_ASSERT(0); - break; - - case RF_REVENT_BUFCLEAR: /* A buffer-stall condition has been cleared */ - Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d\n",frow, event->col); - submitblocked = rf_SubmitReconBuffer(raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf, 0, (int) (long)event->arg); - RF_ASSERT(!submitblocked); /* we wouldn't have gotten the BUFCLEAR event if we couldn't submit */ - retcode = IssueNextReadRequest(raidPtr, frow, event->col); - break; - - case RF_REVENT_BLOCKCLEAR: /* A user-write reconstruction blockage has been cleared */ - DDprintf2("RECON: BLOCKCLEAR EVENT: row %d col %d\n",frow, event->col); - retcode = TryToRead(raidPtr, frow, event->col); - break; - - case RF_REVENT_HEADSEPCLEAR: /* A max-head-separation reconstruction blockage has been cleared */ - Dprintf2("RECON: HEADSEPCLEAR EVENT: row %d col %d\n",frow, event->col); - retcode = TryToRead(raidPtr, frow, event->col); - break; - - /* a buffer has become ready to write */ - case RF_REVENT_BUFREADY: - Dprintf2("RECON: BUFREADY EVENT: row %d col %d\n",frow, event->col); - retcode = IssueNextWriteRequest(raidPtr, frow); - if (rf_floatingRbufDebug) { - rf_CheckFloatingRbufCount(raidPtr, 1); - } - break; - - /* we need to skip the current RU entirely because it got recon'd while we were waiting for something else to happen */ - case RF_REVENT_SKIP: - DDprintf2("RECON: SKIP EVENT: row %d col %d\n",frow, event->col); - retcode = IssueNextReadRequest(raidPtr, frow, event->col); - break; - - /* a forced-reconstruction read access has completed. 
Just submit the buffer */ - case RF_REVENT_FORCEDREADDONE: - rbuf = (RF_ReconBuffer_t *) event->arg; - rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); - DDprintf2("RECON: FORCEDREADDONE EVENT: row %d col %d\n",frow, event->col); - submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0); - RF_ASSERT(!submitblocked); - break; - - default: - RF_PANIC(); - } - rf_FreeReconEventDesc(event); - return(retcode); + int retcode = 0, submitblocked; + RF_ReconBuffer_t *rbuf; + RF_SectorCount_t sectorsPerRU; + + Dprintf1("RECON: ProcessReconEvent type %d\n", event->type); + switch (event->type) { + + /* a read I/O has completed */ + case RF_REVENT_READDONE: + rbuf = raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf; + Dprintf3("RECON: READDONE EVENT: row %d col %d psid %ld\n", + frow, event->col, rbuf->parityStripeID); + Dprintf7("RECON: done read psid %ld buf %lx %02x %02x %02x %02x %02x\n", + rbuf->parityStripeID, rbuf->buffer, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff, + rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff); + rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); + submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0); + Dprintf1("RECON: submitblocked=%d\n", submitblocked); + if (!submitblocked) + retcode = IssueNextReadRequest(raidPtr, frow, event->col); + break; + + /* a write I/O has completed */ + case RF_REVENT_WRITEDONE: + if (rf_floatingRbufDebug) { + rf_CheckFloatingRbufCount(raidPtr, 1); + } + sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; + rbuf = (RF_ReconBuffer_t *) event->arg; + rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); + Dprintf3("RECON: WRITEDONE EVENT: psid %d ru %d (%d %% complete)\n", + rbuf->parityStripeID, rbuf->which_ru, raidPtr->reconControl[frow]->percentComplete); + rf_ReconMapUpdate(raidPtr, raidPtr->reconControl[frow]->reconMap, + rbuf->failedDiskSectorOffset, rbuf->failedDiskSectorOffset + sectorsPerRU - 1); + rf_RemoveFromActiveReconTable(raidPtr, frow, 
rbuf->parityStripeID, rbuf->which_ru); + + if (rbuf->type == RF_RBUF_TYPE_FLOATING) { + RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); + raidPtr->numFullReconBuffers--; + rf_ReleaseFloatingReconBuffer(raidPtr, frow, rbuf); + RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); + } else + if (rbuf->type == RF_RBUF_TYPE_FORCED) + rf_FreeReconBuffer(rbuf); + else + RF_ASSERT(0); + break; + + case RF_REVENT_BUFCLEAR: /* A buffer-stall condition has been + * cleared */ + Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d\n", frow, event->col); + submitblocked = rf_SubmitReconBuffer(raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf, 0, (int) (long) event->arg); + RF_ASSERT(!submitblocked); /* we wouldn't have gotten the + * BUFCLEAR event if we + * couldn't submit */ + retcode = IssueNextReadRequest(raidPtr, frow, event->col); + break; + + case RF_REVENT_BLOCKCLEAR: /* A user-write reconstruction + * blockage has been cleared */ + DDprintf2("RECON: BLOCKCLEAR EVENT: row %d col %d\n", frow, event->col); + retcode = TryToRead(raidPtr, frow, event->col); + break; + + case RF_REVENT_HEADSEPCLEAR: /* A max-head-separation + * reconstruction blockage has been + * cleared */ + Dprintf2("RECON: HEADSEPCLEAR EVENT: row %d col %d\n", frow, event->col); + retcode = TryToRead(raidPtr, frow, event->col); + break; + + /* a buffer has become ready to write */ + case RF_REVENT_BUFREADY: + Dprintf2("RECON: BUFREADY EVENT: row %d col %d\n", frow, event->col); + retcode = IssueNextWriteRequest(raidPtr, frow); + if (rf_floatingRbufDebug) { + rf_CheckFloatingRbufCount(raidPtr, 1); + } + break; + + /* we need to skip the current RU entirely because it got + * recon'd while we were waiting for something else to happen */ + case RF_REVENT_SKIP: + DDprintf2("RECON: SKIP EVENT: row %d col %d\n", frow, event->col); + retcode = IssueNextReadRequest(raidPtr, frow, event->col); + break; + + /* a forced-reconstruction read access has completed. 
Just + * submit the buffer */ + case RF_REVENT_FORCEDREADDONE: + rbuf = (RF_ReconBuffer_t *) event->arg; + rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); + DDprintf2("RECON: FORCEDREADDONE EVENT: row %d col %d\n", frow, event->col); + submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0); + RF_ASSERT(!submitblocked); + break; + + default: + RF_PANIC(); + } + rf_FreeReconEventDesc(event); + return (retcode); } /***************************************************************************************** @@ -934,145 +673,155 @@ static int ProcessReconEvent(raidPtr, frow, event) * * ctrl->{ru_count, curPSID, diskOffset} and rbuf->failedDiskSectorOffset are * maintained to point the the unit we're currently accessing. Note that this deviates - * from the standard C idiom of having counters point to the next thing to be + * from the standard C idiom of having counters point to the next thing to be * accessed. This allows us to easily retry when we're blocked by head separation * or reconstruction-blockage events. 
* * returns nonzero if and only if there is nothing left unread on the indicated disk ****************************************************************************************/ -static int IssueNextReadRequest(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; +static int +IssueNextReadRequest(raidPtr, row, col) + RF_Raid_t *raidPtr; + RF_RowCol_t row; + RF_RowCol_t col; { - RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col]; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconBuffer_t *rbuf = ctrl->rbuf; - RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; - RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; - int do_new_check = 0, retcode = 0, status; - - /* if we are currently the slowest disk, mark that we have to do a new check */ - if (ctrl->headSepCounter <= raidPtr->reconControl[row]->minHeadSepCounter) do_new_check = 1; - - while (1) { - - ctrl->ru_count++; - if (ctrl->ru_count < RUsPerPU) { - ctrl->diskOffset += sectorsPerRU; - rbuf->failedDiskSectorOffset += sectorsPerRU; - } else { - ctrl->curPSID++; - ctrl->ru_count = 0; - /* code left over from when head-sep was based on parity stripe id */ - if (ctrl->curPSID >= raidPtr->reconControl[row]->lastPSID) { - CheckForNewMinHeadSep(raidPtr, row, ++(ctrl->headSepCounter)); - return(1); /* finito! */ - } - - /* find the disk offsets of the start of the parity stripe on both the current disk and the failed disk. 
- * skip this entire parity stripe if either disk does not appear in the indicated PS - */ - status = ComputePSDiskOffsets(raidPtr, ctrl->curPSID, row, col, &ctrl->diskOffset, &rbuf->failedDiskSectorOffset, - &rbuf->spRow, &rbuf->spCol, &rbuf->spOffset); - if (status) { - ctrl->ru_count = RUsPerPU-1; continue; - } - } - rbuf->which_ru = ctrl->ru_count; - - /* skip this RU if it's already been reconstructed */ - if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, rbuf->failedDiskSectorOffset)) { - Dprintf2("Skipping psid %ld ru %d: already reconstructed\n",ctrl->curPSID,ctrl->ru_count); - continue; - } - break; - } - ctrl->headSepCounter++; - if (do_new_check) CheckForNewMinHeadSep(raidPtr, row, ctrl->headSepCounter); /* update min if needed */ - - - /* at this point, we have definitely decided what to do, and we have only to see if we can actually do it now */ - rbuf->parityStripeID = ctrl->curPSID; - rbuf->which_ru = ctrl->ru_count; - bzero((char *)&raidPtr->recon_tracerecs[col], sizeof(raidPtr->recon_tracerecs[col])); - raidPtr->recon_tracerecs[col].reconacc = 1; - RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer); - retcode = TryToRead(raidPtr, row, col); - return(retcode); + RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col]; + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_ReconBuffer_t *rbuf = ctrl->rbuf; + RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; + RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; + int do_new_check = 0, retcode = 0, status; + + /* if we are currently the slowest disk, mark that we have to do a new + * check */ + if (ctrl->headSepCounter <= raidPtr->reconControl[row]->minHeadSepCounter) + do_new_check = 1; + + while (1) { + + ctrl->ru_count++; + if (ctrl->ru_count < RUsPerPU) { + ctrl->diskOffset += sectorsPerRU; + rbuf->failedDiskSectorOffset += sectorsPerRU; + } else { + ctrl->curPSID++; + ctrl->ru_count = 0; + /* code 
left over from when head-sep was based on + * parity stripe id */ + if (ctrl->curPSID >= raidPtr->reconControl[row]->lastPSID) { + CheckForNewMinHeadSep(raidPtr, row, ++(ctrl->headSepCounter)); + return (1); /* finito! */ + } + /* find the disk offsets of the start of the parity + * stripe on both the current disk and the failed + * disk. skip this entire parity stripe if either disk + * does not appear in the indicated PS */ + status = ComputePSDiskOffsets(raidPtr, ctrl->curPSID, row, col, &ctrl->diskOffset, &rbuf->failedDiskSectorOffset, + &rbuf->spRow, &rbuf->spCol, &rbuf->spOffset); + if (status) { + ctrl->ru_count = RUsPerPU - 1; + continue; + } + } + rbuf->which_ru = ctrl->ru_count; + + /* skip this RU if it's already been reconstructed */ + if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, rbuf->failedDiskSectorOffset)) { + Dprintf2("Skipping psid %ld ru %d: already reconstructed\n", ctrl->curPSID, ctrl->ru_count); + continue; + } + break; + } + ctrl->headSepCounter++; + if (do_new_check) + CheckForNewMinHeadSep(raidPtr, row, ctrl->headSepCounter); /* update min if needed */ + + + /* at this point, we have definitely decided what to do, and we have + * only to see if we can actually do it now */ + rbuf->parityStripeID = ctrl->curPSID; + rbuf->which_ru = ctrl->ru_count; + bzero((char *) &raidPtr->recon_tracerecs[col], sizeof(raidPtr->recon_tracerecs[col])); + raidPtr->recon_tracerecs[col].reconacc = 1; + RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer); + retcode = TryToRead(raidPtr, row, col); + return (retcode); } - /* tries to issue the next read on the indicated disk. We may be blocked by (a) the heads being too * far apart, or (b) recon on the indicated RU being blocked due to a write by a user thread. * In this case, we issue a head-sep or blockage wait request, which will cause this same routine * to be invoked again later when the blockage has cleared. 
*/ -static int TryToRead(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; +static int +TryToRead(raidPtr, row, col) + RF_Raid_t *raidPtr; + RF_RowCol_t row; + RF_RowCol_t col; { - RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col]; - RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; - RF_StripeNum_t psid = ctrl->curPSID; - RF_ReconUnitNum_t which_ru = ctrl->ru_count; - RF_DiskQueueData_t *req; - int status, created = 0; - RF_ReconParityStripeStatus_t *pssPtr; - - /* if the current disk is too far ahead of the others, issue a head-separation wait and return */ - if (CheckHeadSeparation(raidPtr, ctrl, row, col, ctrl->headSepCounter, which_ru)) return(0); - RF_LOCK_PSS_MUTEX(raidPtr, row, psid); - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE, &created); - - /* if recon is blocked on the indicated parity stripe, issue a block-wait request and return. - * this also must mark the indicated RU in the stripe as under reconstruction if not blocked. - */ - status = CheckForcedOrBlockedReconstruction(raidPtr, pssPtr, ctrl, row, col, psid, which_ru); - if (status == RF_PSS_RECON_BLOCKED) { - Dprintf2("RECON: Stalling psid %ld ru %d: recon blocked\n",psid,which_ru); - goto out; - } else if (status == RF_PSS_FORCED_ON_WRITE) { - rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP); - goto out; - } - - /* make one last check to be sure that the indicated RU didn't get reconstructed while - * we were waiting for something else to happen. This is unfortunate in that it causes - * us to make this check twice in the normal case. Might want to make some attempt to - * re-work this so that we only do this check if we've definitely blocked on one of the - * above checks. When this condition is detected, we may have just created a bogus - * status entry, which we need to delete. 
- */ - if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, ctrl->rbuf->failedDiskSectorOffset)) { - Dprintf2("RECON: Skipping psid %ld ru %d: prior recon after stall\n",psid,which_ru); - if (created) rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr); - rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP); - goto out; - } - - /* found something to read. issue the I/O */ - Dprintf5("RECON: Read for psid %ld on row %d col %d offset %ld buf %lx\n", - psid, row, col, ctrl->diskOffset, ctrl->rbuf->buffer); - RF_ETIMER_STOP(raidPtr->recon_tracerecs[col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[col].recon_timer); - raidPtr->recon_tracerecs[col].specific.recon.recon_start_to_fetch_us = - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer); - - /* should be ok to use a NULL proc pointer here, all the bufs we use should be in kernel space */ - req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, ctrl->diskOffset, sectorsPerRU, ctrl->rbuf->buffer, psid, which_ru, - ReconReadDoneProc, (void *) ctrl, NULL, &raidPtr->recon_tracerecs[col], (void *)raidPtr, 0, NULL); - - RF_ASSERT(req); /* XXX -- fix this -- XXX */ - - ctrl->rbuf->arg = (void *) req; - rf_DiskIOEnqueue(&raidPtr->Queues[row][col], req, RF_IO_RECON_PRIORITY); - pssPtr->issued[col] = 1; + RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col]; + RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; + RF_StripeNum_t psid = ctrl->curPSID; + RF_ReconUnitNum_t which_ru = ctrl->ru_count; + RF_DiskQueueData_t *req; + int status, created = 0; + RF_ReconParityStripeStatus_t *pssPtr; + + /* if the current disk is too far ahead of the others, issue a + * head-separation wait and return */ + if (CheckHeadSeparation(raidPtr, ctrl, row, col, ctrl->headSepCounter, which_ru)) + return (0); + RF_LOCK_PSS_MUTEX(raidPtr, row, psid); + pssPtr = 
rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE, &created); + + /* if recon is blocked on the indicated parity stripe, issue a + * block-wait request and return. this also must mark the indicated RU + * in the stripe as under reconstruction if not blocked. */ + status = CheckForcedOrBlockedReconstruction(raidPtr, pssPtr, ctrl, row, col, psid, which_ru); + if (status == RF_PSS_RECON_BLOCKED) { + Dprintf2("RECON: Stalling psid %ld ru %d: recon blocked\n", psid, which_ru); + goto out; + } else + if (status == RF_PSS_FORCED_ON_WRITE) { + rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP); + goto out; + } + /* make one last check to be sure that the indicated RU didn't get + * reconstructed while we were waiting for something else to happen. + * This is unfortunate in that it causes us to make this check twice + * in the normal case. Might want to make some attempt to re-work + * this so that we only do this check if we've definitely blocked on + * one of the above checks. When this condition is detected, we may + * have just created a bogus status entry, which we need to delete. */ + if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, ctrl->rbuf->failedDiskSectorOffset)) { + Dprintf2("RECON: Skipping psid %ld ru %d: prior recon after stall\n", psid, which_ru); + if (created) + rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr); + rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP); + goto out; + } + /* found something to read. 
issue the I/O */ + Dprintf5("RECON: Read for psid %ld on row %d col %d offset %ld buf %lx\n", + psid, row, col, ctrl->diskOffset, ctrl->rbuf->buffer); + RF_ETIMER_STOP(raidPtr->recon_tracerecs[col].recon_timer); + RF_ETIMER_EVAL(raidPtr->recon_tracerecs[col].recon_timer); + raidPtr->recon_tracerecs[col].specific.recon.recon_start_to_fetch_us = + RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[col].recon_timer); + RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer); + + /* should be ok to use a NULL proc pointer here, all the bufs we use + * should be in kernel space */ + req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, ctrl->diskOffset, sectorsPerRU, ctrl->rbuf->buffer, psid, which_ru, + ReconReadDoneProc, (void *) ctrl, NULL, &raidPtr->recon_tracerecs[col], (void *) raidPtr, 0, NULL); + + RF_ASSERT(req); /* XXX -- fix this -- XXX */ + + ctrl->rbuf->arg = (void *) req; + rf_DiskIOEnqueue(&raidPtr->Queues[row][col], req, RF_IO_RECON_PRIORITY); + pssPtr->issued[col] = 1; out: - RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); - return(0); + RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); + return (0); } @@ -1096,231 +845,254 @@ out: * ASSUMES THAT THE STRIPE IDENTIFIER IDENTIFIES THE DISKS COMPRISING THE STRIPE * IN THE CORRECT ORDER */ -static int ComputePSDiskOffsets( - RF_Raid_t *raidPtr, /* raid descriptor */ - RF_StripeNum_t psid, /* parity stripe identifier */ - RF_RowCol_t row, /* row and column of disk to find the offsets for */ - RF_RowCol_t col, - RF_SectorNum_t *outDiskOffset, - RF_SectorNum_t *outFailedDiskSectorOffset, - RF_RowCol_t *spRow, /* OUT: row,col of spare unit for failed unit */ - RF_RowCol_t *spCol, - RF_SectorNum_t *spOffset) /* OUT: offset into disk containing spare unit */ -{ - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol; - RF_RaidAddr_t sosRaidAddress; /* start-of-stripe */ - RF_RowCol_t *diskids; - u_int i, j, k, i_offset, j_offset; - RF_RowCol_t prow, pcol; - int testcol, testrow; - RF_RowCol_t 
stripe; - RF_SectorNum_t poffset; - char i_is_parity=0, j_is_parity=0; - RF_RowCol_t stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; - - /* get a listing of the disks comprising that stripe */ - sosRaidAddress = rf_ParityStripeIDToRaidAddress(layoutPtr, psid); - (layoutPtr->map->IdentifyStripe)(raidPtr, sosRaidAddress, &diskids, &stripe); - RF_ASSERT(diskids); - - /* reject this entire parity stripe if it does not contain the indicated disk or it does not contain the failed disk */ - if (row != stripe) - goto skipit; - for (i=0; i<stripeWidth; i++) { - if (col == diskids[i]) - break; - } - if (i == stripeWidth) - goto skipit; - for (j=0; j<stripeWidth; j++) { - if (fcol == diskids[j]) - break; - } - if (j == stripeWidth) { - goto skipit; - } - - /* find out which disk the parity is on */ - (layoutPtr->map->MapParity)(raidPtr, sosRaidAddress, &prow, &pcol, &poffset, RF_DONT_REMAP); - - /* find out if either the current RU or the failed RU is parity */ - /* also, if the parity occurs in this stripe prior to the data and/or failed col, we need to decrement i and/or j */ - for (k=0; k<stripeWidth; k++) - if (diskids[k] == pcol) - break; - RF_ASSERT(k < stripeWidth); - i_offset = i; j_offset=j; - if (k < i) i_offset--; else if (k==i) {i_is_parity = 1; i_offset = 0;} /* set offsets to zero to disable multiply below */ - if (k < j) j_offset--; else if (k==j) {j_is_parity = 1; j_offset = 0;} - - /* at this point, [ij]_is_parity tells us whether the [current,failed] disk is parity at - * the start of this RU, and, if data, "[ij]_offset" tells us how far into the stripe - * the [current,failed] disk is. - */ - - /* call the mapping routine to get the offset into the current disk, repeat for failed disk. 
*/ - if (i_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP); - else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP); - - RF_ASSERT(row == testrow && col == testcol); - - if (j_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP); - else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP); - RF_ASSERT(row == testrow && fcol == testcol); - - /* now locate the spare unit for the failed unit */ - if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { - if (j_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP); - else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP); - } else { - *spRow = raidPtr->reconControl[row]->spareRow; - *spCol = raidPtr->reconControl[row]->spareCol; - *spOffset = *outFailedDiskSectorOffset; - } - - return(0); +static int +ComputePSDiskOffsets( + RF_Raid_t * raidPtr, /* raid descriptor */ + RF_StripeNum_t psid, /* parity stripe identifier */ + RF_RowCol_t row, /* row and column of disk to find the offsets + * for */ + RF_RowCol_t col, + RF_SectorNum_t * outDiskOffset, + RF_SectorNum_t * outFailedDiskSectorOffset, + RF_RowCol_t * spRow, /* OUT: row,col of spare unit for failed unit */ + RF_RowCol_t * spCol, + RF_SectorNum_t * spOffset) +{ /* OUT: offset into disk containing spare unit */ + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol; + RF_RaidAddr_t sosRaidAddress; /* start-of-stripe */ + 
RF_RowCol_t *diskids; + u_int i, j, k, i_offset, j_offset; + RF_RowCol_t prow, pcol; + int testcol, testrow; + RF_RowCol_t stripe; + RF_SectorNum_t poffset; + char i_is_parity = 0, j_is_parity = 0; + RF_RowCol_t stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; + + /* get a listing of the disks comprising that stripe */ + sosRaidAddress = rf_ParityStripeIDToRaidAddress(layoutPtr, psid); + (layoutPtr->map->IdentifyStripe) (raidPtr, sosRaidAddress, &diskids, &stripe); + RF_ASSERT(diskids); + + /* reject this entire parity stripe if it does not contain the + * indicated disk or it does not contain the failed disk */ + if (row != stripe) + goto skipit; + for (i = 0; i < stripeWidth; i++) { + if (col == diskids[i]) + break; + } + if (i == stripeWidth) + goto skipit; + for (j = 0; j < stripeWidth; j++) { + if (fcol == diskids[j]) + break; + } + if (j == stripeWidth) { + goto skipit; + } + /* find out which disk the parity is on */ + (layoutPtr->map->MapParity) (raidPtr, sosRaidAddress, &prow, &pcol, &poffset, RF_DONT_REMAP); + + /* find out if either the current RU or the failed RU is parity */ + /* also, if the parity occurs in this stripe prior to the data and/or + * failed col, we need to decrement i and/or j */ + for (k = 0; k < stripeWidth; k++) + if (diskids[k] == pcol) + break; + RF_ASSERT(k < stripeWidth); + i_offset = i; + j_offset = j; + if (k < i) + i_offset--; + else + if (k == i) { + i_is_parity = 1; + i_offset = 0; + } /* set offsets to zero to disable multiply + * below */ + if (k < j) + j_offset--; + else + if (k == j) { + j_is_parity = 1; + j_offset = 0; + } + /* at this point, [ij]_is_parity tells us whether the [current,failed] + * disk is parity at the start of this RU, and, if data, "[ij]_offset" + * tells us how far into the stripe the [current,failed] disk is. */ + + /* call the mapping routine to get the offset into the current disk, + * repeat for failed disk. 
*/ + if (i_is_parity) + layoutPtr->map->MapParity(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP); + else + layoutPtr->map->MapSector(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP); + + RF_ASSERT(row == testrow && col == testcol); + + if (j_is_parity) + layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP); + else + layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP); + RF_ASSERT(row == testrow && fcol == testcol); + + /* now locate the spare unit for the failed unit */ + if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { + if (j_is_parity) + layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP); + else + layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP); + } else { + *spRow = raidPtr->reconControl[row]->spareRow; + *spCol = raidPtr->reconControl[row]->spareCol; + *spOffset = *outFailedDiskSectorOffset; + } + + return (0); skipit: - Dprintf3("RECON: Skipping psid %ld: nothing needed from r%d c%d\n", - psid, row, col); - return(1); + Dprintf3("RECON: Skipping psid %ld: nothing needed from r%d c%d\n", + psid, row, col); + return (1); } - /* this is called when a buffer has become ready to write to the replacement disk */ -static int IssueNextWriteRequest(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; +static int +IssueNextWriteRequest(raidPtr, row) + RF_Raid_t *raidPtr; + RF_RowCol_t row; { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; - RF_RowCol_t fcol = 
raidPtr->reconControl[row]->fcol; - RF_ReconBuffer_t *rbuf; - RF_DiskQueueData_t *req; - - rbuf = rf_GetFullReconBuffer(raidPtr->reconControl[row]); - RF_ASSERT(rbuf); /* there must be one available, or we wouldn't have gotten the event that sent us here */ - RF_ASSERT(rbuf->pssPtr); - - rbuf->pssPtr->writeRbuf = rbuf; - rbuf->pssPtr = NULL; - - Dprintf7("RECON: New write (r %d c %d offs %d) for psid %ld ru %d (failed disk offset %ld) buf %lx\n", - rbuf->spRow, rbuf->spCol, rbuf->spOffset, rbuf->parityStripeID, - rbuf->which_ru, rbuf->failedDiskSectorOffset, rbuf->buffer); - Dprintf6("RECON: new write psid %ld %02x %02x %02x %02x %02x\n", - rbuf->parityStripeID, rbuf->buffer[0]&0xff, rbuf->buffer[1]&0xff, - rbuf->buffer[2]&0xff, rbuf->buffer[3]&0xff, rbuf->buffer[4]&0xff); - - /* should be ok to use a NULL b_proc here b/c all addrs should be in kernel space */ - req = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, rbuf->spOffset, - sectorsPerRU, rbuf->buffer, - rbuf->parityStripeID, rbuf->which_ru, - ReconWriteDoneProc, (void *) rbuf, NULL, - &raidPtr->recon_tracerecs[fcol], - (void *)raidPtr, 0, NULL); - - RF_ASSERT(req); /* XXX -- fix this -- XXX */ - - rbuf->arg = (void *) req; - rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spRow][rbuf->spCol], req, RF_IO_RECON_PRIORITY); - - return(0); + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; + RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol; + RF_ReconBuffer_t *rbuf; + RF_DiskQueueData_t *req; + + rbuf = rf_GetFullReconBuffer(raidPtr->reconControl[row]); + RF_ASSERT(rbuf); /* there must be one available, or we wouldn't + * have gotten the event that sent us here */ + RF_ASSERT(rbuf->pssPtr); + + rbuf->pssPtr->writeRbuf = rbuf; + rbuf->pssPtr = NULL; + + Dprintf7("RECON: New write (r %d c %d offs %d) for psid %ld ru %d (failed disk offset %ld) buf %lx\n", + rbuf->spRow, rbuf->spCol, rbuf->spOffset, rbuf->parityStripeID, + rbuf->which_ru, 
rbuf->failedDiskSectorOffset, rbuf->buffer); + Dprintf6("RECON: new write psid %ld %02x %02x %02x %02x %02x\n", + rbuf->parityStripeID, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff, + rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff); + + /* should be ok to use a NULL b_proc here b/c all addrs should be in + * kernel space */ + req = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, rbuf->spOffset, + sectorsPerRU, rbuf->buffer, + rbuf->parityStripeID, rbuf->which_ru, + ReconWriteDoneProc, (void *) rbuf, NULL, + &raidPtr->recon_tracerecs[fcol], + (void *) raidPtr, 0, NULL); + + RF_ASSERT(req); /* XXX -- fix this -- XXX */ + + rbuf->arg = (void *) req; + rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spRow][rbuf->spCol], req, RF_IO_RECON_PRIORITY); + + return (0); } - /* this gets called upon the completion of a reconstruction read operation * the arg is a pointer to the per-disk reconstruction control structure * for the process that just finished a read. * * called at interrupt context in the kernel, so don't do anything illegal here. 
*/ -static int ReconReadDoneProc(arg, status) - void *arg; - int status; +static int +ReconReadDoneProc(arg, status) + void *arg; + int status; { - RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg; - RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr; - - if (status) { - /* - * XXX - */ - printf("Recon read failed!\n"); - RF_PANIC(); - } - - RF_ETIMER_STOP(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - raidPtr->recon_tracerecs[ctrl->col].specific.recon.recon_fetch_to_return_us = - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - - rf_CauseReconEvent(raidPtr, ctrl->row, ctrl->col, NULL, RF_REVENT_READDONE); - return(0); + RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg; + RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr; + + if (status) { + /* + * XXX + */ + printf("Recon read failed!\n"); + RF_PANIC(); + } + RF_ETIMER_STOP(raidPtr->recon_tracerecs[ctrl->col].recon_timer); + RF_ETIMER_EVAL(raidPtr->recon_tracerecs[ctrl->col].recon_timer); + raidPtr->recon_tracerecs[ctrl->col].specific.recon.recon_fetch_to_return_us = + RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[ctrl->col].recon_timer); + RF_ETIMER_START(raidPtr->recon_tracerecs[ctrl->col].recon_timer); + + rf_CauseReconEvent(raidPtr, ctrl->row, ctrl->col, NULL, RF_REVENT_READDONE); + return (0); } - /* this gets called upon the completion of a reconstruction write operation. * the arg is a pointer to the rbuf that was just written * * called at interrupt context in the kernel, so don't do anything illegal here. 
*/ -static int ReconWriteDoneProc(arg, status) - void *arg; - int status; +static int +ReconWriteDoneProc(arg, status) + void *arg; + int status; { - RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg; + RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg; - Dprintf2("Reconstruction completed on psid %ld ru %d\n",rbuf->parityStripeID, rbuf->which_ru); - if (status) {printf("Recon write failed!\n"); /*fprintf(stderr,"Recon write failed!\n");*/ RF_PANIC();} - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, arg, RF_REVENT_WRITEDONE); - return(0); + Dprintf2("Reconstruction completed on psid %ld ru %d\n", rbuf->parityStripeID, rbuf->which_ru); + if (status) { + printf("Recon write failed!\n"); /* fprintf(stderr,"Recon + * write failed!\n"); */ + RF_PANIC(); + } + rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, arg, RF_REVENT_WRITEDONE); + return (0); } /* computes a new minimum head sep, and wakes up anyone who needs to be woken as a result */ -static void CheckForNewMinHeadSep(raidPtr, row, hsCtr) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_HeadSepLimit_t hsCtr; +static void +CheckForNewMinHeadSep(raidPtr, row, hsCtr) + RF_Raid_t *raidPtr; + RF_RowCol_t row; + RF_HeadSepLimit_t hsCtr; { - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; - RF_HeadSepLimit_t new_min; - RF_RowCol_t i; - RF_CallbackDesc_t *p; - RF_ASSERT(hsCtr >= reconCtrlPtr->minHeadSepCounter); /* from the definition of a minimum */ - - - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - - new_min = ~ (1L<< (8*sizeof(long)-1)); /* 0x7FFF....FFF */ - for (i=0; i<raidPtr->numCol; i++) if (i != reconCtrlPtr->fcol) { - if (reconCtrlPtr->perDiskInfo[i].headSepCounter < new_min) new_min = reconCtrlPtr->perDiskInfo[i].headSepCounter; - } - - /* set the new minimum and wake up anyone who can now run again */ - if (new_min != reconCtrlPtr->minHeadSepCounter) { - reconCtrlPtr->minHeadSepCounter = new_min; - Dprintf1("RECON: new min head pos counter val is 
%ld\n",new_min); - while (reconCtrlPtr->headSepCBList) { - if (reconCtrlPtr->headSepCBList->callbackArg.v > new_min) break; - p = reconCtrlPtr->headSepCBList; - reconCtrlPtr->headSepCBList = p->next; - p->next = NULL; - rf_CauseReconEvent(raidPtr, p->row, p->col, NULL, RF_REVENT_HEADSEPCLEAR); - rf_FreeCallbackDesc(p); - } - - } - - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); -} + RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; + RF_HeadSepLimit_t new_min; + RF_RowCol_t i; + RF_CallbackDesc_t *p; + RF_ASSERT(hsCtr >= reconCtrlPtr->minHeadSepCounter); /* from the definition + * of a minimum */ + + + RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); + + new_min = ~(1L << (8 * sizeof(long) - 1)); /* 0x7FFF....FFF */ + for (i = 0; i < raidPtr->numCol; i++) + if (i != reconCtrlPtr->fcol) { + if (reconCtrlPtr->perDiskInfo[i].headSepCounter < new_min) + new_min = reconCtrlPtr->perDiskInfo[i].headSepCounter; + } + /* set the new minimum and wake up anyone who can now run again */ + if (new_min != reconCtrlPtr->minHeadSepCounter) { + reconCtrlPtr->minHeadSepCounter = new_min; + Dprintf1("RECON: new min head pos counter val is %ld\n", new_min); + while (reconCtrlPtr->headSepCBList) { + if (reconCtrlPtr->headSepCBList->callbackArg.v > new_min) + break; + p = reconCtrlPtr->headSepCBList; + reconCtrlPtr->headSepCBList = p->next; + p->next = NULL; + rf_CauseReconEvent(raidPtr, p->row, p->col, NULL, RF_REVENT_HEADSEPCLEAR); + rf_FreeCallbackDesc(p); + } + } + RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); +} /* checks to see that the maximum head separation will not be violated * if we initiate a reconstruction I/O on the indicated disk. Limiting the * maximum head separation between two disks eliminates the nasty buffer-stall @@ -1332,60 +1104,64 @@ static void CheckForNewMinHeadSep(raidPtr, row, hsCtr) * returns non-zero if and only if we have to stop working on the indicated disk * due to a head-separation delay. 
*/ -static int CheckHeadSeparation( - RF_Raid_t *raidPtr, - RF_PerDiskReconCtrl_t *ctrl, - RF_RowCol_t row, - RF_RowCol_t col, - RF_HeadSepLimit_t hsCtr, - RF_ReconUnitNum_t which_ru) +static int +CheckHeadSeparation( + RF_Raid_t * raidPtr, + RF_PerDiskReconCtrl_t * ctrl, + RF_RowCol_t row, + RF_RowCol_t col, + RF_HeadSepLimit_t hsCtr, + RF_ReconUnitNum_t which_ru) { - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; - RF_CallbackDesc_t *cb, *p, *pt; - int retval = 0, tid; - - /* if we're too far ahead of the slowest disk, stop working on this disk - * until the slower ones catch up. We do this by scheduling a wakeup callback - * for the time when the slowest disk has caught up. We define "caught up" - * with 20% hysteresis, i.e. the head separation must have fallen to at most - * 80% of the max allowable head separation before we'll wake up. - * - */ - rf_get_threadid(tid); - RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - if ((raidPtr->headSepLimit >= 0) && - ((ctrl->headSepCounter - reconCtrlPtr->minHeadSepCounter) > raidPtr->headSepLimit)) - { - Dprintf6("[%d] RECON: head sep stall: row %d col %d hsCtr %ld minHSCtr %ld limit %ld\n", - tid,row,col,ctrl->headSepCounter, reconCtrlPtr->minHeadSepCounter, raidPtr->headSepLimit); - cb = rf_AllocCallbackDesc(); - /* the minHeadSepCounter value we have to get to before we'll wake up. build in 20% hysteresis. 
*/ - cb->callbackArg.v = (ctrl->headSepCounter - raidPtr->headSepLimit + raidPtr->headSepLimit/5); - cb->row = row; cb->col = col; - cb->next = NULL; - - /* insert this callback descriptor into the sorted list of pending head-sep callbacks */ - p = reconCtrlPtr->headSepCBList; - if (!p) reconCtrlPtr->headSepCBList = cb; - else if (cb->callbackArg.v < p->callbackArg.v) { - cb->next = reconCtrlPtr->headSepCBList; - reconCtrlPtr->headSepCBList = cb; - } - else { - for (pt=p, p=p->next; p && (p->callbackArg.v < cb->callbackArg.v); pt=p,p=p->next); - cb->next = p; - pt->next = cb; - } - retval = 1; + RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; + RF_CallbackDesc_t *cb, *p, *pt; + int retval = 0, tid; + + /* if we're too far ahead of the slowest disk, stop working on this + * disk until the slower ones catch up. We do this by scheduling a + * wakeup callback for the time when the slowest disk has caught up. + * We define "caught up" with 20% hysteresis, i.e. the head separation + * must have fallen to at most 80% of the max allowable head + * separation before we'll wake up. + * + */ + rf_get_threadid(tid); + RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); + if ((raidPtr->headSepLimit >= 0) && + ((ctrl->headSepCounter - reconCtrlPtr->minHeadSepCounter) > raidPtr->headSepLimit)) { + Dprintf6("[%d] RECON: head sep stall: row %d col %d hsCtr %ld minHSCtr %ld limit %ld\n", + tid, row, col, ctrl->headSepCounter, reconCtrlPtr->minHeadSepCounter, raidPtr->headSepLimit); + cb = rf_AllocCallbackDesc(); + /* the minHeadSepCounter value we have to get to before we'll + * wake up. build in 20% hysteresis. 
*/ + cb->callbackArg.v = (ctrl->headSepCounter - raidPtr->headSepLimit + raidPtr->headSepLimit / 5); + cb->row = row; + cb->col = col; + cb->next = NULL; + + /* insert this callback descriptor into the sorted list of + * pending head-sep callbacks */ + p = reconCtrlPtr->headSepCBList; + if (!p) + reconCtrlPtr->headSepCBList = cb; + else + if (cb->callbackArg.v < p->callbackArg.v) { + cb->next = reconCtrlPtr->headSepCBList; + reconCtrlPtr->headSepCBList = cb; + } else { + for (pt = p, p = p->next; p && (p->callbackArg.v < cb->callbackArg.v); pt = p, p = p->next); + cb->next = p; + pt->next = cb; + } + retval = 1; #if RF_RECON_STATS > 0 - ctrl->reconCtrl->reconDesc->hsStallCount++; -#endif /* RF_RECON_STATS > 0 */ - } - RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); + ctrl->reconCtrl->reconDesc->hsStallCount++; +#endif /* RF_RECON_STATS > 0 */ + } + RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); - return(retval); + return (retval); } - /* checks to see if reconstruction has been either forced or blocked by a user operation. * if forced, we skip this RU entirely. * else if blocked, put ourselves on the wait list. 
@@ -1393,203 +1169,231 @@ static int CheckHeadSeparation( * * ASSUMES THE PSS MUTEX IS LOCKED UPON ENTRY */ -static int CheckForcedOrBlockedReconstruction( - RF_Raid_t *raidPtr, - RF_ReconParityStripeStatus_t *pssPtr, - RF_PerDiskReconCtrl_t *ctrl, - RF_RowCol_t row, - RF_RowCol_t col, - RF_StripeNum_t psid, - RF_ReconUnitNum_t which_ru) +static int +CheckForcedOrBlockedReconstruction( + RF_Raid_t * raidPtr, + RF_ReconParityStripeStatus_t * pssPtr, + RF_PerDiskReconCtrl_t * ctrl, + RF_RowCol_t row, + RF_RowCol_t col, + RF_StripeNum_t psid, + RF_ReconUnitNum_t which_ru) { - RF_CallbackDesc_t *cb; - int retcode = 0; - - if ((pssPtr->flags & RF_PSS_FORCED_ON_READ) || (pssPtr->flags & RF_PSS_FORCED_ON_WRITE)) retcode = RF_PSS_FORCED_ON_WRITE; - else if (pssPtr->flags & RF_PSS_RECON_BLOCKED) { - Dprintf4("RECON: row %d col %d blocked at psid %ld ru %d\n",row, col, psid, which_ru); - cb = rf_AllocCallbackDesc(); /* append ourselves to the blockage-wait list */ - cb->row = row; cb->col = col; - cb->next = pssPtr->blockWaitList; - pssPtr->blockWaitList = cb; - retcode = RF_PSS_RECON_BLOCKED; - } - - if (!retcode) pssPtr->flags |= RF_PSS_UNDER_RECON; /* mark this RU as under reconstruction */ - - return(retcode); + RF_CallbackDesc_t *cb; + int retcode = 0; + + if ((pssPtr->flags & RF_PSS_FORCED_ON_READ) || (pssPtr->flags & RF_PSS_FORCED_ON_WRITE)) + retcode = RF_PSS_FORCED_ON_WRITE; + else + if (pssPtr->flags & RF_PSS_RECON_BLOCKED) { + Dprintf4("RECON: row %d col %d blocked at psid %ld ru %d\n", row, col, psid, which_ru); + cb = rf_AllocCallbackDesc(); /* append ourselves to + * the blockage-wait + * list */ + cb->row = row; + cb->col = col; + cb->next = pssPtr->blockWaitList; + pssPtr->blockWaitList = cb; + retcode = RF_PSS_RECON_BLOCKED; + } + if (!retcode) + pssPtr->flags |= RF_PSS_UNDER_RECON; /* mark this RU as under + * reconstruction */ + + return (retcode); } - /* if reconstruction is currently ongoing for the indicated stripeID, reconstruction * is forced to 
completion and we return non-zero to indicate that the caller must * wait. If not, then reconstruction is blocked on the indicated stripe and the * routine returns zero. If and only if we return non-zero, we'll cause the cbFunc * to get invoked with the cbArg when the reconstruction has completed. */ -int rf_ForceOrBlockRecon(raidPtr, asmap, cbFunc, cbArg) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - void (*cbFunc)(RF_Raid_t *,void *); - void *cbArg; +int +rf_ForceOrBlockRecon(raidPtr, asmap, cbFunc, cbArg) + RF_Raid_t *raidPtr; + RF_AccessStripeMap_t *asmap; + void (*cbFunc) (RF_Raid_t *, void *); + void *cbArg; { - RF_RowCol_t row = asmap->physInfo->row; /* which row of the array we're working on */ - RF_StripeNum_t stripeID = asmap->stripeID; /* the stripe ID we're forcing recon on */ - RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; /* num sects in one RU */ - RF_ReconParityStripeStatus_t *pssPtr; /* a pointer to the parity stripe status structure */ - RF_StripeNum_t psid; /* parity stripe id */ - RF_SectorNum_t offset, fd_offset; /* disk offset, failed-disk offset */ - RF_RowCol_t *diskids; - RF_RowCol_t stripe; - int tid; - RF_ReconUnitNum_t which_ru; /* RU within parity stripe */ - RF_RowCol_t fcol, diskno, i; - RF_ReconBuffer_t *new_rbuf; /* ptr to newly allocated rbufs */ - RF_DiskQueueData_t *req; /* disk I/O req to be enqueued */ - RF_CallbackDesc_t *cb; - int created = 0, nPromoted; - - rf_get_threadid(tid); - psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru); - - RF_LOCK_PSS_MUTEX(raidPtr, row, psid); - - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE|RF_PSS_RECON_BLOCKED, &created); - - /* if recon is not ongoing on this PS, just return */ - if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) { - RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); - return(0); - } - - /* otherwise, we have to wait for reconstruction to complete on 
this RU. */ - /* In order to avoid waiting for a potentially large number of low-priority accesses to - * complete, we force a normal-priority (i.e. not low-priority) reconstruction - * on this RU. - */ - if (!(pssPtr->flags & RF_PSS_FORCED_ON_WRITE) && !(pssPtr->flags & RF_PSS_FORCED_ON_READ)) { - DDprintf1("Forcing recon on psid %ld\n",psid); - pssPtr->flags |= RF_PSS_FORCED_ON_WRITE; /* mark this RU as under forced recon */ - pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; /* clear the blockage that we just set */ - fcol = raidPtr->reconControl[row]->fcol; - - /* get a listing of the disks comprising the indicated stripe */ - (raidPtr->Layout.map->IdentifyStripe)(raidPtr, asmap->raidAddress, &diskids, &stripe); - RF_ASSERT(row == stripe); - - /* For previously issued reads, elevate them to normal priority. If the I/O has already completed, - * it won't be found in the queue, and hence this will be a no-op. - * For unissued reads, allocate buffers and issue new reads. The fact that we've set the - * FORCED bit means that the regular recon procs will not re-issue these reqs - */ - for (i=0; i<raidPtr->Layout.numDataCol+raidPtr->Layout.numParityCol; i++) if ( (diskno = diskids[i]) != fcol) { - if (pssPtr->issued[diskno]) { - nPromoted = rf_DiskIOPromote(&raidPtr->Queues[row][diskno], psid, which_ru); - if (rf_reconDebug && nPromoted) printf("[%d] promoted read from row %d col %d\n",tid,row,diskno); - } else { - new_rbuf = rf_MakeReconBuffer(raidPtr, row, diskno, RF_RBUF_TYPE_FORCED); /* create new buf */ - ComputePSDiskOffsets(raidPtr, psid, row, diskno, &offset, &fd_offset, - &new_rbuf->spRow, &new_rbuf->spCol, &new_rbuf->spOffset); /* find offsets & spare location */ - new_rbuf->parityStripeID = psid; /* fill in the buffer */ - new_rbuf->which_ru = which_ru; - new_rbuf->failedDiskSectorOffset = fd_offset; - new_rbuf->priority = RF_IO_NORMAL_PRIORITY; - - /* use NULL b_proc b/c all addrs should be in kernel space */ - req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, offset + 
which_ru * sectorsPerRU, sectorsPerRU, new_rbuf->buffer, - psid, which_ru, (int (*)(void *, int))ForceReconReadDoneProc, (void *) new_rbuf, NULL, - NULL,(void *)raidPtr, 0, NULL); - - RF_ASSERT(req); /* XXX -- fix this -- XXX */ - - new_rbuf->arg = req; - rf_DiskIOEnqueue(&raidPtr->Queues[row][diskno], req, RF_IO_NORMAL_PRIORITY); /* enqueue the I/O */ - Dprintf3("[%d] Issued new read req on row %d col %d\n",tid,row,diskno); - } - } - - /* if the write is sitting in the disk queue, elevate its priority */ - if (rf_DiskIOPromote(&raidPtr->Queues[row][fcol], psid, which_ru)) printf("[%d] promoted write to row %d col %d\n",tid,row,fcol); - } - - /* install a callback descriptor to be invoked when recon completes on this parity stripe. */ - cb = rf_AllocCallbackDesc(); - /* XXX the following is bogus.. These functions don't really match!! GO */ - cb->callbackFunc = (void (*)(RF_CBParam_t))cbFunc; - cb->callbackArg.p = (void *) cbArg; - cb->next = pssPtr->procWaitList; - pssPtr->procWaitList = cb; - DDprintf2("[%d] Waiting for forced recon on psid %ld\n",tid,psid); - - RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); - return(1); + RF_RowCol_t row = asmap->physInfo->row; /* which row of the array + * we're working on */ + RF_StripeNum_t stripeID = asmap->stripeID; /* the stripe ID we're + * forcing recon on */ + RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; /* num sects in one RU */ + RF_ReconParityStripeStatus_t *pssPtr; /* a pointer to the parity + * stripe status structure */ + RF_StripeNum_t psid; /* parity stripe id */ + RF_SectorNum_t offset, fd_offset; /* disk offset, failed-disk + * offset */ + RF_RowCol_t *diskids; + RF_RowCol_t stripe; + int tid; + RF_ReconUnitNum_t which_ru; /* RU within parity stripe */ + RF_RowCol_t fcol, diskno, i; + RF_ReconBuffer_t *new_rbuf; /* ptr to newly allocated rbufs */ + RF_DiskQueueData_t *req;/* disk I/O req to be enqueued */ + RF_CallbackDesc_t *cb; + int created = 0, nPromoted; + + 
rf_get_threadid(tid); + psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru); + + RF_LOCK_PSS_MUTEX(raidPtr, row, psid); + + pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE | RF_PSS_RECON_BLOCKED, &created); + + /* if recon is not ongoing on this PS, just return */ + if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) { + RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); + return (0); + } + /* otherwise, we have to wait for reconstruction to complete on this + * RU. */ + /* In order to avoid waiting for a potentially large number of + * low-priority accesses to complete, we force a normal-priority (i.e. + * not low-priority) reconstruction on this RU. */ + if (!(pssPtr->flags & RF_PSS_FORCED_ON_WRITE) && !(pssPtr->flags & RF_PSS_FORCED_ON_READ)) { + DDprintf1("Forcing recon on psid %ld\n", psid); + pssPtr->flags |= RF_PSS_FORCED_ON_WRITE; /* mark this RU as under + * forced recon */ + pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; /* clear the blockage + * that we just set */ + fcol = raidPtr->reconControl[row]->fcol; + + /* get a listing of the disks comprising the indicated stripe */ + (raidPtr->Layout.map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids, &stripe); + RF_ASSERT(row == stripe); + + /* For previously issued reads, elevate them to normal + * priority. If the I/O has already completed, it won't be + * found in the queue, and hence this will be a no-op. For + * unissued reads, allocate buffers and issue new reads. 
The + * fact that we've set the FORCED bit means that the regular + * recon procs will not re-issue these reqs */ + for (i = 0; i < raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol; i++) + if ((diskno = diskids[i]) != fcol) { + if (pssPtr->issued[diskno]) { + nPromoted = rf_DiskIOPromote(&raidPtr->Queues[row][diskno], psid, which_ru); + if (rf_reconDebug && nPromoted) + printf("[%d] promoted read from row %d col %d\n", tid, row, diskno); + } else { + new_rbuf = rf_MakeReconBuffer(raidPtr, row, diskno, RF_RBUF_TYPE_FORCED); /* create new buf */ + ComputePSDiskOffsets(raidPtr, psid, row, diskno, &offset, &fd_offset, + &new_rbuf->spRow, &new_rbuf->spCol, &new_rbuf->spOffset); /* find offsets & spare + * location */ + new_rbuf->parityStripeID = psid; /* fill in the buffer */ + new_rbuf->which_ru = which_ru; + new_rbuf->failedDiskSectorOffset = fd_offset; + new_rbuf->priority = RF_IO_NORMAL_PRIORITY; + + /* use NULL b_proc b/c all addrs + * should be in kernel space */ + req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, offset + which_ru * sectorsPerRU, sectorsPerRU, new_rbuf->buffer, + psid, which_ru, (int (*) (void *, int)) ForceReconReadDoneProc, (void *) new_rbuf, NULL, + NULL, (void *) raidPtr, 0, NULL); + + RF_ASSERT(req); /* XXX -- fix this -- + * XXX */ + + new_rbuf->arg = req; + rf_DiskIOEnqueue(&raidPtr->Queues[row][diskno], req, RF_IO_NORMAL_PRIORITY); /* enqueue the I/O */ + Dprintf3("[%d] Issued new read req on row %d col %d\n", tid, row, diskno); + } + } + /* if the write is sitting in the disk queue, elevate its + * priority */ + if (rf_DiskIOPromote(&raidPtr->Queues[row][fcol], psid, which_ru)) + printf("[%d] promoted write to row %d col %d\n", tid, row, fcol); + } + /* install a callback descriptor to be invoked when recon completes on + * this parity stripe. */ + cb = rf_AllocCallbackDesc(); + /* XXX the following is bogus.. These functions don't really match!! 
+ * GO */ + cb->callbackFunc = (void (*) (RF_CBParam_t)) cbFunc; + cb->callbackArg.p = (void *) cbArg; + cb->next = pssPtr->procWaitList; + pssPtr->procWaitList = cb; + DDprintf2("[%d] Waiting for forced recon on psid %ld\n", tid, psid); + + RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); + return (1); } - /* called upon the completion of a forced reconstruction read. * all we do is schedule the FORCEDREADONE event. * called at interrupt context in the kernel, so don't do anything illegal here. */ -static void ForceReconReadDoneProc(arg, status) - void *arg; - int status; +static void +ForceReconReadDoneProc(arg, status) + void *arg; + int status; { - RF_ReconBuffer_t *rbuf = arg; + RF_ReconBuffer_t *rbuf = arg; - if (status) {printf("Forced recon read failed!\n"); /*fprintf(stderr,"Forced recon read failed!\n");*/ RF_PANIC();} - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, (void *) rbuf, RF_REVENT_FORCEDREADDONE); + if (status) { + printf("Forced recon read failed!\n"); /* fprintf(stderr,"Forced + * recon read + * failed!\n"); */ + RF_PANIC(); + } + rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, (void *) rbuf, RF_REVENT_FORCEDREADDONE); } - /* releases a block on the reconstruction of the indicated stripe */ -int rf_UnblockRecon(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; +int +rf_UnblockRecon(raidPtr, asmap) + RF_Raid_t *raidPtr; + RF_AccessStripeMap_t *asmap; { - RF_RowCol_t row = asmap->origRow; - RF_StripeNum_t stripeID = asmap->stripeID; - RF_ReconParityStripeStatus_t *pssPtr; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psid; - int tid, created = 0; - RF_CallbackDesc_t *cb; - - rf_get_threadid(tid); - psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru); - RF_LOCK_PSS_MUTEX( raidPtr, row, psid); - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_NONE, &created); - - /* When recon is forced, the pss desc can get deleted 
before we get back to unblock recon. - * But, this can _only_ happen when recon is forced. - * It would be good to put some kind of sanity check here, but how to decide if recon - * was just forced or not? - */ - if (!pssPtr) { - /*printf("Warning: no pss descriptor upon unblock on psid %ld RU %d\n",psid,which_ru);*/ - if (rf_reconDebug || rf_pssDebug) printf("Warning: no pss descriptor upon unblock on psid %ld RU %d\n",(long)psid,which_ru); - goto out; - } - - pssPtr->blockCount--; - Dprintf3("[%d] unblocking recon on psid %ld: blockcount is %d\n",tid,psid,pssPtr->blockCount); - if (pssPtr->blockCount == 0) { /* if recon blockage has been released */ - - /* unblock recon before calling CauseReconEvent in case CauseReconEvent causes us to - * try to issue a new read before returning here. - */ - pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; - - - while (pssPtr->blockWaitList) { /* spin through the block-wait list and release all the waiters */ - cb = pssPtr->blockWaitList; - pssPtr->blockWaitList = cb->next; - cb->next = NULL; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, NULL, RF_REVENT_BLOCKCLEAR); - rf_FreeCallbackDesc(cb); - } - if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) { /* if no recon was requested while recon was blocked */ - rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr); - } - } - + RF_RowCol_t row = asmap->origRow; + RF_StripeNum_t stripeID = asmap->stripeID; + RF_ReconParityStripeStatus_t *pssPtr; + RF_ReconUnitNum_t which_ru; + RF_StripeNum_t psid; + int tid, created = 0; + RF_CallbackDesc_t *cb; + + rf_get_threadid(tid); + psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru); + RF_LOCK_PSS_MUTEX(raidPtr, row, psid); + pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_NONE, &created); + + /* When recon is forced, the pss desc can get deleted before we get + * back to unblock recon. But, this can _only_ happen when recon is + * forced. 
It would be good to put some kind of sanity check here, but + * how to decide if recon was just forced or not? */ + if (!pssPtr) { + /* printf("Warning: no pss descriptor upon unblock on psid %ld + * RU %d\n",psid,which_ru); */ + if (rf_reconDebug || rf_pssDebug) + printf("Warning: no pss descriptor upon unblock on psid %ld RU %d\n", (long) psid, which_ru); + goto out; + } + pssPtr->blockCount--; + Dprintf3("[%d] unblocking recon on psid %ld: blockcount is %d\n", tid, psid, pssPtr->blockCount); + if (pssPtr->blockCount == 0) { /* if recon blockage has been released */ + + /* unblock recon before calling CauseReconEvent in case + * CauseReconEvent causes us to try to issue a new read before + * returning here. */ + pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; + + + while (pssPtr->blockWaitList) { /* spin through the block-wait + * list and release all the + * waiters */ + cb = pssPtr->blockWaitList; + pssPtr->blockWaitList = cb->next; + cb->next = NULL; + rf_CauseReconEvent(raidPtr, cb->row, cb->col, NULL, RF_REVENT_BLOCKCLEAR); + rf_FreeCallbackDesc(cb); + } + if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) { /* if no recon was + * requested while recon + * was blocked */ + rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr); + } + } out: - RF_UNLOCK_PSS_MUTEX( raidPtr, row, psid ); - return(0); + RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); + return (0); } diff --git a/sys/dev/raidframe/rf_reconstruct.h b/sys/dev/raidframe/rf_reconstruct.h index 5913e626609..c8bc680f0f6 100644 --- a/sys/dev/raidframe/rf_reconstruct.h +++ b/sys/dev/raidframe/rf_reconstruct.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconstruct.h,v 1.1 1999/01/11 14:29:47 niklas Exp $ */ -/* $NetBSD: rf_reconstruct.h,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_reconstruct.h,v 1.2 1999/02/16 00:03:23 niklas Exp $ */ +/* $NetBSD: rf_reconstruct.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -31,94 +31,6 @@ * rf_reconstruct.h -- header file for reconstruction code *********************************************************/ -/* : - * Log: rf_reconstruct.h,v - * Revision 1.25 1996/08/01 15:57:24 jimz - * minor cleanup - * - * Revision 1.24 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.23 1996/07/15 05:40:41 jimz - * some recon datastructure cleanup - * better handling of multiple failures - * added undocumented double-recon test - * - * Revision 1.22 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.21 1996/07/11 19:08:00 jimz - * generalize reconstruction mechanism - * allow raid1 reconstructs via copyback (done with array - * quiesced, not online, therefore not disk-directed) - * - * Revision 1.20 1996/06/11 10:57:30 jimz - * add rf_RegisterReconDoneProc - * - * Revision 1.19 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.18 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.17 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. 
- * - * Revision 1.16 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.15 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.14 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.13 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.12 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.11 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.10 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.9 1995/12/06 15:04:55 root - * added copyright info - * - */ - #ifndef _RF__RF_RECONSTRUCT_H_ #define _RF__RF_RECONSTRUCT_H_ @@ -129,31 +41,37 @@ /* reconstruction configuration information */ struct RF_ReconConfig_s { - unsigned numFloatingReconBufs; /* number of floating recon bufs to use */ - RF_HeadSepLimit_t headSepLimit; /* how far apart the heads are allow to become, in parity stripes */ + unsigned numFloatingReconBufs; /* number of floating recon bufs to + * use */ + RF_HeadSepLimit_t headSepLimit; /* how far apart the heads are allow + * to become, in parity stripes */ }; - /* a reconstruction buffer */ struct RF_ReconBuffer_s { - RF_Raid_t *raidPtr; /* void * to avoid recursive includes */ - caddr_t buffer; /* points to the data */ - RF_StripeNum_t parityStripeID; /* the parity stripe that this data relates to */ - int which_ru; /* which reconstruction unit within the PSS */ - RF_SectorNum_t failedDiskSectorOffset;/* the offset into the failed disk */ - RF_RowCol_t row, col; /* which disk this buffer belongs to or is targeted at */ - RF_StripeCount_t count; /* counts the # of SUs installed so far */ - int priority; /* used to force hi priority recon */ - RF_RbufType_t type; /* FORCED or FLOATING */ - char *arrived; /* [x] = 1/0 if SU from disk x has/hasn't arrived */ - RF_ReconBuffer_t *next; /* used for buffer management */ - void *arg; /* generic field for general use */ - RF_RowCol_t spRow, spCol; /* spare disk to which this buf should be written */ - /* if dist sparing off, always identifies the replacement disk 
*/ - RF_SectorNum_t spOffset; /* offset into the spare disk */ - /* if dist sparing off, identical to failedDiskSectorOffset */ - RF_ReconParityStripeStatus_t *pssPtr; /* debug- pss associated with issue-pending write */ + RF_Raid_t *raidPtr; /* void * to avoid recursive includes */ + caddr_t buffer; /* points to the data */ + RF_StripeNum_t parityStripeID; /* the parity stripe that this data + * relates to */ + int which_ru; /* which reconstruction unit within the PSS */ + RF_SectorNum_t failedDiskSectorOffset; /* the offset into the failed + * disk */ + RF_RowCol_t row, col; /* which disk this buffer belongs to or is + * targeted at */ + RF_StripeCount_t count; /* counts the # of SUs installed so far */ + int priority; /* used to force hi priority recon */ + RF_RbufType_t type; /* FORCED or FLOATING */ + char *arrived; /* [x] = 1/0 if SU from disk x has/hasn't + * arrived */ + RF_ReconBuffer_t *next; /* used for buffer management */ + void *arg; /* generic field for general use */ + RF_RowCol_t spRow, spCol; /* spare disk to which this buf should + * be written */ + /* if dist sparing off, always identifies the replacement disk */ + RF_SectorNum_t spOffset;/* offset into the spare disk */ + /* if dist sparing off, identical to failedDiskSectorOffset */ + RF_ReconParityStripeStatus_t *pssPtr; /* debug- pss associated with + * issue-pending write */ }; - /* a reconstruction event descriptor. 
The event types currently are: * RF_REVENT_READDONE -- a read operation has completed * RF_REVENT_WRITEDONE -- a write operation has completed @@ -172,87 +90,108 @@ typedef enum RF_Revent_e { RF_REVENT_HEADSEPCLEAR, RF_REVENT_SKIP, RF_REVENT_FORCEDREADDONE -} RF_Revent_t; +} RF_Revent_t; struct RF_ReconEvent_s { - RF_Revent_t type; /* what kind of event has occurred */ - RF_RowCol_t col; /* row ID is implicit in the queue in which the event is placed */ - void *arg; /* a generic argument */ - RF_ReconEvent_t *next; + RF_Revent_t type; /* what kind of event has occurred */ + RF_RowCol_t col; /* row ID is implicit in the queue in which + * the event is placed */ + void *arg; /* a generic argument */ + RF_ReconEvent_t *next; }; - /* * Reconstruction control information maintained per-disk * (for surviving disks) */ struct RF_PerDiskReconCtrl_s { - RF_ReconCtrl_t *reconCtrl; - RF_RowCol_t row, col; /* to make this structure self-identifying */ - RF_StripeNum_t curPSID; /* the next parity stripe ID to check on this disk */ - RF_HeadSepLimit_t headSepCounter; /* counter used to control maximum head separation */ - RF_SectorNum_t diskOffset; /* the offset into the indicated disk of the current PU */ - RF_ReconUnitNum_t ru_count; /* this counts off the recon units within each parity unit */ - RF_ReconBuffer_t *rbuf; /* the recon buffer assigned to this disk */ + RF_ReconCtrl_t *reconCtrl; + RF_RowCol_t row, col; /* to make this structure self-identifying */ + RF_StripeNum_t curPSID; /* the next parity stripe ID to check on this + * disk */ + RF_HeadSepLimit_t headSepCounter; /* counter used to control + * maximum head separation */ + RF_SectorNum_t diskOffset; /* the offset into the indicated disk + * of the current PU */ + RF_ReconUnitNum_t ru_count; /* this counts off the recon units + * within each parity unit */ + RF_ReconBuffer_t *rbuf; /* the recon buffer assigned to this disk */ }; - /* main reconstruction control structure */ struct RF_ReconCtrl_s { - 
RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t fcol; /* which column has failed */ - RF_PerDiskReconCtrl_t *perDiskInfo; /* information maintained per-disk */ - RF_ReconMap_t *reconMap; /* map of what has/has not been reconstructed */ - RF_RowCol_t spareRow; /* which of the spare disks we're using */ - RF_RowCol_t spareCol; - RF_StripeNum_t lastPSID; /* the ID of the last parity stripe we want reconstructed */ - int percentComplete; /* percentage completion of reconstruction */ - - /* reconstruction event queue */ - RF_ReconEvent_t *eventQueue; /* queue of pending reconstruction events */ - RF_DECLARE_MUTEX(eq_mutex) /* mutex for locking event queue */ - RF_DECLARE_COND(eq_cond) /* condition variable for signalling recon events */ - int eq_count; /* debug only */ - - /* reconstruction buffer management */ - RF_DECLARE_MUTEX(rb_mutex) /* mutex for messing around with recon buffers */ - RF_ReconBuffer_t *floatingRbufs; /* available floating reconstruction buffers */ - RF_ReconBuffer_t *committedRbufs; /* recon buffers that have been committed to some waiting disk */ - RF_ReconBuffer_t *fullBufferList; /* full buffers waiting to be written out */ - RF_ReconBuffer_t *priorityList; /* full buffers that have been elevated to higher priority */ - RF_CallbackDesc_t *bufferWaitList; /* disks that are currently blocked waiting for buffers */ - - /* parity stripe status table */ - RF_PSStatusHeader_t *pssTable; /* stores the reconstruction status of active parity stripes */ - - /* maximum-head separation control */ - RF_HeadSepLimit_t minHeadSepCounter; /* the minimum hs counter over all disks */ - RF_CallbackDesc_t *headSepCBList; /* list of callbacks to be done as minPSID advances */ - - /* performance monitoring */ - struct timeval starttime; /* recon start time */ - - void (*continueFunc)(void *); /* function to call when io returns*/ - void *continueArg; /* argument for Func */ + RF_RaidReconDesc_t *reconDesc; + RF_RowCol_t fcol; /* which column has failed */ + 
RF_PerDiskReconCtrl_t *perDiskInfo; /* information maintained + * per-disk */ + RF_ReconMap_t *reconMap;/* map of what has/has not been reconstructed */ + RF_RowCol_t spareRow; /* which of the spare disks we're using */ + RF_RowCol_t spareCol; + RF_StripeNum_t lastPSID;/* the ID of the last parity stripe we want + * reconstructed */ + int percentComplete;/* percentage completion of reconstruction */ + + /* reconstruction event queue */ + RF_ReconEvent_t *eventQueue; /* queue of pending reconstruction + * events */ + RF_DECLARE_MUTEX(eq_mutex) /* mutex for locking event + * queue */ + RF_DECLARE_COND(eq_cond) /* condition variable for + * signalling recon events */ + int eq_count; /* debug only */ + + /* reconstruction buffer management */ + RF_DECLARE_MUTEX(rb_mutex) /* mutex for messing around + * with recon buffers */ + RF_ReconBuffer_t *floatingRbufs; /* available floating + * reconstruction buffers */ + RF_ReconBuffer_t *committedRbufs; /* recon buffers that have + * been committed to some + * waiting disk */ + RF_ReconBuffer_t *fullBufferList; /* full buffers waiting to be + * written out */ + RF_ReconBuffer_t *priorityList; /* full buffers that have been + * elevated to higher priority */ + RF_CallbackDesc_t *bufferWaitList; /* disks that are currently + * blocked waiting for buffers */ + + /* parity stripe status table */ + RF_PSStatusHeader_t *pssTable; /* stores the reconstruction status of + * active parity stripes */ + + /* maximum-head separation control */ + RF_HeadSepLimit_t minHeadSepCounter; /* the minimum hs counter over + * all disks */ + RF_CallbackDesc_t *headSepCBList; /* list of callbacks to be + * done as minPSID advances */ + + /* performance monitoring */ + struct timeval starttime; /* recon start time */ + + void (*continueFunc) (void *); /* function to call when io + * returns */ + void *continueArg; /* argument for Func */ }; - /* the default priority for reconstruction accesses */ #define RF_IO_RECON_PRIORITY RF_IO_LOW_PRIORITY -int 
rf_ConfigureReconstruction(RF_ShutdownList_t **listp); +int rf_ConfigureReconstruction(RF_ShutdownList_t ** listp); -int rf_ReconstructFailedDisk(RF_Raid_t *raidPtr, RF_RowCol_t row, - RF_RowCol_t col); +int +rf_ReconstructFailedDisk(RF_Raid_t * raidPtr, RF_RowCol_t row, + RF_RowCol_t col); -int rf_ReconstructFailedDiskBasic(RF_Raid_t *raidPtr, RF_RowCol_t row, - RF_RowCol_t col); +int +rf_ReconstructFailedDiskBasic(RF_Raid_t * raidPtr, RF_RowCol_t row, + RF_RowCol_t col); -int rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc); +int rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t * reconDesc); -int rf_ForceOrBlockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, - void (*cbFunc)(RF_Raid_t *,void *), void *cbArg); +int +rf_ForceOrBlockRecon(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, + void (*cbFunc) (RF_Raid_t *, void *), void *cbArg); -int rf_UnblockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap); + int rf_UnblockRecon(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); -int rf_RegisterReconDoneProc(RF_Raid_t *raidPtr, void (*proc)(RF_Raid_t *, void *), void *arg, - RF_ReconDoneProc_t **handlep); + int rf_RegisterReconDoneProc(RF_Raid_t * raidPtr, void (*proc) (RF_Raid_t *, void *), void *arg, + RF_ReconDoneProc_t ** handlep); -#endif /* !_RF__RF_RECONSTRUCT_H_ */ +#endif /* !_RF__RF_RECONSTRUCT_H_ */ diff --git a/sys/dev/raidframe/rf_reconstub.c b/sys/dev/raidframe/rf_reconstub.c index 2502462ea8b..d6a84a98d20 100644 --- a/sys/dev/raidframe/rf_reconstub.c +++ b/sys/dev/raidframe/rf_reconstub.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconstub.c,v 1.1 1999/01/11 14:29:47 niklas Exp $ */ -/* $NetBSD: rf_reconstub.c,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_reconstub.c,v 1.2 1999/02/16 00:03:23 niklas Exp $ */ +/* $NetBSD: rf_reconstub.c,v 1.3 1999/02/05 00:06:17 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -40,49 +40,64 @@ * **************************************************************************/ -/* : - * Log: rf_reconstub.c,v - * Revision 1.9 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.8 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.7 1996/04/03 23:25:33 jimz - * make inclusion of raidframe_recon.h #ifdef KERNEL - * - * Revision 1.6 1995/12/06 15:06:54 root - * added copyright info - * - */ - -#ifdef KERNEL -#include <raidframe_recon.h> -#endif /* KERNEL */ #include <sys/errno.h> #if RAIDFRAME_RECON == 0 -int rf_ConfigureReconstruction() { return(0); } -int rf_ConfigureReconEvent() { return(0); } -int rf_ConfigurePSStatus() { return(0); } -int rf_ConfigureNWayXor() { return(0); } -int rf_ConfigureCopyback() { return(0); } -int rf_ShutdownCopyback() { return(0); } -int rf_ShutdownReconstruction() { return(0); } -int rf_ShutdownReconEvent() { return(0); } -int rf_ShutdownPSStatus() { return(0); } -int rf_ShutdownNWayXor() { return(0); } - -int rf_ForceOrBlockRecon() { return(0); } -int rf_UnblockRecon() { return(0); } -int rf_ReconstructFailedDisk() { return(ENOTTY); } -int rf_CheckRUReconstructed() { return(0); } +int rf_ConfigureReconstruction() { + return (0); +} +int rf_ConfigureReconEvent() { + return (0); +} +int rf_ConfigurePSStatus() { + return (0); +} +int rf_ConfigureNWayXor() { + return (0); +} +int rf_ConfigureCopyback() { + return (0); +} +int rf_ShutdownCopyback() { + return (0); +} +int rf_ShutdownReconstruction() { + return (0); +} +int rf_ShutdownReconEvent() { + return (0); +} +int rf_ShutdownPSStatus() { + return (0); +} +int rf_ShutdownNWayXor() { + return (0); +} -void rf_start_cpu_monitor() {} -void rf_stop_cpu_monitor() {} -void rf_print_cpu_util() {} +int rf_ForceOrBlockRecon() { + return (0); +} +int rf_UnblockRecon() { + return 
(0); +} +int rf_ReconstructFailedDisk() { + return (ENOTTY); +} +int rf_CheckRUReconstructed() { + return (0); +} -#endif /* RAIDFRAME_RECON == 0 */ +void +rf_start_cpu_monitor() +{ +} +void +rf_stop_cpu_monitor() +{ +} +void +rf_print_cpu_util() +{ +} +#endif /* RAIDFRAME_RECON == 0 */ diff --git a/sys/dev/raidframe/rf_reconutil.c b/sys/dev/raidframe/rf_reconutil.c index 51267198a7e..f4c83910850 100644 --- a/sys/dev/raidframe/rf_reconutil.c +++ b/sys/dev/raidframe/rf_reconutil.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconutil.c,v 1.1 1999/01/11 14:29:47 niklas Exp $ */ -/* $NetBSD: rf_reconutil.c,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_reconutil.c,v 1.2 1999/02/16 00:03:23 niklas Exp $ */ +/* $NetBSD: rf_reconutil.c,v 1.3 1999/02/05 00:06:17 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -31,89 +31,6 @@ * rf_reconutil.c -- reconstruction utilities ********************************************/ -/* : - * Log: rf_reconutil.c,v - * Revision 1.32 1996/07/29 14:05:12 jimz - * fix numPUs/numRUs confusion (everything is now numRUs) - * clean up some commenting, return values - * - * Revision 1.31 1996/07/15 05:40:41 jimz - * some recon datastructure cleanup - * better handling of multiple failures - * added undocumented double-recon test - * - * Revision 1.30 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.29 1996/06/19 17:53:48 jimz - * move GetNumSparePUs, InstallSpareTable ops into layout switch - * - * Revision 1.28 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.27 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. 
- * - * Revision 1.26 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.25 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.24 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.23 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.22 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.21 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.20 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.19 1996/05/20 16:14:55 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.18 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.17 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.16 1995/12/06 15:05:31 root - * added copyright info - * - */ - #include "rf_types.h" #include "rf_raid.h" #include "rf_desc.h" @@ -128,281 +45,292 @@ /******************************************************************* * allocates/frees the reconstruction control information structures *******************************************************************/ -RF_ReconCtrl_t *rf_MakeReconControl(reconDesc, frow, fcol, srow, scol) - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t frow; /* failed row and column */ - RF_RowCol_t fcol; - RF_RowCol_t srow; /* identifies which spare we're using */ - RF_RowCol_t scol; +RF_ReconCtrl_t * +rf_MakeReconControl(reconDesc, frow, fcol, srow, scol) + RF_RaidReconDesc_t *reconDesc; + RF_RowCol_t frow; /* failed row and column */ + RF_RowCol_t fcol; + RF_RowCol_t srow; /* identifies which spare we're using */ + RF_RowCol_t scol; { - RF_Raid_t *raidPtr = reconDesc->raidPtr; - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; - RF_ReconUnitCount_t numSpareRUs; - RF_ReconCtrl_t *reconCtrlPtr; - RF_ReconBuffer_t *rbuf; - RF_LayoutSW_t *lp; - int retcode, rc; - RF_RowCol_t i; - - lp = raidPtr->Layout.map; - - /* make and zero the global reconstruction structure and the 
per-disk structure */ - RF_Calloc(reconCtrlPtr, 1, sizeof(RF_ReconCtrl_t), (RF_ReconCtrl_t *)); - RF_Calloc(reconCtrlPtr->perDiskInfo, raidPtr->numCol, sizeof(RF_PerDiskReconCtrl_t), (RF_PerDiskReconCtrl_t *)); /* this zeros it */ - reconCtrlPtr->reconDesc = reconDesc; - reconCtrlPtr->fcol = fcol; - reconCtrlPtr->spareRow = srow; - reconCtrlPtr->spareCol = scol; - reconCtrlPtr->lastPSID = layoutPtr->numStripe/layoutPtr->SUsPerPU; - reconCtrlPtr->percentComplete = 0; - - /* initialize each per-disk recon information structure */ - for (i=0; i<raidPtr->numCol; i++) { - reconCtrlPtr->perDiskInfo[i].reconCtrl = reconCtrlPtr; - reconCtrlPtr->perDiskInfo[i].row = frow; - reconCtrlPtr->perDiskInfo[i].col = i; - reconCtrlPtr->perDiskInfo[i].curPSID = -1; /* make it appear as if we just finished an RU */ - reconCtrlPtr->perDiskInfo[i].ru_count = RUsPerPU-1; - } - - /* Get the number of spare units per disk and the sparemap in case spare is distributed */ - - if (lp->GetNumSpareRUs) { - numSpareRUs = lp->GetNumSpareRUs(raidPtr); - } - else { - numSpareRUs = 0; - } - - /* - * Not all distributed sparing archs need dynamic mappings - */ - if (lp->InstallSpareTable) { - retcode = rf_InstallSpareTable(raidPtr, frow, fcol); - if (retcode) { - RF_PANIC(); /* XXX fix this*/ - } - } - - /* make the reconstruction map */ - reconCtrlPtr->reconMap = rf_MakeReconMap(raidPtr, (int) (layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit), - raidPtr->sectorsPerDisk, numSpareRUs); - - /* make the per-disk reconstruction buffers */ - for (i=0; i<raidPtr->numCol; i++) { - reconCtrlPtr->perDiskInfo[i].rbuf = (i==fcol) ? 
NULL : rf_MakeReconBuffer(raidPtr, frow, i, RF_RBUF_TYPE_EXCLUSIVE); - } - - /* initialize the event queue */ - rc = rf_mutex_init(&reconCtrlPtr->eq_mutex); - if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(NULL); - } - rc = rf_cond_init(&reconCtrlPtr->eq_cond); - if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(NULL); - } - reconCtrlPtr->eventQueue = NULL; - reconCtrlPtr->eq_count = 0; - - /* make the floating recon buffers and append them to the free list */ - rc = rf_mutex_init(&reconCtrlPtr->rb_mutex); - if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - return(NULL); - } - reconCtrlPtr->fullBufferList= NULL; - reconCtrlPtr->priorityList = NULL; - reconCtrlPtr->floatingRbufs = NULL; - reconCtrlPtr->committedRbufs= NULL; - for (i=0; i<raidPtr->numFloatingReconBufs; i++) { - rbuf = rf_MakeReconBuffer(raidPtr, frow, fcol, RF_RBUF_TYPE_FLOATING); - rbuf->next = reconCtrlPtr->floatingRbufs; - reconCtrlPtr->floatingRbufs = rbuf; - } - - /* create the parity stripe status table */ - reconCtrlPtr->pssTable = rf_MakeParityStripeStatusTable(raidPtr); - - /* set the initial min head sep counter val */ - reconCtrlPtr->minHeadSepCounter = 0; - - return(reconCtrlPtr); + RF_Raid_t *raidPtr = reconDesc->raidPtr; + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; + RF_ReconUnitCount_t numSpareRUs; + RF_ReconCtrl_t *reconCtrlPtr; + RF_ReconBuffer_t *rbuf; + RF_LayoutSW_t *lp; + int retcode, rc; + RF_RowCol_t i; + + lp = raidPtr->Layout.map; + + /* make and zero the global reconstruction structure and the per-disk + * structure */ + RF_Calloc(reconCtrlPtr, 1, sizeof(RF_ReconCtrl_t), (RF_ReconCtrl_t *)); + RF_Calloc(reconCtrlPtr->perDiskInfo, 
raidPtr->numCol, sizeof(RF_PerDiskReconCtrl_t), (RF_PerDiskReconCtrl_t *)); /* this zeros it */ + reconCtrlPtr->reconDesc = reconDesc; + reconCtrlPtr->fcol = fcol; + reconCtrlPtr->spareRow = srow; + reconCtrlPtr->spareCol = scol; + reconCtrlPtr->lastPSID = layoutPtr->numStripe / layoutPtr->SUsPerPU; + reconCtrlPtr->percentComplete = 0; + + /* initialize each per-disk recon information structure */ + for (i = 0; i < raidPtr->numCol; i++) { + reconCtrlPtr->perDiskInfo[i].reconCtrl = reconCtrlPtr; + reconCtrlPtr->perDiskInfo[i].row = frow; + reconCtrlPtr->perDiskInfo[i].col = i; + reconCtrlPtr->perDiskInfo[i].curPSID = -1; /* make it appear as if + * we just finished an + * RU */ + reconCtrlPtr->perDiskInfo[i].ru_count = RUsPerPU - 1; + } + + /* Get the number of spare units per disk and the sparemap in case + * spare is distributed */ + + if (lp->GetNumSpareRUs) { + numSpareRUs = lp->GetNumSpareRUs(raidPtr); + } else { + numSpareRUs = 0; + } + + /* + * Not all distributed sparing archs need dynamic mappings + */ + if (lp->InstallSpareTable) { + retcode = rf_InstallSpareTable(raidPtr, frow, fcol); + if (retcode) { + RF_PANIC(); /* XXX fix this */ + } + } + /* make the reconstruction map */ + reconCtrlPtr->reconMap = rf_MakeReconMap(raidPtr, (int) (layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit), + raidPtr->sectorsPerDisk, numSpareRUs); + + /* make the per-disk reconstruction buffers */ + for (i = 0; i < raidPtr->numCol; i++) { + reconCtrlPtr->perDiskInfo[i].rbuf = (i == fcol) ? 
NULL : rf_MakeReconBuffer(raidPtr, frow, i, RF_RBUF_TYPE_EXCLUSIVE); + } + + /* initialize the event queue */ + rc = rf_mutex_init(&reconCtrlPtr->eq_mutex); + if (rc) { + /* XXX deallocate, cleanup */ + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (NULL); + } + rc = rf_cond_init(&reconCtrlPtr->eq_cond); + if (rc) { + /* XXX deallocate, cleanup */ + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (NULL); + } + reconCtrlPtr->eventQueue = NULL; + reconCtrlPtr->eq_count = 0; + + /* make the floating recon buffers and append them to the free list */ + rc = rf_mutex_init(&reconCtrlPtr->rb_mutex); + if (rc) { + /* XXX deallocate, cleanup */ + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + return (NULL); + } + reconCtrlPtr->fullBufferList = NULL; + reconCtrlPtr->priorityList = NULL; + reconCtrlPtr->floatingRbufs = NULL; + reconCtrlPtr->committedRbufs = NULL; + for (i = 0; i < raidPtr->numFloatingReconBufs; i++) { + rbuf = rf_MakeReconBuffer(raidPtr, frow, fcol, RF_RBUF_TYPE_FLOATING); + rbuf->next = reconCtrlPtr->floatingRbufs; + reconCtrlPtr->floatingRbufs = rbuf; + } + + /* create the parity stripe status table */ + reconCtrlPtr->pssTable = rf_MakeParityStripeStatusTable(raidPtr); + + /* set the initial min head sep counter val */ + reconCtrlPtr->minHeadSepCounter = 0; + + return (reconCtrlPtr); } -void rf_FreeReconControl(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; +void +rf_FreeReconControl(raidPtr, row) + RF_Raid_t *raidPtr; + RF_RowCol_t row; { - RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; - RF_ReconBuffer_t *t; - RF_ReconUnitNum_t i; - - RF_ASSERT(reconCtrlPtr); - for (i=0; i<raidPtr->numCol; i++) if (reconCtrlPtr->perDiskInfo[i].rbuf) rf_FreeReconBuffer(reconCtrlPtr->perDiskInfo[i].rbuf); - for (i=0; i<raidPtr->numFloatingReconBufs; i++) { - t = reconCtrlPtr->floatingRbufs; - RF_ASSERT(t); - 
reconCtrlPtr->floatingRbufs = t->next; - rf_FreeReconBuffer(t); - } - rf_mutex_destroy(&reconCtrlPtr->rb_mutex); - rf_mutex_destroy(&reconCtrlPtr->eq_mutex); - rf_cond_destroy(&reconCtrlPtr->eq_cond); - rf_FreeReconMap(reconCtrlPtr->reconMap); - rf_FreeParityStripeStatusTable(raidPtr, reconCtrlPtr->pssTable); - RF_Free(reconCtrlPtr->perDiskInfo, raidPtr->numCol * sizeof(RF_PerDiskReconCtrl_t)); - RF_Free(reconCtrlPtr, sizeof(*reconCtrlPtr)); + RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; + RF_ReconBuffer_t *t; + RF_ReconUnitNum_t i; + + RF_ASSERT(reconCtrlPtr); + for (i = 0; i < raidPtr->numCol; i++) + if (reconCtrlPtr->perDiskInfo[i].rbuf) + rf_FreeReconBuffer(reconCtrlPtr->perDiskInfo[i].rbuf); + for (i = 0; i < raidPtr->numFloatingReconBufs; i++) { + t = reconCtrlPtr->floatingRbufs; + RF_ASSERT(t); + reconCtrlPtr->floatingRbufs = t->next; + rf_FreeReconBuffer(t); + } + rf_mutex_destroy(&reconCtrlPtr->rb_mutex); + rf_mutex_destroy(&reconCtrlPtr->eq_mutex); + rf_cond_destroy(&reconCtrlPtr->eq_cond); + rf_FreeReconMap(reconCtrlPtr->reconMap); + rf_FreeParityStripeStatusTable(raidPtr, reconCtrlPtr->pssTable); + RF_Free(reconCtrlPtr->perDiskInfo, raidPtr->numCol * sizeof(RF_PerDiskReconCtrl_t)); + RF_Free(reconCtrlPtr, sizeof(*reconCtrlPtr)); } /****************************************************************************** * computes the default head separation limit *****************************************************************************/ -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimit(raidPtr) - RF_Raid_t *raidPtr; +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimit(raidPtr) + RF_Raid_t *raidPtr; { - RF_HeadSepLimit_t hsl; - RF_LayoutSW_t *lp; - - lp = raidPtr->Layout.map; - if (lp->GetDefaultHeadSepLimit == NULL) - return(-1); - hsl = lp->GetDefaultHeadSepLimit(raidPtr); - return(hsl); + RF_HeadSepLimit_t hsl; + RF_LayoutSW_t *lp; + + lp = raidPtr->Layout.map; + if (lp->GetDefaultHeadSepLimit == NULL) + return (-1); + hsl = 
lp->GetDefaultHeadSepLimit(raidPtr); + return (hsl); } /****************************************************************************** * computes the default number of floating recon buffers *****************************************************************************/ -int rf_GetDefaultNumFloatingReconBuffers(raidPtr) - RF_Raid_t *raidPtr; +int +rf_GetDefaultNumFloatingReconBuffers(raidPtr) + RF_Raid_t *raidPtr; { - RF_LayoutSW_t *lp; - int nrb; - - lp = raidPtr->Layout.map; - if (lp->GetDefaultNumFloatingReconBuffers == NULL) - return(3 * raidPtr->numCol); - nrb = lp->GetDefaultNumFloatingReconBuffers(raidPtr); - return(nrb); + RF_LayoutSW_t *lp; + int nrb; + + lp = raidPtr->Layout.map; + if (lp->GetDefaultNumFloatingReconBuffers == NULL) + return (3 * raidPtr->numCol); + nrb = lp->GetDefaultNumFloatingReconBuffers(raidPtr); + return (nrb); } /****************************************************************************** * creates and initializes a reconstruction buffer *****************************************************************************/ -RF_ReconBuffer_t *rf_MakeReconBuffer( - RF_Raid_t *raidPtr, - RF_RowCol_t row, - RF_RowCol_t col, - RF_RbufType_t type) +RF_ReconBuffer_t * +rf_MakeReconBuffer( + RF_Raid_t * raidPtr, + RF_RowCol_t row, + RF_RowCol_t col, + RF_RbufType_t type) { - RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconBuffer_t *t; - u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit); - - RF_Malloc(t, sizeof(RF_ReconBuffer_t), (RF_ReconBuffer_t *)); - RF_Malloc(t->buffer, recon_buffer_size, (caddr_t)); - RF_Malloc(t->arrived, raidPtr->numCol * sizeof(char), (char *)); - t->raidPtr = raidPtr; - t->row = row; t->col = col; - t->priority = RF_IO_RECON_PRIORITY; - t->type = type; - t->pssPtr = NULL; - t->next = NULL; - return(t); + RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; + RF_ReconBuffer_t *t; + u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, layoutPtr->SUsPerRU * 
layoutPtr->sectorsPerStripeUnit); + + RF_Malloc(t, sizeof(RF_ReconBuffer_t), (RF_ReconBuffer_t *)); + RF_Malloc(t->buffer, recon_buffer_size, (caddr_t)); + RF_Malloc(t->arrived, raidPtr->numCol * sizeof(char), (char *)); + t->raidPtr = raidPtr; + t->row = row; + t->col = col; + t->priority = RF_IO_RECON_PRIORITY; + t->type = type; + t->pssPtr = NULL; + t->next = NULL; + return (t); } - /****************************************************************************** * frees a reconstruction buffer *****************************************************************************/ -void rf_FreeReconBuffer(rbuf) - RF_ReconBuffer_t *rbuf; +void +rf_FreeReconBuffer(rbuf) + RF_ReconBuffer_t *rbuf; { - RF_Raid_t *raidPtr = rbuf->raidPtr; - u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.SUsPerRU * raidPtr->Layout.sectorsPerStripeUnit); - - RF_Free(rbuf->arrived, raidPtr->numCol * sizeof(char)); - RF_Free(rbuf->buffer, recon_buffer_size); - RF_Free(rbuf, sizeof(*rbuf)); + RF_Raid_t *raidPtr = rbuf->raidPtr; + u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.SUsPerRU * raidPtr->Layout.sectorsPerStripeUnit); + + RF_Free(rbuf->arrived, raidPtr->numCol * sizeof(char)); + RF_Free(rbuf->buffer, recon_buffer_size); + RF_Free(rbuf, sizeof(*rbuf)); } /****************************************************************************** * debug only: sanity check the number of floating recon bufs in use *****************************************************************************/ -void rf_CheckFloatingRbufCount(raidPtr, dolock) - RF_Raid_t *raidPtr; - int dolock; +void +rf_CheckFloatingRbufCount(raidPtr, dolock) + RF_Raid_t *raidPtr; + int dolock; { - RF_ReconParityStripeStatus_t *p; - RF_PSStatusHeader_t *pssTable; - RF_ReconBuffer_t *rbuf; - int i, j, sum = 0; - RF_RowCol_t frow=0; - - for (i=0; i<raidPtr->numRow; i++) - if (raidPtr->reconControl[i]) { - frow = i; - break; - } - RF_ASSERT(frow >= 0); - - if (dolock) - 
RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); - pssTable = raidPtr->reconControl[frow]->pssTable; - - for (i=0; i<raidPtr->pssTableSize; i++) { - RF_LOCK_MUTEX(pssTable[i].mutex); - for (p = pssTable[i].chain; p; p=p->next) { - rbuf = (RF_ReconBuffer_t *) p->rbuf; - if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - - rbuf = (RF_ReconBuffer_t *) p->writeRbuf; - if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - - for (j=0; j<p->xorBufCount; j++) { - rbuf = (RF_ReconBuffer_t *) p->rbufsForXor[j]; - RF_ASSERT(rbuf); - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - } - RF_UNLOCK_MUTEX(pssTable[i].mutex); - } - - for (rbuf = raidPtr->reconControl[frow]->floatingRbufs; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - for (rbuf = raidPtr->reconControl[frow]->committedRbufs; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - for (rbuf = raidPtr->reconControl[frow]->fullBufferList; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - for (rbuf = raidPtr->reconControl[frow]->priorityList; rbuf; rbuf = rbuf->next) { - if (rbuf->type == RF_RBUF_TYPE_FLOATING) - sum++; - } - - RF_ASSERT(sum == raidPtr->numFloatingReconBufs); - - if (dolock) - RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); + RF_ReconParityStripeStatus_t *p; + RF_PSStatusHeader_t *pssTable; + RF_ReconBuffer_t *rbuf; + int i, j, sum = 0; + RF_RowCol_t frow = 0; + + for (i = 0; i < raidPtr->numRow; i++) + if (raidPtr->reconControl[i]) { + frow = i; + break; + } + RF_ASSERT(frow >= 0); + + if (dolock) + RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); + pssTable = raidPtr->reconControl[frow]->pssTable; + + for (i = 0; i < raidPtr->pssTableSize; i++) { + RF_LOCK_MUTEX(pssTable[i].mutex); + for (p = pssTable[i].chain; p; p = p->next) { + rbuf = (RF_ReconBuffer_t *) p->rbuf; + if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING) + sum++; + + rbuf = (RF_ReconBuffer_t *) 
p->writeRbuf; + if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING) + sum++; + + for (j = 0; j < p->xorBufCount; j++) { + rbuf = (RF_ReconBuffer_t *) p->rbufsForXor[j]; + RF_ASSERT(rbuf); + if (rbuf->type == RF_RBUF_TYPE_FLOATING) + sum++; + } + } + RF_UNLOCK_MUTEX(pssTable[i].mutex); + } + + for (rbuf = raidPtr->reconControl[frow]->floatingRbufs; rbuf; rbuf = rbuf->next) { + if (rbuf->type == RF_RBUF_TYPE_FLOATING) + sum++; + } + for (rbuf = raidPtr->reconControl[frow]->committedRbufs; rbuf; rbuf = rbuf->next) { + if (rbuf->type == RF_RBUF_TYPE_FLOATING) + sum++; + } + for (rbuf = raidPtr->reconControl[frow]->fullBufferList; rbuf; rbuf = rbuf->next) { + if (rbuf->type == RF_RBUF_TYPE_FLOATING) + sum++; + } + for (rbuf = raidPtr->reconControl[frow]->priorityList; rbuf; rbuf = rbuf->next) { + if (rbuf->type == RF_RBUF_TYPE_FLOATING) + sum++; + } + + RF_ASSERT(sum == raidPtr->numFloatingReconBufs); + + if (dolock) + RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); } diff --git a/sys/dev/raidframe/rf_reconutil.h b/sys/dev/raidframe/rf_reconutil.h index f4ea1c6f5f7..4c8d1b9924f 100644 --- a/sys/dev/raidframe/rf_reconutil.h +++ b/sys/dev/raidframe/rf_reconutil.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconutil.h,v 1.1 1999/01/11 14:29:48 niklas Exp $ */ -/* $NetBSD: rf_reconutil.h,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_reconutil.h,v 1.2 1999/02/16 00:03:24 niklas Exp $ */ +/* $NetBSD: rf_reconutil.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -31,66 +31,22 @@ * rf_reconutil.h -- header file for reconstruction utilities ************************************************************/ -/* : - * Log: rf_reconutil.h,v - * Revision 1.10 1996/07/15 05:40:41 jimz - * some recon datastructure cleanup - * better handling of multiple failures - * added undocumented double-recon test - * - * Revision 1.9 1996/07/13 00:00:59 jimz - * sanitized generalized reconstruction architecture - * cleaned up head sep, rbuf problems - * - * Revision 1.8 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.7 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.6 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.5 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.4 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1995/12/06 15:06:47 root - * added copyright info - * - */ - #ifndef _RF__RF_RECONUTIL_H_ #define _RF__RF_RECONUTIL_H_ #include "rf_types.h" #include "rf_reconstruct.h" -RF_ReconCtrl_t *rf_MakeReconControl(RF_RaidReconDesc_t *reconDesc, - RF_RowCol_t frow, RF_RowCol_t fcol, RF_RowCol_t srow, RF_RowCol_t scol); -void rf_FreeReconControl(RF_Raid_t *raidPtr, RF_RowCol_t 
row); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimit(RF_Raid_t *raidPtr); -int rf_GetDefaultNumFloatingReconBuffers(RF_Raid_t *raidPtr); -RF_ReconBuffer_t *rf_MakeReconBuffer(RF_Raid_t *raidPtr, RF_RowCol_t row, - RF_RowCol_t col, RF_RbufType_t type); -void rf_FreeReconBuffer(RF_ReconBuffer_t *rbuf); -void rf_CheckFloatingRbufCount(RF_Raid_t *raidPtr, int dolock); +RF_ReconCtrl_t * +rf_MakeReconControl(RF_RaidReconDesc_t * reconDesc, + RF_RowCol_t frow, RF_RowCol_t fcol, RF_RowCol_t srow, RF_RowCol_t scol); +void rf_FreeReconControl(RF_Raid_t * raidPtr, RF_RowCol_t row); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimit(RF_Raid_t * raidPtr); +int rf_GetDefaultNumFloatingReconBuffers(RF_Raid_t * raidPtr); +RF_ReconBuffer_t * +rf_MakeReconBuffer(RF_Raid_t * raidPtr, RF_RowCol_t row, + RF_RowCol_t col, RF_RbufType_t type); +void rf_FreeReconBuffer(RF_ReconBuffer_t * rbuf); +void rf_CheckFloatingRbufCount(RF_Raid_t * raidPtr, int dolock); -#endif /* !_RF__RF_RECONUTIL_H_ */ +#endif /* !_RF__RF_RECONUTIL_H_ */ diff --git a/sys/dev/raidframe/rf_revent.c b/sys/dev/raidframe/rf_revent.c index c4236962b64..e58736c7cd9 100644 --- a/sys/dev/raidframe/rf_revent.c +++ b/sys/dev/raidframe/rf_revent.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_revent.c,v 1.1 1999/01/11 14:29:48 niklas Exp $ */ -/* $NetBSD: rf_revent.c,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_revent.c,v 1.2 1999/02/16 00:03:24 niklas Exp $ */ +/* $NetBSD: rf_revent.c,v 1.3 1999/02/05 00:06:17 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -29,61 +29,6 @@ /* * revent.c -- reconstruction event handling code */ -/* - * : - * Log: rf_revent.c,v - * Revision 1.22 1996/08/11 00:41:11 jimz - * extern hz only for kernel - * - * Revision 1.21 1996/07/15 05:40:41 jimz - * some recon datastructure cleanup - * better handling of multiple failures - * added undocumented double-recon test - * - * Revision 1.20 1996/06/17 03:18:04 jimz - * include shutdown.h for macroized ShutdownCreate - * - * Revision 1.19 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.18 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.17 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. 
- * - * Revision 1.16 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.15 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.14 1996/05/20 16:13:40 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * use RF_FREELIST for revents - * - * Revision 1.13 1996/05/18 20:09:47 jimz - * bit of cleanup to compile cleanly in kernel, once again - * - * Revision 1.12 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - */ - -#ifdef _KERNEL -#define KERNEL -#endif #include <sys/errno.h> @@ -101,206 +46,181 @@ static RF_FreeList_t *rf_revent_freelist; #define RF_REVENT_INITIAL 8 -#ifdef KERNEL #include <sys/proc.h> extern int hz; -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#define DO_WAIT(_rc) mpsleep(&(_rc)->eventQueue, PZERO, "raidframe eventq", 0, \ - (void *) simple_lock_addr((_rc)->eq_mutex), MS_LOCK_SIMPLE) -#else #define DO_WAIT(_rc) tsleep(&(_rc)->eventQueue, PRIBIO | PCATCH, "raidframe eventq", 0) -#endif #define DO_SIGNAL(_rc) wakeup(&(_rc)->eventQueue) -#else /* KERNEL */ - -#define DO_WAIT(_rc) RF_WAIT_COND((_rc)->eq_cond, (_rc)->eq_mutex) -#define DO_SIGNAL(_rc) RF_SIGNAL_COND((_rc)->eq_cond) - -#endif /* KERNEL */ static void rf_ShutdownReconEvent(void *); -static RF_ReconEvent_t *GetReconEventDesc(RF_RowCol_t row, RF_RowCol_t col, - void *arg, RF_Revent_t type); -RF_ReconEvent_t *rf_GetNextReconEvent(RF_RaidReconDesc_t *, - RF_RowCol_t, void (*continueFunc)(void *), - void *); +static RF_ReconEvent_t * +GetReconEventDesc(RF_RowCol_t row, RF_RowCol_t col, + void *arg, RF_Revent_t type); +RF_ReconEvent_t * +rf_GetNextReconEvent(RF_RaidReconDesc_t *, + RF_RowCol_t, void (*continueFunc) (void *), + 
void *); -static void rf_ShutdownReconEvent(ignored) - void *ignored; + static void rf_ShutdownReconEvent(ignored) + void *ignored; { - RF_FREELIST_DESTROY(rf_revent_freelist,next,(RF_ReconEvent_t *)); + RF_FREELIST_DESTROY(rf_revent_freelist, next, (RF_ReconEvent_t *)); } -int rf_ConfigureReconEvent(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureReconEvent(listp) + RF_ShutdownList_t **listp; { - int rc; - - RF_FREELIST_CREATE(rf_revent_freelist, RF_MAX_FREE_REVENT, - RF_REVENT_INC, sizeof(RF_ReconEvent_t)); - if (rf_revent_freelist == NULL) - return(ENOMEM); - rc = rf_ShutdownCreate(listp, rf_ShutdownReconEvent, NULL); - if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - rf_ShutdownReconEvent(NULL); - return(rc); - } - RF_FREELIST_PRIME(rf_revent_freelist, RF_REVENT_INITIAL,next, - (RF_ReconEvent_t *)); - return(0); + int rc; + + RF_FREELIST_CREATE(rf_revent_freelist, RF_MAX_FREE_REVENT, + RF_REVENT_INC, sizeof(RF_ReconEvent_t)); + if (rf_revent_freelist == NULL) + return (ENOMEM); + rc = rf_ShutdownCreate(listp, rf_ShutdownReconEvent, NULL); + if (rc) { + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); + rf_ShutdownReconEvent(NULL); + return (rc); + } + RF_FREELIST_PRIME(rf_revent_freelist, RF_REVENT_INITIAL, next, + (RF_ReconEvent_t *)); + return (0); } - /* returns the next reconstruction event, blocking the calling thread until * one becomes available */ /* will now return null if it is blocked or will return an event if it is not */ -RF_ReconEvent_t *rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t row; - void (*continueFunc)(void *); - void *continueArg; +RF_ReconEvent_t * +rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) + RF_RaidReconDesc_t *reconDesc; + RF_RowCol_t row; + void (*continueFunc) (void *); + void *continueArg; { - RF_Raid_t *raidPtr = 
reconDesc->raidPtr; - RF_ReconCtrl_t *rctrl = raidPtr->reconControl[row]; - RF_ReconEvent_t *event; - - RF_ASSERT( row >= 0 && row <= raidPtr->numRow ); - RF_LOCK_MUTEX(rctrl->eq_mutex); - RF_ASSERT( (rctrl->eventQueue==NULL) == (rctrl->eq_count == 0)); /* q null and count==0 must be equivalent conditions */ + RF_Raid_t *raidPtr = reconDesc->raidPtr; + RF_ReconCtrl_t *rctrl = raidPtr->reconControl[row]; + RF_ReconEvent_t *event; + RF_ASSERT(row >= 0 && row <= raidPtr->numRow); + RF_LOCK_MUTEX(rctrl->eq_mutex); + RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); /* q null and count==0 + * must be equivalent + * conditions */ - rctrl->continueFunc=continueFunc; - rctrl->continueArg=continueArg; -#ifdef SIMULATE - if (!rctrl->eventQueue) { - RF_UNLOCK_MUTEX(rctrl->eq_mutex); - return (NULL); - } -#else /* SIMULATE */ + rctrl->continueFunc = continueFunc; + rctrl->continueArg = continueArg; -#ifdef KERNEL /* mpsleep timeout value: secs = timo_val/hz. 'ticks' here is defined as cycle-counter ticks, not softclock ticks */ -#define MAX_RECON_EXEC_TICKS 15000000 /* 150 Mhz => this many ticks in 100 ms */ +#define MAX_RECON_EXEC_TICKS 15000000 /* 150 Mhz => this many ticks in 100 + * ms */ #define RECON_DELAY_MS 25 #define RECON_TIMO ((RECON_DELAY_MS * hz) / 1000) - /* we are not pre-emptible in the kernel, but we don't want to run forever. If we run w/o blocking - * for more than MAX_RECON_EXEC_TICKS ticks of the cycle counter, delay for RECON_DELAY before continuing. - * this may murder us with context switches, so we may need to increase both the MAX...TICKS and the RECON_DELAY_MS. 
- */ - if (reconDesc->reconExecTimerRunning) { - int status; - - RF_ETIMER_STOP(reconDesc->recon_exec_timer); - RF_ETIMER_EVAL(reconDesc->recon_exec_timer); - reconDesc->reconExecTicks += RF_ETIMER_VAL_TICKS(reconDesc->recon_exec_timer); - if (reconDesc->reconExecTicks > reconDesc->maxReconExecTicks) - reconDesc->maxReconExecTicks = reconDesc->reconExecTicks; - if (reconDesc->reconExecTicks >= MAX_RECON_EXEC_TICKS) { - /* we've been running too long. delay for RECON_DELAY_MS */ + /* we are not pre-emptible in the kernel, but we don't want to run + * forever. If we run w/o blocking for more than MAX_RECON_EXEC_TICKS + * ticks of the cycle counter, delay for RECON_DELAY before + * continuing. this may murder us with context switches, so we may + * need to increase both the MAX...TICKS and the RECON_DELAY_MS. */ + if (reconDesc->reconExecTimerRunning) { + int status; + + RF_ETIMER_STOP(reconDesc->recon_exec_timer); + RF_ETIMER_EVAL(reconDesc->recon_exec_timer); + reconDesc->reconExecTicks += RF_ETIMER_VAL_TICKS(reconDesc->recon_exec_timer); + if (reconDesc->reconExecTicks > reconDesc->maxReconExecTicks) + reconDesc->maxReconExecTicks = reconDesc->reconExecTicks; + if (reconDesc->reconExecTicks >= MAX_RECON_EXEC_TICKS) { + /* we've been running too long. 
delay for + * RECON_DELAY_MS */ #if RF_RECON_STATS > 0 - reconDesc->numReconExecDelays++; -#endif /* RF_RECON_STATS > 0 */ -#if !defined(__NetBSD__) && !defined(__OpenBSD__) - status = mpsleep(&reconDesc->reconExecTicks, PZERO, "recon delay", RECON_TIMO, (void *) simple_lock_addr(rctrl->eq_mutex), MS_LOCK_SIMPLE); -#else - status = tsleep(&reconDesc->reconExecTicks, PRIBIO | PCATCH, "recon delay", RECON_TIMO ); -#endif - RF_ASSERT(status == EWOULDBLOCK); - reconDesc->reconExecTicks = 0; - } - } - -#endif /* KERNEL */ - - while (!rctrl->eventQueue) { + reconDesc->numReconExecDelays++; +#endif /* RF_RECON_STATS > 0 */ + status = tsleep(&reconDesc->reconExecTicks, PRIBIO | PCATCH, "recon delay", RECON_TIMO); + RF_ASSERT(status == EWOULDBLOCK); + reconDesc->reconExecTicks = 0; + } + } + while (!rctrl->eventQueue) { #if RF_RECON_STATS > 0 - reconDesc->numReconEventWaits++; -#endif /* RF_RECON_STATS > 0 */ - DO_WAIT(rctrl); -#ifdef KERNEL - reconDesc->reconExecTicks = 0; /* we've just waited */ -#endif /* KERNEL */ - } - -#endif /* SIMULATE */ - -#ifdef KERNEL - reconDesc->reconExecTimerRunning = 1; - RF_ETIMER_START(reconDesc->recon_exec_timer); -#endif /* KERNEL */ - - event = rctrl->eventQueue; - rctrl->eventQueue = event->next; - event->next = NULL; - rctrl->eq_count--; - RF_ASSERT( (rctrl->eventQueue==NULL) == (rctrl->eq_count == 0)); /* q null and count==0 must be equivalent conditions */ - RF_UNLOCK_MUTEX(rctrl->eq_mutex); - return(event); + reconDesc->numReconEventWaits++; +#endif /* RF_RECON_STATS > 0 */ + DO_WAIT(rctrl); + reconDesc->reconExecTicks = 0; /* we've just waited */ + } + + reconDesc->reconExecTimerRunning = 1; + RF_ETIMER_START(reconDesc->recon_exec_timer); + + event = rctrl->eventQueue; + rctrl->eventQueue = event->next; + event->next = NULL; + rctrl->eq_count--; + RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); /* q null and count==0 + * must be equivalent + * conditions */ + RF_UNLOCK_MUTEX(rctrl->eq_mutex); + return (event); } 
- /* enqueues a reconstruction event on the indicated queue */ -void rf_CauseReconEvent(raidPtr, row, col, arg, type) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; - void *arg; - RF_Revent_t type; +void +rf_CauseReconEvent(raidPtr, row, col, arg, type) + RF_Raid_t *raidPtr; + RF_RowCol_t row; + RF_RowCol_t col; + void *arg; + RF_Revent_t type; { - RF_ReconCtrl_t *rctrl = raidPtr->reconControl[row]; - RF_ReconEvent_t *event = GetReconEventDesc(row, col, arg, type); - - if (type == RF_REVENT_BUFCLEAR) { - RF_ASSERT(col != rctrl->fcol); - } - - RF_ASSERT( row >= 0 && row <= raidPtr->numRow && col >=0 && col <= raidPtr->numCol ); - RF_LOCK_MUTEX(rctrl->eq_mutex); - RF_ASSERT( (rctrl->eventQueue==NULL) == (rctrl->eq_count == 0)); /* q null and count==0 must be equivalent conditions */ - event->next = rctrl->eventQueue; - rctrl->eventQueue = event; - rctrl->eq_count++; - RF_UNLOCK_MUTEX(rctrl->eq_mutex); - -#ifndef SIMULATE - DO_SIGNAL(rctrl); -#else /* !SIMULATE */ - (rctrl->continueFunc)(rctrl->continueArg); -#endif /* !SIMULATE */ + RF_ReconCtrl_t *rctrl = raidPtr->reconControl[row]; + RF_ReconEvent_t *event = GetReconEventDesc(row, col, arg, type); + + if (type == RF_REVENT_BUFCLEAR) { + RF_ASSERT(col != rctrl->fcol); + } + RF_ASSERT(row >= 0 && row <= raidPtr->numRow && col >= 0 && col <= raidPtr->numCol); + RF_LOCK_MUTEX(rctrl->eq_mutex); + RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); /* q null and count==0 + * must be equivalent + * conditions */ + event->next = rctrl->eventQueue; + rctrl->eventQueue = event; + rctrl->eq_count++; + RF_UNLOCK_MUTEX(rctrl->eq_mutex); + + DO_SIGNAL(rctrl); } - /* allocates and initializes a recon event descriptor */ -static RF_ReconEvent_t *GetReconEventDesc(row, col, arg, type) - RF_RowCol_t row; - RF_RowCol_t col; - void *arg; - RF_Revent_t type; +static RF_ReconEvent_t * +GetReconEventDesc(row, col, arg, type) + RF_RowCol_t row; + RF_RowCol_t col; + void *arg; + RF_Revent_t type; { 
RF_ReconEvent_t *t; - RF_FREELIST_GET(rf_revent_freelist,t,next,(RF_ReconEvent_t *)); + RF_FREELIST_GET(rf_revent_freelist, t, next, (RF_ReconEvent_t *)); if (t == NULL) - return(NULL); + return (NULL); t->col = col; t->arg = arg; t->type = type; - return(t); + return (t); } -void rf_FreeReconEventDesc(event) - RF_ReconEvent_t *event; +void +rf_FreeReconEventDesc(event) + RF_ReconEvent_t *event; { - RF_FREELIST_FREE(rf_revent_freelist,event,next); + RF_FREELIST_FREE(rf_revent_freelist, event, next); } diff --git a/sys/dev/raidframe/rf_revent.h b/sys/dev/raidframe/rf_revent.h index 7029a8ef74d..a4be2d4d03a 100644 --- a/sys/dev/raidframe/rf_revent.h +++ b/sys/dev/raidframe/rf_revent.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_revent.h,v 1.1 1999/01/11 14:29:48 niklas Exp $ */ -/* $NetBSD: rf_revent.h,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_revent.h,v 1.2 1999/02/16 00:03:25 niklas Exp $ */ +/* $NetBSD: rf_revent.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,50 +33,20 @@ * *******************************************************************/ -/* : - * Log: rf_revent.h,v - * Revision 1.7 1996/07/15 05:40:41 jimz - * some recon datastructure cleanup - * better handling of multiple failures - * added undocumented double-recon test - * - * Revision 1.6 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.5 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.4 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. 
- * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1995/12/06 15:04:20 root - * added copyright info - * - */ - #ifndef _RF__RF_REVENT_H_ #define _RF__RF_REVENT_H_ #include "rf_types.h" -int rf_ConfigureReconEvent(RF_ShutdownList_t **listp); +int rf_ConfigureReconEvent(RF_ShutdownList_t ** listp); -RF_ReconEvent_t *rf_GetNextReconEvent(RF_RaidReconDesc_t *reconDesc, - RF_RowCol_t row, void (*continueFunc)(void *), void *continueArg); +RF_ReconEvent_t * +rf_GetNextReconEvent(RF_RaidReconDesc_t * reconDesc, + RF_RowCol_t row, void (*continueFunc) (void *), void *continueArg); -void rf_CauseReconEvent(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col, - void *arg, RF_Revent_t type); + void rf_CauseReconEvent(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col, + void *arg, RF_Revent_t type); -void rf_FreeReconEventDesc(RF_ReconEvent_t *event); + void rf_FreeReconEventDesc(RF_ReconEvent_t * event); -#endif /* !_RF__RF_REVENT_H_ */ +#endif /* !_RF__RF_REVENT_H_ */ diff --git a/sys/dev/raidframe/rf_rst.h b/sys/dev/raidframe/rf_rst.h deleted file mode 100644 index 06e66275cd2..00000000000 --- a/sys/dev/raidframe/rf_rst.h +++ /dev/null @@ -1,78 +0,0 @@ -/* $OpenBSD: rf_rst.h,v 1.1 1999/01/11 14:29:49 niklas Exp $ */ -/* $NetBSD: rf_rst.h,v 1.1 1998/11/13 04:20:34 oster Exp $ */ -/* - * Copyright (c) 1995 Carnegie-Mellon University. - * All rights reserved. - * - * Author: Mark Holland - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* rf_rst.h - defines raidSim trace entry */ - -/* : - * Log: rf_rst.h,v - * Revision 1.7 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.6 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.5 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.4 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.3 1995/12/06 15:03:15 root - * added copyright info - * - */ - -#ifndef _RF__RF_RST_H_ -#define _RF__RF_RST_H_ - -#include "rf_types.h" - -typedef struct RF_ScriptTraceEntry_s { - RF_int32 blkno; - RF_int32 size; - double delay; - RF_int16 pid; - RF_int8 op; - RF_int8 async_flag; -} RF_ScriptTraceEntry_t; - -typedef struct RF_ScriptTraceEntryList_s RF_ScriptTraceEntryList_t; -struct RF_ScriptTraceEntryList_s { - RF_ScriptTraceEntry_t entry; - RF_ScriptTraceEntryList_t *next; -}; - -#endif /* !_RF__RF_RST_H_ */ diff --git a/sys/dev/raidframe/rf_shutdown.c b/sys/dev/raidframe/rf_shutdown.c index 3e0dfc96a37..5091cbb7b02 100644 --- a/sys/dev/raidframe/rf_shutdown.c +++ b/sys/dev/raidframe/rf_shutdown.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_shutdown.c,v 1.1 1999/01/11 14:29:49 niklas Exp $ 
*/ -/* $NetBSD: rf_shutdown.c,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_shutdown.c,v 1.2 1999/02/16 00:03:26 niklas Exp $ */ +/* $NetBSD: rf_shutdown.c,v 1.4 1999/02/05 00:06:17 oster Exp $ */ /* * rf_shutdown.c */ @@ -41,74 +41,64 @@ #include "rf_freelist.h" #include "rf_threadid.h" -static void rf_FreeShutdownEnt(RF_ShutdownList_t *ent) +static void +rf_FreeShutdownEnt(RF_ShutdownList_t * ent) { -#ifdef KERNEL - FREE(ent, M_DEVBUF); -#else /* KERNEL */ - free(ent); -#endif /* KERNEL */ + FREE(ent, M_RAIDFRAME); } -int _rf_ShutdownCreate( - RF_ShutdownList_t **listp, - void (*cleanup)(void *arg), - void *arg, - char *file, - int line) +int +_rf_ShutdownCreate( + RF_ShutdownList_t ** listp, + void (*cleanup) (void *arg), + void *arg, + char *file, + int line) { - RF_ShutdownList_t *ent; + RF_ShutdownList_t *ent; - /* - * Have to directly allocate memory here, since we start up before - * and shutdown after RAIDframe internal allocation system. - */ -#ifdef KERNEL - ent = (RF_ShutdownList_t *)malloc( sizeof(RF_ShutdownList_t), M_DEVBUF, M_WAITOK); -#if 0 - MALLOC(ent, RF_ShutdownList_t *, sizeof(RF_ShutdownList_t), M_DEVBUF, M_WAITOK); -#endif -#else /* KERNEL */ - ent = (RF_ShutdownList_t *)malloc(sizeof(RF_ShutdownList_t)); -#endif /* KERNEL */ - if (ent == NULL) - return(ENOMEM); - ent->cleanup = cleanup; - ent->arg = arg; - ent->file = file; - ent->line = line; - ent->next = *listp; - *listp = ent; - return(0); + /* + * Have to directly allocate memory here, since we start up before + * and shutdown after RAIDframe internal allocation system. 
+ */ + ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), M_RAIDFRAME, M_WAITOK); + if (ent == NULL) + return (ENOMEM); + ent->cleanup = cleanup; + ent->arg = arg; + ent->file = file; + ent->line = line; + ent->next = *listp; + *listp = ent; + return (0); } -int rf_ShutdownList(RF_ShutdownList_t **list) +int +rf_ShutdownList(RF_ShutdownList_t ** list) { - RF_ShutdownList_t *r, *next; - char *file; - int line; + RF_ShutdownList_t *r, *next; + char *file; + int line; - for(r=*list;r;r=next) { - next = r->next; - file = r->file; - line = r->line; + for (r = *list; r; r = next) { + next = r->next; + file = r->file; + line = r->line; - if (rf_shutdownDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] call shutdown, created %s:%d\n", tid, file, line); - } + if (rf_shutdownDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] call shutdown, created %s:%d\n", tid, file, line); + } + r->cleanup(r->arg); - r->cleanup(r->arg); - - if (rf_shutdownDebug) { - int tid; - rf_get_threadid(tid); - printf("[%d] completed shutdown, created %s:%d\n", tid, file, line); - } - - rf_FreeShutdownEnt(r); - } - *list = NULL; - return(0); + if (rf_shutdownDebug) { + int tid; + rf_get_threadid(tid); + printf("[%d] completed shutdown, created %s:%d\n", tid, file, line); + } + rf_FreeShutdownEnt(r); + } + *list = NULL; + return (0); } diff --git a/sys/dev/raidframe/rf_shutdown.h b/sys/dev/raidframe/rf_shutdown.h index bddfe7f9c0d..002dad7fbc6 100644 --- a/sys/dev/raidframe/rf_shutdown.h +++ b/sys/dev/raidframe/rf_shutdown.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_shutdown.h,v 1.1 1999/01/11 14:29:49 niklas Exp $ */ -/* $NetBSD: rf_shutdown.h,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_shutdown.h,v 1.2 1999/02/16 00:03:26 niklas Exp $ */ +/* $NetBSD: rf_shutdown.h,v 1.2 1999/02/05 00:06:17 oster Exp $ */ /* * rf_shutdown.h */ @@ -51,18 +51,17 @@ */ struct RF_ShutdownList_s { - void (*cleanup)(void *arg); - void *arg; - char *file; - int line; - RF_ShutdownList_t 
*next; + void (*cleanup) (void *arg); + void *arg; + char *file; + int line; + RF_ShutdownList_t *next; }; - #define rf_ShutdownCreate(_listp_,_func_,_arg_) \ _rf_ShutdownCreate(_listp_,_func_,_arg_,__FILE__,__LINE__) -int _rf_ShutdownCreate(RF_ShutdownList_t **listp, void (*cleanup)(void *arg), - void *arg, char *file, int line); -int rf_ShutdownList(RF_ShutdownList_t **listp); +int _rf_ShutdownCreate(RF_ShutdownList_t ** listp, void (*cleanup) (void *arg), + void *arg, char *file, int line); +int rf_ShutdownList(RF_ShutdownList_t ** listp); -#endif /* !_RF__RF_SHUTDOWN_H_ */ +#endif /* !_RF__RF_SHUTDOWN_H_ */ diff --git a/sys/dev/raidframe/rf_sstf.c b/sys/dev/raidframe/rf_sstf.c index 21d97eef046..107190ce46b 100644 --- a/sys/dev/raidframe/rf_sstf.c +++ b/sys/dev/raidframe/rf_sstf.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_sstf.c,v 1.1 1999/01/11 14:29:50 niklas Exp $ */ -/* $NetBSD: rf_sstf.c,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_sstf.c,v 1.2 1999/02/16 00:03:27 niklas Exp $ */ +/* $NetBSD: rf_sstf.c,v 1.3 1999/02/05 00:06:17 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -33,35 +33,6 @@ * ******************************************************************************/ -/* - * : - * Log: rf_sstf.c,v - * Revision 1.7 1996/06/19 14:09:56 jimz - * SstfPeek wasn't calling closest_to_arm() properly- would bogart - * low priority I/Os - * - * Revision 1.6 1996/06/18 20:53:11 jimz - * fix up disk queueing (remove configure routine, - * add shutdown list arg to create routines) - * - * Revision 1.5 1996/06/13 20:42:13 jimz - * add scan, cscan - * - * Revision 1.4 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.3 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.2 1996/06/06 01:11:35 jimz - * fixed many priority-related bugs - * - * Revision 1.1 1996/06/05 19:17:40 jimz - * Initial revision - * - */ - #include "rf_alloclist.h" #include "rf_stripelocks.h" #include "rf_layout.h" @@ -81,21 +52,24 @@ #define QSUM(_sstfq_) (((_sstfq_)->lopri.qlen)+((_sstfq_)->left.qlen)+((_sstfq_)->right.qlen)) -static void do_sstf_ord_q(RF_DiskQueueData_t **, - RF_DiskQueueData_t **, - RF_DiskQueueData_t *); +static void +do_sstf_ord_q(RF_DiskQueueData_t **, + RF_DiskQueueData_t **, + RF_DiskQueueData_t *); -static RF_DiskQueueData_t *closest_to_arm(RF_SstfQ_t *, - RF_SectorNum_t, - int *, - int); -static void do_dequeue(RF_SstfQ_t *, RF_DiskQueueData_t *); +static RF_DiskQueueData_t * +closest_to_arm(RF_SstfQ_t *, + RF_SectorNum_t, + int *, + int); +static void do_dequeue(RF_SstfQ_t *, RF_DiskQueueData_t *); -static void do_sstf_ord_q(queuep, tailp, req) - RF_DiskQueueData_t **queuep; - RF_DiskQueueData_t **tailp; - RF_DiskQueueData_t *req; +static void +do_sstf_ord_q(queuep, tailp, req) + RF_DiskQueueData_t **queuep; + RF_DiskQueueData_t **tailp; + RF_DiskQueueData_t *req; { RF_DiskQueueData_t *r, *s; @@ -119,7 +93,7 @@ static void do_sstf_ord_q(queuep, tailp, req) s = *tailp; goto q_at_end; } - 
for(s=NULL,r=*queuep;r;s=r,r=r->next) { + for (s = NULL, r = *queuep; r; s = r, r = r->next) { if (r->sectorOffset >= req->sectorOffset) { /* insert after s, before r */ RF_ASSERT(s); @@ -140,7 +114,6 @@ q_at_end: s->next = req; *tailp = req; } - /* for removing from head-of-queue */ #define DO_HEAD_DEQ(_r_,_q_) { \ _r_ = (_q_)->queue; \ @@ -186,46 +159,42 @@ q_at_end: } \ } -static RF_DiskQueueData_t *closest_to_arm(queue, arm_pos, dir, allow_reverse) - RF_SstfQ_t *queue; - RF_SectorNum_t arm_pos; - int *dir; - int allow_reverse; +static RF_DiskQueueData_t * +closest_to_arm(queue, arm_pos, dir, allow_reverse) + RF_SstfQ_t *queue; + RF_SectorNum_t arm_pos; + int *dir; + int allow_reverse; { - RF_SectorNum_t best_pos_l=0, this_pos_l=0, last_pos=0; - RF_SectorNum_t best_pos_r=0, this_pos_r=0; + RF_SectorNum_t best_pos_l = 0, this_pos_l = 0, last_pos = 0; + RF_SectorNum_t best_pos_r = 0, this_pos_r = 0; RF_DiskQueueData_t *r, *best_l, *best_r; best_r = best_l = NULL; - for(r=queue->queue;r;r=r->next) { + for (r = queue->queue; r; r = r->next) { if (r->sectorOffset < arm_pos) { if (best_l == NULL) { best_l = r; last_pos = best_pos_l = this_pos_l; - } - else { + } else { this_pos_l = arm_pos - r->sectorOffset; if (this_pos_l < best_pos_l) { best_l = r; last_pos = best_pos_l = this_pos_l; - } - else { + } else { last_pos = this_pos_l; } } - } - else { + } else { if (best_r == NULL) { best_r = r; last_pos = best_pos_r = this_pos_r; - } - else { + } else { this_pos_r = r->sectorOffset - arm_pos; if (this_pos_r < best_pos_r) { best_r = r; last_pos = best_pos_r = this_pos_r; - } - else { + } else { last_pos = this_pos_r; } if (this_pos_r > last_pos) { @@ -236,20 +205,20 @@ static RF_DiskQueueData_t *closest_to_arm(queue, arm_pos, dir, allow_reverse) } } if ((best_r == NULL) && (best_l == NULL)) - return(NULL); + return (NULL); if ((*dir == DIR_RIGHT) && best_r) - return(best_r); + return (best_r); if ((*dir == DIR_LEFT) && best_l) - return(best_l); + return (best_l); if 
(*dir == DIR_EITHER) { if (best_l == NULL) - return(best_r); + return (best_r); if (best_r == NULL) - return(best_l); + return (best_l); if (best_pos_r < best_pos_l) - return(best_r); + return (best_r); else - return(best_l); + return (best_l); } /* * Nothing in the direction we want to go. Reverse or @@ -259,139 +228,141 @@ static RF_DiskQueueData_t *closest_to_arm(queue, arm_pos, dir, allow_reverse) if (allow_reverse) { if (*dir == DIR_RIGHT) { *dir = DIR_LEFT; - return(best_l); - } - else { + return (best_l); + } else { *dir = DIR_RIGHT; - return(best_r); + return (best_r); } } /* * Reset (beginning of queue). */ RF_ASSERT(*dir == DIR_RIGHT); - return(queue->queue); + return (queue->queue); } -void *rf_SstfCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; +void * +rf_SstfCreate(sect_per_disk, cl_list, listp) + RF_SectorCount_t sect_per_disk; + RF_AllocListElem_t *cl_list; + RF_ShutdownList_t **listp; { RF_Sstf_t *sstfq; RF_CallocAndAdd(sstfq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list); sstfq->dir = DIR_EITHER; sstfq->allow_reverse = 1; - return((void *)sstfq); + return ((void *) sstfq); } -void *rf_ScanCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; +void * +rf_ScanCreate(sect_per_disk, cl_list, listp) + RF_SectorCount_t sect_per_disk; + RF_AllocListElem_t *cl_list; + RF_ShutdownList_t **listp; { RF_Sstf_t *scanq; RF_CallocAndAdd(scanq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list); scanq->dir = DIR_RIGHT; scanq->allow_reverse = 1; - return((void *)scanq); + return ((void *) scanq); } -void *rf_CscanCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; +void * +rf_CscanCreate(sect_per_disk, cl_list, listp) + RF_SectorCount_t sect_per_disk; + RF_AllocListElem_t *cl_list; + RF_ShutdownList_t **listp; { RF_Sstf_t *cscanq; 
RF_CallocAndAdd(cscanq, 1, sizeof(RF_Sstf_t), (RF_Sstf_t *), cl_list); cscanq->dir = DIR_RIGHT; - return((void *)cscanq); + return ((void *) cscanq); } -void rf_SstfEnqueue(qptr, req, priority) - void *qptr; - RF_DiskQueueData_t *req; - int priority; +void +rf_SstfEnqueue(qptr, req, priority) + void *qptr; + RF_DiskQueueData_t *req; + int priority; { RF_Sstf_t *sstfq; - sstfq = (RF_Sstf_t *)qptr; + sstfq = (RF_Sstf_t *) qptr; if (priority == RF_IO_LOW_PRIORITY) { if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { RF_DiskQueue_t *dq; - int tid; + int tid; rf_get_threadid(tid); - dq = (RF_DiskQueue_t *)req->queue; + dq = (RF_DiskQueue_t *) req->queue; printf("[%d] ENQ lopri %d,%d queues are %d,%d,%d\n", - tid, dq->row, dq->col, sstfq->left.qlen, sstfq->right.qlen, - sstfq->lopri.qlen); + tid, dq->row, dq->col, sstfq->left.qlen, sstfq->right.qlen, + sstfq->lopri.qlen); } do_sstf_ord_q(&sstfq->lopri.queue, &sstfq->lopri.qtail, req); sstfq->lopri.qlen++; - } - else { + } else { if (req->sectorOffset < sstfq->last_sector) { do_sstf_ord_q(&sstfq->left.queue, &sstfq->left.qtail, req); sstfq->left.qlen++; - } - else { + } else { do_sstf_ord_q(&sstfq->right.queue, &sstfq->right.qtail, req); sstfq->right.qlen++; } } } -static void do_dequeue(queue, req) - RF_SstfQ_t *queue; - RF_DiskQueueData_t *req; +static void +do_dequeue(queue, req) + RF_SstfQ_t *queue; + RF_DiskQueueData_t *req; { - RF_DiskQueueData_t *req2; + RF_DiskQueueData_t *req2; if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - int tid; + int tid; rf_get_threadid(tid); printf("[%d] do_dequeue\n", tid); } if (req == queue->queue) { - DO_HEAD_DEQ(req2,queue); - RF_ASSERT(req2 == req); - } - else if (req == queue->qtail) { - DO_TAIL_DEQ(req2,queue); + DO_HEAD_DEQ(req2, queue); RF_ASSERT(req2 == req); - } - else { - /* dequeue from middle of list */ - RF_ASSERT(req->next); - RF_ASSERT(req->prev); - queue->qlen--; - req->next->prev = req->prev; - req->prev->next = req->next; - req->next = req->prev = NULL; - } + 
} else + if (req == queue->qtail) { + DO_TAIL_DEQ(req2, queue); + RF_ASSERT(req2 == req); + } else { + /* dequeue from middle of list */ + RF_ASSERT(req->next); + RF_ASSERT(req->prev); + queue->qlen--; + req->next->prev = req->prev; + req->prev->next = req->next; + req->next = req->prev = NULL; + } } -RF_DiskQueueData_t *rf_SstfDequeue(qptr) - void *qptr; +RF_DiskQueueData_t * +rf_SstfDequeue(qptr) + void *qptr; { - RF_DiskQueueData_t *req=NULL; + RF_DiskQueueData_t *req = NULL; RF_Sstf_t *sstfq; - sstfq = (RF_Sstf_t *)qptr; + sstfq = (RF_Sstf_t *) qptr; if (rf_sstfDebug) { RF_DiskQueue_t *dq; - int tid; + int tid; rf_get_threadid(tid); - dq = (RF_DiskQueue_t *)req->queue; - RF_ASSERT(QSUM(sstfq)==dq->queueLength); + dq = (RF_DiskQueue_t *) req->queue; + RF_ASSERT(QSUM(sstfq) == dq->queueLength); printf("[%d] sstf: Dequeue %d,%d queues are %d,%d,%d\n", tid, - dq->row, dq->col, sstfq->left.qlen, sstfq->right.qlen, - sstfq->lopri.qlen); + dq->row, dq->col, sstfq->left.qlen, sstfq->right.qlen, + sstfq->lopri.qlen); } if (sstfq->left.queue == NULL) { RF_ASSERT(sstfq->left.qlen == 0); @@ -399,66 +370,62 @@ RF_DiskQueueData_t *rf_SstfDequeue(qptr) RF_ASSERT(sstfq->right.qlen == 0); if (sstfq->lopri.queue == NULL) { RF_ASSERT(sstfq->lopri.qlen == 0); - return(NULL); + return (NULL); } if (rf_sstfDebug) { - int tid; + int tid; rf_get_threadid(tid); printf("[%d] sstf: check for close lopri", tid); } req = closest_to_arm(&sstfq->lopri, sstfq->last_sector, - &sstfq->dir, sstfq->allow_reverse); + &sstfq->dir, sstfq->allow_reverse); if (rf_sstfDebug) { - int tid; + int tid; rf_get_threadid(tid); - printf("[%d] sstf: closest_to_arm said %lx", tid, (long)req); + printf("[%d] sstf: closest_to_arm said %lx", tid, (long) req); } if (req == NULL) - return(NULL); + return (NULL); do_dequeue(&sstfq->lopri, req); + } else { + DO_BEST_DEQ(sstfq->last_sector, req, &sstfq->right); } - else { - DO_BEST_DEQ(sstfq->last_sector,req,&sstfq->right); - } - } - else { + } else { if 
(sstfq->right.queue == NULL) { RF_ASSERT(sstfq->right.qlen == 0); - DO_BEST_DEQ(sstfq->last_sector,req,&sstfq->left); - } - else { - if (SNUM_DIFF(sstfq->last_sector,sstfq->right.queue->sectorOffset) - < SNUM_DIFF(sstfq->last_sector,sstfq->left.qtail->sectorOffset)) - { - DO_HEAD_DEQ(req,&sstfq->right); - } - else { - DO_TAIL_DEQ(req,&sstfq->left); + DO_BEST_DEQ(sstfq->last_sector, req, &sstfq->left); + } else { + if (SNUM_DIFF(sstfq->last_sector, sstfq->right.queue->sectorOffset) + < SNUM_DIFF(sstfq->last_sector, sstfq->left.qtail->sectorOffset)) { + DO_HEAD_DEQ(req, &sstfq->right); + } else { + DO_TAIL_DEQ(req, &sstfq->left); } } } RF_ASSERT(req); sstfq->last_sector = req->sectorOffset; - return(req); + return (req); } -RF_DiskQueueData_t *rf_ScanDequeue(qptr) - void *qptr; +RF_DiskQueueData_t * +rf_ScanDequeue(qptr) + void *qptr; { - RF_DiskQueueData_t *req=NULL; + RF_DiskQueueData_t *req = NULL; RF_Sstf_t *scanq; - scanq = (RF_Sstf_t *)qptr; + scanq = (RF_Sstf_t *) qptr; if (rf_scanDebug) { RF_DiskQueue_t *dq; - int tid; + int tid; rf_get_threadid(tid); - dq = (RF_DiskQueue_t *)req->queue; - RF_ASSERT(QSUM(scanq)==dq->queueLength); + dq = (RF_DiskQueue_t *) req->queue; + RF_ASSERT(QSUM(scanq) == dq->queueLength); printf("[%d] scan: Dequeue %d,%d queues are %d,%d,%d\n", tid, - dq->row, dq->col, scanq->left.qlen, scanq->right.qlen, - scanq->lopri.qlen); + dq->row, dq->col, scanq->left.qlen, scanq->right.qlen, + scanq->lopri.qlen); } if (scanq->left.queue == NULL) { RF_ASSERT(scanq->left.qlen == 0); @@ -466,77 +433,73 @@ RF_DiskQueueData_t *rf_ScanDequeue(qptr) RF_ASSERT(scanq->right.qlen == 0); if (scanq->lopri.queue == NULL) { RF_ASSERT(scanq->lopri.qlen == 0); - return(NULL); + return (NULL); } req = closest_to_arm(&scanq->lopri, scanq->last_sector, - &scanq->dir, scanq->allow_reverse); + &scanq->dir, scanq->allow_reverse); if (req == NULL) - return(NULL); + return (NULL); do_dequeue(&scanq->lopri, req); - } - else { + } else { scanq->dir = DIR_RIGHT; - 
DO_HEAD_DEQ(req,&scanq->right); + DO_HEAD_DEQ(req, &scanq->right); } - } - else if (scanq->right.queue == NULL) { - RF_ASSERT(scanq->right.qlen == 0); - RF_ASSERT(scanq->left.queue); - scanq->dir = DIR_LEFT; - DO_TAIL_DEQ(req,&scanq->left); - } - else { - RF_ASSERT(scanq->right.queue); - RF_ASSERT(scanq->left.queue); - if (scanq->dir == DIR_RIGHT) { - DO_HEAD_DEQ(req,&scanq->right); - } - else { - DO_TAIL_DEQ(req,&scanq->left); + } else + if (scanq->right.queue == NULL) { + RF_ASSERT(scanq->right.qlen == 0); + RF_ASSERT(scanq->left.queue); + scanq->dir = DIR_LEFT; + DO_TAIL_DEQ(req, &scanq->left); + } else { + RF_ASSERT(scanq->right.queue); + RF_ASSERT(scanq->left.queue); + if (scanq->dir == DIR_RIGHT) { + DO_HEAD_DEQ(req, &scanq->right); + } else { + DO_TAIL_DEQ(req, &scanq->left); + } } - } RF_ASSERT(req); scanq->last_sector = req->sectorOffset; - return(req); + return (req); } -RF_DiskQueueData_t *rf_CscanDequeue(qptr) - void *qptr; +RF_DiskQueueData_t * +rf_CscanDequeue(qptr) + void *qptr; { - RF_DiskQueueData_t *req=NULL; + RF_DiskQueueData_t *req = NULL; RF_Sstf_t *cscanq; - cscanq = (RF_Sstf_t *)qptr; + cscanq = (RF_Sstf_t *) qptr; RF_ASSERT(cscanq->dir == DIR_RIGHT); if (rf_cscanDebug) { RF_DiskQueue_t *dq; - int tid; + int tid; rf_get_threadid(tid); - dq = (RF_DiskQueue_t *)req->queue; - RF_ASSERT(QSUM(cscanq)==dq->queueLength); + dq = (RF_DiskQueue_t *) req->queue; + RF_ASSERT(QSUM(cscanq) == dq->queueLength); printf("[%d] scan: Dequeue %d,%d queues are %d,%d,%d\n", tid, - dq->row, dq->col, cscanq->left.qlen, cscanq->right.qlen, - cscanq->lopri.qlen); + dq->row, dq->col, cscanq->left.qlen, cscanq->right.qlen, + cscanq->lopri.qlen); } if (cscanq->right.queue) { - DO_HEAD_DEQ(req,&cscanq->right); - } - else { + DO_HEAD_DEQ(req, &cscanq->right); + } else { RF_ASSERT(cscanq->right.qlen == 0); if (cscanq->left.queue == NULL) { RF_ASSERT(cscanq->left.qlen == 0); if (cscanq->lopri.queue == NULL) { RF_ASSERT(cscanq->lopri.qlen == 0); - return(NULL); + return 
(NULL); } req = closest_to_arm(&cscanq->lopri, cscanq->last_sector, - &cscanq->dir, cscanq->allow_reverse); + &cscanq->dir, cscanq->allow_reverse); if (req == NULL) - return(NULL); + return (NULL); do_dequeue(&cscanq->lopri, req); - } - else { + } else { /* * There's I/Os to the left of the arm. Swing * on back (swap queues). @@ -544,39 +507,37 @@ RF_DiskQueueData_t *rf_CscanDequeue(qptr) cscanq->right = cscanq->left; cscanq->left.qlen = 0; cscanq->left.queue = cscanq->left.qtail = NULL; - DO_HEAD_DEQ(req,&cscanq->right); + DO_HEAD_DEQ(req, &cscanq->right); } } RF_ASSERT(req); cscanq->last_sector = req->sectorOffset; - return(req); + return (req); } -RF_DiskQueueData_t *rf_SstfPeek(qptr) - void *qptr; +RF_DiskQueueData_t * +rf_SstfPeek(qptr) + void *qptr; { RF_DiskQueueData_t *req; RF_Sstf_t *sstfq; - sstfq = (RF_Sstf_t *)qptr; + sstfq = (RF_Sstf_t *) qptr; if ((sstfq->left.queue == NULL) && (sstfq->right.queue == NULL)) { req = closest_to_arm(&sstfq->lopri, sstfq->last_sector, &sstfq->dir, - sstfq->allow_reverse); - } - else { + sstfq->allow_reverse); + } else { if (sstfq->left.queue == NULL) req = sstfq->right.queue; else { if (sstfq->right.queue == NULL) req = sstfq->left.queue; else { - if (SNUM_DIFF(sstfq->last_sector,sstfq->right.queue->sectorOffset) - <SNUM_DIFF(sstfq->last_sector,sstfq->left.qtail->sectorOffset)) - { + if (SNUM_DIFF(sstfq->last_sector, sstfq->right.queue->sectorOffset) + < SNUM_DIFF(sstfq->last_sector, sstfq->left.qtail->sectorOffset)) { req = sstfq->right.queue; - } - else { + } else { req = sstfq->left.qtail; } } @@ -585,17 +546,18 @@ RF_DiskQueueData_t *rf_SstfPeek(qptr) if (req == NULL) { RF_ASSERT(QSUM(sstfq) == 0); } - return(req); + return (req); } -RF_DiskQueueData_t *rf_ScanPeek(qptr) - void *qptr; +RF_DiskQueueData_t * +rf_ScanPeek(qptr) + void *qptr; { RF_DiskQueueData_t *req; RF_Sstf_t *scanq; - int dir; + int dir; - scanq = (RF_Sstf_t *)qptr; + scanq = (RF_Sstf_t *) qptr; dir = scanq->dir; if (scanq->left.queue == NULL) { @@ 
-604,60 +566,56 @@ RF_DiskQueueData_t *rf_ScanPeek(qptr) RF_ASSERT(scanq->right.qlen == 0); if (scanq->lopri.queue == NULL) { RF_ASSERT(scanq->lopri.qlen == 0); - return(NULL); + return (NULL); } req = closest_to_arm(&scanq->lopri, scanq->last_sector, - &dir, scanq->allow_reverse); - } - else { - req = scanq->right.queue; - } - } - else if (scanq->right.queue == NULL) { - RF_ASSERT(scanq->right.qlen == 0); - RF_ASSERT(scanq->left.queue); - req = scanq->left.qtail; - } - else { - RF_ASSERT(scanq->right.queue); - RF_ASSERT(scanq->left.queue); - if (scanq->dir == DIR_RIGHT) { + &dir, scanq->allow_reverse); + } else { req = scanq->right.queue; } - else { + } else + if (scanq->right.queue == NULL) { + RF_ASSERT(scanq->right.qlen == 0); + RF_ASSERT(scanq->left.queue); req = scanq->left.qtail; + } else { + RF_ASSERT(scanq->right.queue); + RF_ASSERT(scanq->left.queue); + if (scanq->dir == DIR_RIGHT) { + req = scanq->right.queue; + } else { + req = scanq->left.qtail; + } } - } if (req == NULL) { RF_ASSERT(QSUM(scanq) == 0); } - return(req); + return (req); } -RF_DiskQueueData_t *rf_CscanPeek(qptr) - void *qptr; +RF_DiskQueueData_t * +rf_CscanPeek(qptr) + void *qptr; { RF_DiskQueueData_t *req; RF_Sstf_t *cscanq; - cscanq = (RF_Sstf_t *)qptr; + cscanq = (RF_Sstf_t *) qptr; RF_ASSERT(cscanq->dir == DIR_RIGHT); if (cscanq->right.queue) { req = cscanq->right.queue; - } - else { + } else { RF_ASSERT(cscanq->right.qlen == 0); if (cscanq->left.queue == NULL) { RF_ASSERT(cscanq->left.qlen == 0); if (cscanq->lopri.queue == NULL) { RF_ASSERT(cscanq->lopri.qlen == 0); - return(NULL); + return (NULL); } req = closest_to_arm(&cscanq->lopri, cscanq->last_sector, - &cscanq->dir, cscanq->allow_reverse); - } - else { + &cscanq->dir, cscanq->allow_reverse); + } else { /* * There's I/Os to the left of the arm. We'll end * up swinging on back. 
@@ -668,50 +626,50 @@ RF_DiskQueueData_t *rf_CscanPeek(qptr) if (req == NULL) { RF_ASSERT(QSUM(cscanq) == 0); } - return(req); + return (req); } -int rf_SstfPromote(qptr, parityStripeID, which_ru) - void *qptr; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; +int +rf_SstfPromote(qptr, parityStripeID, which_ru) + void *qptr; + RF_StripeNum_t parityStripeID; + RF_ReconUnitNum_t which_ru; { RF_DiskQueueData_t *r, *next; RF_Sstf_t *sstfq; - int n; + int n; - sstfq = (RF_Sstf_t *)qptr; + sstfq = (RF_Sstf_t *) qptr; n = 0; if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - int tid; + int tid; rf_get_threadid(tid); printf("[%d] promote %ld %d queues are %d,%d,%d\n", - tid, (long)parityStripeID, (int)which_ru, - sstfq->left.qlen, - sstfq->right.qlen, - sstfq->lopri.qlen); + tid, (long) parityStripeID, (int) which_ru, + sstfq->left.qlen, + sstfq->right.qlen, + sstfq->lopri.qlen); } - for(r=sstfq->lopri.queue;r;r=next) { + for (r = sstfq->lopri.queue; r; r = next) { next = r->next; if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - int tid; + int tid; rf_get_threadid(tid); - printf("[%d] check promote %lx\n", tid, (long)r); + printf("[%d] check promote %lx\n", tid, (long) r); } if ((r->parityStripeID == parityStripeID) - && (r->which_ru == which_ru)) - { + && (r->which_ru == which_ru)) { do_dequeue(&sstfq->lopri, r); rf_SstfEnqueue(qptr, r, RF_IO_NORMAL_PRIORITY); n++; } } if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - int tid; + int tid; rf_get_threadid(tid); printf("[%d] promoted %d matching I/Os queues are %d,%d,%d\n", - tid, n, sstfq->left.qlen, sstfq->right.qlen, sstfq->lopri.qlen); + tid, n, sstfq->left.qlen, sstfq->right.qlen, sstfq->lopri.qlen); } - return(n); + return (n); } diff --git a/sys/dev/raidframe/rf_sstf.h b/sys/dev/raidframe/rf_sstf.h index 9d81a090826..d704e62d206 100644 --- a/sys/dev/raidframe/rf_sstf.h +++ b/sys/dev/raidframe/rf_sstf.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_sstf.h,v 1.1 1999/01/11 14:29:50 niklas Exp $ */ -/* 
$NetBSD: rf_sstf.h,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_sstf.h,v 1.2 1999/02/16 00:03:27 niklas Exp $ */ +/* $NetBSD: rf_sstf.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,64 +27,44 @@ * rights to redistribute these changes. */ -/* : - * Log: rf_sstf.h,v - * Revision 1.6 1996/06/18 20:53:11 jimz - * fix up disk queueing (remove configure routine, - * add shutdown list arg to create routines) - * - * Revision 1.5 1996/06/13 20:42:08 jimz - * add scan, cscan - * - * Revision 1.4 1996/06/07 22:26:27 jimz - * type-ify which_ru (RF_ReconUnitNum_t) - * - * Revision 1.3 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.2 1996/06/06 01:22:24 jimz - * minor cleanup - * - * Revision 1.1 1996/06/05 19:17:40 jimz - * Initial revision - * - */ - #ifndef _RF__RF_SSTF_H_ #define _RF__RF_SSTF_H_ #include "rf_diskqueue.h" typedef struct RF_SstfQ_s { - RF_DiskQueueData_t *queue; - RF_DiskQueueData_t *qtail; - int qlen; -} RF_SstfQ_t; + RF_DiskQueueData_t *queue; + RF_DiskQueueData_t *qtail; + int qlen; +} RF_SstfQ_t; typedef struct RF_Sstf_s { - RF_SstfQ_t left; - RF_SstfQ_t right; - RF_SstfQ_t lopri; - RF_SectorNum_t last_sector; - int dir; - int allow_reverse; -} RF_Sstf_t; + RF_SstfQ_t left; + RF_SstfQ_t right; + RF_SstfQ_t lopri; + RF_SectorNum_t last_sector; + int dir; + int allow_reverse; +} RF_Sstf_t; -void *rf_SstfCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t *cl_list, RF_ShutdownList_t **listp); -void *rf_ScanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t *cl_list, RF_ShutdownList_t **listp); -void *rf_CscanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t *cl_list, RF_ShutdownList_t **listp); -void rf_SstfEnqueue(void *qptr, RF_DiskQueueData_t *req, int priority); +void * +rf_SstfCreate(RF_SectorCount_t sect_per_disk, + 
RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); +void * +rf_ScanCreate(RF_SectorCount_t sect_per_disk, + RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); +void * +rf_CscanCreate(RF_SectorCount_t sect_per_disk, + RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); +void rf_SstfEnqueue(void *qptr, RF_DiskQueueData_t * req, int priority); RF_DiskQueueData_t *rf_SstfDequeue(void *qptr); RF_DiskQueueData_t *rf_SstfPeek(void *qptr); -int rf_SstfPromote(void *qptr, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); +int +rf_SstfPromote(void *qptr, RF_StripeNum_t parityStripeID, + RF_ReconUnitNum_t which_ru); RF_DiskQueueData_t *rf_ScanDequeue(void *qptr); RF_DiskQueueData_t *rf_ScanPeek(void *qptr); RF_DiskQueueData_t *rf_CscanDequeue(void *qptr); RF_DiskQueueData_t *rf_CscanPeek(void *qptr); -#endif /* !_RF__RF_SSTF_H_ */ +#endif /* !_RF__RF_SSTF_H_ */ diff --git a/sys/dev/raidframe/rf_states.c b/sys/dev/raidframe/rf_states.c index 1bad7bd4ab7..6cb524a6f8c 100644 --- a/sys/dev/raidframe/rf_states.c +++ b/sys/dev/raidframe/rf_states.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_states.c,v 1.1 1999/01/11 14:29:50 niklas Exp $ */ -/* $NetBSD: rf_states.c,v 1.2 1998/11/13 13:47:56 drochner Exp $ */ +/* $OpenBSD: rf_states.c,v 1.2 1999/02/16 00:03:28 niklas Exp $ */ +/* $NetBSD: rf_states.c,v 1.6 1999/02/05 00:06:17 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,197 +27,6 @@ * rights to redistribute these changes. 
*/ -/* - * : - * Log: rf_states.c,v - * Revision 1.45 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.44 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.43 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.42 1996/07/17 21:00:58 jimz - * clean up timer interface, tracing - * - * Revision 1.41 1996/07/11 19:08:00 jimz - * generalize reconstruction mechanism - * allow raid1 reconstructs via copyback (done with array - * quiesced, not online, therefore not disk-directed) - * - * Revision 1.40 1996/06/17 14:38:33 jimz - * properly #if out RF_DEMO code - * fix bug in MakeConfig that was causing weird behavior - * in configuration routines (config was not zeroed at start) - * clean up genplot handling of stacks - * - * Revision 1.39 1996/06/11 18:12:17 jimz - * got rid of evil race condition in LastState - * - * Revision 1.38 1996/06/10 14:18:58 jimz - * move user, throughput stats into per-array structure - * - * Revision 1.37 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.36 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.35 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. 
- * - * Revision 1.34 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.33 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.32 1996/05/30 12:59:18 jimz - * make etimer happier, more portable - * - * Revision 1.31 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.30 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.29 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.28 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.27 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.26 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.25 1996/05/20 19:31:46 jimz - * straighten out syntax problems - * - * Revision 1.24 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.23 1996/05/16 23:37:33 jimz - * fix misspelled "else" - * - * Revision 1.22 1996/05/15 22:33:32 jimz - * appropriately #ifdef cache stuff - * - * Revision 1.21 1996/05/06 22:09:20 wvcii - * rf_State_ExecuteDAG now only executes the first dag - * of each parity stripe in a multi-stripe access - * - * rf_State_ProcessDAG now executes all dags in a - * multi-stripe access except the first dag of each stripe. 
- * - * Revision 1.20 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.19 1995/11/19 16:29:50 wvcii - * replaced LaunchDAGState with CreateDAGState, ExecuteDAGState - * created rf_ContinueDagAccess - * - * Revision 1.18 1995/11/07 15:37:23 wvcii - * deleted states SendDAGState, RetryDAGState - * added staes: LaunchDAGState, ProcessDAGState - * code no longer has a hard-coded retry count of 1 but will support - * retries until a dag can not be found (selected) to perform the user request - * - * Revision 1.17 1995/10/09 23:36:08 amiri - * *** empty log message *** - * - * Revision 1.16 1995/10/09 18:36:58 jimz - * moved call to StopThroughput for user-level driver to rf_driver.c - * - * Revision 1.15 1995/10/09 18:07:23 wvcii - * lastState now call rf_StopThroughputStats - * - * Revision 1.14 1995/10/05 18:56:31 jimz - * no-op file if !INCLUDE_VS - * - * Revision 1.13 1995/09/30 20:38:24 jimz - * LogTraceRec now takes a Raid * as its first argument - * - * Revision 1.12 1995/09/19 22:58:54 jimz - * integrate DKUSAGE into raidframe - * - * Revision 1.11 1995/09/07 01:26:55 jimz - * Achive basic compilation in kernel. Kernel functionality - * is not guaranteed at all, but it'll compile. Mostly. I hope. - * - * Revision 1.10 1995/07/26 03:28:31 robby - * intermediary checkin - * - * Revision 1.9 1995/07/23 02:50:33 robby - * oops. 
fixed boo boo - * - * Revision 1.8 1995/07/22 22:54:54 robby - * removed incorrect comment - * - * Revision 1.7 1995/07/21 19:30:26 robby - * added idle state for rf_when-idle.c - * - * Revision 1.6 1995/07/10 19:06:28 rachad - * *** empty log message *** - * - * Revision 1.5 1995/07/10 17:30:38 robby - * added virtual striping lock states - * - * Revision 1.4 1995/07/08 18:05:39 rachad - * Linked up Claudsons code with the real cache - * - * Revision 1.3 1995/07/06 14:38:50 robby - * changed get_thread_id to get_threadid - * - * Revision 1.2 1995/07/06 14:24:15 robby - * added log - * - */ - -#ifdef _KERNEL -#define KERNEL -#endif - -#ifdef KERNEL -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#include <dkusage.h> -#endif /* !__NetBSD__ && !__OpenBSD__ */ -#endif /* KERNEL */ - #include <sys/errno.h> #include "rf_archs.h" @@ -241,7 +50,7 @@ #include <io/cam/dec_cam.h> #include <io/cam/cam.h> #include <io/cam/pdrv.h> -#endif /* KERNEL && DKUSAGE > 0 */ +#endif /* KERNEL && DKUSAGE > 0 */ /* prototypes for some of the available states. @@ -255,357 +64,342 @@ - increment desc->state when they have finished their work. */ - -#ifdef SIMULATE -extern int global_async_flag; -#endif /* SIMULATE */ - -static char *StateName(RF_AccessState_t state) +static char * +StateName(RF_AccessState_t state) { - switch (state) { - case rf_QuiesceState: return "QuiesceState"; - case rf_MapState: return "MapState"; - case rf_LockState: return "LockState"; - case rf_CreateDAGState: return "CreateDAGState"; - case rf_ExecuteDAGState: return "ExecuteDAGState"; - case rf_ProcessDAGState: return "ProcessDAGState"; - case rf_CleanupState: return "CleanupState"; - case rf_LastState: return "LastState"; - case rf_IncrAccessesCountState: return "IncrAccessesCountState"; - case rf_DecrAccessesCountState: return "DecrAccessesCountState"; - default: return "!!! 
UnnamedState !!!"; - } + switch (state) { + case rf_QuiesceState:return "QuiesceState"; + case rf_MapState: + return "MapState"; + case rf_LockState: + return "LockState"; + case rf_CreateDAGState: + return "CreateDAGState"; + case rf_ExecuteDAGState: + return "ExecuteDAGState"; + case rf_ProcessDAGState: + return "ProcessDAGState"; + case rf_CleanupState: + return "CleanupState"; + case rf_LastState: + return "LastState"; + case rf_IncrAccessesCountState: + return "IncrAccessesCountState"; + case rf_DecrAccessesCountState: + return "DecrAccessesCountState"; + default: + return "!!! UnnamedState !!!"; + } } -void rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc) +void +rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc) { - int suspended = RF_FALSE; - int current_state_index = desc->state; - RF_AccessState_t current_state = desc->states[current_state_index]; - -#ifdef SIMULATE - rf_SetCurrentOwner(desc->owner); -#endif /* SIMULATE */ - - do { - - current_state_index = desc->state; - current_state = desc->states [current_state_index]; - - switch (current_state) { - - case rf_QuiesceState: suspended = rf_State_Quiesce(desc); - break; - case rf_IncrAccessesCountState: suspended = rf_State_IncrAccessCount(desc); - break; - case rf_MapState: suspended = rf_State_Map(desc); - break; - case rf_LockState: suspended = rf_State_Lock(desc); - break; - case rf_CreateDAGState: suspended = rf_State_CreateDAG(desc); - break; - case rf_ExecuteDAGState: suspended = rf_State_ExecuteDAG(desc); - break; - case rf_ProcessDAGState: suspended = rf_State_ProcessDAG(desc); - break; - case rf_CleanupState: suspended = rf_State_Cleanup(desc); - break; - case rf_DecrAccessesCountState: suspended = rf_State_DecrAccessCount(desc); - break; - case rf_LastState: suspended = rf_State_LastState(desc); - break; - } - - /* after this point, we cannot dereference desc since desc may - have been freed. desc is only freed in LastState, so if we - renter this function or loop back up, desc should be valid. 
*/ - - if (rf_printStatesDebug) { - int tid; - rf_get_threadid (tid); - - printf ("[%d] State: %-24s StateIndex: %3i desc: 0x%ld %s\n", - tid, StateName(current_state), current_state_index, (long)desc, - suspended ? "callback scheduled" : "looping"); - } - } while (!suspended && current_state != rf_LastState); - - return; + int suspended = RF_FALSE; + int current_state_index = desc->state; + RF_AccessState_t current_state = desc->states[current_state_index]; + + do { + + current_state_index = desc->state; + current_state = desc->states[current_state_index]; + + switch (current_state) { + + case rf_QuiesceState: + suspended = rf_State_Quiesce(desc); + break; + case rf_IncrAccessesCountState: + suspended = rf_State_IncrAccessCount(desc); + break; + case rf_MapState: + suspended = rf_State_Map(desc); + break; + case rf_LockState: + suspended = rf_State_Lock(desc); + break; + case rf_CreateDAGState: + suspended = rf_State_CreateDAG(desc); + break; + case rf_ExecuteDAGState: + suspended = rf_State_ExecuteDAG(desc); + break; + case rf_ProcessDAGState: + suspended = rf_State_ProcessDAG(desc); + break; + case rf_CleanupState: + suspended = rf_State_Cleanup(desc); + break; + case rf_DecrAccessesCountState: + suspended = rf_State_DecrAccessCount(desc); + break; + case rf_LastState: + suspended = rf_State_LastState(desc); + break; + } + + /* after this point, we cannot dereference desc since desc may + * have been freed. desc is only freed in LastState, so if we + * renter this function or loop back up, desc should be valid. */ + + if (rf_printStatesDebug) { + int tid; + rf_get_threadid(tid); + + printf("[%d] State: %-24s StateIndex: %3i desc: 0x%ld %s\n", + tid, StateName(current_state), current_state_index, (long) desc, + suspended ? 
"callback scheduled" : "looping"); + } + } while (!suspended && current_state != rf_LastState); + + return; } -void rf_ContinueDagAccess (RF_DagList_t *dagList) +void +rf_ContinueDagAccess(RF_DagList_t * dagList) { - RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec); - RF_RaidAccessDesc_t *desc; - RF_DagHeader_t *dag_h; - RF_Etimer_t timer; - int i; - - desc = dagList->desc; - - timer = tracerec->timer; - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer); - RF_ETIMER_START(tracerec->timer); - - /* skip to dag which just finished */ - dag_h = dagList->dags; - for (i = 0; i < dagList->numDagsDone; i++) { - dag_h = dag_h->next; - } - - /* check to see if retry is required */ - if (dag_h->status == rf_rollBackward) { - /* when a dag fails, mark desc status as bad and allow all other dags - * in the desc to execute to completion. then, free all dags and start over */ - desc->status = 1; /* bad status */ -#if RF_DEMO > 0 - if (!rf_demoMode) -#endif /* RF_DEMO > 0 */ - { - printf("[%d] DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n", - desc->tid, desc->type, (long)desc->raidAddress, - (long)desc->raidAddress,(int)desc->numBlocks, - (int)desc->numBlocks, (unsigned long) (desc->bufPtr)); - } - } - - dagList->numDagsDone++; - rf_ContinueRaidAccess(desc); + RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec); + RF_RaidAccessDesc_t *desc; + RF_DagHeader_t *dag_h; + RF_Etimer_t timer; + int i; + + desc = dagList->desc; + + timer = tracerec->timer; + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer); + RF_ETIMER_START(tracerec->timer); + + /* skip to dag which just finished */ + dag_h = dagList->dags; + for (i = 0; i < dagList->numDagsDone; i++) { + dag_h = dag_h->next; + } + + /* check to see if retry is required */ + if (dag_h->status == rf_rollBackward) { + /* when a dag fails, mark desc status as bad and allow all + * other dags in 
the desc to execute to completion. then, + * free all dags and start over */ + desc->status = 1; /* bad status */ + { + printf("[%d] DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n", + desc->tid, desc->type, (long) desc->raidAddress, + (long) desc->raidAddress, (int) desc->numBlocks, + (int) desc->numBlocks, (unsigned long) (desc->bufPtr)); + } + } + dagList->numDagsDone++; + rf_ContinueRaidAccess(desc); } -int rf_State_LastState(RF_RaidAccessDesc_t *desc) +int +rf_State_LastState(RF_RaidAccessDesc_t * desc) { - void (*callbackFunc)(RF_CBParam_t) = desc->callbackFunc; - RF_CBParam_t callbackArg; - - callbackArg.p = desc->callbackArg; - -#ifdef SIMULATE - int tid; - rf_get_threadid(tid); - - if (rf_accessDebug) - printf("async_flag set to %d\n",global_async_flag); - global_async_flag=desc->async_flag; - if (rf_accessDebug) - printf("Will now do clean up for %d\n",rf_GetCurrentOwner()); - rf_FreeRaidAccDesc(desc); - - if (callbackFunc) - callbackFunc(callbackArg); -#else /* SIMULATE */ - -#ifndef KERNEL - - if (!(desc->flags & RF_DAG_NONBLOCKING_IO)) { - /* bummer that we have to take another lock here */ - RF_LOCK_MUTEX(desc->mutex); - RF_ASSERT(desc->flags&RF_DAG_ACCESS_COMPLETE); - RF_SIGNAL_COND(desc->cond); /* DoAccess frees the desc in the blocking-I/O case */ - RF_UNLOCK_MUTEX(desc->mutex); - } - else - rf_FreeRaidAccDesc(desc); - - if (callbackFunc) - callbackFunc(callbackArg); - -#else /* KERNEL */ - if (!(desc->flags & RF_DAG_TEST_ACCESS)) {/* don't biodone if this */ -#if DKUSAGE > 0 - RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid,(struct buf *)desc->bp); -#else - RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid); -#endif /* DKUSAGE > 0 */ - /* printf("Calling biodone on 0x%x\n",desc->bp); */ - biodone(desc->bp); /* access came through ioctl */ - } + void (*callbackFunc) (RF_CBParam_t) = desc->callbackFunc; + RF_CBParam_t callbackArg; - if (callbackFunc) callbackFunc(callbackArg); - rf_FreeRaidAccDesc(desc); + callbackArg.p = 
desc->callbackArg; -#endif /* ! KERNEL */ -#endif /* SIMULATE */ - - return RF_FALSE; + if (!(desc->flags & RF_DAG_TEST_ACCESS)) { /* don't biodone if this */ +#if DKUSAGE > 0 + RF_DKU_END_IO(((RF_Raid_t *) desc->raidPtr)->raidid, (struct buf *) desc->bp); +#else + RF_DKU_END_IO(((RF_Raid_t *) desc->raidPtr)->raidid); +#endif /* DKUSAGE > 0 */ + + /* + * If this is not an async request, wake up the caller + */ + if (desc->async_flag == 0) + wakeup(desc->bp); + + /* printf("Calling biodone on 0x%x\n",desc->bp); */ + biodone(desc->bp); /* access came through ioctl */ + } + if (callbackFunc) + callbackFunc(callbackArg); + rf_FreeRaidAccDesc(desc); + + return RF_FALSE; } -int rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc) +int +rf_State_IncrAccessCount(RF_RaidAccessDesc_t * desc) { - RF_Raid_t *raidPtr; + RF_Raid_t *raidPtr; - raidPtr = desc->raidPtr; - /* Bummer. We have to do this to be 100% safe w.r.t. the increment below */ - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accs_in_flight++; /* used to detect quiescence */ - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); + raidPtr = desc->raidPtr; + /* Bummer. We have to do this to be 100% safe w.r.t. 
the increment + * below */ + RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); + raidPtr->accs_in_flight++; /* used to detect quiescence */ + RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - desc->state++; - return RF_FALSE; + desc->state++; + return RF_FALSE; } -int rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc) +int +rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc) { - RF_Raid_t *raidPtr; + RF_Raid_t *raidPtr; - raidPtr = desc->raidPtr; + raidPtr = desc->raidPtr; - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accs_in_flight--; - if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) { - rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc); - } - rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks); - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); + RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); + raidPtr->accs_in_flight--; + if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) { + rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc); + } + rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks); + RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - desc->state++; - return RF_FALSE; + desc->state++; + return RF_FALSE; } -int rf_State_Quiesce(RF_RaidAccessDesc_t *desc) +int +rf_State_Quiesce(RF_RaidAccessDesc_t * desc) { - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Etimer_t timer; - int suspended = RF_FALSE; - RF_Raid_t *raidPtr; - - raidPtr = desc->raidPtr; - - RF_ETIMER_START(timer); - RF_ETIMER_START(desc->timer); - - RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - if (raidPtr->accesses_suspended) { - RF_CallbackDesc_t *cb; - cb = rf_AllocCallbackDesc(); - /* XXX the following cast is quite bogus... rf_ContinueRaidAccess - takes a (RF_RaidAccessDesc_t *) as an argument.. 
GO */ - cb->callbackFunc = (void (*)(RF_CBParam_t))rf_ContinueRaidAccess; - cb->callbackArg.p = (void *) desc; - cb->next = raidPtr->quiesce_wait_list; - raidPtr->quiesce_wait_list = cb; - suspended = RF_TRUE; - } - - RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer); - - if (suspended && rf_quiesceDebug) - printf("Stalling access due to quiescence lock\n"); - - desc->state++; - return suspended; + RF_AccTraceEntry_t *tracerec = &desc->tracerec; + RF_Etimer_t timer; + int suspended = RF_FALSE; + RF_Raid_t *raidPtr; + + raidPtr = desc->raidPtr; + + RF_ETIMER_START(timer); + RF_ETIMER_START(desc->timer); + + RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); + if (raidPtr->accesses_suspended) { + RF_CallbackDesc_t *cb; + cb = rf_AllocCallbackDesc(); + /* XXX the following cast is quite bogus... + * rf_ContinueRaidAccess takes a (RF_RaidAccessDesc_t *) as an + * argument.. GO */ + cb->callbackFunc = (void (*) (RF_CBParam_t)) rf_ContinueRaidAccess; + cb->callbackArg.p = (void *) desc; + cb->next = raidPtr->quiesce_wait_list; + raidPtr->quiesce_wait_list = cb; + suspended = RF_TRUE; + } + RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer); + + if (suspended && rf_quiesceDebug) + printf("Stalling access due to quiescence lock\n"); + + desc->state++; + return suspended; } -int rf_State_Map(RF_RaidAccessDesc_t *desc) +int +rf_State_Map(RF_RaidAccessDesc_t * desc) { - RF_Raid_t *raidPtr = desc->raidPtr; - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Etimer_t timer; - - RF_ETIMER_START(timer); - - if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks, - desc->bufPtr, RF_DONT_REMAP))) - RF_PANIC(); - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer); - - 
desc->state ++; - return RF_FALSE; + RF_Raid_t *raidPtr = desc->raidPtr; + RF_AccTraceEntry_t *tracerec = &desc->tracerec; + RF_Etimer_t timer; + + RF_ETIMER_START(timer); + + if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks, + desc->bufPtr, RF_DONT_REMAP))) + RF_PANIC(); + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer); + + desc->state++; + return RF_FALSE; } -int rf_State_Lock(RF_RaidAccessDesc_t *desc) +int +rf_State_Lock(RF_RaidAccessDesc_t * desc) { - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_AccessStripeMapHeader_t *asmh = desc->asmap; - RF_AccessStripeMap_t *asm_p; - RF_Etimer_t timer; - int suspended = RF_FALSE; - - RF_ETIMER_START(timer); - if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) { - RF_StripeNum_t lastStripeID = -1; - - /* acquire each lock that we don't already hold */ - for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) { - RF_ASSERT(RF_IO_IS_R_OR_W(desc->type)); - if (!rf_suppressLocksAndLargeWrites && - asm_p->parityInfo && - !(desc->flags& RF_DAG_SUPPRESS_LOCKS) && - !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED)) - { - asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED; - RF_ASSERT(asm_p->stripeID > lastStripeID); /* locks must be acquired - hierarchically */ - lastStripeID = asm_p->stripeID; - /* XXX the cast to (void (*)(RF_CBParam_t)) below is bogus! GO */ - RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type, - (void (*)(struct buf *))rf_ContinueRaidAccess, desc, asm_p, - raidPtr->Layout.dataSectorsPerStripe); - if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID, - &asm_p->lockReqDesc)) - { - suspended = RF_TRUE; - break; - } - } - - if (desc->type == RF_IO_TYPE_WRITE && - raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing) - { - if (! (asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED) ) { - int val; - - asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED; - /* XXX the cast below is quite bogus!!! 
XXX GO */ - val = rf_ForceOrBlockRecon(raidPtr, asm_p, - (void (*)(RF_Raid_t *,void *))rf_ContinueRaidAccess, desc); - if (val == 0) { - asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED; - } - else { - suspended = RF_TRUE; - break; - } - } - else { - if (rf_pssDebug) { - printf("[%d] skipping force/block because already done, psid %ld\n", - desc->tid,(long)asm_p->stripeID); - } - } - } - else { - if (rf_pssDebug) { - printf("[%d] skipping force/block because not write or not under recon, psid %ld\n", - desc->tid,(long)asm_p->stripeID); - } - } - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer); - - if (suspended) - return(RF_TRUE); - } - - desc->state++; - return(RF_FALSE); + RF_AccTraceEntry_t *tracerec = &desc->tracerec; + RF_Raid_t *raidPtr = desc->raidPtr; + RF_AccessStripeMapHeader_t *asmh = desc->asmap; + RF_AccessStripeMap_t *asm_p; + RF_Etimer_t timer; + int suspended = RF_FALSE; + + RF_ETIMER_START(timer); + if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) { + RF_StripeNum_t lastStripeID = -1; + + /* acquire each lock that we don't already hold */ + for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) { + RF_ASSERT(RF_IO_IS_R_OR_W(desc->type)); + if (!rf_suppressLocksAndLargeWrites && + asm_p->parityInfo && + !(desc->flags & RF_DAG_SUPPRESS_LOCKS) && + !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED)) { + asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED; + RF_ASSERT(asm_p->stripeID > lastStripeID); /* locks must be + * acquired + * hierarchically */ + lastStripeID = asm_p->stripeID; + /* XXX the cast to (void (*)(RF_CBParam_t)) + * below is bogus! 
GO */ + RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type, + (void (*) (struct buf *)) rf_ContinueRaidAccess, desc, asm_p, + raidPtr->Layout.dataSectorsPerStripe); + if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID, + &asm_p->lockReqDesc)) { + suspended = RF_TRUE; + break; + } + } + if (desc->type == RF_IO_TYPE_WRITE && + raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing) { + if (!(asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED)) { + int val; + + asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED; + /* XXX the cast below is quite + * bogus!!! XXX GO */ + val = rf_ForceOrBlockRecon(raidPtr, asm_p, + (void (*) (RF_Raid_t *, void *)) rf_ContinueRaidAccess, desc); + if (val == 0) { + asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED; + } else { + suspended = RF_TRUE; + break; + } + } else { + if (rf_pssDebug) { + printf("[%d] skipping force/block because already done, psid %ld\n", + desc->tid, (long) asm_p->stripeID); + } + } + } else { + if (rf_pssDebug) { + printf("[%d] skipping force/block because not write or not under recon, psid %ld\n", + desc->tid, (long) asm_p->stripeID); + } + } + } + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer); + + if (suspended) + return (RF_TRUE); + } + desc->state++; + return (RF_FALSE); } - /* * the following three states create, execute, and post-process dags * the error recovery unit is a single dag. @@ -627,50 +421,49 @@ int rf_State_Lock(RF_RaidAccessDesc_t *desc) * else * done (FAIL) */ -int rf_State_CreateDAG (RF_RaidAccessDesc_t *desc) +int +rf_State_CreateDAG(RF_RaidAccessDesc_t * desc) { - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_Etimer_t timer; - RF_DagHeader_t *dag_h; - int i, selectStatus; - - /* generate a dag for the access, and fire it off. When the dag - completes, we'll get re-invoked in the next state. 
*/ - RF_ETIMER_START(timer); - /* SelectAlgorithm returns one or more dags */ - selectStatus = rf_SelectAlgorithm(desc, desc->flags|RF_DAG_SUPPRESS_LOCKS); - if (rf_printDAGsDebug) - for (i = 0; i < desc->numStripes; i++) - rf_PrintDAGList(desc->dagArray[i].dags); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - /* update time to create all dags */ - tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer); - - desc->status = 0; /* good status */ - - if (selectStatus) { - /* failed to create a dag */ - /* this happens when there are too many faults or incomplete dag libraries */ - printf("[Failed to create a DAG\n]"); - RF_PANIC(); - } - else { - /* bind dags to desc */ - for (i = 0; i < desc->numStripes; i++) { - dag_h = desc->dagArray[i].dags; - while (dag_h) { -#ifdef KERNEL - dag_h->bp = (struct buf *) desc->bp; -#endif /* KERNEL */ - dag_h->tracerec = tracerec; - dag_h = dag_h->next; - } - } - desc->flags |= RF_DAG_DISPATCH_RETURNED; - desc->state++; /* next state should be rf_State_ExecuteDAG */ - } - return RF_FALSE; + RF_AccTraceEntry_t *tracerec = &desc->tracerec; + RF_Etimer_t timer; + RF_DagHeader_t *dag_h; + int i, selectStatus; + + /* generate a dag for the access, and fire it off. When the dag + * completes, we'll get re-invoked in the next state. 
*/ + RF_ETIMER_START(timer); + /* SelectAlgorithm returns one or more dags */ + selectStatus = rf_SelectAlgorithm(desc, desc->flags | RF_DAG_SUPPRESS_LOCKS); + if (rf_printDAGsDebug) + for (i = 0; i < desc->numStripes; i++) + rf_PrintDAGList(desc->dagArray[i].dags); + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + /* update time to create all dags */ + tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer); + + desc->status = 0; /* good status */ + + if (selectStatus) { + /* failed to create a dag */ + /* this happens when there are too many faults or incomplete + * dag libraries */ + printf("[Failed to create a DAG\n]"); + RF_PANIC(); + } else { + /* bind dags to desc */ + for (i = 0; i < desc->numStripes; i++) { + dag_h = desc->dagArray[i].dags; + while (dag_h) { + dag_h->bp = (struct buf *) desc->bp; + dag_h->tracerec = tracerec; + dag_h = dag_h->next; + } + } + desc->flags |= RF_DAG_DISPATCH_RETURNED; + desc->state++; /* next state should be rf_State_ExecuteDAG */ + } + return RF_FALSE; } @@ -681,35 +474,37 @@ int rf_State_CreateDAG (RF_RaidAccessDesc_t *desc) * - this preserves atomic parity update * dags for independents parity groups (stripes) are fired concurrently */ -int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc) +int +rf_State_ExecuteDAG(RF_RaidAccessDesc_t * desc) { - int i; - RF_DagHeader_t *dag_h; - RF_DagList_t *dagArray = desc->dagArray; - - /* next state is always rf_State_ProcessDAG - * important to do this before firing the first dag - * (it may finish before we leave this routine) */ - desc->state++; - - /* sweep dag array, a stripe at a time, firing the first dag in each stripe */ - for (i = 0; i < desc->numStripes; i++) { - RF_ASSERT(dagArray[i].numDags > 0); - RF_ASSERT(dagArray[i].numDagsDone == 0); - RF_ASSERT(dagArray[i].numDagsFired == 0); - RF_ETIMER_START(dagArray[i].tracerec.timer); - /* fire first dag in this stripe */ - dag_h = dagArray[i].dags; - RF_ASSERT(dag_h); - dagArray[i].numDagsFired++; - /* XXX Yet another 
case where we pass in a conflicting function pointer - :-( XXX GO */ - rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess, &dagArray[i]); - } - - /* the DAG will always call the callback, even if there was no - * blocking, so we are always suspended in this state */ - return RF_TRUE; + int i; + RF_DagHeader_t *dag_h; + RF_DagList_t *dagArray = desc->dagArray; + + /* next state is always rf_State_ProcessDAG important to do this + * before firing the first dag (it may finish before we leave this + * routine) */ + desc->state++; + + /* sweep dag array, a stripe at a time, firing the first dag in each + * stripe */ + for (i = 0; i < desc->numStripes; i++) { + RF_ASSERT(dagArray[i].numDags > 0); + RF_ASSERT(dagArray[i].numDagsDone == 0); + RF_ASSERT(dagArray[i].numDagsFired == 0); + RF_ETIMER_START(dagArray[i].tracerec.timer); + /* fire first dag in this stripe */ + dag_h = dagArray[i].dags; + RF_ASSERT(dag_h); + dagArray[i].numDagsFired++; + /* XXX Yet another case where we pass in a conflicting + * function pointer :-( XXX GO */ + rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, &dagArray[i]); + } + + /* the DAG will always call the callback, even if there was no + * blocking, so we are always suspended in this state */ + return RF_TRUE; } @@ -718,156 +513,149 @@ int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc) * first, check to all dags in the access have completed * if not, fire as many dags as possible */ -int rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc) +int +rf_State_ProcessDAG(RF_RaidAccessDesc_t * desc) { - RF_AccessStripeMapHeader_t *asmh = desc->asmap; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_DagHeader_t *dag_h; - int i, j, done = RF_TRUE; - RF_DagList_t *dagArray = desc->dagArray; - RF_Etimer_t timer; - - /* check to see if this is the last dag */ - for (i = 0; i < desc->numStripes; i++) - if (dagArray[i].numDags != dagArray[i].numDagsDone) - done = RF_FALSE; - - if (done) { - if (desc->status) { - /* a dag failed, retry */ - 
RF_ETIMER_START(timer); - /* free all dags */ - for (i = 0; i < desc->numStripes; i++) { - rf_FreeDAG(desc->dagArray[i].dags); - } - rf_MarkFailuresInASMList(raidPtr, asmh); - /* back up to rf_State_CreateDAG */ - desc->state = desc->state - 2; - return RF_FALSE; - } - else { - /* move on to rf_State_Cleanup */ - desc->state++; - } - return RF_FALSE; - } - else { - /* more dags to execute */ - /* see if any are ready to be fired. if so, fire them */ - /* don't fire the initial dag in a list, it's fired in rf_State_ExecuteDAG */ - for (i = 0; i < desc->numStripes; i++) { - if ((dagArray[i].numDagsDone < dagArray[i].numDags) - && (dagArray[i].numDagsDone == dagArray[i].numDagsFired) - && (dagArray[i].numDagsFired > 0)) { - RF_ETIMER_START(dagArray[i].tracerec.timer); - /* fire next dag in this stripe */ - /* first, skip to next dag awaiting execution */ - dag_h = dagArray[i].dags; - for (j = 0; j < dagArray[i].numDagsDone; j++) - dag_h = dag_h->next; - dagArray[i].numDagsFired++; - /* XXX and again we pass a different function pointer.. 
GO */ - rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess, - &dagArray[i]); - } - } - return RF_TRUE; - } + RF_AccessStripeMapHeader_t *asmh = desc->asmap; + RF_Raid_t *raidPtr = desc->raidPtr; + RF_DagHeader_t *dag_h; + int i, j, done = RF_TRUE; + RF_DagList_t *dagArray = desc->dagArray; + RF_Etimer_t timer; + + /* check to see if this is the last dag */ + for (i = 0; i < desc->numStripes; i++) + if (dagArray[i].numDags != dagArray[i].numDagsDone) + done = RF_FALSE; + + if (done) { + if (desc->status) { + /* a dag failed, retry */ + RF_ETIMER_START(timer); + /* free all dags */ + for (i = 0; i < desc->numStripes; i++) { + rf_FreeDAG(desc->dagArray[i].dags); + } + rf_MarkFailuresInASMList(raidPtr, asmh); + /* back up to rf_State_CreateDAG */ + desc->state = desc->state - 2; + return RF_FALSE; + } else { + /* move on to rf_State_Cleanup */ + desc->state++; + } + return RF_FALSE; + } else { + /* more dags to execute */ + /* see if any are ready to be fired. if so, fire them */ + /* don't fire the initial dag in a list, it's fired in + * rf_State_ExecuteDAG */ + for (i = 0; i < desc->numStripes; i++) { + if ((dagArray[i].numDagsDone < dagArray[i].numDags) + && (dagArray[i].numDagsDone == dagArray[i].numDagsFired) + && (dagArray[i].numDagsFired > 0)) { + RF_ETIMER_START(dagArray[i].tracerec.timer); + /* fire next dag in this stripe */ + /* first, skip to next dag awaiting execution */ + dag_h = dagArray[i].dags; + for (j = 0; j < dagArray[i].numDagsDone; j++) + dag_h = dag_h->next; + dagArray[i].numDagsFired++; + /* XXX and again we pass a different function + * pointer.. 
GO */ + rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, + &dagArray[i]); + } + } + return RF_TRUE; + } } - /* only make it this far if all dags complete successfully */ -int rf_State_Cleanup(RF_RaidAccessDesc_t *desc) +int +rf_State_Cleanup(RF_RaidAccessDesc_t * desc) { - RF_AccTraceEntry_t *tracerec = &desc->tracerec; - RF_AccessStripeMapHeader_t *asmh = desc->asmap; - RF_Raid_t *raidPtr = desc->raidPtr; - RF_AccessStripeMap_t *asm_p; - RF_DagHeader_t *dag_h; - RF_Etimer_t timer; - int tid, i; - - desc->state ++; - - rf_get_threadid(tid); - - timer = tracerec->timer; - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer); - - /* the RAID I/O is complete. Clean up. */ - tracerec->specific.user.dag_retry_us = 0; - - RF_ETIMER_START(timer); - if (desc->flags & RF_DAG_RETURN_DAG) { - /* copy dags into paramDAG */ - *(desc->paramDAG) = desc->dagArray[0].dags; - dag_h = *(desc->paramDAG); - for (i = 1; i < desc->numStripes; i++) { - /* concatenate dags from remaining stripes */ - RF_ASSERT(dag_h); - while (dag_h->next) - dag_h = dag_h->next; - dag_h->next = desc->dagArray[i].dags; - } - } - else { - /* free all dags */ - for (i = 0; i < desc->numStripes; i++) { - rf_FreeDAG(desc->dagArray[i].dags); - } - } - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer); - - RF_ETIMER_START(timer); - if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) { - for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) { - if (!rf_suppressLocksAndLargeWrites && - asm_p->parityInfo && - !(desc->flags&RF_DAG_SUPPRESS_LOCKS)) - { - RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc); - rf_ReleaseStripeLock(raidPtr->lockTable, asm_p->stripeID, - &asm_p->lockReqDesc); - } - if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) { - rf_UnblockRecon(raidPtr, asm_p); - } - } - } - -#ifdef SIMULATE - /* refresh current owner in case blocked ios where allowed to run */ - 
rf_SetCurrentOwner(desc->owner); -#endif /* SIMULATE */ - - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer); - - RF_ETIMER_START(timer); - if (desc->flags & RF_DAG_RETURN_ASM) - *(desc->paramASM) = asmh; - else - rf_FreeAccessStripeMap(asmh); - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer); - - RF_ETIMER_STOP(desc->timer); - RF_ETIMER_EVAL(desc->timer); - - timer = desc->tracerec.tot_timer; - RF_ETIMER_STOP(timer); - RF_ETIMER_EVAL(timer); - desc->tracerec.total_us = RF_ETIMER_VAL_US(timer); - - rf_LogTraceRec(raidPtr, tracerec); - - desc->flags |= RF_DAG_ACCESS_COMPLETE; - - return RF_FALSE; + RF_AccTraceEntry_t *tracerec = &desc->tracerec; + RF_AccessStripeMapHeader_t *asmh = desc->asmap; + RF_Raid_t *raidPtr = desc->raidPtr; + RF_AccessStripeMap_t *asm_p; + RF_DagHeader_t *dag_h; + RF_Etimer_t timer; + int tid, i; + + desc->state++; + + rf_get_threadid(tid); + + timer = tracerec->timer; + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer); + + /* the RAID I/O is complete. Clean up. 
*/ + tracerec->specific.user.dag_retry_us = 0; + + RF_ETIMER_START(timer); + if (desc->flags & RF_DAG_RETURN_DAG) { + /* copy dags into paramDAG */ + *(desc->paramDAG) = desc->dagArray[0].dags; + dag_h = *(desc->paramDAG); + for (i = 1; i < desc->numStripes; i++) { + /* concatenate dags from remaining stripes */ + RF_ASSERT(dag_h); + while (dag_h->next) + dag_h = dag_h->next; + dag_h->next = desc->dagArray[i].dags; + } + } else { + /* free all dags */ + for (i = 0; i < desc->numStripes; i++) { + rf_FreeDAG(desc->dagArray[i].dags); + } + } + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer); + + RF_ETIMER_START(timer); + if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) { + for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) { + if (!rf_suppressLocksAndLargeWrites && + asm_p->parityInfo && + !(desc->flags & RF_DAG_SUPPRESS_LOCKS)) { + RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc); + rf_ReleaseStripeLock(raidPtr->lockTable, asm_p->stripeID, + &asm_p->lockReqDesc); + } + if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) { + rf_UnblockRecon(raidPtr, asm_p); + } + } + } + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer); + + RF_ETIMER_START(timer); + if (desc->flags & RF_DAG_RETURN_ASM) + *(desc->paramASM) = asmh; + else + rf_FreeAccessStripeMap(asmh); + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer); + + RF_ETIMER_STOP(desc->timer); + RF_ETIMER_EVAL(desc->timer); + + timer = desc->tracerec.tot_timer; + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + desc->tracerec.total_us = RF_ETIMER_VAL_US(timer); + + rf_LogTraceRec(raidPtr, tracerec); + + desc->flags |= RF_DAG_ACCESS_COMPLETE; + + return RF_FALSE; } diff --git a/sys/dev/raidframe/rf_states.h b/sys/dev/raidframe/rf_states.h index 2e2895caa5e..25beba5905a 100644 --- a/sys/dev/raidframe/rf_states.h +++ 
b/sys/dev/raidframe/rf_states.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_states.h,v 1.1 1999/01/11 14:29:51 niklas Exp $ */ -/* $NetBSD: rf_states.h,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_states.h,v 1.2 1999/02/16 00:03:28 niklas Exp $ */ +/* $NetBSD: rf_states.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,44 +27,22 @@ * rights to redistribute these changes. */ -/* : - * Log: rf_states.h,v - * Revision 1.5 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.4 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.3 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.2 1996/05/06 22:08:28 wvcii - * added copyright info and change log - * - * Revision 1.1 1995/07/06 14:23:39 robby - * Initial revision - * - */ - #ifndef _RF__RF_STATES_H_ #define _RF__RF_STATES_H_ #include "rf_types.h" -void rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc); -void rf_ContinueDagAccess(RF_DagList_t *dagList); -int rf_State_LastState(RF_RaidAccessDesc_t *desc); -int rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc); -int rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc); -int rf_State_Quiesce(RF_RaidAccessDesc_t *desc); -int rf_State_Map(RF_RaidAccessDesc_t *desc); -int rf_State_Lock(RF_RaidAccessDesc_t *desc); -int rf_State_CreateDAG(RF_RaidAccessDesc_t *desc); -int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc); -int rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc); -int rf_State_Cleanup(RF_RaidAccessDesc_t *desc); +void rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc); +void rf_ContinueDagAccess(RF_DagList_t * dagList); +int rf_State_LastState(RF_RaidAccessDesc_t * desc); +int rf_State_IncrAccessCount(RF_RaidAccessDesc_t * desc); +int rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc); +int 
rf_State_Quiesce(RF_RaidAccessDesc_t * desc); +int rf_State_Map(RF_RaidAccessDesc_t * desc); +int rf_State_Lock(RF_RaidAccessDesc_t * desc); +int rf_State_CreateDAG(RF_RaidAccessDesc_t * desc); +int rf_State_ExecuteDAG(RF_RaidAccessDesc_t * desc); +int rf_State_ProcessDAG(RF_RaidAccessDesc_t * desc); +int rf_State_Cleanup(RF_RaidAccessDesc_t * desc); -#endif /* !_RF__RF_STATES_H_ */ +#endif /* !_RF__RF_STATES_H_ */ diff --git a/sys/dev/raidframe/rf_stripelocks.c b/sys/dev/raidframe/rf_stripelocks.c index c9b9502ad70..a2e0f4dc81d 100644 --- a/sys/dev/raidframe/rf_stripelocks.c +++ b/sys/dev/raidframe/rf_stripelocks.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_stripelocks.c,v 1.1 1999/01/11 14:29:51 niklas Exp $ */ -/* $NetBSD: rf_stripelocks.c,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_stripelocks.c,v 1.2 1999/02/16 00:03:28 niklas Exp $ */ +/* $NetBSD: rf_stripelocks.c,v 1.3 1999/02/05 00:06:17 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,93 +27,6 @@ * rights to redistribute these changes. */ -/* : - * Log: rf_stripelocks.c,v - * Revision 1.35 1996/06/10 12:50:57 jimz - * Add counters to freelists to track number of allocations, frees, - * grows, max size, etc. Adjust a couple sets of PRIME params based - * on the results. - * - * Revision 1.34 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.33 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.32 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.31 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. 
- * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.30 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.29 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.28 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.27 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.26 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.25 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.24 1996/05/20 16:15:00 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.23 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.22 1996/05/16 22:28:11 jimz - * misc cleanup - * - * Revision 1.21 1996/05/15 23:39:52 jimz - * remove #if 0 code - * - * Revision 1.20 1996/05/15 23:37:38 jimz - * convert to using RF_FREELIST stuff for StripeLockDesc allocation - * - * Revision 1.19 1996/05/08 18:00:53 jimz - * fix number of args to debug printf - * - * Revision 1.18 1996/05/06 22:33:07 jimz - * added better debug info - * - * Revision 1.17 1996/05/06 22:09:01 wvcii - * added copyright info and change log - * - */ - /* * stripelocks.c -- code to lock stripes for read and write access * @@ -144,10 +57,6 @@ * searching through stripe lock descriptors. 
*/ -#ifdef _KERNEL -#define KERNEL -#endif - #include "rf_types.h" #include "rf_raid.h" #include "rf_stripelocks.h" @@ -168,19 +77,15 @@ #define Dprintf7(s,a,b,c,d,e,f,g) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),NULL) #define Dprintf8(s,a,b,c,d,e,f,g,h) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),(void *)((unsigned long)h)) -#ifndef KERNEL -#define FLUSH fflush(stdout) -#else /* !KERNEL */ #define FLUSH -#endif /* !KERNEL */ #define HASH_STRIPEID(_sid_) ( (_sid_) & (rf_lockTableSize-1) ) #define MAX_FREELIST 100 -static void AddToWaitersQueue(RF_LockTableEntry_t *lockTable, RF_StripeLockDesc_t *lockDesc, RF_LockReqDesc_t *lockReqDesc); +static void AddToWaitersQueue(RF_LockTableEntry_t * lockTable, RF_StripeLockDesc_t * lockDesc, RF_LockReqDesc_t * lockReqDesc); static RF_StripeLockDesc_t *AllocStripeLockDesc(RF_StripeNum_t stripeID); -static void FreeStripeLockDesc(RF_StripeLockDesc_t *p); -static void PrintLockedStripes(RF_LockTableEntry_t *lockTable); +static void FreeStripeLockDesc(RF_StripeLockDesc_t * p); +static void PrintLockedStripes(RF_LockTableEntry_t * lockTable); /* determines if two ranges overlap. 
always yields false if either start value is negative */ #define SINGLE_RANGE_OVERLAP(_strt1, _stop1, _strt2, _stop2) \ @@ -216,427 +121,550 @@ static RF_FreeList_t *rf_stripelock_freelist; static void rf_ShutdownStripeLockFreeList(void *); static void rf_RaidShutdownStripeLocks(void *); -static void rf_ShutdownStripeLockFreeList(ignored) - void *ignored; +static void +rf_ShutdownStripeLockFreeList(ignored) + void *ignored; { - RF_FREELIST_DESTROY(rf_stripelock_freelist,next,(RF_StripeLockDesc_t *)); + RF_FREELIST_DESTROY(rf_stripelock_freelist, next, (RF_StripeLockDesc_t *)); } -int rf_ConfigureStripeLockFreeList(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureStripeLockFreeList(listp) + RF_ShutdownList_t **listp; { unsigned mask; - int rc; + int rc; RF_FREELIST_CREATE(rf_stripelock_freelist, RF_MAX_FREE_STRIPELOCK, - RF_STRIPELOCK_INITIAL,sizeof(RF_StripeLockDesc_t)); + RF_STRIPELOCK_INITIAL, sizeof(RF_StripeLockDesc_t)); rc = rf_ShutdownCreate(listp, rf_ShutdownStripeLockFreeList, NULL); if (rc) { RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + __FILE__, __LINE__, rc); rf_ShutdownStripeLockFreeList(NULL); - return(rc); + return (rc); } - RF_FREELIST_PRIME(rf_stripelock_freelist,RF_STRIPELOCK_INITIAL,next, - (RF_StripeLockDesc_t *)); - for (mask=0x1; mask; mask<<=1) - if (rf_lockTableSize==mask) + RF_FREELIST_PRIME(rf_stripelock_freelist, RF_STRIPELOCK_INITIAL, next, + (RF_StripeLockDesc_t *)); + for (mask = 0x1; mask; mask <<= 1) + if (rf_lockTableSize == mask) break; if (!mask) { - printf("[WARNING: lock table size must be a power of two. Setting to %d.]\n",RF_DEFAULT_LOCK_TABLE_SIZE); + printf("[WARNING: lock table size must be a power of two. 
Setting to %d.]\n", RF_DEFAULT_LOCK_TABLE_SIZE); rf_lockTableSize = RF_DEFAULT_LOCK_TABLE_SIZE; } - return(0); + return (0); } -RF_LockTableEntry_t *rf_MakeLockTable() +RF_LockTableEntry_t * +rf_MakeLockTable() { RF_LockTableEntry_t *lockTable; - int i, rc; + int i, rc; RF_Calloc(lockTable, ((int) rf_lockTableSize), sizeof(RF_LockTableEntry_t), (RF_LockTableEntry_t *)); if (lockTable == NULL) - return(NULL); - for (i=0; i<rf_lockTableSize; i++) { + return (NULL); + for (i = 0; i < rf_lockTableSize; i++) { rc = rf_mutex_init(&lockTable[i].mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + __LINE__, rc); /* XXX clean up other mutexes */ - return(NULL); + return (NULL); } } - return(lockTable); + return (lockTable); } -void rf_ShutdownStripeLocks(RF_LockTableEntry_t *lockTable) +void +rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable) { - int i; + int i; if (rf_stripeLockDebug) { PrintLockedStripes(lockTable); } - for (i=0; i<rf_lockTableSize; i++) { + for (i = 0; i < rf_lockTableSize; i++) { rf_mutex_destroy(&lockTable[i].mutex); } - RF_Free(lockTable, rf_lockTableSize*sizeof(RF_LockTableEntry_t)); + RF_Free(lockTable, rf_lockTableSize * sizeof(RF_LockTableEntry_t)); } -static void rf_RaidShutdownStripeLocks(arg) - void *arg; +static void +rf_RaidShutdownStripeLocks(arg) + void *arg; { - RF_Raid_t *raidPtr = (RF_Raid_t *)arg; + RF_Raid_t *raidPtr = (RF_Raid_t *) arg; rf_ShutdownStripeLocks(raidPtr->lockTable); } -int rf_ConfigureStripeLocks( - RF_ShutdownList_t **listp, - RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr) +int +rf_ConfigureStripeLocks( + RF_ShutdownList_t ** listp, + RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - int rc; + int rc; raidPtr->lockTable = rf_MakeLockTable(); if (raidPtr->lockTable == NULL) - return(ENOMEM); + return (ENOMEM); rc = rf_ShutdownCreate(listp, rf_RaidShutdownStripeLocks, raidPtr); if (rc) { 
RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + __FILE__, __LINE__, rc); rf_ShutdownStripeLocks(raidPtr->lockTable); - return(rc); + return (rc); } - return(0); + return (0); } - /* returns 0 if you've got the lock, and non-zero if you have to wait. * if and only if you have to wait, we'll cause cbFunc to get invoked * with cbArg when you are granted the lock. We store a tag in *releaseTag * that you need to give back to us when you release the lock. */ -int rf_AcquireStripeLock( - RF_LockTableEntry_t *lockTable, - RF_StripeNum_t stripeID, - RF_LockReqDesc_t *lockReqDesc) +int +rf_AcquireStripeLock( + RF_LockTableEntry_t * lockTable, + RF_StripeNum_t stripeID, + RF_LockReqDesc_t * lockReqDesc) { - RF_StripeLockDesc_t *lockDesc; - RF_LockReqDesc_t *p; - int tid=0, hashval = HASH_STRIPEID(stripeID); - int retcode = 0; - - RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type)); - - if (rf_stripeLockDebug) { - rf_get_threadid(tid); - if (stripeID == -1) Dprintf1("[%d] Lock acquisition supressed (stripeID == -1)\n",tid); - else { - Dprintf8("[%d] Trying to acquire stripe lock table 0x%lx SID %ld type %c range %ld-%ld, range2 %ld-%ld hashval %d\n", - tid, (unsigned long) lockTable, stripeID, lockReqDesc->type, lockReqDesc->start, - lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); - Dprintf3("[%d] lock %ld hashval %d\n", tid, stripeID, hashval); - FLUSH; - } - } - if (stripeID == -1) return(0); - lockReqDesc->next = NULL; /* just to be sure */ - - RF_LOCK_MUTEX(lockTable[hashval].mutex); - for (lockDesc = lockTable[hashval].descList; lockDesc; lockDesc=lockDesc->next) { - if (lockDesc->stripeID == stripeID) break; - } - - if (!lockDesc) { /* no entry in table => no one reading or writing */ - lockDesc = AllocStripeLockDesc(stripeID); - lockDesc->next = lockTable[hashval].descList; - lockTable[hashval].descList = lockDesc; - if (lockReqDesc->type == RF_IO_TYPE_WRITE) lockDesc->nWriters++; - lockDesc->granted = 
lockReqDesc; - if (rf_stripeLockDebug) {Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid,stripeID,lockReqDesc->type,lockReqDesc->start,lockReqDesc->stop,lockReqDesc->start2,lockReqDesc->stop2); FLUSH;} - } else { - - if (lockReqDesc->type == RF_IO_TYPE_WRITE) lockDesc->nWriters++; - - if (lockDesc->nWriters == 0) { /* no need to search any lists if there are no writers anywhere */ - lockReqDesc->next = lockDesc->granted; - lockDesc->granted = lockReqDesc; - if (rf_stripeLockDebug) {Dprintf7("[%d] no writers: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid,stripeID,lockReqDesc->type,lockReqDesc->start,lockReqDesc->stop,lockReqDesc->start2,lockReqDesc->stop2); FLUSH;} - } else { - - /* search the granted & waiting lists for a conflict. stop searching as soon as we find one */ - retcode = 0; - for (p = lockDesc->granted; p; p=p->next) if (STRIPELOCK_CONFLICT(lockReqDesc, p)) {retcode = 1; break;} - if (!retcode) for (p = lockDesc->waitersH; p; p=p->next) if (STRIPELOCK_CONFLICT(lockReqDesc, p)) {retcode = 2; break;} - - if (!retcode) { - lockReqDesc->next = lockDesc->granted; /* no conflicts found => grant lock */ - lockDesc->granted = lockReqDesc; + RF_StripeLockDesc_t *lockDesc; + RF_LockReqDesc_t *p; + int tid = 0, hashval = HASH_STRIPEID(stripeID); + int retcode = 0; + + RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type)); + if (rf_stripeLockDebug) { - Dprintf7("[%d] no conflicts: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid,stripeID,lockReqDesc->type,lockReqDesc->start,lockReqDesc->stop, - lockReqDesc->start2,lockReqDesc->stop2); - FLUSH; + rf_get_threadid(tid); + if (stripeID == -1) + Dprintf1("[%d] Lock acquisition supressed (stripeID == -1)\n", tid); + else { + Dprintf8("[%d] Trying to acquire stripe lock table 0x%lx SID %ld type %c range %ld-%ld, range2 %ld-%ld hashval %d\n", + tid, (unsigned long) lockTable, stripeID, lockReqDesc->type, lockReqDesc->start, + lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); + Dprintf3("[%d] 
lock %ld hashval %d\n", tid, stripeID, hashval); + FLUSH; + } } - } else { - if (rf_stripeLockDebug) { - Dprintf6("[%d] conflict: lock %ld %c %ld-%ld hashval=%d not granted\n", - tid,stripeID,lockReqDesc->type,lockReqDesc->start,lockReqDesc->stop, - hashval); - Dprintf3("[%d] lock %ld retcode=%d\n", tid, stripeID, retcode); - FLUSH; + if (stripeID == -1) + return (0); + lockReqDesc->next = NULL; /* just to be sure */ + + RF_LOCK_MUTEX(lockTable[hashval].mutex); + for (lockDesc = lockTable[hashval].descList; lockDesc; lockDesc = lockDesc->next) { + if (lockDesc->stripeID == stripeID) + break; } - AddToWaitersQueue(lockTable, lockDesc, lockReqDesc); /* conflict => the current access must wait */ - } - } - } - RF_UNLOCK_MUTEX(lockTable[hashval].mutex); - return(retcode); + if (!lockDesc) { /* no entry in table => no one reading or + * writing */ + lockDesc = AllocStripeLockDesc(stripeID); + lockDesc->next = lockTable[hashval].descList; + lockTable[hashval].descList = lockDesc; + if (lockReqDesc->type == RF_IO_TYPE_WRITE) + lockDesc->nWriters++; + lockDesc->granted = lockReqDesc; + if (rf_stripeLockDebug) { + Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld %ld-%ld granted\n", + tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); + FLUSH; + } + } else { + + if (lockReqDesc->type == RF_IO_TYPE_WRITE) + lockDesc->nWriters++; + + if (lockDesc->nWriters == 0) { /* no need to search any lists + * if there are no writers + * anywhere */ + lockReqDesc->next = lockDesc->granted; + lockDesc->granted = lockReqDesc; + if (rf_stripeLockDebug) { + Dprintf7("[%d] no writers: lock %ld %c %ld-%ld %ld-%ld granted\n", + tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); + FLUSH; + } + } else { + + /* search the granted & waiting lists for a conflict. 
+ * stop searching as soon as we find one */ + retcode = 0; + for (p = lockDesc->granted; p; p = p->next) + if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { + retcode = 1; + break; + } + if (!retcode) + for (p = lockDesc->waitersH; p; p = p->next) + if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { + retcode = 2; + break; + } + if (!retcode) { + lockReqDesc->next = lockDesc->granted; /* no conflicts found => + * grant lock */ + lockDesc->granted = lockReqDesc; + if (rf_stripeLockDebug) { + Dprintf7("[%d] no conflicts: lock %ld %c %ld-%ld %ld-%ld granted\n", + tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, + lockReqDesc->start2, lockReqDesc->stop2); + FLUSH; + } + } else { + if (rf_stripeLockDebug) { + Dprintf6("[%d] conflict: lock %ld %c %ld-%ld hashval=%d not granted\n", + tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, + hashval); + Dprintf3("[%d] lock %ld retcode=%d\n", tid, stripeID, retcode); + FLUSH; + } + AddToWaitersQueue(lockTable, lockDesc, lockReqDesc); /* conflict => the + * current access must + * wait */ + } + } + } + + RF_UNLOCK_MUTEX(lockTable[hashval].mutex); + return (retcode); } -void rf_ReleaseStripeLock( - RF_LockTableEntry_t *lockTable, - RF_StripeNum_t stripeID, - RF_LockReqDesc_t *lockReqDesc) +void +rf_ReleaseStripeLock( + RF_LockTableEntry_t * lockTable, + RF_StripeNum_t stripeID, + RF_LockReqDesc_t * lockReqDesc) { - RF_StripeLockDesc_t *lockDesc, *ld_t; - RF_LockReqDesc_t *lr, *lr_t, *callbacklist, *t; - RF_IoType_t type = lockReqDesc->type; - int tid=0, hashval = HASH_STRIPEID(stripeID); - int release_it, consider_it; - RF_LockReqDesc_t *candidate, *candidate_t, *predecessor; - - RF_ASSERT(RF_IO_IS_R_OR_W(type)); - - if (rf_stripeLockDebug) { - rf_get_threadid(tid); - if (stripeID == -1) Dprintf1("[%d] Lock release supressed (stripeID == -1)\n",tid); - else {Dprintf8("[%d] Releasing stripe lock on stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - 
tid,stripeID,lockReqDesc->type,lockReqDesc->start,lockReqDesc->stop,lockReqDesc->start2,lockReqDesc->stop2, lockTable); FLUSH;} - } - - if (stripeID == -1) return; - - RF_LOCK_MUTEX(lockTable[hashval].mutex); - - /* find the stripe lock descriptor */ - for (ld_t = NULL, lockDesc = lockTable[hashval].descList; lockDesc; ld_t = lockDesc, lockDesc=lockDesc->next) { - if (lockDesc->stripeID == stripeID) break; - } - RF_ASSERT(lockDesc); /* major error to release a lock that doesn't exist */ - - /* find the stripe lock request descriptor & delete it from the list */ - for (lr_t = NULL, lr = lockDesc->granted; lr; lr_t = lr, lr=lr->next) if (lr == lockReqDesc) break; - - RF_ASSERT(lr && (lr == lockReqDesc)); /* major error to release a lock that hasn't been granted */ - if (lr_t) lr_t->next = lr->next; else { - RF_ASSERT(lr == lockDesc->granted); - lockDesc->granted = lr->next; - } - lr->next = NULL; - - if (lockReqDesc->type == RF_IO_TYPE_WRITE) lockDesc->nWriters--; - - /* search through the waiters list to see if anyone needs to be woken up. - * for each such descriptor in the wait list, we check it against everything granted and against - * everything _in front_ of it in the waiters queue. If it conflicts with none of these, we release it. - * - * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED LIST HERE. This will roach the case where - * the callback tries to acquire a new lock in the same stripe. There are some asserts to try and detect this. - * - * We apply 2 performance optimizations: - * (1) if releasing this lock results in no more writers to this stripe, we just release everybody waiting, - * since we place no restrictions on the number of concurrent reads. - * (2) we consider as candidates for wakeup only those waiters that have a range overlap with either - * the descriptor being woken up or with something in the callbacklist (i.e. something we've just now woken up). - * This allows us to avoid the long evaluation for some descriptors. 
- */ - - callbacklist = NULL; - if (lockDesc->nWriters == 0) { /* performance tweak (1) */ - while (lockDesc->waitersH) { - - lr = lockDesc->waitersH; /* delete from waiters list */ - lockDesc->waitersH = lr->next; - - RF_ASSERT(lr->type == RF_IO_TYPE_READ); - - lr->next = lockDesc->granted; /* add to granted list */ - lockDesc->granted = lr; - - RF_ASSERT(!lr->templink); - lr->templink = callbacklist; /* put on callback list so that we'll invoke callback below */ - callbacklist = lr; - if (rf_stripeLockDebug) {Dprintf8("[%d] No writers: granting lock stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid,stripeID,lr->type,lr->start,lr->stop,lr->start2,lr->stop2,(unsigned long) lockTable); FLUSH;} - } - lockDesc->waitersT = NULL; /* we've purged the whole waiters list */ - - } else for (candidate_t = NULL, candidate = lockDesc->waitersH; candidate; ) { - - /* performance tweak (2) */ - consider_it = 0; - if (RANGE_OVERLAP(lockReqDesc, candidate)) consider_it = 1; - else for (t = callbacklist; t; t=t->templink) if (RANGE_OVERLAP(t, candidate)) { - consider_it = 1; - break; - } - if (!consider_it) { - if (rf_stripeLockDebug) {Dprintf8("[%d] No overlap: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); FLUSH;} - candidate_t = candidate; candidate = candidate->next; - continue; - } - - - /* we have a candidate for release. 
check to make sure it is not blocked by any granted locks */ - release_it = 1; - for (predecessor = lockDesc->granted; predecessor; predecessor = predecessor->next) { - if (STRIPELOCK_CONFLICT(candidate, predecessor)) { + RF_StripeLockDesc_t *lockDesc, *ld_t; + RF_LockReqDesc_t *lr, *lr_t, *callbacklist, *t; + RF_IoType_t type = lockReqDesc->type; + int tid = 0, hashval = HASH_STRIPEID(stripeID); + int release_it, consider_it; + RF_LockReqDesc_t *candidate, *candidate_t, *predecessor; + + RF_ASSERT(RF_IO_IS_R_OR_W(type)); + if (rf_stripeLockDebug) { - Dprintf8("[%d] Conflicts with granted lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); FLUSH; + rf_get_threadid(tid); + if (stripeID == -1) + Dprintf1("[%d] Lock release supressed (stripeID == -1)\n", tid); + else { + Dprintf8("[%d] Releasing stripe lock on stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", + tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2, lockTable); + FLUSH; + } } - release_it = 0; break; - } - } - - /* now check to see if the candidate is blocked by any waiters that occur before it it the wait queue */ - if (release_it) for (predecessor = lockDesc->waitersH; predecessor != candidate; predecessor = predecessor->next) { - if (STRIPELOCK_CONFLICT(candidate, predecessor)) { - if (rf_stripeLockDebug) { - Dprintf8("[%d] Conflicts with waiting lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); FLUSH; + if (stripeID == -1) + return; + + RF_LOCK_MUTEX(lockTable[hashval].mutex); + + /* find the stripe lock descriptor */ + for (ld_t = NULL, lockDesc = lockTable[hashval].descList; lockDesc; ld_t = 
lockDesc, lockDesc = lockDesc->next) { + if (lockDesc->stripeID == stripeID) + break; } - release_it = 0; break; - } - } - - /* release it if indicated */ - if (release_it) { - if (rf_stripeLockDebug) {Dprintf8("[%d] Granting lock to candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); FLUSH;} - if (candidate_t) { - candidate_t->next = candidate->next; - if (lockDesc->waitersT == candidate) lockDesc->waitersT = candidate_t; /* cannot be waitersH since candidate_t is not NULL */ - } else { - RF_ASSERT(candidate == lockDesc->waitersH); - lockDesc->waitersH = lockDesc->waitersH->next; - if (!lockDesc->waitersH) lockDesc->waitersT = NULL; - } - candidate->next = lockDesc->granted; /* move it to the granted list */ - lockDesc->granted = candidate; - - RF_ASSERT(!candidate->templink); - candidate->templink = callbacklist; /* put it on the list of things to be called after we release the mutex */ - callbacklist = candidate; - - if (!candidate_t) candidate = lockDesc->waitersH; else candidate = candidate_t->next; /* continue with the rest of the list */ - } else { - candidate_t = candidate; candidate = candidate->next; /* continue with the rest of the list */ - } - } - - /* delete the descriptor if no one is waiting or active */ - if (!lockDesc->granted && !lockDesc->waitersH) { - RF_ASSERT(lockDesc->nWriters == 0); - if (rf_stripeLockDebug) { - Dprintf3("[%d] Last lock released (table 0x%lx): deleting desc for stripeID %ld\n",tid,(unsigned long) lockTable, stripeID); FLUSH; - } - if (ld_t) ld_t->next = lockDesc->next; else { - RF_ASSERT(lockDesc == lockTable[hashval].descList); - lockTable[hashval].descList = lockDesc->next; - } - FreeStripeLockDesc(lockDesc); - lockDesc = NULL; /* only for the ASSERT below */ - } - - RF_UNLOCK_MUTEX(lockTable[hashval].mutex); - - /* now that we've unlocked the mutex, invoke the callback 
on all the descriptors in the list */ - RF_ASSERT(!( (callbacklist) && (!lockDesc) )); /* if we deleted the descriptor, we should have no callbacks to do */ - for (candidate = callbacklist; candidate; ) { - t = candidate; - candidate = candidate->templink; - t->templink = NULL; - (t->cbFunc)(t->cbArg); - } -} + RF_ASSERT(lockDesc); /* major error to release a lock that doesn't + * exist */ + /* find the stripe lock request descriptor & delete it from the list */ + for (lr_t = NULL, lr = lockDesc->granted; lr; lr_t = lr, lr = lr->next) + if (lr == lockReqDesc) + break; + + RF_ASSERT(lr && (lr == lockReqDesc)); /* major error to release a + * lock that hasn't been + * granted */ + if (lr_t) + lr_t->next = lr->next; + else { + RF_ASSERT(lr == lockDesc->granted); + lockDesc->granted = lr->next; + } + lr->next = NULL; + + if (lockReqDesc->type == RF_IO_TYPE_WRITE) + lockDesc->nWriters--; + + /* search through the waiters list to see if anyone needs to be woken + * up. for each such descriptor in the wait list, we check it against + * everything granted and against everything _in front_ of it in the + * waiters queue. If it conflicts with none of these, we release it. + * + * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED LIST HERE. + * This will roach the case where the callback tries to acquire a new + * lock in the same stripe. There are some asserts to try and detect + * this. + * + * We apply 2 performance optimizations: (1) if releasing this lock + * results in no more writers to this stripe, we just release + * everybody waiting, since we place no restrictions on the number of + * concurrent reads. (2) we consider as candidates for wakeup only + * those waiters that have a range overlap with either the descriptor + * being woken up or with something in the callbacklist (i.e. + * something we've just now woken up). This allows us to avoid the + * long evaluation for some descriptors. 
*/ + + callbacklist = NULL; + if (lockDesc->nWriters == 0) { /* performance tweak (1) */ + while (lockDesc->waitersH) { + + lr = lockDesc->waitersH; /* delete from waiters + * list */ + lockDesc->waitersH = lr->next; + + RF_ASSERT(lr->type == RF_IO_TYPE_READ); + + lr->next = lockDesc->granted; /* add to granted list */ + lockDesc->granted = lr; + + RF_ASSERT(!lr->templink); + lr->templink = callbacklist; /* put on callback list + * so that we'll invoke + * callback below */ + callbacklist = lr; + if (rf_stripeLockDebug) { + Dprintf8("[%d] No writers: granting lock stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", + tid, stripeID, lr->type, lr->start, lr->stop, lr->start2, lr->stop2, (unsigned long) lockTable); + FLUSH; + } + } + lockDesc->waitersT = NULL; /* we've purged the whole + * waiters list */ + + } else + for (candidate_t = NULL, candidate = lockDesc->waitersH; candidate;) { + + /* performance tweak (2) */ + consider_it = 0; + if (RANGE_OVERLAP(lockReqDesc, candidate)) + consider_it = 1; + else + for (t = callbacklist; t; t = t->templink) + if (RANGE_OVERLAP(t, candidate)) { + consider_it = 1; + break; + } + if (!consider_it) { + if (rf_stripeLockDebug) { + Dprintf8("[%d] No overlap: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", + tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, + (unsigned long) lockTable); + FLUSH; + } + candidate_t = candidate; + candidate = candidate->next; + continue; + } + /* we have a candidate for release. 
check to make + * sure it is not blocked by any granted locks */ + release_it = 1; + for (predecessor = lockDesc->granted; predecessor; predecessor = predecessor->next) { + if (STRIPELOCK_CONFLICT(candidate, predecessor)) { + if (rf_stripeLockDebug) { + Dprintf8("[%d] Conflicts with granted lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", + tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, + (unsigned long) lockTable); + FLUSH; + } + release_it = 0; + break; + } + } + + /* now check to see if the candidate is blocked by any + * waiters that occur before it it the wait queue */ + if (release_it) + for (predecessor = lockDesc->waitersH; predecessor != candidate; predecessor = predecessor->next) { + if (STRIPELOCK_CONFLICT(candidate, predecessor)) { + if (rf_stripeLockDebug) { + Dprintf8("[%d] Conflicts with waiting lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", + tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, + (unsigned long) lockTable); + FLUSH; + } + release_it = 0; + break; + } + } + + /* release it if indicated */ + if (release_it) { + if (rf_stripeLockDebug) { + Dprintf8("[%d] Granting lock to candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", + tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, + (unsigned long) lockTable); + FLUSH; + } + if (candidate_t) { + candidate_t->next = candidate->next; + if (lockDesc->waitersT == candidate) + lockDesc->waitersT = candidate_t; /* cannot be waitersH + * since candidate_t is + * not NULL */ + } else { + RF_ASSERT(candidate == lockDesc->waitersH); + lockDesc->waitersH = lockDesc->waitersH->next; + if (!lockDesc->waitersH) + lockDesc->waitersT = NULL; + } + candidate->next = lockDesc->granted; /* move it to the + * granted list */ + lockDesc->granted = candidate; + + 
RF_ASSERT(!candidate->templink); + candidate->templink = callbacklist; /* put it on the list of + * things to be called + * after we release the + * mutex */ + callbacklist = candidate; + + if (!candidate_t) + candidate = lockDesc->waitersH; + else + candidate = candidate_t->next; /* continue with the + * rest of the list */ + } else { + candidate_t = candidate; + candidate = candidate->next; /* continue with the + * rest of the list */ + } + } + + /* delete the descriptor if no one is waiting or active */ + if (!lockDesc->granted && !lockDesc->waitersH) { + RF_ASSERT(lockDesc->nWriters == 0); + if (rf_stripeLockDebug) { + Dprintf3("[%d] Last lock released (table 0x%lx): deleting desc for stripeID %ld\n", tid, (unsigned long) lockTable, stripeID); + FLUSH; + } + if (ld_t) + ld_t->next = lockDesc->next; + else { + RF_ASSERT(lockDesc == lockTable[hashval].descList); + lockTable[hashval].descList = lockDesc->next; + } + FreeStripeLockDesc(lockDesc); + lockDesc = NULL;/* only for the ASSERT below */ + } + RF_UNLOCK_MUTEX(lockTable[hashval].mutex); + + /* now that we've unlocked the mutex, invoke the callback on all the + * descriptors in the list */ + RF_ASSERT(!((callbacklist) && (!lockDesc))); /* if we deleted the + * descriptor, we should + * have no callbacks to + * do */ + for (candidate = callbacklist; candidate;) { + t = candidate; + candidate = candidate->templink; + t->templink = NULL; + (t->cbFunc) (t->cbArg); + } +} /* must have the indicated lock table mutex upon entry */ -static void AddToWaitersQueue( - RF_LockTableEntry_t *lockTable, - RF_StripeLockDesc_t *lockDesc, - RF_LockReqDesc_t *lockReqDesc) +static void +AddToWaitersQueue( + RF_LockTableEntry_t * lockTable, + RF_StripeLockDesc_t * lockDesc, + RF_LockReqDesc_t * lockReqDesc) { - int tid; - - if (rf_stripeLockDebug) { - rf_get_threadid(tid); - Dprintf3("[%d] Waiting on lock for stripe %ld table 0x%lx\n", tid, lockDesc->stripeID, (unsigned long) lockTable); FLUSH; - } - if (!lockDesc->waitersH) { - 
lockDesc->waitersH = lockDesc->waitersT = lockReqDesc; - } else { - lockDesc->waitersT->next = lockReqDesc; - lockDesc->waitersT = lockReqDesc; - } + int tid; + + if (rf_stripeLockDebug) { + rf_get_threadid(tid); + Dprintf3("[%d] Waiting on lock for stripe %ld table 0x%lx\n", tid, lockDesc->stripeID, (unsigned long) lockTable); + FLUSH; + } + if (!lockDesc->waitersH) { + lockDesc->waitersH = lockDesc->waitersT = lockReqDesc; + } else { + lockDesc->waitersT->next = lockReqDesc; + lockDesc->waitersT = lockReqDesc; + } } -static RF_StripeLockDesc_t *AllocStripeLockDesc(RF_StripeNum_t stripeID) +static RF_StripeLockDesc_t * +AllocStripeLockDesc(RF_StripeNum_t stripeID) { RF_StripeLockDesc_t *p; - RF_FREELIST_GET(rf_stripelock_freelist,p,next,(RF_StripeLockDesc_t *)); + RF_FREELIST_GET(rf_stripelock_freelist, p, next, (RF_StripeLockDesc_t *)); if (p) { p->stripeID = stripeID; } - return(p); + return (p); } -static void FreeStripeLockDesc(RF_StripeLockDesc_t *p) +static void +FreeStripeLockDesc(RF_StripeLockDesc_t * p) { - RF_FREELIST_FREE(rf_stripelock_freelist,p,next); + RF_FREELIST_FREE(rf_stripelock_freelist, p, next); } -static void PrintLockedStripes(lockTable) - RF_LockTableEntry_t *lockTable; +static void +PrintLockedStripes(lockTable) + RF_LockTableEntry_t *lockTable; { - int i, j, foundone = 0, did; - RF_StripeLockDesc_t *p; - RF_LockReqDesc_t *q; - - RF_LOCK_MUTEX(rf_printf_mutex); - printf("Locked stripes:\n"); - for (i=0; i<rf_lockTableSize; i++) if (lockTable[i].descList) { - foundone = 1; - for (p = lockTable[i].descList; p; p=p->next) { - printf("Stripe ID 0x%lx (%d) nWriters %d\n", - (long)p->stripeID, (int)p->stripeID, p->nWriters); - - if (! 
(p->granted) ) printf("Granted: (none)\n"); else printf("Granted:\n"); - for (did=1,j=0,q = p->granted; q; j++,q=q->next) { - printf(" %c(%ld-%ld",q->type,(long)q->start,(long)q->stop); - if (q->start2 != -1) printf(",%ld-%ld) ",(long)q->start2, - (long)q->stop2); else printf(") "); - if (j && !(j%4)) {printf("\n"); did=1;} else did=0; - } - if (!did) printf("\n"); - - if (! (p->waitersH) ) printf("Waiting: (none)\n"); else printf("Waiting:\n"); - for (did=1,j=0,q = p->waitersH; q; j++,q=q->next) { - printf("%c(%ld-%ld",q->type,(long)q->start,(long)q->stop); - if (q->start2 != -1) printf(",%ld-%ld) ",(long)q->start2,(long)q->stop2); else printf(") "); - if (j && !(j%4)) {printf("\n "); did=1;} else did=0; - } - if (!did) printf("\n"); - } - } - if (!foundone) printf("(none)\n"); else printf("\n"); - RF_UNLOCK_MUTEX(rf_printf_mutex); + int i, j, foundone = 0, did; + RF_StripeLockDesc_t *p; + RF_LockReqDesc_t *q; + + RF_LOCK_MUTEX(rf_printf_mutex); + printf("Locked stripes:\n"); + for (i = 0; i < rf_lockTableSize; i++) + if (lockTable[i].descList) { + foundone = 1; + for (p = lockTable[i].descList; p; p = p->next) { + printf("Stripe ID 0x%lx (%d) nWriters %d\n", + (long) p->stripeID, (int) p->stripeID, p->nWriters); + + if (!(p->granted)) + printf("Granted: (none)\n"); + else + printf("Granted:\n"); + for (did = 1, j = 0, q = p->granted; q; j++, q = q->next) { + printf(" %c(%ld-%ld", q->type, (long) q->start, (long) q->stop); + if (q->start2 != -1) + printf(",%ld-%ld) ", (long) q->start2, + (long) q->stop2); + else + printf(") "); + if (j && !(j % 4)) { + printf("\n"); + did = 1; + } else + did = 0; + } + if (!did) + printf("\n"); + + if (!(p->waitersH)) + printf("Waiting: (none)\n"); + else + printf("Waiting:\n"); + for (did = 1, j = 0, q = p->waitersH; q; j++, q = q->next) { + printf("%c(%ld-%ld", q->type, (long) q->start, (long) q->stop); + if (q->start2 != -1) + printf(",%ld-%ld) ", (long) q->start2, (long) q->stop2); + else + printf(") "); + if (j && !(j % 4)) { 
+ printf("\n "); + did = 1; + } else + did = 0; + } + if (!did) + printf("\n"); + } + } + if (!foundone) + printf("(none)\n"); + else + printf("\n"); + RF_UNLOCK_MUTEX(rf_printf_mutex); } diff --git a/sys/dev/raidframe/rf_stripelocks.h b/sys/dev/raidframe/rf_stripelocks.h index 46412504247..d339e2ae520 100644 --- a/sys/dev/raidframe/rf_stripelocks.h +++ b/sys/dev/raidframe/rf_stripelocks.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_stripelocks.h,v 1.1 1999/01/11 14:29:51 niklas Exp $ */ -/* $NetBSD: rf_stripelocks.h,v 1.1 1998/11/13 04:20:34 oster Exp $ */ +/* $OpenBSD: rf_stripelocks.h,v 1.2 1999/02/16 00:03:29 niklas Exp $ */ +/* $NetBSD: rf_stripelocks.h,v 1.3 1999/02/05 00:06:18 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,61 +27,9 @@ * rights to redistribute these changes. */ -/* : - * Log: rf_stripelocks.h,v - * Revision 1.22 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.21 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.20 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.19 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. 
Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.18 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.17 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.16 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.15 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.14 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.13 1996/05/06 22:08:46 wvcii - * added copyright info and change log - * - */ - /***************************************************************************** * - * stripelocks.h -- header file for locking stripes + * stripelocks.h -- header file for locking stripes * * Note that these functions are called from the execution routines of certain * DAG Nodes, and so they must be NON-BLOCKING to assure maximum parallelism @@ -104,33 +52,33 @@ #include "rf_general.h" struct RF_LockReqDesc_s { - RF_IoType_t type; /* read or write */ - RF_int64 start, stop; /* start and end of range to be locked */ - RF_int64 start2, stop2; /* start and end of 2nd range to be locked */ - void (*cbFunc)(struct buf *);/* callback function */ - void *cbArg; /* argument to callback function */ - RF_LockReqDesc_t *next; /* next element in chain */ - RF_LockReqDesc_t *templink; /* for making short-lived lists of request descriptors */ + RF_IoType_t type; /* read or write */ + RF_int64 start, stop; /* start and end of range to be locked */ + RF_int64 start2, stop2; /* start and end of 2nd range to be locked */ + void (*cbFunc) (struct buf *); /* callback 
function */ + void *cbArg; /* argument to callback function */ + RF_LockReqDesc_t *next; /* next element in chain */ + RF_LockReqDesc_t *templink; /* for making short-lived lists of + * request descriptors */ }; - #define RF_ASSERT_VALID_LOCKREQ(_lr_) { \ RF_ASSERT(RF_IO_IS_R_OR_W((_lr_)->type)); \ } struct RF_StripeLockDesc_s { - RF_StripeNum_t stripeID; /* the stripe ID */ - RF_LockReqDesc_t *granted; /* unordered list of granted requests */ - RF_LockReqDesc_t *waitersH; /* FIFO queue of all waiting reqs, both read and write (Head and Tail) */ - RF_LockReqDesc_t *waitersT; - int nWriters; /* number of writers either granted or waiting */ - RF_StripeLockDesc_t *next; /* for hash table collision resolution */ + RF_StripeNum_t stripeID;/* the stripe ID */ + RF_LockReqDesc_t *granted; /* unordered list of granted requests */ + RF_LockReqDesc_t *waitersH; /* FIFO queue of all waiting reqs, + * both read and write (Head and Tail) */ + RF_LockReqDesc_t *waitersT; + int nWriters; /* number of writers either granted or waiting */ + RF_StripeLockDesc_t *next; /* for hash table collision resolution */ }; struct RF_LockTableEntry_s { - RF_DECLARE_MUTEX(mutex) /* mutex on this hash chain */ - RF_StripeLockDesc_t *descList; /* hash chain of lock descriptors */ + RF_DECLARE_MUTEX(mutex) /* mutex on this hash chain */ + RF_StripeLockDesc_t *descList; /* hash chain of lock descriptors */ }; - /* * Initializes a stripe lock descriptor. _defSize is the number of sectors * that we lock when there is no parity information in the ASM (e.g. RAID0). 
@@ -157,14 +105,17 @@ struct RF_LockTableEntry_s { (_lrd).cbArg = (void *) (_cba); \ } -int rf_ConfigureStripeLockFreeList(RF_ShutdownList_t **listp); +int rf_ConfigureStripeLockFreeList(RF_ShutdownList_t ** listp); RF_LockTableEntry_t *rf_MakeLockTable(void); -void rf_ShutdownStripeLocks(RF_LockTableEntry_t *lockTable); -int rf_ConfigureStripeLocks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, - RF_Config_t *cfgPtr); -int rf_AcquireStripeLock(RF_LockTableEntry_t *lockTable, - RF_StripeNum_t stripeID, RF_LockReqDesc_t *lockReqDesc); -void rf_ReleaseStripeLock(RF_LockTableEntry_t *lockTable, - RF_StripeNum_t stripeID, RF_LockReqDesc_t *lockReqDesc); +void rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable); +int +rf_ConfigureStripeLocks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int +rf_AcquireStripeLock(RF_LockTableEntry_t * lockTable, + RF_StripeNum_t stripeID, RF_LockReqDesc_t * lockReqDesc); +void +rf_ReleaseStripeLock(RF_LockTableEntry_t * lockTable, + RF_StripeNum_t stripeID, RF_LockReqDesc_t * lockReqDesc); -#endif /* !_RF__RF_STRIPELOCKS_H_ */ +#endif /* !_RF__RF_STRIPELOCKS_H_ */ diff --git a/sys/dev/raidframe/rf_strutils.c b/sys/dev/raidframe/rf_strutils.c index 1c42b6b6b56..c55e98a77bb 100644 --- a/sys/dev/raidframe/rf_strutils.c +++ b/sys/dev/raidframe/rf_strutils.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_strutils.c,v 1.1 1999/01/11 14:29:51 niklas Exp $ */ -/* $NetBSD: rf_strutils.c,v 1.1 1998/11/13 04:20:35 oster Exp $ */ +/* $OpenBSD: rf_strutils.c,v 1.2 1999/02/16 00:03:29 niklas Exp $ */ +/* $NetBSD: rf_strutils.c,v 1.3 1999/02/05 00:06:18 oster Exp $ */ /* * rf_strutils.c * @@ -36,27 +36,21 @@ * I put them in a file by themselves because they're needed in * setconfig, in the user-level driver, and in the kernel. * - * : - * Log: rf_strutils.c,v - * Revision 1.2 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. 
Some general - * code cleanup. Removed lots of dead code, some dead files. - * */ #include "rf_utils.h" /* finds a non-white character in the line */ -char *rf_find_non_white(char *p) +char * +rf_find_non_white(char *p) { - for (; *p != '\0' && (*p == ' ' || *p == '\t'); p++); - return(p); + for (; *p != '\0' && (*p == ' ' || *p == '\t'); p++); + return (p); } - /* finds a white character in the line */ -char *rf_find_white(char *p) +char * +rf_find_white(char *p) { - for (; *p != '\0' && (*p != ' ' && *p != '\t'); p++); - return(p); + for (; *p != '\0' && (*p != ' ' && *p != '\t'); p++); + return (p); } diff --git a/sys/dev/raidframe/rf_sys.c b/sys/dev/raidframe/rf_sys.c index e6eb17bb7ef..671fa46dcc9 100644 --- a/sys/dev/raidframe/rf_sys.c +++ b/sys/dev/raidframe/rf_sys.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_sys.c,v 1.1 1999/01/11 14:29:53 niklas Exp $ */ -/* $NetBSD: rf_sys.c,v 1.1 1998/11/13 04:20:35 oster Exp $ */ +/* $OpenBSD: rf_sys.c,v 1.2 1999/02/16 00:03:29 niklas Exp $ */ +/* $NetBSD: rf_sys.c,v 1.3 1999/02/05 00:06:18 oster Exp $ */ /* * rf_sys.c * @@ -32,229 +32,31 @@ * rights to redistribute these changes. 
*/ -#ifdef _KERNEL -#define KERNEL -#endif - #include "rf_types.h" #include "rf_sys.h" -#ifndef KERNEL -#include <errno.h> -#include <fcntl.h> -#include <nlist.h> -#include <stdio.h> -#include <unistd.h> -#endif /* !KERNEL */ #include <sys/param.h> -#if !defined(sun) && !defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(LINUX) && (!defined(MACH) || defined(__osf__)) -#include <sys/sysinfo.h> -#endif /* !sun && !__NetBSD__ && !__OpenBSD__ && !LINUX && (!MACH || __osf__) */ #include <sys/time.h> -#ifdef __osf__ -#include <machine/rpb.h> -#include <machine/hal/hal_sysinfo.h> -#endif /* __osf__ */ #include "rf_etimer.h" #include "rf_general.h" #include "rf_threadstuff.h" -#ifdef KERNEL extern struct rpb *rpb; -#endif /* KERNEL */ /* timer stuff */ -#ifdef __alpha -long rf_timer_max_val; -long rf_timer_ticks_per_second; -unsigned long rf_timer_ticks_per_usec; -#endif /* __alpha */ - - -#if defined(__NetBSD__) || defined(__OpenBSD__) -long rf_timer_max_val; -long rf_timer_ticks_per_second; +long rf_timer_max_val; +long rf_timer_ticks_per_second; unsigned long rf_timer_ticks_per_usec; -#endif /* __NetBSD__ || __OpenBSD__ */ -#if !defined(KERNEL) && !defined(SIMULATE) && (RF_UTILITY == 0) -pthread_attr_t raidframe_attr_default; - -int rf_thread_create( - RF_Thread_t *thread, - pthread_attr_t attr, - void (*func)(), - RF_ThreadArg_t arg) -{ - int rc; - -#ifdef __osf__ - rc = pthread_create(thread, attr, (pthread_startroutine_t)func, arg); -#endif /* __osf__ */ -#ifdef AIX - rc = pthread_create(thread, &attr, (void *(*)(void *))func, arg); -#endif /* AIX */ - if (rc) - return(errno); - rc = pthread_detach(thread); - if (rc) { - /* don't return error, because the thread exists, and must be cleaned up */ - RF_ERRORMSG1("RAIDFRAME WARNING: failed detaching thread %lx\n", thread); - } - return(0); -} -#endif /* !KERNEL && !SIMULATE && (RF_UTILITY == 0) */ -#if defined(__osf__) && !defined(KERNEL) -int rf_get_cpu_ticks_per_sec(long *ticksp) +int +rf_ConfigureEtimer(listp) 
+ RF_ShutdownList_t **listp; { - char *kmemdevname, buf[sizeof(struct rpb)+8]; - char *memdevname, kernel_name[MAXPATHLEN+1]; - struct nlist nl[2], *np; - unsigned long rpb_addr; - int kfd, rc, fd, bad; - struct rpb rpb; - off_t off; - - kmemdevname = "/dev/kmem"; - memdevname = "/dev/mem"; - - np = &nl[0]; - bzero((char *)np, sizeof(nl)); - nl[0].n_name = "pmap_physhwrpb"; - nl[1].n_name = NULL; - - bad = 0; - - /* get running kernel name */ - bzero(kernel_name, MAXPATHLEN+1); - kernel_name[0] = '/'; - rc = getsysinfo(GSI_BOOTEDFILE, &kernel_name[1], MAXPATHLEN, 0, 0); - if (rc != 1) { - RF_ERRORMSG("RAIDFRAME: cannot get booted kernel name\n"); - if (errno) - return(errno); - else - return(EIO); - } - - rc = nlist(kernel_name, np); - if (rc) { - RF_ERRORMSG1("RAIDFRAME: cannot nlist %s\n", kernel_name); - return(EIO); - } - - if (np->n_type == 0) { - RF_ERRORMSG1("RAIDFRAME: cannot usefully nlist %s\n", kernel_name); - return(EIO); - } - - kfd = open(kmemdevname, O_RDONLY); - if (kfd < 0) { - perror(kmemdevname); - return(errno); - } - fd = open(memdevname, O_RDONLY); - if (fd < 0) { - perror(kmemdevname); - return(errno); - } - - /* - * pmap_physhwrpb is a variable in the kernel containing the physical - * address of the hardware RPB. We'll just find that variable and - * read it, then use that as a physical memory address to read the - * rpb itself. 
- */ - - off = lseek(kfd, np->n_value, SEEK_SET); - if (off != np->n_value) { - RF_ERRORMSG("RAIDFRAME: cannot seek to address of hwrpb addr\n"); - return(EIO); - } - - rc = read(kfd, &rpb_addr, sizeof(rpb_addr)); - if (rc != sizeof(rpb_addr)) { - RF_ERRORMSG("RAIDFRAME: cannot read address of hwrpb addr\n"); - if (rc < 0) - bad = errno; - bad = EIO; - goto isbad; - } - - off = lseek(fd, rpb_addr, SEEK_SET); - if (off != rpb_addr) { - RF_ERRORMSG("RAIDFRAME: cannot seek to rpb addr\n"); - bad = EIO; - goto isbad; - } - - rc = read(fd, &rpb, sizeof(rpb)); - if (rc != sizeof(rpb)) { - RF_ERRORMSG1("RAIDFRAME: cannot read rpb (rc=%d)\n", rc); - if (rc < 0) - bad = errno; - bad = EIO; - goto isbad; - } - - /* - * One extra sanity check: the RPB is self-identifying. - * This field is guaranteed to have the value - * 0x0000004250525748, always. - */ - if (rpb.rpb_string != 0x0000004250525748) { - bad = EIO; - goto isbad; - } - -isbad: - if (bad) { - RF_ERRORMSG("ERROR: rpb failed validation\n"); - RF_ERRORMSG1("RAIDFRAME: perhaps %s has changed since booting?\n", - kernel_name); - return(bad); - } - - *ticksp = rpb.rpb_counter; - - close(kfd); - close(fd); - - return(0); -} -#endif /* __osf__ && !KERNEL */ - -int rf_ConfigureEtimer(listp) - RF_ShutdownList_t **listp; -{ -#ifdef __osf__ - int rc; - -#ifdef KERNEL - rf_timer_ticks_per_second = rpb->rpb_counter; -#else /* KERNEL */ - rc = rf_get_cpu_ticks_per_sec(&rf_timer_ticks_per_second); - if (rc) - return(rc); -#endif /* KERNEL */ - rf_timer_max_val = RF_DEF_TIMER_MAX_VAL; - rf_timer_ticks_per_usec = rf_timer_ticks_per_second/1000000; -#endif /* __osf__ */ -#if defined(NETBSD_ALPHA) || defined(OPENBSD_ALPHA) - /* - * XXX cgd fix this - */ - rf_timer_ticks_per_second = 233100233; - rf_timer_max_val = RF_DEF_TIMER_MAX_VAL; - rf_timer_ticks_per_usec = rf_timer_ticks_per_second/1000000; -#endif /* NETBSD_ALPHA || OPENBSD_ALPHA */ -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) - /* XXX just picking 
some random values to keep things happy... without these - set, stuff will panic on division by zero errors!! */ - rf_timer_ticks_per_second = 233100233; - rf_timer_max_val = RF_DEF_TIMER_MAX_VAL; - rf_timer_ticks_per_usec = rf_timer_ticks_per_second/1000000; + /* XXX just picking some random values to keep things happy... without + * these set, stuff will panic on division by zero errors!! */ + rf_timer_ticks_per_second = 233100233; + rf_timer_max_val = RF_DEF_TIMER_MAX_VAL; + rf_timer_ticks_per_usec = rf_timer_ticks_per_second / 1000000; -#endif - return(0); + return (0); } diff --git a/sys/dev/raidframe/rf_sys.h b/sys/dev/raidframe/rf_sys.h index f9606708c2e..72a9bf65d05 100644 --- a/sys/dev/raidframe/rf_sys.h +++ b/sys/dev/raidframe/rf_sys.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_sys.h,v 1.1 1999/01/11 14:29:53 niklas Exp $ */ -/* $NetBSD: rf_sys.h,v 1.1 1998/11/13 04:20:35 oster Exp $ */ +/* $OpenBSD: rf_sys.h,v 1.2 1999/02/16 00:03:31 niklas Exp $ */ +/* $NetBSD: rf_sys.h,v 1.3 1999/02/05 00:06:18 oster Exp $ */ /* * rf_sys.h * @@ -37,33 +37,6 @@ #include "rf_types.h" -int rf_ConfigureEtimer(RF_ShutdownList_t **listp); +int rf_ConfigureEtimer(RF_ShutdownList_t ** listp); -#if defined(__osf__) && !defined(KERNEL) -int rf_get_cpu_ticks_per_sec(long *ticksp); -#endif /* __osf__ && !KERNEL */ - -#ifdef AIX -#include <nlist.h> -#include <sys/time.h> -#if RF_AIXVers == 3 -int gettimeofday(struct timeval *tp, struct timezone *tzp); -#endif /* RF_AIXVers == 3 */ -int knlist(struct nlist *namelist, int nel, int size); -int ffs(int index); -#endif /* AIX */ - -#ifdef sun -#define bcopy(a,b,n) memcpy(b,a,n) -#define bzero(b,n) memset(b,0,n) -#define bcmp(a,b,n) memcmp(a,b,n) -#endif /* sun */ - -#ifdef __GNUC__ -/* we use gcc -Wall to check our anal-retentiveness level, occasionally */ -#if defined(DEC_OSF) && !defined(KERNEL) -extern int ioctl(int fd, int req, ...); -#endif /* DEC_OSF && !KERNEL */ -#endif /* __GNUC__ */ - -#endif /* !_RF__RF_SYS_H_ */ +#endif /* 
!_RF__RF_SYS_H_ */ diff --git a/sys/dev/raidframe/rf_threadid.h b/sys/dev/raidframe/rf_threadid.h index ef77020b554..39a5d26150d 100644 --- a/sys/dev/raidframe/rf_threadid.h +++ b/sys/dev/raidframe/rf_threadid.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_threadid.h,v 1.1 1999/01/11 14:29:53 niklas Exp $ */ -/* $NetBSD: rf_threadid.h,v 1.1 1998/11/13 04:20:35 oster Exp $ */ +/* $OpenBSD: rf_threadid.h,v 1.2 1999/02/16 00:03:31 niklas Exp $ */ +/* $NetBSD: rf_threadid.h,v 1.3 1999/02/05 00:06:18 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -38,193 +38,18 @@ * none of this is used in the kernel, so it all gets compiled out if KERNEL is defined */ -/* : - * Log: rf_threadid.h,v - * Revision 1.17 1996/08/12 20:11:17 jimz - * fix up for AIX4 - * - * Revision 1.16 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.15 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.14 1996/05/30 11:29:41 jimz - * Numerous bug fixes. Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. 
- * - * Revision 1.13 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.12 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.11 1996/05/20 16:13:46 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.10 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.9 1996/05/17 13:29:06 jimz - * did a dance on get_threadid such that it will do the pthread_attr_t -> int - * assignment without warnings, even on really anal compilers - * - * Revision 1.8 1995/12/06 15:15:00 root - * added copyright info - * - */ - #ifndef _RF__RF_THREADID_H_ #define _RF__RF_THREADID_H_ -#ifdef _KERNEL -#define KERNEL -#endif - -#ifndef SIMULATE -#ifndef KERNEL - -/* - * User - */ - -#include "rf_threadstuff.h" - -extern int rf_numThrsRegistered; -extern pthread_key_t rf_thread_id_key; -RF_DECLARE_EXTERN_MUTEX(rf_threadid_mutex) - -#define RF_THREAD_MAX 200 - -/* these should be global since a function is declared. Should be invoked at only one place in code */ -#define RF_DECLARE_GLOBAL_THREADID \ - int rf_numThrsRegistered = 0; \ - pthread_key_t rf_thread_id_key; \ - RF_DECLARE_MUTEX(rf_threadid_mutex) \ - RF_Thread_t rf_regdThrs[RF_THREAD_MAX]; \ - void rf_ThreadIdEmptyFunc() {} - -/* setup must be called exactly once, i.e. 
it can't be called by each thread */ - -#ifdef AIX -typedef void (*pthread_destructor_t)(void *); -#endif /* AIX */ - -#ifdef __osf__ -#define rf_setup_threadid() { \ - extern void rf_ThreadIdEmptyFunc(); \ - pthread_keycreate(&rf_thread_id_key, (pthread_destructor_t) rf_ThreadIdEmptyFunc); \ - rf_mutex_init(&rf_threadid_mutex); /* XXX check return val */ \ - rf_numThrsRegistered = 0; \ -} -#endif /* __osf__ */ - -#ifdef AIX -#define rf_setup_threadid() { \ - extern void rf_ThreadIdEmptyFunc(); \ - pthread_key_create(&rf_thread_id_key, (pthread_destructor_t) rf_ThreadIdEmptyFunc); \ - rf_mutex_init(&rf_threadid_mutex); /* XXX check return val */ \ - rf_numThrsRegistered = 0; \ -} -#endif /* AIX */ - -#define rf_shutdown_threadid() { \ - rf_mutex_destroy(&rf_threadid_mutex); \ -} - -#ifdef __osf__ -typedef pthread_addr_t RF_THID_cast_t; -#endif /* __osf__ */ - -#ifdef AIX -typedef void *RF_THID_cast_t; -#endif /* AIX */ - -#define rf_assign_threadid() {RF_LOCK_MUTEX(rf_threadid_mutex); \ - if (pthread_setspecific(rf_thread_id_key, (RF_THID_cast_t) ((unsigned long)(rf_numThrsRegistered++)))) { RF_PANIC(); } \ - RF_UNLOCK_MUTEX(rf_threadid_mutex);} - -#ifdef __osf__ -#define rf_get_threadid(_id_) { \ - RF_THID_cast_t _val; \ - unsigned long _val2; \ - if (pthread_getspecific(rf_thread_id_key, &_val)) \ - RF_PANIC(); \ - (_val2) = (unsigned long)_val; \ - (_id_) = (int)_val2; \ -} -#endif /* __osf__ */ - -#ifdef AIX -#define rf_get_threadid(_id_) { \ - RF_THID_cast_t _val; \ - unsigned long _val2; \ - _val = pthread_getspecific(rf_thread_id_key); \ - (_val2) = (unsigned long)_val; \ - (_id_) = (int)_val2; \ -} -#endif /* AIX */ - -#else /* KERNEL */ - /* * Kernel */ -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#include <kern/task.h> -#include <kern/thread.h> -#include <mach/machine/vm_param.h> -#endif - #define RF_DECLARE_GLOBAL_THREADID #define rf_setup_threadid() #define rf_shutdown_threadid() #define rf_assign_threadid() - - -#if defined(__NetBSD__) || 
defined(__OpenBSD__) - #define rf_get_threadid(_id_) _id_ = 0; -#else -#define rf_get_threadid(_id_) { \ - thread_t thread = current_thread(); \ - _id_ = (int)(((thread->thread_self)>>(8*sizeof(int *)))&0x0fffffff); \ -} -#endif /* __NetBSD__ || __OpenBSD__ */ -#endif /* KERNEL */ - -#else /* SIMULATE */ - -/* - * Simulator - */ - -#include "rf_diskevent.h" - -#define RF_DECLARE_GLOBAL_THREADID -#define rf_setup_threadid() -#define rf_shutdown_threadid() -#define rf_assign_threadid() - -#define rf_get_threadid(_id_) _id_ = rf_GetCurrentOwner() - -#endif /* SIMULATE */ -#endif /* !_RF__RF_THREADID_H_ */ +#endif /* !_RF__RF_THREADID_H_ */ diff --git a/sys/dev/raidframe/rf_threadstuff.c b/sys/dev/raidframe/rf_threadstuff.c index 0de5f36c679..55dd660f6e5 100644 --- a/sys/dev/raidframe/rf_threadstuff.c +++ b/sys/dev/raidframe/rf_threadstuff.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_threadstuff.c,v 1.1 1999/01/11 14:29:53 niklas Exp $ */ -/* $NetBSD: rf_threadstuff.c,v 1.1 1998/11/13 04:20:35 oster Exp $ */ +/* $OpenBSD: rf_threadstuff.c,v 1.2 1999/02/16 00:03:32 niklas Exp $ */ +/* $NetBSD: rf_threadstuff.c,v 1.3 1999/02/05 00:06:18 oster Exp $ */ /* * rf_threadstuff.c */ @@ -30,11 +30,6 @@ * rights to redistribute these changes. 
*/ -#ifdef _KERNEL -#define KERNEL -#endif - - #include "rf_types.h" #include "rf_threadstuff.h" #include "rf_general.h" @@ -42,16 +37,17 @@ static void mutex_destroyer(void *); static void cond_destroyer(void *); -void thread_wakeup(void *); +void thread_wakeup(void *); /* * Shared stuff */ -static void mutex_destroyer(arg) - void *arg; +static void +mutex_destroyer(arg) + void *arg; { - int rc; + int rc; rc = rf_mutex_destroy(arg); if (rc) { @@ -59,10 +55,11 @@ static void mutex_destroyer(arg) } } -static void cond_destroyer(arg) - void *arg; +static void +cond_destroyer(arg) + void *arg; { - int rc; + int rc; rc = rf_cond_destroy(arg); if (rc) { @@ -70,18 +67,19 @@ static void cond_destroyer(arg) } } -int _rf_create_managed_mutex(listp, m, file, line) - RF_ShutdownList_t **listp; - RF_DECLARE_MUTEX(*m) - char *file; - int line; +int +_rf_create_managed_mutex(listp, m, file, line) + RF_ShutdownList_t **listp; +RF_DECLARE_MUTEX(*m) + char *file; + int line; { - int rc, rc1; + int rc, rc1; rc = rf_mutex_init(m); if (rc) - return(rc); - rc = _rf_ShutdownCreate(listp, mutex_destroyer, (void *)m, file, line); + return (rc); + rc = _rf_ShutdownCreate(listp, mutex_destroyer, (void *) m, file, line); if (rc) { RF_ERRORMSG1("RAIDFRAME: Error %d adding shutdown entry\n", rc); rc1 = rf_mutex_destroy(m); @@ -89,21 +87,22 @@ int _rf_create_managed_mutex(listp, m, file, line) RF_ERRORMSG1("RAIDFRAME: Error %d destroying mutex\n", rc1); } } - return(rc); + return (rc); } -int _rf_create_managed_cond(listp, c, file, line) - RF_ShutdownList_t **listp; - RF_DECLARE_COND(*c) - char *file; - int line; +int +_rf_create_managed_cond(listp, c, file, line) + RF_ShutdownList_t **listp; +RF_DECLARE_COND(*c) + char *file; + int line; { - int rc, rc1; + int rc, rc1; rc = rf_cond_init(c); if (rc) - return(rc); - rc = _rf_ShutdownCreate(listp, cond_destroyer, (void *)c, file, line); + return (rc); + rc = _rf_ShutdownCreate(listp, cond_destroyer, (void *) c, file, line); if (rc) { 
RF_ERRORMSG1("RAIDFRAME: Error %d adding shutdown entry\n", rc); rc1 = rf_cond_destroy(c); @@ -111,367 +110,109 @@ int _rf_create_managed_cond(listp, c, file, line) RF_ERRORMSG1("RAIDFRAME: Error %d destroying cond\n", rc1); } } - return(rc); + return (rc); } -int _rf_init_managed_threadgroup(listp, g, file, line) - RF_ShutdownList_t **listp; - RF_ThreadGroup_t *g; - char *file; - int line; +int +_rf_init_managed_threadgroup(listp, g, file, line) + RF_ShutdownList_t **listp; + RF_ThreadGroup_t *g; + char *file; + int line; { - int rc; + int rc; rc = _rf_create_managed_mutex(listp, &g->mutex, file, line); if (rc) - return(rc); + return (rc); rc = _rf_create_managed_cond(listp, &g->cond, file, line); if (rc) - return(rc); + return (rc); g->created = g->running = g->shutdown = 0; - return(0); + return (0); } -int _rf_destroy_threadgroup(g, file, line) - RF_ThreadGroup_t *g; - char *file; - int line; +int +_rf_destroy_threadgroup(g, file, line) + RF_ThreadGroup_t *g; + char *file; + int line; { - int rc1, rc2; + int rc1, rc2; #if RF_DEBUG_ATOMIC > 0 - rc1 = _rf_mutex_destroy(&g->mutex, file, line); - rc2 = _rf_cond_destroy(&g->cond, file, line); -#else /* RF_DEBUG_ATOMIC > 0 */ - rc1 = rf_mutex_destroy(&g->mutex); - rc2 = rf_cond_destroy(&g->cond); -#endif /* RF_DEBUG_ATOMIC > 0 */ - if (rc1) - return(rc1); - return(rc2); + rc1 = _rf_mutex_destroy(&g->mutex, file, line); + rc2 = _rf_cond_destroy(&g->cond, file, line); +#else /* RF_DEBUG_ATOMIC > 0 */ + rc1 = rf_mutex_destroy(&g->mutex); + rc2 = rf_cond_destroy(&g->cond); +#endif /* RF_DEBUG_ATOMIC > 0 */ + if (rc1) + return (rc1); + return (rc2); } -int _rf_init_threadgroup(g, file, line) - RF_ThreadGroup_t *g; - char *file; - int line; +int +_rf_init_threadgroup(g, file, line) + RF_ThreadGroup_t *g; + char *file; + int line; { - int rc; + int rc; #if RF_DEBUG_ATOMIC > 0 - rc = _rf_mutex_init(&g->mutex, file, line); - if (rc) - return(rc); - rc = _rf_cond_init(&g->cond, file, line); - if (rc) { - 
_rf_mutex_destroy(&g->mutex, file, line); - return(rc); - } -#else /* RF_DEBUG_ATOMIC > 0 */ - rc = rf_mutex_init(&g->mutex); - if (rc) - return(rc); - rc = rf_cond_init(&g->cond); - if (rc) { - rf_mutex_destroy(&g->mutex); - return(rc); - } -#endif /* RF_DEBUG_ATOMIC > 0 */ - g->created = g->running = g->shutdown = 0; - return(0); -} - -/* - * User - */ - -#if !defined(KERNEL) && !defined(SIMULATE) - -#if RF_DEBUG_ATOMIC > 0 - -static RF_ATEnt_t rf_atent_list; -static RF_ATEnt_t *rf_atent_done_list=NULL; - -static pthread_mutex_t rf_atent_mutex; - -void rf_atent_init() -{ - int rc; - - rc = pthread_mutex_init(&rf_atent_mutex, pthread_mutexattr_default); - if (rc) { - fprintf(stderr, "ERROR: rc=%d creating rf_atent_mutex\n", rc); - fflush(stderr); - RF_PANIC(); - } - rf_atent_list.next = rf_atent_list.prev = &rf_atent_list; -} - -#define ATENT_TYPE(_e_) ((((_e_)->type == 0)||((_e_)->type > 2)) ? 0 : (_e_)->type) -#define ATENT_OTYPE(_e_) ((((_e_)->otype == 0)||((_e_)->otype > 2)) ? 0 : (_e_)->otype) - -void rf_atent_shutdown() -{ - int rc, num_freed[3], num_not_freed[3]; - RF_ATEnt_t *r, *n; - - num_freed[0] = num_freed[1] = num_freed[2] = 0; - num_not_freed[0] = num_not_freed[1] = num_not_freed[2] = 0; - printf("rf_atent_shutdown:\n"); - for(r=rf_atent_list.next;r!=&rf_atent_list;r=r->next) { - printf("r=%lx type=%d file=%s line=%d\n", r, r->type, r->file, r->line); - num_not_freed[ATENT_TYPE(r)]++; - } - rc = pthread_mutex_destroy(&rf_atent_mutex); - if (rc) { - fprintf(stderr, "ERROR: rc=%d destroying rf_atent_mutex\n", rc); - fflush(stderr); - RF_PANIC(); - } - for(r=rf_atent_done_list;r;r=n) { - n = r->next; - num_freed[ATENT_OTYPE(r)]++; - free(r); - } - printf("%d mutexes not freed %d conditions not freed %d bogus not freed\n", - num_not_freed[1], num_not_freed[2], num_not_freed[0]); - printf("%d mutexes freed %d conditions freed %d bogus freed\n", - num_freed[1], num_freed[2], num_freed[0]); - fflush(stdout); - fflush(stderr); -} - -static RF_ATEnt_t 
*AllocATEnt(file,line) - char *file; - int line; -{ - RF_ATEnt_t *t; - - t = (RF_ATEnt_t *)malloc(sizeof(RF_ATEnt_t)); - if (t == NULL) { - RF_PANIC(); - } - t->file = file; - t->line = line; - t->type = 0; - return(t); -} - -static void FreeATEnt(t) - RF_ATEnt_t *t; -{ - t->otype = t->type; - t->type = 0; - t->next = rf_atent_done_list; - rf_atent_done_list = t; -} - -int _rf_mutex_init(m, file, line) - RF_ATEnt_t **m; - char *file; - int line; -{ - RF_ATEnt_t *a; - int rc; - - a = AllocATEnt(file,line); - rc = pthread_mutex_init(&a->m, pthread_mutexattr_default); - if (rc == 0) { - pthread_mutex_lock(&rf_atent_mutex); - a->next = rf_atent_list.next; - a->prev = &rf_atent_list; - a->type = RF_ATENT_M; - a->next->prev = a; - a->prev->next = a; - pthread_mutex_unlock(&rf_atent_mutex); - } - else { - fprintf(stderr, "ERROR: rc=%d allocating mutex %s:%d\n", - rc, file, line); - fflush(stderr); - RF_PANIC(); - } - *m = a; - return(0); -} - -int _rf_mutex_destroy(m, file, line) - RF_ATEnt_t **m; - char *file; - int line; -{ - RF_ATEnt_t *r; - int rc; - - r = *m; - rc = pthread_mutex_destroy(&r->m); + rc = _rf_mutex_init(&g->mutex, file, line); + if (rc) + return (rc); + rc = _rf_cond_init(&g->cond, file, line); if (rc) { - fprintf(stderr, "ERROR: rc=%d destroying mutex %s:%d\n", - rc, file, line); - fflush(stderr); - RF_PANIC(); - } - pthread_mutex_lock(&rf_atent_mutex); - r->next->prev = r->prev; - r->prev->next = r->next; - FreeATEnt(r); - pthread_mutex_unlock(&rf_atent_mutex); - *m = NULL; - return(0); -} - -int _rf_cond_init(c, file, line) - RF_ATEnt_t **c; - char *file; - int line; -{ - RF_ATEnt_t *a; - int rc; - - a = AllocATEnt(file,line); - rc = pthread_cond_init(&a->c, pthread_condattr_default); - if (rc == 0) { - pthread_mutex_lock(&rf_atent_mutex); - a->next = rf_atent_list.next; - a->prev = &rf_atent_list; - a->next->prev = a; - a->prev->next = a; - a->type = RF_ATENT_C; - pthread_mutex_unlock(&rf_atent_mutex); + _rf_mutex_destroy(&g->mutex, file, line); + 
return (rc); } - else { - fprintf(stderr, "ERROR: rc=%d allocating cond %s:%d\n", - rc, file, line); - fflush(stderr); - RF_PANIC(); - } - *c = a; - return(0); -} - -int _rf_cond_destroy(c, file, line) - RF_ATEnt_t **c; - char *file; - int line; -{ - RF_ATEnt_t *r; - int rc; - - r = *c; - rc = pthread_cond_destroy(&r->c); +#else /* RF_DEBUG_ATOMIC > 0 */ + rc = rf_mutex_init(&g->mutex); + if (rc) + return (rc); + rc = rf_cond_init(&g->cond); if (rc) { - fprintf(stderr, "ERROR: rc=%d destroying cond %s:%d\n", - rc, file, line); - fflush(stderr); - RF_PANIC(); + rf_mutex_destroy(&g->mutex); + return (rc); } - pthread_mutex_lock(&rf_atent_mutex); - r->next->prev = r->prev; - r->prev->next = r->next; - FreeATEnt(r); - pthread_mutex_unlock(&rf_atent_mutex); - *c = NULL; - return(0); -} - -#else /* RF_DEBUG_ATOMIC > 0 */ - -int rf_mutex_init(m) - pthread_mutex_t *m; -{ -#ifdef __osf__ - return(pthread_mutex_init(m, pthread_mutexattr_default)); -#endif /* __osf__ */ -#ifdef AIX - return(pthread_mutex_init(m, &pthread_mutexattr_default)); -#endif /* AIX */ -} - -int rf_mutex_destroy(m) - pthread_mutex_t *m; -{ - return(pthread_mutex_destroy(m)); -} - -int rf_cond_init(c) - pthread_cond_t *c; -{ -#ifdef __osf__ - return(pthread_cond_init(c, pthread_condattr_default)); -#endif /* __osf__ */ -#ifdef AIX - return(pthread_cond_init(c, &pthread_condattr_default)); -#endif /* AIX */ -} - -int rf_cond_destroy(c) - pthread_cond_t *c; -{ - return(pthread_cond_destroy(c)); +#endif /* RF_DEBUG_ATOMIC > 0 */ + g->created = g->running = g->shutdown = 0; + return (0); } -#endif /* RF_DEBUG_ATOMIC > 0 */ - -#endif /* !KERNEL && !SIMULATE */ /* * Kernel */ -#ifdef KERNEL -int rf_mutex_init(m) - decl_simple_lock_data(,*m) +int +rf_mutex_init(m) +decl_simple_lock_data(, *m) { simple_lock_init(m); - return(0); -} - -int rf_mutex_destroy(m) - decl_simple_lock_data(,*m) -{ - return(0); -} - -int rf_cond_init(c) - RF_DECLARE_COND(*c) -{ - *c = 0; /* no reason */ - return(0); -} - -int 
rf_cond_destroy(c) - RF_DECLARE_COND(*c) -{ - return(0); -} - - -#endif /* KERNEL */ - -/* - * Simulator - */ -#ifdef SIMULATE -int rf_mutex_init(m) - RF_DECLARE_MUTEX(*m) -{ - return(0); + return (0); } -int rf_mutex_destroy(m) - RF_DECLARE_MUTEX(*m) +int +rf_mutex_destroy(m) +decl_simple_lock_data(, *m) { - return(0); + return (0); } -int rf_cond_init(c) - RF_DECLARE_COND(*c) +int +rf_cond_init(c) +RF_DECLARE_COND(*c) { - return(0); + *c = 0; /* no reason */ + return (0); } -int rf_cond_destroy(c) - RF_DECLARE_COND(*c) +int +rf_cond_destroy(c) +RF_DECLARE_COND(*c) { - return(0); + return (0); } -#endif /* SIMULATE */ diff --git a/sys/dev/raidframe/rf_threadstuff.h b/sys/dev/raidframe/rf_threadstuff.h index 1437b2b0edf..bc44ef348c4 100644 --- a/sys/dev/raidframe/rf_threadstuff.h +++ b/sys/dev/raidframe/rf_threadstuff.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_threadstuff.h,v 1.1 1999/01/11 14:29:54 niklas Exp $ */ -/* $NetBSD: rf_threadstuff.h,v 1.1 1998/11/13 04:20:35 oster Exp $ */ +/* $OpenBSD: rf_threadstuff.h,v 1.2 1999/02/16 00:03:32 niklas Exp $ */ +/* $NetBSD: rf_threadstuff.h,v 1.3 1999/02/05 00:06:18 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -41,98 +41,17 @@ * */ -/* : - * Log: rf_threadstuff.h,v - * Revision 1.38 1996/08/12 22:37:47 jimz - * add AIX stuff for user driver - * - * Revision 1.37 1996/08/11 00:47:09 jimz - * make AIX friendly - * - * Revision 1.36 1996/07/23 22:06:59 jimz - * add rf_destroy_threadgroup - * - * Revision 1.35 1996/07/23 21:31:16 jimz - * add init_threadgroup - * - * Revision 1.34 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.33 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.32 1996/06/17 03:01:11 jimz - * get rid of JOIN stuff - * - * Revision 1.31 1996/06/14 23:15:38 jimz - * attempt to deal with thread GC problem - * - * Revision 1.30 1996/06/11 18:12:36 jimz - * get rid of JOIN operations - * use ThreadGroup stuff instead - * fix some allocation/deallocation and sync bugs - * - * Revision 1.29 1996/06/11 13:48:10 jimz - * make kernel RF_THREAD_CREATE give back happier return vals - * - * Revision 1.28 1996/06/10 16:40:01 jimz - * break user-level stuff out into lib+apps - * - * Revision 1.27 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.26 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.25 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.24 1996/05/30 11:29:41 jimz - * Numerous bug fixes. 
Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.23 1996/05/20 19:31:54 jimz - * add atomic debug (mutex and cond leak finder) stuff - * - * Revision 1.22 1996/05/20 16:24:49 jimz - * get happy in simulator - * - * Revision 1.21 1996/05/20 16:15:07 jimz - * switch to rf_{mutex,cond}_{init,destroy} - * - * Revision 1.20 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.19 1996/05/09 17:16:53 jimz - * correct arg to JOIN_THREAD - * - * Revision 1.18 1995/12/12 18:10:06 jimz - * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT - * fix 80-column brain damage in comments - * - * Revision 1.17 1995/12/06 15:15:21 root - * added copyright info - * - */ - #ifndef _RF__RF_THREADSTUFF_H_ #define _RF__RF_THREADSTUFF_H_ #include "rf_types.h" +#include <sys/types.h> +#include <sys/param.h> +#ifdef _KERNEL +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/kthread.h> +#endif #define rf_create_managed_mutex(a,b) _rf_create_managed_mutex(a,b,__FILE__,__LINE__) #define rf_create_managed_cond(a,b) _rf_create_managed_cond(a,b,__FILE__,__LINE__) @@ -140,152 +59,17 @@ #define rf_init_threadgroup(a) _rf_init_threadgroup(a,__FILE__,__LINE__) #define rf_destroy_threadgroup(a) _rf_destroy_threadgroup(a,__FILE__,__LINE__) -int _rf_init_threadgroup(RF_ThreadGroup_t *g, char *file, int line); -int _rf_destroy_threadgroup(RF_ThreadGroup_t *g, char *file, int line); -int 
_rf_init_managed_threadgroup(RF_ShutdownList_t **listp, - RF_ThreadGroup_t *g, char *file, int line); - -#ifndef SIMULATE /* will null all this calls */ -#ifndef KERNEL - -#if defined(__osf__) || defined(AIX) -#include <pthread.h> -#endif /* __osf__ || AIX */ - -#define RF_DEBUG_ATOMIC 0 - -#if RF_DEBUG_ATOMIC > 0 -#define RF_ATENT_M 1 -#define RF_ATENT_C 2 -typedef struct RF_ATEnt_s RF_ATEnt_t; -struct RF_ATEnt_s { - char *file; - int line; - pthread_mutex_t m; - pthread_cond_t c; - int type; - int otype; - RF_ATEnt_t *next; - RF_ATEnt_t *prev; -}; - -#define RF_DECLARE_MUTEX(_m_) RF_ATEnt_t *_m_; -#define RF_DECLARE_STATIC_MUTEX(_m_) static RF_ATEnt_t *_m_; -#define RF_DECLARE_EXTERN_MUTEX(_m_) extern RF_ATEnt_t *_m_; -#define RF_DECLARE_COND(_c_) RF_ATEnt_t *_c_; -#define RF_DECLARE_STATIC_COND(_c_) static RF_ATEnt_t *_c_; -#define RF_DECLARE_EXTERN_COND(_c_) extern RF_ATEnt_t *_c_; - -int _rf_mutex_init(RF_ATEnt_t **m, char *file, int line); -int _rf_mutex_destroy(RF_ATEnt_t **m, char *file, int line); -int _rf_cond_init(RF_ATEnt_t **c, char *file, int line); -int _rf_cond_destroy(RF_ATEnt_t **c, char *file, int line); -void rf_atent_init(void); -void rf_atent_shutdown(void); - -#define rf_mutex_init(_m_) _rf_mutex_init(_m_,__FILE__,__LINE__) -#define rf_mutex_destroy(_m_) _rf_mutex_destroy(_m_,__FILE__,__LINE__) -#define rf_cond_init(_m_) _rf_cond_init(_m_,__FILE__,__LINE__) -#define rf_cond_destroy(_m_) _rf_cond_destroy(_m_,__FILE__,__LINE__) - -#define RF_LOCK_MUTEX(_a_) {RF_ASSERT((_a_)->type == RF_ATENT_M); pthread_mutex_lock(&((_a_)->m));} -#define RF_UNLOCK_MUTEX(_a_) {RF_ASSERT((_a_)->type == RF_ATENT_M); pthread_mutex_unlock(&((_a_)->m));} - -#define RF_WAIT_COND(_c_,_m_) { \ - RF_ASSERT((_c_)->type == RF_ATENT_C); \ - RF_ASSERT((_m_)->type == RF_ATENT_M); \ - pthread_cond_wait( &((_c_)->c), &((_m_)->m) ); \ -} -#define RF_SIGNAL_COND(_c_) {RF_ASSERT((_c_)->type == RF_ATENT_C); pthread_cond_signal( &((_c_)->c));} -#define RF_BROADCAST_COND(_c_) 
{RF_ASSERT((_c_)->type == RF_ATENT_C); pthread_cond_broadcast(&((_c_)->c));} +int _rf_init_threadgroup(RF_ThreadGroup_t * g, char *file, int line); +int _rf_destroy_threadgroup(RF_ThreadGroup_t * g, char *file, int line); +int +_rf_init_managed_threadgroup(RF_ShutdownList_t ** listp, + RF_ThreadGroup_t * g, char *file, int line); -#else /* RF_DEBUG_ATOMIC > 0 */ - -/* defining these as macros allows us to NULL them out in the kernel */ -#define RF_DECLARE_MUTEX(_m_) pthread_mutex_t _m_; -#define RF_DECLARE_STATIC_MUTEX(_m_) static pthread_mutex_t _m_; -#define RF_DECLARE_EXTERN_MUTEX(_m_) extern pthread_mutex_t _m_; -#define RF_DECLARE_COND(_c_) pthread_cond_t _c_; -#define RF_DECLARE_STATIC_COND(_c_) static pthread_cond_t _c_; -#define RF_DECLARE_EXTERN_COND(_c_) extern pthread_cond_t _c_; - -int rf_mutex_init(pthread_mutex_t *m); -int rf_mutex_destroy(pthread_mutex_t *m); -int rf_cond_init(pthread_cond_t *c); -int rf_cond_destroy(pthread_cond_t *c); - -#define RF_LOCK_MUTEX(_m_) {pthread_mutex_lock(&(_m_));} -#define RF_UNLOCK_MUTEX(_m_) pthread_mutex_unlock(&(_m_)) - -#define RF_WAIT_COND(_c_,_m_) pthread_cond_wait( &(_c_), &(_m_) ) -#define RF_SIGNAL_COND(_c_) pthread_cond_signal( &(_c_) ) -#define RF_BROADCAST_COND(_c_) pthread_cond_broadcast(&(_c_)) - -#endif /* RF_DEBUG_ATOMIC > 0 */ - -int _rf_create_managed_mutex(RF_ShutdownList_t **listp, pthread_mutex_t *m, char *file, int line); -int _rf_create_managed_cond(RF_ShutdownList_t **listp, pthread_cond_t *c, char *file, int line); - -typedef pthread_t RF_Thread_t; -#ifdef __osf__ -typedef pthread_addr_t RF_ThreadArg_t; /* the argument to a thread function */ -#else /* __osf__ */ -typedef void *RF_ThreadArg_t; /* the argument to a thread function */ -#endif /* __osf__ */ -typedef pthread_attr_t RF_ThreadAttr_t; /* a thread creation attribute structure */ - -#ifdef __osf__ -#define RF_EXIT_THREAD(_status_) pthread_exit( (pthread_addr_t) (_status_) ) -#else /* __osf__ */ -#define RF_EXIT_THREAD(_status_) 
pthread_exit( (void *) (_status_) ) -#endif /* __osf__ */ -#define RF_DELAY_THREAD(_secs_, _msecs_) {struct timespec interval; \ - interval.tv_sec = (_secs_); \ - interval.tv_nsec = (_msecs_)*1000000; \ - pthread_delay_np(&interval); \ - } -#define RF_DELAY_THREAD_TS(_ts_) pthread_delay_np(&(_ts_)) - -#ifdef __osf__ -#define RF_THREAD_ATTR_CREATE(_attr_) pthread_attr_create( &(_attr_) ) -#define RF_THREAD_ATTR_DELETE(_attr_) pthread_attr_delete( &(_attr_) ) -#endif /* __osf__ */ -#ifdef AIX -#define RF_THREAD_ATTR_CREATE(_attr_) pthread_attr_init( &(_attr_) ) -#define RF_THREAD_ATTR_DELETE(_attr_) pthread_attr_destroy( &(_attr_) ) -#endif /* AIX */ -#define RF_THREAD_ATTR_SETSTACKSIZE(_attr_,_sz_) pthread_attr_setstacksize(&(_attr_), (long) (_sz_)) -#define RF_THREAD_ATTR_GETSTACKSIZE(_attr_) pthread_attr_getstacksize(_attr_) -#define RF_THREAD_ATTR_SETSCHED(_attr_,_sched_) pthread_attr_setsched(&(_attr_), (_sched_)) -#define RF_CREATE_ATTR_THREAD(_handle_, _attr_, _func_, _arg_) \ - pthread_create(&(_handle_), (_attr_), (pthread_startroutine_t) (_func_), (_arg_)) - - -extern pthread_attr_t raidframe_attr_default; -int rf_thread_create(RF_Thread_t *thread, pthread_attr_t attr, - void (*func)(), RF_ThreadArg_t arg); - -#define RF_CREATE_THREAD(_handle_, _func_, _arg_) \ - rf_thread_create(&(_handle_), raidframe_attr_default, (_func_), (_arg_)) - -#else /* KERNEL */ -#if defined(__NetBSD__) || defined(__OpenBSD__) #include <sys/lock.h> #define decl_simple_lock_data(a,b) a struct simplelock b; #define simple_lock_addr(a) ((struct simplelock *)&(a)) -#else -#include <kern/task.h> -#include <kern/thread.h> -#include <kern/lock.h> -#include <kern/sched_prim.h> -#define decl_simple_lock_data(a,b) a int (b); -#endif /* __NetBSD__ || __OpenBSD__ */ -#if defined(__NetBSD__) || defined(__OpenBSD__) typedef struct proc *RF_Thread_t; -#else -typedef thread_t RF_Thread_t; -#endif typedef void *RF_ThreadArg_t; #define RF_DECLARE_MUTEX(_m_) decl_simple_lock_data(,(_m_)) @@ -299,10 
+83,6 @@ typedef void *RF_ThreadArg_t; #define RF_LOCK_MUTEX(_m_) simple_lock(&(_m_)) #define RF_UNLOCK_MUTEX(_m_) simple_unlock(&(_m_)) - -#if defined(__NetBSD__) || defined(__OpenBSD__) -#include <sys/types.h> -#include <sys/kthread.h> /* * In Net- and OpenBSD, kernel threads are simply processes which share several * substructures and never run in userspace. @@ -320,72 +100,14 @@ typedef void *RF_ThreadArg_t; #define RF_CREATE_THREAD(_handle_, _func_, _arg_) \ kthread_create((void (*) __P((void *)))(_func_), (void *)(_arg_), \ (struct proc **)&(_handle_), "raid") -#else /* ! __NetBSD__ && ! __OpenBSD__ */ -/* - * Digital UNIX/Mach threads. - */ -#define RF_WAIT_COND(_c_,_m_) { \ - assert_wait((vm_offset_t)&(_c_), TRUE); \ - RF_UNLOCK_MUTEX(_m_); \ - thread_block(); \ - RF_LOCK_MUTEX(_m_); \ -} -#define RF_SIGNAL_COND(_c_) thread_wakeup_one(((vm_offset_t)&(_c_))) -#define RF_BROADCAST_COND(_c_) thread_wakeup(((vm_offset_t)&(_c_))) -extern task_t first_task; -#define RF_CREATE_THREAD(_handle_, _func_, _arg_) \ - (((_handle_ = kernel_thread_w_arg(first_task, (void (*)())_func_, (void *)(_arg_))) != THREAD_NULL) ? 
0 : ENOMEM) -#endif /* __NetBSD__ || __OpenBSD__ */ -#endif /* KERNEL */ -#else /* SIMULATE */ - -#define RF_DECLARE_MUTEX(_m_) int _m_; -#define RF_DECLARE_STATIC_MUTEX(_m_) static int _m_; -#define RF_DECLARE_EXTERN_MUTEX(_m_) extern int _m_; -#define RF_DECLARE_COND(_c_) int _c_; -#define RF_DECLARE_STATIC_COND(_c_) static int _c_; -#define RF_DECLARE_EXTERN_COND(_c_) extern int _c_; - -extern int rf_mutex_init(int *m); -extern int rf_mutex_destroy(int *m); -extern int rf_cond_init(int *c); -extern int rf_cond_destroy(int *c); - -int rf_mutex_init(int *m); -int rf_mutex_destroy(int *m); -int _rf_create_managed_mutex(RF_ShutdownList_t **listp, int *m, char *file, int line); -int _rf_create_managed_cond(RF_ShutdownList_t **listp, int *m, char *file, int line); - -typedef void *RF_ThreadArg_t; /* the argument to a thread function */ - -#define RF_LOCK_MUTEX(_m_) -#define RF_UNLOCK_MUTEX(_m_) - -#define RF_WAIT_COND(_c_,_m_) -#define RF_SIGNAL_COND(_c_) -#define RF_BROADCAST_COND(_c_) - -#define RF_EXIT_THREAD(_status_) -#define RF_DELAY_THREAD(_secs_, _msecs_) - -#define RF_THREAD_ATTR_CREATE(_attr_) ; -#define RF_THREAD_ATTR_DELETE(_attr_) ; -#define RF_THREAD_ATTR_SETSTACKSIZE(_attr_,_sz_) ; -#define RF_THREAD_ATTR_SETSCHED(_attr_,_sched_) ; -#define RF_CREATE_ATTR_THREAD(_handle_, _attr_, _func_, _arg_) ; - -#define RF_CREATE_THREAD(_handle_, _func_, _arg_) 1 - -#endif /* SIMULATE */ struct RF_ThreadGroup_s { - int created; - int running; - int shutdown; - RF_DECLARE_MUTEX(mutex) - RF_DECLARE_COND(cond) + int created; + int running; + int shutdown; + RF_DECLARE_MUTEX(mutex) + RF_DECLARE_COND(cond) }; - /* * Someone has started a thread in the group */ @@ -439,8 +161,8 @@ struct RF_ThreadGroup_s { RF_UNLOCK_MUTEX((_g_)->mutex); \ } #else - /* XXX Note that we've removed the assert. That should get put back - in once we actually get something like a kernel thread running */ + /* XXX Note that we've removed the assert. 
That should get put back in once + * we actually get something like a kernel thread running */ #define RF_THREADGROUP_WAIT_STOP(_g_) { \ RF_LOCK_MUTEX((_g_)->mutex); \ while((_g_)->shutdown < (_g_)->running) { \ @@ -450,16 +172,16 @@ struct RF_ThreadGroup_s { } #endif -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) -int rf_mutex_init(struct simplelock *); -int rf_mutex_destroy(struct simplelock *); -int _rf_create_managed_mutex(RF_ShutdownList_t **, struct simplelock *, - char *, int); -int _rf_create_managed_cond(RF_ShutdownList_t **listp, int *, - char *file, int line); +int rf_mutex_init(struct simplelock *); +int rf_mutex_destroy(struct simplelock *); +int +_rf_create_managed_mutex(RF_ShutdownList_t **, struct simplelock *, + char *, int); +int +_rf_create_managed_cond(RF_ShutdownList_t ** listp, int *, + char *file, int line); -int rf_cond_init(int *c); /* XXX need to write?? */ -int rf_cond_destroy(int *c); /* XXX need to write?? */ -#endif -#endif /* !_RF__RF_THREADSTUFF_H_ */ +int rf_cond_init(int *c); /* XXX need to write?? */ +int rf_cond_destroy(int *c);/* XXX need to write?? 
*/ +#endif /* !_RF__RF_THREADSTUFF_H_ */ diff --git a/sys/dev/raidframe/rf_types.h b/sys/dev/raidframe/rf_types.h index 6df3e9e5d78..900cea96020 100644 --- a/sys/dev/raidframe/rf_types.h +++ b/sys/dev/raidframe/rf_types.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_types.h,v 1.1 1999/01/11 14:29:54 niklas Exp $ */ -/* $NetBSD: rf_types.h,v 1.2 1998/11/16 04:14:10 mycroft Exp $ */ +/* $OpenBSD: rf_types.h,v 1.2 1999/02/16 00:03:32 niklas Exp $ */ +/* $NetBSD: rf_types.h,v 1.4 1999/02/05 00:06:18 oster Exp $ */ /* * rf_types.h */ @@ -34,254 +34,25 @@ * rf_types.h -- standard types for RAIDframe * ***********************************************************/ -/* - * : - * Log: rf_types.h,v - * Revision 1.35 1996/08/09 18:48:29 jimz - * correct mips definition - * - * Revision 1.34 1996/08/07 22:50:14 jimz - * monkey with linux includes to get a good compile - * - * Revision 1.33 1996/08/07 21:09:28 jimz - * add SGI mips stuff (note: 64-bit stuff may be wrong, I didn't have - * a machine to test on) - * - * Revision 1.32 1996/08/06 22:24:27 jimz - * add LINUX_I386 - * - * Revision 1.31 1996/07/31 16:30:12 jimz - * move in RF_LONGSHIFT - * - * Revision 1.30 1996/07/30 04:51:58 jimz - * ultrix port - * - * Revision 1.29 1996/07/29 16:37:34 jimz - * define DEC_OSF for osf/1 kernel - * - * Revision 1.28 1996/07/28 20:31:39 jimz - * i386netbsd port - * true/false fixup - * - * Revision 1.27 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.26 1996/07/27 18:40:24 jimz - * cleanup sweep - * - * Revision 1.25 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.24 1996/07/18 22:57:14 jimz - * port simulator to AIX - * - * Revision 1.23 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 
1.22 1996/06/11 18:11:57 jimz - * add ThreadGroup - * - * Revision 1.21 1996/06/11 10:58:47 jimz - * add RF_ReconDoneProc_t - * - * Revision 1.20 1996/06/10 14:18:58 jimz - * move user, throughput stats into per-array structure - * - * Revision 1.19 1996/06/10 11:55:47 jimz - * Straightened out some per-array/not-per-array distinctions, fixed - * a couple bugs related to confusion. Added shutdown lists. Removed - * layout shutdown function (now subsumed by shutdown lists). - * - * Revision 1.18 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.17 1996/06/05 19:38:32 jimz - * fixed up disk queueing types config - * added sstf disk queueing - * fixed exit bug on diskthreads (ref-ing bad mem) - * - * Revision 1.16 1996/06/05 18:06:02 jimz - * Major code cleanup. The Great Renaming is now done. - * Better modularity. Better typing. Fixed a bunch of - * synchronization bugs. Made a lot of global stuff - * per-desc or per-array. Removed dead code. - * - * Revision 1.15 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.14 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. - * - * Revision 1.13 1996/05/31 22:26:54 jimz - * fix a lot of mapping problems, memory allocation problems - * found some weird lock issues, fixed 'em - * more code cleanup - * - * Revision 1.12 1996/05/30 23:22:16 jimz - * bugfixes of serialization, timing problems - * more cleanup - * - * Revision 1.11 1996/05/30 11:29:41 jimz - * Numerous bug fixes. 
Stripe lock release code disagreed with the taking code - * about when stripes should be locked (I made it consistent: no parity, no lock) - * There was a lot of extra serialization of I/Os which I've removed- a lot of - * it was to calculate values for the cache code, which is no longer with us. - * More types, function, macro cleanup. Added code to properly quiesce the array - * on shutdown. Made a lot of stuff array-specific which was (bogusly) general - * before. Fixed memory allocation, freeing bugs. - * - * Revision 1.10 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.9 1996/05/24 22:17:04 jimz - * continue code + namespace cleanup - * typed a bunch of flags - * - * Revision 1.8 1996/05/24 04:28:55 jimz - * release cleanup ckpt - * - * Revision 1.7 1996/05/24 01:59:45 jimz - * another checkpoint in code cleanup for release - * time to sync kernel tree - * - * Revision 1.6 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.5 1996/05/23 00:33:23 jimz - * code cleanup: move all debug decls to rf_options.c, all extern - * debug decls to rf_options.h, all debug vars preceded by rf_ - * - * Revision 1.4 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.3 1996/05/10 16:22:46 jimz - * RF_offset -> RF_Offset - * add RF_SectorCount - * - * Revision 1.2 1996/05/02 14:58:50 jimz - * switch to _t for non-base-integral types - * - * Revision 1.1 1995/12/14 18:36:51 jimz - * Initial revision - * - */ #ifndef _RF__RF_TYPES_H_ #define _RF__RF_TYPES_H_ -#ifdef _KERNEL -#define KERNEL -#endif - #include "rf_archs.h" -#ifndef KERNEL -#ifdef LINUX -#include <stdlib.h> -#include <sys/types.h> -#endif /* LINUX */ -#include <fcntl.h> -#include <stdio.h> - -#ifdef __osf__ -/* - * The following monkeying is to get around 
some problems with - * conflicting definitions in /usr/include/random.h and /usr/include/stdlib.h - * on Digital Unix. They - * (1) define the same symbols - * (2) differently than one another - * (3) also differently from the DU libc sources - * This loses, bad. - */ -#include <standards.h> -#include <cma.h> -#ifdef _OSF_SOURCE -#undef _OSF_SOURCE -#define _RF_SPANKME -#endif /* _OSF_SOURCE */ -#endif /* __osf__ */ -#include <stdlib.h> -#ifdef __osf__ -#ifdef _RF_SPANKME -#undef _RF_SPANKME -#define _OSF_SOURCE -#endif /* _RF_SPANKME */ -#endif /* __osf__ */ - -#include <string.h> -#include <unistd.h> -#endif /* !KERNEL */ #include <sys/errno.h> #include <sys/types.h> -#ifdef AIX -#include <sys/stream.h> -#endif /* AIX */ - -#if defined(hpux) || defined(__hpux) -/* - * Yeah, we get one of hpux or __hpux, but not both. This is because - * HP didn't really want to provide an ANSI C compiler. Apparantly, they - * don't like standards. This explains a lot about their API. You might - * try using gcc, but you'll discover that it's sufficiently buggy that - * it can't even compile the core library. - * - * Hatred update: c89, the one thing which could both handle prototypes, - * and compile /usr/include/sys/timeout.h, can't do 64-bit ints. - * - * Note: the hpux port is incomplete. Why? Well, because I can't find - * a working C compiler. I've tried cc (both with and without -Ae), - * c89, and gcc, all with and without -D_HPUX_SOURCE. Sod it. 
- * - * -Jim Zelenka, 22 July 1996 - */ -#ifndef hpux -#define hpux -#endif /* !hpux */ -#include <sys/hpibio.h> -#endif /* hpux || __hpux*/ - -#ifdef sun -#ifndef KERNEL -#include <errno.h> -#endif /* !KERNEL */ -#endif /* sun */ - -#if defined(OSF) && defined(__alpha) && defined(KERNEL) -#ifndef DEC_OSF -#define DEC_OSF -#endif /* !DEC_OSF */ -#endif /* OSF && __alpha && KERNEL */ - -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(KERNEL) #include <sys/uio.h> #include <sys/param.h> #include <sys/lock.h> /* XXX not sure about these... */ -/* #define PZERO 0 */ /* actually defined in <sys/param.h> */ +/* #define PZERO 0 *//* actually defined in <sys/param.h> */ #define MS_LOCK_SIMPLE 1 -#define TRUE 1 /* XXX why isn't this done somewhere already!! */ - -#endif /* (__NetBSD__ || __OpenBSD__) && KERNEL */ +#define TRUE 1 /* XXX why isn't this done somewhere already!! */ /* * First, define system-dependent types and constants. @@ -296,8 +67,6 @@ * */ -#if defined(__NetBSD__) || defined(__OpenBSD__) - #include <sys/types.h> #include <machine/endian.h> #include <machine/limits.h> @@ -309,14 +78,14 @@ #else #error byte order not defined #endif -typedef int8_t RF_int8; -typedef u_int8_t RF_uint8; -typedef int16_t RF_int16; -typedef u_int16_t RF_uint16; -typedef int32_t RF_int32; -typedef u_int32_t RF_uint32; -typedef int64_t RF_int64; -typedef u_int64_t RF_uint64; +typedef int8_t RF_int8; +typedef u_int8_t RF_uint8; +typedef int16_t RF_int16; +typedef u_int16_t RF_uint16; +typedef int32_t RF_int32; +typedef u_int32_t RF_uint32; +typedef int64_t RF_int64; +typedef u_int64_t RF_uint64; #if LONG_BIT == 32 #define RF_LONGSHIFT 2 #elif LONG_BIT == 64 @@ -325,115 +94,6 @@ typedef u_int64_t RF_uint64; #error word size not defined #endif -#else /* __NetBSD__ || __OpenBSD__ */ - -#ifdef __alpha -#define RF_IS_BIG_ENDIAN 0 -typedef signed char RF_int8; -typedef unsigned char RF_uint8; -typedef short RF_int16; -typedef unsigned short RF_uint16; -typedef int RF_int32; 
-typedef unsigned int RF_uint32; -typedef long RF_int64; -typedef unsigned long RF_uint64; -#define RF_LONGSHIFT 3 -#endif /* __alpha */ - -#ifdef _IBMR2 -#define RF_IS_BIG_ENDIAN 1 -typedef signed char RF_int8; -typedef unsigned char RF_uint8; -typedef short RF_int16; -typedef unsigned short RF_uint16; -typedef int RF_int32; -typedef unsigned int RF_uint32; -typedef long long RF_int64; -typedef unsigned long long RF_uint64; -#define RF_LONGSHIFT 2 -#endif /* _IBMR2 */ - -#ifdef hpux -#define RF_IS_BIG_ENDIAN 1 -typedef signed char RF_int8; -typedef unsigned char RF_uint8; -typedef short RF_int16; -typedef unsigned short RF_uint16; -typedef int RF_int32; -typedef unsigned int RF_uint32; -typedef long long RF_int64; -typedef unsigned long long RF_uint64; -#define RF_LONGSHIFT 2 -#endif /* hpux */ - -#ifdef sun -#define RF_IS_BIG_ENDIAN 1 -typedef char RF_int8; -typedef unsigned char RF_uint8; -typedef short RF_int16; -typedef unsigned short RF_uint16; -typedef int RF_int32; -typedef unsigned int RF_uint32; -typedef long long RF_int64; -typedef unsigned long long RF_uint64; -#define RF_LONGSHIFT 2 -#endif /* sun */ - -#if defined(NETBSD_I386) || defined(NETBSD_I386) || defined(LINUX_I386) -#define RF_IS_BIG_ENDIAN 0 -typedef char RF_int8; -typedef unsigned char RF_uint8; -typedef short RF_int16; -typedef unsigned short RF_uint16; -typedef int RF_int32; -typedef unsigned int RF_uint32; -typedef long long RF_int64; -typedef unsigned long long RF_uint64; -#define RF_LONGSHIFT 2 -#endif /* NETBSD_I386 || OPENBSD_I386 || LINUX_I386 */ - -#if defined(mips) && !defined(SGI) && !defined(__NetBSD__) && !defined(__OpenBSD__) -#define RF_IS_BIG_ENDIAN 0 -typedef char RF_int8; -typedef unsigned char RF_uint8; -typedef short RF_int16; -typedef unsigned short RF_uint16; -typedef int RF_int32; -typedef unsigned int RF_uint32; -typedef long long RF_int64; -typedef unsigned long long RF_uint64; -#define RF_LONGSHIFT 2 -#endif /* mips && !SGI */ - -#ifdef SGI -#if _MIPS_SZLONG == 64 
-#define RF_IS_BIG_ENDIAN 1 -typedef signed char RF_int8; -typedef unsigned char RF_uint8; -typedef short RF_int16; -typedef unsigned short RF_uint16; -typedef int RF_int32; -typedef unsigned int RF_uint32; -typedef long RF_int64; -typedef unsigned long RF_uint64; -#define RF_LONGSHIFT 3 -#endif /* _MIPS_SZLONG == 64 */ -#if _MIPS_SZLONG == 32 -#define RF_IS_BIG_ENDIAN 1 -typedef char RF_int8; -typedef unsigned char RF_uint8; -typedef short RF_int16; -typedef unsigned short RF_uint16; -typedef int RF_int32; -typedef unsigned int RF_uint32; -typedef long long RF_int64; -typedef unsigned long long RF_uint64; -#define RF_LONGSHIFT 2 -#endif /* _MIPS_SZLONG == 32 */ -#endif /* SGI */ - -#endif /* __NetBSD__ || __OpenBSD__ */ - /* * These are just zero and non-zero. We don't use "TRUE" * and "FALSE" because there's too much nonsense trying @@ -447,18 +107,19 @@ typedef unsigned long long RF_uint64; /* * Now, some generic types */ -typedef RF_uint64 RF_IoCount_t; -typedef RF_uint64 RF_Offset_t; -typedef RF_uint32 RF_PSSFlags_t; -typedef RF_uint64 RF_SectorCount_t; -typedef RF_uint64 RF_StripeCount_t; -typedef RF_int64 RF_SectorNum_t; /* these are unsigned so we can set them to (-1) for "uninitialized" */ -typedef RF_int64 RF_StripeNum_t; -typedef RF_int64 RF_RaidAddr_t; -typedef int RF_RowCol_t; /* unsigned so it can be (-1) */ -typedef RF_int64 RF_HeadSepLimit_t; -typedef RF_int64 RF_ReconUnitCount_t; -typedef int RF_ReconUnitNum_t; +typedef RF_uint64 RF_IoCount_t; +typedef RF_uint64 RF_Offset_t; +typedef RF_uint32 RF_PSSFlags_t; +typedef RF_uint64 RF_SectorCount_t; +typedef RF_uint64 RF_StripeCount_t; +typedef RF_int64 RF_SectorNum_t;/* these are signed so we can set them to + * (-1) for "uninitialized" */ +typedef RF_int64 RF_StripeNum_t; +typedef RF_int64 RF_RaidAddr_t; +typedef int RF_RowCol_t; /* signed so it can be (-1) */ +typedef RF_int64 RF_HeadSepLimit_t; +typedef RF_int64 RF_ReconUnitCount_t; +typedef int RF_ReconUnitNum_t; typedef char RF_ParityConfig_t; 
@@ -467,9 +128,11 @@ typedef char RF_DiskQueueType_t[1024]; /* values for the 'type' field in a reconstruction buffer */ typedef int RF_RbufType_t; -#define RF_RBUF_TYPE_EXCLUSIVE 0 /* this buf assigned exclusively to one disk */ -#define RF_RBUF_TYPE_FLOATING 1 /* this is a floating recon buf */ -#define RF_RBUF_TYPE_FORCED 2 /* this rbuf was allocated to complete a forced recon */ +#define RF_RBUF_TYPE_EXCLUSIVE 0 /* this buf assigned exclusively to + * one disk */ +#define RF_RBUF_TYPE_FLOATING 1 /* this is a floating recon buf */ +#define RF_RBUF_TYPE_FORCED 2 /* this rbuf was allocated to complete + * a forced recon */ typedef char RF_IoType_t; #define RF_IO_TYPE_READ 'r' @@ -481,9 +144,9 @@ typedef char RF_IoType_t; #ifdef SIMULATE typedef double RF_TICS_t; typedef int RF_Owner_t; -#endif /* SIMULATE */ +#endif /* SIMULATE */ -typedef void (*RF_VoidFuncPtr)(void *,...); +typedef void (*RF_VoidFuncPtr) (void *,...); typedef RF_uint32 RF_AccessStripeMapFlags_t; typedef RF_uint32 RF_DiskQueueDataFlags_t; @@ -492,62 +155,62 @@ typedef RF_uint32 RF_RaidAccessFlags_t; #define RF_DISKQUEUE_DATA_FLAGS_NONE ((RF_DiskQueueDataFlags_t)0) -typedef struct RF_AccessStripeMap_s RF_AccessStripeMap_t; -typedef struct RF_AccessStripeMapHeader_s RF_AccessStripeMapHeader_t; -typedef struct RF_AllocListElem_s RF_AllocListElem_t; -typedef struct RF_CallbackDesc_s RF_CallbackDesc_t; -typedef struct RF_ChunkDesc_s RF_ChunkDesc_t; -typedef struct RF_CommonLogData_s RF_CommonLogData_t; -typedef struct RF_Config_s RF_Config_t; -typedef struct RF_CumulativeStats_s RF_CumulativeStats_t; -typedef struct RF_DagHeader_s RF_DagHeader_t; -typedef struct RF_DagList_s RF_DagList_t; -typedef struct RF_DagNode_s RF_DagNode_t; -typedef struct RF_DeclusteredConfigInfo_s RF_DeclusteredConfigInfo_t; -typedef struct RF_DiskId_s RF_DiskId_t; -typedef struct RF_DiskMap_s RF_DiskMap_t; -typedef struct RF_DiskQueue_s RF_DiskQueue_t; -typedef struct RF_DiskQueueData_s RF_DiskQueueData_t; -typedef struct 
RF_DiskQueueSW_s RF_DiskQueueSW_t; -typedef struct RF_Etimer_s RF_Etimer_t; -typedef struct RF_EventCreate_s RF_EventCreate_t; -typedef struct RF_FreeList_s RF_FreeList_t; -typedef struct RF_LockReqDesc_s RF_LockReqDesc_t; -typedef struct RF_LockTableEntry_s RF_LockTableEntry_t; -typedef struct RF_MCPair_s RF_MCPair_t; -typedef struct RF_OwnerInfo_s RF_OwnerInfo_t; -typedef struct RF_ParityLog_s RF_ParityLog_t; -typedef struct RF_ParityLogAppendQueue_s RF_ParityLogAppendQueue_t; -typedef struct RF_ParityLogData_s RF_ParityLogData_t; -typedef struct RF_ParityLogDiskQueue_s RF_ParityLogDiskQueue_t; -typedef struct RF_ParityLogQueue_s RF_ParityLogQueue_t; -typedef struct RF_ParityLogRecord_s RF_ParityLogRecord_t; -typedef struct RF_PerDiskReconCtrl_s RF_PerDiskReconCtrl_t; -typedef struct RF_PSStatusHeader_s RF_PSStatusHeader_t; -typedef struct RF_PhysDiskAddr_s RF_PhysDiskAddr_t; -typedef struct RF_PropHeader_s RF_PropHeader_t; -typedef struct RF_Raid_s RF_Raid_t; -typedef struct RF_RaidAccessDesc_s RF_RaidAccessDesc_t; -typedef struct RF_RaidDisk_s RF_RaidDisk_t; -typedef struct RF_RaidLayout_s RF_RaidLayout_t; -typedef struct RF_RaidReconDesc_s RF_RaidReconDesc_t; -typedef struct RF_ReconBuffer_s RF_ReconBuffer_t; -typedef struct RF_ReconConfig_s RF_ReconConfig_t; -typedef struct RF_ReconCtrl_s RF_ReconCtrl_t; -typedef struct RF_ReconDoneProc_s RF_ReconDoneProc_t; -typedef struct RF_ReconEvent_s RF_ReconEvent_t; -typedef struct RF_ReconMap_s RF_ReconMap_t; -typedef struct RF_ReconMapListElem_s RF_ReconMapListElem_t; -typedef struct RF_ReconParityStripeStatus_s RF_ReconParityStripeStatus_t; -typedef struct RF_RedFuncs_s RF_RedFuncs_t; -typedef struct RF_RegionBufferQueue_s RF_RegionBufferQueue_t; -typedef struct RF_RegionInfo_s RF_RegionInfo_t; -typedef struct RF_ShutdownList_s RF_ShutdownList_t; -typedef struct RF_SpareTableEntry_s RF_SpareTableEntry_t; -typedef struct RF_SparetWait_s RF_SparetWait_t; -typedef struct RF_StripeLockDesc_s RF_StripeLockDesc_t; 
-typedef struct RF_ThreadGroup_s RF_ThreadGroup_t; -typedef struct RF_ThroughputStats_s RF_ThroughputStats_t; +typedef struct RF_AccessStripeMap_s RF_AccessStripeMap_t; +typedef struct RF_AccessStripeMapHeader_s RF_AccessStripeMapHeader_t; +typedef struct RF_AllocListElem_s RF_AllocListElem_t; +typedef struct RF_CallbackDesc_s RF_CallbackDesc_t; +typedef struct RF_ChunkDesc_s RF_ChunkDesc_t; +typedef struct RF_CommonLogData_s RF_CommonLogData_t; +typedef struct RF_Config_s RF_Config_t; +typedef struct RF_CumulativeStats_s RF_CumulativeStats_t; +typedef struct RF_DagHeader_s RF_DagHeader_t; +typedef struct RF_DagList_s RF_DagList_t; +typedef struct RF_DagNode_s RF_DagNode_t; +typedef struct RF_DeclusteredConfigInfo_s RF_DeclusteredConfigInfo_t; +typedef struct RF_DiskId_s RF_DiskId_t; +typedef struct RF_DiskMap_s RF_DiskMap_t; +typedef struct RF_DiskQueue_s RF_DiskQueue_t; +typedef struct RF_DiskQueueData_s RF_DiskQueueData_t; +typedef struct RF_DiskQueueSW_s RF_DiskQueueSW_t; +typedef struct RF_Etimer_s RF_Etimer_t; +typedef struct RF_EventCreate_s RF_EventCreate_t; +typedef struct RF_FreeList_s RF_FreeList_t; +typedef struct RF_LockReqDesc_s RF_LockReqDesc_t; +typedef struct RF_LockTableEntry_s RF_LockTableEntry_t; +typedef struct RF_MCPair_s RF_MCPair_t; +typedef struct RF_OwnerInfo_s RF_OwnerInfo_t; +typedef struct RF_ParityLog_s RF_ParityLog_t; +typedef struct RF_ParityLogAppendQueue_s RF_ParityLogAppendQueue_t; +typedef struct RF_ParityLogData_s RF_ParityLogData_t; +typedef struct RF_ParityLogDiskQueue_s RF_ParityLogDiskQueue_t; +typedef struct RF_ParityLogQueue_s RF_ParityLogQueue_t; +typedef struct RF_ParityLogRecord_s RF_ParityLogRecord_t; +typedef struct RF_PerDiskReconCtrl_s RF_PerDiskReconCtrl_t; +typedef struct RF_PSStatusHeader_s RF_PSStatusHeader_t; +typedef struct RF_PhysDiskAddr_s RF_PhysDiskAddr_t; +typedef struct RF_PropHeader_s RF_PropHeader_t; +typedef struct RF_Raid_s RF_Raid_t; +typedef struct RF_RaidAccessDesc_s RF_RaidAccessDesc_t; +typedef 
struct RF_RaidDisk_s RF_RaidDisk_t; +typedef struct RF_RaidLayout_s RF_RaidLayout_t; +typedef struct RF_RaidReconDesc_s RF_RaidReconDesc_t; +typedef struct RF_ReconBuffer_s RF_ReconBuffer_t; +typedef struct RF_ReconConfig_s RF_ReconConfig_t; +typedef struct RF_ReconCtrl_s RF_ReconCtrl_t; +typedef struct RF_ReconDoneProc_s RF_ReconDoneProc_t; +typedef struct RF_ReconEvent_s RF_ReconEvent_t; +typedef struct RF_ReconMap_s RF_ReconMap_t; +typedef struct RF_ReconMapListElem_s RF_ReconMapListElem_t; +typedef struct RF_ReconParityStripeStatus_s RF_ReconParityStripeStatus_t; +typedef struct RF_RedFuncs_s RF_RedFuncs_t; +typedef struct RF_RegionBufferQueue_s RF_RegionBufferQueue_t; +typedef struct RF_RegionInfo_s RF_RegionInfo_t; +typedef struct RF_ShutdownList_s RF_ShutdownList_t; +typedef struct RF_SpareTableEntry_s RF_SpareTableEntry_t; +typedef struct RF_SparetWait_s RF_SparetWait_t; +typedef struct RF_StripeLockDesc_s RF_StripeLockDesc_t; +typedef struct RF_ThreadGroup_s RF_ThreadGroup_t; +typedef struct RF_ThroughputStats_s RF_ThroughputStats_t; /* * Important assumptions regarding ordering of the states in this list @@ -555,29 +218,30 @@ typedef struct RF_ThroughputStats_s RF_ThroughputStats_t; * Before disturbing this ordering, look at code in rf_states.c */ typedef enum RF_AccessState_e { - /* original states */ - rf_QuiesceState, /* handles queisence for reconstruction */ - rf_IncrAccessesCountState, /* count accesses in flight */ - rf_DecrAccessesCountState, - rf_MapState, /* map access to disk addresses */ - rf_LockState, /* take stripe locks */ - rf_CreateDAGState, /* create DAGs */ - rf_ExecuteDAGState, /* execute DAGs */ - rf_ProcessDAGState, /* DAGs are completing- check if correct, or if we need to retry */ - rf_CleanupState, /* release stripe locks, clean up */ - rf_LastState /* must be the last state */ -} RF_AccessState_t; - -#define RF_MAXROW 10 /* these are arbitrary and can be modified at will */ + /* original states */ + rf_QuiesceState, /* handles 
queisence for reconstruction */ + rf_IncrAccessesCountState, /* count accesses in flight */ + rf_DecrAccessesCountState, + rf_MapState, /* map access to disk addresses */ + rf_LockState, /* take stripe locks */ + rf_CreateDAGState, /* create DAGs */ + rf_ExecuteDAGState, /* execute DAGs */ + rf_ProcessDAGState, /* DAGs are completing- check if correct, or + * if we need to retry */ + rf_CleanupState, /* release stripe locks, clean up */ + rf_LastState /* must be the last state */ +} RF_AccessState_t; +#define RF_MAXROW 10 /* these are arbitrary and can be modified at + * will */ #define RF_MAXCOL 40 #define RF_MAXSPARE 10 -#define RF_MAXDBGV 75 /* max number of debug variables */ +#define RF_MAXDBGV 75 /* max number of debug variables */ union RF_GenericParam_u { - void *p; - RF_uint64 v; + void *p; + RF_uint64 v; }; typedef union RF_GenericParam_u RF_DagParam_t; typedef union RF_GenericParam_u RF_CBParam_t; -#endif /* _RF__RF_TYPES_H_ */ +#endif /* _RF__RF_TYPES_H_ */ diff --git a/sys/dev/raidframe/rf_utils.c b/sys/dev/raidframe/rf_utils.c index be379ed8e58..7ebe92e1e4d 100644 --- a/sys/dev/raidframe/rf_utils.c +++ b/sys/dev/raidframe/rf_utils.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_utils.c,v 1.1 1999/01/11 14:29:54 niklas Exp $ */ -/* $NetBSD: rf_utils.c,v 1.1 1998/11/13 04:20:35 oster Exp $ */ +/* $OpenBSD: rf_utils.c,v 1.2 1999/02/16 00:03:33 niklas Exp $ */ +/* $NetBSD: rf_utils.c,v 1.3 1999/02/05 00:06:18 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -33,70 +33,9 @@ * ****************************************/ -/* : - * Log: rf_utils.c,v - * Revision 1.20 1996/07/27 23:36:08 jimz - * Solaris port of simulator - * - * Revision 1.19 1996/07/22 19:52:16 jimz - * switched node params to RF_DagParam_t, a union of - * a 64-bit int and a void *, for better portability - * attempted hpux port, but failed partway through for - * lack of a single C compiler capable of compiling all - * source files - * - * Revision 1.18 1996/07/15 17:22:18 jimz - * nit-pick code cleanup - * resolve stdlib problems on DEC OSF - * - * Revision 1.17 1996/06/09 02:36:46 jimz - * lots of little crufty cleanup- fixup whitespace - * issues, comment #ifdefs, improve typing in some - * places (esp size-related) - * - * Revision 1.16 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.15 1996/06/03 23:28:26 jimz - * more bugfixes - * check in tree to sync for IPDS runs with current bugfixes - * there still may be a problem with threads in the script test - * getting I/Os stuck- not trivially reproducible (runs ~50 times - * in a row without getting stuck) - * - * Revision 1.14 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.13 1996/05/27 18:56:37 jimz - * more code cleanup - * better typing - * compiles in all 3 environments - * - * Revision 1.12 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.11 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.10 1995/12/06 15:17:44 root - * added copyright info - * - */ #include "rf_threadstuff.h" -#ifdef _KERNEL -#define KERNEL -#endif - -#ifndef KERNEL -#include <stdio.h> -#endif /* !KERNEL */ #include <sys/time.h> #include "rf_threadid.h" @@ -106,102 +45,74 @@ #include "rf_general.h" #include "rf_sys.h" -#ifndef KERNEL -#include "rf_randmacros.h" -#endif /* !KERNEL */ - /* creates & zeros 2-d array with b rows and k columns (MCH) */ -RF_RowCol_t **rf_make_2d_array(b, k, allocList) - int b; - int k; - RF_AllocListElem_t *allocList; +RF_RowCol_t ** +rf_make_2d_array(b, k, allocList) + int b; + int k; + RF_AllocListElem_t *allocList; { - RF_RowCol_t **retval, i; - - RF_MallocAndAdd(retval, b * sizeof(RF_RowCol_t *), (RF_RowCol_t **), allocList); - for (i=0; i<b; i++) { - RF_MallocAndAdd(retval[i], k * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList); - (void) bzero((char *) retval[i], k*sizeof(RF_RowCol_t)); - } - return(retval); + RF_RowCol_t **retval, i; + + RF_MallocAndAdd(retval, b * sizeof(RF_RowCol_t *), (RF_RowCol_t **), allocList); + for (i = 0; i < b; i++) { + RF_MallocAndAdd(retval[i], k * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList); + (void) bzero((char *) retval[i], k * sizeof(RF_RowCol_t)); + } + return (retval); } -void rf_free_2d_array(a, b, k) - RF_RowCol_t **a; - int b; - int k; +void +rf_free_2d_array(a, b, k) + RF_RowCol_t **a; + int b; + int k; { - RF_RowCol_t i; + RF_RowCol_t i; - for (i=0; i<b; i++) - RF_Free(a[i], k*sizeof(RF_RowCol_t)); - RF_Free(a, b*sizeof(RF_RowCol_t)); + for (i = 0; i < b; i++) + 
RF_Free(a[i], k * sizeof(RF_RowCol_t)); + RF_Free(a, b * sizeof(RF_RowCol_t)); } /* creates & zeros a 1-d array with c columns */ -RF_RowCol_t *rf_make_1d_array(c, allocList) - int c; - RF_AllocListElem_t *allocList; +RF_RowCol_t * +rf_make_1d_array(c, allocList) + int c; + RF_AllocListElem_t *allocList; { - RF_RowCol_t *retval; + RF_RowCol_t *retval; - RF_MallocAndAdd(retval, c * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList); - (void) bzero((char *) retval, c*sizeof(RF_RowCol_t)); - return(retval); + RF_MallocAndAdd(retval, c * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList); + (void) bzero((char *) retval, c * sizeof(RF_RowCol_t)); + return (retval); } -void rf_free_1d_array(a, n) - RF_RowCol_t *a; - int n; +void +rf_free_1d_array(a, n) + RF_RowCol_t *a; + int n; { - RF_Free(a, n * sizeof(RF_RowCol_t)); + RF_Free(a, n * sizeof(RF_RowCol_t)); } - /* Euclid's algorithm: finds and returns the greatest common divisor * between a and b. (MCH) */ -int rf_gcd(m, n) - int m; - int n; +int +rf_gcd(m, n) + int m; + int n; { - int t; - - while (m>0) { - t = n % m; - n = m; - m = t; - } - return(n); + int t; + + while (m > 0) { + t = n % m; + n = m; + m = t; + } + return (n); } - -#if !defined(KERNEL) && !defined(SIMULATE) && defined(__osf__) -/* this is used to generate a random number when _FASTRANDOM is off - * in randmacros.h - */ -long rf_do_random(rval, rdata) - long *rval; - struct random_data *rdata; -{ - int a, b; - long c; - /* - * random_r() generates random 32-bit values. OR them together. - */ - if (random_r(&a, rdata)!=0) { - fprintf(stderr,"Yikes! call to random_r failed\n"); - exit(1); - } - if (random_r(&b, rdata)!=0) { - fprintf(stderr,"Yikes! call to random_r failed\n"); - exit(1); - } - c = ((long)a)<<32; - *rval = c|b; - return(*rval); -} -#endif /* !KERNEL && !SIMULATE && __osf__ */ - /* these convert between text and integer. 
Apparently the regular C macros * for doing this are not available in the kernel */ @@ -212,20 +123,27 @@ long rf_do_random(rval, rdata) #define HC2INT(x) ( ((x) >= 'a' && (x) <= 'f') ? (x) - 'a' + 10 : \ ( ((x) >= 'A' && (x) <= 'F') ? (x) - 'A' + 10 : (x - '0') ) ) -int rf_atoi(p) - char *p; +int +rf_atoi(p) + char *p; { - int val = 0, negate = 0; - - if (*p == '-') {negate=1; p++;} - for ( ; ISDIGIT(*p); p++) val = 10 * val + (*p - '0'); - return((negate) ? -val : val); + int val = 0, negate = 0; + + if (*p == '-') { + negate = 1; + p++; + } + for (; ISDIGIT(*p); p++) + val = 10 * val + (*p - '0'); + return ((negate) ? -val : val); } -int rf_htoi(p) - char *p; +int +rf_htoi(p) + char *p; { - int val = 0; - for ( ; ISHEXCHAR(*p); p++) val = 16 * val + HC2INT(*p); - return(val); + int val = 0; + for (; ISHEXCHAR(*p); p++) + val = 16 * val + HC2INT(*p); + return (val); } diff --git a/sys/dev/raidframe/rf_utils.h b/sys/dev/raidframe/rf_utils.h index 73eede8f131..fb53ecd9719 100644 --- a/sys/dev/raidframe/rf_utils.h +++ b/sys/dev/raidframe/rf_utils.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_utils.h,v 1.1 1999/01/11 14:29:55 niklas Exp $ */ -/* $NetBSD: rf_utils.h,v 1.1 1998/11/13 04:20:35 oster Exp $ */ +/* $OpenBSD: rf_utils.h,v 1.2 1999/02/16 00:03:33 niklas Exp $ */ +/* $NetBSD: rf_utils.h,v 1.3 1999/02/05 00:06:18 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,29 +33,6 @@ * ***************************************/ -/* : - * Log: rf_utils.h,v - * Revision 1.7 1996/06/07 21:33:04 jimz - * begin using consistent types for sector numbers, - * stripe numbers, row+col numbers, recon unit numbers - * - * Revision 1.6 1996/06/02 17:31:48 jimz - * Moved a lot of global stuff into array structure, where it belongs. - * Fixed up paritylogging, pss modules in this manner. Some general - * code cleanup. Removed lots of dead code, some dead files. 
- * - * Revision 1.5 1996/05/23 21:46:35 jimz - * checkpoint in code cleanup (release prep) - * lots of types, function names have been fixed - * - * Revision 1.4 1996/05/18 19:51:34 jimz - * major code cleanup- fix syntax, make some types consistent, - * add prototypes, clean out dead code, et cetera - * - * Revision 1.3 1995/12/06 15:17:53 root - * added copyright info - * - */ #ifndef _RF__RF_UTILS_H_ #define _RF__RF_UTILS_H_ @@ -64,15 +41,15 @@ #include "rf_alloclist.h" #include "rf_threadstuff.h" -char *rf_find_non_white(char *p); -char *rf_find_white(char *p); -RF_RowCol_t **rf_make_2d_array(int b, int k, RF_AllocListElem_t *allocList); -RF_RowCol_t *rf_make_1d_array(int c, RF_AllocListElem_t *allocList); -void rf_free_2d_array(RF_RowCol_t **a, int b, int k); -void rf_free_1d_array(RF_RowCol_t *a, int n); -int rf_gcd(int m, int n); -int rf_atoi(char *p); -int rf_htoi(char *p); +char *rf_find_non_white(char *p); +char *rf_find_white(char *p); +RF_RowCol_t **rf_make_2d_array(int b, int k, RF_AllocListElem_t * allocList); +RF_RowCol_t *rf_make_1d_array(int c, RF_AllocListElem_t * allocList); +void rf_free_2d_array(RF_RowCol_t ** a, int b, int k); +void rf_free_1d_array(RF_RowCol_t * a, int n); +int rf_gcd(int m, int n); +int rf_atoi(char *p); +int rf_htoi(char *p); #define RF_USEC_PER_SEC 1000000 #define RF_TIMEVAL_DIFF(_start_,_end_,_diff_) { \ @@ -87,4 +64,4 @@ int rf_htoi(char *p); } \ } -#endif /* !_RF__RF_UTILS_H_ */ +#endif /* !_RF__RF_UTILS_H_ */ |