137 files changed, 19383 insertions, 15494 deletions
diff --git a/sys/dev/raidframe/TODO b/sys/dev/raidframe/TODO new file mode 100644 index 00000000000..234a9aca17d --- /dev/null +++ b/sys/dev/raidframe/TODO @@ -0,0 +1,11 @@ +rf_debugprint.c -- redesign to use real circular queue +Dprintf -- use the 'DPRINTF((...));' form like in FW stuff +rf_freelist.h -- get rid of those macro usage !!! + +rf_driver.c +rf_memchunk.c +rf_openbsdkintf.c +rf_revent.c +rf_states.c +rf_threadstuff.c + diff --git a/sys/dev/raidframe/rf_acctrace.c b/sys/dev/raidframe/rf_acctrace.c index c570fce74e8..d95b47c5e61 100644 --- a/sys/dev/raidframe/rf_acctrace.c +++ b/sys/dev/raidframe/rf_acctrace.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_acctrace.c,v 1.3 2000/01/07 14:50:19 peter Exp $ */ +/* $OpenBSD: rf_acctrace.c,v 1.4 2002/12/16 07:01:02 tdeval Exp $ */ /* $NetBSD: rf_acctrace.c,v 1.4 1999/08/13 03:41:52 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,7 +30,7 @@ /***************************************************************************** * - * acctrace.c -- code to support collecting information about each access + * acctrace.c -- Code to support collecting information about each access. * *****************************************************************************/ @@ -52,77 +53,84 @@ static int accessTraceBufCount = 0; static RF_AccTraceEntry_t *access_tracebuf; static long traceCount; -int rf_stopCollectingTraces; -RF_DECLARE_MUTEX(rf_tracing_mutex) - int rf_trace_fd; +int rf_stopCollectingTraces; +RF_DECLARE_MUTEX(rf_tracing_mutex); +int rf_trace_fd; - static void rf_ShutdownAccessTrace(void *); +void rf_ShutdownAccessTrace(void *); - static void rf_ShutdownAccessTrace(ignored) - void *ignored; +void +rf_ShutdownAccessTrace(void *ignored) { if (rf_accessTraceBufSize) { if (accessTraceBufCount) rf_FlushAccessTraceBuf(); - RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t)); + RF_Free(access_tracebuf, rf_accessTraceBufSize * + sizeof(RF_AccTraceEntry_t)); } rf_mutex_destroy(&rf_tracing_mutex); } -int -rf_ConfigureAccessTrace(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureAccessTrace(RF_ShutdownList_t **listp) { - int rc; + int rc; numTracesSoFar = accessTraceBufCount = rf_stopCollectingTraces = 0; if (rf_accessTraceBufSize) { - RF_Malloc(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + RF_Malloc(access_tracebuf, rf_accessTraceBufSize * + sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); accessTraceBufCount = 0; } traceCount = 0; numTracesSoFar = 0; rc = rf_mutex_init(&rf_tracing_mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n", + __FILE__, __LINE__, rc); } rc = rf_ShutdownCreate(listp, rf_ShutdownAccessTrace, NULL); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d.\n", __FILE__, __LINE__, rc); if (rf_accessTraceBufSize) { - RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t)); + RF_Free(access_tracebuf, rf_accessTraceBufSize * + sizeof(RF_AccTraceEntry_t)); rf_mutex_destroy(&rf_tracing_mutex); } } return (rc); } -/* install a trace record. cause a flush to disk or to the trace collector daemon - * if the trace buffer is at least 1/2 full. + +/* + * Install a trace record. 
Cause a flush to disk or to the trace collector + * daemon if the trace buffer is at least 1/2 full. */ -void -rf_LogTraceRec(raid, rec) - RF_Raid_t *raid; - RF_AccTraceEntry_t *rec; +void +rf_LogTraceRec(RF_Raid_t *raid, RF_AccTraceEntry_t *rec) { RF_AccTotals_t *acc = &raid->acc_totals; #if 0 RF_Etimer_t timer; - int i, n; + int i, n; #endif - if (rf_stopCollectingTraces || ((rf_maxNumTraces >= 0) && (numTracesSoFar >= rf_maxNumTraces))) + if (rf_stopCollectingTraces || ((rf_maxNumTraces >= 0) && + (numTracesSoFar >= rf_maxNumTraces))) return; - /* update AccTotals for this device */ + /* Update AccTotals for this device. */ if (!raid->keep_acc_totals) return; + acc->num_log_ents++; if (rec->reconacc) { - acc->recon_start_to_fetch_us += rec->specific.recon.recon_start_to_fetch_us; - acc->recon_fetch_to_return_us += rec->specific.recon.recon_fetch_to_return_us; - acc->recon_return_to_submit_us += rec->specific.recon.recon_return_to_submit_us; + acc->recon_start_to_fetch_us += + rec->specific.recon.recon_start_to_fetch_us; + acc->recon_fetch_to_return_us += + rec->specific.recon.recon_fetch_to_return_us; + acc->recon_return_to_submit_us += + rec->specific.recon.recon_return_to_submit_us; acc->recon_num_phys_ios += rec->num_phys_ios; acc->recon_phys_io_us += rec->phys_io_us; acc->recon_diskwait_us += rec->diskwait_us; @@ -130,11 +138,15 @@ rf_LogTraceRec(raid, rec) } else { RF_HIST_ADD(acc->tot_hist, rec->total_us); RF_HIST_ADD(acc->dw_hist, rec->diskwait_us); - /* count of physical ios which are too big. often due to - * thermal recalibration */ - /* if bigvals > 0, you should probably ignore this data set */ + /* + * Count of physical IOs that are too big. (often due to + * thermal recalibration) + * + * If bigvals > 0, you should probably ignore this data set. + */ if (rec->diskwait_us > 100000) acc->bigvals++; + acc->total_us += rec->total_us; acc->suspend_ovhd_us += rec->specific.user.suspend_ovhd_us; acc->map_us += rec->specific.user.map_us; @@ -156,12 +168,13 @@ rf_LogTraceRec(raid, rec) } -/* assumes the tracing mutex is locked at entry. In order to allow this to be called - * from interrupt context, we don't do any copyouts here, but rather just wake trace - * buffer collector thread. +/* + * Assumes the tracing mutex is locked at entry. In order to allow this to + * be called from interrupt context, we don't do any copyouts here, but rather + * just wake the trace buffer collector thread. */ -void -rf_FlushAccessTraceBuf() +void +rf_FlushAccessTraceBuf(void) { accessTraceBufCount = 0; } diff --git a/sys/dev/raidframe/rf_acctrace.h b/sys/dev/raidframe/rf_acctrace.h index f7ca09eb173..4c31bd17d32 100644 --- a/sys/dev/raidframe/rf_acctrace.h +++ b/sys/dev/raidframe/rf_acctrace.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_acctrace.h,v 1.2 1999/02/16 00:02:22 niklas Exp $ */ +/* $OpenBSD: rf_acctrace.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_acctrace.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -29,106 +30,133 @@ /***************************************************************************** * - * acctrace.h -- header file for acctrace.c + * acctrace.h -- Header file for acctrace.c * *****************************************************************************/ -#ifndef _RF__RF_ACCTRACE_H_ -#define _RF__RF_ACCTRACE_H_ +#ifndef _RF__RF_ACCTRACE_H_ +#define _RF__RF_ACCTRACE_H_ #include "rf_types.h" #include "rf_hist.h" #include "rf_etimer.h" typedef struct RF_user_acc_stats_s { - RF_uint64 suspend_ovhd_us; /* us spent mucking in the - * access-suspension code */ - RF_uint64 map_us; /* us spent mapping the access */ - RF_uint64 lock_us; /* us spent locking & unlocking stripes, - * including time spent blocked */ - RF_uint64 dag_create_us;/* us spent creating the DAGs */ - RF_uint64 dag_retry_us; /* _total_ us spent retrying the op -- not - * broken down into components */ - RF_uint64 exec_us; /* us spent in DispatchDAG */ - RF_uint64 exec_engine_us; /* us spent in engine, not including - * blocking time */ - RF_uint64 cleanup_us; /* us spent tearing down the dag & maps, and - * generally cleaning up */ -} RF_user_acc_stats_t; + RF_uint64 suspend_ovhd_us;/* + * usec spent mucking in the + * access-suspension code. + */ + RF_uint64 map_us; /* usec spent mapping the access. */ + RF_uint64 lock_us; /* + * usec spent locking & unlocking + * stripes, including time spent + * blocked. + */ + RF_uint64 dag_create_us; /* usec spent creating the DAGs. */ + RF_uint64 dag_retry_us; /* + * _total_ usec spent retrying the op + * -- not broken down into components. + */ + RF_uint64 exec_us; /* usec spent in DispatchDAG. */ + RF_uint64 exec_engine_us; /* + * usec spent in engine, not including + * blocking time. + */ + RF_uint64 cleanup_us; /* + * usec spent tearing down the dag & + * maps, and generally cleaning up. + */ +} RF_user_acc_stats_t; typedef struct RF_recon_acc_stats_s { - RF_uint32 recon_start_to_fetch_us; - RF_uint32 recon_fetch_to_return_us; - RF_uint32 recon_return_to_submit_us; -} RF_recon_acc_stats_t; + RF_uint32 recon_start_to_fetch_us; + RF_uint32 recon_fetch_to_return_us; + RF_uint32 recon_return_to_submit_us; +} RF_recon_acc_stats_t; typedef struct RF_acctrace_entry_s { union { - RF_user_acc_stats_t user; - RF_recon_acc_stats_t recon; - } specific; - RF_uint8 reconacc; /* whether this is a tracerec for a user acc - * or a recon acc */ - RF_uint64 xor_us; /* us spent doing XORs */ - RF_uint64 q_us; /* us spent doing XORs */ - RF_uint64 plog_us; /* us spent waiting to stuff parity into log */ - RF_uint64 diskqueue_us; /* _total_ us spent in disk queue(s), incl - * concurrent ops */ - RF_uint64 diskwait_us; /* _total_ us spent waiting actually waiting - * on the disk, incl concurrent ops */ - RF_uint64 total_us; /* total us spent on this access */ - RF_uint64 num_phys_ios; /* number of physical I/Os invoked */ - RF_uint64 phys_io_us; /* time of physical I/O */ - RF_Etimer_t tot_timer; /* a timer used to compute total access time */ - RF_Etimer_t timer; /* a generic timer val for timing events that - * live across procedure boundaries */ - RF_Etimer_t recon_timer;/* generic timer for recon stuff */ - RF_uint64 index; -} RF_AccTraceEntry_t; + RF_user_acc_stats_t user; + RF_recon_acc_stats_t recon; + } specific; + RF_uint8 reconacc; /* + * Whether this is a tracerec for a + * user acc or a recon acc. + */ + RF_uint64 xor_us; /* usec spent doing XORs. */ + RF_uint64 q_us; /* usec spent doing XORs. */ + RF_uint64 plog_us; /* + * usec spent waiting to stuff parity + * into log. 
+ */ + RF_uint64 diskqueue_us; /* + * _total_ usec spent in disk queue(s), + * incl concurrent ops. + */ + RF_uint64 diskwait_us; /* + * _total_ usec spent actually waiting + * on the disk, incl concurrent ops. + */ + RF_uint64 total_us; /* Total usec spent on this access. */ + RF_uint64 num_phys_ios; /* Number of physical I/Os invoked. */ + RF_uint64 phys_io_us; /* Time of physical I/O. */ + RF_Etimer_t tot_timer; /* + * A timer used to compute total + * access time. + */ + RF_Etimer_t timer; /* + * A generic timer val for timing + * events that live across procedure + * boundaries. + */ + RF_Etimer_t recon_timer; /* Generic timer for recon stuff. */ + RF_uint64 index; +} RF_AccTraceEntry_t; typedef struct RF_AccTotals_s { - /* user acc stats */ - RF_uint64 suspend_ovhd_us; - RF_uint64 map_us; - RF_uint64 lock_us; - RF_uint64 dag_create_us; - RF_uint64 dag_retry_us; - RF_uint64 exec_us; - RF_uint64 exec_engine_us; - RF_uint64 cleanup_us; - RF_uint64 user_reccount; - /* recon acc stats */ - RF_uint64 recon_start_to_fetch_us; - RF_uint64 recon_fetch_to_return_us; - RF_uint64 recon_return_to_submit_us; - RF_uint64 recon_io_overflow_count; - RF_uint64 recon_phys_io_us; - RF_uint64 recon_num_phys_ios; - RF_uint64 recon_diskwait_us; - RF_uint64 recon_reccount; - /* trace entry stats */ - RF_uint64 xor_us; - RF_uint64 q_us; - RF_uint64 plog_us; - RF_uint64 diskqueue_us; - RF_uint64 diskwait_us; - RF_uint64 total_us; - RF_uint64 num_log_ents; - RF_uint64 phys_io_overflow_count; - RF_uint64 num_phys_ios; - RF_uint64 phys_io_us; - RF_uint64 bigvals; - /* histograms */ - RF_Hist_t dw_hist[RF_HIST_NUM_BUCKETS]; - RF_Hist_t tot_hist[RF_HIST_NUM_BUCKETS]; -} RF_AccTotals_t; -#if RF_UTILITY == 0 -RF_DECLARE_EXTERN_MUTEX(rf_tracing_mutex) -#endif /* RF_UTILITY == 0 */ + /* User acc stats. */ + RF_uint64 suspend_ovhd_us; + RF_uint64 map_us; + RF_uint64 lock_us; + RF_uint64 dag_create_us; + RF_uint64 dag_retry_us; + RF_uint64 exec_us; + RF_uint64 exec_engine_us; + RF_uint64 cleanup_us; + RF_uint64 user_reccount; + /* Recon acc stats. */ + RF_uint64 recon_start_to_fetch_us; + RF_uint64 recon_fetch_to_return_us; + RF_uint64 recon_return_to_submit_us; + RF_uint64 recon_io_overflow_count; + RF_uint64 recon_phys_io_us; + RF_uint64 recon_num_phys_ios; + RF_uint64 recon_diskwait_us; + RF_uint64 recon_reccount; + /* Trace entry stats. */ + RF_uint64 xor_us; + RF_uint64 q_us; + RF_uint64 plog_us; + RF_uint64 diskqueue_us; + RF_uint64 diskwait_us; + RF_uint64 total_us; + RF_uint64 num_log_ents; + RF_uint64 phys_io_overflow_count; + RF_uint64 num_phys_ios; + RF_uint64 phys_io_us; + RF_uint64 bigvals; + /* Histograms. 
*/ + RF_Hist_t dw_hist[RF_HIST_NUM_BUCKETS]; + RF_Hist_t tot_hist[RF_HIST_NUM_BUCKETS]; +} RF_AccTotals_t; + +#if RF_UTILITY == 0 +RF_DECLARE_EXTERN_MUTEX(rf_tracing_mutex); +#endif /* RF_UTILITY == 0 */ - int rf_ConfigureAccessTrace(RF_ShutdownList_t ** listp); - void rf_LogTraceRec(RF_Raid_t * raid, RF_AccTraceEntry_t * rec); - void rf_FlushAccessTraceBuf(void); +int rf_ConfigureAccessTrace(RF_ShutdownList_t **); +void rf_LogTraceRec(RF_Raid_t * raid, RF_AccTraceEntry_t *); +void rf_FlushAccessTraceBuf(void); -#endif /* !_RF__RF_ACCTRACE_H_ */ +#endif /* !_RF__RF_ACCTRACE_H_ */ diff --git a/sys/dev/raidframe/rf_alloclist.c b/sys/dev/raidframe/rf_alloclist.c index f0ddd462625..9da2da7feaa 100644 --- a/sys/dev/raidframe/rf_alloclist.c +++ b/sys/dev/raidframe/rf_alloclist.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_alloclist.c,v 1.3 2000/01/07 14:50:19 peter Exp $ */ +/* $OpenBSD: rf_alloclist.c,v 1.4 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_alloclist.c,v 1.4 1999/08/13 03:41:53 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,10 +30,10 @@ /**************************************************************************** * - * Alloclist.c -- code to manipulate allocation lists + * Alloclist.c -- Code to manipulate allocation lists. * - * an allocation list is just a list of AllocListElem structures. Each - * such structure contains a fixed-size array of pointers. Calling + * An allocation list is just a list of AllocListElem structures. Each + * such structure contains a fixed-size array of pointers. Calling * FreeAList() causes each pointer to be freed. * ***************************************************************************/ @@ -45,20 +46,20 @@ #include "rf_general.h" #include "rf_shutdown.h" -RF_DECLARE_STATIC_MUTEX(alist_mutex) - static unsigned int fl_hit_count, fl_miss_count; +RF_DECLARE_STATIC_MUTEX(alist_mutex); +static unsigned int fl_hit_count, fl_miss_count; - static RF_AllocListElem_t *al_free_list = NULL; - static int al_free_list_count; +static RF_AllocListElem_t *al_free_list = NULL; +static int al_free_list_count; -#define RF_AL_FREELIST_MAX 256 +#define RF_AL_FREELIST_MAX 256 -#define DO_FREE(_p,_sz) RF_Free((_p),(_sz)) +#define DO_FREE(_p,_sz) RF_Free((_p), (_sz)) - static void rf_ShutdownAllocList(void *); +void rf_ShutdownAllocList(void *); - static void rf_ShutdownAllocList(ignored) - void *ignored; +void +rf_ShutdownAllocList(void *ignored) { RF_AllocListElem_t *p, *pt; @@ -69,30 +70,30 @@ RF_DECLARE_STATIC_MUTEX(alist_mutex) } rf_mutex_destroy(&alist_mutex); /* - printf("Alloclist: Free list hit count %lu (%lu %%) miss count %lu (%lu %%)\n", - fl_hit_count, (100*fl_hit_count)/(fl_hit_count+fl_miss_count), - fl_miss_count, (100*fl_miss_count)/(fl_hit_count+fl_miss_count)); - */ + * printf("Alloclist: Free list hit count %lu (%lu %%) miss count %lu" + * " (%lu %%).\n", fl_hit_count, + * (100*fl_hit_count)/(fl_hit_count+fl_miss_count), + * fl_miss_count, (100*fl_miss_count)/(fl_hit_count+fl_miss_count)); + */ } -int -rf_ConfigureAllocList(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureAllocList(RF_ShutdownList_t **listp) { - int rc; + int rc; rc = rf_mutex_init(&alist_mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n", + __FILE__, __LINE__, rc); return (rc); } al_free_list = NULL; fl_hit_count = fl_miss_count = al_free_list_count = 0; rc = rf_ShutdownCreate(listp, rf_ShutdownAllocList, NULL); if 
(rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d.\n", __FILE__, __LINE__, rc); rf_mutex_destroy(&alist_mutex); return (rc); } @@ -100,23 +101,21 @@ rf_ConfigureAllocList(listp) } -/* we expect the lists to have at most one or two elements, so we're willing - * to search for the end. If you ever observe the lists growing longer, +/* + * We expect the lists to have at most one or two elements, so we're willing + * to search for the end. If you ever observe the lists growing longer, * increase POINTERS_PER_ALLOC_LIST_ELEMENT. */ -void -rf_real_AddToAllocList(l, p, size, lockflag) - RF_AllocListElem_t *l; - void *p; - int size; - int lockflag; +void +rf_real_AddToAllocList(RF_AllocListElem_t *l, void *p, int size, int lockflag) { RF_AllocListElem_t *newelem; for (; l->next; l = l->next) - RF_ASSERT(l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT); /* find end of list */ + RF_ASSERT(l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT); /* Find end of list. */ - RF_ASSERT(l->numPointers >= 0 && l->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); + RF_ASSERT(l->numPointers >= 0 && + l->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); if (l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT) { newelem = rf_real_MakeAllocList(lockflag); l->next = newelem; @@ -129,22 +128,23 @@ rf_real_AddToAllocList(l, p, size, lockflag) } -/* we use the debug_mem_mutex here because we need to lock it anyway to call free. - * this is probably a bug somewhere else in the code, but when I call malloc/free - * outside of any lock I have endless trouble with malloc appearing to return the - * same pointer twice. Since we have to lock it anyway, we might as well use it - * as the lock around the al_free_list. Note that we can't call Free with the - * debug_mem_mutex locked. +/* + * We use the debug_mem_mutex here because we need to lock it anyway to call + * free. This is probably a bug somewhere else in the code, but when I call + * malloc/free outside of any lock, I have endless trouble with malloc + * appearing to return the same pointer twice. Since we have to lock it + * anyway, we might as well use it as the lock around the al_free_list. + * Note that we can't call Free with the debug_mem_mutex locked. */ -void -rf_FreeAllocList(l) - RF_AllocListElem_t *l; +void +rf_FreeAllocList(RF_AllocListElem_t *l) { - int i; + int i; RF_AllocListElem_t *temp, *p; for (p = l; p; p = p->next) { - RF_ASSERT(p->numPointers >= 0 && p->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); + RF_ASSERT(p->numPointers >= 0 && + p->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT); for (i = 0; i < p->numPointers; i++) { RF_ASSERT(p->pointers[i]); RF_Free(p->pointers[i], p->sizes[i]); @@ -164,8 +164,7 @@ rf_FreeAllocList(l) } RF_AllocListElem_t * -rf_real_MakeAllocList(lockflag) - int lockflag; +rf_real_MakeAllocList(int lockflag) { RF_AllocListElem_t *p; @@ -176,9 +175,12 @@ rf_real_MakeAllocList(lockflag) al_free_list_count--; } else { fl_miss_count++; - RF_Malloc(p, sizeof(RF_AllocListElem_t), (RF_AllocListElem_t *)); /* no allocation locking - * in kernel, so this is - * fine */ + RF_Malloc(p, sizeof(RF_AllocListElem_t), + (RF_AllocListElem_t *)); /* + * No allocation locking + * in kernel, so this is + * fine. 
+ */ } if (p == NULL) { return (NULL); diff --git a/sys/dev/raidframe/rf_alloclist.h b/sys/dev/raidframe/rf_alloclist.h index 8426b1cd7fd..3f7874e5a25 100644 --- a/sys/dev/raidframe/rf_alloclist.h +++ b/sys/dev/raidframe/rf_alloclist.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_alloclist.h,v 1.2 1999/02/16 00:02:23 niklas Exp $ */ +/* $OpenBSD: rf_alloclist.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_alloclist.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,32 +30,33 @@ /**************************************************************************** * - * alloclist.h -- header file for alloclist.c + * alloclist.h -- Header file for alloclist.c * ***************************************************************************/ -#ifndef _RF__RF_ALLOCLIST_H_ -#define _RF__RF_ALLOCLIST_H_ +#ifndef _RF__RF_ALLOCLIST_H_ +#define _RF__RF_ALLOCLIST_H_ #include "rf_types.h" -#define RF_POINTERS_PER_ALLOC_LIST_ELEMENT 20 +#define RF_POINTERS_PER_ALLOC_LIST_ELEMENT 20 struct RF_AllocListElem_s { - void *pointers[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; - int sizes[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; - int numPointers; - RF_AllocListElem_t *next; + void *pointers[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; + int sizes[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; + int numPointers; + RF_AllocListElem_t *next; }; -#define rf_MakeAllocList(_ptr_) _ptr_ = rf_real_MakeAllocList(1); -#define rf_AddToAllocList(_l_,_ptr_,_sz_) rf_real_AddToAllocList((_l_), (_ptr_), (_sz_), 1) +#define rf_MakeAllocList(_ptr_) _ptr_ = rf_real_MakeAllocList(1); +#define rf_AddToAllocList(_l_,_ptr_,_sz_) \ + rf_real_AddToAllocList((_l_), (_ptr_), (_sz_), 1) -int rf_ConfigureAllocList(RF_ShutdownList_t ** listp); +int rf_ConfigureAllocList(RF_ShutdownList_t **); -#if RF_UTILITY == 0 -void rf_real_AddToAllocList(RF_AllocListElem_t * l, void *p, int size, int lockflag); -void rf_FreeAllocList(RF_AllocListElem_t * l); -RF_AllocListElem_t *rf_real_MakeAllocList(int lockflag); -#endif /* RF_UTILITY == 0 */ +#if RF_UTILITY == 0 +void rf_real_AddToAllocList(RF_AllocListElem_t *, void *, int, int); +void rf_FreeAllocList(RF_AllocListElem_t *); +RF_AllocListElem_t *rf_real_MakeAllocList(int); +#endif /* RF_UTILITY == 0 */ -#endif /* !_RF__RF_ALLOCLIST_H_ */ +#endif /* !_RF__RF_ALLOCLIST_H_ */ diff --git a/sys/dev/raidframe/rf_archs.h b/sys/dev/raidframe/rf_archs.h index 700746d15f0..72fb7891dda 100644 --- a/sys/dev/raidframe/rf_archs.h +++ b/sys/dev/raidframe/rf_archs.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_archs.h,v 1.5 2000/08/08 16:07:38 peter Exp $ */ +/* $OpenBSD: rf_archs.h,v 1.6 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_archs.h,v 1.9 2000/03/04 03:27:13 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,46 +28,46 @@ * rights to redistribute these changes. */ -/* rf_archs.h -- defines for which architectures you want to - * include is some particular build of raidframe. Unfortunately, +/* + * rf_archs.h -- Defines for which architectures you want to + * include in some particular build of RAIDframe. Unfortunately, * it's difficult to exclude declustering, P+Q, and distributed * sparing because the code is intermixed with RAID5 code. This * should be fixed. * - * this is really intended only for use in the kernel, where I + * This is really intended only for use in the kernel, where I * am worried about the size of the object module. 
At user level and * in the simulator, I don't really care that much, so all the * architectures can be compiled together. Note that by itself, turning * off these defines does not affect the size of the executable; you * have to edit the makefile for that. * - * comment out any line below to eliminate that architecture. - * the list below includes all the modules that can be compiled + * Comment out any line below to eliminate that architecture. + * The list below includes all the modules that can be compiled * out. - * */ -#ifndef _RF__RF_ARCHS_H_ -#define _RF__RF_ARCHS_H_ +#ifndef _RF__RF_ARCHS_H_ +#define _RF__RF_ARCHS_H_ -#define RF_INCLUDE_EVENODD 1 +#define RF_INCLUDE_EVENODD 1 -#define RF_INCLUDE_RAID5_RS 1 -#define RF_INCLUDE_PARITYLOGGING 1 +#define RF_INCLUDE_RAID5_RS 1 +#define RF_INCLUDE_PARITYLOGGING 1 -#define RF_INCLUDE_CHAINDECLUSTER 1 -#define RF_INCLUDE_INTERDECLUSTER 1 +#define RF_INCLUDE_CHAINDECLUSTER 1 +#define RF_INCLUDE_INTERDECLUSTER 1 -#define RF_INCLUDE_RAID0 1 -#define RF_INCLUDE_RAID1 1 -#define RF_INCLUDE_RAID4 1 -#define RF_INCLUDE_RAID5 1 -#define RF_INCLUDE_RAID6 0 -#define RF_INCLUDE_DECL_PQ 0 +#define RF_INCLUDE_RAID0 1 +#define RF_INCLUDE_RAID1 1 +#define RF_INCLUDE_RAID4 1 +#define RF_INCLUDE_RAID5 1 +#define RF_INCLUDE_RAID6 1 +#define RF_INCLUDE_DECL_PQ 1 -#define RF_MEMORY_REDZONES 0 -#define RF_RECON_STATS 1 +#define RF_MEMORY_REDZONES 0 +#define RF_RECON_STATS 1 #include "rf_options.h" -#endif /* !_RF__RF_ARCHS_H_ */ +#endif /* !_RF__RF_ARCHS_H_ */ diff --git a/sys/dev/raidframe/rf_aselect.c b/sys/dev/raidframe/rf_aselect.c index b50be26171b..e376909ec8b 100644 --- a/sys/dev/raidframe/rf_aselect.c +++ b/sys/dev/raidframe/rf_aselect.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_aselect.c,v 1.2 1999/02/16 00:02:23 niklas Exp $ */ +/* $OpenBSD: rf_aselect.c,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_aselect.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,7 +28,7 @@ * rights to redistribute these changes. */ -/***************************************************************************** +/**************************************************************************** * * aselect.c -- algorithm selection code * @@ -45,28 +46,25 @@ #include "rf_map.h" #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) -/* the function below is not used... so don't define it! */ +/* The function below is not used... so don't define it! */ #else -static void TransferDagMemory(RF_DagHeader_t *, RF_DagHeader_t *); +void rf_TransferDagMemory(RF_DagHeader_t *, RF_DagHeader_t *); #endif -static int InitHdrNode(RF_DagHeader_t **, RF_Raid_t *, int); -static void UpdateNodeHdrPtr(RF_DagHeader_t *, RF_DagNode_t *); -int rf_SelectAlgorithm(RF_RaidAccessDesc_t *, RF_RaidAccessFlags_t); +int rf_InitHdrNode(RF_DagHeader_t **, RF_Raid_t *, int); +void rf_UpdateNodeHdrPtr(RF_DagHeader_t *, RF_DagNode_t *); +int rf_SelectAlgorithm(RF_RaidAccessDesc_t *, RF_RaidAccessFlags_t); -/****************************************************************************** +/***************************************************************************** * - * Create and Initialiaze a dag header and termination node + * Create and Initialize a dag header and termination node. 
* *****************************************************************************/ -static int -InitHdrNode(hdr, raidPtr, memChunkEnable) - RF_DagHeader_t **hdr; - RF_Raid_t *raidPtr; - int memChunkEnable; +int +rf_InitHdrNode(RF_DagHeader_t **hdr, RF_Raid_t *raidPtr, int memChunkEnable) { - /* create and initialize dag hdr */ + /* Create and initialize dag hdr. */ *hdr = rf_AllocDAGHeader(); rf_MakeAllocList((*hdr)->allocList); if ((*hdr)->allocList == NULL) { @@ -79,61 +77,70 @@ InitHdrNode(hdr, raidPtr, memChunkEnable) (*hdr)->next = NULL; return (0); } -/****************************************************************************** + + +/***************************************************************************** * - * Transfer allocation list and mem chunks from one dag to another + * Transfer allocation list and mem chunks from one dag to another. * *****************************************************************************/ #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) -/* the function below is not used... so don't define it! */ +/* The function below is not used... so don't define it! */ #else -static void -TransferDagMemory(daga, dagb) - RF_DagHeader_t *daga; - RF_DagHeader_t *dagb; +void +rf_TransferDagMemory(RF_DagHeader_t *daga, RF_DagHeader_t *dagb) { RF_AccessStripeMapHeader_t *end; RF_AllocListElem_t *p; - int i, memChunksXfrd = 0, xtraChunksXfrd = 0; + int i, memChunksXfrd = 0, xtraChunksXfrd = 0; - /* transfer allocList from dagb to daga */ + /* Transfer allocList from dagb to daga. */ for (p = dagb->allocList; p; p = p->next) { for (i = 0; i < p->numPointers; i++) { - rf_AddToAllocList(daga->allocList, p->pointers[i], p->sizes[i]); + rf_AddToAllocList(daga->allocList, p->pointers[i], + p->sizes[i]); p->pointers[i] = NULL; p->sizes[i] = 0; } p->numPointers = 0; } - /* transfer chunks from dagb to daga */ - while ((memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) && (daga->chunkIndex < RF_MAXCHUNKS)) { - /* stuff chunks into daga's memChunk array */ + /* Transfer chunks from dagb to daga. */ + while ((memChunksXfrd + xtraChunksXfrd < + dagb->chunkIndex + dagb->xtraChunkIndex) && + (daga->chunkIndex < RF_MAXCHUNKS)) { + /* Stuff chunks into daga's memChunk array. */ if (memChunksXfrd < dagb->chunkIndex) { - daga->memChunk[daga->chunkIndex++] = dagb->memChunk[memChunksXfrd]; + daga->memChunk[daga->chunkIndex++] = + dagb->memChunk[memChunksXfrd]; dagb->memChunk[memChunksXfrd++] = NULL; } else { - daga->memChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd]; + daga->memChunk[daga->xtraChunkIndex++] = + dagb->xtraMemChunk[xtraChunksXfrd]; dagb->xtraMemChunk[xtraChunksXfrd++] = NULL; } } - /* use escape hatch to hold excess chunks */ - while (memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) { + /* Use escape hatch to hold excess chunks. 
*/ + while (memChunksXfrd + xtraChunksXfrd < + dagb->chunkIndex + dagb->xtraChunkIndex) { if (memChunksXfrd < dagb->chunkIndex) { - daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->memChunk[memChunksXfrd]; + daga->xtraMemChunk[daga->xtraChunkIndex++] = + dagb->memChunk[memChunksXfrd]; dagb->memChunk[memChunksXfrd++] = NULL; } else { - daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd]; + daga->xtraMemChunk[daga->xtraChunkIndex++] = + dagb->xtraMemChunk[xtraChunksXfrd]; dagb->xtraMemChunk[xtraChunksXfrd++] = NULL; } } - RF_ASSERT((memChunksXfrd == dagb->chunkIndex) && (xtraChunksXfrd == dagb->xtraChunkIndex)); + RF_ASSERT((memChunksXfrd == dagb->chunkIndex) && + (xtraChunksXfrd == dagb->xtraChunkIndex)); RF_ASSERT(daga->chunkIndex <= RF_MAXCHUNKS); RF_ASSERT(daga->xtraChunkIndex <= daga->xtraChunkCnt); dagb->chunkIndex = 0; dagb->xtraChunkIndex = 0; - /* transfer asmList from dagb to daga */ + /* Transfer asmList from dagb to daga. */ if (dagb->asmList) { if (daga->asmList) { end = daga->asmList; @@ -145,35 +152,36 @@ TransferDagMemory(daga, dagb) dagb->asmList = NULL; } } -#endif /* __NetBSD__ || __OpenBSD__ */ +#endif /* __NetBSD__ || __OpenBSD__ */ + -/***************************************************************************************** +/***************************************************************************** * - * Ensure that all node->dagHdr fields in a dag are consistent + * Ensure that all node->dagHdr fields in a dag are consistent. * - * IMPORTANT: This routine recursively searches all succedents of the node. If a - * succedent is encountered whose dagHdr ptr does not require adjusting, that node's - * succedents WILL NOT BE EXAMINED. + * IMPORTANT: This routine recursively searches all succedents of the node. + * If a succedent is encountered whose dagHdr ptr does not require adjusting, + * that node's succedents WILL NOT BE EXAMINED. * - ****************************************************************************************/ -static void -UpdateNodeHdrPtr(hdr, node) - RF_DagHeader_t *hdr; - RF_DagNode_t *node; + *****************************************************************************/ +void +rf_UpdateNodeHdrPtr(RF_DagHeader_t *hdr, RF_DagNode_t *node) { - int i; + int i; RF_ASSERT(hdr != NULL && node != NULL); for (i = 0; i < node->numSuccedents; i++) if (node->succedents[i]->dagHdr != hdr) - UpdateNodeHdrPtr(hdr, node->succedents[i]); + rf_UpdateNodeHdrPtr(hdr, node->succedents[i]); node->dagHdr = hdr; } -/****************************************************************************** + + +/***************************************************************************** * * Create a DAG to do a read or write operation. * - * create an array of dagLists, one list per parity stripe. - * return the lists in the array desc->dagArray. + * Create an array of dagLists, one list per parity stripe. + * Return the lists in the array desc->dagArray. * * Normally, each list contains one dag for the entire stripe. In some * tricky cases, we break this into multiple dags, either one per stripe @@ -181,7 +189,7 @@ UpdateNodeHdrPtr(hdr, node) * as a linked list (dagList) which is executed sequentially (to preserve * atomic parity updates in the stripe). * - * dags which operate on independent parity goups (stripes) are returned in + * Dags that operate on independent parity goups (stripes) are returned in * independent dagLists (distinct elements in desc->dagArray) and may be * executed concurrently. 
* @@ -193,42 +201,40 @@ UpdateNodeHdrPtr(hdr, node) * 2) create dags and concatenate/merge to form the final dag. * * Because dag's are basic blocks (single entry, single exit, unconditional - * control flow, we can add the following optimizations (future work): + * control flow), we can add the following optimizations (future work): * first-pass optimizer to allow max concurrency (need all data dependencies) * second-pass optimizer to eliminate common subexpressions (need true - * data dependencies) + * data dependencies) * third-pass optimizer to eliminate dead code (need true data dependencies) *****************************************************************************/ -#define MAXNSTRIPES 50 +#define MAXNSTRIPES 50 -int -rf_SelectAlgorithm(desc, flags) - RF_RaidAccessDesc_t *desc; - RF_RaidAccessFlags_t flags; +int +rf_SelectAlgorithm(RF_RaidAccessDesc_t *desc, RF_RaidAccessFlags_t flags) { RF_AccessStripeMapHeader_t *asm_h = desc->asmap; RF_IoType_t type = desc->type; RF_Raid_t *raidPtr = desc->raidPtr; - void *bp = desc->bp; + void *bp = desc->bp; RF_AccessStripeMap_t *asmap = asm_h->stripeMap; RF_AccessStripeMap_t *asm_p; RF_DagHeader_t *dag_h = NULL, *tempdag_h, *lastdag_h; - int i, j, k; + int i, j, k; RF_VoidFuncPtr *stripeFuncs, normalStripeFuncs[MAXNSTRIPES]; RF_AccessStripeMap_t *asm_up, *asm_bp; RF_AccessStripeMapHeader_t ***asmh_u, *endASMList; RF_AccessStripeMapHeader_t ***asmh_b; RF_VoidFuncPtr **stripeUnitFuncs, uFunc; RF_VoidFuncPtr **blockFuncs, bFunc; - int numStripesBailed = 0, cantCreateDAGs = RF_FALSE; - int numStripeUnitsBailed = 0; - int stripeNum, numUnitDags = 0, stripeUnitNum, numBlockDags = 0; + int numStripesBailed = 0, cantCreateDAGs = RF_FALSE; + int numStripeUnitsBailed = 0; + int stripeNum, numUnitDags = 0, stripeUnitNum, numBlockDags = 0; RF_StripeNum_t numStripeUnits; RF_SectorNum_t numBlocks; RF_RaidAddr_t address; - int length; + int length; RF_PhysDiskAddr_t *physPtr; caddr_t buffer; @@ -237,97 +243,176 @@ rf_SelectAlgorithm(desc, flags) stripeUnitFuncs = NULL; blockFuncs = NULL; - /* get an array of dag-function creation pointers, try to avoid - * calling malloc */ + /* + * Get an array of dag-function creation pointers. + * Try to avoid calling malloc. + */ if (asm_h->numStripes <= MAXNSTRIPES) stripeFuncs = normalStripeFuncs; else - RF_Calloc(stripeFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); + RF_Calloc(stripeFuncs, asm_h->numStripes, + sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - /* walk through the asm list once collecting information */ - /* attempt to find a single creation function for each stripe */ + /* + * Walk through the asm list once collecting information. + * Attempt to find a single creation function for each stripe. + */ desc->numStripes = 0; for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) { desc->numStripes++; - (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_p, &stripeFuncs[i]); - /* check to see if we found a creation func for this stripe */ + (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_p, + &stripeFuncs[i]); + /* Check to see if we found a creation func for this stripe. */ if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) { - /* could not find creation function for entire stripe - * so, let's see if we can find one for each stripe - * unit in the stripe */ + /* + * Could not find creation function for entire stripe. + * So, let's see if we can find one for each stripe + * unit in the stripe. 
+ */ if (numStripesBailed == 0) { - /* one stripe map header for each stripe we - * bail on */ - RF_Malloc(asmh_u, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes, (RF_AccessStripeMapHeader_t ***)); - /* create an array of ptrs to arrays of - * stripeFuncs */ - RF_Calloc(stripeUnitFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **)); + /* + * One stripe map header for each stripe we + * bail on. + */ + RF_Malloc(asmh_u, + sizeof(RF_AccessStripeMapHeader_t **) * + asm_h->numStripes, + (RF_AccessStripeMapHeader_t ***)); + /* + * Create an array of ptrs to arrays of + * stripeFuncs. + */ + RF_Calloc(stripeUnitFuncs, asm_h->numStripes, + sizeof(RF_VoidFuncPtr), + (RF_VoidFuncPtr **)); } - /* create an array of creation funcs (called - * stripeFuncs) for this stripe */ + /* + * Create an array of creation funcs (called + * stripeFuncs) for this stripe. + */ numStripeUnits = asm_p->numStripeUnitsAccessed; - RF_Calloc(stripeUnitFuncs[numStripesBailed], numStripeUnits, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - RF_Malloc(asmh_u[numStripesBailed], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **)); - - /* lookup array of stripeUnitFuncs for this stripe */ - for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { - /* remap for series of single stripe-unit - * accesses */ + RF_Calloc(stripeUnitFuncs[numStripesBailed], + numStripeUnits, sizeof(RF_VoidFuncPtr), + (RF_VoidFuncPtr *)); + RF_Malloc(asmh_u[numStripesBailed], numStripeUnits * + sizeof(RF_AccessStripeMapHeader_t *), + (RF_AccessStripeMapHeader_t **)); + + /* Lookup array of stripeUnitFuncs for this stripe. */ + for (j = 0, physPtr = asm_p->physInfo; physPtr; + physPtr = physPtr->next, j++) { + /* + * Remap for series of single stripe-unit + * accesses. + */ address = physPtr->raidAddress; length = physPtr->numSector; buffer = physPtr->bufPtr; - asmh_u[numStripesBailed][j] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP); + asmh_u[numStripesBailed][j] = + rf_MapAccess(raidPtr, address, length, + buffer, RF_DONT_REMAP); asm_up = asmh_u[numStripesBailed][j]->stripeMap; - /* get the creation func for this stripe unit */ - (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_up, &(stripeUnitFuncs[numStripesBailed][j])); - - /* check to see if we found a creation func - * for this stripe unit */ - if (stripeUnitFuncs[numStripesBailed][j] == (RF_VoidFuncPtr) NULL) { - /* could not find creation function - * for stripe unit so, let's see if we - * can find one for each block in the - * stripe unit */ + /* + * Get the creation func for this + * stripe unit. + */ + (raidPtr->Layout.map->SelectionFunc) (raidPtr, + type, asm_up, + &(stripeUnitFuncs[numStripesBailed][j])); + + /* + * Check to see if we found a creation func + * for this stripe unit. + */ + if (stripeUnitFuncs[numStripesBailed][j] == + (RF_VoidFuncPtr) NULL) { + /* + * Could not find creation function + * for stripe unit. So, let's see if + * we can find one for each block in + * the stripe unit. 
+ */ if (numStripeUnitsBailed == 0) { - /* one stripe map header for - * each stripe unit we bail on */ - RF_Malloc(asmh_b, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes * raidPtr->Layout.numDataCol, (RF_AccessStripeMapHeader_t ***)); - /* create an array of ptrs to - * arrays of blockFuncs */ - RF_Calloc(blockFuncs, asm_h->numStripes * raidPtr->Layout.numDataCol, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **)); + /* + * one stripe map header for + * each stripe unit we bail on. + */ + RF_Malloc(asmh_b, + sizeof(RF_AccessStripeMapHeader_t **) * + asm_h->numStripes * + raidPtr->Layout.numDataCol, + (RF_AccessStripeMapHeader_t ***)); + /* + * Create an array of ptrs to + * arrays of blockFuncs. + */ + RF_Calloc(blockFuncs, + asm_h->numStripes * + raidPtr->Layout.numDataCol, + sizeof(RF_VoidFuncPtr), + (RF_VoidFuncPtr **)); } - /* create an array of creation funcs + /* + * Create an array of creation funcs * (called blockFuncs) for this stripe - * unit */ + * unit. + */ numBlocks = physPtr->numSector; numBlockDags += numBlocks; - RF_Calloc(blockFuncs[numStripeUnitsBailed], numBlocks, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *)); - RF_Malloc(asmh_b[numStripeUnitsBailed], numBlocks * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **)); - - /* lookup array of blockFuncs for this - * stripe unit */ + RF_Calloc( + blockFuncs[numStripeUnitsBailed], + numBlocks, sizeof(RF_VoidFuncPtr), + (RF_VoidFuncPtr *)); + RF_Malloc(asmh_b[numStripeUnitsBailed], + numBlocks * + sizeof(RF_AccessStripeMapHeader_t *), + (RF_AccessStripeMapHeader_t **)); + + /* + * Lookup array of blockFuncs for this + * stripe unit. + */ for (k = 0; k < numBlocks; k++) { - /* remap for series of single - * stripe-unit accesses */ - address = physPtr->raidAddress + k; + /* + * Remap for series of single + * stripe-unit accesses. + */ + address = physPtr->raidAddress + + k; length = 1; - buffer = physPtr->bufPtr + (k * (1 << raidPtr->logBytesPerSector)); - - asmh_b[numStripeUnitsBailed][k] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP); - asm_bp = asmh_b[numStripeUnitsBailed][k]->stripeMap; - - /* get the creation func for - * this stripe unit */ - (raidPtr->Layout.map->SelectionFunc) (raidPtr, type, asm_bp, &(blockFuncs[numStripeUnitsBailed][k])); - - /* check to see if we found a + buffer = physPtr->bufPtr + + (k * (1 << + raidPtr->logBytesPerSector)); + + asmh_b[numStripeUnitsBailed][k] + = rf_MapAccess(raidPtr, + address, length, buffer, + RF_DONT_REMAP); + asm_bp = + asmh_b[numStripeUnitsBailed][k]->stripeMap; + + /* + * Get the creation func for + * this stripe unit. + */ + (raidPtr->Layout.map-> + SelectionFunc) (raidPtr, + type, asm_bp, + &(blockFuncs[numStripeUnitsBailed][k])); + + /* + * Check to see if we found a * creation func for this - * stripe unit */ - if (blockFuncs[numStripeUnitsBailed][k] == NULL) - cantCreateDAGs = RF_TRUE; + * stripe unit. + */ + if (blockFuncs + [numStripeUnitsBailed][k] + == NULL) + cantCreateDAGs = + RF_TRUE; } numStripeUnitsBailed++; } else { @@ -340,85 +425,125 @@ rf_SelectAlgorithm(desc, flags) } if (cantCreateDAGs) { - /* free memory and punt */ + /* Free memory and punt. 
*/ if (asm_h->numStripes > MAXNSTRIPES) - RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); + RF_Free(stripeFuncs, asm_h->numStripes * + sizeof(RF_VoidFuncPtr)); if (numStripesBailed > 0) { stripeNum = 0; - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) + for (i = 0, asm_p = asmap; asm_p; + asm_p = asm_p->next, i++) if (stripeFuncs[i] == NULL) { - numStripeUnits = asm_p->numStripeUnitsAccessed; + numStripeUnits = + asm_p->numStripeUnitsAccessed; for (j = 0; j < numStripeUnits; j++) - rf_FreeAccessStripeMap(asmh_u[stripeNum][j]); - RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr)); + rf_FreeAccessStripeMap( + asmh_u[stripeNum][j]); + RF_Free(asmh_u[stripeNum], + numStripeUnits * + sizeof(RF_AccessStripeMapHeader_t *)); + RF_Free(stripeUnitFuncs[stripeNum], + numStripeUnits * + sizeof(RF_VoidFuncPtr)); stripeNum++; } RF_ASSERT(stripeNum == numStripesBailed); - RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); + RF_Free(stripeUnitFuncs, asm_h->numStripes * + sizeof(RF_VoidFuncPtr)); + RF_Free(asmh_u, asm_h->numStripes * + sizeof(RF_AccessStripeMapHeader_t **)); } return (1); } else { - /* begin dag creation */ + /* Begin dag creation. */ stripeNum = 0; stripeUnitNum = 0; - /* create an array of dagLists and fill them in */ - RF_CallocAndAdd(desc->dagArray, desc->numStripes, sizeof(RF_DagList_t), (RF_DagList_t *), desc->cleanupList); + /* Create an array of dagLists and fill them in. */ + RF_CallocAndAdd(desc->dagArray, desc->numStripes, + sizeof(RF_DagList_t), (RF_DagList_t *), desc->cleanupList); for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) { - /* grab dag header for this stripe */ + /* Grab dag header for this stripe. */ dag_h = NULL; desc->dagArray[i].desc = desc; if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) { - /* use bailout functions for this stripe */ - for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { + /* Use bailout functions for this stripe. */ + for (j = 0, physPtr = asm_p->physInfo; physPtr; + physPtr = physPtr->next, j++) { uFunc = stripeUnitFuncs[stripeNum][j]; if (uFunc == (RF_VoidFuncPtr) NULL) { - /* use bailout functions for - * this stripe unit */ - for (k = 0; k < physPtr->numSector; k++) { - /* create a dag for - * this block */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); - desc->dagArray[i].numDags++; + /* + * Use bailout functions for + * this stripe unit. + */ + for (k = 0; k < + physPtr->numSector; k++) { + /* + * Create a dag for + * this block. + */ + rf_InitHdrNode( + &tempdag_h, + raidPtr, + rf_useMemChunks); + desc->dagArray[i]. + numDags++; if (dag_h == NULL) { - dag_h = tempdag_h; + dag_h = + tempdag_h; } else { - lastdag_h->next = tempdag_h; + lastdag_h->next + = tempdag_h; } lastdag_h = tempdag_h; - bFunc = blockFuncs[stripeUnitNum][k]; + bFunc = blockFuncs + [stripeUnitNum][k]; RF_ASSERT(bFunc); - asm_bp = asmh_b[stripeUnitNum][k]->stripeMap; - (*bFunc) (raidPtr, asm_bp, tempdag_h, bp, flags, tempdag_h->allocList); + asm_bp = asmh_b + [stripeUnitNum][k] + ->stripeMap; + (*bFunc) (raidPtr, + asm_bp, tempdag_h, + bp, flags, + tempdag_h + ->allocList); } stripeUnitNum++; } else { - /* create a dag for this unit */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); + /* + * Create a dag for this unit. 
+ */ + rf_InitHdrNode(&tempdag_h, + raidPtr, rf_useMemChunks); desc->dagArray[i].numDags++; if (dag_h == NULL) { dag_h = tempdag_h; } else { - lastdag_h->next = tempdag_h; + lastdag_h->next = + tempdag_h; } lastdag_h = tempdag_h; - asm_up = asmh_u[stripeNum][j]->stripeMap; - (*uFunc) (raidPtr, asm_up, tempdag_h, bp, flags, tempdag_h->allocList); + asm_up = asmh_u[stripeNum][j] + ->stripeMap; + (*uFunc) (raidPtr, asm_up, + tempdag_h, bp, flags, + tempdag_h->allocList); } } RF_ASSERT(j == asm_p->numStripeUnitsAccessed); - /* merge linked bailout dag to existing dag - * collection */ + /* + * Merge linked bailout dag to existing dag + * collection. + */ stripeNum++; } else { - /* Create a dag for this parity stripe */ - InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks); + /* Create a dag for this parity stripe. */ + rf_InitHdrNode(&tempdag_h, raidPtr, + rf_useMemChunks); desc->dagArray[i].numDags++; if (dag_h == NULL) { dag_h = tempdag_h; @@ -427,15 +552,17 @@ rf_SelectAlgorithm(desc, flags) } lastdag_h = tempdag_h; - (stripeFuncs[i]) (raidPtr, asm_p, tempdag_h, bp, flags, tempdag_h->allocList); + (stripeFuncs[i]) (raidPtr, asm_p, tempdag_h, + bp, flags, tempdag_h->allocList); } desc->dagArray[i].dags = dag_h; } RF_ASSERT(i == desc->numStripes); - /* free memory */ + /* Free memory. */ if (asm_h->numStripes > MAXNSTRIPES) - RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); + RF_Free(stripeFuncs, asm_h->numStripes * + sizeof(RF_VoidFuncPtr)); if ((numStripesBailed > 0) || (numStripeUnitsBailed > 0)) { stripeNum = 0; stripeUnitNum = 0; @@ -445,49 +572,85 @@ rf_SelectAlgorithm(desc, flags) endASMList = endASMList->next; } else endASMList = NULL; - /* walk through io, stripe by stripe */ - for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) + /* Walk through io, stripe by stripe. */ + for (i = 0, asm_p = asmap; asm_p; + asm_p = asm_p->next, i++) if (stripeFuncs[i] == NULL) { - numStripeUnits = asm_p->numStripeUnitsAccessed; - /* walk through stripe, stripe unit by - * stripe unit */ - for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) { - if (stripeUnitFuncs[stripeNum][j] == NULL) { - numBlocks = physPtr->numSector; - /* walk through stripe + numStripeUnits = + asm_p->numStripeUnitsAccessed; + /* + * Walk through stripe, stripe unit by + * stripe unit. + */ + for (j = 0, physPtr = asm_p->physInfo; + physPtr; + physPtr = physPtr->next, j++) { + if (stripeUnitFuncs[stripeNum] + [j] == NULL) { + numBlocks = + physPtr->numSector; + /* + * Walk through stripe * unit, block by - * block */ - for (k = 0; k < numBlocks; k++) - if (dag_h->asmList == NULL) { - dag_h->asmList = asmh_b[stripeUnitNum][k]; - endASMList = dag_h->asmList; + * block. 
+ */ + for (k = 0; k < + numBlocks; k++) + if (dag_h + ->asmList + == NULL) { + dag_h->asmList = + asmh_b[stripeUnitNum][k]; + endASMList = dag_h->asmList; } else { - endASMList->next = asmh_b[stripeUnitNum][k]; - endASMList = endASMList->next; + endASMList->next = + asmh_b[stripeUnitNum][k]; + endASMList = endASMList->next; } - RF_Free(asmh_b[stripeUnitNum], numBlocks * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(blockFuncs[stripeUnitNum], numBlocks * sizeof(RF_VoidFuncPtr)); + RF_Free(asmh_b + [stripeUnitNum], numBlocks * + sizeof(RF_AccessStripeMapHeader_t *)); + RF_Free(blockFuncs + [stripeUnitNum], numBlocks * + sizeof(RF_VoidFuncPtr)); stripeUnitNum++; } if (dag_h->asmList == NULL) { - dag_h->asmList = asmh_u[stripeNum][j]; - endASMList = dag_h->asmList; + dag_h->asmList = asmh_u + [stripeNum][j]; + endASMList = dag_h + ->asmList; } else { - endASMList->next = asmh_u[stripeNum][j]; - endASMList = endASMList->next; + endASMList->next = + asmh_u[stripeNum] + [j]; + endASMList = endASMList + ->next; } } - RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *)); - RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr)); + RF_Free(asmh_u[stripeNum], + numStripeUnits * + sizeof( + RF_AccessStripeMapHeader_t *)); + RF_Free(stripeUnitFuncs[stripeNum], + numStripeUnits * + sizeof(RF_VoidFuncPtr)); stripeNum++; } RF_ASSERT(stripeNum == numStripesBailed); - RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); + RF_Free(stripeUnitFuncs, asm_h->numStripes * + sizeof(RF_VoidFuncPtr)); + RF_Free(asmh_u, asm_h->numStripes * + sizeof(RF_AccessStripeMapHeader_t **)); if (numStripeUnitsBailed > 0) { - RF_ASSERT(stripeUnitNum == numStripeUnitsBailed); - RF_Free(blockFuncs, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_VoidFuncPtr)); - RF_Free(asmh_b, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **)); + RF_ASSERT(stripeUnitNum == + numStripeUnitsBailed); + RF_Free(blockFuncs, raidPtr->Layout.numDataCol + * asm_h->numStripes * + sizeof(RF_VoidFuncPtr)); + RF_Free(asmh_b, raidPtr->Layout.numDataCol * + asm_h->numStripes * + sizeof(RF_AccessStripeMapHeader_t **)); } } return (0); diff --git a/sys/dev/raidframe/rf_aselect.h b/sys/dev/raidframe/rf_aselect.h index 565f042ab53..87a893f487b 100644 --- a/sys/dev/raidframe/rf_aselect.h +++ b/sys/dev/raidframe/rf_aselect.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_aselect.h,v 1.2 1999/02/16 00:02:24 niklas Exp $ */ +/* $OpenBSD: rf_aselect.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_aselect.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,15 +30,15 @@ /***************************************************************************** * - * aselect.h -- header file for algorithm selection code + * rf_aselect.h -- Header file for algorithm selection code. 
* *****************************************************************************/ -#ifndef _RF__RF_ASELECT_H_ -#define _RF__RF_ASELECT_H_ +#ifndef _RF__RF_ASELECT_H_ +#define _RF__RF_ASELECT_H_ #include "rf_desc.h" -int rf_SelectAlgorithm(RF_RaidAccessDesc_t * desc, RF_RaidAccessFlags_t flags); +int rf_SelectAlgorithm(RF_RaidAccessDesc_t *, RF_RaidAccessFlags_t); -#endif /* !_RF__RF_ASELECT_H_ */ +#endif /* !_RF__RF_ASELECT_H_ */ diff --git a/sys/dev/raidframe/rf_callback.c b/sys/dev/raidframe/rf_callback.c index ba7e3869c10..5c973bb2252 100644 --- a/sys/dev/raidframe/rf_callback.c +++ b/sys/dev/raidframe/rf_callback.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_callback.c,v 1.2 1999/02/16 00:02:24 niklas Exp $ */ +/* $OpenBSD: rf_callback.c,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_callback.c,v 1.3 1999/02/05 00:06:06 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,11 +28,11 @@ * rights to redistribute these changes. */ -/***************************************************************************************** +/***************************************************************************** * - * callback.c -- code to manipulate callback descriptor + * rf_callback.c -- Code to manipulate callback descriptor. * - ****************************************************************************************/ + *****************************************************************************/ #include "rf_types.h" @@ -43,23 +44,22 @@ static RF_FreeList_t *rf_callback_freelist; -#define RF_MAX_FREE_CALLBACK 64 -#define RF_CALLBACK_INC 4 -#define RF_CALLBACK_INITIAL 4 +void rf_ShutdownCallback(void *); + +#define RF_MAX_FREE_CALLBACK 64 +#define RF_CALLBACK_INC 4 +#define RF_CALLBACK_INITIAL 4 -static void rf_ShutdownCallback(void *); -static void -rf_ShutdownCallback(ignored) - void *ignored; +void +rf_ShutdownCallback(void *ignored) { RF_FREELIST_DESTROY(rf_callback_freelist, next, (RF_CallbackDesc_t *)); } -int -rf_ConfigureCallback(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureCallback(RF_ShutdownList_t **listp) { - int rc; + int rc; RF_FREELIST_CREATE(rf_callback_freelist, RF_MAX_FREE_CALLBACK, RF_CALLBACK_INC, sizeof(RF_CallbackDesc_t)); @@ -67,8 +67,8 @@ rf_ConfigureCallback(listp) return (ENOMEM); rc = rf_ShutdownCreate(listp, rf_ShutdownCallback, NULL); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d.\n", __FILE__, __LINE__, rc); rf_ShutdownCallback(NULL); return (rc); } @@ -78,7 +78,7 @@ rf_ConfigureCallback(listp) } RF_CallbackDesc_t * -rf_AllocCallbackDesc() +rf_AllocCallbackDesc(void) { RF_CallbackDesc_t *p; @@ -86,9 +86,8 @@ rf_AllocCallbackDesc() return (p); } -void -rf_FreeCallbackDesc(p) - RF_CallbackDesc_t *p; +void +rf_FreeCallbackDesc(RF_CallbackDesc_t *p) { RF_FREELIST_FREE(rf_callback_freelist, p, next); } diff --git a/sys/dev/raidframe/rf_callback.h b/sys/dev/raidframe/rf_callback.h index 528eed625b9..b0ac8830ce1 100644 --- a/sys/dev/raidframe/rf_callback.h +++ b/sys/dev/raidframe/rf_callback.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_callback.h,v 1.2 1999/02/16 00:02:24 niklas Exp $ */ +/* $OpenBSD: rf_callback.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_callback.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,39 +28,37 @@ * rights to redistribute these changes. 
*/ -/***************************************************************************************** +/***************************************************************************** * - * callback.h -- header file for callback.c + * rf_callback.h -- Header file for callback.c * - * the reconstruction code must manage concurrent I/Os on multiple drives. - * it sometimes needs to suspend operation on a particular drive until some - * condition occurs. we can't block the thread, of course, or we wouldn't - * be able to manage our other outstanding I/Os. Instead we just suspend + * The reconstruction code must manage concurrent I/Os on multiple drives. + * It sometimes needs to suspend operation on a particular drive until some + * condition occurs. We can't block the thread, of course, or we wouldn't + * be able to manage our other outstanding I/Os. Instead we just suspend * new activity on the indicated disk, and create a callback descriptor and * put it someplace where it will get invoked when the condition that's - * stalling us has cleared. When the descriptor is invoked, it will call + * stalling us has cleared. When the descriptor is invoked, it will call * a function that will restart operation on the indicated disk. * - ****************************************************************************************/ + *****************************************************************************/ -#ifndef _RF__RF_CALLBACK_H_ -#define _RF__RF_CALLBACK_H_ +#ifndef _RF__RF_CALLBACK_H_ +#define _RF__RF_CALLBACK_H_ #include "rf_types.h" struct RF_CallbackDesc_s { - void (*callbackFunc) (RF_CBParam_t); /* function to call */ - RF_CBParam_t callbackArg; /* args to give to function, or just - * info about this callback */ - RF_CBParam_t callbackArg2; - RF_RowCol_t row; /* disk row and column IDs to give to the - * callback func */ - RF_RowCol_t col; - RF_CallbackDesc_t *next;/* next entry in list */ + void (*callbackFunc) (RF_CBParam_t); /* Function to call. */ + RF_CBParam_t callbackArg; /* Args to give to function, or */ + RF_CBParam_t callbackArg2; /* just info about this callback. */ + RF_RowCol_t row; /* Disk row and column IDs to */ + RF_RowCol_t col; /* give to the callback func. */ + RF_CallbackDesc_t *next; /* Next entry in list. */ }; -int rf_ConfigureCallback(RF_ShutdownList_t ** listp); +int rf_ConfigureCallback(RF_ShutdownList_t **); RF_CallbackDesc_t *rf_AllocCallbackDesc(void); -void rf_FreeCallbackDesc(RF_CallbackDesc_t * p); +void rf_FreeCallbackDesc(RF_CallbackDesc_t *); -#endif /* !_RF__RF_CALLBACK_H_ */ +#endif /* !_RF__RF_CALLBACK_H_ */ diff --git a/sys/dev/raidframe/rf_chaindecluster.c b/sys/dev/raidframe/rf_chaindecluster.c index 6eab8c5f937..b761cb4100d 100644 --- a/sys/dev/raidframe/rf_chaindecluster.c +++ b/sys/dev/raidframe/rf_chaindecluster.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_chaindecluster.c,v 1.3 2000/01/11 18:02:20 peter Exp $ */ +/* $OpenBSD: rf_chaindecluster.c,v 1.4 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_chaindecluster.c,v 1.4 2000/01/07 03:40:56 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,7 +28,7 @@ * rights to redistribute these changes. 
*/ -/****************************************************************************** +/***************************************************************************** * * rf_chaindecluster.c -- implements chained declustering * @@ -47,32 +48,35 @@ #include "rf_utils.h" typedef struct RF_ChaindeclusterConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time and used - * by IdentifyStripe */ - RF_StripeCount_t numSparingRegions; - RF_StripeCount_t stripeUnitsPerSparingRegion; - RF_SectorNum_t mirrorStripeOffset; -} RF_ChaindeclusterConfigInfo_t; - -int -rf_ConfigureChainDecluster( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) + RF_RowCol_t **stripeIdentifier; /* + * Filled in at config time and + * used by IdentifyStripe. + */ + RF_StripeCount_t numSparingRegions; + RF_StripeCount_t stripeUnitsPerSparingRegion; + RF_SectorNum_t mirrorStripeOffset; +} RF_ChaindeclusterConfigInfo_t; + + +int +rf_ConfigureChainDecluster(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_StripeCount_t num_used_stripeUnitsPerDisk; RF_ChaindeclusterConfigInfo_t *info; RF_RowCol_t i; - /* create a Chained Declustering configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList); + /* Create a Chained Declustering configuration structure. */ + RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), + (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList); if (info == NULL) return (ENOMEM); layoutPtr->layoutSpecificInfo = (void *) info; - /* fill in the config structure. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2, raidPtr->cleanupList); + /* Fill in the config structure. */ + info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2, + raidPtr->cleanupList); if (info->stripeIdentifier == NULL) return (ENOMEM); for (i = 0; i < raidPtr->numCol; i++) { @@ -82,60 +86,63 @@ rf_ConfigureChainDecluster( RF_ASSERT(raidPtr->numRow == 1); - /* fill in the remaining layout parameters */ - num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % - (2 * raidPtr->numCol - 2)); - info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol - 2); - info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); - info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol - 1); - layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + /* Fill in the remaining layout parameters. 
*/ + num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - + (layoutPtr->stripeUnitsPerDisk % (2 * raidPtr->numCol - 2)); + info->numSparingRegions = num_used_stripeUnitsPerDisk / + (2 * raidPtr->numCol - 2); + info->stripeUnitsPerSparingRegion = raidPtr->numCol * + (raidPtr->numCol - 1); + info->mirrorStripeOffset = info->numSparingRegions * + (raidPtr->numCol - 1); + layoutPtr->numStripe = info->numSparingRegions * + info->stripeUnitsPerSparingRegion; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << + raidPtr->logBytesPerSector; layoutPtr->numDataCol = 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * + layoutPtr->sectorsPerStripeUnit; layoutPtr->numParityCol = 1; layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = - num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + raidPtr->sectorsPerDisk = num_used_stripeUnitsPerDisk * + layoutPtr->sectorsPerStripeUnit; - raidPtr->totalSectors = - (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; + raidPtr->totalSectors = (layoutPtr->numStripe) * + layoutPtr->sectorsPerStripeUnit; - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; + layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / + layoutPtr->sectorsPerStripeUnit; return (0); } -RF_ReconUnitCount_t -rf_GetNumSpareRUsChainDecluster(raidPtr) - RF_Raid_t *raidPtr; +RF_ReconUnitCount_t +rf_GetNumSpareRUsChainDecluster(RF_Raid_t *raidPtr) { - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) + raidPtr->Layout.layoutSpecificInfo; /* - * The layout uses two stripe units per disk as spare within each - * sparing region. - */ + * The layout uses two stripe units per disk as spare within each + * sparing region. + */ return (2 * info->numSparingRegions); } -/* Maps to the primary copy of the data, i.e. the first mirror pair */ -void -rf_MapSectorChainDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +/* Maps to the primary copy of the data, i.e. the first mirror pair. */ +void +rf_MapSectorChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) + raidPtr->Layout.layoutSpecificInfo; RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; RF_SectorNum_t index_within_region, index_within_disk; RF_StripeNum_t sparing_region_id; - int col_before_remap; + int col_before_remap; *row = 0; sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; @@ -145,83 +152,93 @@ rf_MapSectorChainDecluster( if (!remap) { *col = col_before_remap; - *diskSector = (index_within_disk + ((raidPtr->numCol - 1) * sparing_region_id)) * - raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *diskSector = (index_within_disk + ((raidPtr->numCol - 1) * + sparing_region_id)) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % + raidPtr->Layout.sectorsPerStripeUnit); } else { - /* remap sector to spare space... 
*/ - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + /* Remap sector to spare space... */ + *diskSector = sparing_region_id * (raidPtr->numCol + 1) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidPtr->numCol - 1) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % + raidPtr->Layout.sectorsPerStripeUnit); index_within_disk = index_within_region / raidPtr->numCol; if (index_within_disk < col_before_remap) *col = index_within_disk; else if (index_within_disk == raidPtr->numCol - 2) { - *col = (col_before_remap + raidPtr->numCol - 1) % raidPtr->numCol; - *diskSector += raidPtr->Layout.sectorsPerStripeUnit; + *col = (col_before_remap + raidPtr->numCol - 1) + % raidPtr->numCol; + *diskSector += + raidPtr->Layout.sectorsPerStripeUnit; } else - *col = (index_within_disk + 2) % raidPtr->numCol; + *col = (index_within_disk + 2) % + raidPtr->numCol; } } - -/* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained - in the next disk (mod numCol) after the disk containing the primary copy. - The offset into the disk is one-half disk down */ -void -rf_MapParityChainDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +/* + * Maps to the second copy of the mirror pair, which is chain declustered. + * The second copy is contained in the next disk (mod numCol) after the disk + * containing the primary copy. + * The offset into the disk is one-half disk down. + */ +void +rf_MapParityChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) + raidPtr->Layout.layoutSpecificInfo; RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; RF_SectorNum_t index_within_region, index_within_disk; RF_StripeNum_t sparing_region_id; - int col_before_remap; + int col_before_remap; *row = 0; if (!remap) { *col = SUID % raidPtr->numCol; *col = (*col + 1) % raidPtr->numCol; - *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *diskSector = info->mirrorStripeOffset * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (SUID / raidPtr->numCol) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % + raidPtr->Layout.sectorsPerStripeUnit); } else { - /* remap parity to spare space ... */ + /* Remap parity to spare space... 
*/ sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; index_within_region = SUID % info->stripeUnitsPerSparingRegion; index_within_disk = index_within_region / raidPtr->numCol; - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *diskSector = sparing_region_id * (raidPtr->numCol + 1) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidPtr->numCol) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % + raidPtr->Layout.sectorsPerStripeUnit); col_before_remap = SUID % raidPtr->numCol; if (index_within_disk < col_before_remap) *col = index_within_disk; else if (index_within_disk == raidPtr->numCol - 2) { *col = (col_before_remap + 2) % raidPtr->numCol; - *diskSector -= raidPtr->Layout.sectorsPerStripeUnit; + *diskSector -= + raidPtr->Layout.sectorsPerStripeUnit; } else - *col = (index_within_disk + 2) % raidPtr->numCol; + *col = (index_within_disk + 2) % + raidPtr->numCol; } - } -void -rf_IdentifyStripeChainDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) +void +rf_IdentifyStripeChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t **diskids, RF_RowCol_t *outRow) { - RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) + raidPtr->Layout.layoutSpecificInfo; RF_StripeNum_t SUID; RF_RowCol_t col; @@ -231,57 +248,57 @@ rf_IdentifyStripeChainDecluster( *diskids = info->stripeIdentifier[col]; } -void -rf_MapSIDToPSIDChainDecluster( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) +void +rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t *layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru) { *which_ru = 0; *psID = stripeID; } -/****************************************************************************** - * select a graph to perform a single-stripe access + + +/**************************************************************************** + * Select a graph to perform a single-stripe access. * - * Parameters: raidPtr - description of the physical array - * type - type of operation (read or write) requested - * asmap - logical & physical addresses for this access - * createFunc - function to use to create the graph (return value) + * Parameters: raidPtr - description of the physical array + * type - type of operation (read or write) requested + * asmap - logical & physical addresses for this access + * createFunc - function to use to create the graph (return value) *****************************************************************************/ -void -rf_RAIDCDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) -#if 0 - void (**createFunc) (RF_Raid_t *, RF_AccessStripeMap_t *, - RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, -/**INDENT** Warning@258: Extra ) */ - RF_AllocListElem_t *)) -#endif +void +rf_RAIDCDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc) { RF_ASSERT(RF_IO_IS_R_OR_W(type)); RF_ASSERT(raidPtr->numRow == 1); if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! 
Aborting I/O operation.\n"); + RF_ERRORMSG("Multiple disks failed in a single group !" + " Aborting I/O operation.\n"); *createFunc = NULL; return; } - *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; + *createFunc = (type == RF_IO_TYPE_READ) ? + (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : + (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; if (type == RF_IO_TYPE_READ) { - if ((raidPtr->status[0] == rf_rs_degraded) || (raidPtr->status[0] == rf_rs_reconstructing)) - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidCDegradedReadDAG; /* array status is - * degraded, implement - * workload shifting */ + if ((raidPtr->status[0] == rf_rs_degraded) || + (raidPtr->status[0] == rf_rs_reconstructing)) + /* + * Array status is degraded, + * implement workload shifting. + */ + *createFunc = (RF_VoidFuncPtr) + rf_CreateRaidCDegradedReadDAG; else - *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; /* array status not - * degraded, so use - * mirror partition dag */ + /* + * Array status not degraded, + * so use mirror partition dag. + */ + *createFunc = (RF_VoidFuncPtr) + rf_CreateMirrorPartitionReadDAG; } else *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; } diff --git a/sys/dev/raidframe/rf_chaindecluster.h b/sys/dev/raidframe/rf_chaindecluster.h index f8105d177d8..9a8005467ab 100644 --- a/sys/dev/raidframe/rf_chaindecluster.h +++ b/sys/dev/raidframe/rf_chaindecluster.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_chaindecluster.h,v 1.2 1999/02/16 00:02:26 niklas Exp $ */ +/* $OpenBSD: rf_chaindecluster.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_chaindecluster.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,44 +28,27 @@ * rights to redistribute these changes. */ -/* rf_chaindecluster.h - * header file for Chained Declustering +/* + * rf_chaindecluster.h + * Header file for Chained Declustering. 
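
To make the chained-declustering layout arithmetic above concrete, here is a small worked example using only the formulas visible in rf_ConfigureChainDecluster() and the non-remapped branch of rf_MapParityChainDecluster(). The array parameters (5 columns, 1000 stripe units per disk, 64 sectors per stripe unit) are invented, the primary column follows the header comment ("the second copy is on the next disk after the disk holding the primary"), and the within-stripe-unit offset is left out.

#include <stdio.h>

int
main(void)
{
	long numCol = 5;		/* disks in the (single) row */
	long stripeUnitsPerDisk = 1000;
	long sectorsPerStripeUnit = 64;
	long used, sparingRegions, suPerRegion, mirrorOffset;
	long suid, primaryCol, mirrorCol, mirrorSector;

	/* Same computations as rf_ConfigureChainDecluster(). */
	used = stripeUnitsPerDisk -
	    (stripeUnitsPerDisk % (2 * numCol - 2));		/* 1000 */
	sparingRegions = used / (2 * numCol - 2);		/* 125 */
	suPerRegion = numCol * (numCol - 1);			/* 20 */
	mirrorOffset = sparingRegions * (numCol - 1);		/* 500 */

	printf("%ld sparing regions, %ld SUs/region, mirror offset %ld SUs\n",
	    sparingRegions, suPerRegion, mirrorOffset);

	/* Where do the two copies of stripe unit 7 land (no remap)? */
	suid = 7;
	primaryCol = suid % numCol;				/* column 2 */
	mirrorCol = (primaryCol + 1) % numCol;			/* column 3 */
	mirrorSector = (mirrorOffset + suid / numCol) *
	    sectorsPerStripeUnit;				/* sector 32064 */

	printf("SU %ld: primary on column %ld, mirror on column %ld"
	    " at sector %ld\n", suid, primaryCol, mirrorCol, mirrorSector);
	return (0);
}

Note that mirrorOffset is exactly half of the used stripe units per disk (used = sparingRegions * (2 * numCol - 2) = 2 * mirrorOffset), which is why the comment describes the mirror copy as sitting "one-half disk down".
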
*/ -#ifndef _RF__RF_CHAINDECLUSTER_H_ -#define _RF__RF_CHAINDECLUSTER_H_ +#ifndef _RF__RF_CHAINDECLUSTER_H_ +#define _RF__RF_CHAINDECLUSTER_H_ -int -rf_ConfigureChainDecluster(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(RF_Raid_t * raidPtr); -void -rf_MapSectorChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeChainDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAIDCDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr *); -#if 0 -void (**createFunc) (RF_Raid_t *, - RF_AccessStripeMap_t *, - RF_DagHeader_t *, - void *, - RF_RaidAccessFlags_t, - RF_AllocListElem_t *) -/**INDENT** Warning@59: Extra ) */ -); -#endif +int rf_ConfigureChainDecluster(RF_ShutdownList_t **, RF_Raid_t *, + RF_Config_t *); +RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(RF_Raid_t *); +void rf_MapSectorChainDecluster(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapParityChainDecluster(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_IdentifyStripeChainDecluster(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t **, RF_RowCol_t *); +void rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t *, + RF_StripeNum_t, RF_StripeNum_t *, RF_ReconUnitNum_t *); +void rf_RAIDCDagSelect(RF_Raid_t *, RF_IoType_t, + RF_AccessStripeMap_t *, RF_VoidFuncPtr *); -#endif /* !_RF__RF_CHAINDECLUSTER_H_ */ +#endif /* !_RF__RF_CHAINDECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_configure.h b/sys/dev/raidframe/rf_configure.h index fd436a53903..5fdad68294b 100644 --- a/sys/dev/raidframe/rf_configure.h +++ b/sys/dev/raidframe/rf_configure.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_configure.h,v 1.4 2002/05/22 21:22:32 tdeval Exp $ */ +/* $OpenBSD: rf_configure.h,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_configure.h,v 1.4 1999/03/02 03:18:49 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,18 +28,18 @@ * rights to redistribute these changes. */ -/******************************** +/***************************************************************************** * * rf_configure.h * - * header file for raidframe configuration in the kernel version only. - * configuration is invoked via ioctl rather than at boot time + * Header file for RAIDframe configuration in the kernel version only. + * Configuration is invoked via ioctl rather than at boot time. * - *******************************/ + *****************************************************************************/ -#ifndef _RF__RF_CONFIGURE_H_ -#define _RF__RF_CONFIGURE_H_ +#ifndef _RF__RF_CONFIGURE_H_ +#define _RF__RF_CONFIGURE_H_ #include "rf_archs.h" #include "rf_types.h" @@ -48,48 +49,68 @@ #include <sys/ioctl.h> -/* the raidframe configuration, passed down through an ioctl. - * the driver can be reconfigured (with total loss of data) at any time, +/* + * The RAIDframe configuration, passed down through an ioctl. 
+ * The driver can be reconfigured (with total loss of data) at any time, * but it must be shut down first. */ struct RF_Config_s { - RF_RowCol_t numRow, numCol, numSpare; /* number of rows, columns, - * and spare disks */ - dev_t devs[RF_MAXROW][RF_MAXCOL]; /* device numbers for disks - * comprising array */ - char devnames[RF_MAXROW][RF_MAXCOL][50]; /* device names */ - dev_t spare_devs[RF_MAXSPARE]; /* device numbers for spare - * disks */ - char spare_names[RF_MAXSPARE][50]; /* device names */ - RF_SectorNum_t sectPerSU; /* sectors per stripe unit */ - RF_StripeNum_t SUsPerPU;/* stripe units per parity unit */ - RF_StripeNum_t SUsPerRU;/* stripe units per reconstruction unit */ - RF_ParityConfig_t parityConfig; /* identifies the RAID architecture to - * be used */ - RF_DiskQueueType_t diskQueueType; /* 'f' = fifo, 'c' = cvscan, - * not used in kernel */ - char maxOutstandingDiskReqs; /* # concurrent reqs to be sent to a - * disk. not used in kernel. */ - char debugVars[RF_MAXDBGV][RF_MAXDBGVLEN]; /* space for specifying - * debug variables & - * their values */ - unsigned int layoutSpecificSize; /* size in bytes of - * layout-specific info */ - void *layoutSpecific; /* a pointer to a layout-specific structure to - * be copied in */ - int force; /* if !0, ignore many fatal - configuration conditions */ - /* - "force" is used to override cases where the component labels would - indicate that configuration should not proceed without user - intervention + /* Number of rows, columns, and spare disks. */ + RF_RowCol_t numRow, numCol, numSpare; + + /* Device numbers for disks comprising array. */ + dev_t devs[RF_MAXROW][RF_MAXCOL]; + + /* Device names. */ + char devnames[RF_MAXROW][RF_MAXCOL][50]; + + /* Device numbers for spare disks. */ + dev_t spare_devs[RF_MAXSPARE]; + + /* Device names. */ + char spare_names[RF_MAXSPARE][50]; + + /* Sectors per stripe unit. */ + RF_SectorNum_t sectPerSU; + + /* Stripe units per parity unit. */ + RF_StripeNum_t SUsPerPU; + + /* Stripe units per reconstruction unit. */ + RF_StripeNum_t SUsPerRU; + + /* Identifies the RAID architecture to be used. */ + RF_ParityConfig_t parityConfig; + + /* 'f' = fifo, 'c' = cvscan, not used in kernel. */ + RF_DiskQueueType_t diskQueueType; + + /* # concurrent reqs to be sent to a disk. Not used in kernel. */ + char maxOutstandingDiskReqs; + + /* Space for specifying debug variables & their values. */ + char debugVars[RF_MAXDBGV][RF_MAXDBGVLEN]; + + /* Size in bytes of layout-specific info. */ + unsigned int layoutSpecificSize; + + /* A pointer to a layout-specific structure to be copied in. */ + void *layoutSpecific; + + /* If !0, ignore many fatal configuration conditions. */ + int force; + /* + * "force" is used to override cases where the component labels + * would indicate that configuration should not proceed without + * user intervention. 
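
For orientation, the sketch below shows roughly how a user-space tool (in the spirit of raidctl) might populate this structure for a two-component mirror before it is passed down through the configuration ioctl. It is illustrative only: the header include path, the device names, and the example values are assumptions, and the ioctl call itself is deliberately not shown.

#include <string.h>

#include "rf_configure.h"	/* RF_Config_t; include path is an assumption. */

static void
fill_raid1_config(RF_Config_t *cfg)
{
	memset(cfg, 0, sizeof(*cfg));
	cfg->numRow = 1;
	cfg->numCol = 2;			/* two mirrored components */
	cfg->numSpare = 1;
	strlcpy(cfg->devnames[0][0], "/dev/wd1e", sizeof(cfg->devnames[0][0]));
	strlcpy(cfg->devnames[0][1], "/dev/wd2e", sizeof(cfg->devnames[0][1]));
	strlcpy(cfg->spare_names[0], "/dev/wd3e", sizeof(cfg->spare_names[0]));
	cfg->sectPerSU = 64;			/* sectors per stripe unit */
	cfg->SUsPerPU = 1;
	cfg->SUsPerRU = 1;
	cfg->parityConfig = '1';		/* RAID architecture: mirroring */
	cfg->force = 0;				/* honour the component labels */
	/* The filled-in structure is then handed to the driver via ioctl. */
}
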
*/ }; -#ifndef _KERNEL -int rf_MakeConfig(char *configname, RF_Config_t * cfgPtr); -int rf_MakeLayoutSpecificNULL(FILE * fp, RF_Config_t * cfgPtr, void *arg); -int rf_MakeLayoutSpecificDeclustered(FILE * configfp, RF_Config_t * cfgPtr, void *arg); -void *rf_ReadSpareTable(RF_SparetWait_t * req, char *fname); -#endif /* !_KERNEL */ - -#endif /* !_RF__RF_CONFIGURE_H_ */ + +#ifndef _KERNEL +int rf_MakeConfig(char *, RF_Config_t *); +int rf_MakeLayoutSpecificNULL(FILE *, RF_Config_t *, void *); +int rf_MakeLayoutSpecificDeclustered(FILE *, RF_Config_t *, void *); +void *rf_ReadSpareTable(RF_SparetWait_t *, char *); +#endif /* !_KERNEL */ + +#endif /* !_RF__RF_CONFIGURE_H_ */ diff --git a/sys/dev/raidframe/rf_copyback.c b/sys/dev/raidframe/rf_copyback.c index 025aaaca293..9b573cd9234 100644 --- a/sys/dev/raidframe/rf_copyback.c +++ b/sys/dev/raidframe/rf_copyback.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_copyback.c,v 1.6 2000/08/08 16:07:39 peter Exp $ */ +/* $OpenBSD: rf_copyback.c,v 1.7 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_copyback.c,v 1.14 2000/03/07 02:59:50 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,16 +28,17 @@ * rights to redistribute these changes. */ -/***************************************************************************************** + +/***************************************************************************** * - * copyback.c -- code to copy reconstructed data back from spare space to - * the replaced disk. + * copyback.c -- Code to copy reconstructed data back from spare space to + * the replaced disk. * - * the code operates using callbacks on the I/Os to continue with the next - * unit to be copied back. We do this because a simple loop containing blocking I/Os - * will not work in the simulator. + * The code operates using callbacks on the I/Os to continue with the next + * unit to be copied back. We do this because a simple loop containing + * blocking I/Os will not work in the simulator. 
* - ****************************************************************************************/ + *****************************************************************************/ #include "rf_types.h" @@ -54,47 +56,45 @@ #include "rf_shutdown.h" #include "rf_kintf.h" -#define RF_COPYBACK_DATA 0 -#define RF_COPYBACK_PARITY 1 +#define RF_COPYBACK_DATA 0 +#define RF_COPYBACK_PARITY 1 -int rf_copyback_in_progress; +int rf_copyback_in_progress; -static int rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc, int status); -static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc, int status); -static void rf_CopybackOne(RF_CopybackDesc_t * desc, int typ, - RF_RaidAddr_t addr, RF_RowCol_t testRow, - RF_RowCol_t testCol, - RF_SectorNum_t testOffs); -static void rf_CopybackComplete(RF_CopybackDesc_t * desc, int status); +int rf_CopybackReadDoneProc(RF_CopybackDesc_t *, int); +int rf_CopybackWriteDoneProc(RF_CopybackDesc_t *, int); +void rf_CopybackOne(RF_CopybackDesc_t *, int, RF_RaidAddr_t, + RF_RowCol_t, RF_RowCol_t, RF_SectorNum_t); +void rf_CopybackComplete(RF_CopybackDesc_t *, int); -int -rf_ConfigureCopyback(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureCopyback(RF_ShutdownList_t **listp) { rf_copyback_in_progress = 0; return (0); } + #include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> #include <sys/ioctl.h> #include <sys/fcntl.h> -#ifdef __NETBSD__ +#ifdef __NETBSD__ #include <sys/vnode.h> #endif -/* do a complete copyback */ -void -rf_CopybackReconstructedData(raidPtr) - RF_Raid_t *raidPtr; + +/* Do a complete copyback. */ +void +rf_CopybackReconstructedData(RF_Raid_t *raidPtr) { RF_ComponentLabel_t c_label; - int done, retcode; + int done, retcode; RF_CopybackDesc_t *desc; RF_RowCol_t frow, fcol; RF_RaidDisk_t *badDisk; - char *databuf; + char *databuf; struct partinfo dpart; struct vnode *vp; @@ -107,8 +107,10 @@ rf_CopybackReconstructedData(raidPtr) fcol = 0; for (frow = 0; frow < raidPtr->numRow; frow++) { for (fcol = 0; fcol < raidPtr->numCol; fcol++) { - if (raidPtr->Disks[frow][fcol].status == rf_ds_dist_spared - || raidPtr->Disks[frow][fcol].status == rf_ds_spared) { + if (raidPtr->Disks[frow][fcol].status == + rf_ds_dist_spared || + raidPtr->Disks[frow][fcol].status == + rf_ds_spared) { done = 1; break; } @@ -118,18 +120,20 @@ rf_CopybackReconstructedData(raidPtr) } if (frow == raidPtr->numRow) { - printf("COPYBACK: no disks need copyback\n"); + printf("COPYBACK: No disks need copyback.\n"); return; } badDisk = &raidPtr->Disks[frow][fcol]; proc = raidPtr->engine_thread; - /* This device may have been opened successfully the first time. Close - * it before trying to open it again.. */ + /* + * This device may have been opened successfully the first time. + * Close it before trying to open it again. + */ if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) { - printf("Closed the open device: %s\n", + printf("Close the opened device: %s.\n", raidPtr->Disks[frow][fcol].devname); vp = raidPtr->raid_cinfo[frow][fcol].ci_vp; ac = raidPtr->Disks[frow][fcol].auto_configured; @@ -137,32 +141,37 @@ rf_CopybackReconstructedData(raidPtr) raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL; } - /* note that this disk was *not* auto_configured (any longer) */ + /* Note that this disk was *not* auto_configured (any longer). 
*/ raidPtr->Disks[frow][fcol].auto_configured = 0; - - printf("About to (re-)open the device: %s\n", + + printf("About to (re-)open the device: %s.\n", raidPtr->Disks[frow][fcol].devname); retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp); if (retcode) { - printf("COPYBACK: raidlookup on device: %s failed: %d!\n", + printf("COPYBACK: raidlookup on device: %s failed: %d !\n", raidPtr->Disks[frow][fcol].devname, retcode); - /* XXX the component isn't responding properly... must be - * still dead :-( */ + /* + * XXX The component isn't responding properly... Must be + * still dead :-( + */ return; } else { - /* Ok, so we can at least do a lookup... How about actually - * getting a vp for it? */ + /* + * Ok, so we can at least do a lookup... + * How about actually getting a vp for it ? + */ - if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { + if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) + { return; } - retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, - FREAD, proc->p_ucred, proc); + retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) &dpart, FREAD, + proc->p_ucred, proc); if (retcode) { return; } @@ -174,38 +183,44 @@ rf_CopybackReconstructedData(raidPtr) raidPtr->raid_cinfo[frow][fcol].ci_vp = vp; raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev; - raidPtr->Disks[frow][fcol].dev = va.va_rdev; /* XXX or the above? */ + /* XXX Or the above ? */ + raidPtr->Disks[frow][fcol].dev = va.va_rdev; - /* we allow the user to specify that only a fraction of the - * disks should be used this is just for debug: it speeds up - * the parity scan */ + /* + * We allow the user to specify that only a fraction of the + * disks should be used this is just for debug: it speeds up + * the parity scan. + */ raidPtr->Disks[frow][fcol].numBlocks = raidPtr->Disks[frow][fcol].numBlocks * rf_sizePercentage / 100; } #if 0 - /* This is the way it was done before the CAM stuff was removed */ + /* This is the way it was done before the CAM stuff was removed. */ if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) { - printf("COPYBACK: unable to extract bus, target, lun from devname %s\n", - badDisk->devname); + printf("COPYBACK: unable to extract bus, target, lun from" + " devname %s.\n", badDisk->devname); return; } - /* TUR the disk that's marked as bad to be sure that it's actually - * alive */ + /* + * TUR the disk that's marked as bad to be sure that it's actually + * alive. + */ rf_SCSI_AllocTUR(&tur_op); retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev); rf_SCSI_FreeDiskOp(tur_op, 0); #endif if (retcode) { - printf("COPYBACK: target disk failed TUR\n"); + printf("COPYBACK: target disk failed TUR.\n"); return; } - /* get a buffer to hold one SU */ - RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *)); + /* Get a buffer to hold one SU. */ + RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, + raidPtr->Layout.sectorsPerStripeUnit), (char *)); - /* create a descriptor */ + /* Create a descriptor. 
*/ RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *)); desc->raidPtr = raidPtr; desc->status = 0; @@ -215,124 +230,139 @@ rf_CopybackReconstructedData(raidPtr) desc->spCol = badDisk->spareCol; desc->stripeAddr = 0; desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; - desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol; + desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * + raidPtr->Layout.numDataCol; desc->databuf = databuf; desc->mcpair = rf_AllocMCPair(); - printf("COPYBACK: Quiescing the array\n"); - /* quiesce the array, since we don't want to code support for user - * accs here */ + printf("COPYBACK: Quiescing the array.\n"); + /* + * Quiesce the array, since we don't want to code support for user + * accs here. + */ rf_SuspendNewRequestsAndWait(raidPtr); - /* adjust state of the array and of the disks */ + /* Adjust state of the array and of the disks. */ RF_LOCK_MUTEX(raidPtr->mutex); raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal; raidPtr->status[desc->frow] = rf_rs_optimal; - rf_copyback_in_progress = 1; /* debug only */ + rf_copyback_in_progress = 1; /* Debug only. */ RF_UNLOCK_MUTEX(raidPtr->mutex); printf("COPYBACK: Beginning\n"); RF_GETTIME(desc->starttime); rf_ContinueCopyback(desc); - /* Data has been restored. Fix up the component label. */ - /* Don't actually need the read here.. */ - raidread_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, - raidPtr->raid_cinfo[frow][fcol].ci_vp, - &c_label); - - raid_init_component_label( raidPtr, &c_label ); + /* + * Data has been restored. + * Fix up the component label. + * Don't actually need the read here. + */ + raidread_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev, + raidPtr->raid_cinfo[frow][fcol].ci_vp, + &c_label); + + raid_init_component_label(raidPtr, &c_label); c_label.row = frow; c_label.column = fcol; - raidwrite_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, - raidPtr->raid_cinfo[frow][fcol].ci_vp, - &c_label); + raidwrite_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev, + raidPtr->raid_cinfo[frow][fcol].ci_vp, + &c_label); } /* - * invoked via callback after a copyback I/O has completed to - * continue on with the next one + * Invoked via callback after a copyback I/O has completed to + * continue on with the next one. 
*/ -void -rf_ContinueCopyback(desc) - RF_CopybackDesc_t *desc; +void +rf_ContinueCopyback(RF_CopybackDesc_t *desc) { RF_SectorNum_t testOffs, stripeAddr; RF_Raid_t *raidPtr = desc->raidPtr; RF_RaidAddr_t addr; RF_RowCol_t testRow, testCol; - int old_pctg, new_pctg, done; + int old_pctg, new_pctg, done; struct timeval t, diff; old_pctg = (-1); while (1) { stripeAddr = desc->stripeAddr; - desc->raidPtr->copyback_stripes_done = stripeAddr - / desc->sectPerStripe; + desc->raidPtr->copyback_stripes_done = stripeAddr / + desc->sectPerStripe; if (rf_prReconSched) { - old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; + old_pctg = 100 * desc->stripeAddr / + raidPtr->totalSectors; } desc->stripeAddr += desc->sectPerStripe; if (rf_prReconSched) { - new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors; + new_pctg = 100 * desc->stripeAddr / + raidPtr->totalSectors; if (new_pctg != old_pctg) { RF_GETTIME(t); RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); - printf("%d %d.%06d\n", new_pctg, (int) diff.tv_sec, (int) diff.tv_usec); + printf("%d %d.%06d\n", new_pctg, + (int) diff.tv_sec, (int) diff.tv_usec); } } if (stripeAddr >= raidPtr->totalSectors) { rf_CopybackComplete(desc, 0); return; } - /* walk through the current stripe, su-by-su */ - for (done = 0, addr = stripeAddr; addr < stripeAddr + desc->sectPerStripe; addr += desc->sectPerSU) { + /* Walk through the current stripe, su-by-su. */ + for (done = 0, addr = stripeAddr; + addr < stripeAddr + desc->sectPerStripe; + addr += desc->sectPerSU) { - /* map the SU, disallowing remap to spare space */ - (raidPtr->Layout.map->MapSector) (raidPtr, addr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); + /* Map the SU, disallowing remap to spare space. */ + (raidPtr->Layout.map->MapSector) (raidPtr, addr, + &testRow, &testCol, &testOffs, RF_DONT_REMAP); if (testRow == desc->frow && testCol == desc->fcol) { - rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testRow, testCol, testOffs); + rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, + testRow, testCol, testOffs); done = 1; break; } } if (!done) { - /* we didn't find the failed disk in the data part. - * check parity. */ - - /* map the parity for this stripe, disallowing remap - * to spare space */ - (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, &testRow, &testCol, &testOffs, RF_DONT_REMAP); + /* + * We didn't find the failed disk in the data part, + * check parity. + */ + + /* + * Map the parity for this stripe, disallowing remap + * to spare space. + */ + (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, + &testRow, &testCol, &testOffs, RF_DONT_REMAP); if (testRow == desc->frow && testCol == desc->fcol) { - rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testRow, testCol, testOffs); + rf_CopybackOne(desc, RF_COPYBACK_PARITY, + stripeAddr, testRow, testCol, testOffs); } } - /* check to see if the last read/write pair failed */ + /* Check to see if the last read/write pair failed. */ if (desc->status) { rf_CopybackComplete(desc, 1); return; } - /* we didn't find any units to copy back in this stripe. - * Continue with the next one */ + /* + * We didn't find any units to copy back in this stripe. + * Continue with the next one. + */ } } -/* copyback one unit */ -static void -rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs) - RF_CopybackDesc_t *desc; - int typ; - RF_RaidAddr_t addr; - RF_RowCol_t testRow; - RF_RowCol_t testCol; - RF_SectorNum_t testOffs; +/* Copyback one unit. 
*/ +void +rf_CopybackOne(RF_CopybackDesc_t *desc, int typ, RF_RaidAddr_t addr, + RF_RowCol_t testRow, RF_RowCol_t testCol, RF_SectorNum_t testOffs) { RF_SectorCount_t sectPerSU = desc->sectPerSU; RF_Raid_t *raidPtr = desc->raidPtr; @@ -340,37 +370,42 @@ rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs) RF_RowCol_t spCol = desc->spCol; RF_SectorNum_t spOffs; - /* find the spare spare location for this SU */ + /* Find the spare location for this SU. */ if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { if (typ == RF_COPYBACK_DATA) - raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); + raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, + &spCol, &spOffs, RF_REMAP); else - raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP); + raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, + &spCol, &spOffs, RF_REMAP); } else { spOffs = testOffs; } - /* create reqs to read the old location & write the new */ + /* Create reqs to read the old location & write the new. */ desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs, - sectPerSU, desc->databuf, 0L, 0, - (int (*) (void *, int)) rf_CopybackReadDoneProc, desc, - NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); + sectPerSU, desc->databuf, 0L, 0, (int (*) (void *, int)) + rf_CopybackReadDoneProc, desc, NULL, NULL, (void *) raidPtr, + RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs, - sectPerSU, desc->databuf, 0L, 0, - (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc, - NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); + sectPerSU, desc->databuf, 0L, 0, (int (*) (void *, int)) + rf_CopybackWriteDoneProc, desc, NULL, NULL, (void *) raidPtr, + RF_DISKQUEUE_DATA_FLAGS_NONE, NULL); desc->frow = testRow; desc->fcol = testCol; - /* enqueue the read. the write will go out as part of the callback on - * the read. at user-level & in the kernel, wait for the read-write - * pair to complete. in the simulator, just return, since everything - * will happen as callbacks */ + /* + * Enqueue the read. The write will go out as part of the callback on + * the read. At user-level & in the kernel, wait for the read-write + * pair to complete. In the simulator, just return, since everything + * will happen as callbacks. + */ RF_LOCK_MUTEX(desc->mcpair->mutex); desc->mcpair->flag = 0; - rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, RF_IO_NORMAL_PRIORITY); + rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, + RF_IO_NORMAL_PRIORITY); while (!desc->mcpair->flag) { RF_WAIT_MCPAIR(desc->mcpair); @@ -382,42 +417,47 @@ rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs) } -/* called at interrupt context when the read has completed. just send out the write */ -static int -rf_CopybackReadDoneProc(desc, status) - RF_CopybackDesc_t *desc; - int status; +/* + * Called at interrupt context when the read has completed. + * Just send out the write. + */ +int +rf_CopybackReadDoneProc(RF_CopybackDesc_t *desc, int status) { - if (status) { /* invoke the callback with bad status */ - printf("COPYBACK: copyback read failed. Aborting.\n"); + if (status) { /* Invoke the callback with bad status. */ + printf("COPYBACK: copyback read failed. 
Aborting.\n"); (desc->writereq->CompleteFunc) (desc, -100); } else { - rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->frow][desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY); + rf_DiskIOEnqueue(&(desc->raidPtr + ->Queues[desc->frow][desc->fcol]), + desc->writereq, RF_IO_NORMAL_PRIORITY); } return (0); } -/* called at interrupt context when the write has completed. - * at user level & in the kernel, wake up the copyback thread. - * in the simulator, invoke the next copyback directly. - * can't free diskqueuedata structs in the kernel b/c we're at interrupt context. + + +/* + * Called at interrupt context when the write has completed. + * At user level & in the kernel, wake up the copyback thread. + * In the simulator, invoke the next copyback directly. + * Can't free diskqueuedata structs in the kernel because we're at + * interrupt context. */ -static int -rf_CopybackWriteDoneProc(desc, status) - RF_CopybackDesc_t *desc; - int status; +int +rf_CopybackWriteDoneProc(RF_CopybackDesc_t *desc, int status) { if (status && status != -100) { - printf("COPYBACK: copyback write failed. Aborting.\n"); + printf("COPYBACK: copyback write failed. Aborting.\n"); } desc->status = status; rf_MCPairWakeupFunc(desc->mcpair); return (0); } -/* invoked when the copyback has completed */ -static void -rf_CopybackComplete(desc, status) - RF_CopybackDesc_t *desc; - int status; + + +/* Invoked when the copyback has completed. */ +void +rf_CopybackComplete(RF_CopybackDesc_t *desc, int status) { RF_Raid_t *raidPtr = desc->raidPtr; struct timeval t, diff; @@ -428,13 +468,14 @@ rf_CopybackComplete(desc, status) RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D'); rf_FreeSpareTable(raidPtr); } else { - raidPtr->Disks[desc->spRow][desc->spCol].status = rf_ds_spare; + raidPtr->Disks[desc->spRow][desc->spCol].status = + rf_ds_spare; } RF_UNLOCK_MUTEX(raidPtr->mutex); RF_GETTIME(t); RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff); - printf("Copyback time was %d.%06d seconds\n", + printf("Copyback time was %d.%06d seconds.\n", (int) diff.tv_sec, (int) diff.tv_usec); } else printf("COPYBACK: Failure.\n"); diff --git a/sys/dev/raidframe/rf_copyback.h b/sys/dev/raidframe/rf_copyback.h index d04066e291f..6ac073017dd 100644 --- a/sys/dev/raidframe/rf_copyback.h +++ b/sys/dev/raidframe/rf_copyback.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_copyback.h,v 1.2 1999/02/16 00:02:27 niklas Exp $ */ +/* $OpenBSD: rf_copyback.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_copyback.h,v 1.3 1999/02/05 00:06:06 oster Exp $ */ + /* * rf_copyback.h */ @@ -30,32 +31,32 @@ * rights to redistribute these changes. 
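
The copyback code above chains its work through completion callbacks: the read completion sends out the matching write, and the write completion lets the loop advance to the next stripe unit, which is why no blocking read/write loop is needed in the simulator. The toy program below models only that chaining idea (none of it is RAIDframe code; the "disk" completes requests immediately by invoking the callback, names are invented).

#include <stdio.h>

#define TOTAL_UNITS	4

struct copyback {
	int unit;		/* next stripe unit to copy back */
};

static void issue_read(struct copyback *);

/* Fake disk: "completes" the request by invoking its completion callback. */
static void
fake_disk_io(const char *what, int unit, void (*done)(struct copyback *),
    struct copyback *cb)
{
	printf("%s unit %d done\n", what, unit);
	done(cb);
}

static void
write_done(struct copyback *cb)
{
	cb->unit++;			/* advance to the next unit */
	issue_read(cb);
}

static void
read_done(struct copyback *cb)
{
	/* The read completion sends out the matching write. */
	fake_disk_io("write", cb->unit, write_done, cb);
}

static void
issue_read(struct copyback *cb)
{
	if (cb->unit >= TOTAL_UNITS) {
		printf("copyback complete\n");
		return;
	}
	fake_disk_io("read", cb->unit, read_done, cb);
}

int
main(void)
{
	struct copyback cb = { 0 };

	issue_read(&cb);
	return (0);
}
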
*/ -#ifndef _RF__RF_COPYBACK_H_ -#define _RF__RF_COPYBACK_H_ +#ifndef _RF__RF_COPYBACK_H_ +#define _RF__RF_COPYBACK_H_ #include "rf_types.h" typedef struct RF_CopybackDesc_s { - RF_Raid_t *raidPtr; - RF_RowCol_t frow; - RF_RowCol_t fcol; - RF_RowCol_t spRow; - RF_RowCol_t spCol; - int status; - RF_StripeNum_t stripeAddr; - RF_SectorCount_t sectPerSU; - RF_SectorCount_t sectPerStripe; - char *databuf; - RF_DiskQueueData_t *readreq; - RF_DiskQueueData_t *writereq; - struct timeval starttime; - RF_MCPair_t *mcpair; -} RF_CopybackDesc_t; + RF_Raid_t *raidPtr; + RF_RowCol_t frow; + RF_RowCol_t fcol; + RF_RowCol_t spRow; + RF_RowCol_t spCol; + int status; + RF_StripeNum_t stripeAddr; + RF_SectorCount_t sectPerSU; + RF_SectorCount_t sectPerStripe; + char *databuf; + RF_DiskQueueData_t *readreq; + RF_DiskQueueData_t *writereq; + struct timeval starttime; + RF_MCPair_t *mcpair; +} RF_CopybackDesc_t; extern int rf_copyback_in_progress; -int rf_ConfigureCopyback(RF_ShutdownList_t ** listp); -void rf_CopybackReconstructedData(RF_Raid_t * raidPtr); -void rf_ContinueCopyback(RF_CopybackDesc_t * desc); +int rf_ConfigureCopyback(RF_ShutdownList_t **); +void rf_CopybackReconstructedData(RF_Raid_t *); +void rf_ContinueCopyback(RF_CopybackDesc_t *); -#endif /* !_RF__RF_COPYBACK_H_ */ +#endif /* !_RF__RF_COPYBACK_H_ */ diff --git a/sys/dev/raidframe/rf_cvscan.c b/sys/dev/raidframe/rf_cvscan.c index 488e0501bcb..cf55ef631c9 100644 --- a/sys/dev/raidframe/rf_cvscan.c +++ b/sys/dev/raidframe/rf_cvscan.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_cvscan.c,v 1.4 2000/01/07 14:50:20 peter Exp $ */ +/* $OpenBSD: rf_cvscan.c,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_cvscan.c,v 1.5 1999/08/13 03:41:53 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,13 +28,13 @@ * rights to redistribute these changes. */ -/******************************************************************************* +/***************************************************************************** * * cvscan.c -- prioritized cvscan disk queueing code. 
* * Nov 9, 1994, adapted from raidSim version (MCH) * - ******************************************************************************/ + *****************************************************************************/ #include "rf_types.h" #include "rf_alloclist.h" @@ -44,28 +45,40 @@ #include "rf_debugMem.h" #include "rf_general.h" -#define DO_CHECK_STATE(_hdr_) CheckCvscanState((_hdr_), __FILE__, __LINE__) +void rf_CheckCvscanState(RF_CvscanHeader_t *, char *, int); +void rf_PriorityInsert(RF_DiskQueueData_t **, RF_DiskQueueData_t *); +void rf_ReqInsert(RF_DiskQueueData_t **, RF_DiskQueueData_t *, + RF_CvscanArmDir_t); +RF_DiskQueueData_t *rf_ReqDequeue(RF_DiskQueueData_t **); +void rf_ReBalance(RF_CvscanHeader_t *); +void rf_Transfer(RF_DiskQueueData_t **, RF_DiskQueueData_t **); +void rf_RealEnqueue(RF_CvscanHeader_t *, RF_DiskQueueData_t *); + +#define DO_CHECK_STATE(_hdr_) rf_CheckCvscanState((_hdr_), __FILE__, __LINE__) + +#define pri_ok(p) (((p) == RF_IO_NORMAL_PRIORITY) || \ + ((p) == RF_IO_LOW_PRIORITY)) -#define pri_ok(p) ( ((p) == RF_IO_NORMAL_PRIORITY) || ((p) == RF_IO_LOW_PRIORITY)) -static void -CheckCvscanState(RF_CvscanHeader_t * hdr, char *file, int line) +void +rf_CheckCvscanState(RF_CvscanHeader_t *hdr, char *file, int line) { - long i, key; + long i, key; RF_DiskQueueData_t *tmp; if (hdr->left != (RF_DiskQueueData_t *) NULL) RF_ASSERT(hdr->left->sectorOffset < hdr->cur_block); for (key = hdr->cur_block, i = 0, tmp = hdr->left; - tmp != (RF_DiskQueueData_t *) NULL; - key = tmp->sectorOffset, i++, tmp = tmp->next) + tmp != (RF_DiskQueueData_t *) NULL; + key = tmp->sectorOffset, i++, tmp = tmp->next) RF_ASSERT(tmp->sectorOffset <= key - && tmp->priority == hdr->nxt_priority && pri_ok(tmp->priority)); + && tmp->priority == hdr->nxt_priority && + pri_ok(tmp->priority)); RF_ASSERT(i == hdr->left_cnt); for (key = hdr->cur_block, i = 0, tmp = hdr->right; - tmp != (RF_DiskQueueData_t *) NULL; - key = tmp->sectorOffset, i++, tmp = tmp->next) { + tmp != (RF_DiskQueueData_t *) NULL; + key = tmp->sectorOffset, i++, tmp = tmp->next) { RF_ASSERT(key <= tmp->sectorOffset); RF_ASSERT(tmp->priority == hdr->nxt_priority); RF_ASSERT(pri_ok(tmp->priority)); @@ -73,8 +86,8 @@ CheckCvscanState(RF_CvscanHeader_t * hdr, char *file, int line) RF_ASSERT(i == hdr->right_cnt); for (key = hdr->nxt_priority - 1, tmp = hdr->burner; - tmp != (RF_DiskQueueData_t *) NULL; - key = tmp->priority, tmp = tmp->next) { + tmp != (RF_DiskQueueData_t *) NULL; + key = tmp->priority, tmp = tmp->next) { RF_ASSERT(tmp); RF_ASSERT(hdr); RF_ASSERT(pri_ok(tmp->priority)); @@ -84,45 +97,47 @@ CheckCvscanState(RF_CvscanHeader_t * hdr, char *file, int line) } - -static void -PriorityInsert(RF_DiskQueueData_t ** list_ptr, RF_DiskQueueData_t * req) +void +rf_PriorityInsert(RF_DiskQueueData_t **list_ptr, RF_DiskQueueData_t *req) { - /* * insert block pointed to by req in to list whose first * entry is - * pointed to by the pointer that list_ptr points to * ie., list_ptr - * is a grandparent of the first entry */ + /* + * Insert block pointed to by req into list whose first entry is + * pointed to by the pointer that list_ptr points to. + * i.e. list_ptr is a grandparent of the first entry. 
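
The "grandparent" wording used in rf_PriorityInsert() and rf_ReqInsert() describes the usual pointer-to-pointer insertion idiom: the cursor walks over the link fields themselves, so a new element can be spliced in without special-casing the head of the list. A minimal stand-alone version of the same idiom (not RAIDframe code, names invented) is sketched here; the real functions that follow have exactly this shape.

#include <stdio.h>
#include <stdlib.h>

struct node {
	int key;
	struct node *next;
};

/*
 * Insert 'req' into the list whose first entry is pointed to by the
 * pointer that list_ptr points to, keeping keys in descending order.
 */
static void
priority_insert(struct node **list_ptr, struct node *req)
{
	for (; *list_ptr != NULL && (*list_ptr)->key > req->key;
	    list_ptr = &(*list_ptr)->next)
		;
	req->next = *list_ptr;
	*list_ptr = req;
}

int
main(void)
{
	struct node *head = NULL, *n;
	int keys[] = { 3, 7, 5 };
	size_t i;

	for (i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) {
		n = malloc(sizeof(*n));
		if (n == NULL)
			return (1);
		n->key = keys[i];
		priority_insert(&head, n);
	}
	for (n = head; n != NULL; n = n->next)
		printf("%d ", n->key);		/* prints: 7 5 3 */
	printf("\n");
	return (0);
}
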
+ */ for (; (*list_ptr) != (RF_DiskQueueData_t *) NULL && - (*list_ptr)->priority > req->priority; - list_ptr = &((*list_ptr)->next)) { + (*list_ptr)->priority > req->priority; + list_ptr = &((*list_ptr)->next)) { } req->next = (*list_ptr); (*list_ptr) = req; } - -static void -ReqInsert(RF_DiskQueueData_t ** list_ptr, RF_DiskQueueData_t * req, RF_CvscanArmDir_t order) +void +rf_ReqInsert(RF_DiskQueueData_t **list_ptr, RF_DiskQueueData_t *req, + RF_CvscanArmDir_t order) { - /* * insert block pointed to by req in to list whose first * entry is - * pointed to by the pointer that list_ptr points to * ie., list_ptr - * is a grandparent of the first entry */ + /* + * Insert block pointed to by req into list whose first entry is + * pointed to by the pointer that list_ptr points to. + * i.e. list_ptr is a grandparent of the first entry. + */ for (; (*list_ptr) != (RF_DiskQueueData_t *) NULL && - - ((order == rf_cvscan_RIGHT && (*list_ptr)->sectorOffset <= req->sectorOffset) - || (order == rf_cvscan_LEFT && (*list_ptr)->sectorOffset > req->sectorOffset)); - list_ptr = &((*list_ptr)->next)) { + ((order == rf_cvscan_RIGHT && (*list_ptr)->sectorOffset <= + req->sectorOffset) || (order == rf_cvscan_LEFT && + (*list_ptr)->sectorOffset > req->sectorOffset)); + list_ptr = &((*list_ptr)->next)) { } req->next = (*list_ptr); (*list_ptr) = req; } - -static RF_DiskQueueData_t * -ReqDequeue(RF_DiskQueueData_t ** list_ptr) +RF_DiskQueueData_t * +rf_ReqDequeue(RF_DiskQueueData_t **list_ptr) { RF_DiskQueueData_t *ret = (*list_ptr); if ((*list_ptr) != (RF_DiskQueueData_t *) NULL) { @@ -132,40 +147,39 @@ ReqDequeue(RF_DiskQueueData_t ** list_ptr) } - -static void -ReBalance(RF_CvscanHeader_t * hdr) +void +rf_ReBalance(RF_CvscanHeader_t *hdr) { /* DO_CHECK_STATE(hdr); */ while (hdr->right != (RF_DiskQueueData_t *) NULL && hdr->right->sectorOffset < hdr->cur_block) { hdr->right_cnt--; hdr->left_cnt++; - ReqInsert(&hdr->left, ReqDequeue(&hdr->right), rf_cvscan_LEFT); + rf_ReqInsert(&hdr->left, rf_ReqDequeue(&hdr->right), + rf_cvscan_LEFT); } /* DO_CHECK_STATE(hdr); */ } - -static void -Transfer(RF_DiskQueueData_t ** to_list_ptr, RF_DiskQueueData_t ** from_list_ptr) +void +rf_Transfer(RF_DiskQueueData_t **to_list_ptr, RF_DiskQueueData_t **from_list_ptr) { RF_DiskQueueData_t *gp; for (gp = (*from_list_ptr); gp != (RF_DiskQueueData_t *) NULL;) { RF_DiskQueueData_t *p = gp->next; - PriorityInsert(to_list_ptr, gp); + rf_PriorityInsert(to_list_ptr, gp); gp = p; } (*from_list_ptr) = (RF_DiskQueueData_t *) NULL; } - -static void -RealEnqueue(RF_CvscanHeader_t * hdr, RF_DiskQueueData_t * req) +void +rf_RealEnqueue(RF_CvscanHeader_t *hdr, RF_DiskQueueData_t *req) { - RF_ASSERT(req->priority == RF_IO_NORMAL_PRIORITY || req->priority == RF_IO_LOW_PRIORITY); + RF_ASSERT(req->priority == RF_IO_NORMAL_PRIORITY || + req->priority == RF_IO_LOW_PRIORITY); DO_CHECK_STATE(hdr); if (hdr->left_cnt == 0 && hdr->right_cnt == 0) { @@ -173,27 +187,27 @@ RealEnqueue(RF_CvscanHeader_t * hdr, RF_DiskQueueData_t * req) } if (req->priority > hdr->nxt_priority) { /* - ** dump all other outstanding requests on the back burner - */ - Transfer(&hdr->burner, &hdr->left); - Transfer(&hdr->burner, &hdr->right); + * Dump all other outstanding requests on the back burner. + */ + rf_Transfer(&hdr->burner, &hdr->left); + rf_Transfer(&hdr->burner, &hdr->right); hdr->left_cnt = 0; hdr->right_cnt = 0; hdr->nxt_priority = req->priority; } if (req->priority < hdr->nxt_priority) { /* - ** yet another low priority task! 
- */ - PriorityInsert(&hdr->burner, req); + * Yet another low priority task ! + */ + rf_PriorityInsert(&hdr->burner, req); } else { if (req->sectorOffset < hdr->cur_block) { - /* this request is to the left of the current arms */ - ReqInsert(&hdr->left, req, rf_cvscan_LEFT); + /* This request is to the left of the current arms. */ + rf_ReqInsert(&hdr->left, req, rf_cvscan_LEFT); hdr->left_cnt++; } else { - /* this request is to the right of the current arms */ - ReqInsert(&hdr->right, req, rf_cvscan_RIGHT); + /* This request is to the right of the current arms. */ + rf_ReqInsert(&hdr->right, req, rf_cvscan_RIGHT); hdr->right_cnt++; } } @@ -201,16 +215,14 @@ RealEnqueue(RF_CvscanHeader_t * hdr, RF_DiskQueueData_t * req) } - -void -rf_CvscanEnqueue(void *q_in, RF_DiskQueueData_t * elem, int priority) +void +rf_CvscanEnqueue(void *q_in, RF_DiskQueueData_t *elem, int priority) { RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; - RealEnqueue(hdr, elem /* req */ ); + rf_RealEnqueue(hdr, elem /* req */ ); } - RF_DiskQueueData_t * rf_CvscanDequeue(void *q_in) { @@ -224,17 +236,20 @@ rf_CvscanDequeue(void *q_in) if (hdr->left_cnt == 0 && hdr->right_cnt == 0) return ((RF_DiskQueueData_t *) NULL); - range = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, hdr->right_cnt)); + range = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, + hdr->right_cnt)); for (i = 0, tmp = hdr->left, sum_dist_left = - ((hdr->direction == rf_cvscan_RIGHT) ? range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { + ((hdr->direction == rf_cvscan_RIGHT) ? + range * hdr->change_penalty : 0); + tmp != (RF_DiskQueueData_t *) NULL && i < range; + tmp = tmp->next, i++) { sum_dist_left += hdr->cur_block - tmp->sectorOffset; } for (i = 0, tmp = hdr->right, sum_dist_right = - ((hdr->direction == rf_cvscan_LEFT) ? range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { + ((hdr->direction == rf_cvscan_LEFT) ? + range * hdr->change_penalty : 0); + tmp != (RF_DiskQueueData_t *) NULL && i < range; + tmp = tmp->next, i++) { sum_dist_right += tmp->sectorOffset - hdr->cur_block; } @@ -243,27 +258,28 @@ rf_CvscanDequeue(void *q_in) hdr->cur_block = hdr->left->sectorOffset + hdr->left->numSector; hdr->left_cnt = RF_MAX(hdr->left_cnt - 1, 0); tmp = hdr->left; - ret = (ReqDequeue(&hdr->left)) /*->parent*/ ; + ret = (rf_ReqDequeue(&hdr->left)) /*->parent*/ ; } else { hdr->direction = rf_cvscan_RIGHT; - hdr->cur_block = hdr->right->sectorOffset + hdr->right->numSector; + hdr->cur_block = hdr->right->sectorOffset + + hdr->right->numSector; hdr->right_cnt = RF_MAX(hdr->right_cnt - 1, 0); tmp = hdr->right; - ret = (ReqDequeue(&hdr->right)) /*->parent*/ ; + ret = (rf_ReqDequeue(&hdr->right)) /*->parent*/ ; } - ReBalance(hdr); + rf_ReBalance(hdr); if (hdr->left_cnt == 0 && hdr->right_cnt == 0 && hdr->burner != (RF_DiskQueueData_t *) NULL) { /* - ** restore low priority requests for next dequeue - */ + * Restore low priority requests for next dequeue. 
+ */ RF_DiskQueueData_t *burner = hdr->burner; hdr->nxt_priority = burner->priority; - while (burner != (RF_DiskQueueData_t *) NULL - && burner->priority == hdr->nxt_priority) { + while (burner != (RF_DiskQueueData_t *) NULL && + burner->priority == hdr->nxt_priority) { RF_DiskQueueData_t *next = burner->next; - RealEnqueue(hdr, burner); + rf_RealEnqueue(hdr, burner); burner = next; } hdr->burner = burner; @@ -273,7 +289,6 @@ rf_CvscanDequeue(void *q_in) } - RF_DiskQueueData_t * rf_CvscanPeek(void *q_in) { @@ -286,17 +301,20 @@ rf_CvscanPeek(void *q_in) if (hdr->left_cnt == 0 && hdr->right_cnt == 0) headElement = NULL; else { - range = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, hdr->right_cnt)); + range = RF_MIN(hdr->range_for_avg, RF_MIN(hdr->left_cnt, + hdr->right_cnt)); for (i = 0, tmp = hdr->left, sum_dist_left = - ((hdr->direction == rf_cvscan_RIGHT) ? range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { + ((hdr->direction == rf_cvscan_RIGHT) ? + range * hdr->change_penalty : 0); + tmp != (RF_DiskQueueData_t *) NULL && i < range; + tmp = tmp->next, i++) { sum_dist_left += hdr->cur_block - tmp->sectorOffset; } for (i = 0, tmp = hdr->right, sum_dist_right = - ((hdr->direction == rf_cvscan_LEFT) ? range * hdr->change_penalty : 0); - tmp != (RF_DiskQueueData_t *) NULL && i < range; - tmp = tmp->next, i++) { + ((hdr->direction == rf_cvscan_LEFT) ? + range * hdr->change_penalty : 0); + tmp != (RF_DiskQueueData_t *) NULL && i < range; + tmp = tmp->next, i++) { sum_dist_right += tmp->sectorOffset - hdr->cur_block; } @@ -309,33 +327,31 @@ rf_CvscanPeek(void *q_in) } - /* -** CVSCAN( 1, 0 ) is Shortest Seek Time First (SSTF) -** lowest average response time -** CVSCAN( 1, infinity ) is SCAN -** lowest response time standard deviation -*/ + * CVSCAN( 1, 0 ) is Shortest Seek Time First (SSTF) + * lowest average response time + * CVSCAN( 1, infinity ) is SCAN + * lowest response time standard deviation + */ -int -rf_CvscanConfigure() +int +rf_CvscanConfigure(void) { return (0); } - void * -rf_CvscanCreate(RF_SectorCount_t sectPerDisk, - RF_AllocListElem_t * clList, - RF_ShutdownList_t ** listp) +rf_CvscanCreate(RF_SectorCount_t sectPerDisk, RF_AllocListElem_t *clList, + RF_ShutdownList_t **listp) { RF_CvscanHeader_t *hdr; - long range = 2; /* Currently no mechanism to change these */ - long penalty = sectPerDisk / 5; + long range = 2; /* Currently no mechanism to change these. */ + long penalty = sectPerDisk / 5; - RF_MallocAndAdd(hdr, sizeof(RF_CvscanHeader_t), (RF_CvscanHeader_t *), clList); + RF_MallocAndAdd(hdr, sizeof(RF_CvscanHeader_t), (RF_CvscanHeader_t *), + clList); bzero((char *) hdr, sizeof(RF_CvscanHeader_t)); hdr->range_for_avg = RF_MAX(range, 1); hdr->change_penalty = RF_MAX(penalty, 0); @@ -351,10 +367,10 @@ rf_CvscanCreate(RF_SectorCount_t sectPerDisk, #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) -/* PrintCvscanQueue is not used, so we ignore it... */ +/* rf_PrintCvscanQueue is not used, so we ignore it... */ #else -static void -PrintCvscanQueue(RF_CvscanHeader_t * hdr) +void +rf_PrintCvscanQueue(RF_CvscanHeader_t *hdr) { RF_DiskQueueData_t *tmp; @@ -364,21 +380,24 @@ PrintCvscanQueue(RF_CvscanHeader_t * hdr) (int) hdr->cur_block, (hdr->direction == rf_cvscan_LEFT) ? 
"LEFT" : "RIGHT"); printf("\tLeft(%d): ", hdr->left_cnt); - for (tmp = hdr->left; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) + for (tmp = hdr->left; tmp != (RF_DiskQueueData_t *) NULL; + tmp = tmp->next) printf("(%d,%ld,%d) ", (int) tmp->sectorOffset, (long) (tmp->sectorOffset + tmp->numSector), tmp->priority); printf("\n"); printf("\tRight(%d): ", hdr->right_cnt); - for (tmp = hdr->right; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) + for (tmp = hdr->right; tmp != (RF_DiskQueueData_t *) NULL; + tmp = tmp->next) printf("(%d,%ld,%d) ", (int) tmp->sectorOffset, (long) (tmp->sectorOffset + tmp->numSector), tmp->priority); printf("\n"); printf("\tBurner: "); - for (tmp = hdr->burner; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next) + for (tmp = hdr->burner; tmp != (RF_DiskQueueData_t *) NULL; + tmp = tmp->next) printf("(%d,%ld,%d) ", (int) tmp->sectorOffset, (long) (tmp->sectorOffset + tmp->numSector), @@ -388,21 +407,24 @@ PrintCvscanQueue(RF_CvscanHeader_t * hdr) #endif -/* promotes reconstruction accesses for the given stripeID to normal priority. - * returns 1 if an access was found and zero otherwise. Normally, we should - * only have one or zero entries in the burner queue, so execution time should - * be short. +/* + * Promote reconstruction accesses for the given stripeID to normal priority. + * Return 1 if an access was found and zero otherwise. + * Normally, we should only have one or zero entries in the burner queue, + * so execution time should be short. */ -int -rf_CvscanPromote(void *q_in, RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru) +int +rf_CvscanPromote(void *q_in, RF_StripeNum_t parityStripeID, + RF_ReconUnitNum_t which_ru) { RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in; RF_DiskQueueData_t *trailer = NULL, *tmp = hdr->burner, *tlist = NULL; - int retval = 0; + int retval = 0; DO_CHECK_STATE(hdr); - while (tmp) { /* handle entries at the front of the list */ - if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) { + while (tmp) { /* Handle entries at the front of the list. */ + if (tmp->parityStripeID == parityStripeID && + tmp->which_ru == which_ru) { hdr->burner = tmp->next; tmp->priority = RF_IO_NORMAL_PRIORITY; tmp->next = tlist; @@ -415,12 +437,13 @@ rf_CvscanPromote(void *q_in, RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t wh trailer = tmp; tmp = tmp->next; } - while (tmp) { /* handle entries on the rest of the list */ - if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) { + while (tmp) { /* Handle entries on the rest of the list. */ + if (tmp->parityStripeID == parityStripeID && + tmp->which_ru == which_ru) { trailer->next = tmp->next; tmp->priority = RF_IO_NORMAL_PRIORITY; tmp->next = tlist; - tlist = tmp; /* insert on a temp queue */ + tlist = tmp; /* Insert on a temp queue. 
*/ tmp = trailer->next; } else { trailer = tmp; @@ -430,7 +453,7 @@ rf_CvscanPromote(void *q_in, RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t wh while (tlist) { retval++; tmp = tlist->next; - RealEnqueue(hdr, tlist); + rf_RealEnqueue(hdr, tlist); tlist = tmp; } RF_ASSERT(retval == 0 || retval == 1); diff --git a/sys/dev/raidframe/rf_cvscan.h b/sys/dev/raidframe/rf_cvscan.h index 4175865e6d6..42b61d42df2 100644 --- a/sys/dev/raidframe/rf_cvscan.h +++ b/sys/dev/raidframe/rf_cvscan.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_cvscan.h,v 1.2 1999/02/16 00:02:28 niklas Exp $ */ +/* $OpenBSD: rf_cvscan.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_cvscan.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,58 +29,55 @@ */ /* -** Disk scheduling by CVSCAN( N, r ) -** -** Given a set of requests, partition them into one set on each -** side of the current arm position. The trick is to pick which -** side you are going to service next; once a side is picked you will -** service the closest request. -** Let there be n1 requests on one side and n2 requests on the other -** side. If one of n1 or n2 is zero, select the other side. -** If both n1 and n2 are nonzero, select a "range" for examination -** that is N' = min( n1, n2, N ). Average the distance from the -** current position to the nearest N' requests on each side giving -** d1 and d2. -** Suppose the last decision was to move toward set 2, then the -** current direction is toward set 2, and you will only switch to set -** 1 if d1+R < d2 where R is r*(total number of cylinders), r in [0,1]. -** -** I extend this by applying only to the set of requests that all -** share the same, highest priority level. -*/ + * Disk scheduling by CVSCAN( N, r ) + * + * Given a set of requests, partition them into one set on each + * side of the current arm position. The trick is to pick which + * side you are going to service next; once a side is picked you will + * service the closest request. + * Let there be n1 requests on one side and n2 requests on the other + * side. If one of n1 or n2 is zero, select the other side. + * If both n1 and n2 are nonzero, select a "range" for examination + * that is N' = min( n1, n2, N ). Average the distance from the + * current position to the nearest N' requests on each side giving + * d1 and d2. + * Suppose the last decision was to move toward set 2, then the + * current direction is toward set 2, and you will only switch to set + * 1 if d1+R < d2 where R is r*(total number of cylinders), r in [0,1]. + * + * I extend this by applying only to the set of requests that all + * share the same, highest priority level. 
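/*
 * Illustrative sketch (standalone, not part of the driver): the
 * CVSCAN( N, r ) decision rule described above, written against plain
 * arrays of seek distances instead of the RF_DiskQueueData_t lists that
 * rf_CvscanPeek() walks.  All names below are hypothetical.
 */
#include <stddef.h>

enum cvscan_side { CVSCAN_LEFT, CVSCAN_RIGHT };

/*
 * 'left' and 'right' hold the distances from the current arm position to
 * the nearest requests on each side, closest first; 'N' is range_for_avg
 * and 'R' is the change penalty r * (total number of cylinders).
 */
static enum cvscan_side
cvscan_pick_side(const long *left, size_t n1, const long *right, size_t n2,
    size_t N, long R, enum cvscan_side dir)
{
	size_t range, i;
	long d1 = 0, d2 = 0;

	if (n1 == 0)
		return (CVSCAN_RIGHT);
	if (n2 == 0)
		return (CVSCAN_LEFT);

	range = n1 < n2 ? n1 : n2;
	if (N < range)
		range = N;
	if (range == 0)
		range = 1;	/* N is at least 1 in the driver. */
	for (i = 0; i < range; i++) {
		d1 += left[i];
		d2 += right[i];
	}
	d1 /= (long)range;
	d2 /= (long)range;

	/* Keep the current direction unless the other side wins by R. */
	if (dir == CVSCAN_RIGHT)
		return (d1 + R < d2 ? CVSCAN_LEFT : CVSCAN_RIGHT);
	return (d2 + R < d1 ? CVSCAN_RIGHT : CVSCAN_LEFT);
}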
+ */ -#ifndef _RF__RF_CVSCAN_H_ -#define _RF__RF_CVSCAN_H_ +#ifndef _RF__RF_CVSCAN_H_ +#define _RF__RF_CVSCAN_H_ #include "rf_diskqueue.h" typedef enum RF_CvscanArmDir_e { rf_cvscan_LEFT, rf_cvscan_RIGHT -} RF_CvscanArmDir_t; +} RF_CvscanArmDir_t; typedef struct RF_CvscanHeader_s { - long range_for_avg; /* CVSCAN param N */ - long change_penalty; /* CVSCAN param R */ - RF_CvscanArmDir_t direction; - RF_SectorNum_t cur_block; - int nxt_priority; - RF_DiskQueueData_t *left; - int left_cnt; - RF_DiskQueueData_t *right; - int right_cnt; - RF_DiskQueueData_t *burner; -} RF_CvscanHeader_t; + long range_for_avg; /* CVSCAN param N */ + long change_penalty; /* CVSCAN param R */ + RF_CvscanArmDir_t direction; + RF_SectorNum_t cur_block; + int nxt_priority; + RF_DiskQueueData_t *left; + int left_cnt; + RF_DiskQueueData_t *right; + int right_cnt; + RF_DiskQueueData_t *burner; +} RF_CvscanHeader_t; -int rf_CvscanConfigure(void); -void * -rf_CvscanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void rf_CvscanEnqueue(void *qptr, RF_DiskQueueData_t * req, int priority); -RF_DiskQueueData_t *rf_CvscanDequeue(void *qptr); -RF_DiskQueueData_t *rf_CvscanPeek(void *qptr); -int -rf_CvscanPromote(void *qptr, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); +int rf_CvscanConfigure(void); +void *rf_CvscanCreate(RF_SectorCount_t, RF_AllocListElem_t *, + RF_ShutdownList_t **); +void rf_CvscanEnqueue(void *, RF_DiskQueueData_t *, int); +RF_DiskQueueData_t *rf_CvscanDequeue(void *); +RF_DiskQueueData_t *rf_CvscanPeek(void *); +int rf_CvscanPromote(void *, RF_StripeNum_t, RF_ReconUnitNum_t); -#endif /* !_RF__RF_CVSCAN_H_ */ +#endif /* !_RF__RF_CVSCAN_H_ */ diff --git a/sys/dev/raidframe/rf_dag.h b/sys/dev/raidframe/rf_dag.h index 6221d88fbd4..f75153f7716 100644 --- a/sys/dev/raidframe/rf_dag.h +++ b/sys/dev/raidframe/rf_dag.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dag.h,v 1.3 2002/06/14 21:34:59 todd Exp $ */ +/* $OpenBSD: rf_dag.h,v 1.4 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dag.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -33,8 +34,8 @@ * * ****************************************************************************/ -#ifndef _RF__RF_DAG_H_ -#define _RF__RF_DAG_H_ +#ifndef _RF__RF_DAG_H_ +#define _RF__RF_DAG_H_ #include "rf_types.h" #include "rf_threadstuff.h" @@ -45,38 +46,50 @@ #include "rf_acctrace.h" #include "rf_memchunk.h" -#define RF_THREAD_CONTEXT 0 /* we were invoked from thread context */ -#define RF_INTR_CONTEXT 1 /* we were invoked from interrupt context */ -#define RF_MAX_ANTECEDENTS 20 /* max num of antecedents a node may posses */ +#define RF_THREAD_CONTEXT 0 /* We were invoked from thread context. */ +#define RF_INTR_CONTEXT 1 /* We were invoked from interrupt context. */ +#define RF_MAX_ANTECEDENTS 20 /* Max num of antecedents a node may possess. */ #include <sys/buf.h> -struct RF_PropHeader_s { /* structure for propagation of results */ - int resultNum; /* bind result # resultNum */ - int paramNum; /* to parameter # paramNum */ - RF_PropHeader_t *next; /* linked list for multiple results/params */ +struct RF_PropHeader_s { /* Structure for propagation of results. */ + int resultNum; /* Bind result # resultNum. */ + int paramNum; /* To parameter # paramNum. */ + RF_PropHeader_t *next; /* Linked list for multiple + * results/params. 
*/ }; typedef enum RF_NodeStatus_e { - rf_bwd1, /* node is ready for undo logging (backward - * error recovery only) */ - rf_bwd2, /* node has completed undo logging (backward - * error recovery only) */ - rf_wait, /* node is waiting to be executed */ - rf_fired, /* node is currently executing its do function */ - rf_good, /* node successfully completed execution of - * its do function */ - rf_bad, /* node failed to successfully execute its do - * function */ - rf_skipped, /* not used anymore, used to imply a node was - * not executed */ - rf_recover, /* node is currently executing its undo - * function */ - rf_panic, /* node failed to successfully execute its - * undo function */ - rf_undone /* node successfully executed its undo - * function */ -} RF_NodeStatus_t; + rf_bwd1, /* + * Node is ready for undo logging + * (backward error recovery only). + */ + rf_bwd2, /* + * Node has completed undo logging + * (backward error recovery only). + */ + rf_wait, /* Node is waiting to be executed. */ + rf_fired, /* Node is currently executing its do function. */ + rf_good, /* + * Node successfully completed execution + * of its do function. + */ + rf_bad, /* + * Node failed to successfully execute + * its do function. + */ + rf_skipped, /* + * Not used anymore, used to imply a node + * was not executed. + */ + rf_recover, /* Node is currently executing its undo function. */ + rf_panic, /* + * Node failed to successfully execute + * its undo function. + */ + rf_undone /* Node successfully executed its undo function. */ +} RF_NodeStatus_t; + /* * These were used to control skipping a node. * Now, these are only used as comments. @@ -86,151 +99,253 @@ typedef enum RF_AntecedentType_e { rf_antiData, rf_outputData, rf_control -} RF_AntecedentType_t; -#define RF_DAG_PTRCACHESIZE 40 -#define RF_DAG_PARAMCACHESIZE 12 +} RF_AntecedentType_t; +#define RF_DAG_PTRCACHESIZE 40 +#define RF_DAG_PARAMCACHESIZE 12 typedef RF_uint8 RF_DagNodeFlags_t; struct RF_DagNode_s { - RF_NodeStatus_t status; /* current status of this node */ - int (*doFunc) (RF_DagNode_t *); /* normal function */ - int (*undoFunc) (RF_DagNode_t *); /* func to remove effect of - * doFunc */ - int (*wakeFunc) (RF_DagNode_t *, int status); /* func called when the - * node completes an I/O */ - int numParams; /* number of parameters required by *funcPtr */ - int numResults; /* number of results produced by *funcPtr */ - int numAntecedents; /* number of antecedents */ - int numAntDone; /* number of antecedents which have finished */ - int numSuccedents; /* number of succedents */ - int numSuccFired; /* incremented when a succedent is fired - * during forward execution */ - int numSuccDone; /* incremented when a succedent finishes - * during rollBackward */ - int commitNode; /* boolean flag - if true, this is a commit - * node */ - RF_DagNode_t **succedents; /* succedents, array size - * numSuccedents */ - RF_DagNode_t **antecedents; /* antecedents, array size - * numAntecedents */ - RF_AntecedentType_t antType[RF_MAX_ANTECEDENTS]; /* type of each - * antecedent */ - void **results; /* array of results produced by *funcPtr */ - RF_DagParam_t *params; /* array of parameters required by *funcPtr */ - RF_PropHeader_t **propList; /* propagation list, size - * numSuccedents */ - RF_DagHeader_t *dagHdr; /* ptr to head of dag containing this node */ - void *dagFuncData; /* dag execution func uses this for whatever - * it wants */ - RF_DagNode_t *next; - int nodeNum; /* used by PrintDAG for debug only */ - int visited; /* used to avoid re-visiting nodes on 
DAG - * walks */ - /* ANY CODE THAT USES THIS FIELD MUST MAINTAIN THE PROPERTY THAT AFTER - * IT FINISHES, ALL VISITED FLAGS IN THE DAG ARE IDENTICAL */ - char *name; /* debug only */ - RF_DagNodeFlags_t flags;/* see below */ - RF_DagNode_t *dag_ptrs[RF_DAG_PTRCACHESIZE]; /* cache for performance */ - RF_DagParam_t dag_params[RF_DAG_PARAMCACHESIZE]; /* cache for performance */ + RF_NodeStatus_t status; /* Current status of this node. */ + int (*doFunc) (RF_DagNode_t *); + /* Normal function. */ + int (*undoFunc) (RF_DagNode_t *); + /* Func to remove effect of doFunc. */ + int (*wakeFunc) (RF_DagNode_t *, int status); + /* + * Func called when the node completes + * an I/O. + */ + int numParams; /* + * Number of parameters required + * by *funcPtr. + */ + int numResults; /* + * Number of results produced + * by *funcPtr. + */ + int numAntecedents; /* Number of antecedents. */ + int numAntDone; /* + * Number of antecedents that + * have finished. + */ + int numSuccedents; /* Number of succedents. */ + int numSuccFired; /* + * Incremented when a succedent + * is fired during forward execution. + */ + int numSuccDone; /* + * Incremented when a succedent + * finishes during rollBackward. + */ + int commitNode; /* + * Boolean flag - if true, this is + * a commit node. + */ + RF_DagNode_t **succedents; /* + * Succedents, array size + * numSuccedents. + */ + RF_DagNode_t **antecedents; /* + * Antecedents, array size + * numAntecedents. + */ + RF_AntecedentType_t antType[RF_MAX_ANTECEDENTS]; + /* Type of each antecedent. */ + void **results; /* + * Array of results produced + * by *funcPtr. + */ + RF_DagParam_t *params; /* + * Array of parameters required + * by *funcPtr. + */ + RF_PropHeader_t **propList; /* + * Propagation list, + * size numSuccedents. + */ + RF_DagHeader_t *dagHdr; /* + * Ptr to head of dag containing + * this node. + */ + void *dagFuncData; /* + * Dag execution func uses this + * for whatever it wants. + */ + RF_DagNode_t *next; + int nodeNum; /* Used by PrintDAG for debug only. */ + int visited; /* + * Used to avoid re-visiting + * nodes on DAG walks. + */ + /* + * ANY CODE THAT USES THIS FIELD MUST MAINTAIN THE PROPERTY THAT + * AFTER IT FINISHES, ALL VISITED FLAGS IN THE DAG ARE IDENTICAL. + */ + + char *name; /* Debug only. */ + RF_DagNodeFlags_t flags; /* See below. */ + RF_DagNode_t *dag_ptrs[RF_DAG_PTRCACHESIZE]; + /* Cache for performance. */ + RF_DagParam_t dag_params[RF_DAG_PARAMCACHESIZE]; + /* Cache for performance. */ }; + /* - * Bit values for flags field of RF_DagNode_t + * Bit values for flags field of RF_DagNode_t. */ -#define RF_DAGNODE_FLAG_NONE 0x00 -#define RF_DAGNODE_FLAG_YIELD 0x01 /* in the kernel, yield the processor - * before firing this node */ +#define RF_DAGNODE_FLAG_NONE 0x00 +#define RF_DAGNODE_FLAG_YIELD 0x01 /* + * In the kernel, yield the processor + * before firing this node. + */ -/* enable - DAG ready for normal execution, no errors encountered - * rollForward - DAG encountered an error after commit point, rolling forward - * rollBackward - DAG encountered an error prior to commit point, rolling backward +/* + * rf_enable - DAG ready for normal execution, no errors encountered. + * rf_rollForward - DAG encountered an error after commit point, rolling + * forward. + * rf_rollBackward - DAG encountered an error prior to commit point, rolling + * backward. 
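/*
 * Sketch of how the rf_enable / rf_rollForward / rf_rollBackward states
 * relate to the commit point (hypothetical helper; the real state changes
 * are made by the DAG execution engine, not by this function).  Assumes
 * "rf_dag.h" is in scope for RF_DagHeader_t and RF_DagStatus_t.
 */
static RF_DagStatus_t
dag_error_recovery_mode(const RF_DagHeader_t *dag_h)
{
	/*
	 * No commit node has fired yet: every executed node must be
	 * undoable, so the graph fails atomically by rolling backward.
	 * Once a commit node has fired, the engine finishes the graph
	 * by rolling forward instead.
	 */
	return (dag_h->numCommits == 0 ? rf_rollBackward : rf_rollForward);
}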
*/ typedef enum RF_DagStatus_e { rf_enable, rf_rollForward, rf_rollBackward -} RF_DagStatus_t; -#define RF_MAX_HDR_SUCC 1 +} RF_DagStatus_t; + +#define RF_MAX_HDR_SUCC 1 -#define RF_MAXCHUNKS 10 +#define RF_MAXCHUNKS 10 struct RF_DagHeader_s { - RF_DagStatus_t status; /* status of this DAG */ - int numSuccedents; /* DAG may be a tree, i.e. may have > 1 root */ - int numCommitNodes; /* number of commit nodes in graph */ - int numCommits; /* number of commit nodes which have been - * fired */ - RF_DagNode_t *succedents[RF_MAX_HDR_SUCC]; /* array of succedents, - * size numSuccedents */ - RF_DagHeader_t *next; /* ptr to allow a list of dags */ - RF_AllocListElem_t *allocList; /* ptr to list of ptrs to be freed - * prior to freeing DAG */ - RF_AccessStripeMapHeader_t *asmList; /* list of access stripe maps - * to be freed */ - int nodeNum; /* used by PrintDAG for debug only */ - int numNodesCompleted; - RF_AccTraceEntry_t *tracerec; /* perf mon only */ - - void (*cbFunc) (void *); /* function to call when the dag - * completes */ - void *cbArg; /* argument for cbFunc */ - char *creator; /* name of function used to create this dag */ - - RF_Raid_t *raidPtr; /* the descriptor for the RAID device this DAG - * is for */ - void *bp; /* the bp for this I/O passed down from the - * file system. ignored outside kernel */ - - RF_ChunkDesc_t *memChunk[RF_MAXCHUNKS]; /* experimental- Chunks of - * memory to be retained upon - * DAG free for re-use */ - int chunkIndex; /* the idea is to avoid calls to alloc and - * free */ + RF_DagStatus_t status; /* Status of this DAG. */ + int numSuccedents; /* + * DAG may be a tree, + * i.e. may have > 1 root. + */ + int numCommitNodes; /* + * Number of commit nodes + * in graph. + */ + int numCommits; /* + * Number of commit nodes + * that have been fired. + */ + RF_DagNode_t *succedents[RF_MAX_HDR_SUCC]; /* + * Array of succedents, + * size numSuccedents. + */ + RF_DagHeader_t *next; /* + * Ptr to allow a list + * of dags. + */ + RF_AllocListElem_t *allocList; /* + * Ptr to list of ptrs + * to be freed prior to + * freeing DAG. + */ + RF_AccessStripeMapHeader_t *asmList; /* + * List of access stripe maps + * to be freed. + */ + int nodeNum; /* + * Used by PrintDAG for + * debug only. + */ + int numNodesCompleted; + RF_AccTraceEntry_t *tracerec; /* Perf mon only. */ - RF_ChunkDesc_t **xtraMemChunk; /* escape hatch which allows - * SelectAlgorithm to merge memChunks - * from several dags */ - int xtraChunkIndex; /* number of ptrs to valid chunks */ - int xtraChunkCnt; /* number of ptrs to chunks allocated */ + void (*cbFunc) (void *); /* + * Function to call when + * the dag completes. + */ + void *cbArg; /* Argument for cbFunc. */ + char *creator; /* + * Name of function used + * to create this dag. + */ + RF_Raid_t *raidPtr; /* + * The descriptor for the + * RAID device this DAG + * is for. + */ + void *bp; /* + * The bp for this I/O passed + * down from the file system. + * ignored outside kernel. + */ + + RF_ChunkDesc_t *memChunk[RF_MAXCHUNKS]; /* + * Experimental- Chunks of + * memory to be retained upon + * DAG free for re-use. + */ + int chunkIndex; /* + * The idea is to avoid calls + * to alloc and free. + */ + + RF_ChunkDesc_t **xtraMemChunk; /* + * Escape hatch that allows + * SelectAlgorithm to merge + * memChunks from several dags. + */ + int xtraChunkIndex; /* + * Number of ptrs to valid + * chunks. + */ + int xtraChunkCnt; /* + * Number of ptrs to chunks + * allocated. 
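/*
 * Sketch (hypothetical helper): the cbFunc/cbArg pair recorded in the
 * header is the "call me when the whole graph is done" hook; firing it
 * would look roughly like this.  The actual call site lives in the DAG
 * execution engine, not here.
 */
static void
dag_fire_completion(RF_DagHeader_t *dag_h)
{
	if (dag_h->cbFunc != NULL)
		dag_h->cbFunc(dag_h->cbArg);
}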
+ */ }; struct RF_DagList_s { - /* common info for a list of dags which will be fired sequentially */ - int numDags; /* number of dags in the list */ - int numDagsFired; /* number of dags in list which have initiated - * execution */ - int numDagsDone; /* number of dags in list which have completed - * execution */ - RF_DagHeader_t *dags; /* list of dags */ - RF_RaidAccessDesc_t *desc; /* ptr to descriptor for this access */ - RF_AccTraceEntry_t tracerec; /* perf mon info for dags (not user - * info) */ + /* Common info for a list of dags that will be fired sequentially. */ + int numDags; /* Number of dags in the list. */ + int numDagsFired; /* + * Number of dags in list that + * have initiated execution. + */ + int numDagsDone; /* + * Number of dags in list that + * have completed execution. + */ + RF_DagHeader_t *dags; /* List of dags. */ + RF_RaidAccessDesc_t *desc; /* Ptr to descriptor for this access. */ + RF_AccTraceEntry_t tracerec; /* + * Perf mon info for dags (not user + * info). + */ }; -/* resets a node so that it can be fired again */ -#define RF_ResetNode(_n_) { \ - (_n_)->status = rf_wait; \ - (_n_)->numAntDone = 0; \ - (_n_)->numSuccFired = 0; \ - (_n_)->numSuccDone = 0; \ - (_n_)->next = NULL; \ -} - -#define RF_ResetDagHeader(_h_) { \ - (_h_)->numNodesCompleted = 0; \ - (_h_)->numCommits = 0; \ - (_h_)->status = rf_enable; \ -} - -/* convenience macro for declaring a create dag function */ - -#define RF_CREATE_DAG_FUNC_DECL(_name_) \ -void _name_ ( \ - RF_Raid_t *raidPtr, \ - RF_AccessStripeMap_t *asmap, \ - RF_DagHeader_t *dag_h, \ - void *bp, \ - RF_RaidAccessFlags_t flags, \ - RF_AllocListElem_t *allocList) - -#endif /* !_RF__RF_DAG_H_ */ + +/* Reset a node so that it can be fired again. */ +#define RF_ResetNode(_n_) do { \ + (_n_)->status = rf_wait; \ + (_n_)->numAntDone = 0; \ + (_n_)->numSuccFired = 0; \ + (_n_)->numSuccDone = 0; \ + (_n_)->next = NULL; \ +} while (0) + +#define RF_ResetDagHeader(_h_) do { \ + (_h_)->numNodesCompleted = 0; \ + (_h_)->numCommits = 0; \ + (_h_)->status = rf_enable; \ +} while (0) + +/* Convenience macro for declaring a create dag function. */ +#define RF_CREATE_DAG_FUNC_DECL(_name_) \ +void _name_ (RF_Raid_t *, RF_AccessStripeMap_t *, RF_DagHeader_t *, \ + void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); \ +void _name_ ( \ + RF_Raid_t *raidPtr, \ + RF_AccessStripeMap_t *asmap, \ + RF_DagHeader_t *dag_h, \ + void *bp, \ + RF_RaidAccessFlags_t flags, \ + RF_AllocListElem_t *allocList \ +) + +#endif /* !_RF__RF_DAG_H_ */ diff --git a/sys/dev/raidframe/rf_dagdegrd.c b/sys/dev/raidframe/rf_dagdegrd.c index 306fa952d48..be1d06ad1ee 100644 --- a/sys/dev/raidframe/rf_dagdegrd.c +++ b/sys/dev/raidframe/rf_dagdegrd.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagdegrd.c,v 1.4 2000/01/11 18:02:20 peter Exp $ */ +/* $OpenBSD: rf_dagdegrd.c,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagdegrd.c,v 1.5 2000/01/07 03:40:57 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -30,7 +31,7 @@ /* * rf_dagdegrd.c * - * code for creating degraded read DAGs + * Code for creating degraded read DAGs. */ #include "rf_types.h" @@ -44,14 +45,14 @@ #include "rf_dagdegrd.h" -/****************************************************************************** +/***************************************************************************** * * General comments on DAG creation: * - * All DAGs in this file use roll-away error recovery. Each DAG has a single - * commit node, usually called "Cmt." 
If an error occurs before the Cmt node + * All DAGs in this file use roll-away error recovery. Each DAG has a single + * commit node, usually called "Cmt". If an error occurs before the Cmt node * is reached, the execution engine will halt forward execution and work - * backward through the graph, executing the undo functions. Assuming that + * backward through the graph, executing the undo functions. Assuming that * each node in the graph prior to the Cmt node are undoable and atomic - or - * does not make changes to permanent state, the graph will fail atomically. * If an error occurs after the Cmt node executes, the engine will roll-forward @@ -60,67 +61,69 @@ * * A graph has only 1 Cmt node. * - */ + *****************************************************************************/ -/****************************************************************************** +/***************************************************************************** * * The following wrappers map the standard DAG creation interface to the - * DAG creation routines. Additionally, these wrappers enable experimentation + * DAG creation routines. Additionally, these wrappers enable experimentation * with new DAG structures by providing an extra level of indirection, allowing * the DAG creation routines to be replaced at this single point. - */ + * + *****************************************************************************/ -void +void rf_CreateRaidFiveDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList) { rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_xorRecoveryFuncs); } -/****************************************************************************** +/***************************************************************************** * - * DAG creation code begins here - */ + * DAG creation code begins here. + * + *****************************************************************************/ -/****************************************************************************** - * Create a degraded read DAG for RAID level 1 +/***************************************************************************** + * Create a degraded read DAG for RAID level 1. * * Hdr -> Nil -> R(p/s)d -> Commit -> Trm * - * The "Rd" node reads data from the surviving disk in the mirror pair + * The "Rd" node reads data from the surviving disk in the mirror pair. * Rpd - read of primary copy * Rsd - read of secondary copy * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (for holding write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation + * Parameters: raidPtr - description of the physical array + * asmap - logical & physical addresses for this access + * bp - buffer ptr (for holding write data) + * flags - general flags (e.g. 
disk locking) + * allocList - list of memory allocated in DAG creation *****************************************************************************/ -void +void rf_CreateRaidOneDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList) { RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; RF_StripeNum_t parityStripeID; RF_ReconUnitNum_t which_ru; RF_PhysDiskAddr_t *pda; - int useMirror, i; + int useMirror, i; useMirror = 0; parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), @@ -129,14 +132,15 @@ rf_CreateRaidOneDegradedReadDAG( printf("[Creating RAID level 1 degraded read DAG]\n"); } dag_h->creator = "RaidOneDegradedReadDAG"; - /* alloc the Wnd nodes and the Wmir node */ + /* Alloc the Wnd nodes and the Wmir node. */ if (asmap->numDataFailed == 0) useMirror = RF_FALSE; else useMirror = RF_TRUE; - /* total number of nodes = 1 + (block + commit + terminator) */ - RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + /* Total number of nodes = 1 + (block + commit + terminator). */ + RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), + allocList); i = 0; rdNode = &nodes[i]; i++; @@ -147,65 +151,71 @@ rf_CreateRaidOneDegradedReadDAG( termNode = &nodes[i]; i++; - /* this dag can not commit until the commit node is reached. errors + /* + * This dag can not commit until the commit node is reached. Errors * prior to the commit point imply the dag has failed and must be - * retried */ + * retried. + */ dag_h->numCommitNodes = 1; dag_h->numCommits = 0; dag_h->numSuccedents = 1; - /* initialize the block, commit, and terminator nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + /* Initialize the block, commit, and terminator nodes. */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); pda = asmap->physInfo; RF_ASSERT(pda != NULL); - /* parityInfo must describe entire parity unit */ + /* parityInfo must describe entire parity unit. */ RF_ASSERT(asmap->parityInfo->next == NULL); - /* initialize the data node */ + /* Initialize the data node. */ if (!useMirror) { - /* read primary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); + /* Read primary copy of data. 
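/*
 * Sketch (hypothetical helper, not in the driver): every disk-read node
 * set up below takes the same four parameters, so the repeated
 * assignments could be captured as follows.  Assumes the raidframe
 * headers already included by this file.
 */
static void
dag_set_read_params(RF_DagNode_t *node, RF_PhysDiskAddr_t *pda, void *buf,
    RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru)
{
	node->params[0].p = pda;
	node->params[1].p = buf;
	node->params[2].v = parityStripeID;
	node->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
	    which_ru);
}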
*/ + rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Rpd", allocList); rdNode->params[0].p = pda; rdNode->params[1].p = pda->bufPtr; rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); } else { - /* read secondary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); + /* Read secondary copy of data. */ + rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Rsd", allocList); rdNode->params[0].p = asmap->parityInfo; rdNode->params[1].p = pda->bufPtr; rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); } - /* connect header to block node */ + /* Connect header to block node. */ RF_ASSERT(dag_h->numSuccedents == 1); RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; - /* connect block node to rdnode */ + /* Connect block node to rdnode. */ RF_ASSERT(blockNode->numSuccedents == 1); RF_ASSERT(rdNode->numAntecedents == 1); blockNode->succedents[0] = rdNode; rdNode->antecedents[0] = blockNode; rdNode->antType[0] = rf_control; - /* connect rdnode to commit node */ + /* Connect rdnode to commit node. */ RF_ASSERT(rdNode->numSuccedents == 1); RF_ASSERT(commitNode->numAntecedents == 1); rdNode->succedents[0] = commitNode; commitNode->antecedents[0] = rdNode; commitNode->antType[0] = rf_control; - /* connect commit node to terminator */ + /* Connect commit node to terminator. */ RF_ASSERT(commitNode->numSuccedents == 1); RF_ASSERT(termNode->numAntecedents == 1); RF_ASSERT(termNode->numSuccedents == 0); @@ -215,64 +225,64 @@ rf_CreateRaidOneDegradedReadDAG( } - -/****************************************************************************** +/***************************************************************************** * - * creates a DAG to perform a degraded-mode read of data within one stripe. + * Create a DAG to perform a degraded-mode read of data within one stripe. * This DAG is as follows: * * Hdr -> Block -> Rud -> Xor -> Cmt -> T - * -> Rrd -> - * -> Rp --> + * -> Rrd -> + * -> Rp --> * - * Each R node is a successor of the L node - * One successor arc from each R node goes to C, and the other to X + * Each R node is a successor of the L node. + * One successor arc from each R node goes to C, and the other to X. * There is one Rud for each chunk of surviving user data requested by the * user, and one Rrd for each chunk of surviving user data _not_ being read by - * the user + * the user. * R = read, ud = user data, rd = recovery (surviving) data, p = parity * X = XOR, C = Commit, T = terminate * * The block node guarantees a single source node. * * Note: The target buffer for the XOR node is set to the actual user buffer - * where the failed data is supposed to end up. This buffer is zero'd by the - * code here. Thus, if you create a degraded read dag, use it, and then + * where the failed data is supposed to end up. This buffer is zero'd by the + * code here. Thus, if you create a degraded read dag, use it, and then * re-use, you have to be sure to zero the target buffer prior to the re-use. 
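/*
 * Sketch of the recovery rule the Xor node applies in the XOR (RAID 5)
 * case, shown on plain buffers: the lost data is the XOR of the parity
 * and all surviving data in the stripe.  Standalone illustration only;
 * the real work is done by the function passed in via recFunc
 * (&rf_xorRecoveryFuncs for the RAID 5 wrapper above).
 */
#include <stddef.h>
#include <stdint.h>

static void
xor_recover(uint8_t *lost, const uint8_t *const *survivors, size_t nbufs,
    size_t len)
{
	size_t i, j;

	/*
	 * 'lost' must start out zeroed, exactly as the comment above
	 * requires for the XOR node's target buffer.
	 */
	for (i = 0; i < nbufs; i++)
		for (j = 0; j < len; j++)
			lost[j] ^= survivors[i][j];
}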
* * The recfunc argument at the end specifies the name and function used for - * the redundancy - * recovery function. + * the redundancy recovery function. * *****************************************************************************/ -void +void rf_CreateDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * recFunc) + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList, + RF_RedFuncs_t *recFunc) { RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode; RF_DagNode_t *commitNode, *rpNode, *termNode; - int nNodes, nRrdNodes, nRudNodes, nXorBufs, i; - int j, paramNum; + int nNodes, nRrdNodes, nRudNodes, nXorBufs, i; + int j, paramNum; RF_SectorCount_t sectorsPerSU; RF_ReconUnitNum_t which_ru; - char *overlappingPDAs;/* a temporary array of flags */ + char *overlappingPDAs; /* A temporary array of flags. */ RF_AccessStripeMapHeader_t *new_asm_h[2]; RF_PhysDiskAddr_t *pda, *parityPDA; RF_StripeNum_t parityStripeID; RF_PhysDiskAddr_t *failedPDA; RF_RaidLayout_t *layoutPtr; - char *rpBuf; + char *rpBuf; layoutPtr = &(raidPtr->Layout); - /* failedPDA points to the pda within the asm that targets the failed - * disk */ + /* + * failedPDA points to the pda within the asm that targets + * the failed disk. + */ failedPDA = asmap->failedPDAs[0]; parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); @@ -285,25 +295,30 @@ rf_CreateDegradedReadDAG( dag_h->creator = "DegradedReadDAG"; /* - * generate two ASMs identifying the surviving data we need - * in order to recover the lost data - */ + * Generate two ASMs identifying the surviving data we need + * in order to recover the lost data. + */ - /* overlappingPDAs array must be zero'd */ - RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *)); - rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs, - &rpBuf, overlappingPDAs, allocList); + /* overlappingPDAs array must be zero'd. */ + RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, + sizeof(char), (char *)); + rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, + new_asm_h, &nXorBufs, &rpBuf, overlappingPDAs, allocList); /* - * create all the nodes at once - * - * -1 because no access is generated for the failed pda - */ + * Create all the nodes at once. + * + * -1 because no access is generated for the failed pda. + */ nRudNodes = asmap->numStripeUnitsAccessed - 1; - nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + - ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); - nNodes = 5 + nRudNodes + nRrdNodes; /* lock, unlock, xor, Rp, Rud, - * Rrd */ + nRrdNodes = ((new_asm_h[0]) ? + new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + + ((new_asm_h[1]) ? + new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); + nNodes = 5 + nRudNodes + nRrdNodes; /* + * lock, unlock, xor, Rp, + * Rud, Rrd + */ RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); i = 0; @@ -323,136 +338,154 @@ rf_CreateDegradedReadDAG( i += nRrdNodes; RF_ASSERT(i == nNodes); - /* initialize nodes */ + /* Initialize nodes. 
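/*
 * Worked example of the node count above (illustrative numbers only):
 * on a 4-data-column RAID 5, if the user access touches two stripe units
 * and one of them lives on the failed disk, then
 *
 *	nRudNodes = 2 - 1 = 1	(the surviving unit the user asked for)
 *	nRrdNodes = 2		(the surviving units not being read)
 *	nNodes    = 5 + 1 + 2 = 8
 *
 * i.e. the five fixed nodes plus one Rud and two Rrd reads.
 */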
*/ dag_h->numCommitNodes = 1; dag_h->numCommits = 0; - /* this dag can not commit until the commit node is reached errors - * prior to the commit point imply the dag has failed */ + /* + * This dag can not commit until the commit node is reached. + * Errors prior to the commit point imply the dag has failed. + */ dag_h->numSuccedents = 1; - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc, - NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h, - recFunc->SimpleName, allocList); - - /* fill in the Rud nodes */ - for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) { + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, + dag_h, "Nil", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, + rf_NullNodeUndoFunc, NULL, 1, nRudNodes + nRrdNodes + 1, + 2 * nXorBufs + 2, 1, dag_h, recFunc->SimpleName, allocList); + + /* Fill in the Rud nodes. */ + for (pda = asmap->physInfo, i = 0; i < nRudNodes; + i++, pda = pda->next) { if (pda == failedPDA) { i--; continue; } rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Rud", allocList); + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Rud", allocList); RF_ASSERT(pda); rudNodes[i].params[0].p = pda; rudNodes[i].params[1].p = pda->bufPtr; rudNodes[i].params[2].v = parityStripeID; - rudNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rudNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); } - /* fill in the Rrd nodes */ + /* Fill in the Rrd nodes. 
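/*
 * Sketch: the i--/continue pair in the Rud loop above keeps the node
 * index dense while still walking every pda in the access.  An
 * equivalent formulation (illustration only, same behaviour assuming
 * the pda list holds exactly nRudNodes + 1 entries):
 *
 *	for (pda = asmap->physInfo, i = 0; pda != NULL; pda = pda->next) {
 *		if (pda == failedPDA)
 *			continue;
 *		... initialize rudNodes[i] from pda ...
 *		i++;
 *	}
 */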
*/ i = 0; if (new_asm_h[0]) { for (pda = new_asm_h[0]->stripeMap->physInfo; - i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed; - i++, pda = pda->next) { - rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, - dag_h, "Rrd", allocList); + i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed; + i++, pda = pda->next) { + rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Rrd", allocList); RF_ASSERT(pda); rrdNodes[i].params[0].p = pda; rrdNodes[i].params[1].p = pda->bufPtr; rrdNodes[i].params[2].v = parityStripeID; - rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rrdNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, + which_ru); } } if (new_asm_h[1]) { for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; j++, pda = pda->next) { - rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, - dag_h, "Rrd", allocList); + rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Rrd", allocList); RF_ASSERT(pda); rrdNodes[i + j].params[0].p = pda; rrdNodes[i + j].params[1].p = pda->bufPtr; rrdNodes[i + j].params[2].v = parityStripeID; - rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rrdNodes[i + j].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, + which_ru); } } - /* make a PDA for the parity unit */ - RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + /* Make a PDA for the parity unit. */ + RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); parityPDA->row = asmap->parityInfo->row; parityPDA->col = asmap->parityInfo->col; - parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); + parityPDA->startSector = ((asmap->parityInfo->startSector / + sectorsPerSU) * sectorsPerSU) + + (failedPDA->startSector % sectorsPerSU); parityPDA->numSector = failedPDA->numSector; - /* initialize the Rp node */ - rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList); + /* Initialize the Rp node. */ + rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Rp ", allocList); rpNode->params[0].p = parityPDA; rpNode->params[1].p = rpBuf; rpNode->params[2].v = parityStripeID; - rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, + which_ru); /* - * the last and nastiest step is to assign all - * the parameters of the Xor node - */ + * The last and nastiest step is to assign all + * the parameters of the Xor node. + */ paramNum = 0; for (i = 0; i < nRrdNodes; i++) { - /* all the Rrd nodes need to be xored together */ + /* All the Rrd nodes need to be xored together. */ xorNode->params[paramNum++] = rrdNodes[i].params[0]; xorNode->params[paramNum++] = rrdNodes[i].params[1]; } for (i = 0; i < nRudNodes; i++) { - /* any Rud nodes that overlap the failed access need to be - * xored in */ + /* Any Rud nodes that overlap the failed access need to be + * xored in. 
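/*
 * Worked example of the parityPDA->startSector arithmetic above
 * (illustrative numbers): with sectorsPerSU == 32, a parity unit starting
 * at sector 96 and a failed data region starting at sector 133,
 *
 *	(96 / 32) * 32 + (133 % 32) = 96 + 5 = 101
 *
 * i.e. the parity read is aligned to the parity stripe unit and then
 * offset by the same amount as the failed data within its unit, so it
 * covers exactly the failedPDA->numSector sectors needed for recovery.
 */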
*/ if (overlappingPDAs[i]) { - RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - bcopy((char *) rudNodes[i].params[0].p, (char *) pda, sizeof(RF_PhysDiskAddr_t)); - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); + RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); + bcopy((char *) rudNodes[i].params[0].p, (char *) pda, + sizeof(RF_PhysDiskAddr_t)); + rf_RangeRestrictPDA(raidPtr, failedPDA, pda, + RF_RESTRICT_DOBUFFER, 0); xorNode->params[paramNum++].p = pda; xorNode->params[paramNum++].p = pda->bufPtr; } } RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); - /* install parity pda as last set of params to be xor'd */ + /* Install parity pda as last set of params to be xor'd. */ xorNode->params[paramNum++].p = parityPDA; xorNode->params[paramNum++].p = rpBuf; /* - * the last 2 params to the recovery xor node are - * the failed PDA and the raidPtr - */ + * The last 2 params to the recovery xor node are + * the failed PDA and the raidPtr. + */ xorNode->params[paramNum++].p = failedPDA; xorNode->params[paramNum++].p = raidPtr; RF_ASSERT(paramNum == 2 * nXorBufs + 2); /* - * The xor node uses results[0] as the target buffer. - * Set pointer and zero the buffer. In the kernel, this - * may be a user buffer in which case we have to remap it. - */ + * The xor node uses results[0] as the target buffer. + * Set pointer and zero the buffer. In the kernel, this + * may be a user buffer in which case we have to remap it. + */ xorNode->results[0] = failedPDA->bufPtr; RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr, - failedPDA->numSector)); + failedPDA->numSector)); - /* connect nodes to form graph */ - /* connect the header to the block node */ + /* Connect nodes to form graph. */ + /* Connect the header to the block node. */ RF_ASSERT(dag_h->numSuccedents == 1); RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; - /* connect the block node to the read nodes */ + /* Connect the block node to the read nodes. */ RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes)); RF_ASSERT(rpNode->numAntecedents == 1); blockNode->succedents[0] = rpNode; @@ -471,7 +504,7 @@ rf_CreateDegradedReadDAG( rudNodes[i].antType[0] = rf_control; } - /* connect the read nodes to the xor node */ + /* Connect the read nodes to the xor node. */ RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes)); RF_ASSERT(rpNode->numSuccedents == 1); rpNode->succedents[0] = xorNode; @@ -490,14 +523,14 @@ rf_CreateDegradedReadDAG( xorNode->antType[1 + nRrdNodes + i] = rf_trueData; } - /* connect the xor node to the commit node */ + /* Connect the xor node to the commit node. */ RF_ASSERT(xorNode->numSuccedents == 1); RF_ASSERT(commitNode->numAntecedents == 1); xorNode->succedents[0] = commitNode; commitNode->antecedents[0] = xorNode; commitNode->antType[0] = rf_control; - /* connect the termNode to the commit node */ + /* Connect the termNode to the commit node. */ RF_ASSERT(commitNode->numSuccedents == 1); RF_ASSERT(termNode->numAntecedents == 1); RF_ASSERT(termNode->numSuccedents == 0); @@ -507,8 +540,8 @@ rf_CreateDegradedReadDAG( } -/****************************************************************************** - * Create a degraded read DAG for Chained Declustering +/***************************************************************************** + * Create a degraded read DAG for Chained Declustering. 
* * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm * @@ -516,25 +549,26 @@ rf_CreateDegradedReadDAG( * Rpd - read of primary copy * Rsd - read of secondary copy * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (for holding write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation + * Parameters: raidPtr - description of the physical array + * asmap - logical & physical addresses for this access + * bp - buffer ptr (for holding write data) + * flags - general flags (e.g. disk locking) + * allocList - list of memory allocated in DAG creation *****************************************************************************/ -void +void rf_CreateRaidCDegradedReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList +) { RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode; RF_StripeNum_t parityStripeID; - int useMirror, i, shiftable; + int useMirror, i, shiftable; RF_ReconUnitNum_t which_ru; RF_PhysDiskAddr_t *pda; @@ -551,14 +585,15 @@ rf_CreateRaidCDegradedReadDAG( printf("[Creating RAID C degraded read DAG]\n"); } dag_h->creator = "RaidCDegradedReadDAG"; - /* alloc the Wnd nodes and the Wmir node */ + /* Alloc the Wnd nodes and the Wmir node. */ if (asmap->numDataFailed == 0) useMirror = RF_FALSE; else useMirror = RF_TRUE; /* total number of nodes = 1 + (block + commit + terminator) */ - RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *), + allocList); i = 0; rdNode = &nodes[i]; i++; @@ -570,74 +605,79 @@ rf_CreateRaidCDegradedReadDAG( i++; /* - * This dag can not commit until the commit node is reached. - * Errors prior to the commit point imply the dag has failed - * and must be retried. - */ + * This dag can not commit until the commit node is reached. + * Errors prior to the commit point imply the dag has failed + * and must be retried. + */ dag_h->numCommitNodes = 1; dag_h->numCommits = 0; dag_h->numSuccedents = 1; /* initialize the block, commit, and terminator nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); pda = asmap->physInfo; RF_ASSERT(pda != NULL); - /* parityInfo must describe entire parity unit */ + /* ParityInfo must describe entire parity unit. */ RF_ASSERT(asmap->parityInfo->next == NULL); - /* initialize the data node */ + /* Initialize the data node. 
*/ if (!useMirror) { - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList); + rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Rpd", allocList); if (shiftable && rf_compute_workload_shift(raidPtr, pda)) { - /* shift this read to the next disk in line */ + /* Shift this read to the next disk in line. */ rdNode->params[0].p = asmap->parityInfo; rdNode->params[1].p = pda->bufPtr; rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rdNode->params[3].v = RF_CREATE_PARAM3( + RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); } else { - /* read primary copy */ + /* Read primary copy. */ rdNode->params[0].p = pda; rdNode->params[1].p = pda->bufPtr; rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rdNode->params[3].v = RF_CREATE_PARAM3( + RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); } } else { - /* read secondary copy of data */ - rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList); + /* Read secondary copy of data. */ + rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Rsd", allocList); rdNode->params[0].p = asmap->parityInfo; rdNode->params[1].p = pda->bufPtr; rdNode->params[2].v = parityStripeID; - rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rdNode->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); } - /* connect header to block node */ + /* Connect header to block node. */ RF_ASSERT(dag_h->numSuccedents == 1); RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; - /* connect block node to rdnode */ + /* Connect block node to rdnode. */ RF_ASSERT(blockNode->numSuccedents == 1); RF_ASSERT(rdNode->numAntecedents == 1); blockNode->succedents[0] = rdNode; rdNode->antecedents[0] = blockNode; rdNode->antType[0] = rf_control; - /* connect rdnode to commit node */ + /* Connect rdnode to commit node. */ RF_ASSERT(rdNode->numSuccedents == 1); RF_ASSERT(commitNode->numAntecedents == 1); rdNode->succedents[0] = commitNode; commitNode->antecedents[0] = rdNode; commitNode->antType[0] = rf_control; - /* connect commit node to terminator */ + /* Connect commit node to terminator. */ RF_ASSERT(commitNode->numSuccedents == 1); RF_ASSERT(termNode->numAntecedents == 1); RF_ASSERT(termNode->numSuccedents == 0); @@ -645,56 +685,66 @@ rf_CreateRaidCDegradedReadDAG( termNode->antecedents[0] = commitNode; termNode->antType[0] = rf_control; } + /* - * XXX move this elsewhere? + * XXX move this elsewhere ? 
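/*
 * Sketch (hypothetical helper, not in the driver): the graph hookups in
 * the DAG constructors above all repeat the same three assignments, which
 * could be expressed as a single edge-linking routine.
 */
static void
dag_link(RF_DagNode_t *from, int s_idx, RF_DagNode_t *to, int a_idx,
    RF_AntecedentType_t type)
{
	from->succedents[s_idx] = to;
	to->antecedents[a_idx] = from;
	to->antType[a_idx] = type;
}

/*
 * e.g. the block -> read -> commit chain above would become:
 *
 *	dag_link(blockNode, 0, rdNode, 0, rf_control);
 *	dag_link(rdNode, 0, commitNode, 0, rf_control);
 */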
*/ -void +void rf_DD_GenerateFailedAccessASMs( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_PhysDiskAddr_t ** pdap, - int *nNodep, - RF_PhysDiskAddr_t ** pqpdap, - int *nPQNodep, - RF_AllocListElem_t * allocList) + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_PhysDiskAddr_t **pdap, + int *nNodep, + RF_PhysDiskAddr_t **pqpdap, + int *nPQNodep, + RF_AllocListElem_t *allocList +) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int PDAPerDisk, i; + int PDAPerDisk, i; RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - int numDataCol = layoutPtr->numDataCol; - int state; + int numDataCol = layoutPtr->numDataCol; + int state; RF_SectorNum_t suoff, suend; unsigned firstDataCol, napdas, count; RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0; - RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; + RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0]; + RF_PhysDiskAddr_t *ftwo = asmap->failedPDAs[1]; RF_PhysDiskAddr_t *pda_p; RF_PhysDiskAddr_t *phys_p; RF_RaidAddr_t sosAddr; - /* determine how many pda's we will have to generate per unaccess + /* + * Determine how many pda's we will have to generate per unaccessed * stripe. If there is only one failed data unit, it is one; if two, - * possibly two, depending wether they overlap. */ + * possibly two, depending wether they overlap. + */ fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); fone_end = fone_start + fone->numSector; -#define CONS_PDA(if,start,num) \ - pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \ - pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ - pda_p->numSector = num; \ - pda_p->next = NULL; \ - RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) +#define CONS_PDA(if,start,num) do { \ + pda_p->row = asmap->if->row; \ + pda_p->col = asmap->if->col; \ + pda_p->startSector = ((asmap->if->startSector / secPerSU) * \ + secPerSU) + start; \ + pda_p->numSector = num; \ + pda_p->next = NULL; \ + RF_MallocAndAdd(pda_p->bufPtr, \ + rf_RaidAddressToByte(raidPtr,num),(char *), allocList); \ +} while (0) if (asmap->numDataFailed == 1) { PDAPerDisk = 1; state = 1; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); pda_p = *pqpdap; - /* build p */ + /* Build p. */ CONS_PDA(parityInfo, fone_start, fone->numSector); pda_p->type = RF_PDA_TYPE_PARITY; pda_p++; - /* build q */ + /* Build q. */ CONS_PDA(qInfo, fone_start, fone->numSector); pda_p->type = RF_PDA_TYPE_Q; } else { @@ -703,7 +753,8 @@ rf_DD_GenerateFailedAccessASMs( if (fone->numSector + ftwo->numSector > secPerSU) { PDAPerDisk = 1; state = 2; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); pda_p = *pqpdap; CONS_PDA(parityInfo, 0, secPerSU); pda_p->type = RF_PDA_TYPE_PARITY; @@ -713,8 +764,9 @@ rf_DD_GenerateFailedAccessASMs( } else { PDAPerDisk = 2; state = 3; - /* four of them, fone, then ftwo */ - RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + /* Four of them, fone, then ftwo. 
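/*
 * Why CONS_PDA() above is now wrapped in do { ... } while (0): the wrapper
 * turns a multi-statement macro into a single statement.  Generic
 * illustration (hypothetical RESET macros, not raidframe code):
 *
 *	#define BAD_RESET(p)	(p)->a = 0; (p)->b = 0
 *	#define OK_RESET(p)	do { (p)->a = 0; (p)->b = 0; } while (0)
 *
 *	if (cond)
 *		BAD_RESET(p);	// only (p)->a = 0 is conditional, and a
 *	else			// following 'else' no longer parses
 *		...
 *
 *	if (cond)
 *		OK_RESET(p);	// expands to exactly one statement
 *	else
 *		...
 *
 * The RF_ResetNode()/RF_ResetDagHeader() rewrite in rf_dag.h follows the
 * same idiom.
 */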
*/ + RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); pda_p = *pqpdap; CONS_PDA(parityInfo, fone_start, fone->numSector); pda_p->type = RF_PDA_TYPE_PARITY; @@ -729,12 +781,15 @@ rf_DD_GenerateFailedAccessASMs( pda_p->type = RF_PDA_TYPE_Q; } } - /* figure out number of nonaccessed pda */ - napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo == NULL ? 1 : 0)); + /* Figure out number of nonaccessed pda. */ + napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - + (ftwo == NULL ? 1 : 0)); *nPQNodep = PDAPerDisk; - /* sweep over the over accessed pda's, figuring out the number of - * additional pda's to generate. Of course, skip the failed ones */ + /* + * Sweep over the over accessed pda's, figuring out the number of + * additional pda's to generate. Of course, skip the failed ones. + */ count = 0; for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) { @@ -743,19 +798,21 @@ rf_DD_GenerateFailedAccessASMs( suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector); suend = suoff + pda_p->numSector; switch (state) { - case 1: /* one failed PDA to overlap */ - /* if a PDA doesn't contain the failed unit, it can - * only miss the start or end, not both */ + case 1: /* One failed PDA to overlap. */ + /* + * If a PDA doesn't contain the failed unit, it can + * only miss the start or end, not both. + */ if ((suoff > fone_start) || (suend < fone_end)) count++; break; - case 2: /* whole stripe */ - if (suoff) /* leak at begining */ + case 2: /* Whole stripe. */ + if (suoff) /* Leak at begining. */ count++; - if (suend < numDataCol) /* leak at end */ + if (suend < numDataCol) /* Leak at end. */ count++; break; - case 3: /* two disjoint units */ + case 3: /* Two disjoint units. */ if ((suoff > fone_start) || (suend < fone_end)) count++; if ((suoff > ftwo_start) || (suend < ftwo_end)) @@ -771,51 +828,65 @@ rf_DD_GenerateFailedAccessASMs( if (napdas == 0) return; /* short circuit */ - /* allocate up our list of pda's */ + /* Allocate up our list of pda's. */ - RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); *pdap = pda_p; - /* linkem together */ + /* Link them together. */ for (i = 0; i < (napdas - 1); i++) pda_p[i].next = pda_p + (i + 1); - /* march through the one's up to the first accessed disk */ - firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), asmap->physInfo->raidAddress) % numDataCol; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + /* March through the one's up to the first accessed disk. */ + firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + asmap->physInfo->raidAddress) % numDataCol; + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, + asmap->raidAddress); for (i = 0; i < firstDataCol; i++) { if ((pda_p - (*pdap)) == napdas) continue; pda_p->type = RF_PDA_TYPE_DATA; pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, + &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + /* Skip over dead disks. */ if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) continue; switch (state) { - case 1: /* fone */ + case 1: /* Fone. 
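/*
 * Sketch (hypothetical helper): the three-way 'state' value threaded
 * through this function, extracted for clarity.  Assumes, as the code
 * above does, that the second failed pda is NULL when only one data unit
 * has failed.
 */
static int
dd_failure_state(const RF_PhysDiskAddr_t *fone, const RF_PhysDiskAddr_t *ftwo,
    RF_SectorCount_t secPerSU)
{
	if (ftwo == NULL)
		return (1);	/* One failed data unit. */
	if (fone->numSector + ftwo->numSector > secPerSU)
		return (2);	/* Two failures; recover whole stripe units. */
	return (3);		/* Two disjoint failed regions. */
}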
*/ pda_p->numSector = fone->numSector; pda_p->raidAddress += fone_start; pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, pda_p->numSector), + (char *), allocList); break; - case 2: /* full stripe */ + case 2: /* Full stripe. */ pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, secPerSU), + (char *), allocList); break; - case 3: /* two slabs */ + case 3: /* Two slabs. */ pda_p->numSector = fone->numSector; pda_p->raidAddress += fone_start; pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, pda_p->numSector), + (char *), allocList); pda_p++; pda_p->type = RF_PDA_TYPE_DATA; pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + (raidPtr->Layout.map->MapSector) (raidPtr, + pda_p->raidAddress, &(pda_p->row), &(pda_p->col), + &(pda_p->startSector), 0); pda_p->numSector = ftwo->numSector; pda_p->raidAddress += ftwo_start; pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, pda_p->numSector), + (char *), allocList); break; default: RF_PANIC(); @@ -823,97 +894,163 @@ rf_DD_GenerateFailedAccessASMs( pda_p++; } - /* march through the touched stripe units */ + /* March through the touched stripe units. */ for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) { - if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1])) + if ((phys_p == asmap->failedPDAs[0]) || + (phys_p == asmap->failedPDAs[1])) continue; suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector); suend = suoff + phys_p->numSector; switch (state) { - case 1: /* single buffer */ + case 1: /* Single buffer. */ if (suoff > fone_start) { RF_ASSERT(suend >= fone_end); - /* The data read starts after the mapped - * access, snip off the begining */ + /* + * The data read starts after the mapped + * access, snip off the begining. + */ pda_p->numSector = suoff - fone_start; - pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p->raidAddress = sosAddr + (i * secPerSU) + + fone_start; + (raidPtr->Layout.map->MapSector) (raidPtr, + pda_p->raidAddress, &(pda_p->row), + &(pda_p->col), &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, + pda_p->numSector), (char *), allocList); pda_p++; } if (suend < fone_end) { RF_ASSERT(suoff <= fone_start); - /* The data read stops before the end of the - * failed access, extend */ + /* + * The data read stops before the end of the + * failed access, extend. + */ pda_p->numSector = fone_end - suend; - pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p->raidAddress = sosAddr + (i * secPerSU) + + suend; /* off by one? */ + (raidPtr->Layout.map->MapSector) (raidPtr, + pda_p->raidAddress, &(pda_p->row), + &(pda_p->col), &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, + pda_p->numSector), (char *), allocList); pda_p++; } break; - case 2: /* whole stripe unit */ + case 2: /* Whole stripe unit. */ RF_ASSERT((suoff == 0) || (suend == secPerSU)); - if (suend < secPerSU) { /* short read, snip from end - * on */ + if (suend < secPerSU) { + /* Short read, snip from end on. */ pda_p->numSector = secPerSU - suend; - pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p->raidAddress = sosAddr + (i * secPerSU) + + suend; /* off by one? */ + (raidPtr->Layout.map->MapSector) (raidPtr, + pda_p->raidAddress, &(pda_p->row), + &(pda_p->col), &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, + pda_p->numSector), (char *), allocList); pda_p++; } else - if (suoff > 0) { /* short at front */ + if (suoff > 0) { + /* Short at front. */ pda_p->numSector = suoff; - pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p->raidAddress = sosAddr + + (i * secPerSU); + (raidPtr->Layout.map->MapSector) + (raidPtr, pda_p->raidAddress, + &(pda_p->row), &(pda_p->col), + &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, + pda_p->numSector), (char *), + allocList); pda_p++; } break; - case 3: /* two nonoverlapping failures */ + case 3: /* Two nonoverlapping failures. */ if ((suoff > fone_start) || (suend < fone_end)) { if (suoff > fone_start) { RF_ASSERT(suend >= fone_end); - /* The data read starts after the + /* + * The data read starts after the * mapped access, snip off the - * begining */ + * begining. + */ pda_p->numSector = suoff - fone_start; - pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start; - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p->raidAddress = sosAddr + + (i * secPerSU) + fone_start; + (raidPtr->Layout.map->MapSector) + (raidPtr, pda_p->raidAddress, + &(pda_p->row), &(pda_p->col), + &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, + pda_p->numSector), (char *), + allocList); pda_p++; } if (suend < fone_end) { RF_ASSERT(suoff <= fone_start); - /* The data read stops before the end - * of the failed access, extend */ + /* + * The data read stops before the end + * of the failed access, extend. + */ pda_p->numSector = fone_end - suend; - pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? 
*/ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p->raidAddress = sosAddr + + (i * secPerSU) + + suend; /* Off by one ? */ + (raidPtr->Layout.map->MapSector) + (raidPtr, pda_p->raidAddress, + &(pda_p->row), &(pda_p->col), + &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, + pda_p->numSector), (char *), + allocList); pda_p++; } } if ((suoff > ftwo_start) || (suend < ftwo_end)) { if (suoff > ftwo_start) { RF_ASSERT(suend >= ftwo_end); - /* The data read starts after the + /* + * The data read starts after the * mapped access, snip off the - * begining */ + * begining. + */ pda_p->numSector = suoff - ftwo_start; - pda_p->raidAddress = sosAddr + (i * secPerSU) + ftwo_start; - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p->raidAddress = sosAddr + + (i * secPerSU) + ftwo_start; + (raidPtr->Layout.map->MapSector) + (raidPtr, pda_p->raidAddress, + &(pda_p->row), &(pda_p->col), + &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, + pda_p->numSector), (char *), + allocList); pda_p++; } if (suend < ftwo_end) { RF_ASSERT(suoff <= ftwo_start); - /* The data read stops before the end - * of the failed access, extend */ + /* + * The data read stops before the end + * of the failed access, extend. + */ pda_p->numSector = ftwo_end - suend; - pda_p->raidAddress = sosAddr + (i * secPerSU) + suend; /* off by one? */ - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + pda_p->raidAddress = sosAddr + + (i * secPerSU) + + suend; /* Off by one ? */ + (raidPtr->Layout.map->MapSector) + (raidPtr, pda_p->raidAddress, + &(pda_p->row), &(pda_p->col), + &(pda_p->startSector), 0); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, + pda_p->numSector), (char *), + allocList); pda_p++; } } @@ -923,40 +1060,51 @@ rf_DD_GenerateFailedAccessASMs( } } - /* after the last accessed disk */ + /* After the last accessed disk. */ for (; i < numDataCol; i++) { if ((pda_p - (*pdap)) == napdas) continue; pda_p->type = RF_PDA_TYPE_DATA; pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, + &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + /* Skip over dead disks. */ if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) continue; switch (state) { - case 1: /* fone */ + case 1: /* Fone. */ pda_p->numSector = fone->numSector; pda_p->raidAddress += fone_start; pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, pda_p->numSector), + (char *), allocList); break; - case 2: /* full stripe */ + case 2: /* Full stripe. 
*/ pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, secPerSU), + (char *), allocList); break; - case 3: /* two slabs */ + case 3: /* Two slabs. */ pda_p->numSector = fone->numSector; pda_p->raidAddress += fone_start; pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, pda_p->numSector), + (char *), allocList); pda_p++; pda_p->type = RF_PDA_TYPE_DATA; pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + (raidPtr->Layout.map->MapSector) (raidPtr, + pda_p->raidAddress, &(pda_p->row), &(pda_p->col), + &(pda_p->startSector), 0); pda_p->numSector = ftwo->numSector; pda_p->raidAddress += ftwo_start; pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, + rf_RaidAddressToByte(raidPtr, pda_p->numSector), + (char *), allocList); break; default: RF_PANIC(); @@ -967,52 +1115,62 @@ rf_DD_GenerateFailedAccessASMs( RF_ASSERT(pda_p - *pdap == napdas); return; } -#define INIT_DISK_NODE(node,name) \ -rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \ -(node)->succedents[0] = unblockNode; \ -(node)->succedents[1] = recoveryNode; \ -(node)->antecedents[0] = blockNode; \ -(node)->antType[0] = rf_control - -#define DISK_NODE_PARAMS(_node_,_p_) \ - (_node_).params[0].p = _p_ ; \ - (_node_).params[1].p = (_p_)->bufPtr; \ - (_node_).params[2].v = parityStripeID; \ - (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) - -void + +#define INIT_DISK_NODE(node,name) do { \ + rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, \ + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, \ + dag_h, name, allocList); \ + (node)->succedents[0] = unblockNode; \ + (node)->succedents[1] = recoveryNode; \ + (node)->antecedents[0] = blockNode; \ + (node)->antType[0] = rf_control; \ +} while (0) + +#define DISK_NODE_PARAMS(_node_,_p_) do { \ + (_node_).params[0].p = _p_ ; \ + (_node_).params[1].p = (_p_)->bufPtr; \ + (_node_).params[2].v = parityStripeID; \ + (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, \ + 0, 0, which_ru); \ +} while (0) + +void rf_DoubleDegRead( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - char *redundantReadNodeName, - char *recoveryNodeName, - int (*recovFunc) (RF_DagNode_t *)) + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList, + char *redundantReadNodeName, + char *recoveryNodeName, + int (*recovFunc) (RF_DagNode_t *) +) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode, - *unblockNode, *rpNodes, *rqNodes, *termNode; + *unblockNode, *rpNodes, *rqNodes, *termNode; RF_PhysDiskAddr_t *pda, *pqPDAs; RF_PhysDiskAddr_t *npdas; - int nNodes, nRrdNodes, nRudNodes, i; + int nNodes, nRrdNodes, nRudNodes, i; RF_ReconUnitNum_t which_ru; - int nReadNodes, nPQNodes; + int nReadNodes, nPQNodes; 
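	/*
	 * Aside on the do { ... } while (0) wrappers now used by the helper
	 * macros above (a generic illustration, not RAIDframe code): wrapping
	 * a multi-statement macro this way makes the expansion behave as one
	 * statement, so it composes safely with an unbraced if/else:
	 *
	 *	#define RESET_PAIR(a, b) do {	\
	 *		(a) = 0;		\
	 *		(b) = 0;		\
	 *	} while (0)
	 *
	 *	if (cond)
	 *		RESET_PAIR(x, y);
	 *	else
	 *		do_other_thing();
	 *
	 * Without the wrapper, only the first assignment would be controlled
	 * by the if, and the semicolon after the macro call would detach the
	 * else.  RESET_PAIR and do_other_thing are placeholder names.
	 */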
RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1]; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); + RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID( + layoutPtr, asmap->raidAddress, &which_ru); if (rf_dagDebug) printf("[Creating Double Degraded Read DAG]\n"); - rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList); + rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, + &pqPDAs, &nPQNodes, allocList); nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed); nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes; - nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes; + nNodes = 4 /* Block, unblock, recovery, term. */ + nReadNodes; - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), + allocList); i = 0; blockNode = &nodes[i]; i += 1; @@ -1036,19 +1194,27 @@ rf_DoubleDegRead( dag_h->succedents[0] = blockNode; dag_h->creator = "DoubleDegRead"; dag_h->numCommits = 0; - dag_h->numCommitNodes = 1; /* unblock */ + dag_h->numCommitNodes = 1; /* Unblock. */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList); termNode->antecedents[0] = unblockNode; termNode->antType[0] = rf_control; termNode->antecedents[1] = recoveryNode; termNode->antType[1] = rf_control; - /* init the block and unblock nodes */ - /* The block node has all nodes except itself, unblock and recovery as - * successors. Similarly for predecessors of the unblock. */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList); + /* + * Init the block and unblock nodes. + * The block node has all nodes except itself, unblock and + * recovery as successors. + * Similarly for predecessors of the unblock. + */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, + "Nil", allocList); + rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, + "Nil", allocList); for (i = 0; i < nReadNodes; i++) { blockNode->succedents[i] = rudNodes + i; @@ -1057,13 +1223,16 @@ rf_DoubleDegRead( } unblockNode->succedents[0] = termNode; - /* The recovery node has all the reads as predecessors, and the term + /* + * The recovery node has all the reads as predecessors, and the term * node as successors. It gets a pda as a param from each of the read - * nodes plus the raidPtr. For each failed unit is has a result pda. */ - rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, - 1, /* succesors */ - nReadNodes, /* preds */ - nReadNodes + 2, /* params */ + * nodes plus the raidPtr. For each failed unit is has a result pda. 
+ */ + rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, + rf_NullNodeUndoFunc, NULL, + 1, /* succesors */ + nReadNodes, /* preds */ + nReadNodes + 2, /* params */ asmap->numDataFailed, /* results */ dag_h, recoveryNodeName, allocList); @@ -1073,8 +1242,10 @@ rf_DoubleDegRead( recoveryNode->antType[i] = rf_trueData; } - /* build the read nodes, then come back and fill in recovery params - * and results */ + /* + * Build the read nodes, then come back and fill in recovery params + * and results. + */ pda = asmap->physInfo; for (i = 0; i < nRudNodes; pda = pda->next) { if ((pda == failedPDA) || (pda == failedPDAtwo)) @@ -1092,7 +1263,7 @@ rf_DoubleDegRead( DISK_NODE_PARAMS(rrdNodes[i], pda); } - /* redundancy pdas */ + /* Redundancy pdas. */ pda = pqPDAs; INIT_DISK_NODE(rpNodes, "Rp"); RF_ASSERT(pda); @@ -1111,14 +1282,14 @@ rf_DoubleDegRead( RF_ASSERT(pda); DISK_NODE_PARAMS(rqNodes[1], pda); } - /* fill in recovery node params */ + /* Fill in recovery node params. */ for (i = 0; i < nReadNodes; i++) - recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */ + recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */ recoveryNode->params[i++].p = (void *) raidPtr; recoveryNode->params[i++].p = (void *) asmap; recoveryNode->results[0] = failedPDA; if (asmap->numDataFailed == 2) recoveryNode->results[1] = failedPDAtwo; - /* zero fill the target data buffers? */ + /* Zero fill the target data buffers ? */ } diff --git a/sys/dev/raidframe/rf_dagdegrd.h b/sys/dev/raidframe/rf_dagdegrd.h index 8071bbb6bf5..a794004980d 100644 --- a/sys/dev/raidframe/rf_dagdegrd.h +++ b/sys/dev/raidframe/rf_dagdegrd.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagdegrd.h,v 1.2 1999/02/16 00:02:29 niklas Exp $ */ +/* $OpenBSD: rf_dagdegrd.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagdegrd.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -32,33 +33,21 @@ #include "rf_types.h" -/* degraded read DAG creation routines */ -void -rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_CreateRaidOneDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_CreateDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * recFunc); -void -rf_CreateRaidCDegradedReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_DD_GenerateFailedAccessASMs(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t ** pdap, - int *nNodep, RF_PhysDiskAddr_t ** pqpdap, int *nPQNodep, - RF_AllocListElem_t * allocList); -void -rf_DoubleDegRead(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, char *redundantReadNodeName, - char *recoveryNodeName, int (*recovFunc) (RF_DagNode_t *)); +/* Degraded read DAG creation routines. 
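 *
 * For illustration of the prototype style adopted below (the function name
 * here is made up, not one of the routines in this header):
 *
 *	old:	void
 *		rf_ExampleReadDAG(RF_Raid_t * raidPtr, RF_DagHeader_t * dag_h,
 *		    RF_AllocListElem_t * allocList);
 *
 *	new:	void rf_ExampleReadDAG(RF_Raid_t *, RF_DagHeader_t *,
 *		    RF_AllocListElem_t *);
 *
 * Only the parameter types remain in the prototypes; the names stay with
 * the definitions in the .c files.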
*/ +void rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); +void rf_CreateRaidOneDegradedReadDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); +void rf_CreateDegradedReadDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + RF_RedFuncs_t *); +void rf_CreateRaidCDegradedReadDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); +void rf_DD_GenerateFailedAccessASMs(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_PhysDiskAddr_t **, int *, RF_PhysDiskAddr_t **, int *, + RF_AllocListElem_t *); +void rf_DoubleDegRead(RF_Raid_t *, RF_AccessStripeMap_t *, RF_DagHeader_t *, + void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, char *, char *, + int (*) (RF_DagNode_t *)); -#endif /* !_RF__RF_DAGDEGRD_H_ */ +#endif /* !_RF__RF_DAGDEGRD_H_ */ diff --git a/sys/dev/raidframe/rf_dagdegwr.c b/sys/dev/raidframe/rf_dagdegwr.c index c5df4e9cac7..c9305a4de36 100644 --- a/sys/dev/raidframe/rf_dagdegwr.c +++ b/sys/dev/raidframe/rf_dagdegwr.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagdegwr.c,v 1.4 2000/01/11 18:02:20 peter Exp $ */ +/* $OpenBSD: rf_dagdegwr.c,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagdegwr.c,v 1.5 2000/01/07 03:40:57 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -30,7 +31,7 @@ /* * rf_dagdegwr.c * - * code for creating degraded write DAGs + * Code for creating degraded write DAGs. * */ @@ -45,14 +46,14 @@ #include "rf_dagdegwr.h" -/****************************************************************************** +/***************************************************************************** * * General comments on DAG creation: * - * All DAGs in this file use roll-away error recovery. Each DAG has a single - * commit node, usually called "Cmt." If an error occurs before the Cmt node + * All DAGs in this file use roll-away error recovery. Each DAG has a single + * commit node, usually called "Cmt". If an error occurs before the Cmt node * is reached, the execution engine will halt forward execution and work - * backward through the graph, executing the undo functions. Assuming that + * backward through the graph, executing the undo functions. Assuming that * each node in the graph prior to the Cmt node are undoable and atomic - or - * does not make changes to permanent state, the graph will fail atomically. * If an error occurs after the Cmt node executes, the engine will roll-forward @@ -61,32 +62,28 @@ * * A graph has only 1 Cmt node. * - */ + *****************************************************************************/ -/****************************************************************************** +/***************************************************************************** * * The following wrappers map the standard DAG creation interface to the - * DAG creation routines. Additionally, these wrappers enable experimentation + * DAG creation routines. Additionally, these wrappers enable experimentation * with new DAG structures by providing an extra level of indirection, allowing * the DAG creation routines to be replaced at this single point. 
- */ + * + *****************************************************************************/ -static RF_CREATE_DAG_FUNC_DECL(rf_CreateSimpleDegradedWriteDAG) { rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE); } -void -rf_CreateDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - RF_DagHeader_t *dag_h; - void *bp; - RF_RaidAccessFlags_t flags; - RF_AllocListElem_t *allocList; +void +rf_CreateDegradedWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; @@ -94,82 +91,82 @@ rf_CreateDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList) RF_ASSERT(asmap->numDataFailed == 1); dag_h->creator = "DegradedWriteDAG"; - /* if the access writes only a portion of the failed unit, and also + /* + * If the access writes only a portion of the failed unit, and also * writes some portion of at least one surviving unit, we create two * DAGs, one for the failed component and one for the non-failed - * component, and do them sequentially. Note that the fact that we're + * component, and do them sequentially. Note that the fact that we're * accessing only a portion of the failed unit indicates that the * access either starts or ends in the failed unit, and hence we need - * create only two dags. This is inefficient in that the same data or - * parity can get read and written twice using this structure. I need - * to fix this to do the access all at once. */ - RF_ASSERT(!(asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)); - rf_CreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList); + * create only two dags. This is inefficient in that the same data or + * parity can get read and written twice using this structure. I need + * to fix this to do the access all at once. + */ + RF_ASSERT(!(asmap->numStripeUnitsAccessed != 1 && + failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)); + rf_CreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, + flags, allocList); } -/****************************************************************************** +/***************************************************************************** * - * DAG creation code begins here - */ - + * DAG creation code begins here. + * + *****************************************************************************/ -/****************************************************************************** +/***************************************************************************** * * CommonCreateSimpleDegradedWriteDAG -- creates a DAG to do a degraded-mode * write, which is as follows * * / {Wnq} --\ * hdr -> blockNode -> Rod -> Xor -> Cmt -> Wnp ----> unblock -> term - * \ {Rod} / \ Wnd ---/ + * \ {Rod} / | Wnd ---/ * \ {Wnd} -/ * - * commit nodes: Xor, Wnd + * Commit nodes: Xor, Wnd * * IMPORTANT: * This DAG generator does not work for double-degraded archs since it does not - * generate Q + * generate Q. * * This dag is essentially identical to the large-write dag, except that the * write to the failed data unit is suppressed. * * IMPORTANT: this dag does not work in the case where the access writes only * a portion of the failed unit, and also writes some portion of at least one - * surviving SU. this case is handled in CreateDegradedWriteDAG above. 
+ * surviving SU. this case is handled in CreateDegradedWriteDAG above. * - * The block & unblock nodes are leftovers from a previous version. They + * The block & unblock nodes are leftovers from a previous version. They * do nothing, but I haven't deleted them because it would be a tremendous * effort to put them back in. * - * This dag is used whenever a one of the data units in a write has failed. + * This dag is used whenever one of the data units in a write has failed. * If it is the parity unit that failed, the nonredundant write dag (below) * is used. + * *****************************************************************************/ -void -rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, nfaults, redFunc, allowBufferRecycle) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - RF_DagHeader_t *dag_h; - void *bp; - RF_RaidAccessFlags_t flags; - RF_AllocListElem_t *allocList; - int nfaults; - int (*redFunc) (RF_DagNode_t *); - int allowBufferRecycle; +void +rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, int nfaults, + int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle) { - int nNodes, nRrdNodes, nWndNodes, nXorBufs, i, j, paramNum, - rdnodesFaked; + int nNodes, nRrdNodes, nWndNodes, nXorBufs, i, j, paramNum, + rdnodesFaked; RF_DagNode_t *blockNode, *unblockNode, *wnpNode, *wnqNode, *termNode; RF_DagNode_t *nodes, *wndNodes, *rrdNodes, *xorNode, *commitNode; RF_SectorCount_t sectorsPerSU; RF_ReconUnitNum_t which_ru; - char *xorTargetBuf = NULL; /* the target buffer for the XOR - * operation */ - char *overlappingPDAs;/* a temporary array of flags */ + char *xorTargetBuf = NULL; /* + * The target buffer for the XOR + * operation. + */ + char *overlappingPDAs; /* A temporary array of flags. */ RF_AccessStripeMapHeader_t *new_asm_h[2]; RF_PhysDiskAddr_t *pda, *parityPDA; RF_StripeNum_t parityStripeID; @@ -177,11 +174,13 @@ rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, RF_RaidLayout_t *layoutPtr; layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, - &which_ru); + parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, + asmap->raidAddress, &which_ru); sectorsPerSU = layoutPtr->sectorsPerStripeUnit; - /* failedPDA points to the pda within the asm that targets the failed - * disk */ + /* + * failedPDA points to the pda within the asm that targets + * the failed disk. + */ failedPDA = asmap->failedPDAs[0]; if (rf_dagDebug) @@ -191,39 +190,45 @@ rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, dag_h->creator = "SimpleDegradedWriteDAG"; /* - * Generate two ASMs identifying the surviving data - * we need in order to recover the lost data. - */ + * Generate two ASMs identifying the surviving data + * we need in order to recover the lost data. 
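 *
 * Both the degraded read and this degraded write rely on the same parity
 * identity: the parity unit is the XOR of all data units, so any single
 * missing buffer equals the XOR of all the others.  A minimal sketch of
 * that accumulation (illustrative only, not the actual rf_RecoveryXorFunc):
 *
 *	void
 *	xor_accumulate(char *dst, char * const *src, int nsrc, size_t len)
 *	{
 *		size_t off;
 *		int i;
 *
 *		for (off = 0; off < len; off++) {
 *			char x = 0;
 *			for (i = 0; i < nsrc; i++)
 *				x ^= src[i][off];
 *			dst[off] = x;
 *		}
 *	}
 *
 * In this DAG the sources are the buffers handed to the Xrc node as params
 * (old data from the Rrd nodes, the overlapping new data, and the new data
 * destined for the failed unit), and dst is the buffer that Wnp writes out
 * as the new parity.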
+ */ /* overlappingPDAs array must be zero'd */ - RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *)); - rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, - &nXorBufs, NULL, overlappingPDAs, allocList); - - /* create all the nodes at once */ - nWndNodes = asmap->numStripeUnitsAccessed - 1; /* no access is + RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, + sizeof(char), (char *)); + rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, + new_asm_h, &nXorBufs, NULL, overlappingPDAs, allocList); + + /* Create all the nodes at once. */ + nWndNodes = asmap->numStripeUnitsAccessed - 1; /* + * No access is * generated for the - * failed pda */ + * failed pda. + */ - nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + - ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0); + nRrdNodes = ((new_asm_h[0]) ? + new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) + + ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed + : 0); /* - * XXX - * - * There's a bug with a complete stripe overwrite- that means 0 reads - * of old data, and the rest of the DAG generation code doesn't like - * that. A release is coming, and I don't wanna risk breaking a critical - * DAG generator, so here's what I'm gonna do- if there's no read nodes, - * I'm gonna fake there being a read node, and I'm gonna swap in a - * no-op node in its place (to make all the link-up code happy). - * This should be fixed at some point. --jimz - */ + * XXX + * + * There's a bug with a complete stripe overwrite- that means 0 reads + * of old data, and the rest of the DAG generation code doesn't like + * that. A release is coming, and I don't wanna risk breaking a + * critical DAG generator, so here's what I'm gonna do- if there's + * no read nodes, I'm gonna fake there being a read node, and I'm + * gonna swap in a no-op node in its place (to make all the link-up + * code happy). + * This should be fixed at some point. --jimz + */ if (nRrdNodes == 0) { nRrdNodes = 1; rdnodesFaked = 1; } else { rdnodesFaked = 0; } - /* lock, unlock, xor, Wnd, Rrd, W(nfaults) */ + /* Lock, unlock, xor, Wnd, Rrd, W(nfaults). */ nNodes = 5 + nfaults + nWndNodes + nRrdNodes; RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); @@ -252,154 +257,189 @@ rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, } RF_ASSERT(i == nNodes); - /* this dag can not commit until all rrd and xor Nodes have completed */ + /* + * This dag can not commit until all rrd and xor Nodes have + * completed. 
+ */ dag_h->numCommitNodes = 1; dag_h->numCommits = 0; dag_h->numSuccedents = 1; RF_ASSERT(nRrdNodes > 0); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRrdNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, nWndNodes + nfaults, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - nRrdNodes, 2 * nXorBufs + 2, nfaults, dag_h, "Xrc", allocList); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nRrdNodes, 0, 0, 0, dag_h, + "Nil", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0, + dag_h, "Cmt", allocList); + rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, nWndNodes + nfaults, 0, 0, + dag_h, "Nil", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, + NULL, 1, nRrdNodes, 2 * nXorBufs + 2, nfaults, dag_h, "Xrc", + allocList); /* - * Fill in the Rrd nodes. If any of the rrd buffers are the same size as - * the failed buffer, save a pointer to it so we can use it as the target - * of the XOR. The pdas in the rrd nodes have been range-restricted, so if - * a buffer is the same size as the failed buffer, it must also be at the - * same alignment within the SU. - */ + * Fill in the Rrd nodes. If any of the rrd buffers are the same size + * as the failed buffer, save a pointer to it so we can use it as the + * target of the XOR. The pdas in the rrd nodes have been range- + * restricted, so if a buffer is the same size as the failed buffer, + * it must also be at the same alignment within the SU. + */ i = 0; if (new_asm_h[0]) { for (i = 0, pda = new_asm_h[0]->stripeMap->physInfo; i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed; i++, pda = pda->next) { - rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList); + rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Rrd", allocList); RF_ASSERT(pda); rrdNodes[i].params[0].p = pda; rrdNodes[i].params[1].p = pda->bufPtr; rrdNodes[i].params[2].v = parityStripeID; - rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rrdNodes[i].params[3].v = RF_CREATE_PARAM3( + RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); } } - /* i now equals the number of stripe units accessed in new_asm_h[0] */ + /* i now equals the number of stripe units accessed in new_asm_h[0]. 
*/ if (new_asm_h[1]) { for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo; j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed; j++, pda = pda->next) { - rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList); + rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Rrd", allocList); RF_ASSERT(pda); rrdNodes[i + j].params[0].p = pda; rrdNodes[i + j].params[1].p = pda->bufPtr; rrdNodes[i + j].params[2].v = parityStripeID; - rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - if (allowBufferRecycle && (pda->numSector == failedPDA->numSector)) + rrdNodes[i + j].params[3].v = RF_CREATE_PARAM3( + RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + if (allowBufferRecycle && + (pda->numSector == failedPDA->numSector)) xorTargetBuf = pda->bufPtr; } } if (rdnodesFaked) { /* - * This is where we'll init that fake noop read node - * (XXX should the wakeup func be different?) - */ - rf_InitNode(&rrdNodes[0], rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 1, 0, 0, dag_h, "RrN", allocList); + * This is where we'll init that fake noop read node. + * (XXX should the wakeup func be different ?) + */ + rf_InitNode(&rrdNodes[0], rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "RrN", + allocList); } /* - * Make a PDA for the parity unit. The parity PDA should start at - * the same offset into the SU as the failed PDA. - */ - /* Danner comment: I don't think this copy is really necessary. We are - * in one of two cases here. (1) The entire failed unit is written. - * Then asmap->parityInfo will describe the entire parity. (2) We are - * only writing a subset of the failed unit and nothing else. Then the - * asmap->parityInfo describes the failed unit and the copy can also - * be avoided. */ - - RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + * Make a PDA for the parity unit. The parity PDA should start at + * the same offset into the SU as the failed PDA. + */ + /* + * Danner comment: I don't think this copy is really necessary. We are + * in one of two cases here. + * (1) The entire failed unit is written. Then asmap->parityInfo will + * describe the entire parity. + * (2) We are only writing a subset of the failed unit and nothing else. + * Then the asmap->parityInfo describes the failed unit and the copy + * can also be avoided. + */ + + RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); parityPDA->row = asmap->parityInfo->row; parityPDA->col = asmap->parityInfo->col; - parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); + parityPDA->startSector = ((asmap->parityInfo->startSector / + sectorsPerSU) * sectorsPerSU) + (failedPDA->startSector % + sectorsPerSU); parityPDA->numSector = failedPDA->numSector; if (!xorTargetBuf) { - RF_CallocAndAdd(xorTargetBuf, 1, - rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList); + RF_CallocAndAdd(xorTargetBuf, 1, rf_RaidAddressToByte(raidPtr, + failedPDA->numSector), (char *), allocList); } - /* init the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); + /* Init the Wnp node. 
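 *
 * The startSector computation above just re-aligns the parity PDA with the
 * failed PDA inside the stripe unit.  With made-up numbers and 64 sectors
 * per stripe unit: if asmap->parityInfo->startSector is 1300 and
 * failedPDA->startSector is 2064, then (1300 / 64) * 64 = 1280 and
 * 2064 % 64 = 16, so the parity write starts at sector 1296, i.e. 16
 * sectors into the parity unit (the same offset the failed data has within
 * its own unit), and covers failedPDA->numSector sectors.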
*/ + rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Wnp", allocList); wnpNode->params[0].p = parityPDA; wnpNode->params[1].p = xorTargetBuf; wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wnpNode->params[3].v = RF_CREATE_PARAM3( + RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - /* fill in the Wnq Node */ + /* Fill in the Wnq Node. */ if (nfaults == 2) { { RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); parityPDA->row = asmap->qInfo->row; parityPDA->col = asmap->qInfo->col; - parityPDA->startSector = ((asmap->qInfo->startSector / sectorsPerSU) - * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU); + parityPDA->startSector = ((asmap->qInfo->startSector / + sectorsPerSU) * sectorsPerSU) + + (failedPDA->startSector % sectorsPerSU); parityPDA->numSector = failedPDA->numSector; - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); + rf_InitNode(wnqNode, rf_wait, RF_FALSE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Wnq", allocList); wnqNode->params[0].p = parityPDA; RF_CallocAndAdd(xorNode->results[1], 1, - rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList); + rf_RaidAddressToByte(raidPtr, failedPDA->numSector), + (char *), allocList); wnqNode->params[1].p = xorNode->results[1]; wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wnqNode->params[3].v = RF_CREATE_PARAM3( + RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); } } - /* fill in the Wnd nodes */ - for (pda = asmap->physInfo, i = 0; i < nWndNodes; i++, pda = pda->next) { + /* Fill in the Wnd nodes. */ + for (pda = asmap->physInfo, i = 0; i < nWndNodes; + i++, pda = pda->next) { if (pda == failedPDA) { i--; continue; } - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); + rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Wnd", allocList); RF_ASSERT(pda); wndNodes[i].params[0].p = pda; wndNodes[i].params[1].p = pda->bufPtr; wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wndNodes[i].params[3].v = RF_CREATE_PARAM3( + RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); } - /* fill in the results of the xor node */ + /* Fill in the results of the xor node. */ xorNode->results[0] = xorTargetBuf; - /* fill in the params of the xor node */ + /* Fill in the params of the xor node. */ paramNum = 0; if (rdnodesFaked == 0) { for (i = 0; i < nRrdNodes; i++) { - /* all the Rrd nodes need to be xored together */ + /* All the Rrd nodes need to be xored together. */ xorNode->params[paramNum++] = rrdNodes[i].params[0]; xorNode->params[paramNum++] = rrdNodes[i].params[1]; } } for (i = 0; i < nWndNodes; i++) { - /* any Wnd nodes that overlap the failed access need to be - * xored in */ + /* + * Any Wnd nodes that overlap the failed access need to be + * xored in. 
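 *
 * "Range restrict" here means clipping a copy of the write PDA down to the
 * part that actually intersects failedPDA, so only the overlapping bytes
 * feed the XOR.  A sketch of the interval arithmetic involved (assumed
 * behaviour, not the body of rf_RangeRestrictPDA):
 *
 *	RF_SectorNum_t s, e;
 *
 *	s = (pda->startSector > failedPDA->startSector) ?
 *	    pda->startSector : failedPDA->startSector;
 *	e = (pda->startSector + pda->numSector <
 *	    failedPDA->startSector + failedPDA->numSector) ?
 *	    pda->startSector + pda->numSector :
 *	    failedPDA->startSector + failedPDA->numSector;
 *	pda->startSector = s;
 *	pda->numSector = e - s;
 *
 * With RF_RESTRICT_DOBUFFER the buffer pointer would presumably be advanced
 * past the same number of leading sectors that were clipped off.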
+ */ if (overlappingPDAs[i]) { - RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); - bcopy((char *) wndNodes[i].params[0].p, (char *) pda, sizeof(RF_PhysDiskAddr_t)); - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0); + RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); + bcopy((char *) wndNodes[i].params[0].p, (char *) pda, + sizeof(RF_PhysDiskAddr_t)); + rf_RangeRestrictPDA(raidPtr, failedPDA, pda, + RF_RESTRICT_DOBUFFER, 0); xorNode->params[paramNum++].p = pda; xorNode->params[paramNum++].p = pda->bufPtr; } @@ -407,31 +447,31 @@ rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char)); /* - * Install the failed PDA into the xor param list so that the - * new data gets xor'd in. - */ + * Install the failed PDA into the xor param list so that the + * new data gets xor'd in. + */ xorNode->params[paramNum++].p = failedPDA; xorNode->params[paramNum++].p = failedPDA->bufPtr; /* - * The last 2 params to the recovery xor node are always the failed - * PDA and the raidPtr. install the failedPDA even though we have just - * done so above. This allows us to use the same XOR function for both - * degraded reads and degraded writes. - */ + * The last 2 params to the recovery xor node are always the failed + * PDA and the raidPtr. Install the failedPDA even though we have just + * done so above. This allows us to use the same XOR function for both + * degraded reads and degraded writes. + */ xorNode->params[paramNum++].p = failedPDA; xorNode->params[paramNum++].p = raidPtr; RF_ASSERT(paramNum == 2 * nXorBufs + 2); /* - * Code to link nodes begins here - */ + * Code to link nodes begins here. + */ - /* link header to block node */ + /* Link header to block node. */ RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; - /* link block node to rd nodes */ + /* Link block node to rd nodes. */ RF_ASSERT(blockNode->numSuccedents == nRrdNodes); for (i = 0; i < nRrdNodes; i++) { RF_ASSERT(rrdNodes[i].numAntecedents == 1); @@ -440,7 +480,7 @@ rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, rrdNodes[i].antType[0] = rf_control; } - /* link read nodes to xor node */ + /* Link read nodes to xor node. */ RF_ASSERT(xorNode->numAntecedents == nRrdNodes); for (i = 0; i < nRrdNodes; i++) { RF_ASSERT(rrdNodes[i].numSuccedents == 1); @@ -449,14 +489,14 @@ rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, xorNode->antType[i] = rf_trueData; } - /* link xor node to commit node */ + /* Link xor node to commit node. */ RF_ASSERT(xorNode->numSuccedents == 1); RF_ASSERT(commitNode->numAntecedents == 1); xorNode->succedents[0] = commitNode; commitNode->antecedents[0] = xorNode; commitNode->antType[0] = rf_control; - /* link commit node to wnd nodes */ + /* Link commit node to wnd nodes. */ RF_ASSERT(commitNode->numSuccedents == nfaults + nWndNodes); for (i = 0; i < nWndNodes; i++) { RF_ASSERT(wndNodes[i].numAntecedents == 1); @@ -465,7 +505,7 @@ rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, wndNodes[i].antType[0] = rf_control; } - /* link the commit node to wnp, wnq nodes */ + /* Link the commit node to wnp, wnq nodes. 
*/ RF_ASSERT(wnpNode->numAntecedents == 1); commitNode->succedents[nWndNodes] = wnpNode; wnpNode->antecedents[0] = commitNode; @@ -476,7 +516,7 @@ rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, wnqNode->antecedents[0] = commitNode; wnqNode->antType[0] = rf_control; } - /* link write new data nodes to unblock node */ + /* Link write new data nodes to unblock node. */ RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nfaults)); for (i = 0; i < nWndNodes; i++) { RF_ASSERT(wndNodes[i].numSuccedents == 1); @@ -485,20 +525,20 @@ rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, unblockNode->antType[i] = rf_control; } - /* link write new parity node to unblock node */ + /* Link write new parity node to unblock node. */ RF_ASSERT(wnpNode->numSuccedents == 1); wnpNode->succedents[0] = unblockNode; unblockNode->antecedents[nWndNodes] = wnpNode; unblockNode->antType[nWndNodes] = rf_control; - /* link write new q node to unblock node */ + /* Link write new q node to unblock node. */ if (nfaults == 2) { RF_ASSERT(wnqNode->numSuccedents == 1); wnqNode->succedents[0] = unblockNode; unblockNode->antecedents[nWndNodes + 1] = wnqNode; unblockNode->antType[nWndNodes + 1] = rf_control; } - /* link unblock node to term node */ + /* Link unblock node to term node. */ RF_ASSERT(unblockNode->numSuccedents == 1); RF_ASSERT(termNode->numAntecedents == 1); RF_ASSERT(termNode->numSuccedents == 0); @@ -506,37 +546,40 @@ rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, termNode->antecedents[0] = unblockNode; termNode->antType[0] = rf_control; } -#define CONS_PDA(if,start,num) \ - pda_p->row = asmap->if->row; pda_p->col = asmap->if->col; \ - pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \ - pda_p->numSector = num; \ - pda_p->next = NULL; \ - RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList) - -void -rf_WriteGenerateFailedAccessASMs( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_PhysDiskAddr_t ** pdap, - int *nNodep, - RF_PhysDiskAddr_t ** pqpdap, - int *nPQNodep, - RF_AllocListElem_t * allocList) + +#define CONS_PDA(if,start,num) do { \ + pda_p->row = asmap->if->row; \ + pda_p->col = asmap->if->col; \ + pda_p->startSector = ((asmap->if->startSector / secPerSU) * \ + secPerSU) + start; \ + pda_p->numSector = num; \ + pda_p->next = NULL; \ + RF_MallocAndAdd(pda_p->bufPtr, \ + rf_RaidAddressToByte(raidPtr,num),(char *), allocList); \ +} while (0) + +void +rf_WriteGenerateFailedAccessASMs(RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, RF_PhysDiskAddr_t **pdap, int *nNodep, + RF_PhysDiskAddr_t **pqpdap, int *nPQNodep, RF_AllocListElem_t *allocList) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int PDAPerDisk, i; + int PDAPerDisk, i; RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; - int numDataCol = layoutPtr->numDataCol; - int state; + int numDataCol = layoutPtr->numDataCol; + int state; unsigned napdas; RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end; - RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1]; + RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0]; + RF_PhysDiskAddr_t *ftwo = asmap->failedPDAs[1]; RF_PhysDiskAddr_t *pda_p; RF_RaidAddr_t sosAddr; - /* determine how many pda's we will have to generate per unaccess + /* + * Determine how many pda's we will have to generate per unaccessed * stripe. If there is only one failed data unit, it is one; if two, - * possibly two, depending wether they overlap. 
*/ + * possibly two, depending wether they overlap. + */ fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector); fone_end = fone_start + fone->numSector; @@ -544,13 +587,14 @@ rf_WriteGenerateFailedAccessASMs( if (asmap->numDataFailed == 1) { PDAPerDisk = 1; state = 1; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); pda_p = *pqpdap; - /* build p */ + /* Build p. */ CONS_PDA(parityInfo, fone_start, fone->numSector); pda_p->type = RF_PDA_TYPE_PARITY; pda_p++; - /* build q */ + /* Build q. */ CONS_PDA(qInfo, fone_start, fone->numSector); pda_p->type = RF_PDA_TYPE_Q; } else { @@ -559,7 +603,8 @@ rf_WriteGenerateFailedAccessASMs( if (fone->numSector + ftwo->numSector > secPerSU) { PDAPerDisk = 1; state = 2; - RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); pda_p = *pqpdap; CONS_PDA(parityInfo, 0, secPerSU); pda_p->type = RF_PDA_TYPE_PARITY; @@ -569,8 +614,9 @@ rf_WriteGenerateFailedAccessASMs( } else { PDAPerDisk = 2; state = 3; - /* four of them, fone, then ftwo */ - RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + /* Four of them, fone, then ftwo. */ + RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); pda_p = *pqpdap; CONS_PDA(parityInfo, fone_start, fone->numSector); pda_p->type = RF_PDA_TYPE_PARITY; @@ -585,57 +631,66 @@ rf_WriteGenerateFailedAccessASMs( pda_p->type = RF_PDA_TYPE_Q; } } - /* figure out number of nonaccessed pda */ + /* Figure out number of nonaccessed pda. */ napdas = PDAPerDisk * (numDataCol - 2); *nPQNodep = PDAPerDisk; *nNodep = napdas; if (napdas == 0) - return; /* short circuit */ + return; /* Short circuit. */ - /* allocate up our list of pda's */ + /* Allocate up our list of pda's. */ - RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList); + RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), + (RF_PhysDiskAddr_t *), allocList); *pdap = pda_p; - /* linkem together */ + /* Link them together. */ for (i = 0; i < (napdas - 1); i++) pda_p[i].next = pda_p + (i + 1); - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, + asmap->raidAddress); for (i = 0; i < numDataCol; i++) { if ((pda_p - (*pdap)) == napdas) continue; pda_p->type = RF_PDA_TYPE_DATA; pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); - /* skip over dead disks */ + (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, + &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + /* Skip over dead disks. */ if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status)) continue; switch (state) { - case 1: /* fone */ + case 1: /* Fone. */ pda_p->numSector = fone->numSector; pda_p->raidAddress += fone_start; pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte( + raidPtr, pda_p->numSector), (char *), allocList); break; - case 2: /* full stripe */ + case 2: /* Full stripe. 
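 *
 * Which case applies was decided above from the failed ranges.  For
 * example, with 64 sectors per stripe unit (illustrative numbers): a
 * single failed unit gives state 1 and one PDA per surviving column; two
 * failed units of 40 sectors each give 40 + 40 > 64, hence state 2 and one
 * full-stripe-unit PDA per column; two failed units of 16 sectors each
 * give 16 + 16 <= 64, hence state 3 and two PDAs per column, one for each
 * failed range.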
*/ pda_p->numSector = secPerSU; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte( + raidPtr, secPerSU), (char *), allocList); break; - case 3: /* two slabs */ + case 3: /* Two slabs. */ pda_p->numSector = fone->numSector; pda_p->raidAddress += fone_start; pda_p->startSector += fone_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte( + raidPtr, pda_p->numSector), (char *), allocList); pda_p++; pda_p->type = RF_PDA_TYPE_DATA; pda_p->raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0); + (raidPtr->Layout.map->MapSector) (raidPtr, + pda_p->raidAddress, &(pda_p->row), &(pda_p->col), + &(pda_p->startSector), 0); pda_p->numSector = ftwo->numSector; pda_p->raidAddress += ftwo_start; pda_p->startSector += ftwo_start; - RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList); + RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte( + raidPtr, pda_p->numSector), (char *), allocList); break; default: RF_PANIC(); @@ -646,53 +701,56 @@ rf_WriteGenerateFailedAccessASMs( RF_ASSERT(pda_p - *pdap == napdas); return; } -#define DISK_NODE_PDA(node) ((node)->params[0].p) - -#define DISK_NODE_PARAMS(_node_,_p_) \ - (_node_).params[0].p = _p_ ; \ - (_node_).params[1].p = (_p_)->bufPtr; \ - (_node_).params[2].v = parityStripeID; \ - (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) - -void -rf_DoubleDegSmallWrite( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - char *redundantReadNodeName, - char *redundantWriteNodeName, - char *recoveryNodeName, + +#define DISK_NODE_PDA(node) ((node)->params[0].p) + +#define DISK_NODE_PARAMS(_node_,_p_) do { \ + (_node_).params[0].p = _p_ ; \ + (_node_).params[1].p = (_p_)->bufPtr; \ + (_node_).params[2].v = parityStripeID; \ + (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, \ + 0, 0, which_ru); \ +} while (0) + +void +rf_DoubleDegSmallWrite(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList, char *redundantReadNodeName, + char *redundantWriteNodeName, char *recoveryNodeName, int (*recovFunc) (RF_DagNode_t *)) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_DagNode_t *nodes, *wudNodes, *rrdNodes, *recoveryNode, *blockNode, - *unblockNode, *rpNodes, *rqNodes, *wpNodes, *wqNodes, *termNode; + *unblockNode, *rpNodes, *rqNodes, *wpNodes, *wqNodes, *termNode; RF_PhysDiskAddr_t *pda, *pqPDAs; RF_PhysDiskAddr_t *npdas; - int nWriteNodes, nNodes, nReadNodes, nRrdNodes, nWudNodes, i; + int nWriteNodes, nNodes, nReadNodes, nRrdNodes, nWudNodes, i; RF_ReconUnitNum_t which_ru; - int nPQNodes; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru); + int nPQNodes; + RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID( + layoutPtr, asmap->raidAddress, &which_ru); - /* simple small write case - First part looks like a reconstruct-read + /* + * Simple small write case - First part looks like a reconstruct-read * of the failed data units. Then a write of all data units not - * failed. */ + * failed. 
+ */ - /* Hdr | ------Block- / / \ Rrd Rrd ... Rrd Rp Rq \ \ + /* + * Hdr | ------Block- / / \ Rrd Rrd ... Rrd Rp Rq \ \ * / -------PQ----- / \ \ Wud Wp WQ \ | / * --Unblock- | T - * - * Rrd = read recovery data (potentially none) Wud = write user data - * (not incl. failed disks) Wp = Write P (could be two) Wq = Write Q - * (could be two) - * + * + * Rrd = read recovery data (potentially none) + * Wud = write user data (not incl. failed disks) + * Wp = Write P (could be two) + * Wq = Write Q (could be two) + * */ - rf_WriteGenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList); + rf_WriteGenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, + &pqPDAs, &nPQNodes, allocList); RF_ASSERT(asmap->numDataFailed == 1); @@ -701,7 +759,8 @@ rf_DoubleDegSmallWrite( nWriteNodes = nWudNodes + 2 * nPQNodes; nNodes = 4 + nReadNodes + nWriteNodes; - RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), + allocList); blockNode = nodes; unblockNode = blockNode + 1; termNode = unblockNode + 1; @@ -716,38 +775,46 @@ rf_DoubleDegSmallWrite( dag_h->creator = "PQ_DDSimpleSmallWrite"; dag_h->numSuccedents = 1; dag_h->succedents[0] = blockNode; - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); termNode->antecedents[0] = unblockNode; termNode->antType[0] = rf_control; - /* init the block and unblock nodes */ - /* The block node has all the read nodes as successors */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList); + /* Init the block and unblock nodes. */ + /* The block node has all the read nodes as successors. */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, + "Nil", allocList); for (i = 0; i < nReadNodes; i++) blockNode->succedents[i] = rrdNodes + i; - /* The unblock node has all the writes as successors */ - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWriteNodes, 0, 0, dag_h, "Nil", allocList); + /* The unblock node has all the writes as successors. */ + rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, nWriteNodes, 0, 0, dag_h, + "Nil", allocList); for (i = 0; i < nWriteNodes; i++) { unblockNode->antecedents[i] = wudNodes + i; unblockNode->antType[i] = rf_control; } unblockNode->succedents[0] = termNode; -#define INIT_READ_NODE(node,name) \ - rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \ - (node)->succedents[0] = recoveryNode; \ - (node)->antecedents[0] = blockNode; \ - (node)->antType[0] = rf_control; +#define INIT_READ_NODE(node,name) do { \ + rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, \ + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, \ + dag_h, name, allocList); \ + (node)->succedents[0] = recoveryNode; \ + (node)->antecedents[0] = blockNode; \ + (node)->antType[0] = rf_control; \ +} while (0) - /* build the read nodes */ + /* Build the read nodes. 
*/ pda = npdas; for (i = 0; i < nRrdNodes; i++, pda = pda->next) { INIT_READ_NODE(rrdNodes + i, "rrd"); DISK_NODE_PARAMS(rrdNodes[i], pda); } - /* read redundancy pdas */ + /* Read redundancy pdas. */ pda = pqPDAs; INIT_READ_NODE(rpNodes, "Rp"); RF_ASSERT(pda); @@ -766,18 +833,20 @@ rf_DoubleDegSmallWrite( RF_ASSERT(pda); DISK_NODE_PARAMS(rqNodes[1], pda); } - /* the recovery node has all reads as precedessors and all writes as + /* + * The recovery node has all reads as precedessors and all writes as * successors. It generates a result for every write P or write Q * node. As parameters, it takes a pda per read and a pda per stripe * of user data written. It also takes as the last params the raidPtr - * and asm. For results, it takes PDA for P & Q. */ - + * and asm. For results, it takes PDA for P & Q. + */ - rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL, - nWriteNodes, /* succesors */ - nReadNodes, /* preds */ + rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, + rf_NullNodeUndoFunc, NULL, + nWriteNodes, /* succesors */ + nReadNodes, /* preds */ nReadNodes + nWudNodes + 3, /* params */ - 2 * nPQNodes, /* results */ + 2 * nPQNodes, /* results */ dag_h, recoveryNodeName, allocList); @@ -807,21 +876,25 @@ rf_DoubleDegSmallWrite( pda++; recoveryNode->results[3] = pda; } - /* fill writes */ -#define INIT_WRITE_NODE(node,name) \ - rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \ - (node)->succedents[0] = unblockNode; \ - (node)->antecedents[0] = recoveryNode; \ - (node)->antType[0] = rf_control; + /* Fill writes. */ +#define INIT_WRITE_NODE(node,name) do { \ + rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskWriteFunc, \ + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, \ + dag_h, name, allocList); \ + (node)->succedents[0] = unblockNode; \ + (node)->antecedents[0] = recoveryNode; \ + (node)->antType[0] = rf_control; \ +} while (0) pda = asmap->physInfo; for (i = 0; i < nWudNodes; i++) { INIT_WRITE_NODE(wudNodes + i, "Wd"); DISK_NODE_PARAMS(wudNodes[i], pda); - recoveryNode->params[nReadNodes + i].p = DISK_NODE_PDA(wudNodes + i); + recoveryNode->params[nReadNodes + i].p = + DISK_NODE_PDA(wudNodes + i); pda = pda->next; } - /* write redundancy pdas */ + /* Write redundancy pdas. */ pda = pqPDAs; INIT_WRITE_NODE(wpNodes, "Wp"); RF_ASSERT(pda); diff --git a/sys/dev/raidframe/rf_dagdegwr.h b/sys/dev/raidframe/rf_dagdegwr.h index 7d24706f264..ad4cc439bb2 100644 --- a/sys/dev/raidframe/rf_dagdegwr.h +++ b/sys/dev/raidframe/rf_dagdegwr.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagdegwr.h,v 1.3 2000/01/07 14:50:20 peter Exp $ */ +/* $OpenBSD: rf_dagdegwr.h,v 1.4 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagdegwr.h,v 1.4 1999/08/15 02:36:03 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,28 +29,24 @@ */ -#ifndef _RF__RF_DAGDEGWR_H_ -#define _RF__RF_DAGDEGWR_H_ +#ifndef _RF__RF_DAGDEGWR_H_ +#define _RF__RF_DAGDEGWR_H_ + +/* Degraded write DAG creation routines. 
*/ -/* degraded write DAG creation routines */ -void rf_CreateDegradedWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); +void rf_CreateDegradedWriteDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); -void rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - int nfaults, int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); +void rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + int, int (*) (RF_DagNode_t *), int); - void rf_WriteGenerateFailedAccessASMs(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t ** pdap, - int *nNodep, RF_PhysDiskAddr_t ** pqpdap, - int *nPQNodep, RF_AllocListElem_t * allocList); +void rf_WriteGenerateFailedAccessASMs(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_PhysDiskAddr_t **, int *, RF_PhysDiskAddr_t **, int *, + RF_AllocListElem_t *); - void rf_DoubleDegSmallWrite(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, char *redundantReadNodeName, - char *redundantWriteNodeName, char *recoveryNodeName, - int (*recovFunc) (RF_DagNode_t *)); +void rf_DoubleDegSmallWrite(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + char *, char *, char *, int (*) (RF_DagNode_t *)); -#endif /* !_RF__RF_DAGDEGWR_H_ */ +#endif /* !_RF__RF_DAGDEGWR_H_ */ diff --git a/sys/dev/raidframe/rf_dagffrd.c b/sys/dev/raidframe/rf_dagffrd.c index 8e6ed1e5af8..89a485b8ca5 100644 --- a/sys/dev/raidframe/rf_dagffrd.c +++ b/sys/dev/raidframe/rf_dagffrd.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagffrd.c,v 1.3 2000/01/11 18:02:21 peter Exp $ */ +/* $OpenBSD: rf_dagffrd.c,v 1.4 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagffrd.c,v 1.4 2000/01/07 03:40:58 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -30,7 +31,7 @@ /* * rf_dagffrd.c * - * code for creating fault-free read DAGs + * Code for creating fault-free read DAGs. * */ @@ -44,7 +45,11 @@ #include "rf_general.h" #include "rf_dagffrd.h" -/****************************************************************************** +void rf_CreateMirrorReadDAG( RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + int (*) (RF_DagNode_t *)); + +/***************************************************************************** * * General comments on DAG creation: * @@ -60,39 +65,42 @@ * * A graph has only 1 Cmt node. * - */ + *****************************************************************************/ -/****************************************************************************** +/***************************************************************************** * * The following wrappers map the standard DAG creation interface to the * DAG creation routines. Additionally, these wrappers enable experimentation * with new DAG structures by providing an extra level of indirection, allowing * the DAG creation routines to be replaced at this single point. 
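The wrappers described above add one level of indirection between the generic DAG-creation interface and the concrete builders. The real wrappers are plain forwarding calls; the function pointer below only illustrates the idea of a single replacement point, with simplified argument types:

/* Hypothetical, simplified argument types. */
typedef void (*dag_creator_t)(void *raidPtr, void *asmap, void *dag_h);

static void
nonredundant_read_dag(void *raidPtr, void *asmap, void *dag_h)
{
	/* Build Hdr -> Nil -> read -> Cmt -> Trm here. */
	(void)raidPtr; (void)asmap; (void)dag_h;
}

/* Swapping this pointer swaps the DAG shape for every caller. */
static dag_creator_t read_dag_creator = nonredundant_read_dag;

void
create_fault_free_read_dag(void *raidPtr, void *asmap, void *dag_h)
{
	read_dag_creator(raidPtr, asmap, dag_h);
}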
- */ + * + *****************************************************************************/ -void +void rf_CreateFaultFreeReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList +) { rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, RF_IO_TYPE_READ); } -/****************************************************************************** +/***************************************************************************** * - * DAG creation code begins here - */ + * DAG creation code begins here. + * + *****************************************************************************/ -/****************************************************************************** +/***************************************************************************** * - * creates a DAG to perform a nonredundant read or write of data within one + * Creates a DAG to perform a nonredundant read or write of data within one * stripe. * For reads, this DAG is as follows: * @@ -124,21 +132,22 @@ rf_CreateFaultFreeReadDAG( * *****************************************************************************/ -void +void rf_CreateNonredundantDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_IoType_t type) + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList, + RF_IoType_t type +) { RF_DagNode_t *nodes, *diskNodes, *blockNode, *commitNode, *termNode; RF_PhysDiskAddr_t *pda = asmap->physInfo; - int (*doFunc) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); - int i, n, totalNumNodes; - char *name; + int (*doFunc) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); + int i, n, totalNumNodes; + char *name; n = asmap->numStripeUnitsAccessed; dag_h->creator = "NonredundantDAG"; @@ -164,20 +173,20 @@ rf_CreateNonredundantDAG( } /* - * For reads, the dag can not commit until the block node is reached. - * for writes, the dag commits immediately. - */ + * For reads, the dag can not commit until the block node is reached. + * For writes, the dag commits immediately. + */ dag_h->numCommitNodes = 1; dag_h->numCommits = 0; dag_h->numSuccedents = 1; /* - * Node count: - * 1 block node - * n data reads (or writes) - * 1 commit node - * 1 terminator node - */ + * Node count: + * 1 block node + * n data reads (or writes) + * 1 commit node + * 1 terminator node + */ RF_ASSERT(n > 0); totalNumNodes = n + 3; RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), @@ -193,23 +202,29 @@ rf_CreateNonredundantDAG( i += 1; RF_ASSERT(i == totalNumNodes); - /* initialize nodes */ + /* Initialize nodes. 
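All nodes of a graph come from one allocation that is then carved into named groups, disk nodes first, then the block, commit and terminator nodes. A stand-alone sketch of that layout, with counts and type names that are illustrative only:

#include <stdlib.h>

struct dag_node { int placeholder; };

void
layout_nodes(int n)
{
	int totalNumNodes = n + 3;	/* n disk nodes + Nil, Cmt, Trm */
	struct dag_node *nodes, *diskNodes, *blockNode, *commitNode, *termNode;

	nodes = calloc(totalNumNodes, sizeof(*nodes));
	if (nodes == NULL)
		return;
	diskNodes = nodes;		/* nodes[0 .. n-1]	*/
	blockNode = nodes + n;		/* nodes[n]		*/
	commitNode = blockNode + 1;	/* nodes[n+1]		*/
	termNode = commitNode + 1;	/* nodes[n+2]		*/

	(void)diskNodes; (void)commitNode; (void)termNode;
	free(nodes);
}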
*/ switch (type) { case RF_IO_TYPE_READ: - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, n, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, n, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", + allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", + allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", + allocList); break; case RF_IO_TYPE_WRITE: - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, n, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, n, 0, 0, dag_h, "Trm", allocList); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", + allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, n, 1, 0, 0, dag_h, "Cmt", + allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, n, 0, 0, dag_h, "Trm", + allocList); break; default: RF_PANIC(); @@ -217,42 +232,43 @@ rf_CreateNonredundantDAG( for (i = 0; i < n; i++) { RF_ASSERT(pda != NULL); - rf_InitNode(&diskNodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, - 1, 1, 4, 0, dag_h, name, allocList); + rf_InitNode(&diskNodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); diskNodes[i].params[0].p = pda; diskNodes[i].params[1].p = pda->bufPtr; - /* parity stripe id is not necessary */ + /* Parity stripe id is not necessary. */ diskNodes[i].params[2].v = 0; - diskNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + diskNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); pda = pda->next; } /* - * Connect nodes. - */ + * Connect nodes. + */ - /* connect hdr to block node */ + /* Connect hdr to block node. */ RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; if (type == RF_IO_TYPE_READ) { - /* connecting a nonredundant read DAG */ + /* Connecting a nonredundant read DAG. */ RF_ASSERT(blockNode->numSuccedents == n); RF_ASSERT(commitNode->numAntecedents == n); for (i = 0; i < n; i++) { - /* connect block node to each read node */ + /* Connect block node to each read node. */ RF_ASSERT(diskNodes[i].numAntecedents == 1); blockNode->succedents[i] = &diskNodes[i]; diskNodes[i].antecedents[0] = blockNode; diskNodes[i].antType[0] = rf_control; - /* connect each read node to the commit node */ + /* Connect each read node to the commit node. */ RF_ASSERT(diskNodes[i].numSuccedents == 1); diskNodes[i].succedents[0] = commitNode; commitNode->antecedents[i] = &diskNodes[i]; commitNode->antType[i] = rf_control; } - /* connect the commit node to the term node */ + /* Connect the commit node to the term node. 
*/ RF_ASSERT(commitNode->numSuccedents == 1); RF_ASSERT(termNode->numAntecedents == 1); RF_ASSERT(termNode->numSuccedents == 0); @@ -260,8 +276,8 @@ rf_CreateNonredundantDAG( termNode->antecedents[0] = commitNode; termNode->antType[0] = rf_control; } else { - /* connecting a nonredundant write DAG */ - /* connect the block node to the commit node */ + /* Connecting a nonredundant write DAG. */ + /* Connect the block node to the commit node. */ RF_ASSERT(blockNode->numSuccedents == 1); RF_ASSERT(commitNode->numAntecedents == 1); blockNode->succedents[0] = commitNode; @@ -272,13 +288,13 @@ rf_CreateNonredundantDAG( RF_ASSERT(termNode->numAntecedents == n); RF_ASSERT(termNode->numSuccedents == 0); for (i = 0; i < n; i++) { - /* connect the commit node to each write node */ + /* Connect the commit node to each write node. */ RF_ASSERT(diskNodes[i].numAntecedents == 1); commitNode->succedents[i] = &diskNodes[i]; diskNodes[i].antecedents[0] = commitNode; diskNodes[i].antType[0] = rf_control; - /* connect each write node to the term node */ + /* Connect each write node to the term node. */ RF_ASSERT(diskNodes[i].numSuccedents == 1); diskNodes[i].succedents[0] = termNode; termNode->antecedents[i] = &diskNodes[i]; @@ -286,37 +302,38 @@ rf_CreateNonredundantDAG( } } } -/****************************************************************************** - * Create a fault-free read DAG for RAID level 1 +/***************************************************************************** + * Create a fault-free read DAG for RAID level 1. * * Hdr -> Nil -> Rmir -> Cmt -> Trm * * The "Rmir" node schedules a read from the disk in the mirror pair with the - * shortest disk queue. the proper queue is selected at Rmir execution. this + * shortest disk queue. The proper queue is selected at Rmir execution. This * deferred mapping is unlike other archs in RAIDframe which generally fix * mapping at DAG creation time. * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (for holding read data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation + * Parameters: raidPtr - description of the physical array + * asmap - logical & physical addresses for this access + * bp - buffer ptr (for holding read data) + * flags - general flags (e.g. disk locking) + * allocList - list of memory allocated in DAG creation *****************************************************************************/ -static void -CreateMirrorReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int (*readfunc) (RF_DagNode_t * node)) +void +rf_CreateMirrorReadDAG( + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList, + int (*readfunc) (RF_DagNode_t *) +) { RF_DagNode_t *readNodes, *nodes, *blockNode, *commitNode, *termNode; RF_PhysDiskAddr_t *data_pda = asmap->physInfo; RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo; - int i, n, totalNumNodes; + int i, n, totalNumNodes; n = asmap->numStripeUnitsAccessed; dag_h->creator = "RaidOneReadDAG"; @@ -324,20 +341,20 @@ CreateMirrorReadDAG( printf("[Creating RAID level 1 read DAG]\n"); } /* - * This dag can not commit until the commit node is reached - * errors prior to the commit point imply the dag has failed. 
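The shortest-queue choice mentioned above is made when the Rmir node actually executes, inside rf_DiskReadMirrorIdleFunc or rf_DiskReadMirrorPartitionFunc. As a rough illustration only, with a hypothetical queue structure rather than RAIDframe's disk queues:

/* Hypothetical per-disk queue; RAIDframe's queue structures differ. */
struct disk_queue {
	int pending;		/* Outstanding I/Os on this disk. */
};

/*
 * Return 1 to read from the mirror copy, 0 to read from the data
 * copy, picking whichever queue is shorter.
 */
int
pick_mirror_copy(const struct disk_queue *data_q,
    const struct disk_queue *mirror_q)
{
	return (mirror_q->pending < data_q->pending);
}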
- */ + * This dag can not commit until the commit node is reached. + * Errors prior to the commit point imply the dag has failed. + */ dag_h->numCommitNodes = 1; dag_h->numCommits = 0; dag_h->numSuccedents = 1; /* - * Node count: - * n data reads - * 1 block node - * 1 commit node - * 1 terminator node - */ + * Node count: + * n data reads + * 1 block node + * 1 commit node + * 1 terminator node + */ RF_ASSERT(n > 0); totalNumNodes = n + 3; RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), @@ -353,7 +370,7 @@ CreateMirrorReadDAG( i += 1; RF_ASSERT(i == totalNumNodes); - /* initialize nodes */ + /* Initialize nodes. */ rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList); rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, @@ -365,27 +382,28 @@ CreateMirrorReadDAG( RF_ASSERT(data_pda != NULL); RF_ASSERT(parity_pda != NULL); rf_InitNode(&readNodes[i], rf_wait, RF_FALSE, readfunc, - rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5, 0, dag_h, - "Rmir", allocList); + rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5, + 0, dag_h, "Rmir", allocList); readNodes[i].params[0].p = data_pda; readNodes[i].params[1].p = data_pda->bufPtr; - /* parity stripe id is not necessary */ + /* Parity stripe id is not necessary. */ readNodes[i].params[2].p = 0; - readNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + readNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); readNodes[i].params[4].p = parity_pda; data_pda = data_pda->next; parity_pda = parity_pda->next; } /* - * Connect nodes - */ + * Connect nodes. + */ - /* connect hdr to block node */ + /* Connect hdr to block node. */ RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; - /* connect block node to read nodes */ + /* Connect block node to read nodes. */ RF_ASSERT(blockNode->numSuccedents == n); for (i = 0; i < n; i++) { RF_ASSERT(readNodes[i].numAntecedents == 1); @@ -394,7 +412,7 @@ CreateMirrorReadDAG( readNodes[i].antType[0] = rf_control; } - /* connect read nodes to commit node */ + /* Connect read nodes to commit node. */ RF_ASSERT(commitNode->numAntecedents == n); for (i = 0; i < n; i++) { RF_ASSERT(readNodes[i].numSuccedents == 1); @@ -403,7 +421,7 @@ CreateMirrorReadDAG( commitNode->antType[i] = rf_control; } - /* connect commit node to term node */ + /* Connect commit node to term node. 
*/ RF_ASSERT(commitNode->numSuccedents == 1); RF_ASSERT(termNode->numAntecedents == 1); RF_ASSERT(termNode->numSuccedents == 0); @@ -412,28 +430,30 @@ CreateMirrorReadDAG( termNode->antType[0] = rf_control; } -void +void rf_CreateMirrorIdleReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList +) { - CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + rf_CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, rf_DiskReadMirrorIdleFunc); } -void +void rf_CreateMirrorPartitionReadDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, + void *bp, + RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList +) { - CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + rf_CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, rf_DiskReadMirrorPartitionFunc); } diff --git a/sys/dev/raidframe/rf_dagffrd.h b/sys/dev/raidframe/rf_dagffrd.h index ae068900493..4912f0c46b2 100644 --- a/sys/dev/raidframe/rf_dagffrd.h +++ b/sys/dev/raidframe/rf_dagffrd.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagffrd.h,v 1.2 1999/02/16 00:02:30 niklas Exp $ */ +/* $OpenBSD: rf_dagffrd.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagffrd.h,v 1.3 1999/02/05 00:06:07 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,27 +28,21 @@ * rights to redistribute these changes. */ -#ifndef _RF__RF_DAGFFRD_H_ -#define _RF__RF_DAGFFRD_H_ +#ifndef _RF__RF_DAGFFRD_H_ +#define _RF__RF_DAGFFRD_H_ #include "rf_types.h" -/* fault-free read DAG creation routines */ -void -rf_CreateFaultFreeReadDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); -void -rf_CreateNonredundantDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, RF_IoType_t type); -void -rf_CreateMirrorIdleReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); -void -rf_CreateMirrorPartitionReadDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); +/* Fault-free read DAG creation routines. 
*/ + +void rf_CreateFaultFreeReadDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); +void rf_CreateNonredundantDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + RF_IoType_t); +void rf_CreateMirrorIdleReadDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); +void rf_CreateMirrorPartitionReadDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); -#endif /* !_RF__RF_DAGFFRD_H_ */ +#endif /* !_RF__RF_DAGFFRD_H_ */ diff --git a/sys/dev/raidframe/rf_dagffwr.c b/sys/dev/raidframe/rf_dagffwr.c index 38e42e43134..0dcbd898d28 100644 --- a/sys/dev/raidframe/rf_dagffwr.c +++ b/sys/dev/raidframe/rf_dagffwr.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagffwr.c,v 1.4 2000/01/11 18:02:21 peter Exp $ */ +/* $OpenBSD: rf_dagffwr.c,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagffwr.c,v 1.5 2000/01/07 03:40:58 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -30,7 +31,7 @@ /* * rf_dagff.c * - * code for creating fault-free DAGs + * Code for creating fault-free DAGs. * */ @@ -45,14 +46,14 @@ #include "rf_general.h" #include "rf_dagffwr.h" -/****************************************************************************** +/***************************************************************************** * * General comments on DAG creation: * - * All DAGs in this file use roll-away error recovery. Each DAG has a single + * All DAGs in this file use roll-away error recovery. Each DAG has a single * commit node, usually called "Cmt." If an error occurs before the Cmt node * is reached, the execution engine will halt forward execution and work - * backward through the graph, executing the undo functions. Assuming that + * backward through the graph, executing the undo functions. Assuming that * each node in the graph prior to the Cmt node are undoable and atomic - or - * does not make changes to permanent state, the graph will fail atomically. * If an error occurs after the Cmt node executes, the engine will roll-forward @@ -61,82 +62,66 @@ * * A graph has only 1 Cmt node. * - */ + *****************************************************************************/ -/****************************************************************************** +/***************************************************************************** * * The following wrappers map the standard DAG creation interface to the - * DAG creation routines. Additionally, these wrappers enable experimentation + * DAG creation routines. Additionally, these wrappers enable experimentation * with new DAG structures by providing an extra level of indirection, allowing * the DAG creation routines to be replaced at this single point. 
- */ + * + *****************************************************************************/ -void -rf_CreateNonRedundantWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_IoType_t type) +void +rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList, RF_IoType_t type) { rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, RF_IO_TYPE_WRITE); } -void -rf_CreateRAID0WriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_IoType_t type) +void +rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList, RF_IoType_t type) { rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList, RF_IO_TYPE_WRITE); } -void -rf_CreateSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) +void +rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList) { - /* "normal" rollaway */ - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - &rf_xorFuncs, NULL); + /* "normal" rollaway. */ + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, &rf_xorFuncs, NULL); } -void -rf_CreateLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) +void +rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList) { - /* "normal" rollaway */ - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - 1, rf_RegularXorFunc, RF_TRUE); + /* "normal" rollaway. */ + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, 1, rf_RegularXorFunc, RF_TRUE); } -/****************************************************************************** +/***************************************************************************** * - * DAG creation code begins here - */ + * DAG creation code begins here. + * + *****************************************************************************/ -/****************************************************************************** +/***************************************************************************** * * creates a DAG to perform a large-write operation: * @@ -146,52 +131,46 @@ rf_CreateLargeWriteDAG( * \[Wnq]/ * * The XOR node also does the Q calculation in the P+Q architecture. - * All nodes are before the commit node (Cmt) are assumed to be atomic and - * undoable - or - they make no changes to permanent state. + * All nodes that are before the commit node (Cmt) are assumed to be atomic + * and undoable - or - they make no changes to permanent state. * * Rod = read old data * Cmt = commit node * Wnp = write new parity * Wnd = write new data * Wnq = write new "q" - * [] denotes optional segments in the graph + * [] denotes optional segments in the graph. 
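For the large write, the new parity is the XOR of every data stripe unit in the stripe: units being rewritten contribute their new contents, and the Rod nodes supply the untouched ones. A generic byte-wise sketch of that computation; buffer handling is simplified, the real code works through pdas:

#include <stddef.h>

/*
 * New parity for a large write: XOR of all data stripe units in the
 * stripe, byte by byte.  units[] holds one pointer per stripe unit.
 */
void
xor_stripe_units(unsigned char *parity, unsigned char *const units[],
    int n_units, size_t su_len)
{
	size_t off;
	int i;

	for (off = 0; off < su_len; off++) {
		unsigned char p = 0;

		for (i = 0; i < n_units; i++)
			p ^= units[i][off];
		parity[off] = p;
	}
}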
* - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - * nfaults - number of faults array can tolerate - * (equal to # redundancy units in stripe) - * redfuncs - list of redundancy generating functions + * Parameters: raidPtr - description of the physical array + * asmap - logical & physical addresses for this access + * bp - buffer ptr (holds write data) + * flags - general flags (e.g. disk locking) + * allocList - list of memory allocated in DAG creation + * nfaults - number of faults array can tolerate + * (equal to # redundancy units in stripe) + * redfuncs - list of redundancy generating functions * *****************************************************************************/ -void -rf_CommonCreateLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, - int (*redFunc) (RF_DagNode_t *), +void +rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList, int nfaults, int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle) { RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode; RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode; - int nWndNodes, nRodNodes, i, nodeNum, asmNum; + int nWndNodes, nRodNodes, i, nodeNum, asmNum; RF_AccessStripeMapHeader_t *new_asm_h[2]; RF_StripeNum_t parityStripeID; - char *sosBuffer, *eosBuffer; + char *sosBuffer, *eosBuffer; RF_ReconUnitNum_t which_ru; RF_RaidLayout_t *layoutPtr; RF_PhysDiskAddr_t *pda; layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, - &which_ru); + parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, + asmap->raidAddress, &which_ru); if (rf_dagDebug) { printf("[Creating large-write DAG]\n"); @@ -202,7 +181,7 @@ rf_CommonCreateLargeWriteDAG( dag_h->numCommits = 0; dag_h->numSuccedents = 1; - /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */ + /* Alloc the nodes: Wnd, xor, commit, block, term, and Wnp. */ nWndNodes = asmap->numStripeUnitsAccessed; RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); @@ -225,8 +204,8 @@ rf_CommonCreateLargeWriteDAG( } else { wnqNode = NULL; } - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, - &nRodNodes, &sosBuffer, &eosBuffer, allocList); + rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, + new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); if (nRodNodes > 0) { RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); @@ -234,32 +213,38 @@ rf_CommonCreateLargeWriteDAG( rodNodes = NULL; } - /* begin node initialization */ + /* Begin node initialization. 
*/ if (nRodNodes > 0) { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nRodNodes, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0, dag_h, + "Nil", allocList); } else { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", + allocList); } - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, - nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, - 0, nWndNodes + nfaults, 0, 0, dag_h, "Trm", allocList); + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0, + dag_h, "Cmt", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0, + dag_h, "Trm", allocList); - /* initialize the Rod nodes */ + /* Initialize the Rod nodes. */ for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { if (new_asm_h[asmNum]) { pda = new_asm_h[asmNum]->stripeMap->physInfo; while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, - "Rod", allocList); + rf_InitNode(&rodNodes[nodeNum], rf_wait, + RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, + 1, 1, 4, 0, dag_h, "Rod", allocList); rodNodes[nodeNum].params[0].p = pda; rodNodes[nodeNum].params[1].p = pda->bufPtr; rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + rodNodes[nodeNum].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); nodeNum++; pda = pda->next; @@ -268,98 +253,115 @@ rf_CommonCreateLargeWriteDAG( } RF_ASSERT(nodeNum == nRodNodes); - /* initialize the wnd nodes */ + /* Initialize the wnd nodes. */ pda = asmap->physInfo; for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); + rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Wnd", allocList); RF_ASSERT(pda != NULL); wndNodes[i].params[0].p = pda; wndNodes[i].params[1].p = pda->bufPtr; wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wndNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); pda = pda->next; } - /* initialize the redundancy node */ + /* Initialize the redundancy node. 
*/ if (nRodNodes > 0) { - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - nRodNodes, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, + rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, + rf_NullNodeUndoFunc, NULL, 1, nRodNodes, + 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList); } else { - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, - 1, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList); + rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, + rf_NullNodeUndoFunc, NULL, 1, 1, + 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, + "Xr ", allocList); } xorNode->flags |= RF_DAGNODE_FLAG_YIELD; for (i = 0; i < nWndNodes; i++) { - xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ + xorNode->params[2 * i + 0] = + wndNodes[i].params[0]; /* pda */ + xorNode->params[2 * i + 1] = + wndNodes[i].params[1]; /* buf ptr */ } for (i = 0; i < nRodNodes; i++) { - xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ + xorNode->params[2 * (nWndNodes + i) + 0] = + rodNodes[i].params[0]; /* pda */ + xorNode->params[2 * (nWndNodes + i) + 1] = + rodNodes[i].params[1]; /* buf ptr */ } - /* xor node needs to get at RAID information */ + /* Xor node needs to get at RAID information. */ xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; /* - * Look for an Rod node that reads a complete SU. If none, alloc a buffer - * to receive the parity info. Note that we can't use a new data buffer - * because it will not have gotten written when the xor occurs. - */ + * Look for an Rod node that reads a complete SU. If none, alloc + * a buffer to receive the parity info. Note that we can't use a + * new data buffer because it will not have gotten written when + * the xor occurs. + */ if (allowBufferRecycle) { for (i = 0; i < nRodNodes; i++) { - if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) + if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p) + ->numSector == raidPtr->Layout.sectorsPerStripeUnit) break; } } if ((!allowBufferRecycle) || (i == nRodNodes)) { RF_CallocAndAdd(xorNode->results[0], 1, - rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), + rf_RaidAddressToByte(raidPtr, + raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); } else { xorNode->results[0] = rodNodes[i].params[1].p; } - /* initialize the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); + /* Initialize the Wnp node. */ + rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Wnp", allocList); wnpNode->params[0].p = asmap->parityInfo; wnpNode->params[1].p = xorNode->results[0]; wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - /* parityInfo must describe entire parity unit */ + wnpNode->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + /* parityInfo must describe entire parity unit. */ RF_ASSERT(asmap->parityInfo->next == NULL); if (nfaults == 2) { /* - * We never try to recycle a buffer for the Q calcuation - * in addition to the parity. 
This would cause two buffers - * to get smashed during the P and Q calculation, guaranteeing - * one would be wrong. - */ + * We never try to recycle a buffer for the Q calculation + * in addition to the parity. This would cause two buffers + * to get smashed during the P and Q calculation, guaranteeing + * one would be wrong. + */ RF_CallocAndAdd(xorNode->results[1], 1, - rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), + rf_RaidAddressToByte(raidPtr, + raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); + rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Wnq", allocList); wnqNode->params[0].p = asmap->qInfo; wnqNode->params[1].p = xorNode->results[1]; wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - /* parityInfo must describe entire parity unit */ + wnqNode->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + /* parityInfo must describe entire parity unit. */ RF_ASSERT(asmap->parityInfo->next == NULL); } /* - * Connect nodes to form graph. - */ + * Connect nodes to form graph. + */ - /* connect dag header to block node */ + /* Connect dag header to block node. */ RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; if (nRodNodes > 0) { - /* connect the block node to the Rod nodes */ + /* Connect the block node to the Rod nodes. */ RF_ASSERT(blockNode->numSuccedents == nRodNodes); RF_ASSERT(xorNode->numAntecedents == nRodNodes); for (i = 0; i < nRodNodes; i++) { @@ -368,14 +370,14 @@ rf_CommonCreateLargeWriteDAG( rodNodes[i].antecedents[0] = blockNode; rodNodes[i].antType[0] = rf_control; - /* connect the Rod nodes to the Xor node */ + /* Connect the Rod nodes to the Xor node. */ RF_ASSERT(rodNodes[i].numSuccedents == 1); rodNodes[i].succedents[0] = xorNode; xorNode->antecedents[i] = &rodNodes[i]; xorNode->antType[i] = rf_trueData; } } else { - /* connect the block node to the Xor node */ + /* Connect the block node to the Xor node. */ RF_ASSERT(blockNode->numSuccedents == 1); RF_ASSERT(xorNode->numAntecedents == 1); blockNode->succedents[0] = xorNode; @@ -383,14 +385,14 @@ rf_CommonCreateLargeWriteDAG( xorNode->antType[0] = rf_control; } - /* connect the xor node to the commit node */ + /* Connect the xor node to the commit node. */ RF_ASSERT(xorNode->numSuccedents == 1); RF_ASSERT(commitNode->numAntecedents == 1); xorNode->succedents[0] = commitNode; commitNode->antecedents[0] = xorNode; commitNode->antType[0] = rf_control; - /* connect the commit node to the write nodes */ + /* Connect the commit node to the write nodes. */ RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults); for (i = 0; i < nWndNodes; i++) { RF_ASSERT(wndNodes->numAntecedents == 1); @@ -408,7 +410,7 @@ rf_CommonCreateLargeWriteDAG( wnqNode->antecedents[0] = commitNode; wnqNode->antType[0] = rf_trueData; } - /* connect the write nodes to the term node */ + /* Connect the write nodes to the term node. 
*/ RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults); RF_ASSERT(termNode->numSuccedents == 0); for (i = 0; i < nWndNodes; i++) { @@ -428,9 +430,9 @@ rf_CommonCreateLargeWriteDAG( termNode->antType[nWndNodes + 1] = rf_control; } } -/****************************************************************************** +/***************************************************************************** * - * creates a DAG to perform a small-write operation (either raid 5 or pq), + * Create a DAG to perform a small-write operation (either raid 5 or pq), * which is as follows: * * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm @@ -448,46 +450,41 @@ rf_CommonCreateLargeWriteDAG( * Wnp = write new parity * Wnd = write new data * Wnq = write new "q" - * [ ] denotes optional segments in the graph + * [ ] denotes optional segments in the graph. * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - * pfuncs - list of parity generating functions - * qfuncs - list of q generating functions + * Parameters: raidPtr - description of the physical array + * asmap - logical & physical addresses for this access + * bp - buffer ptr (holds write data) + * flags - general flags (e.g. disk locking) + * allocList - list of memory allocated in DAG creation + * pfuncs - list of parity generating functions + * qfuncs - list of q generating functions * - * A null qfuncs indicates single fault tolerant + * A null qfuncs indicates single fault tolerant. *****************************************************************************/ -void -rf_CommonCreateSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) +void +rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList, RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs) { RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode; RF_DagNode_t *unlockDataNodes, *unlockParityNodes, *unlockQNodes; RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode, *nodes; RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes; - int i, j, nNodes, totalNumNodes, lu_flag; + int i, j, nNodes, totalNumNodes, lu_flag; RF_ReconUnitNum_t which_ru; - int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); - int (*qfunc) (RF_DagNode_t *); - int numDataNodes, numParityNodes; + int (*func) (RF_DagNode_t *); + int (*undoFunc) (RF_DagNode_t *); + int (*qfunc) (RF_DagNode_t *); + int numDataNodes, numParityNodes; RF_StripeNum_t parityStripeID; RF_PhysDiskAddr_t *pda; - char *name, *qname; - long nfaults; + char *name, *qname; + long nfaults; nfaults = qfuncs ? 2 : 1; - lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ + lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* Lock/unlock flag. */ parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); @@ -506,30 +503,32 @@ rf_CommonCreateSmallWriteDAG( dag_h->numSuccedents = 1; /* - * DAG creation occurs in four steps: - * 1. count the number of nodes in the DAG - * 2. create the nodes - * 3. initialize the nodes - * 4. connect the nodes - */ + * DAG creation occurs in four steps: + * 1. 
Count the number of nodes in the DAG. + * 2. Create the nodes. + * 3. Initialize the nodes. + * 4. Connect the nodes. + */ + + /* + * Step 1. Compute number of nodes in the graph. + */ /* - * Step 1. compute number of nodes in the graph - */ - - /* number of nodes: a read and write for each data unit a redundancy - * computation node for each parity node (nfaults * nparity) a read - * and write for each parity unit a block and commit node (2) a - * terminate node if atomic RMW an unlock node for each data unit, - * redundancy unit */ + * Number of nodes: a read and write for each data unit, a redundancy + * computation node for each parity node (nfaults * nparity), a read + * and write for each parity unit, a block and commit node (2), a + * terminate node if atomic RMW, an unlock node for each + * data/redundancy unit. + */ totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) + (nfaults * 2 * numParityNodes) + 3; if (lu_flag) { totalNumNodes += (numDataNodes + (nfaults * numParityNodes)); } /* - * Step 2. create the nodes - */ + * Step 2. Create the nodes. + */ RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); i = 0; @@ -576,34 +575,39 @@ rf_CommonCreateSmallWriteDAG( RF_ASSERT(i == totalNumNodes); /* - * Step 3. initialize the nodes - */ - /* initialize block node (Nil) */ + * Step 3. Initialize the nodes. + */ + /* Initialize block node (Nil). */ nNodes = numDataNodes + (nfaults * numParityNodes); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, + "Nil", allocList); - /* initialize commit node (Cmt) */ - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, nNodes, (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList); + /* Initialize commit node (Cmt). */ + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nNodes, (nfaults * numParityNodes), + 0, 0, dag_h, "Cmt", allocList); - /* initialize terminate node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList); + /* Initialize terminate node (Trm). */ + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0, dag_h, + "Trm", allocList); - /* initialize nodes which read old data (Rod) */ + /* Initialize nodes which read old data (Rod). */ for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, (nfaults * numParityNodes), 1, 4, 0, dag_h, - "Rod", allocList); + rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, + (nfaults * numParityNodes), 1, 4, 0, dag_h, "Rod", + allocList); RF_ASSERT(pda != NULL); - /* physical disk addr desc */ + /* Physical disk addr desc. */ readDataNodes[i].params[0].p = pda; - /* buffer to hold old data */ + /* Buffer to hold old data. 
*/ readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + readDataNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); pda = pda->next; for (j = 0; j < readDataNodes[i].numSuccedents; j++) { @@ -611,20 +615,21 @@ rf_CommonCreateSmallWriteDAG( } } - /* initialize nodes which read old parity (Rop) */ + /* Initialize nodes which read old parity (Rop). */ pda = asmap->parityInfo; i = 0; for (i = 0; i < numParityNodes; i++) { RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, - rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, - 0, dag_h, "Rop", allocList); + rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, + numParityNodes, 1, 4, 0, dag_h, "Rop", allocList); readParityNodes[i].params[0].p = pda; - /* buffer to hold old parity */ + /* Buffer to hold old parity. */ readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + readParityNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); pda = pda->next; for (j = 0; j < readParityNodes[i].numSuccedents; j++) { @@ -632,19 +637,22 @@ rf_CommonCreateSmallWriteDAG( } } - /* initialize nodes which read old Q (Roq) */ + /* Initialize nodes which read old Q (Roq). */ if (nfaults == 2) { pda = asmap->qInfo; for (i = 0; i < numParityNodes; i++) { RF_ASSERT(pda != NULL); - rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, - rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList); + rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, numParityNodes, + 1, 4, 0, dag_h, "Roq", allocList); readQNodes[i].params[0].p = pda; - /* buffer to hold old Q */ - readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, - allocList); + /* Buffer to hold old Q. */ + readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, + dag_h, pda, allocList); readQNodes[i].params[2].v = parityStripeID; - readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + readQNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); pda = pda->next; for (j = 0; j < readQNodes[i].numSuccedents; j++) { @@ -652,46 +660,51 @@ rf_CommonCreateSmallWriteDAG( } } } - /* initialize nodes which write new data (Wnd) */ + /* Initialize nodes which write new data (Wnd). */ pda = asmap->physInfo; for (i = 0; i < numDataNodes; i++) { RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - /* physical disk addr desc */ + /* Physical disk addr desc. */ writeDataNodes[i].params[0].p = pda; - /* buffer holding new data to be written */ + /* Buffer holding new data to be written. 
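The old data and old parity are read back because a small write updates parity with the usual read-modify-write identity, new parity = old parity ^ old data ^ new data. A minimal sketch of that update over one rewritten region, with plain buffers standing in for the pda-described regions:

#include <stddef.h>

/* new_parity = old_parity ^ old_data ^ new_data, over the region. */
void
rmw_parity_update(unsigned char *parity, const unsigned char *old_data,
    const unsigned char *new_data, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		parity[i] ^= old_data[i] ^ new_data[i];
}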
*/ writeDataNodes[i].params[1].p = pda->bufPtr; writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); + writeDataNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, + /* Initialize node to unlock the disk queue. */ + rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, + rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, + rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList); - /* physical disk addr desc */ + /* Physical disk addr desc. */ unlockDataNodes[i].params[0].p = pda; - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + unlockDataNodes[i].params[1].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); } pda = pda->next; } /* - * Initialize nodes which compute new parity and Q. - */ + * Initialize nodes which compute new parity and Q. + */ /* - * We use the simple XOR func in the double-XOR case, and when - * we're accessing only a portion of one stripe unit. The distinction - * between the two is that the regular XOR func assumes that the targbuf - * is a full SU in size, and examines the pda associated with the buffer - * to decide where within the buffer to XOR the data, whereas - * the simple XOR func just XORs the data into the start of the buffer. - */ - if ((numParityNodes == 2) || ((numDataNodes == 1) - && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { + * We use the simple XOR func in the double-XOR case, and when + * we're accessing only a portion of one stripe unit. + * The distinction between the two is that the regular XOR func + * assumes that the targbuf is a full SU in size, and examines + * the pda associated with the buffer to decide where within + * the buffer to XOR the data, whereas the simple XOR func just + * XORs the data into the start of the buffer. + */ + if ((numParityNodes == 2) || ((numDataNodes == 1) && + (asmap->totalSectorsAccessed < + raidPtr->Layout.sectorsPerStripeUnit))) { func = pfuncs->simple; undoFunc = rf_NullNodeUndoFunc; name = pfuncs->SimpleName; @@ -715,15 +728,16 @@ rf_CommonCreateSmallWriteDAG( } } /* - * Initialize the xor nodes: params are {pda,buf} - * from {Rod,Wnd,Rop} nodes, and raidPtr - */ + * Initialize the xor nodes: params are {pda,buf}. + * From {Rod,Wnd,Rop} nodes, and raidPtr. + */ if (numParityNodes == 2) { - /* double-xor case */ + /* Double-xor case. */ for (i = 0; i < numParityNodes; i++) { - /* note: no wakeup func for xor */ - rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL, - 1, (numDataNodes + numParityNodes), 7, 1, dag_h, name, allocList); + /* Note: no wakeup func for xor. */ + rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, + undoFunc, NULL, 1, (numDataNodes + numParityNodes), + 7, 1, dag_h, name, allocList); xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; xorNodes[i].params[0] = readDataNodes[i].params[0]; xorNodes[i].params[1] = readDataNodes[i].params[1]; @@ -732,138 +746,163 @@ rf_CommonCreateSmallWriteDAG( xorNodes[i].params[4] = writeDataNodes[i].params[0]; xorNodes[i].params[5] = writeDataNodes[i].params[1]; xorNodes[i].params[6].p = raidPtr; - /* use old parity buf as target buf */ + /* Use old parity buf as target buf. 
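The distinction drawn above between the regular and the simple XOR functions comes down to where the result lands in the target buffer. Two toy routines make it concrete; su_offset is a stand-in for the offset the regular function derives from the pda:

#include <stddef.h>

/* Simple form: the result always lands at the start of the target. */
void
xor_simple(unsigned char *targ, const unsigned char *src, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		targ[i] ^= src[i];
}

/*
 * Regular form: the target is a full stripe unit and su_offset says
 * where inside it this access lives.
 */
void
xor_regular(unsigned char *su_targ, size_t su_offset,
    const unsigned char *src, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		su_targ[su_offset + i] ^= src[i];
}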
*/ xorNodes[i].results[0] = readParityNodes[i].params[1].p; if (nfaults == 2) { - /* note: no wakeup func for qor */ - rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, 1, - (numDataNodes + numParityNodes), 7, 1, dag_h, qname, allocList); - qNodes[i].params[0] = readDataNodes[i].params[0]; - qNodes[i].params[1] = readDataNodes[i].params[1]; + /* Note: no wakeup func for qor. */ + rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, + qfunc, undoFunc, NULL, 1, + (numDataNodes + numParityNodes), 7, 1, + dag_h, qname, allocList); + qNodes[i].params[0] = + readDataNodes[i].params[0]; + qNodes[i].params[1] = + readDataNodes[i].params[1]; qNodes[i].params[2] = readQNodes[i].params[0]; qNodes[i].params[3] = readQNodes[i].params[1]; - qNodes[i].params[4] = writeDataNodes[i].params[0]; - qNodes[i].params[5] = writeDataNodes[i].params[1]; + qNodes[i].params[4] = + writeDataNodes[i].params[0]; + qNodes[i].params[5] = + writeDataNodes[i].params[1]; qNodes[i].params[6].p = raidPtr; - /* use old Q buf as target buf */ - qNodes[i].results[0] = readQNodes[i].params[1].p; + /* Use old Q buf as target buf. */ + qNodes[i].results[0] = + readQNodes[i].params[1].p; } } } else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, 1, - (numDataNodes + numParityNodes), - (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); + /* There is only one xor node in this case. */ + rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, + NULL, 1, (numDataNodes + numParityNodes), + (2 * (numDataNodes + numDataNodes + 1) + 1), 1, + dag_h, name, allocList); xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; for (i = 0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */ + /* Set up params related to Rod and Rop nodes. */ + xorNodes[0].params[2 * i + 0] = + readDataNodes[i].params[0]; /* pda */ + xorNodes[0].params[2 * i + 1] = + readDataNodes[i].params[1]; /* buffer ptr */ } for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */ - writeDataNodes[i].params[0]; - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */ - writeDataNodes[i].params[1]; + /* Set up params related to Wnd and Wnp nodes. */ + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = + writeDataNodes[i].params[0]; /* pda */ + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = + writeDataNodes[i].params[1]; /* buffer ptr */ } - /* xor node needs to get at RAID information */ - xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; + /* Xor node needs to get at RAID information. 
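The params array of the single XOR node is packed as one (pda, buffer) pair per contributing node, two slots per pair, with the raidPtr in the last slot. A compact sketch of that packing convention using a stand-in type, not the real RF_DagParam_t:

/* Stand-in for RF_DagParam_t. */
union dag_param {
	void	*p;
	long	 v;
};

/*
 * Pack n (pda, buffer) pairs into params[0 .. 2n-1] and the RAID
 * descriptor into params[2n]; the redundancy function walks the
 * array back out with the same stride.
 */
void
pack_xor_params(union dag_param *params, void *const pdas[],
    void *const bufs[], int n, void *raidPtr)
{
	int i;

	for (i = 0; i < n; i++) {
		params[2 * i + 0].p = pdas[i];	/* pda */
		params[2 * i + 1].p = bufs[i];	/* buffer ptr */
	}
	params[2 * n].p = raidPtr;
}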
*/ + xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = + raidPtr; xorNodes[0].results[0] = readParityNodes[0].params[1].p; if (nfaults == 2) { - rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, 1, - (numDataNodes + numParityNodes), - (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, - qname, allocList); + rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, + undoFunc, NULL, 1, (numDataNodes + numParityNodes), + (2 * (numDataNodes + numDataNodes + 1) + 1), 1, + dag_h, qname, allocList); for (i = 0; i < numDataNodes; i++) { - /* set up params related to Rod */ - qNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer ptr */ + /* Set up params related to Rod. */ + qNodes[0].params[2 * i + 0] = + readDataNodes[i].params[0]; /* pda */ + qNodes[0].params[2 * i + 1] = + readDataNodes[i].params[1]; /* buffer ptr */ } - /* and read old q */ - qNodes[0].params[2 * numDataNodes + 0] = /* pda */ - readQNodes[0].params[0]; - qNodes[0].params[2 * numDataNodes + 1] = /* buffer ptr */ - readQNodes[0].params[1]; + /* And read old q. */ + qNodes[0].params[2 * numDataNodes + 0] = + readQNodes[0].params[0]; /* pda */ + qNodes[0].params[2 * numDataNodes + 1] = + readQNodes[0].params[1]; /* buffer ptr */ for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd nodes */ - qNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = /* pda */ + /* Set up params related to Wnd nodes. */ + qNodes[0].params + [2 * (numDataNodes + 1 + i) + 0] = + /* pda */ writeDataNodes[i].params[0]; - qNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = /* buffer ptr */ + qNodes[0].params + [2 * (numDataNodes + 1 + i) + 1] = + /* buffer ptr */ writeDataNodes[i].params[1]; } - /* xor node needs to get at RAID information */ - qNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; + /* Xor node needs to get at RAID information. */ + qNodes[0].params + [2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; qNodes[0].results[0] = readQNodes[0].params[1].p; } } - /* initialize nodes which write new parity (Wnp) */ + /* Initialize nodes which write new parity (Wnp). */ pda = asmap->parityInfo; for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); RF_ASSERT(pda != NULL); - writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ + /* Param 1 (bufPtr) filled in by xor node. */ + writeParityNodes[i].params[0].p = pda; + /* Buffer pointer for parity write operation. */ + writeParityNodes[i].params[1].p = xorNodes[i].results[0]; writeParityNodes[i].params[2].v = parityStripeID; - writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, which_ru); + writeParityNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, + /* Initialize node to unlock the disk queue. 
*/ + rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, + rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, + rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unp", allocList); - unlockParityNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + /* Physical disk addr desc. */ + unlockParityNodes[i].params[0].p = pda; + unlockParityNodes[i].params[1].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); } pda = pda->next; } - /* initialize nodes which write new Q (Wnq) */ + /* Initialize nodes which write new Q (Wnq). */ if (nfaults == 2) { pda = asmap->qInfo; for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, - rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); RF_ASSERT(pda != NULL); - writeQNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeQNodes[i].params[1].p = qNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ + /* Param 1 (bufPtr) filled in by xor node. */ + writeQNodes[i].params[0].p = pda; + writeQNodes[i].params[1].p = qNodes[i].results[0]; + /* Buffer pointer for parity write operation. */ writeQNodes[i].params[2].v = parityStripeID; - writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + writeQNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, - rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, - "Unq", allocList); - unlockQNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + /* Initialize node to unlock the disk queue. */ + rf_InitNode(&unlockQNodes[i], rf_wait, + RF_FALSE, rf_DiskUnlockFunc, + rf_DiskUnlockUndoFunc, + rf_GenericWakeupFunc, 1, 1, 2, 0, + dag_h, "Unq", allocList); + /* Physical disk addr desc. */ + unlockQNodes[i].params[0].p = pda; + unlockQNodes[i].params[1].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); } pda = pda->next; } } /* - * Step 4. connect the nodes. - */ + * Step 4. Connect the nodes. + */ - /* connect header to block node */ + /* Connect header to block node. */ dag_h->succedents[0] = blockNode; - /* connect block node to read old data nodes */ - RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults))); + /* Connect block node to read old data nodes. */ + RF_ASSERT(blockNode->numSuccedents == + (numDataNodes + (numParityNodes * nfaults))); for (i = 0; i < numDataNodes; i++) { blockNode->succedents[i] = &readDataNodes[i]; RF_ASSERT(readDataNodes[i].numAntecedents == 1); @@ -871,7 +910,7 @@ rf_CommonCreateSmallWriteDAG( readDataNodes[i].antType[0] = rf_control; } - /* connect block node to read old parity nodes */ + /* Connect block node to read old parity nodes. */ for (i = 0; i < numParityNodes; i++) { blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; RF_ASSERT(readParityNodes[i].numAntecedents == 1); @@ -879,59 +918,68 @@ rf_CommonCreateSmallWriteDAG( readParityNodes[i].antType[0] = rf_control; } - /* connect block node to read old Q nodes */ + /* Connect block node to read old Q nodes. 
*/ if (nfaults == 2) { for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i]; + blockNode->succedents[numDataNodes + numParityNodes + i] + = &readQNodes[i]; RF_ASSERT(readQNodes[i].numAntecedents == 1); readQNodes[i].antecedents[0] = blockNode; readQNodes[i].antType[0] = rf_control; } } - /* connect read old data nodes to xor nodes */ + /* Connect read old data nodes to xor nodes. */ for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == (nfaults * numParityNodes)); + RF_ASSERT(readDataNodes[i].numSuccedents == + (nfaults * numParityNodes)); for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); + RF_ASSERT(xorNodes[j].numAntecedents == + numDataNodes + numParityNodes); readDataNodes[i].succedents[j] = &xorNodes[j]; xorNodes[j].antecedents[i] = &readDataNodes[i]; xorNodes[j].antType[i] = rf_trueData; } } - /* connect read old data nodes to q nodes */ + /* Connect read old data nodes to q nodes. */ if (nfaults == 2) { for (i = 0; i < numDataNodes; i++) { for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[numParityNodes + j] = &qNodes[j]; + RF_ASSERT(qNodes[j].numAntecedents == + numDataNodes + numParityNodes); + readDataNodes[i].succedents[numParityNodes + j] + = &qNodes[j]; qNodes[j].antecedents[i] = &readDataNodes[i]; qNodes[j].antType[i] = rf_trueData; } } } - /* connect read old parity nodes to xor nodes */ + /* Connect read old parity nodes to xor nodes. */ for (i = 0; i < numParityNodes; i++) { RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); for (j = 0; j < numParityNodes; j++) { readParityNodes[i].succedents[j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; + xorNodes[j].antecedents[numDataNodes + i] = + &readParityNodes[i]; xorNodes[j].antType[numDataNodes + i] = rf_trueData; } } - /* connect read old q nodes to q nodes */ + /* Connect read old q nodes to q nodes. */ if (nfaults == 2) { for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); + RF_ASSERT(readParityNodes[i].numSuccedents == + numParityNodes); for (j = 0; j < numParityNodes; j++) { readQNodes[i].succedents[j] = &qNodes[j]; - qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i]; - qNodes[j].antType[numDataNodes + i] = rf_trueData; + qNodes[j].antecedents[numDataNodes + i] = + &readQNodes[i]; + qNodes[j].antType[numDataNodes + i] = + rf_trueData; } } } - /* connect xor nodes to commit node */ + /* Connect xor nodes to commit node. */ RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes)); for (i = 0; i < numParityNodes; i++) { RF_ASSERT(xorNodes[i].numSuccedents == 1); @@ -940,17 +988,19 @@ rf_CommonCreateSmallWriteDAG( commitNode->antType[i] = rf_control; } - /* connect q nodes to commit node */ + /* Connect q nodes to commit node. */ if (nfaults == 2) { for (i = 0; i < numParityNodes; i++) { RF_ASSERT(qNodes[i].numSuccedents == 1); qNodes[i].succedents[0] = commitNode; - commitNode->antecedents[i + numParityNodes] = &qNodes[i]; + commitNode->antecedents[i + numParityNodes] = + &qNodes[i]; commitNode->antType[i + numParityNodes] = rf_control; } } - /* connect commit node to write nodes */ - RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes))); + /* Connect commit node to write nodes. 
*/ + RF_ASSERT(commitNode->numSuccedents == + (numDataNodes + (nfaults * numParityNodes))); for (i = 0; i < numDataNodes; i++) { RF_ASSERT(writeDataNodes[i].numAntecedents == 1); commitNode->succedents[i] = &writeDataNodes[i]; @@ -966,31 +1016,35 @@ rf_CommonCreateSmallWriteDAG( if (nfaults == 2) { for (i = 0; i < numParityNodes; i++) { RF_ASSERT(writeQNodes[i].numAntecedents == 1); - commitNode->succedents[i + numDataNodes + numParityNodes] = &writeQNodes[i]; + commitNode->succedents + [i + numDataNodes + numParityNodes] = + &writeQNodes[i]; writeQNodes[i].antecedents[0] = commitNode; writeQNodes[i].antType[0] = rf_trueData; } } - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); + RF_ASSERT(termNode->numAntecedents == + (numDataNodes + (nfaults * numParityNodes))); RF_ASSERT(termNode->numSuccedents == 0); for (i = 0; i < numDataNodes; i++) { if (lu_flag) { - /* connect write new data nodes to unlock nodes */ + /* Connect write new data nodes to unlock nodes. */ RF_ASSERT(writeDataNodes[i].numSuccedents == 1); RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; unlockDataNodes[i].antType[0] = rf_control; - /* connect unlock nodes to term node */ + /* Connect unlock nodes to term node. */ RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); unlockDataNodes[i].succedents[0] = termNode; termNode->antecedents[i] = &unlockDataNodes[i]; termNode->antType[i] = rf_control; } else { - /* connect write new data nodes to term node */ + /* Connect write new data nodes to term node. */ RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); + RF_ASSERT(termNode->numAntecedents == + (numDataNodes + (nfaults * numParityNodes))); writeDataNodes[i].succedents[0] = termNode; termNode->antecedents[i] = &writeDataNodes[i]; termNode->antType[i] = rf_control; @@ -999,22 +1053,26 @@ rf_CommonCreateSmallWriteDAG( for (i = 0; i < numParityNodes; i++) { if (lu_flag) { - /* connect write new parity nodes to unlock nodes */ + /* Connect write new parity nodes to unlock nodes. */ RF_ASSERT(writeParityNodes[i].numSuccedents == 1); RF_ASSERT(unlockParityNodes[i].numAntecedents == 1); - writeParityNodes[i].succedents[0] = &unlockParityNodes[i]; - unlockParityNodes[i].antecedents[0] = &writeParityNodes[i]; + writeParityNodes[i].succedents[0] = + &unlockParityNodes[i]; + unlockParityNodes[i].antecedents[0] = + &writeParityNodes[i]; unlockParityNodes[i].antType[0] = rf_control; - /* connect unlock nodes to term node */ + /* Connect unlock nodes to term node. */ RF_ASSERT(unlockParityNodes[i].numSuccedents == 1); unlockParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i]; + termNode->antecedents[numDataNodes + i] = + &unlockParityNodes[i]; termNode->antType[numDataNodes + i] = rf_control; } else { RF_ASSERT(writeParityNodes[i].numSuccedents == 1); writeParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &writeParityNodes[i]; + termNode->antecedents[numDataNodes + i] = + &writeParityNodes[i]; termNode->antType[numDataNodes + i] = rf_control; } } @@ -1022,57 +1080,62 @@ rf_CommonCreateSmallWriteDAG( if (nfaults == 2) { for (i = 0; i < numParityNodes; i++) { if (lu_flag) { - /* connect write new Q nodes to unlock nodes */ + /* Connect write new Q nodes to unlock nodes. 
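The connection code above always records each edge twice, once in the parent's succedents[] and once in the child's antecedents[]/antType[], and the RF_ASSERTs check that every slot counted at rf_InitNode() time is actually filled. A minimal, self-contained sketch of that idiom, using simplified hypothetical types rather than the real RF_DagNode_t:

#include <assert.h>
#include <stdio.h>

enum ant_type { ANT_CONTROL, ANT_TRUE_DATA, ANT_ANTI_DATA };

struct dag_node {
	const char	*name;
	int		 n_succ, n_ante;
	struct dag_node	*succ[8];
	struct dag_node	*ante[8];
	enum ant_type	 ant_type[8];
};

/*
 * Record the edge parent -> child in both directions, the way the DAG
 * construction code fills succedents[], antecedents[] and antType[].
 */
static void
link_nodes(struct dag_node *parent, int s_idx,
    struct dag_node *child, int a_idx, enum ant_type t)
{
	assert(s_idx < parent->n_succ);
	assert(a_idx < child->n_ante);
	parent->succ[s_idx] = child;
	child->ante[a_idx] = parent;
	child->ant_type[a_idx] = t;
}

int
main(void)
{
	struct dag_node blk = { "Nil", 1, 0 };
	struct dag_node rod = { "Rod", 1, 1 };
	struct dag_node xr  = { "Xor", 0, 1 };

	link_nodes(&blk, 0, &rod, 0, ANT_CONTROL);	/* block -> read old data */
	link_nodes(&rod, 0, &xr, 0, ANT_TRUE_DATA);	/* read old data -> xor   */

	printf("%s -> %s -> %s\n", blk.name, blk.succ[0]->name,
	    rod.succ[0]->name);
	return (0);
}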
*/ RF_ASSERT(writeQNodes[i].numSuccedents == 1); RF_ASSERT(unlockQNodes[i].numAntecedents == 1); writeQNodes[i].succedents[0] = &unlockQNodes[i]; - unlockQNodes[i].antecedents[0] = &writeQNodes[i]; + unlockQNodes[i].antecedents[0] = + &writeQNodes[i]; unlockQNodes[i].antType[0] = rf_control; - /* connect unlock nodes to unblock node */ + /* Connect unlock nodes to unblock node. */ RF_ASSERT(unlockQNodes[i].numSuccedents == 1); unlockQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; + termNode->antecedents + [numDataNodes + numParityNodes + i] = + &unlockQNodes[i]; + termNode->antType + [numDataNodes + numParityNodes + i] = + rf_control; } else { RF_ASSERT(writeQNodes[i].numSuccedents == 1); writeQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; + termNode->antecedents + [numDataNodes + numParityNodes + i] = + &writeQNodes[i]; + termNode->antType + [numDataNodes + numParityNodes + i] = + rf_control; } } } } -/****************************************************************************** - * create a write graph (fault-free or degraded) for RAID level 1 +/***************************************************************************** + * Create a write graph (fault-free or degraded) for RAID level 1. * * Hdr -> Commit -> Wpd -> Nil -> Trm - * -> Wsd -> + * -> Wsd -> * - * The "Wpd" node writes data to the primary copy in the mirror pair - * The "Wsd" node writes data to the secondary copy in the mirror pair + * The "Wpd" node writes data to the primary copy in the mirror pair. + * The "Wsd" node writes data to the secondary copy in the mirror pair. * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation + * Parameters: raidPtr - description of the physical array + * asmap - logical & physical addresses for this access + * bp - buffer ptr (holds write data) + * flags - general flags (e.g. disk locking) + * allocList - list of memory allocated in DAG creation *****************************************************************************/ -void -rf_CreateRaidOneWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) +void +rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList) { RF_DagNode_t *unblockNode, *termNode, *commitNode; RF_DagNode_t *nodes, *wndNode, *wmirNode; - int nWndNodes, nWmirNodes, i; + int nWndNodes, nWmirNodes, i; RF_ReconUnitNum_t which_ru; RF_PhysDiskAddr_t *pda, *pdaP; RF_StripeNum_t parityStripeID; @@ -1084,18 +1147,20 @@ rf_CreateRaidOneWriteDAG( } dag_h->creator = "RaidOneWriteDAG"; - /* 2 implies access not SU aligned */ + /* 2 implies access not SU aligned. */ nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; nWndNodes = (asmap->physInfo->next) ? 2 : 1; - /* alloc the Wnd nodes and the Wmir node */ + /* Alloc the Wnd nodes and the Wmir node. 
*/ if (asmap->numDataFailed == 1) nWndNodes--; if (asmap->numParityFailed == 1) nWmirNodes--; - /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock - * + terminator) */ + /* + * Total number of nodes = nWndNodes + nWmirNodes + * + (commit + unblock + terminator) + */ RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); i = 0; @@ -1111,58 +1176,68 @@ rf_CreateRaidOneWriteDAG( i += 1; RF_ASSERT(i == (nWndNodes + nWmirNodes + 3)); - /* this dag can commit immediately */ + /* This dag can commit immediately. */ dag_h->numCommitNodes = 1; dag_h->numCommits = 0; dag_h->numSuccedents = 1; - /* initialize the commit, unblock, and term nodes */ - rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, (nWndNodes + nWmirNodes), 0, 0, 0, dag_h, "Cmt", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, - NULL, 1, (nWndNodes + nWmirNodes), 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, - NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the wnd nodes */ + /* Initialize the commit, unblock, and term nodes. */ + rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes), 0, 0, 0, + dag_h, "Cmt", allocList); + rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes), 0, 0, + dag_h, "Nil", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + + /* Initialize the wnd nodes. */ if (nWndNodes > 0) { pda = asmap->physInfo; for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList); + rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Wpd", allocList); RF_ASSERT(pda != NULL); wndNode[i].params[0].p = pda; wndNode[i].params[1].p = pda->bufPtr; wndNode[i].params[2].v = parityStripeID; - wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wndNode[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); pda = pda->next; } RF_ASSERT(pda == NULL); } - /* initialize the mirror nodes */ + /* Initialize the mirror nodes. */ if (nWmirNodes > 0) { pda = asmap->physInfo; pdaP = asmap->parityInfo; for (i = 0; i < nWmirNodes; i++) { - rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wsd", allocList); + rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Wsd", allocList); RF_ASSERT(pda != NULL); wmirNode[i].params[0].p = pdaP; wmirNode[i].params[1].p = pda->bufPtr; wmirNode[i].params[2].v = parityStripeID; - wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wmirNode[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); pda = pda->next; pdaP = pdaP->next; } RF_ASSERT(pda == NULL); RF_ASSERT(pdaP == NULL); } - /* link the header node to the commit node */ + /* Link the header node to the commit node. 
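The RF_CallocAndAdd() call above follows the allocation idiom used throughout this file: one contiguous array of RF_DagNode_t is carved into the per-role node groups with a running index, and the index is asserted to land exactly on the array size. A small stand-alone sketch of that pattern, with a hypothetical struct and plain calloc() standing in for RF_CallocAndAdd(), assuming the RAID 1 layout of nWndNodes + nWmirNodes + 3 nodes:

#include <assert.h>
#include <stdlib.h>

struct dag_node { int dummy; };

int
main(void)
{
	int nWndNodes = 2, nWmirNodes = 1;
	int i, total = nWndNodes + nWmirNodes + 3;	/* + commit, unblock, term */
	struct dag_node *nodes, *wndNode, *wmirNode;
	struct dag_node *commitNode, *unblockNode, *termNode;

	nodes = calloc(total, sizeof(*nodes));
	if (nodes == NULL)
		return (1);

	i = 0;
	wndNode = &nodes[i];	i += nWndNodes;
	wmirNode = &nodes[i];	i += nWmirNodes;
	commitNode = &nodes[i];	i += 1;
	unblockNode = &nodes[i];	i += 1;
	termNode = &nodes[i];	i += 1;
	assert(i == total);	/* Every slot accounted for. */

	(void)wndNode; (void)wmirNode; (void)commitNode;
	(void)unblockNode; (void)termNode;
	free(nodes);
	return (0);
}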
*/ RF_ASSERT(dag_h->numSuccedents == 1); RF_ASSERT(commitNode->numAntecedents == 0); dag_h->succedents[0] = commitNode; - /* link the commit node to the write nodes */ + /* Link the commit node to the write nodes. */ RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes)); for (i = 0; i < nWndNodes; i++) { RF_ASSERT(wndNode[i].numAntecedents == 1); @@ -1177,7 +1252,7 @@ rf_CreateRaidOneWriteDAG( wmirNode[i].antType[0] = rf_control; } - /* link the write nodes to the unblock node */ + /* Link the write nodes to the unblock node. */ RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes)); for (i = 0; i < nWndNodes; i++) { RF_ASSERT(wndNode[i].numSuccedents == 1); @@ -1192,7 +1267,7 @@ rf_CreateRaidOneWriteDAG( unblockNode->antType[i + nWndNodes] = rf_control; } - /* link the unblock node to the term node */ + /* Link the unblock node to the term node. */ RF_ASSERT(unblockNode->numSuccedents == 1); RF_ASSERT(termNode->numAntecedents == 1); RF_ASSERT(termNode->numSuccedents == 0); @@ -1203,39 +1278,35 @@ rf_CreateRaidOneWriteDAG( -/* DAGs which have no commit points. +/* + * DAGs that have no commit points. * - * The following DAGs are used in forward and backward error recovery experiments. - * They are identical to the DAGs above this comment with the exception that the + * The following DAGs are used in forward and backward error recovery + * experiments. + * They are identical to the DAGs above this comment with the exception that * the commit points have been removed. */ - -void -rf_CommonCreateLargeWriteDAGFwd( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, - int (*redFunc) (RF_DagNode_t *), +void +rf_CommonCreateLargeWriteDAGFwd(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList, int nfaults, int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle) { RF_DagNode_t *nodes, *wndNodes, *rodNodes, *xorNode, *wnpNode; RF_DagNode_t *wnqNode, *blockNode, *syncNode, *termNode; - int nWndNodes, nRodNodes, i, nodeNum, asmNum; + int nWndNodes, nRodNodes, i, nodeNum, asmNum; RF_AccessStripeMapHeader_t *new_asm_h[2]; RF_StripeNum_t parityStripeID; - char *sosBuffer, *eosBuffer; + char *sosBuffer, *eosBuffer; RF_ReconUnitNum_t which_ru; RF_RaidLayout_t *layoutPtr; RF_PhysDiskAddr_t *pda; layoutPtr = &(raidPtr->Layout); - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); + parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), + asmap->raidAddress, &which_ru); if (rf_dagDebug) printf("[Creating large-write DAG]\n"); @@ -1245,9 +1316,10 @@ rf_CommonCreateLargeWriteDAGFwd( dag_h->numCommits = 0; dag_h->numSuccedents = 1; - /* alloc the nodes: Wnd, xor, commit, block, term, and Wnp */ + /* Alloc the nodes: Wnd, xor, commit, block, term, and Wnp. 
*/ nWndNodes = asmap->numStripeUnitsAccessed; - RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + RF_CallocAndAdd(nodes, nWndNodes + 4 + nfaults, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); i = 0; wndNodes = &nodes[i]; i += nWndNodes; @@ -1267,34 +1339,51 @@ rf_CommonCreateLargeWriteDAGFwd( } else { wnqNode = NULL; } - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); + rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, + new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); if (nRodNodes > 0) { - RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); } else { rodNodes = NULL; } - /* begin node initialization */ + /* Begin node initialization. */ if (nRodNodes > 0) { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0, dag_h, + "Nil", allocList); + rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes, 0, 0, + dag_h, "Nil", allocList); } else { - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, 1, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", + allocList); + rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nWndNodes + 1, 1, 0, 0, dag_h, + "Nil", allocList); } - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0, dag_h, "Trm", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0, + dag_h, "Trm", allocList); - /* initialize the Rod nodes */ + /* Initialize the Rod nodes. */ for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { if (new_asm_h[asmNum]) { pda = new_asm_h[asmNum]->stripeMap->physInfo; while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList); + rf_InitNode(&rodNodes[nodeNum], rf_wait, + RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, + 1, 1, 4, 0, dag_h, "Rod", allocList); rodNodes[nodeNum].params[0].p = pda; rodNodes[nodeNum].params[1].p = pda->bufPtr; rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rodNodes[nodeNum].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); nodeNum++; pda = pda->next; } @@ -1302,79 +1391,104 @@ rf_CommonCreateLargeWriteDAGFwd( } RF_ASSERT(nodeNum == nRodNodes); - /* initialize the wnd nodes */ + /* Initialize the wnd nodes. 
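As a quick check of the nWndNodes + 4 + nfaults sizing above: a single-fault-tolerant (RAID 5) large write touching three stripe units has nWndNodes = 3 and nfaults = 1, so 3 + 4 + 1 = 8 nodes are allocated for the three Wnd writes, the Xr redundancy node, the Wnp parity write, and the block, sync, and terminate nodes; with nfaults = 2 one more slot is reserved for Wnq. The Rod nodes are sized separately from nRodNodes once the unaccessed portion of the stripe has been mapped.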
*/ pda = asmap->physInfo; for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); + rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Wnd", allocList); RF_ASSERT(pda != NULL); wndNodes[i].params[0].p = pda; wndNodes[i].params[1].p = pda->bufPtr; wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wndNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); pda = pda->next; } - /* initialize the redundancy node */ - rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1, nfaults, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, "Xr ", allocList); + /* Initialize the redundancy node. */ + rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, + NULL, 1, nfaults, 2 * (nWndNodes + nRodNodes) + 1, nfaults, dag_h, + "Xr ", allocList); xorNode->flags |= RF_DAGNODE_FLAG_YIELD; for (i = 0; i < nWndNodes; i++) { - xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ + xorNode->params[2 * i + 0] = + wndNodes[i].params[0]; /* pda */ + xorNode->params[2 * i + 1] = + wndNodes[i].params[1]; /* buf ptr */ } for (i = 0; i < nRodNodes; i++) { - xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ + xorNode->params[2 * (nWndNodes + i) + 0] = + rodNodes[i].params[0]; /* pda */ + xorNode->params[2 * (nWndNodes + i) + 1] = + rodNodes[i].params[1]; /* buf ptr */ } - xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; /* xor node needs to get - * at RAID information */ + /* Xor node needs to get at RAID information. */ + xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; - /* look for an Rod node that reads a complete SU. If none, alloc a + /* + * Look for an Rod node that reads a complete SU. If none, alloc a * buffer to receive the parity info. Note that we can't use a new * data buffer because it will not have gotten written when the xor - * occurs. */ + * occurs. + */ if (allowBufferRecycle) { for (i = 0; i < nRodNodes; i++) - if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) + if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p) + ->numSector == raidPtr->Layout.sectorsPerStripeUnit) break; } if ((!allowBufferRecycle) || (i == nRodNodes)) { - RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); + RF_CallocAndAdd(xorNode->results[0], 1, + rf_RaidAddressToByte(raidPtr, + raidPtr->Layout.sectorsPerStripeUnit), + (void *), allocList); } else xorNode->results[0] = rodNodes[i].params[1].p; - /* initialize the Wnp node */ - rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList); + /* Initialize the Wnp node. 
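The recycling decision above only reuses a read-old-data buffer when some Rod node covers a complete stripe unit; otherwise a fresh full-SU buffer is allocated for the parity result, since (as the comment notes) a new-data buffer would not yet have been written when the xor runs. A self-contained sketch of that decision, with hypothetical types and an assumed 512-byte sector size standing in for rf_RaidAddressToByte():

#include <stdio.h>
#include <stdlib.h>

#define SECTOR_BYTES	512	/* Assumed sector size, for this sketch only. */

struct pda_sketch {
	int	 numSector;
	char	*bufPtr;
};

/*
 * Reuse a read buffer only if some Rod node covers a full stripe unit;
 * otherwise the parity result gets a buffer of its own.
 */
static char *
pick_parity_target(struct pda_sketch *rod, int nRod, int sectorsPerSU,
    int allowRecycle)
{
	int i;

	if (allowRecycle) {
		for (i = 0; i < nRod; i++)
			if (rod[i].numSector == sectorsPerSU)
				return (rod[i].bufPtr);
	}
	return (calloc(sectorsPerSU, SECTOR_BYTES));
}

int
main(void)
{
	char buf[16 * SECTOR_BYTES];
	struct pda_sketch rod[2] = { { 8, buf }, { 16, buf } };
	char *target = pick_parity_target(rod, 2, 16, 1);

	printf("recycled: %s\n", target == buf ? "yes" : "no");
	if (target != buf)
		free(target);
	return (0);
}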
*/ + rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Wnp", allocList); wnpNode->params[0].p = asmap->parityInfo; wnpNode->params[1].p = xorNode->results[0]; wnpNode->params[2].v = parityStripeID; - wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must - * describe entire - * parity unit */ + wnpNode->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + /* parityInfo must describe entire parity unit. */ + RF_ASSERT(asmap->parityInfo->next == NULL); if (nfaults == 2) { - /* we never try to recycle a buffer for the Q calcuation in - * addition to the parity. This would cause two buffers to get - * smashed during the P and Q calculation, guaranteeing one - * would be wrong. */ - RF_CallocAndAdd(xorNode->results[1], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); - rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList); + /* + * Never try to recycle a buffer for the Q calcuation in + * addition to the parity. This would cause two buffers to + * get smashed during the P and Q calculation, guaranteeing + * one would be wrong. + */ + RF_CallocAndAdd(xorNode->results[1], 1, + rf_RaidAddressToByte(raidPtr, + raidPtr->Layout.sectorsPerStripeUnit), + (void *), allocList); + rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Wnq", allocList); wnqNode->params[0].p = asmap->qInfo; wnqNode->params[1].p = xorNode->results[1]; wnqNode->params[2].v = parityStripeID; - wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must - * describe entire - * parity unit */ + wnqNode->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + /* parityInfo must describe entire parity unit. */ + RF_ASSERT(asmap->parityInfo->next == NULL); } - /* connect nodes to form graph */ - /* connect dag header to block node */ + /* Connect nodes to form graph. */ + + /* Connect dag header to block node. */ RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; if (nRodNodes > 0) { - /* connect the block node to the Rod nodes */ + /* Connect the block node to the Rod nodes. */ RF_ASSERT(blockNode->numSuccedents == nRodNodes); RF_ASSERT(syncNode->numAntecedents == nRodNodes); for (i = 0; i < nRodNodes; i++) { @@ -1383,14 +1497,14 @@ rf_CommonCreateLargeWriteDAGFwd( rodNodes[i].antecedents[0] = blockNode; rodNodes[i].antType[0] = rf_control; - /* connect the Rod nodes to the Nil node */ + /* Connect the Rod nodes to the Nil node. */ RF_ASSERT(rodNodes[i].numSuccedents == 1); rodNodes[i].succedents[0] = syncNode; syncNode->antecedents[i] = &rodNodes[i]; syncNode->antType[i] = rf_trueData; } } else { - /* connect the block node to the Nil node */ + /* Connect the block node to the Nil node. */ RF_ASSERT(blockNode->numSuccedents == 1); RF_ASSERT(syncNode->numAntecedents == 1); blockNode->succedents[0] = syncNode; @@ -1398,7 +1512,7 @@ rf_CommonCreateLargeWriteDAGFwd( syncNode->antType[0] = rf_control; } - /* connect the sync node to the Wnd nodes */ + /* Connect the sync node to the Wnd nodes. 
*/ RF_ASSERT(syncNode->numSuccedents == (1 + nWndNodes)); for (i = 0; i < nWndNodes; i++) { RF_ASSERT(wndNodes->numAntecedents == 1); @@ -1407,13 +1521,13 @@ rf_CommonCreateLargeWriteDAGFwd( wndNodes[i].antType[0] = rf_control; } - /* connect the sync node to the Xor node */ + /* Connect the sync node to the Xor node. */ RF_ASSERT(xorNode->numAntecedents == 1); syncNode->succedents[nWndNodes] = xorNode; xorNode->antecedents[0] = syncNode; xorNode->antType[0] = rf_control; - /* connect the xor node to the write parity node */ + /* Connect the xor node to the write parity node. */ RF_ASSERT(xorNode->numSuccedents == nfaults); RF_ASSERT(wnpNode->numAntecedents == 1); xorNode->succedents[0] = wnpNode; @@ -1425,7 +1539,7 @@ rf_CommonCreateLargeWriteDAGFwd( wnqNode->antecedents[0] = xorNode; wnqNode->antType[0] = rf_trueData; } - /* connect the write nodes to the term node */ + /* Connect the write nodes to the term node. */ RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults); RF_ASSERT(termNode->numSuccedents == 0); for (i = 0; i < nWndNodes; i++) { @@ -1447,9 +1561,9 @@ rf_CommonCreateLargeWriteDAGFwd( } -/****************************************************************************** +/***************************************************************************** * - * creates a DAG to perform a small-write operation (either raid 5 or pq), + * Create a DAG to perform a small-write operation (either raid 5 or pq), * which is as follows: * * Hdr -> Nil -> Rop - Xor - Wnp [Unp] -- Trm @@ -1467,48 +1581,44 @@ rf_CommonCreateLargeWriteDAGFwd( * Wnp = write new parity * Wnd = write new data * Wnq = write new "q" - * [ ] denotes optional segments in the graph + * [ ] denotes optional segments in the graph. * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation - * pfuncs - list of parity generating functions - * qfuncs - list of q generating functions + * Parameters: raidPtr - description of the physical array + * asmap - logical & physical addresses for this access + * bp - buffer ptr (holds write data) + * flags - general flags (e.g. disk locking) + * allocList - list of memory allocated in DAG creation + * pfuncs - list of parity generating functions + * qfuncs - list of q generating functions * - * A null qfuncs indicates single fault tolerant + * A null qfuncs indicates single fault tolerant. 
*****************************************************************************/ -void -rf_CommonCreateSmallWriteDAGFwd( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) +void +rf_CommonCreateSmallWriteDAGFwd(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList, RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs) { RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode; RF_DagNode_t *unlockDataNodes, *unlockParityNodes, *unlockQNodes; RF_DagNode_t *xorNodes, *qNodes, *blockNode, *nodes; RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes; - int i, j, nNodes, totalNumNodes, lu_flag; + int i, j, nNodes, totalNumNodes, lu_flag; RF_ReconUnitNum_t which_ru; - int (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *); - int (*qfunc) (RF_DagNode_t *); - int numDataNodes, numParityNodes; + int (*func) (RF_DagNode_t *); + int (*undoFunc) (RF_DagNode_t *); + int (*qfunc) (RF_DagNode_t *); + int numDataNodes, numParityNodes; RF_StripeNum_t parityStripeID; RF_PhysDiskAddr_t *pda; - char *name, *qname; - long nfaults; + char *name, *qname; + long nfaults; nfaults = qfuncs ? 2 : 1; - lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */ + lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* Lock/unlock flag. */ - parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); + parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), + asmap->raidAddress, &which_ru); pda = asmap->physInfo; numDataNodes = asmap->numStripeUnitsAccessed; numParityNodes = (asmap->parityInfo->next) ? 2 : 1; @@ -1525,23 +1635,31 @@ rf_CommonCreateSmallWriteDAGFwd( qfunc = NULL; qname = NULL; - /* DAG creation occurs in four steps: 1. count the number of nodes in - * the DAG 2. create the nodes 3. initialize the nodes 4. connect the - * nodes */ + /* + * DAG creation occurs in four steps: + * 1. Count the number of nodes in the DAG. + * 2. Create the nodes. + * 3. Initialize the nodes. + * 4. Connect the nodes. + */ - /* Step 1. compute number of nodes in the graph */ + /* Step 1. Compute number of nodes in the graph. */ - /* number of nodes: a read and write for each data unit a redundancy - * computation node for each parity node (nfaults * nparity) a read - * and write for each parity unit a block node a terminate node if - * atomic RMW an unlock node for each data unit, redundancy unit */ - totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) + (nfaults * 2 * numParityNodes) + 2; + /* + * Number of nodes: a read and write for each data unit, a redundancy + * computation node for each parity node (nfaults * nparity), a read + * and write for each parity unit, a block node, a terminate node if + * atomic RMW, an unlock node for each data/redundancy unit. + */ + totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes) + + (nfaults * 2 * numParityNodes) + 2; if (lu_flag) totalNumNodes += (numDataNodes + (nfaults * numParityNodes)); - /* Step 2. create the nodes */ - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + /* Step 2. Create the nodes. 
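To make the count below concrete: for a RAID 5 small write confined to one data unit and one parity unit (numDataNodes = 1, numParityNodes = 1, nfaults = 1) with atomic RMW disabled, totalNumNodes = 2*1 + 1*1 + 1*2*1 + 2 = 7, i.e. one Rod, one Wnd, one Xor, one Rop, one Wnp, plus the block and terminate nodes. Enabling the lock/unlock path (lu_flag) adds numDataNodes + nfaults*numParityNodes = 2 unlock nodes, one per data unit and one per parity unit.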
*/ + RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); i = 0; blockNode = &nodes[i]; i += 1; @@ -1583,92 +1701,127 @@ rf_CommonCreateSmallWriteDAGFwd( } RF_ASSERT(i == totalNumNodes); - /* Step 3. initialize the nodes */ - /* initialize block node (Nil) */ + /* Step 3. Initialize the nodes. */ + /* Initialize block node (Nil). */ nNodes = numDataNodes + (nfaults * numParityNodes); - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, + "Nil", allocList); - /* initialize terminate node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0, dag_h, "Trm", allocList); + /* Initialize terminate node (Trm). */ + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0, dag_h, + "Trm", allocList); - /* initialize nodes which read old data (Rod) */ + /* Initialize nodes which read old data (Rod). */ for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, (numParityNodes * nfaults) + 1, 1, 4, 0, dag_h, "Rod", allocList); + rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, + (numParityNodes * nfaults) + 1, 1, 4, 0, dag_h, + "Rod", allocList); RF_ASSERT(pda != NULL); - readDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * data */ + /* Physical disk addr desc. */ + readDataNodes[i].params[0].p = pda; + /* Buffer to hold old data. */ + readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, + pda, allocList); readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); + readDataNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + lu_flag, 0, which_ru); pda = pda->next; for (j = 0; j < readDataNodes[i].numSuccedents; j++) readDataNodes[i].propList[j] = NULL; } - /* initialize nodes which read old parity (Rop) */ + /* Initialize nodes which read old parity (Rop). */ pda = asmap->parityInfo; i = 0; for (i = 0; i < numParityNodes; i++) { RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Rop", allocList); + rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, + numParityNodes, 1, 4, 0, dag_h, "Rop", allocList); readParityNodes[i].params[0].p = pda; - readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * parity */ + /* Buffer to hold old parity. 
*/ + readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, + dag_h, pda, allocList); readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); + readParityNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + lu_flag, 0, which_ru); for (j = 0; j < readParityNodes[i].numSuccedents; j++) readParityNodes[i].propList[0] = NULL; pda = pda->next; } - /* initialize nodes which read old Q (Roq) */ + /* Initialize nodes which read old Q (Roq). */ if (nfaults == 2) { pda = asmap->qInfo; for (i = 0; i < numParityNodes; i++) { RF_ASSERT(pda != NULL); - rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, dag_h, "Roq", allocList); + rf_InitNode(&readQNodes[i], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, + rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, + dag_h, "Roq", allocList); readQNodes[i].params[0].p = pda; - readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old Q */ + /* Buffer to hold old Q. */ + readQNodes[i].params[1].p = rf_AllocBuffer(raidPtr, + dag_h, pda, allocList); readQNodes[i].params[2].v = parityStripeID; - readQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); + readQNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + lu_flag, 0, which_ru); for (j = 0; j < readQNodes[i].numSuccedents; j++) readQNodes[i].propList[0] = NULL; pda = pda->next; } } - /* initialize nodes which write new data (Wnd) */ + /* Initialize nodes which write new data (Wnd). */ pda = asmap->physInfo; for (i = 0; i < numDataNodes; i++) { RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); - writeDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new - * data to be written */ + rf_InitNode(&writeDataNodes[i], rf_wait, RF_FALSE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Wnd", allocList); + /* Physical disk addr desc. */ + writeDataNodes[i].params[0].p = pda; + /* Buffer holding new data to be written. */ + writeDataNodes[i].params[1].p = pda->bufPtr; writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + writeDataNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList); - unlockDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); + /* Initialize node to unlock the disk queue. */ + rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, + rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, + rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, + "Und", allocList); + /* Physical disk addr desc. 
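The read and write nodes initialized here all share the four-slot parameter convention visible above: params[0] carries the physical disk address descriptor, params[1] the data buffer, params[2] the parity stripe ID, and params[3] a value built by RF_CREATE_PARAM3() from the I/O priority, what appear to be the lock and unlock flags, and the reconstruction unit number. The two-slot unlock nodes keep only the address descriptor and the RF_CREATE_PARAM3() value.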
*/ + unlockDataNodes[i].params[0].p = pda; + unlockDataNodes[i].params[1].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, lu_flag, which_ru); } pda = pda->next; } - /* initialize nodes which compute new parity and Q */ - /* we use the simple XOR func in the double-XOR case, and when we're - * accessing only a portion of one stripe unit. the distinction + /* Initialize nodes which compute new parity and Q. */ + /* + * Use the simple XOR func in the double-XOR case, and when + * accessing only a portion of one stripe unit. The distinction * between the two is that the regular XOR func assumes that the * targbuf is a full SU in size, and examines the pda associated with * the buffer to decide where within the buffer to XOR the data, * whereas the simple XOR func just XORs the data into the start of - * the buffer. */ - if ((numParityNodes == 2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { + * the buffer. + */ + if ((numParityNodes == 2) || ((numDataNodes == 1) && + (asmap->totalSectorsAccessed < + raidPtr->Layout.sectorsPerStripeUnit))) { func = pfuncs->simple; undoFunc = rf_NullNodeUndoFunc; name = pfuncs->SimpleName; @@ -1685,12 +1838,16 @@ rf_CommonCreateSmallWriteDAGFwd( qname = qfuncs->RegularName; } } - /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} - * nodes, and raidPtr */ - if (numParityNodes == 2) { /* double-xor case */ + /* + * Initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} + * nodes, and raidPtr. + */ + if (numParityNodes == 2) { /* Double-xor case. */ for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for - * xor */ + /* No wakeup func for xor. */ + rf_InitNode(&xorNodes[i], rf_wait, RF_FALSE, func, + undoFunc, NULL, numParityNodes, numParityNodes + + numDataNodes, 7, 1, dag_h, name, allocList); xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; xorNodes[i].params[0] = readDataNodes[i].params[0]; xorNodes[i].params[1] = readDataNodes[i].params[1]; @@ -1699,114 +1856,166 @@ rf_CommonCreateSmallWriteDAGFwd( xorNodes[i].params[4] = writeDataNodes[i].params[0]; xorNodes[i].params[5] = writeDataNodes[i].params[1]; xorNodes[i].params[6].p = raidPtr; - xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as - * target buf */ + /* Use old parity buf as target buf. */ + xorNodes[i].results[0] = readParityNodes[i].params[1].p; if (nfaults == 2) { - rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, 7, 1, dag_h, qname, allocList); /* no wakeup func for - * xor */ - qNodes[i].params[0] = readDataNodes[i].params[0]; - qNodes[i].params[1] = readDataNodes[i].params[1]; + /* No wakeup func for xor. 
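The comment above distinguishes the two xor flavors by where the data lands in the target buffer. A stand-alone sketch of that difference, using hypothetical helpers rather than the real simple/regular functions supplied through pfuncs:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* XOR len bytes of src into dst starting at byte offset off. */
static void
xor_into(uint8_t *dst, size_t off, const uint8_t *src, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		dst[off + i] ^= src[i];
}

/* "Simple" flavor: the target is only as big as the data; xor at offset 0. */
static void
simple_xor(uint8_t *targbuf, const uint8_t *data, size_t len)
{
	xor_into(targbuf, 0, data, len);
}

/*
 * "Regular" flavor: the target is a full stripe unit and the data lands at
 * its stripe-unit-relative offset (derived from the pda in the real code).
 */
static void
regular_xor(uint8_t *su_targbuf, size_t su_off, const uint8_t *data,
    size_t len)
{
	xor_into(su_targbuf, su_off, data, len);
}

int
main(void)
{
	uint8_t su[32], part[8], data[8];

	memset(su, 0, sizeof(su));
	memset(part, 0, sizeof(part));
	memset(data, 0xff, sizeof(data));

	simple_xor(part, data, sizeof(data));		/* Affects part[0..7]. */
	regular_xor(su, 16, data, sizeof(data));	/* Affects su[16..23].  */
	printf("part[0]=%#x su[16]=%#x su[0]=%#x\n", part[0], su[16], su[0]);
	return (0);
}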
*/ + rf_InitNode(&qNodes[i], rf_wait, RF_FALSE, + qfunc, undoFunc, NULL, numParityNodes, + numParityNodes + numDataNodes, 7, 1, + dag_h, qname, allocList); + qNodes[i].params[0] = + readDataNodes[i].params[0]; + qNodes[i].params[1] = + readDataNodes[i].params[1]; qNodes[i].params[2] = readQNodes[i].params[0]; qNodes[i].params[3] = readQNodes[i].params[1]; - qNodes[i].params[4] = writeDataNodes[i].params[0]; - qNodes[i].params[5] = writeDataNodes[i].params[1]; + qNodes[i].params[4] = + writeDataNodes[i].params[0]; + qNodes[i].params[5] = + writeDataNodes[i].params[1]; qNodes[i].params[6].p = raidPtr; - qNodes[i].results[0] = readQNodes[i].params[1].p; /* use old Q buf as - * target buf */ + /* Use old Q buf as target buf. */ + qNodes[i].results[0] = + readQNodes[i].params[1].p; } } } else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); + /* There is only one xor node in this case. */ + rf_InitNode(&xorNodes[0], rf_wait, RF_FALSE, func, undoFunc, + NULL, numParityNodes, numParityNodes + numDataNodes, + (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, + name, allocList); xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; for (i = 0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ + /* Set up params related to Rod and Rop nodes. */ + xorNodes[0].params[2 * i + 0] = + readDataNodes[i].params[0]; /* pda */ + xorNodes[0].params[2 * i + 1] = + readDataNodes[i].params[1]; /* buffer pointer */ } for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ + /* Set up params related to Wnd and Wnp nodes. */ + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = + writeDataNodes[i].params[0]; /* pda */ + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = + writeDataNodes[i].params[1]; /* buffer pointer */ } - xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get - * at RAID information */ + xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = + raidPtr; /* xor node needs to get at RAID information */ xorNodes[0].results[0] = readParityNodes[0].params[1].p; if (nfaults == 2) { - rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, undoFunc, NULL, numParityNodes, numParityNodes + numDataNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, qname, allocList); + rf_InitNode(&qNodes[0], rf_wait, RF_FALSE, qfunc, + undoFunc, NULL, numParityNodes, + numParityNodes + numDataNodes, + (2 * (numDataNodes + numDataNodes + 1) + 1), + 1, dag_h, qname, allocList); for (i = 0; i < numDataNodes; i++) { - /* set up params related to Rod */ - qNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ + /* Set up params related to Rod. 
*/ + /* pda */ + qNodes[0].params[2 * i + 0] = + readDataNodes[i].params[0]; + /* buffer pointer */ + qNodes[0].params[2 * i + 1] = + readDataNodes[i].params[1]; } - /* and read old q */ - qNodes[0].params[2 * numDataNodes + 0] = readQNodes[0].params[0]; /* pda */ - qNodes[0].params[2 * numDataNodes + 1] = readQNodes[0].params[1]; /* buffer pointer */ + /* And read old q. */ + qNodes[0].params[2 * numDataNodes + 0] = + readQNodes[0].params[0]; /* pda */ + qNodes[0].params[2 * numDataNodes + 1] = + readQNodes[0].params[1]; /* buffer pointer */ for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd nodes */ - qNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */ - qNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ + /* Set up params related to Wnd nodes. */ + /* pda */ + qNodes[0].params + [2 * (numDataNodes + 1 + i) + 0] = + writeDataNodes[i].params[0]; + /* buffer pointer */ + qNodes[0].params + [2 * (numDataNodes + 1 + i) + 1] = + writeDataNodes[i].params[1]; } - qNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get - * at RAID information */ + /* Xor node needs to get at RAID information. */ + qNodes[0].params + [2 * (numDataNodes + numDataNodes + 1)].p = + raidPtr; qNodes[0].results[0] = readQNodes[0].params[1].p; } } - /* initialize nodes which write new parity (Wnp) */ + /* Initialize nodes which write new parity (Wnp). */ pda = asmap->parityInfo; for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnp", allocList); + rf_InitNode(&writeParityNodes[i], rf_wait, RF_FALSE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, numParityNodes, + 4, 0, dag_h, "Wnp", allocList); RF_ASSERT(pda != NULL); - writeParityNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeParityNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ + /* Param 1 (bufPtr) filled in by xor node. */ + writeParityNodes[i].params[0].p = pda; + /* Buffer pointer for parity write operation. */ + writeParityNodes[i].params[1].p = xorNodes[i].results[0]; writeParityNodes[i].params[2].v = parityStripeID; - writeParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + writeParityNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unp", allocList); - unlockParityNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockParityNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); + /* Initialize node to unlock the disk queue. */ + rf_InitNode(&unlockParityNodes[i], rf_wait, RF_FALSE, + rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, + rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, + "Unp", allocList); + unlockParityNodes[i].params[0].p = + pda; /* Physical disk addr desc. */ + unlockParityNodes[i].params[1].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, lu_flag, which_ru); } pda = pda->next; } - /* initialize nodes which write new Q (Wnq) */ + /* Initialize nodes which write new Q (Wnq). 
*/ if (nfaults == 2) { pda = asmap->qInfo; for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, numParityNodes, 4, 0, dag_h, "Wnq", allocList); + rf_InitNode(&writeQNodes[i], rf_wait, RF_FALSE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, numParityNodes, + 4, 0, dag_h, "Wnq", allocList); RF_ASSERT(pda != NULL); - writeQNodes[i].params[0].p = pda; /* param 1 (bufPtr) - * filled in by xor node */ - writeQNodes[i].params[1].p = qNodes[i].results[0]; /* buffer pointer for - * parity write - * operation */ + /* Param 1 (bufPtr) filled in by xor node. */ + writeQNodes[i].params[0].p = pda; + /* Buffer pointer for parity write operation. */ + writeQNodes[i].params[1].p = qNodes[i].results[0]; writeQNodes[i].params[2].v = parityStripeID; - writeQNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + writeQNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockQNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Unq", allocList); - unlockQNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockQNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); + /* Initialize node to unlock the disk queue. */ + rf_InitNode(&unlockQNodes[i], rf_wait, + RF_FALSE, rf_DiskUnlockFunc, + rf_DiskUnlockUndoFunc, + rf_GenericWakeupFunc, 1, 1, 2, 0, + dag_h, "Unq", allocList); + /* Physical disk addr desc. */ + unlockQNodes[i].params[0].p = pda; + unlockQNodes[i].params[1].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, lu_flag, which_ru); } pda = pda->next; } } - /* Step 4. connect the nodes */ + /* Step 4. Connect the nodes. */ - /* connect header to block node */ + /* Connect header to block node. */ dag_h->succedents[0] = blockNode; - /* connect block node to read old data nodes */ - RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults))); + /* Connect block node to read old data nodes. */ + RF_ASSERT(blockNode->numSuccedents == + (numDataNodes + (numParityNodes * nfaults))); for (i = 0; i < numDataNodes; i++) { blockNode->succedents[i] = &readDataNodes[i]; RF_ASSERT(readDataNodes[i].numAntecedents == 1); @@ -1814,7 +2023,7 @@ rf_CommonCreateSmallWriteDAGFwd( readDataNodes[i].antType[0] = rf_control; } - /* connect block node to read old parity nodes */ + /* Connect block node to read old parity nodes. */ for (i = 0; i < numParityNodes; i++) { blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; RF_ASSERT(readParityNodes[i].numAntecedents == 1); @@ -1822,66 +2031,76 @@ rf_CommonCreateSmallWriteDAGFwd( readParityNodes[i].antType[0] = rf_control; } - /* connect block node to read old Q nodes */ + /* Connect block node to read old Q nodes. */ if (nfaults == 2) for (i = 0; i < numParityNodes; i++) { - blockNode->succedents[numDataNodes + numParityNodes + i] = &readQNodes[i]; + blockNode->succedents[numDataNodes + + numParityNodes + i] = &readQNodes[i]; RF_ASSERT(readQNodes[i].numAntecedents == 1); readQNodes[i].antecedents[0] = blockNode; readQNodes[i].antType[0] = rf_control; } - /* connect read old data nodes to write new data nodes */ + /* Connect read old data nodes to write new data nodes. 
*/ for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == ((nfaults * numParityNodes) + 1)); + RF_ASSERT(readDataNodes[i].numSuccedents == + ((nfaults * numParityNodes) + 1)); RF_ASSERT(writeDataNodes[i].numAntecedents == 1); readDataNodes[i].succedents[0] = &writeDataNodes[i]; writeDataNodes[i].antecedents[0] = &readDataNodes[i]; writeDataNodes[i].antType[0] = rf_antiData; } - /* connect read old data nodes to xor nodes */ + /* Connect read old data nodes to xor nodes. */ for (i = 0; i < numDataNodes; i++) { for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); + RF_ASSERT(xorNodes[j].numAntecedents == + numDataNodes + numParityNodes); readDataNodes[i].succedents[1 + j] = &xorNodes[j]; xorNodes[j].antecedents[i] = &readDataNodes[i]; xorNodes[j].antType[i] = rf_trueData; } } - /* connect read old data nodes to q nodes */ + /* Connect read old data nodes to q nodes. */ if (nfaults == 2) for (i = 0; i < numDataNodes; i++) for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(qNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[1 + numParityNodes + j] = &qNodes[j]; + RF_ASSERT(qNodes[j].numAntecedents == + numDataNodes + numParityNodes); + readDataNodes[i].succedents + [1 + numParityNodes + j] = &qNodes[j]; qNodes[j].antecedents[i] = &readDataNodes[i]; qNodes[j].antType[i] = rf_trueData; } - /* connect read old parity nodes to xor nodes */ + /* Connect read old parity nodes to xor nodes. */ for (i = 0; i < numParityNodes; i++) { for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numParityNodes); + RF_ASSERT(readParityNodes[i].numSuccedents == + numParityNodes); readParityNodes[i].succedents[j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; + xorNodes[j].antecedents[numDataNodes + i] = + &readParityNodes[i]; xorNodes[j].antType[numDataNodes + i] = rf_trueData; } } - /* connect read old q nodes to q nodes */ + /* Connect read old q nodes to q nodes. */ if (nfaults == 2) for (i = 0; i < numParityNodes; i++) { for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(readQNodes[i].numSuccedents == numParityNodes); + RF_ASSERT(readQNodes[i].numSuccedents == + numParityNodes); readQNodes[i].succedents[j] = &qNodes[j]; - qNodes[j].antecedents[numDataNodes + i] = &readQNodes[i]; - qNodes[j].antType[numDataNodes + i] = rf_trueData; + qNodes[j].antecedents[numDataNodes + i] = + &readQNodes[i]; + qNodes[j].antType[numDataNodes + i] = + rf_trueData; } } - /* connect xor nodes to the write new parity nodes */ + /* Connect xor nodes to the write new parity nodes. */ for (i = 0; i < numParityNodes; i++) { RF_ASSERT(writeParityNodes[i].numAntecedents == numParityNodes); for (j = 0; j < numParityNodes; j++) { @@ -1892,10 +2111,11 @@ rf_CommonCreateSmallWriteDAGFwd( } } - /* connect q nodes to the write new q nodes */ + /* Connect q nodes to the write new q nodes. 
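The rf_antiData edges from each Rod node to its Wnd node above express a write-after-read ordering constraint rather than a true data flow: the new data never uses the old value, but the overwrite of those sectors must wait until the old contents have been read for the parity (and Q) computation.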
*/ if (nfaults == 2) for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(writeQNodes[i].numAntecedents == numParityNodes); + RF_ASSERT(writeQNodes[i].numAntecedents == + numParityNodes); for (j = 0; j < numParityNodes; j++) { RF_ASSERT(qNodes[j].numSuccedents == 1); qNodes[i].succedents[j] = &writeQNodes[j]; @@ -1904,26 +2124,28 @@ rf_CommonCreateSmallWriteDAGFwd( } } - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); + RF_ASSERT(termNode->numAntecedents == + (numDataNodes + (nfaults * numParityNodes))); RF_ASSERT(termNode->numSuccedents == 0); for (i = 0; i < numDataNodes; i++) { if (lu_flag) { - /* connect write new data nodes to unlock nodes */ + /* Connect write new data nodes to unlock nodes. */ RF_ASSERT(writeDataNodes[i].numSuccedents == 1); RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; unlockDataNodes[i].antType[0] = rf_control; - /* connect unlock nodes to term node */ + /* Connect unlock nodes to term nodes. */ RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); unlockDataNodes[i].succedents[0] = termNode; termNode->antecedents[i] = &unlockDataNodes[i]; termNode->antType[i] = rf_control; } else { - /* connect write new data nodes to term node */ + /* Connect write new data nodes to term node. */ RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); + RF_ASSERT(termNode->numAntecedents == + (numDataNodes + (nfaults * numParityNodes))); writeDataNodes[i].succedents[0] = termNode; termNode->antecedents[i] = &writeDataNodes[i]; termNode->antType[i] = rf_control; @@ -1932,22 +2154,26 @@ rf_CommonCreateSmallWriteDAGFwd( for (i = 0; i < numParityNodes; i++) { if (lu_flag) { - /* connect write new parity nodes to unlock nodes */ + /* Connect write new parity nodes to unlock nodes. */ RF_ASSERT(writeParityNodes[i].numSuccedents == 1); RF_ASSERT(unlockParityNodes[i].numAntecedents == 1); - writeParityNodes[i].succedents[0] = &unlockParityNodes[i]; - unlockParityNodes[i].antecedents[0] = &writeParityNodes[i]; + writeParityNodes[i].succedents[0] = + &unlockParityNodes[i]; + unlockParityNodes[i].antecedents[0] = + &writeParityNodes[i]; unlockParityNodes[i].antType[0] = rf_control; - /* connect unlock nodes to term node */ + /* Connect unlock nodes to term node. */ RF_ASSERT(unlockParityNodes[i].numSuccedents == 1); unlockParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &unlockParityNodes[i]; + termNode->antecedents[numDataNodes + i] = + &unlockParityNodes[i]; termNode->antType[numDataNodes + i] = rf_control; } else { RF_ASSERT(writeParityNodes[i].numSuccedents == 1); writeParityNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + i] = &writeParityNodes[i]; + termNode->antecedents[numDataNodes + i] = + &writeParityNodes[i]; termNode->antType[numDataNodes + i] = rf_control; } } @@ -1955,57 +2181,58 @@ rf_CommonCreateSmallWriteDAGFwd( if (nfaults == 2) for (i = 0; i < numParityNodes; i++) { if (lu_flag) { - /* connect write new Q nodes to unlock nodes */ + /* Connect write new Q nodes to unlock nodes. 
*/ RF_ASSERT(writeQNodes[i].numSuccedents == 1); RF_ASSERT(unlockQNodes[i].numAntecedents == 1); writeQNodes[i].succedents[0] = &unlockQNodes[i]; - unlockQNodes[i].antecedents[0] = &writeQNodes[i]; + unlockQNodes[i].antecedents[0] = + &writeQNodes[i]; unlockQNodes[i].antType[0] = rf_control; - /* connect unlock nodes to unblock node */ + /* Connect unlock nodes to unblock node. */ RF_ASSERT(unlockQNodes[i].numSuccedents == 1); unlockQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &unlockQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; + termNode->antecedents[numDataNodes + + numParityNodes + i] = &unlockQNodes[i]; + termNode->antType[numDataNodes + + numParityNodes + i] = rf_control; } else { RF_ASSERT(writeQNodes[i].numSuccedents == 1); writeQNodes[i].succedents[0] = termNode; - termNode->antecedents[numDataNodes + numParityNodes + i] = &writeQNodes[i]; - termNode->antType[numDataNodes + numParityNodes + i] = rf_control; + termNode->antecedents[numDataNodes + + numParityNodes + i] = &writeQNodes[i]; + termNode->antType[numDataNodes + + numParityNodes + i] = rf_control; } } } -/****************************************************************************** - * create a write graph (fault-free or degraded) for RAID level 1 +/***************************************************************************** + * Create a write graph (fault-free or degraded) for RAID level 1. * * Hdr Nil -> Wpd -> Nil -> Trm - * Nil -> Wsd -> + * Nil -> Wsd -> * - * The "Wpd" node writes data to the primary copy in the mirror pair - * The "Wsd" node writes data to the secondary copy in the mirror pair + * The "Wpd" node writes data to the primary copy in the mirror pair. + * The "Wsd" node writes data to the secondary copy in the mirror pair. * - * Parameters: raidPtr - description of the physical array - * asmap - logical & physical addresses for this access - * bp - buffer ptr (holds write data) - * flags - general flags (e.g. disk locking) - * allocList - list of memory allocated in DAG creation + * Parameters: raidPtr - description of the physical array + * asmap - logical & physical addresses for this access + * bp - buffer ptr (holds write data) + * flags - general flags (e.g. disk locking) + * allocList - list of memory allocated in DAG creation *****************************************************************************/ -void -rf_CreateRaidOneWriteDAGFwd( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList) +void +rf_CreateRaidOneWriteDAGFwd(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags, + RF_AllocListElem_t *allocList) { RF_DagNode_t *blockNode, *unblockNode, *termNode; RF_DagNode_t *nodes, *wndNode, *wmirNode; - int nWndNodes, nWmirNodes, i; + int nWndNodes, nWmirNodes, i; RF_ReconUnitNum_t which_ru; RF_PhysDiskAddr_t *pda, *pdaP; RF_StripeNum_t parityStripeID; @@ -2015,19 +2242,22 @@ rf_CreateRaidOneWriteDAGFwd( if (rf_dagDebug) { printf("[Creating RAID level 1 write DAG]\n"); } - nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; /* 2 implies access not - * SU aligned */ + /* 2 implies access not SU aligned. */ + nWmirNodes = (asmap->parityInfo->next) ? 2 : 1; nWndNodes = (asmap->physInfo->next) ? 2 : 1; - /* alloc the Wnd nodes and the Wmir node */ + /* Alloc the Wnd nodes and the Wmir node. 
*/ if (asmap->numDataFailed == 1) nWndNodes--; if (asmap->numParityFailed == 1) nWmirNodes--; - /* total number of nodes = nWndNodes + nWmirNodes + (block + unblock + - * terminator) */ - RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + /* + * Total number of nodes = nWndNodes + nWmirNodes + + * (block + unblock + terminator) + */ + RF_CallocAndAdd(nodes, nWndNodes + nWmirNodes + 3, + sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); i = 0; wndNode = &nodes[i]; i += nWndNodes; @@ -2041,53 +2271,68 @@ rf_CreateRaidOneWriteDAGFwd( i += 1; RF_ASSERT(i == (nWndNodes + nWmirNodes + 3)); - /* this dag can commit immediately */ + /* This dag can commit immediately. */ dag_h->numCommitNodes = 0; dag_h->numCommits = 0; dag_h->numSuccedents = 1; - /* initialize the unblock and term nodes */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes), 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes), 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the wnd nodes */ + /* Initialize the unblock and term nodes. */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes), + 0, 0, 0, dag_h, "Nil", allocList); + rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes), + 0, 0, dag_h, "Nil", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + + /* Initialize the wnd nodes. */ if (nWndNodes > 0) { pda = asmap->physInfo; for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wpd", allocList); + rf_InitNode(&wndNode[i], rf_wait, RF_FALSE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Wpd", allocList); RF_ASSERT(pda != NULL); wndNode[i].params[0].p = pda; wndNode[i].params[1].p = pda->bufPtr; wndNode[i].params[2].v = parityStripeID; - wndNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wndNode[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); pda = pda->next; } RF_ASSERT(pda == NULL); } - /* initialize the mirror nodes */ + /* Initialize the mirror nodes. */ if (nWmirNodes > 0) { pda = asmap->physInfo; pdaP = asmap->parityInfo; for (i = 0; i < nWmirNodes; i++) { - rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wsd", allocList); + rf_InitNode(&wmirNode[i], rf_wait, RF_FALSE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, + "Wsd", allocList); RF_ASSERT(pda != NULL); wmirNode[i].params[0].p = pdaP; wmirNode[i].params[1].p = pda->bufPtr; wmirNode[i].params[2].v = parityStripeID; - wmirNode[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wmirNode[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); pda = pda->next; pdaP = pdaP->next; } RF_ASSERT(pda == NULL); RF_ASSERT(pdaP == NULL); } - /* link the header node to the block node */ + /* Link the header node to the block node. 
*/ RF_ASSERT(dag_h->numSuccedents == 1); RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; - /* link the block node to the write nodes */ + /* Link the block node to the write nodes. */ RF_ASSERT(blockNode->numSuccedents == (nWndNodes + nWmirNodes)); for (i = 0; i < nWndNodes; i++) { RF_ASSERT(wndNode[i].numAntecedents == 1); @@ -2102,7 +2347,7 @@ rf_CreateRaidOneWriteDAGFwd( wmirNode[i].antType[0] = rf_control; } - /* link the write nodes to the unblock node */ + /* Link the write nodes to the unblock node. */ RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes)); for (i = 0; i < nWndNodes; i++) { RF_ASSERT(wndNode[i].numSuccedents == 1); @@ -2117,7 +2362,7 @@ rf_CreateRaidOneWriteDAGFwd( unblockNode->antType[i + nWndNodes] = rf_control; } - /* link the unblock node to the term node */ + /* Link the unblock node to the term node. */ RF_ASSERT(unblockNode->numSuccedents == 1); RF_ASSERT(termNode->numAntecedents == 1); RF_ASSERT(termNode->numSuccedents == 0); diff --git a/sys/dev/raidframe/rf_dagffwr.h b/sys/dev/raidframe/rf_dagffwr.h index b7b50da1e70..7e3ed9ac897 100644 --- a/sys/dev/raidframe/rf_dagffwr.h +++ b/sys/dev/raidframe/rf_dagffwr.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagffwr.h,v 1.2 1999/02/16 00:02:31 niklas Exp $ */ +/* $OpenBSD: rf_dagffwr.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagffwr.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,51 +28,38 @@ * rights to redistribute these changes. */ -#ifndef _RF__RF_DAGFFWR_H_ -#define _RF__RF_DAGFFWR_H_ +#ifndef _RF__RF_DAGFFWR_H_ +#define _RF__RF_DAGFFWR_H_ #include "rf_types.h" -/* fault-free write DAG creation routines */ -void -rf_CreateNonRedundantWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_IoType_t type); -void -rf_CreateRAID0WriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, RF_IoType_t type); -void -rf_CreateSmallWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); -void -rf_CreateLargeWriteDAG(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); -void -rf_CommonCreateLargeWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, int nfaults, - int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); - void rf_CommonCreateLargeWriteDAGFwd(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, int nfaults, - int (*redFunc) (RF_DagNode_t *), int allowBufferRecycle); - void rf_CommonCreateSmallWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); - void rf_CommonCreateSmallWriteDAGFwd(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); - void rf_CreateRaidOneWriteDAG(RF_Raid_t * raidPtr, 
RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, void *bp, RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList); - void rf_CreateRaidOneWriteDAGFwd(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, void *bp, - RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList); +/* Fault-free write DAG creation routines. */ + +void rf_CreateNonRedundantWriteDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + RF_IoType_t); +void rf_CreateRAID0WriteDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + RF_IoType_t); +void rf_CreateSmallWriteDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); +void rf_CreateLargeWriteDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); +void rf_CommonCreateLargeWriteDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + int, int (*) (RF_DagNode_t *), int); +void rf_CommonCreateLargeWriteDAGFwd(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + int, int (*) (RF_DagNode_t *), int); +void rf_CommonCreateSmallWriteDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + RF_RedFuncs_t *, RF_RedFuncs_t *); +void rf_CommonCreateSmallWriteDAGFwd(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + RF_RedFuncs_t *, RF_RedFuncs_t *); +void rf_CreateRaidOneWriteDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); +void rf_CreateRaidOneWriteDAGFwd(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *); -#endif /* !_RF__RF_DAGFFWR_H_ */ +#endif /* !_RF__RF_DAGFFWR_H_ */ diff --git a/sys/dev/raidframe/rf_dagflags.h b/sys/dev/raidframe/rf_dagflags.h index a978088ce9f..29e2acb0141 100644 --- a/sys/dev/raidframe/rf_dagflags.h +++ b/sys/dev/raidframe/rf_dagflags.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagflags.h,v 1.2 1999/02/16 00:02:31 niklas Exp $ */ +/* $OpenBSD: rf_dagflags.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagflags.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,17 +28,18 @@ * rights to redistribute these changes. */ -/************************************************************************************** +/***************************************************************************** * - * dagflags.h -- flags that can be given to DoAccess - * I pulled these out of dag.h because routines that call DoAccess may need these flags, - * but certainly do not need the declarations related to the DAG data structures. + * dagflags.h -- Flags that can be given to DoAccess. + * I pulled these out of dag.h because routines that call DoAccess may need + * these flags, but certainly do not need the declarations related to the DAG + * data structures. 
* - **************************************************************************************/ + *****************************************************************************/ -#ifndef _RF__RF_DAGFLAGS_H_ -#define _RF__RF_DAGFLAGS_H_ +#ifndef _RF__RF_DAGFLAGS_H_ +#define _RF__RF_DAGFLAGS_H_ /* * Bitmasks for the "flags" parameter (RF_RaidAccessFlags_t) used @@ -48,21 +50,35 @@ * specify USE_DAG. */ -#define RF_DAG_FLAGS_NONE 0 /* no flags */ -#define RF_DAG_SUPPRESS_LOCKS (1<<0) /* supress all stripe locks in - * the DAG */ -#define RF_DAG_RETURN_ASM (1<<1) /* create an ASM and return it - * instead of freeing it */ -#define RF_DAG_RETURN_DAG (1<<2) /* create a DAG and return it - * instead of freeing it */ -#define RF_DAG_NONBLOCKING_IO (1<<3) /* cause DoAccess to be - * non-blocking */ -#define RF_DAG_ACCESS_COMPLETE (1<<4) /* the access is complete */ -#define RF_DAG_DISPATCH_RETURNED (1<<5) /* used to handle the case +#define RF_DAG_FLAGS_NONE 0 /* No flags */ +#define RF_DAG_SUPPRESS_LOCKS (1<<0) /* + * Supress all stripe locks in + * the DAG. + */ +#define RF_DAG_RETURN_ASM (1<<1) /* + * Create an ASM and return it + * instead of freeing it. + */ +#define RF_DAG_RETURN_DAG (1<<2) /* + * Create a DAG and return it + * instead of freeing it. + */ +#define RF_DAG_NONBLOCKING_IO (1<<3) /* + * Cause DoAccess to be + * non-blocking. + */ +#define RF_DAG_ACCESS_COMPLETE (1<<4) /* + * The access is complete. + */ +#define RF_DAG_DISPATCH_RETURNED (1<<5) /* + * Used to handle the case * where the dag invokes no - * I/O */ -#define RF_DAG_TEST_ACCESS (1<<6) /* this access came through + * I/O. + */ +#define RF_DAG_TEST_ACCESS (1<<6) /* + * This access came through * rf_ioctl instead of - * rf_strategy */ + * rf_strategy. + */ -#endif /* !_RF__RF_DAGFLAGS_H_ */ +#endif /* !_RF__RF_DAGFLAGS_H_ */ diff --git a/sys/dev/raidframe/rf_dagfuncs.c b/sys/dev/raidframe/rf_dagfuncs.c index d3e35553d0a..7a2317034e6 100644 --- a/sys/dev/raidframe/rf_dagfuncs.c +++ b/sys/dev/raidframe/rf_dagfuncs.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagfuncs.c,v 1.5 2000/08/08 16:07:39 peter Exp $ */ +/* $OpenBSD: rf_dagfuncs.c,v 1.6 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,7 +29,7 @@ */ /* - * dagfuncs.c -- DAG node execution routines + * dagfuncs.c -- DAG node execution routines. * * Rules: * 1. 
Every DAG execution function must eventually cause node->status to @@ -65,28 +66,28 @@ #include "rf_kintf.h" -#if RF_INCLUDE_PARITYLOGGING > 0 +#if RF_INCLUDE_PARITYLOGGING > 0 #include "rf_paritylog.h" -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - -int (*rf_DiskReadFunc) (RF_DagNode_t *); -int (*rf_DiskWriteFunc) (RF_DagNode_t *); -int (*rf_DiskReadUndoFunc) (RF_DagNode_t *); -int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *); -int (*rf_DiskUnlockFunc) (RF_DagNode_t *); -int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *); -int (*rf_RegularXorUndoFunc) (RF_DagNode_t *); -int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *); -int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); - -/***************************************************************************************** - * main (only) configuration routine for this module - ****************************************************************************************/ -int -rf_ConfigureDAGFuncs(listp) - RF_ShutdownList_t **listp; +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ + +int (*rf_DiskReadFunc) (RF_DagNode_t *); +int (*rf_DiskWriteFunc) (RF_DagNode_t *); +int (*rf_DiskReadUndoFunc) (RF_DagNode_t *); +int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *); +int (*rf_DiskUnlockFunc) (RF_DagNode_t *); +int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *); +int (*rf_RegularXorUndoFunc) (RF_DagNode_t *); +int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *); +int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); + +/***************************************************************************** + * Main (only) configuration routine for this module. + *****************************************************************************/ +int +rf_ConfigureDAGFuncs(RF_ShutdownList_t **listp) { - RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2)); + RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || + ((sizeof(long) == 4) && RF_LONGSHIFT == 2)); rf_DiskReadFunc = rf_DiskReadFuncForThreads; rf_DiskReadUndoFunc = rf_DiskUndoFunc; rf_DiskWriteFunc = rf_DiskWriteFuncForThreads; @@ -100,78 +101,75 @@ rf_ConfigureDAGFuncs(listp) } - -/***************************************************************************************** - * the execution function associated with a terminate node - ****************************************************************************************/ -int -rf_TerminateFunc(node) - RF_DagNode_t *node; +/***************************************************************************** + * The execution function associated with a terminate node. + *****************************************************************************/ +int +rf_TerminateFunc(RF_DagNode_t *node) { RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes); node->status = rf_good; return (rf_FinishNode(node, RF_THREAD_CONTEXT)); } -int -rf_TerminateUndoFunc(node) - RF_DagNode_t *node; +int +rf_TerminateUndoFunc(RF_DagNode_t *node) { return (0); } -/***************************************************************************************** - * execution functions associated with a mirror node +/***************************************************************************** + * Execution functions associated with a mirror node. * * parameters: * - * 0 - physical disk addres of data - * 1 - buffer for holding read data - * 2 - parity stripe ID - * 3 - flags - * 4 - physical disk address of mirror (parity) + * 0 - Physical disk addres of data. + * 1 - Buffer for holding read data. + * 2 - Parity stripe ID. + * 3 - Flags. + * 4 - Physical disk address of mirror (parity). 
* - ****************************************************************************************/ + *****************************************************************************/ -int -rf_DiskReadMirrorIdleFunc(node) - RF_DagNode_t *node; +int +rf_DiskReadMirrorIdleFunc(RF_DagNode_t *node) { - /* select the mirror copy with the shortest queue and fill in node - * parameters with physical disk address */ + /* + * Select the mirror copy with the shortest queue and fill in node + * parameters with physical disk address. + */ rf_SelectMirrorDiskIdle(node); return (rf_DiskReadFunc(node)); } -int -rf_DiskReadMirrorPartitionFunc(node) - RF_DagNode_t *node; +int +rf_DiskReadMirrorPartitionFunc(RF_DagNode_t *node) { - /* select the mirror copy with the shortest queue and fill in node - * parameters with physical disk address */ + /* + * Select the mirror copy with the shortest queue and fill in node + * parameters with physical disk address. + */ rf_SelectMirrorDiskPartition(node); return (rf_DiskReadFunc(node)); } -int -rf_DiskReadMirrorUndoFunc(node) - RF_DagNode_t *node; +int +rf_DiskReadMirrorUndoFunc(RF_DagNode_t *node) { return (0); } -#if RF_INCLUDE_PARITYLOGGING > 0 -/***************************************************************************************** - * the execution function associated with a parity log update node - ****************************************************************************************/ -int -rf_ParityLogUpdateFunc(node) - RF_DagNode_t *node; +#if RF_INCLUDE_PARITYLOGGING > 0 +/***************************************************************************** + * The execution function associated with a parity log update node. + *****************************************************************************/ +int +rf_ParityLogUpdateFunc(RF_DagNode_t *node) { RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; caddr_t buf = (caddr_t) node->params[1].p; @@ -198,12 +196,11 @@ rf_ParityLogUpdateFunc(node) } -/***************************************************************************************** - * the execution function associated with a parity log overwrite node - ****************************************************************************************/ -int -rf_ParityLogOverwriteFunc(node) - RF_DagNode_t *node; +/***************************************************************************** + * The execution function associated with a parity log overwrite node. 
+ *****************************************************************************/ +int +rf_ParityLogOverwriteFunc(RF_DagNode_t *node) { RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; caddr_t buf = (caddr_t) node->params[1].p; @@ -213,8 +210,9 @@ rf_ParityLogOverwriteFunc(node) if (node->dagHdr->status == rf_enable) { RF_ETIMER_START(timer); - logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr), - node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer); + logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, + (RF_Raid_t *) (node->dagHdr->raidPtr), node->wakeFunc, + (void *) node, node->dagHdr->tracerec, timer); if (logData) rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); else { @@ -226,61 +224,56 @@ rf_ParityLogOverwriteFunc(node) } return (0); } -#else /* RF_INCLUDE_PARITYLOGGING > 0 */ +#else /* RF_INCLUDE_PARITYLOGGING > 0 */ -int -rf_ParityLogUpdateFunc(node) - RF_DagNode_t *node; +int +rf_ParityLogUpdateFunc(RF_DagNode_t *node) { return (0); } -int -rf_ParityLogOverwriteFunc(node) - RF_DagNode_t *node; + +int +rf_ParityLogOverwriteFunc(RF_DagNode_t *node) { return (0); } -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ -int -rf_ParityLogUpdateUndoFunc(node) - RF_DagNode_t *node; +int +rf_ParityLogUpdateUndoFunc(RF_DagNode_t *node) { return (0); } -int -rf_ParityLogOverwriteUndoFunc(node) - RF_DagNode_t *node; +int +rf_ParityLogOverwriteUndoFunc(RF_DagNode_t *node) { return (0); } -/***************************************************************************************** - * the execution function associated with a NOP node - ****************************************************************************************/ -int -rf_NullNodeFunc(node) - RF_DagNode_t *node; + +/***************************************************************************** + * The execution function associated with a NOP node. + *****************************************************************************/ +int +rf_NullNodeFunc(RF_DagNode_t *node) { node->status = rf_good; return (rf_FinishNode(node, RF_THREAD_CONTEXT)); } -int -rf_NullNodeUndoFunc(node) - RF_DagNode_t *node; +int +rf_NullNodeUndoFunc(RF_DagNode_t *node) { node->status = rf_undone; return (rf_FinishNode(node, RF_THREAD_CONTEXT)); } -/***************************************************************************************** - * the execution function associated with a disk-read node - ****************************************************************************************/ -int -rf_DiskReadFuncForThreads(node) - RF_DagNode_t *node; +/***************************************************************************** + * The execution function associated with a disk-read node. + *****************************************************************************/ +int +rf_DiskReadFuncForThreads(RF_DagNode_t *node) { RF_DiskQueueData_t *req; RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; @@ -291,9 +284,10 @@ rf_DiskReadFuncForThreads(node) unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); RF_DiskQueueDataFlags_t flags = 0; - RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP; + RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? 
+ RF_IO_TYPE_READ : RF_IO_TYPE_NOP; RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - void *b_proc = NULL; + void *b_proc = NULL; if (node->dagHdr->bp) b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; @@ -317,12 +311,11 @@ rf_DiskReadFuncForThreads(node) } -/***************************************************************************************** +/***************************************************************************** * the execution function associated with a disk-write node - ****************************************************************************************/ -int -rf_DiskWriteFuncForThreads(node) - RF_DagNode_t *node; + *****************************************************************************/ +int +rf_DiskWriteFuncForThreads(RF_DagNode_t *node) { RF_DiskQueueData_t *req; RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; @@ -333,23 +326,22 @@ rf_DiskWriteFuncForThreads(node) unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); RF_DiskQueueDataFlags_t flags = 0; - RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP; + RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? + RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP; RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - void *b_proc = NULL; + void *b_proc = NULL; if (node->dagHdr->bp) b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; - /* normal processing (rollaway or forward recovery) begins here */ + /* Normal processing (rollaway or forward recovery) begins here. */ RF_ASSERT(!(lock && unlock)); flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, buf, parityStripeID, which_ru, - (int (*) (void *, int)) node->wakeFunc, - (void *) node, NULL, - node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), + (int (*) (void *, int)) node->wakeFunc, (void *) node, NULL, + node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr), flags, b_proc); if (!req) { @@ -361,77 +353,72 @@ rf_DiskWriteFuncForThreads(node) return (0); } -/***************************************************************************************** - * the undo function for disk nodes - * Note: this is not a proper undo of a write node, only locks are released. - * old data is not restored to disk! - ****************************************************************************************/ -int -rf_DiskUndoFunc(node) - RF_DagNode_t *node; +/***************************************************************************** + * The undo function for disk nodes. + * Note: This is not a proper undo of a write node, only locks are released. + * old data is not restored to disk ! 
+ *****************************************************************************/ +int +rf_DiskUndoFunc(RF_DagNode_t *node) { RF_DiskQueueData_t *req; RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, - 0L, 0, NULL, 0L, 0, - (int (*) (void *, int)) node->wakeFunc, - (void *) node, - NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), + req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 0L, 0, NULL, 0L, 0, + (int (*) (void *, int)) node->wakeFunc, (void *) node, + NULL, node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr), RF_UNLOCK_DISK_QUEUE, NULL); if (!req) (node->wakeFunc) (node, ENOMEM); else { node->dagFuncData = (void *) req; - rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); + rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, + RF_IO_NORMAL_PRIORITY); } return (0); } -/***************************************************************************************** - * the execution function associated with an "unlock disk queue" node - ****************************************************************************************/ -int -rf_DiskUnlockFuncForThreads(node) - RF_DagNode_t *node; + +/***************************************************************************** + * The execution function associated with an "unlock disk queue" node. + *****************************************************************************/ +int +rf_DiskUnlockFuncForThreads(RF_DagNode_t *node) { RF_DiskQueueData_t *req; RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; - req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, - 0L, 0, NULL, 0L, 0, - (int (*) (void *, int)) node->wakeFunc, - (void *) node, - NULL, node->dagHdr->tracerec, - (void *) (node->dagHdr->raidPtr), + req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 0L, 0, NULL, 0L, 0, + (int (*) (void *, int)) node->wakeFunc, (void *) node, + NULL, node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr), RF_UNLOCK_DISK_QUEUE, NULL); if (!req) (node->wakeFunc) (node, ENOMEM); else { node->dagFuncData = (void *) req; - rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, RF_IO_NORMAL_PRIORITY); + rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, + RF_IO_NORMAL_PRIORITY); } return (0); } -/***************************************************************************************** - * Callback routine for DiskRead and DiskWrite nodes. When the disk op completes, - * the routine is called to set the node status and inform the execution engine that - * the node has fired. - ****************************************************************************************/ -int -rf_GenericWakeupFunc(node, status) - RF_DagNode_t *node; - int status; + +/***************************************************************************** + * Callback routine for DiskRead and DiskWrite nodes. When the disk op + * completes, the routine is called to set the node status and inform + * the execution engine that the node has fired. 
+ *****************************************************************************/ +int +rf_GenericWakeupFunc(RF_DagNode_t *node, int status) { switch (node->status) { case rf_bwd1: node->status = rf_bwd2; if (node->dagFuncData) - rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); + rf_FreeDiskQueueData((RF_DiskQueueData_t *) + node->dagFuncData); return (rf_DiskWriteFuncForThreads(node)); break; case rf_fired: @@ -441,7 +428,7 @@ rf_GenericWakeupFunc(node, status) node->status = rf_good; break; case rf_recover: - /* probably should never reach this case */ + /* Probably should never reach this case. */ if (status) node->status = rf_panic; else @@ -460,99 +447,111 @@ rf_GenericWakeupFunc(node, status) } -/***************************************************************************************** - * there are three distinct types of xor nodes - * A "regular xor" is used in the fault-free case where the access spans a complete - * stripe unit. It assumes that the result buffer is one full stripe unit in size, - * and uses the stripe-unit-offset values that it computes from the PDAs to determine - * where within the stripe unit to XOR each argument buffer. +/***************************************************************************** + * There are three distinct types of xor nodes. + * + * A "regular xor" is used in the fault-free case where the access spans + * a complete stripe unit. It assumes that the result buffer is one full + * stripe unit in size, and uses the stripe-unit-offset values that it + * computes from the PDAs to determine where within the stripe unit to + * XOR each argument buffer. * - * A "simple xor" is used in the fault-free case where the access touches only a portion - * of one (or two, in some cases) stripe unit(s). It assumes that all the argument - * buffers are of the same size and have the same stripe unit offset. + * A "simple xor" is used in the fault-free case where the access touches + * only a portion of one (or two, in some cases) stripe unit(s). It assumes + * that all the argument buffers are of the same size and have the same + * stripe unit offset. * - * A "recovery xor" is used in the degraded-mode case. It's similar to the regular - * xor function except that it takes the failed PDA as an additional parameter, and - * uses it to determine what portions of the argument buffers need to be xor'd into - * the result buffer, and where in the result buffer they should go. - ****************************************************************************************/ - -/* xor the params together and store the result in the result field. - * assume the result field points to a buffer that is the size of one SU, + * A "recovery xor" is used in the degraded-mode case. It's similar to + * the regular xor function except that it takes the failed PDA as an + * additional parameter, and uses it to determine what portions of the + * argument buffers need to be xor'd into the result buffer, and where + * in the result buffer they should go. + *****************************************************************************/ + +/* + * Xor the params together and store the result in the result field. + * Assume the result field points to a buffer that is the size of one SU, * and use the pda params to determine where within the buffer to XOR * the input buffers. 
*/ -int -rf_RegularXorFunc(node) - RF_DagNode_t *node; +int +rf_RegularXorFunc(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; - int i, retcode; + int i, retcode; retcode = 0; if (node->dagHdr->status == rf_enable) { - /* don't do the XOR if the input is the same as the output */ + /* Don't do the XOR if the input is the same as the output. */ RF_ETIMER_START(timer); for (i = 0; i < node->numParams - 1; i += 2) if (node->params[i + 1].p != node->results[0]) { - retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p, - (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp); + retcode = rf_XorIntoBuffer(raidPtr, + (RF_PhysDiskAddr_t *) node->params[i].p, + (char *) node->params[i + 1].p, + (char *) node->results[0], + node->dagHdr->bp); } RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->xor_us += RF_ETIMER_VAL_US(timer); } - return (rf_GenericWakeupFunc(node, retcode)); /* call wake func - * explicitly since no - * I/O in this node */ + /* Call wake func explicitly since no I/O in this node. */ + return (rf_GenericWakeupFunc(node, retcode)); } -/* xor the inputs into the result buffer, ignoring placement issues */ -int -rf_SimpleXorFunc(node) - RF_DagNode_t *node; + +/* Xor the inputs into the result buffer, ignoring placement issues. */ +int +rf_SimpleXorFunc(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - int i, retcode = 0; + int i, retcode = 0; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; if (node->dagHdr->status == rf_enable) { RF_ETIMER_START(timer); - /* don't do the XOR if the input is the same as the output */ + /* Don't do the XOR if the input is the same as the output. */ for (i = 0; i < node->numParams - 1; i += 2) if (node->params[i + 1].p != node->results[0]) { - retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0], - rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector), + retcode = rf_bxor((char *) + node->params[i + 1].p, + (char *) node->results[0], + rf_RaidAddressToByte(raidPtr, + ((RF_PhysDiskAddr_t *) + node->params[i].p)->numSector), (struct buf *) node->dagHdr->bp); } RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->xor_us += RF_ETIMER_VAL_US(timer); } - return (rf_GenericWakeupFunc(node, retcode)); /* call wake func - * explicitly since no - * I/O in this node */ + /* Call wake func explicitly since no I/O in this node. */ + return (rf_GenericWakeupFunc(node, retcode)); } -/* this xor is used by the degraded-mode dag functions to recover lost data. - * the second-to-last parameter is the PDA for the failed portion of the access. - * the code here looks at this PDA and assumes that the xor target buffer is - * equal in size to the number of sectors in the failed PDA. It then uses + +/* + * This xor is used by the degraded-mode dag functions to recover lost data. + * The second-to-last parameter is the PDA for the failed portion of the access. + * The code here looks at this PDA and assumes that the xor target buffer is + * equal in size to the number of sectors in the failed PDA. It then uses * the other PDAs in the parameter list to determine where within the target * buffer the corresponding data should be xored. 
*/ -int -rf_RecoveryXorFunc(node) - RF_DagNode_t *node; +int +rf_RecoveryXorFunc(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; - int i, retcode = 0; + RF_PhysDiskAddr_t *failedPDA = + (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; + int i, retcode = 0; RF_PhysDiskAddr_t *pda; - int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - char *srcbuf, *destbuf; + int suoffset, failedSUOffset = + rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); + char *srcbuf, *destbuf; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; @@ -562,9 +561,14 @@ rf_RecoveryXorFunc(node) if (node->params[i + 1].p != node->results[0]) { pda = (RF_PhysDiskAddr_t *) node->params[i].p; srcbuf = (char *) node->params[i + 1].p; - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); - retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp); + suoffset = rf_StripeUnitOffset(layoutPtr, + pda->startSector); + destbuf = ((char *) node->results[0]) + + rf_RaidAddressToByte(raidPtr, + suoffset - failedSUOffset); + retcode = rf_bxor(srcbuf, destbuf, + rf_RaidAddressToByte(raidPtr, + pda->numSector), node->dagHdr->bp); } RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); @@ -572,31 +576,30 @@ rf_RecoveryXorFunc(node) } return (rf_GenericWakeupFunc(node, retcode)); } -/***************************************************************************************** - * The next three functions are utilities used by the above xor-execution functions. - ****************************************************************************************/ +/***************************************************************************** + * The next three functions are utilities used by the above xor-execution + * functions. + *****************************************************************************/ + /* - * this is just a glorified buffer xor. targbuf points to a buffer that is one full stripe unit - * in size. srcbuf points to a buffer that may be less than 1 SU, but never more. When the - * access described by pda is one SU in size (which by implication means it's SU-aligned), - * all that happens is (targbuf) <- (srcbuf ^ targbuf). When the access is less than one - * SU in size the XOR occurs on only the portion of targbuf identified in the pda. + * This is just a glorified buffer xor. Targbuf points to a buffer that is + * one full stripe unit in size. srcbuf points to a buffer that may be less + * than 1 SU, but never more. When the access described by pda is one SU in + * size (which by implication means it's SU-aligned), all that happens is + * (targbuf) <- (srcbuf ^ targbuf). When the access is less than one SU in + * size the XOR occurs on only the portion of targbuf identified in the pda. 
*/ -int -rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - char *srcbuf; - char *targbuf; - void *bp; +int +rf_XorIntoBuffer(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, char *srcbuf, + char *targbuf, void *bp) { - char *targptr; - int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int SUOffset = pda->startSector % sectPerSU; - int length, retcode = 0; + char *targptr; + int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; + int SUOffset = pda->startSector % sectPerSU; + int length, retcode = 0; RF_ASSERT(pda->numSector <= sectPerSU); @@ -605,48 +608,46 @@ rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp) retcode = rf_bxor(srcbuf, targptr, length, bp); return (retcode); } -/* it really should be the case that the buffer pointers (returned by malloc) + +/* + * It really should be the case that the buffer pointers (returned by malloc) * are aligned to the natural word size of the machine, so this is the only - * case we optimize for. The length should always be a multiple of the sector + * case we optimize for. The length should always be a multiple of the sector * size, so there should be no problem with leftover bytes at the end. */ -int -rf_bxor(src, dest, len, bp) - char *src; - char *dest; - int len; - void *bp; +int +rf_bxor(char *src, char *dest, int len, void *bp) { unsigned mask = sizeof(long) - 1, retcode = 0; - if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) { - retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp); + if (!(((unsigned long) src) & mask) && + !(((unsigned long) dest) & mask) && !(len & mask)) { + retcode = rf_longword_bxor((unsigned long *) src, + (unsigned long *) dest, len >> RF_LONGSHIFT, bp); } else { RF_ASSERT(0); } return (retcode); } -/* map a user buffer into kernel space, if necessary */ -#define REMAP_VA(_bp,x,y) (y) = (x) - -/* When XORing in kernel mode, we need to map each user page to kernel space before we can access it. - * We don't want to assume anything about which input buffers are in kernel/user - * space, nor about their alignment, so in each loop we compute the maximum number - * of bytes that we can xor without crossing any page boundaries, and do only this many - * bytes before the next remap. + +/* Map a user buffer into kernel space, if necessary. */ +#define REMAP_VA(_bp,x,y) (y) = (x) + +/* + * When XORing in kernel mode, we need to map each user page to kernel + * space before we can access it. + * We don't want to assume anything about which input buffers are in + * kernel/user space, nor about their alignment, so in each loop we + * compute the maximum number of bytes that we can xor without crossing + * any page boundaries, and do only this many bytes before the next remap. */ -int -rf_longword_bxor(src, dest, len, bp) - unsigned long *src; - unsigned long *dest; - int len; /* longwords */ - void *bp; +int +rf_longword_bxor(unsigned long *src, unsigned long *dest, int len, void *bp) { - unsigned long *end = src + len; - unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ - unsigned long *pg_src, *pg_dest; /* per-page source/dest - * pointers */ - int longs_this_time;/* # longwords to xor in the current iteration */ + unsigned long *end = src + len; /* len in longwords. */ + unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ + unsigned long *pg_src, *pg_dest; /* Per-page source/dest pointers. */ + int longs_this_time; /* # longwords to xor in the current iteration. 
*/ REMAP_VA(bp, src, pg_src); REMAP_VA(bp, dest, pg_dest); @@ -654,7 +655,8 @@ rf_longword_bxor(src, dest, len, bp) return (EFAULT); while (len >= 4) { - longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */ + longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), + RF_BLIP(pg_dest)) >> RF_LONGSHIFT); src += longs_this_time; dest += longs_this_time; len -= longs_this_time; @@ -675,14 +677,16 @@ rf_longword_bxor(src, dest, len, bp) pg_dest += 4; longs_this_time -= 4; } - while (longs_this_time > 0) { /* cannot cross any page - * boundaries here */ + while (longs_this_time > 0) { + /* Cannot cross any page boundaries here. */ *pg_dest++ ^= *pg_src++; longs_this_time--; } - /* either we're done, or we've reached a page boundary on one - * (or possibly both) of the pointers */ + /* + * Either we're done, or we've reached a page boundary on one + * (or possibly both) of the pointers. + */ if (len) { if (RF_PAGE_ALIGNED(src)) REMAP_VA(bp, src, pg_src); @@ -708,24 +712,21 @@ rf_longword_bxor(src, dest, len, bp) /* - dst = a ^ b ^ c; - a may equal dst - see comment above longword_bxor -*/ -int -rf_longword_bxor3(dst, a, b, c, len, bp) - unsigned long *dst; - unsigned long *a; - unsigned long *b; - unsigned long *c; - int len; /* length in longwords */ - void *bp; + * dst = a ^ b ^ c; + * a may equal dst + * see comment above longword_bxor + */ +int +rf_longword_bxor3(unsigned long *dst, unsigned long *a, unsigned long *b, + unsigned long *c, int len, void *bp) { unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest - * pointers */ - int longs_this_time;/* # longs to xor in the current iteration */ - char dst_is_a = 0; + /* Per-page source/dest pointers. */ + unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; + int longs_this_time; /* # longs to xor in the current iteration */ + char dst_is_a = 0; + + /* Note: The length (len) is in longwords. */ REMAP_VA(bp, a, pg_a); REMAP_VA(bp, b, pg_b); @@ -737,7 +738,7 @@ rf_longword_bxor3(dst, a, b, c, len, bp) REMAP_VA(bp, dst, pg_dst); } - /* align dest to cache line. Can't cross a pg boundary on dst here. */ + /* Align dest to cache line. Can't cross a pg boundary on dst here. */ while ((((unsigned long) pg_dst) & 0x1f)) { *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; dst++; @@ -763,7 +764,9 @@ rf_longword_bxor3(dst, a, b, c, len, bp) } while (len > 4) { - longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT); + longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), + RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> + RF_LONGSHIFT); a += longs_this_time; b += longs_this_time; c += longs_this_time; @@ -784,7 +787,7 @@ rf_longword_bxor3(dst, a, b, c, len, bp) b2 = pg_b[2]; b3 = pg_b[3]; - /* start dual issue */ + /* Start dual issue. */ a0 ^= b0; b0 = pg_c[0]; @@ -811,8 +814,8 @@ rf_longword_bxor3(dst, a, b, c, len, bp) pg_dst[3] = a3; pg_dst += 4; } - while (longs_this_time > 0) { /* cannot cross any page - * boundaries here */ + while (longs_this_time > 0) { + /* Cannot cross any page boundaries here. 
*/ *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; longs_this_time--; } @@ -877,17 +880,14 @@ rf_longword_bxor3(dst, a, b, c, len, bp) return (0); } -int -rf_bxor3(dst, a, b, c, len, bp) - unsigned char *dst; - unsigned char *a; - unsigned char *b; - unsigned char *c; - unsigned long len; - void *bp; +int +rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b, + unsigned char *c, unsigned long len, void *bp) { - RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0); + RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) + == 0); return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a, - (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp)); + (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, + bp)); } diff --git a/sys/dev/raidframe/rf_dagfuncs.h b/sys/dev/raidframe/rf_dagfuncs.h index 7114fd5f164..681d0fb5f4e 100644 --- a/sys/dev/raidframe/rf_dagfuncs.h +++ b/sys/dev/raidframe/rf_dagfuncs.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagfuncs.h,v 1.3 2000/08/08 16:07:39 peter Exp $ */ +/* $OpenBSD: rf_dagfuncs.h,v 1.4 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagfuncs.h,v 1.4 2000/03/30 13:39:07 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,49 +28,44 @@ * rights to redistribute these changes. */ -/***************************************************************************************** +/***************************************************************************** * - * dagfuncs.h -- header file for DAG node execution routines + * dagfuncs.h -- Header file for DAG node execution routines. * - ****************************************************************************************/ + *****************************************************************************/ -#ifndef _RF__RF_DAGFUNCS_H_ -#define _RF__RF_DAGFUNCS_H_ +#ifndef _RF__RF_DAGFUNCS_H_ +#define _RF__RF_DAGFUNCS_H_ -int rf_ConfigureDAGFuncs(RF_ShutdownList_t ** listp); -int rf_TerminateFunc(RF_DagNode_t * node); -int rf_TerminateUndoFunc(RF_DagNode_t * node); -int rf_DiskReadMirrorIdleFunc(RF_DagNode_t * node); -int rf_DiskReadMirrorPartitionFunc(RF_DagNode_t * node); -int rf_DiskReadMirrorUndoFunc(RF_DagNode_t * node); -int rf_ParityLogUpdateFunc(RF_DagNode_t * node); -int rf_ParityLogOverwriteFunc(RF_DagNode_t * node); -int rf_ParityLogUpdateUndoFunc(RF_DagNode_t * node); -int rf_ParityLogOverwriteUndoFunc(RF_DagNode_t * node); -int rf_NullNodeFunc(RF_DagNode_t * node); -int rf_NullNodeUndoFunc(RF_DagNode_t * node); -int rf_DiskReadFuncForThreads(RF_DagNode_t * node); -int rf_DiskWriteFuncForThreads(RF_DagNode_t * node); -int rf_DiskUndoFunc(RF_DagNode_t * node); -int rf_DiskUnlockFuncForThreads(RF_DagNode_t * node); -int rf_GenericWakeupFunc(RF_DagNode_t * node, int status); -int rf_RegularXorFunc(RF_DagNode_t * node); -int rf_SimpleXorFunc(RF_DagNode_t * node); -int rf_RecoveryXorFunc(RF_DagNode_t * node); -int -rf_XorIntoBuffer(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, char *srcbuf, - char *targbuf, void *bp); -int rf_bxor(char *src, char *dest, int len, void *bp); -int -rf_longword_bxor(unsigned long *src, unsigned long *dest, int len, void *bp); -int -rf_longword_bxor3(unsigned long *dest, unsigned long *a, unsigned long *b, - unsigned long *c, int len, void *bp); -int -rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b, - unsigned char *c, unsigned long len, void *bp); +int rf_ConfigureDAGFuncs(RF_ShutdownList_t **); +int rf_TerminateFunc(RF_DagNode_t *); +int 
rf_TerminateUndoFunc(RF_DagNode_t *); +int rf_DiskReadMirrorIdleFunc(RF_DagNode_t *); +int rf_DiskReadMirrorPartitionFunc(RF_DagNode_t *); +int rf_DiskReadMirrorUndoFunc(RF_DagNode_t *); +int rf_ParityLogUpdateFunc(RF_DagNode_t *); +int rf_ParityLogOverwriteFunc(RF_DagNode_t *); +int rf_ParityLogUpdateUndoFunc(RF_DagNode_t *); +int rf_ParityLogOverwriteUndoFunc(RF_DagNode_t *); +int rf_NullNodeFunc(RF_DagNode_t *); +int rf_NullNodeUndoFunc(RF_DagNode_t *); +int rf_DiskReadFuncForThreads(RF_DagNode_t *); +int rf_DiskWriteFuncForThreads(RF_DagNode_t *); +int rf_DiskUndoFunc(RF_DagNode_t *); +int rf_DiskUnlockFuncForThreads(RF_DagNode_t *); +int rf_GenericWakeupFunc(RF_DagNode_t *, int); +int rf_RegularXorFunc(RF_DagNode_t *); +int rf_SimpleXorFunc(RF_DagNode_t *); +int rf_RecoveryXorFunc(RF_DagNode_t *); +int rf_XorIntoBuffer(RF_Raid_t *, RF_PhysDiskAddr_t *, char *, char *, void *); +int rf_bxor(char *, char *, int, void *); +int rf_longword_bxor(unsigned long *, unsigned long *, int, void *); +int rf_longword_bxor3(unsigned long *, unsigned long *, unsigned long *, + unsigned long *, int, void *); +int rf_bxor3(unsigned char *, unsigned char *, unsigned char *, + unsigned char *, unsigned long, void *); -/* function ptrs defined in ConfigureDAGFuncs() */ +/* Function ptrs defined in ConfigureDAGFuncs(). */ extern int (*rf_DiskReadFunc) (RF_DagNode_t *); extern int (*rf_DiskWriteFunc) (RF_DagNode_t *); extern int (*rf_DiskReadUndoFunc) (RF_DagNode_t *); @@ -80,11 +76,17 @@ extern int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *); extern int (*rf_RegularXorUndoFunc) (RF_DagNode_t *); extern int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); -/* macros for manipulating the param[3] in a read or write node */ -#define RF_CREATE_PARAM3(pri, lk, unlk, wru) (((RF_uint64)(((wru&0xFFFFFF)<<8)|((lk)?0x10:0)|((unlk)?0x20:0)|((pri)&0xF)) )) -#define RF_EXTRACT_PRIORITY(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 0) & 0x0F) -#define RF_EXTRACT_LOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 4) & 0x1) -#define RF_EXTRACT_UNLOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 5) & 0x1) -#define RF_EXTRACT_RU(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 8) & 0xFFFFFF) +/* Macros for manipulating the param[3] in a read or write node. */ +#define RF_CREATE_PARAM3(pri,lk,unlk,wru) \ + (((RF_uint64) (((wru & 0xFFFFFF) << 8) | ((lk) ? 0x10 : 0) | \ + ((unlk) ? 0x20 : 0) | ((pri) & 0xF)))) +#define RF_EXTRACT_PRIORITY(_x_) \ + ((((unsigned) ((unsigned long)(_x_))) >> 0) & 0x0F) +#define RF_EXTRACT_LOCK_FLAG(_x_) \ + ((((unsigned) ((unsigned long)(_x_))) >> 4) & 0x1) +#define RF_EXTRACT_UNLOCK_FLAG(_x_) \ + ((((unsigned) ((unsigned long)(_x_))) >> 5) & 0x1) +#define RF_EXTRACT_RU(_x_) \ + ((((unsigned) ((unsigned long)(_x_))) >> 8) & 0xFFFFFF) -#endif /* !_RF__RF_DAGFUNCS_H_ */ +#endif /* !_RF__RF_DAGFUNCS_H_ */ diff --git a/sys/dev/raidframe/rf_dagutils.c b/sys/dev/raidframe/rf_dagutils.c index d2b0930be1d..78f04e8e1f0 100644 --- a/sys/dev/raidframe/rf_dagutils.c +++ b/sys/dev/raidframe/rf_dagutils.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagutils.c,v 1.3 2000/01/07 14:50:20 peter Exp $ */ +/* $OpenBSD: rf_dagutils.c,v 1.4 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagutils.c,v 1.6 1999/12/09 02:26:09 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,9 +28,9 @@ * rights to redistribute these changes. 
*/ -/****************************************************************************** +/***************************************************************************** * - * rf_dagutils.c -- utility routines for manipulating dags + * rf_dagutils.c -- Utility routines for manipulating dags. * *****************************************************************************/ @@ -45,50 +46,49 @@ #include "rf_map.h" #include "rf_shutdown.h" -#define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_))) +#define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_))) RF_RedFuncs_t rf_xorFuncs = { - rf_RegularXorFunc, "Reg Xr", -rf_SimpleXorFunc, "Simple Xr"}; + rf_RegularXorFunc, "Reg Xr", rf_SimpleXorFunc, "Simple Xr" +}; RF_RedFuncs_t rf_xorRecoveryFuncs = { - rf_RecoveryXorFunc, "Recovery Xr", -rf_RecoveryXorFunc, "Recovery Xr"}; - -static void rf_RecurPrintDAG(RF_DagNode_t *, int, int); -static void rf_PrintDAG(RF_DagHeader_t *); -static int -rf_ValidateBranch(RF_DagNode_t *, int *, int *, - RF_DagNode_t **, int); -static void rf_ValidateBranchVisitedBits(RF_DagNode_t *, int, int); -static void rf_ValidateVisitedBits(RF_DagHeader_t *); - -/****************************************************************************** + rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr" +}; + +void rf_RecurPrintDAG(RF_DagNode_t *, int, int); +void rf_PrintDAG(RF_DagHeader_t *); +int rf_ValidateBranch(RF_DagNode_t *, int *, int *, RF_DagNode_t **, int); +void rf_ValidateBranchVisitedBits(RF_DagNode_t *, int, int); +void rf_ValidateVisitedBits(RF_DagHeader_t *); + +/***************************************************************************** * - * InitNode - initialize a dag node + * InitNode - Initialize a dag node. * - * the size of the propList array is always the same as that of the + * The size of the propList array is always the same as that of the * successors array. * *****************************************************************************/ -void +void rf_InitNode( - RF_DagNode_t * node, - RF_NodeStatus_t initstatus, - int commit, - int (*doFunc) (RF_DagNode_t * node), - int (*undoFunc) (RF_DagNode_t * node), - int (*wakeFunc) (RF_DagNode_t * node, int status), - int nSucc, - int nAnte, - int nParam, - int nResult, - RF_DagHeader_t * hdr, - char *name, - RF_AllocListElem_t * alist) + RF_DagNode_t *node, + RF_NodeStatus_t initstatus, + int commit, + int (*doFunc) (RF_DagNode_t *), + int (*undoFunc) (RF_DagNode_t *node), + int (*wakeFunc) (RF_DagNode_t *node, int), + int nSucc, + int nAnte, + int nParam, + int nResult, + RF_DagHeader_t *hdr, + char *name, + RF_AllocListElem_t *alist +) { - void **ptrs; - int nptrs; + void **ptrs; + int nptrs; if (nAnte > RF_MAX_ANTECEDENTS) RF_PANIC(); @@ -107,7 +107,7 @@ rf_InitNode( node->dagHdr = hdr; node->visited = 0; - /* allocate all the pointers with one call to malloc */ + /* Allocate all the pointers with one call to malloc. */ nptrs = nSucc + nAnte + nResult + nSucc; if (nptrs <= RF_DAG_PTRCACHESIZE) { @@ -115,12 +115,12 @@ rf_InitNode( * The dag_ptrs field of the node is basically some scribble * space to be used here. We could get rid of it, and always * allocate the range of pointers, but that's expensive. So, - * we pick a "common case" size for the pointer cache. Hopefully, - * we'll find that: + * we pick a "common case" size for the pointer cache. 
+ * Hopefully, we'll find that: * (1) Generally, nptrs doesn't exceed RF_DAG_PTRCACHESIZE by - * only a little bit (least efficient case) - * (2) Generally, ntprs isn't a lot less than RF_DAG_PTRCACHESIZE - * (wasted memory) + * only a little bit (least efficient case). + * (2) Generally, ntprs isn't a lot less than + * RF_DAG_PTRCACHESIZE (wasted memory). */ ptrs = (void **) node->dag_ptrs; } else { @@ -129,13 +129,15 @@ rf_InitNode( node->succedents = (nSucc) ? (RF_DagNode_t **) ptrs : NULL; node->antecedents = (nAnte) ? (RF_DagNode_t **) (ptrs + nSucc) : NULL; node->results = (nResult) ? (void **) (ptrs + nSucc + nAnte) : NULL; - node->propList = (nSucc) ? (RF_PropHeader_t **) (ptrs + nSucc + nAnte + nResult) : NULL; + node->propList = (nSucc) ? (RF_PropHeader_t **) + (ptrs + nSucc + nAnte + nResult) : NULL; if (nParam) { if (nParam <= RF_DAG_PARAMCACHESIZE) { node->params = (RF_DagParam_t *) node->dag_params; } else { - RF_CallocAndAdd(node->params, nParam, sizeof(RF_DagParam_t), (RF_DagParam_t *), alist); + RF_CallocAndAdd(node->params, nParam, + sizeof(RF_DagParam_t), (RF_DagParam_t *), alist); } } else { node->params = NULL; @@ -144,38 +146,39 @@ rf_InitNode( -/****************************************************************************** +/***************************************************************************** * - * allocation and deallocation routines + * Allocation and deallocation routines. * *****************************************************************************/ -void -rf_FreeDAG(dag_h) - RF_DagHeader_t *dag_h; +void +rf_FreeDAG(RF_DagHeader_t *dag_h) { RF_AccessStripeMapHeader_t *asmap, *t_asmap; RF_DagHeader_t *nextDag; - int i; + int i; while (dag_h) { nextDag = dag_h->next; for (i = 0; dag_h->memChunk[i] && i < RF_MAXCHUNKS; i++) { - /* release mem chunks */ + /* Release mem chunks. */ rf_ReleaseMemChunk(dag_h->memChunk[i]); dag_h->memChunk[i] = NULL; } RF_ASSERT(i == dag_h->chunkIndex); if (dag_h->xtraChunkCnt > 0) { - /* free xtraMemChunks */ - for (i = 0; dag_h->xtraMemChunk[i] && i < dag_h->xtraChunkIndex; i++) { + /* Free xtraMemChunks. */ + for (i = 0; dag_h->xtraMemChunk[i] && + i < dag_h->xtraChunkIndex; i++) { rf_ReleaseMemChunk(dag_h->xtraMemChunk[i]); dag_h->xtraMemChunk[i] = NULL; } RF_ASSERT(i == dag_h->xtraChunkIndex); - /* free ptrs to xtraMemChunks */ - RF_Free(dag_h->xtraMemChunk, dag_h->xtraChunkCnt * sizeof(RF_ChunkDesc_t *)); + /* Free ptrs to xtraMemChunks. 
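A reduced sketch of the single-allocation pointer carve-up that rf_InitNode() performs above: the succedent, antecedent, result and propList arrays are sliced out of one block, and that block comes from the node's inline dag_ptrs scratch space whenever the total fits. Simplified names and plain calloc(); not the RAIDframe types.

#include <stdlib.h>

#define PTRCACHESIZE 10

/* Hypothetical reduced node; not the RAIDframe RF_DagNode_t. */
struct node {
    void  *ptrcache[PTRCACHESIZE];  /* analogous to dag_ptrs */
    void **succedents;
    void **antecedents;
    void **results;
    void **proplist;
};

static int
node_alloc_ptrs(struct node *n, int nsucc, int nante, int nresult)
{
    /* propList is sized like the successor array, hence nsucc twice. */
    int    nptrs = nsucc + nante + nresult + nsucc;
    void **ptrs;

    if (nptrs <= PTRCACHESIZE)
        ptrs = n->ptrcache;         /* common case: no allocation at all */
    else if ((ptrs = calloc(nptrs, sizeof(void *))) == NULL)
        return (-1);

    n->succedents  = nsucc   ? ptrs : NULL;
    n->antecedents = nante   ? ptrs + nsucc : NULL;
    n->results     = nresult ? ptrs + nsucc + nante : NULL;
    n->proplist    = nsucc   ? ptrs + nsucc + nante + nresult : NULL;
    return (0);
}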
*/ + RF_Free(dag_h->xtraMemChunk, dag_h->xtraChunkCnt * + sizeof(RF_ChunkDesc_t *)); } rf_FreeAllocList(dag_h->allocList); for (asmap = dag_h->asmList; asmap;) { @@ -189,17 +192,13 @@ rf_FreeDAG(dag_h) } RF_PropHeader_t * -rf_MakePropListEntry( - RF_DagHeader_t * dag_h, - int resultNum, - int paramNum, - RF_PropHeader_t * next, - RF_AllocListElem_t * allocList) +rf_MakePropListEntry(RF_DagHeader_t *dag_h, int resultNum, int paramNum, + RF_PropHeader_t *next, RF_AllocListElem_t *allocList) { RF_PropHeader_t *p; - RF_CallocAndAdd(p, 1, sizeof(RF_PropHeader_t), - (RF_PropHeader_t *), allocList); + RF_CallocAndAdd(p, 1, sizeof(RF_PropHeader_t), (RF_PropHeader_t *), + allocList); p->resultNum = resultNum; p->paramNum = paramNum; p->next = next; @@ -208,32 +207,30 @@ rf_MakePropListEntry( static RF_FreeList_t *rf_dagh_freelist; -#define RF_MAX_FREE_DAGH 128 -#define RF_DAGH_INC 16 -#define RF_DAGH_INITIAL 32 +#define RF_MAX_FREE_DAGH 128 +#define RF_DAGH_INC 16 +#define RF_DAGH_INITIAL 32 -static void rf_ShutdownDAGs(void *); -static void -rf_ShutdownDAGs(ignored) - void *ignored; +void rf_ShutdownDAGs(void *); +void +rf_ShutdownDAGs(void *ignored) { RF_FREELIST_DESTROY(rf_dagh_freelist, next, (RF_DagHeader_t *)); } -int -rf_ConfigureDAGs(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureDAGs(RF_ShutdownList_t **listp) { - int rc; + int rc; - RF_FREELIST_CREATE(rf_dagh_freelist, RF_MAX_FREE_DAGH, - RF_DAGH_INC, sizeof(RF_DagHeader_t)); + RF_FREELIST_CREATE(rf_dagh_freelist, RF_MAX_FREE_DAGH, RF_DAGH_INC, + sizeof(RF_DagHeader_t)); if (rf_dagh_freelist == NULL) return (ENOMEM); rc = rf_ShutdownCreate(listp, rf_ShutdownDAGs, NULL); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line" + " %d rc=%d\n", __FILE__, __LINE__, rc); rf_ShutdownDAGs(NULL); return (rc); } @@ -243,7 +240,7 @@ rf_ConfigureDAGs(listp) } RF_DagHeader_t * -rf_AllocDAGHeader() +rf_AllocDAGHeader(void) { RF_DagHeader_t *dh; @@ -254,36 +251,37 @@ rf_AllocDAGHeader() return (dh); } -void -rf_FreeDAGHeader(RF_DagHeader_t * dh) +void +rf_FreeDAGHeader(RF_DagHeader_t *dh) { RF_FREELIST_FREE(rf_dagh_freelist, dh, next); } -/* allocates a buffer big enough to hold the data described by pda */ -void * -rf_AllocBuffer( - RF_Raid_t * raidPtr, - RF_DagHeader_t * dag_h, - RF_PhysDiskAddr_t * pda, - RF_AllocListElem_t * allocList) + +/* Allocate a buffer big enough to hold the data described by pda. */ +void * +rf_AllocBuffer(RF_Raid_t *raidPtr, RF_DagHeader_t *dag_h, + RF_PhysDiskAddr_t *pda, RF_AllocListElem_t *allocList) { - char *p; + char *p; RF_MallocAndAdd(p, pda->numSector << raidPtr->logBytesPerSector, (char *), allocList); return ((void *) p); } -/****************************************************************************** + + +/***************************************************************************** * - * debug routines + * Debug routines. 
* *****************************************************************************/ -char * -rf_NodeStatusString(RF_DagNode_t * node) +char * +rf_NodeStatusString(RF_DagNode_t *node) { switch (node->status) { - case rf_wait:return ("wait"); + case rf_wait: + return ("wait"); case rf_fired: return ("fired"); case rf_good: @@ -295,25 +293,25 @@ rf_NodeStatusString(RF_DagNode_t * node) } } -void -rf_PrintNodeInfoString(RF_DagNode_t * node) +void +rf_PrintNodeInfoString(RF_DagNode_t *node) { RF_PhysDiskAddr_t *pda; - int (*df) (RF_DagNode_t *) = node->doFunc; - int i, lk, unlk; - void *bufPtr; + int (*df) (RF_DagNode_t *) = node->doFunc; + int i, lk, unlk; + void *bufPtr; - if ((df == rf_DiskReadFunc) || (df == rf_DiskWriteFunc) - || (df == rf_DiskReadMirrorIdleFunc) - || (df == rf_DiskReadMirrorPartitionFunc)) { + if ((df == rf_DiskReadFunc) || (df == rf_DiskWriteFunc) || + (df == rf_DiskReadMirrorIdleFunc) || + (df == rf_DiskReadMirrorPartitionFunc)) { pda = (RF_PhysDiskAddr_t *) node->params[0].p; bufPtr = (void *) node->params[1].p; lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v); unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); RF_ASSERT(!(lk && unlk)); - printf("r %d c %d offs %ld nsect %d buf 0x%lx %s\n", pda->row, pda->col, - (long) pda->startSector, (int) pda->numSector, (long) bufPtr, - (lk) ? "LOCK" : ((unlk) ? "UNLK" : " ")); + printf("r %d c %d offs %ld nsect %d buf 0x%lx %s\n", pda->row, + pda->col, (long) pda->startSector, (int) pda->numSector, + (long) bufPtr, (lk) ? "LOCK" : ((unlk) ? "UNLK" : " ")); return; } if (df == rf_DiskUnlockFunc) { @@ -337,7 +335,7 @@ rf_PrintNodeInfoString(RF_DagNode_t * node) } return; } -#if RF_INCLUDE_PARITYLOGGING > 0 +#if RF_INCLUDE_PARITYLOGGING > 0 if (df == rf_ParityLogOverwriteFunc || df == rf_ParityLogUpdateFunc) { for (i = 0; i < node->numParams - 1; i += 2) { pda = (RF_PhysDiskAddr_t *) node->params[i].p; @@ -348,7 +346,7 @@ rf_PrintNodeInfoString(RF_DagNode_t * node) } return; } -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ if ((df == rf_TerminateFunc) || (df == rf_NullNodeFunc)) { printf("\n"); @@ -357,20 +355,19 @@ rf_PrintNodeInfoString(RF_DagNode_t * node) printf("?\n"); } -static void -rf_RecurPrintDAG(node, depth, unvisited) - RF_DagNode_t *node; - int depth; - int unvisited; +void +rf_RecurPrintDAG(RF_DagNode_t *node, int depth, int unvisited) { - char *anttype; - int i; + char *anttype; + int i; node->visited = (unvisited) ? 0 : 1; printf("(%d) %d C%d %s: %s,s%d %d/%d,a%d/%d,p%d,r%d S{", depth, - node->nodeNum, node->commitNode, node->name, rf_NodeStatusString(node), - node->numSuccedents, node->numSuccFired, node->numSuccDone, - node->numAntecedents, node->numAntDone, node->numParams, node->numResults); + node->nodeNum, node->commitNode, node->name, + rf_NodeStatusString(node), node->numSuccedents, + node->numSuccFired, node->numSuccDone, + node->numAntecedents, node->numAntDone, + node->numParams, node->numResults); for (i = 0; i < node->numSuccedents; i++) { printf("%d%s", node->succedents[i]->nodeNum, ((i == node->numSuccedents - 1) ? "\0" : " ")); @@ -394,24 +391,25 @@ rf_RecurPrintDAG(node, depth, unvisited) anttype = "?"; break; } - printf("%d(%s)%s", node->antecedents[i]->nodeNum, anttype, (i == node->numAntecedents - 1) ? "\0" : " "); + printf("%d(%s)%s", node->antecedents[i]->nodeNum, anttype, + (i == node->numAntecedents - 1) ? 
"\0" : " "); } printf("}; "); rf_PrintNodeInfoString(node); for (i = 0; i < node->numSuccedents; i++) { if (node->succedents[i]->visited == unvisited) - rf_RecurPrintDAG(node->succedents[i], depth + 1, unvisited); + rf_RecurPrintDAG(node->succedents[i], depth + 1, + unvisited); } } -static void -rf_PrintDAG(dag_h) - RF_DagHeader_t *dag_h; +void +rf_PrintDAG(RF_DagHeader_t *dag_h) { - int unvisited, i; - char *status; + int unvisited, i; + char *status; - /* set dag status */ + /* Set dag status. */ switch (dag_h->status) { case rf_enable: status = "enable"; @@ -423,16 +421,18 @@ rf_PrintDAG(dag_h) status = "rollBackward"; break; default: - status = "illegal!"; + status = "illegal !"; break; } - /* find out if visited bits are currently set or clear */ + /* Find out if visited bits are currently set or cleared. */ unvisited = dag_h->succedents[0]->visited; printf("DAG type: %s\n", dag_h->creator); - printf("format is (depth) num commit type: status,nSucc nSuccFired/nSuccDone,nAnte/nAnteDone,nParam,nResult S{x} A{x(type)}; info\n"); + printf("format is (depth) num commit type: status,nSucc nSuccFired/n" + "SuccDone,nAnte/nAnteDone,nParam,nResult S{x} A{x(type)}; info\n"); printf("(0) %d Hdr: %s, s%d, (commit %d/%d) S{", dag_h->nodeNum, - status, dag_h->numSuccedents, dag_h->numCommitNodes, dag_h->numCommits); + status, dag_h->numSuccedents, dag_h->numCommitNodes, + dag_h->numCommits); for (i = 0; i < dag_h->numSuccedents; i++) { printf("%d%s", dag_h->succedents[i]->nodeNum, ((i == dag_h->numSuccedents - 1) ? "\0" : " ")); @@ -443,11 +443,12 @@ rf_PrintDAG(dag_h) rf_RecurPrintDAG(dag_h->succedents[i], 1, unvisited); } } -/* assigns node numbers */ -int -rf_AssignNodeNums(RF_DagHeader_t * dag_h) + +/* Assign node numbers. */ +int +rf_AssignNodeNums(RF_DagHeader_t *dag_h) { - int unvisited, i, nnum; + int unvisited, i, nnum; RF_DagNode_t *node; nnum = 0; @@ -457,59 +458,58 @@ rf_AssignNodeNums(RF_DagHeader_t * dag_h) for (i = 0; i < dag_h->numSuccedents; i++) { node = dag_h->succedents[i]; if (node->visited == unvisited) { - nnum = rf_RecurAssignNodeNums(dag_h->succedents[i], nnum, unvisited); + nnum = rf_RecurAssignNodeNums(dag_h->succedents[i], + nnum, unvisited); } } return (nnum); } -int -rf_RecurAssignNodeNums(node, num, unvisited) - RF_DagNode_t *node; - int num; - int unvisited; +int +rf_RecurAssignNodeNums(RF_DagNode_t *node, int num, int unvisited) { - int i; + int i; node->visited = (unvisited) ? 0 : 1; node->nodeNum = num++; for (i = 0; i < node->numSuccedents; i++) { if (node->succedents[i]->visited == unvisited) { - num = rf_RecurAssignNodeNums(node->succedents[i], num, unvisited); + num = rf_RecurAssignNodeNums(node->succedents[i], + num, unvisited); } } return (num); } -/* set the header pointers in each node to "newptr" */ -void -rf_ResetDAGHeaderPointers(dag_h, newptr) - RF_DagHeader_t *dag_h; - RF_DagHeader_t *newptr; + +/* Set the header pointers in each node to "newptr". 
*/ +void +rf_ResetDAGHeaderPointers(RF_DagHeader_t *dag_h, RF_DagHeader_t *newptr) { - int i; + int i; + for (i = 0; i < dag_h->numSuccedents; i++) if (dag_h->succedents[i]->dagHdr != newptr) - rf_RecurResetDAGHeaderPointers(dag_h->succedents[i], newptr); + rf_RecurResetDAGHeaderPointers(dag_h->succedents[i], + newptr); } -void -rf_RecurResetDAGHeaderPointers(node, newptr) - RF_DagNode_t *node; - RF_DagHeader_t *newptr; +void +rf_RecurResetDAGHeaderPointers(RF_DagNode_t *node, RF_DagHeader_t *newptr) { - int i; + int i; + node->dagHdr = newptr; for (i = 0; i < node->numSuccedents; i++) if (node->succedents[i]->dagHdr != newptr) - rf_RecurResetDAGHeaderPointers(node->succedents[i], newptr); + rf_RecurResetDAGHeaderPointers(node->succedents[i], + newptr); } - -void -rf_PrintDAGList(RF_DagHeader_t * dag_h) +void +rf_PrintDAGList(RF_DagHeader_t *dag_h) { - int i = 0; + int i = 0; for (; dag_h; dag_h = dag_h->next) { rf_AssignNodeNums(dag_h); @@ -518,53 +518,53 @@ rf_PrintDAGList(RF_DagHeader_t * dag_h) } } -static int -rf_ValidateBranch(node, scount, acount, nodes, unvisited) - RF_DagNode_t *node; - int *scount; - int *acount; - RF_DagNode_t **nodes; - int unvisited; +int +rf_ValidateBranch(RF_DagNode_t *node, int *scount, int *acount, + RF_DagNode_t **nodes, int unvisited) { - int i, retcode = 0; + int i, retcode = 0; - /* construct an array of node pointers indexed by node num */ + /* Construct an array of node pointers indexed by node num. */ node->visited = (unvisited) ? 0 : 1; nodes[node->nodeNum] = node; if (node->next != NULL) { - printf("INVALID DAG: next pointer in node is not NULL\n"); + printf("INVALID DAG: next pointer in node is not NULL.\n"); retcode = 1; } if (node->status != rf_wait) { - printf("INVALID DAG: Node status is not wait\n"); + printf("INVALID DAG: Node status is not wait.\n"); retcode = 1; } if (node->numAntDone != 0) { - printf("INVALID DAG: numAntDone is not zero\n"); + printf("INVALID DAG: numAntDone is not zero.\n"); retcode = 1; } if (node->doFunc == rf_TerminateFunc) { if (node->numSuccedents != 0) { - printf("INVALID DAG: Terminator node has succedents\n"); + printf("INVALID DAG: Terminator node has" + " succedents.\n"); retcode = 1; } } else { if (node->numSuccedents == 0) { - printf("INVALID DAG: Non-terminator node has no succedents\n"); + printf("INVALID DAG: Non-terminator node has no" + " succedents\n"); retcode = 1; } } for (i = 0; i < node->numSuccedents; i++) { if (!node->succedents[i]) { - printf("INVALID DAG: succedent %d of node %s is NULL\n", i, node->name); + printf("INVALID DAG: succedent %d of node %s" + " is NULL.\n", i, node->name); retcode = 1; } scount[node->succedents[i]->nodeNum]++; } for (i = 0; i < node->numAntecedents; i++) { if (!node->antecedents[i]) { - printf("INVALID DAG: antecedent %d of node %s is NULL\n", i, node->name); + printf("INVALID DAG: antecedent %d of node %s is" + " NULL.\n", i, node->name); retcode = 1; } acount[node->antecedents[i]->nodeNum]++; @@ -580,43 +580,46 @@ rf_ValidateBranch(node, scount, acount, nodes, unvisited) return (retcode); } -static void -rf_ValidateBranchVisitedBits(node, unvisited, rl) - RF_DagNode_t *node; - int unvisited; - int rl; +void +rf_ValidateBranchVisitedBits(RF_DagNode_t *node, int unvisited, int rl) { - int i; + int i; RF_ASSERT(node->visited == unvisited); for (i = 0; i < node->numSuccedents; i++) { if (node->succedents[i] == NULL) { - printf("node=%lx node->succedents[%d] is NULL\n", (long) node, i); + printf("node=%lx node->succedents[%d] is NULL.\n", + (long) node, i); 
RF_ASSERT(0); } - rf_ValidateBranchVisitedBits(node->succedents[i], unvisited, rl + 1); + rf_ValidateBranchVisitedBits(node->succedents[i], + unvisited, rl + 1); } } -/* NOTE: never call this on a big dag, because it is exponential - * in execution time + +/* + * NOTE: Never call this on a big dag, because it is exponential + * in execution time. */ -static void -rf_ValidateVisitedBits(dag) - RF_DagHeader_t *dag; +void +rf_ValidateVisitedBits(RF_DagHeader_t *dag) { - int i, unvisited; + int i, unvisited; unvisited = dag->succedents[0]->visited; for (i = 0; i < dag->numSuccedents; i++) { if (dag->succedents[i] == NULL) { - printf("dag=%lx dag->succedents[%d] is NULL\n", (long) dag, i); + printf("dag=%lx dag->succedents[%d] is NULL.\n", + (long) dag, i); RF_ASSERT(0); } rf_ValidateBranchVisitedBits(dag->succedents[i], unvisited, 0); } } -/* validate a DAG. _at entry_ verify that: + +/* + * Validate a DAG. _at entry_ verify that: * -- numNodesCompleted is zero * -- node queue is null * -- dag status is rf_enable @@ -630,40 +633,42 @@ rf_ValidateVisitedBits(dag) * is equal to the antecedent count on that node * -- number of times that each node appears as an antecedent of another node * is equal to the succedent count on that node - * -- what else? + * -- what else ? */ -int -rf_ValidateDAG(dag_h) - RF_DagHeader_t *dag_h; +int +rf_ValidateDAG(RF_DagHeader_t *dag_h) { - int i, nodecount; - int *scount, *acount;/* per-node successor and antecedent counts */ - RF_DagNode_t **nodes; /* array of ptrs to nodes in dag */ - int retcode = 0; - int unvisited; - int commitNodeCount = 0; + int i, nodecount; + int *scount, *acount; /* Per-node successor and antecedent counts. */ + RF_DagNode_t **nodes; /* Array of ptrs to nodes in dag. */ + int retcode = 0; + int unvisited; + int commitNodeCount = 0; if (rf_validateVisitedDebug) rf_ValidateVisitedBits(dag_h); if (dag_h->numNodesCompleted != 0) { - printf("INVALID DAG: num nodes completed is %d, should be 0\n", dag_h->numNodesCompleted); + printf("INVALID DAG: num nodes completed is %d, should be 0.\n", + dag_h->numNodesCompleted); retcode = 1; goto validate_dag_bad; } if (dag_h->status != rf_enable) { - printf("INVALID DAG: not enabled\n"); + printf("INVALID DAG: not enabled.\n"); retcode = 1; goto validate_dag_bad; } if (dag_h->numCommits != 0) { - printf("INVALID DAG: numCommits != 0 (%d)\n", dag_h->numCommits); + printf("INVALID DAG: numCommits != 0 (%d)\n", + dag_h->numCommits); retcode = 1; goto validate_dag_bad; } if (dag_h->numSuccedents != 1) { - /* currently, all dags must have only one succedent */ - printf("INVALID DAG: numSuccedents !1 (%d)\n", dag_h->numSuccedents); + /* Currently, all dags must have only one succedent. */ + printf("INVALID DAG: numSuccedents != 1 (%d).\n", + dag_h->numSuccedents); retcode = 1; goto validate_dag_bad; } @@ -681,40 +686,49 @@ rf_ValidateDAG(dag_h) retcode = 1; } } - /* start at 1 to skip the header node */ + /* Start at 1 to skip the header node. 
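The cross-count invariant that rf_ValidateDAG()/rf_ValidateBranch() enforce, reduced to plain arrays: every node's antecedent count must equal the number of times other nodes list it as a succedent (and symmetrically for succedent counts). A simplified, hypothetical representation, not the RAIDframe node structure:

#include <string.h>

#define MAXNODES 16

/* Hypothetical minimal node; not the RAIDframe structure. */
struct gnode {
    int nsucc;
    int succ[MAXNODES];   /* indices of successor (succedent) nodes */
    int nante;            /* antecedent count the node claims */
};

static int
dag_counts_consistent(const struct gnode *g, int n)
{
    int as_succ[MAXNODES];   /* times node i is listed as a succedent */
    int i, j;

    memset(as_succ, 0, sizeof(as_succ));
    for (i = 0; i < n; i++)
        for (j = 0; j < g[i].nsucc; j++)
            as_succ[g[i].succ[j]]++;

    for (i = 0; i < n; i++)
        if (as_succ[i] != g[i].nante)
            return (0);      /* would print "INVALID DAG" above */
    return (1);
}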
*/ for (i = 1; i < nodecount; i++) { if (nodes[i]->commitNode) commitNodeCount++; if (nodes[i]->doFunc == NULL) { - printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name); + printf("INVALID DAG: node %s has an undefined" + " doFunc.\n", nodes[i]->name); retcode = 1; goto validate_dag_out; } if (nodes[i]->undoFunc == NULL) { - printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name); + printf("INVALID DAG: node %s has an undefined" + " doFunc.\n", nodes[i]->name); retcode = 1; goto validate_dag_out; } if (nodes[i]->numAntecedents != scount[nodes[i]->nodeNum]) { - printf("INVALID DAG: node %s has %d antecedents but appears as a succedent %d times\n", - nodes[i]->name, nodes[i]->numAntecedents, scount[nodes[i]->nodeNum]); + printf("INVALID DAG: node %s has %d antecedents but" + " appears as a succedent %d times.\n", + nodes[i]->name, nodes[i]->numAntecedents, + scount[nodes[i]->nodeNum]); retcode = 1; goto validate_dag_out; } if (nodes[i]->numSuccedents != acount[nodes[i]->nodeNum]) { - printf("INVALID DAG: node %s has %d succedents but appears as an antecedent %d times\n", - nodes[i]->name, nodes[i]->numSuccedents, acount[nodes[i]->nodeNum]); + printf("INVALID DAG: node %s has %d succedents but" + " appears as an antecedent %d times.\n", + nodes[i]->name, nodes[i]->numSuccedents, + acount[nodes[i]->nodeNum]); retcode = 1; goto validate_dag_out; } } if (dag_h->numCommitNodes != commitNodeCount) { - printf("INVALID DAG: incorrect commit node count. hdr->numCommitNodes (%d) found (%d) commit nodes in graph\n", + printf("INVALID DAG: incorrect commit node count. " + "hdr->numCommitNodes (%d) found (%d) commit nodes" + " in graph.\n", dag_h->numCommitNodes, commitNodeCount); retcode = 1; goto validate_dag_out; } + validate_dag_out: RF_Free(scount, nodecount * sizeof(int)); RF_Free(acount, nodecount * sizeof(int)); @@ -733,37 +747,37 @@ validate_dag_bad: } -/****************************************************************************** +/***************************************************************************** * - * misc construction routines + * Misc construction routines. * *****************************************************************************/ -void -rf_redirect_asm( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap) +void +rf_redirect_asm(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap) { - int ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) ? 1 : 0; - int row = asmap->physInfo->row; - int fcol = raidPtr->reconControl[row]->fcol; - int srow = raidPtr->reconControl[row]->spareRow; - int scol = raidPtr->reconControl[row]->spareCol; + int ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) ? 
1 : 0; + int row = asmap->physInfo->row; + int fcol = raidPtr->reconControl[row]->fcol; + int srow = raidPtr->reconControl[row]->spareRow; + int scol = raidPtr->reconControl[row]->spareCol; RF_PhysDiskAddr_t *pda; RF_ASSERT(raidPtr->status[row] == rf_rs_reconstructing); for (pda = asmap->physInfo; pda; pda = pda->next) { if (pda->col == fcol) { if (rf_dagDebug) { - if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, - pda->startSector)) { + if (!rf_CheckRUReconstructed( + raidPtr->reconControl[row]->reconMap, + pda->startSector)) { RF_PANIC(); } } - /* printf("Remapped data for large write\n"); */ + /*printf("Remapped data for large write\n");*/ if (ds) { - raidPtr->Layout.map->MapSector(raidPtr, pda->raidAddress, - &pda->row, &pda->col, &pda->startSector, RF_REMAP); + raidPtr->Layout.map->MapSector(raidPtr, + pda->raidAddress, &pda->row, &pda->col, + &pda->startSector, RF_REMAP); } else { pda->row = srow; pda->col = scol; @@ -773,13 +787,17 @@ rf_redirect_asm( for (pda = asmap->parityInfo; pda; pda = pda->next) { if (pda->col == fcol) { if (rf_dagDebug) { - if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, pda->startSector)) { + if (!rf_CheckRUReconstructed( + raidPtr->reconControl[row]->reconMap, + pda->startSector)) { RF_PANIC(); } } } if (ds) { - (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); + (raidPtr->Layout.map->MapParity) (raidPtr, + pda->raidAddress, &pda->row, &pda->col, + &pda->startSector, RF_REMAP); } else { pda->row = srow; pda->col = scol; @@ -788,125 +806,147 @@ rf_redirect_asm( } -/* this routine allocates read buffers and generates stripe maps for the +/* + * This routine allocates read buffers and generates stripe maps for the * regions of the array from the start of the stripe to the start of the - * access, and from the end of the access to the end of the stripe. It also + * access, and from the end of the access to the end of the stripe. It also * computes and returns the number of DAG nodes needed to read all this data. * Note that this routine does the wrong thing if the access is fully * contained within one stripe unit, so we RF_ASSERT against this case at the * start. 
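Worked numbers for the two regions the routine below maps, assuming a hypothetical stripe spanning raid addresses [640, 768) and an access covering [672, 736): the start-of-stripe region is [640, 672) and the end-of-stripe region is [736, 768), so a large write can read the data it does not already hold.

/* Hypothetical helper; intervals are half-open [start, end) sector ranges. */
static void
unaccessed_regions(long stripe_start, long stripe_end,
    long acc_start, long acc_end, long *sos_len, long *eos_len)
{
    *sos_len = acc_start - stripe_start;   /* 672 - 640 = 32 sectors */
    *eos_len = stripe_end - acc_end;       /* 768 - 736 = 32 sectors */
}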
*/ -void +void rf_MapUnaccessedPortionOfStripe( - RF_Raid_t * raidPtr, - RF_RaidLayout_t * layoutPtr,/* in: layout information */ - RF_AccessStripeMap_t * asmap, /* in: access stripe map */ - RF_DagHeader_t * dag_h, /* in: header of the dag to create */ - RF_AccessStripeMapHeader_t ** new_asm_h, /* in: ptr to array of 2 - * headers, to be filled in */ - int *nRodNodes, /* out: num nodes to be generated to read - * unaccessed data */ - char **sosBuffer, /* out: pointers to newly allocated buffer */ - char **eosBuffer, - RF_AllocListElem_t * allocList) + RF_Raid_t *raidPtr, + RF_RaidLayout_t *layoutPtr, /* in: layout information */ + RF_AccessStripeMap_t *asmap, /* in: access stripe map */ + RF_DagHeader_t *dag_h, /* in: header of the dag */ + /* to create */ + RF_AccessStripeMapHeader_t **new_asm_h, /* in: ptr to array of 2 */ + /* headers, to be */ + /* filled in */ + int *nRodNodes, /* out: num nodes to be */ + /* generated to read */ + /* unaccessed data */ + char **sosBuffer, /* out: pointers to newly */ + /* allocated buffer */ + char **eosBuffer, + RF_AllocListElem_t *allocList +) { RF_RaidAddr_t sosRaidAddress, eosRaidAddress; RF_SectorNum_t sosNumSector, eosNumSector; RF_ASSERT(asmap->numStripeUnitsAccessed > (layoutPtr->numDataCol / 2)); - /* generate an access map for the region of the array from start of - * stripe to start of access */ + /* + * Generate an access map for the region of the array from start of + * stripe to start of access. + */ new_asm_h[0] = new_asm_h[1] = NULL; *nRodNodes = 0; if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->raidAddress)) { - sosRaidAddress = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + sosRaidAddress = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, + asmap->raidAddress); sosNumSector = asmap->raidAddress - sosRaidAddress; - RF_MallocAndAdd(*sosBuffer, rf_RaidAddressToByte(raidPtr, sosNumSector), (char *), allocList); - new_asm_h[0] = rf_MapAccess(raidPtr, sosRaidAddress, sosNumSector, *sosBuffer, RF_DONT_REMAP); + RF_MallocAndAdd(*sosBuffer, rf_RaidAddressToByte(raidPtr, + sosNumSector), (char *), allocList); + new_asm_h[0] = rf_MapAccess(raidPtr, sosRaidAddress, + sosNumSector, *sosBuffer, RF_DONT_REMAP); new_asm_h[0]->next = dag_h->asmList; dag_h->asmList = new_asm_h[0]; *nRodNodes += new_asm_h[0]->stripeMap->numStripeUnitsAccessed; RF_ASSERT(new_asm_h[0]->stripeMap->next == NULL); - /* we're totally within one stripe here */ + /* We're totally within one stripe here. */ if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE) rf_redirect_asm(raidPtr, new_asm_h[0]->stripeMap); } - /* generate an access map for the region of the array from end of - * access to end of stripe */ + /* + * Generate an access map for the region of the array from end of + * access to end of stripe. 
+ */ if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->endRaidAddress)) { eosRaidAddress = asmap->endRaidAddress; - eosNumSector = rf_RaidAddressOfNextStripeBoundary(layoutPtr, eosRaidAddress) - eosRaidAddress; - RF_MallocAndAdd(*eosBuffer, rf_RaidAddressToByte(raidPtr, eosNumSector), (char *), allocList); - new_asm_h[1] = rf_MapAccess(raidPtr, eosRaidAddress, eosNumSector, *eosBuffer, RF_DONT_REMAP); + eosNumSector = rf_RaidAddressOfNextStripeBoundary(layoutPtr, + eosRaidAddress) - eosRaidAddress; + RF_MallocAndAdd(*eosBuffer, rf_RaidAddressToByte(raidPtr, + eosNumSector), (char *), allocList); + new_asm_h[1] = rf_MapAccess(raidPtr, eosRaidAddress, + eosNumSector, *eosBuffer, RF_DONT_REMAP); new_asm_h[1]->next = dag_h->asmList; dag_h->asmList = new_asm_h[1]; *nRodNodes += new_asm_h[1]->stripeMap->numStripeUnitsAccessed; RF_ASSERT(new_asm_h[1]->stripeMap->next == NULL); - /* we're totally within one stripe here */ + /* We're totally within one stripe here. */ if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE) rf_redirect_asm(raidPtr, new_asm_h[1]->stripeMap); } } - -/* returns non-zero if the indicated ranges of stripe unit offsets overlap */ -int -rf_PDAOverlap( - RF_RaidLayout_t * layoutPtr, - RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest) +/* Returns non-zero if the indicated ranges of stripe unit offsets overlap. */ +int +rf_PDAOverlap(RF_RaidLayout_t *layoutPtr, RF_PhysDiskAddr_t *src, + RF_PhysDiskAddr_t *dest) { - RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector); - RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector); - /* use -1 to be sure we stay within SU */ - RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector - 1); - RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector - 1); + RF_SectorNum_t soffs = + rf_StripeUnitOffset(layoutPtr, src->startSector); + RF_SectorNum_t doffs = + rf_StripeUnitOffset(layoutPtr, dest->startSector); + /* Use -1 to be sure we stay within SU. */ + RF_SectorNum_t send = + rf_StripeUnitOffset(layoutPtr, src->startSector + + src->numSector - 1); + RF_SectorNum_t dend = + rf_StripeUnitOffset(layoutPtr, dest->startSector + + dest->numSector - 1); + return ((RF_MAX(soffs, doffs) <= RF_MIN(send, dend)) ? 1 : 0); } -/* GenerateFailedAccessASMs +/* + * GenerateFailedAccessASMs * - * this routine figures out what portion of the stripe needs to be read - * to effect the degraded read or write operation. It's primary function + * This routine figures out what portion of the stripe needs to be read + * to effect the degraded read or write operation. It's primary function * is to identify everything required to recover the data, and then * eliminate anything that is already being accessed by the user. * * The main result is two new ASMs, one for the region from the start of the * stripe to the start of the access, and one for the region from the end of - * the access to the end of the stripe. These ASMs describe everything that - * needs to be read to effect the degraded access. Other results are: - * nXorBufs -- the total number of buffers that need to be XORed together to - * recover the lost data, - * rpBufPtr -- ptr to a newly-allocated buffer to hold the parity. If NULL + * the access to the end of the stripe. These ASMs describe everything that + * needs to be read to effect the degraded access. 
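The overlap test in rf_PDAOverlap() above, reduced to plain inclusive integer ranges: two ranges intersect exactly when the larger of the two starts is not past the smaller of the two ends.

/* Hypothetical helper; both ranges are inclusive sector intervals. */
static int
ranges_overlap(long s1, long e1, long s2, long e2)
{
    long lo = (s1 > s2) ? s1 : s2;
    long hi = (e1 < e2) ? e1 : e2;

    /* e.g. [4,9] vs [8,12]: max(4,8)=8 <= min(9,12)=9, so they overlap;
     * [4,7] vs [8,12]: 8 <= 7 fails, so they are disjoint. */
    return (lo <= hi);
}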
Other results are: + * nXorBufs -- The total number of buffers that need to be XORed together + * to recover the lost data, + * rpBufPtr -- Ptr to a newly-allocated buffer to hold the parity. If NULL * at entry, not allocated. * overlappingPDAs -- - * describes which of the non-failed PDAs in the user access + * Describes which of the non-failed PDAs, in the user access, * overlap data that needs to be read to effect recovery. * overlappingPDAs[i]==1 if and only if, neglecting the failed - * PDA, the ith pda in the input asm overlaps data that needs + * PDA, the i'th pda in the input asm overlaps data that needs * to be read for recovery. */ - /* in: asm - ASM for the actual access, one stripe only */ - /* in: faildPDA - which component of the access has failed */ - /* in: dag_h - header of the DAG we're going to create */ - /* out: new_asm_h - the two new ASMs */ - /* out: nXorBufs - the total number of xor bufs required */ - /* out: rpBufPtr - a buffer for the parity read */ -void + /* in: asmap - ASM for the actual access, one stripe only. */ + /* in: faildPDA - Which component of the access has failed. */ + /* in: dag_h - Header of the DAG we're going to create. */ + /* out: new_asm_h - The two new ASMs. */ + /* out: nXorBufs - The total number of xor bufs required. */ + /* out: rpBufPtr - A buffer for the parity read. */ +void rf_GenerateFailedAccessASMs( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_PhysDiskAddr_t * failedPDA, - RF_DagHeader_t * dag_h, - RF_AccessStripeMapHeader_t ** new_asm_h, - int *nXorBufs, - char **rpBufPtr, - char *overlappingPDAs, - RF_AllocListElem_t * allocList) + RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, + RF_PhysDiskAddr_t *failedPDA, + RF_DagHeader_t *dag_h, + RF_AccessStripeMapHeader_t **new_asm_h, + int *nXorBufs, + char **rpBufPtr, + char *overlappingPDAs, + RF_AllocListElem_t *allocList +) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); @@ -915,89 +955,124 @@ rf_GenerateFailedAccessASMs( RF_SectorCount_t numSect[2], numParitySect; RF_PhysDiskAddr_t *pda; - char *rdBuf, *bufP; - int foundit, i; + char *rdBuf, *bufP; + int foundit, i; bufP = NULL; foundit = 0; - /* first compute the following raid addresses: start of stripe, - * (sosAddr) MIN(start of access, start of failed SU), (sosEndAddr) - * MAX(end of access, end of failed SU), (eosStartAddr) end of - * stripe (i.e. start of next stripe) (eosAddr) */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - sosEndAddr = RF_MIN(asmap->raidAddress, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->raidAddress)); - eosStartAddr = RF_MAX(asmap->endRaidAddress, rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, failedPDA->raidAddress)); - eosAddr = rf_RaidAddressOfNextStripeBoundary(layoutPtr, asmap->raidAddress); + /* + * First compute the following raid addresses: + * - Start of stripe + * - (sosAddr) MIN(start of access, start of failed SU) + * - (sosEndAddr) MAX(end of access, end of failed SU) + * - (eosStartAddr) end of stripe (i.e. 
start of next stripe) + * (eosAddr) + */ + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, + asmap->raidAddress); + sosEndAddr = RF_MIN(asmap->raidAddress, + rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, + failedPDA->raidAddress)); + eosStartAddr = RF_MAX(asmap->endRaidAddress, + rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, + failedPDA->raidAddress)); + eosAddr = rf_RaidAddressOfNextStripeBoundary(layoutPtr, + asmap->raidAddress); - /* now generate access stripe maps for each of the above regions of - * the stripe. Use a dummy (NULL) buf ptr for now */ + /* + * Now generate access stripe maps for each of the above regions of + * the stripe. Use a dummy (NULL) buf ptr for now. + */ - new_asm_h[0] = (sosAddr != sosEndAddr) ? rf_MapAccess(raidPtr, sosAddr, sosEndAddr - sosAddr, NULL, RF_DONT_REMAP) : NULL; - new_asm_h[1] = (eosStartAddr != eosAddr) ? rf_MapAccess(raidPtr, eosStartAddr, eosAddr - eosStartAddr, NULL, RF_DONT_REMAP) : NULL; + new_asm_h[0] = (sosAddr != sosEndAddr) ? + rf_MapAccess(raidPtr, sosAddr, sosEndAddr - sosAddr, NULL, + RF_DONT_REMAP) : NULL; + new_asm_h[1] = (eosStartAddr != eosAddr) ? + rf_MapAccess(raidPtr, eosStartAddr, eosAddr - eosStartAddr, NULL, + RF_DONT_REMAP) : NULL; - /* walk through the PDAs and range-restrict each SU to the region of - * the SU touched on the failed PDA. also compute total data buffer - * space requirements in this step. Ignore the parity for now. */ + /* + * Walk through the PDAs and range-restrict each SU to the region of + * the SU touched on the failed PDA. Also compute total data buffer + * space requirements in this step. Ignore the parity for now. + */ numSect[0] = numSect[1] = 0; if (new_asm_h[0]) { new_asm_h[0]->next = dag_h->asmList; dag_h->asmList = new_asm_h[0]; - for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) { - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); + for (pda = new_asm_h[0]->stripeMap->physInfo; pda; + pda = pda->next) { + rf_RangeRestrictPDA(raidPtr, failedPDA, pda, + RF_RESTRICT_NOBUFFER, 0); numSect[0] += pda->numSector; } } if (new_asm_h[1]) { new_asm_h[1]->next = dag_h->asmList; dag_h->asmList = new_asm_h[1]; - for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) { - rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); + for (pda = new_asm_h[1]->stripeMap->physInfo; + pda; pda = pda->next) { + rf_RangeRestrictPDA(raidPtr, failedPDA, pda, + RF_RESTRICT_NOBUFFER, 0); numSect[1] += pda->numSector; } } numParitySect = failedPDA->numSector; - /* allocate buffer space for the data & parity we have to read to - * recover from the failure */ - - if (numSect[0] + numSect[1] + ((rpBufPtr) ? numParitySect : 0)) { /* don't allocate parity - * buf if not needed */ - RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr, numSect[0] + numSect[1] + numParitySect), (char *), allocList); + /* + * Allocate buffer space for the data & parity we have to read to + * recover from the failure. + */ + + if (numSect[0] + numSect[1] + ((rpBufPtr) ? numParitySect : 0)) { + /* Don't allocate parity buf if not needed. 
*/ + RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr, + numSect[0] + numSect[1] + numParitySect), (char *), + allocList); bufP = rdBuf; if (rf_degDagDebug) printf("Newly allocated buffer (%d bytes) is 0x%lx\n", - (int) rf_RaidAddressToByte(raidPtr, numSect[0] + numSect[1] + numParitySect), (unsigned long) bufP); + (int) rf_RaidAddressToByte(raidPtr, + numSect[0] + numSect[1] + numParitySect), + (unsigned long) bufP); } - /* now walk through the pdas one last time and assign buffer pointers - * (ugh!). Again, ignore the parity. also, count nodes to find out - * how many bufs need to be xored together */ - (*nXorBufs) = 1; /* in read case, 1 is for parity. In write - * case, 1 is for failed data */ + /* + * Now walk through the pdas one last time and assign buffer pointers + * (ugh!). Again, ignore the parity. Also, count nodes to find out + * how many bufs need to be xored together. + */ + (*nXorBufs) = 1; /* In read case, 1 is for parity. */ + /* In write case, 1 is for failed data. */ if (new_asm_h[0]) { - for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) { + for (pda = new_asm_h[0]->stripeMap->physInfo; pda; + pda = pda->next) { pda->bufPtr = bufP; bufP += rf_RaidAddressToByte(raidPtr, pda->numSector); } *nXorBufs += new_asm_h[0]->stripeMap->numStripeUnitsAccessed; } if (new_asm_h[1]) { - for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) { + for (pda = new_asm_h[1]->stripeMap->physInfo; pda; + pda = pda->next) { pda->bufPtr = bufP; bufP += rf_RaidAddressToByte(raidPtr, pda->numSector); } (*nXorBufs) += new_asm_h[1]->stripeMap->numStripeUnitsAccessed; } if (rpBufPtr) - *rpBufPtr = bufP; /* the rest of the buffer is for - * parity */ + /* The rest of the buffer is for parity. */ + *rpBufPtr = bufP; - /* the last step is to figure out how many more distinct buffers need - * to get xor'd to produce the missing unit. there's one for each + /* + * The last step is to figure out how many more distinct buffers need + * to get xor'd to produce the missing unit. there's one for each * user-data read node that overlaps the portion of the failed unit - * being accessed */ + * being accessed. + */ - for (foundit = i = 0, pda = asmap->physInfo; pda; i++, pda = pda->next) { + for (foundit = i = 0, pda = asmap->physInfo; + pda; i++, pda = pda->next) { if (pda == failedPDA) { i--; foundit = 1; @@ -1009,7 +1084,8 @@ rf_GenerateFailedAccessASMs( } } if (!foundit) { - RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA in asm list\n"); + RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA" + " in asm list.\n"); RF_ASSERT(0); } if (rf_degDagDebug) { @@ -1025,8 +1101,9 @@ rf_GenerateFailedAccessASMs( } -/* adjusts the offset and number of sectors in the destination pda so that - * it covers at most the region of the SU covered by the source PDA. This +/* + * Adjust the offset and number of sectors in the destination pda so that + * it covers at most the region of the SU covered by the source PDA. This * is exclusively a restriction: the number of sectors indicated by the * target PDA can only shrink. 
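A standalone sketch of the restriction described here (the diagram and rf_RangeRestrictPDA() itself follow below), working directly in stripe-unit offsets and ignoring the subAddr base and the buffer-pointer adjustment; hypothetical helper, not the driver routine.

/* Hypothetical helper; offsets are within one stripe unit, lengths in
 * sectors, and only the destination range is modified. */
static void
clamp_to_src(long src_start, long src_len, long *dst_start, long *dst_len)
{
    long src_end = src_start + src_len - 1;     /* inclusive */
    long dst_end = *dst_start + *dst_len - 1;   /* inclusive */
    long lo = (src_start > *dst_start) ? src_start : *dst_start;
    long hi = (src_end < dst_end) ? src_end : dst_end;

    *dst_start = lo;
    *dst_len = hi + 1 - lo;
    /* e.g. src covers sectors [8,15] and dst [0,11]: dst shrinks to
     * [8,11], i.e. 4 sectors; it can never grow. */
}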
* @@ -1047,72 +1124,80 @@ rf_GenerateFailedAccessASMs( * | rrrrrrrrrrrrrrrr | * */ -void -rf_RangeRestrictPDA( - RF_Raid_t * raidPtr, - RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest, - int dobuffer, - int doraidaddr) +void +rf_RangeRestrictPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *src, + RF_PhysDiskAddr_t *dest, int dobuffer, int doraidaddr) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector); - RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector); - RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector - 1); /* use -1 to be sure we - * stay within SU */ - RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector - 1); - RF_SectorNum_t subAddr = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->startSector); /* stripe unit boundary */ + RF_SectorNum_t soffs = + rf_StripeUnitOffset(layoutPtr, src->startSector); + RF_SectorNum_t doffs = + rf_StripeUnitOffset(layoutPtr, dest->startSector); + RF_SectorNum_t send = + rf_StripeUnitOffset(layoutPtr, src->startSector + + src->numSector - 1); /* Use -1 to be sure we stay within SU. */ + RF_SectorNum_t dend = + rf_StripeUnitOffset(layoutPtr, dest->startSector + + dest->numSector - 1); + RF_SectorNum_t subAddr = + rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, + dest->startSector); /* Stripe unit boundary. */ dest->startSector = subAddr + RF_MAX(soffs, doffs); dest->numSector = subAddr + RF_MIN(send, dend) + 1 - dest->startSector; if (dobuffer) - dest->bufPtr += (soffs > doffs) ? rf_RaidAddressToByte(raidPtr, soffs - doffs) : 0; + dest->bufPtr += (soffs > doffs) ? + rf_RaidAddressToByte(raidPtr, soffs - doffs) : 0; if (doraidaddr) { - dest->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->raidAddress) + + dest->raidAddress = + rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, + dest->raidAddress) + rf_StripeUnitOffset(layoutPtr, dest->startSector); } } + /* * Want the highest of these primes to be the largest one * less than the max expected number of columns (won't hurt * to be too small or too large, but won't be optimal, either) * --jimz */ -#define NLOWPRIMES 8 +#define NLOWPRIMES 8 static int lowprimes[NLOWPRIMES] = {2, 3, 5, 7, 11, 13, 17, 19}; + /***************************************************************************** - * compute the workload shift factor. (chained declustering) + * Compute the workload shift factor. (chained declustering) * - * return nonzero if access should shift to secondary, otherwise, - * access is to primary + * Return nonzero if access should shift to secondary, otherwise, + * access is to primary. *****************************************************************************/ -int -rf_compute_workload_shift( - RF_Raid_t * raidPtr, - RF_PhysDiskAddr_t * pda) +int +rf_compute_workload_shift(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda) { /* - * variables: - * d = column of disk containing primary - * f = column of failed disk - * n = number of disks in array - * sd = "shift distance" (number of columns that d is to the right of f) - * row = row of array the access is in - * v = numerator of redirection ratio - * k = denominator of redirection ratio + * Variables: + * d = Column of disk containing primary. + * f = Column of failed disk. + * n = Number of disks in array. + * sd = "shift distance" + * (number of columns that d is to the right of f). + * row = Row of array the access is in. + * v = Numerator of redirection ratio. 
+ * k = Denominator of redirection ratio. */ RF_RowCol_t d, f, sd, row, n; - int k, v, ret, i; + int k, v, ret, i; row = pda->row; n = raidPtr->numCol; - /* assign column of primary copy to d */ + /* Assign column of primary copy to d. */ d = pda->col; - /* assign column of dead disk to f */ - for (f = 0; ((!RF_DEAD_DISK(raidPtr->Disks[row][f].status)) && (f < n)); f++); + /* Assign column of dead disk to f. */ + for (f = 0; ((!RF_DEAD_DISK(raidPtr->Disks[row][f].status)) && + (f < n)); f++); RF_ASSERT(f < n); RF_ASSERT(f != d); @@ -1121,7 +1206,7 @@ rf_compute_workload_shift( RF_ASSERT(sd < n); /* - * v of every k accesses should be redirected + * v of every k accesses should be redirected. * * v/k := (n-1-sd)/(n-1) */ @@ -1131,10 +1216,10 @@ rf_compute_workload_shift( #if 1 /* * XXX - * Is this worth it? + * Is this worth it ? * * Now reduce the fraction, by repeatedly factoring - * out primes (just like they teach in elementary school!) + * out primes (just like they teach in elementary school !). */ for (i = 0; i < NLOWPRIMES; i++) { if (lowprimes[i] > v) @@ -1148,9 +1233,9 @@ rf_compute_workload_shift( raidPtr->hist_diskreq[row][d]++; if (raidPtr->hist_diskreq[row][d] > v) { - ret = 0; /* do not redirect */ + ret = 0; /* Do not redirect. */ } else { - ret = 1; /* redirect */ + ret = 1; /* Redirect. */ } #if 0 @@ -1159,33 +1244,34 @@ rf_compute_workload_shift( #endif if (raidPtr->hist_diskreq[row][d] >= k) { - /* reset counter */ + /* Reset counter. */ raidPtr->hist_diskreq[row][d] = 0; } return (ret); } + /* - * Disk selection routines + * Disk selection routines. */ /* - * Selects the disk with the shortest queue from a mirror pair. + * Select the disk with the shortest queue from a mirror pair. * Both the disk I/Os queued in RAIDframe as well as those at the physical - * disk are counted as members of the "queue" + * disk are counted as members of the "queue". */ -void -rf_SelectMirrorDiskIdle(RF_DagNode_t * node) +void +rf_SelectMirrorDiskIdle(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; RF_RowCol_t rowData, colData, rowMirror, colMirror; - int dataQueueLength, mirrorQueueLength, usemirror; + int dataQueueLength, mirrorQueueLength, usemirror; RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p; RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p; RF_PhysDiskAddr_t *tmp_pda; RF_RaidDisk_t **disks = raidPtr->Disks; RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; - /* return the [row col] of the disk with the shortest queue */ + /* Return the [row col] of the disk with the shortest queue. 
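Worked example for rf_compute_workload_shift() above (chained declustering): with n disks, failed column f and primary column d, the comment defines sd as how far d sits to the right of f, and v of every k accesses shift to the secondary copy with v/k = (n-1-sd)/(n-1). The sd computation and the divisor search below are illustrative assumptions; the driver reduces the fraction with its small-primes loop instead.

/* Hypothetical helper sketching the redirection ratio, not the driver code. */
static void
shift_ratio(int n, int f, int d, int *v, int *k)
{
    int sd = (d >= f) ? d - f : n + d - f;  /* columns d is right of f */
    int a = n - 1 - sd;
    int b = n - 1;
    int g;

    for (g = (a < b) ? a : b; g > 1; g--)
        if (a % g == 0 && b % g == 0) {
            a /= g;
            b /= g;
            break;
        }
    *v = a;
    *k = b;
    /* e.g. n=5, f=0, d=2: sd=2, ratio (5-1-2)/(5-1) = 2/4 -> 1/2, so one
     * of every two accesses is redirected to the surviving copy. */
}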
*/ rowData = data_pda->row; colData = data_pda->col; rowMirror = mirror_pda->row; @@ -1193,18 +1279,19 @@ rf_SelectMirrorDiskIdle(RF_DagNode_t * node) dataQueue = &(dqs[rowData][colData]); mirrorQueue = &(dqs[rowMirror][colMirror]); -#ifdef RF_LOCK_QUEUES_TO_READ_LEN +#ifdef RF_LOCK_QUEUES_TO_READ_LEN RF_LOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ +#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ dataQueueLength = dataQueue->queueLength + dataQueue->numOutstanding; -#ifdef RF_LOCK_QUEUES_TO_READ_LEN +#ifdef RF_LOCK_QUEUES_TO_READ_LEN RF_UNLOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle"); RF_LOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ - mirrorQueueLength = mirrorQueue->queueLength + mirrorQueue->numOutstanding; -#ifdef RF_LOCK_QUEUES_TO_READ_LEN +#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ + mirrorQueueLength = mirrorQueue->queueLength + + mirrorQueue->numOutstanding; +#ifdef RF_LOCK_QUEUES_TO_READ_LEN RF_UNLOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle"); -#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ +#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ usemirror = 0; if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) { @@ -1214,7 +1301,7 @@ rf_SelectMirrorDiskIdle(RF_DagNode_t * node) usemirror = 1; } else if (raidPtr->parity_good == RF_RAID_DIRTY) { - /* Trust only the main disk */ + /* Trust only the main disk. */ usemirror = 0; } else if (dataQueueLength < mirrorQueueLength) { @@ -1223,10 +1310,14 @@ rf_SelectMirrorDiskIdle(RF_DagNode_t * node) if (mirrorQueueLength < dataQueueLength) { usemirror = 1; } else { - /* queues are equal length. attempt - * cleverness. */ - if (SNUM_DIFF(dataQueue->last_deq_sector, data_pda->startSector) - <= SNUM_DIFF(mirrorQueue->last_deq_sector, mirror_pda->startSector)) { + /* Queues are equal length. */ + /* Attempt cleverness. */ + if (SNUM_DIFF(dataQueue + ->last_deq_sector, data_pda + ->startSector) <= + SNUM_DIFF(mirrorQueue + ->last_deq_sector, mirror_pda + ->startSector)) { usemirror = 0; } else { usemirror = 1; @@ -1234,22 +1325,23 @@ rf_SelectMirrorDiskIdle(RF_DagNode_t * node) } if (usemirror) { - /* use mirror (parity) disk, swap params 0 & 4 */ + /* Use mirror (parity) disk, swap params 0 & 4. */ tmp_pda = data_pda; node->params[0].p = mirror_pda; node->params[4].p = tmp_pda; } else { - /* use data disk, leave param 0 unchanged */ + /* Use data disk, leave param 0 unchanged. */ } - /* printf("dataQueueLength %d, mirrorQueueLength - * %d\n",dataQueueLength, mirrorQueueLength); */ + /*printf("dataQueueLength %d, mirrorQueueLength %d\n", dataQueueLength, + mirrorQueueLength);*/ } + /* * Do simple partitioning. This assumes that * the data and parity disks are laid out identically. */ -void -rf_SelectMirrorDiskPartition(RF_DagNode_t * node) +void +rf_SelectMirrorDiskPartition(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; RF_RowCol_t rowData, colData, rowMirror, colMirror; @@ -1258,9 +1350,9 @@ rf_SelectMirrorDiskPartition(RF_DagNode_t * node) RF_PhysDiskAddr_t *tmp_pda; RF_RaidDisk_t **disks = raidPtr->Disks; RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; - int usemirror; + int usemirror; - /* return the [row col] of the disk with the shortest queue */ + /* Return the [row col] of the disk with the shortest queue. 
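A minimal sketch of the selection policy in rf_SelectMirrorDiskIdle() above, leaving out the dead-disk and parity_good checks and using one request sector for both copies: prefer the side with fewer pending I/Os, and break ties by whichever disk last worked closest to the request.

#define SNUM_DIFF(a, b) (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))

/* Hypothetical reduced queue state; not the RAIDframe disk queue. */
struct side {
    int  pending;            /* queued + outstanding I/Os */
    long last_deq_sector;    /* sector the disk last dequeued */
};

static int   /* 1: use the mirror copy, 0: use the data copy */
use_mirror(const struct side *data, const struct side *mirror, long sector)
{
    if (data->pending != mirror->pending)
        return (mirror->pending < data->pending);
    /* Tie: prefer the disk whose last dequeued sector is closer. */
    return (SNUM_DIFF(mirror->last_deq_sector, sector) <
        SNUM_DIFF(data->last_deq_sector, sector));
}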
*/ rowData = data_pda->row; colData = data_pda->col; rowMirror = mirror_pda->row; @@ -1274,12 +1366,12 @@ rf_SelectMirrorDiskPartition(RF_DagNode_t * node) } else if (RF_DEAD_DISK(disks[rowData][colData].status)) { usemirror = 1; - } else + } else if (raidPtr->parity_good == RF_RAID_DIRTY) { - /* Trust only the main disk */ + /* Trust only the main disk. */ usemirror = 0; } else - if (data_pda->startSector < + if (data_pda->startSector < (disks[rowData][colData].numBlocks / 2)) { usemirror = 0; } else { @@ -1287,11 +1379,11 @@ rf_SelectMirrorDiskPartition(RF_DagNode_t * node) } if (usemirror) { - /* use mirror (parity) disk, swap params 0 & 4 */ + /* Use mirror (parity) disk, swap params 0 & 4. */ tmp_pda = data_pda; node->params[0].p = mirror_pda; node->params[4].p = tmp_pda; } else { - /* use data disk, leave param 0 unchanged */ + /* Use data disk, leave param 0 unchanged. */ } } diff --git a/sys/dev/raidframe/rf_dagutils.h b/sys/dev/raidframe/rf_dagutils.h index abd3fa8f520..2541363c2e6 100644 --- a/sys/dev/raidframe/rf_dagutils.h +++ b/sys/dev/raidframe/rf_dagutils.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_dagutils.h,v 1.2 1999/02/16 00:02:33 niklas Exp $ */ +/* $OpenBSD: rf_dagutils.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_dagutils.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,95 +28,85 @@ * rights to redistribute these changes. */ -/************************************************************************* +/***************************************************************************** * - * rf_dagutils.h -- header file for utility routines for manipulating DAGs + * rf_dagutils.h -- Header file for utility routines for manipulating DAGs. * - *************************************************************************/ + *****************************************************************************/ #include "rf_types.h" #include "rf_dagfuncs.h" #include "rf_general.h" -#ifndef _RF__RF_DAGUTILS_H_ -#define _RF__RF_DAGUTILS_H_ +#ifndef _RF__RF_DAGUTILS_H_ +#define _RF__RF_DAGUTILS_H_ struct RF_RedFuncs_s { - int (*regular) (RF_DagNode_t *); - char *RegularName; - int (*simple) (RF_DagNode_t *); - char *SimpleName; + int (*regular) (RF_DagNode_t *); + char *RegularName; + int (*simple) (RF_DagNode_t *); + char *SimpleName; }; extern RF_RedFuncs_t rf_xorFuncs; extern RF_RedFuncs_t rf_xorRecoveryFuncs; -void -rf_InitNode(RF_DagNode_t * node, RF_NodeStatus_t initstatus, - int commit, - int (*doFunc) (RF_DagNode_t * node), - int (*undoFunc) (RF_DagNode_t * node), - int (*wakeFunc) (RF_DagNode_t * node, int status), - int nSucc, int nAnte, int nParam, int nResult, - RF_DagHeader_t * hdr, char *name, RF_AllocListElem_t * alist); +void rf_InitNode(RF_DagNode_t *, RF_NodeStatus_t, int, int (*) (RF_DagNode_t *), + int (*) (RF_DagNode_t *), int (*) (RF_DagNode_t *, int), int, int, int, + int, RF_DagHeader_t *, char *, RF_AllocListElem_t *); - void rf_FreeDAG(RF_DagHeader_t * dag_h); +void rf_FreeDAG(RF_DagHeader_t *); - RF_PropHeader_t *rf_MakePropListEntry(RF_DagHeader_t * dag_h, int resultNum, - int paramNum, RF_PropHeader_t * next, RF_AllocListElem_t * allocList); +RF_PropHeader_t *rf_MakePropListEntry(RF_DagHeader_t *, int, int, +RF_PropHeader_t *, RF_AllocListElem_t *); - int rf_ConfigureDAGs(RF_ShutdownList_t ** listp); +int rf_ConfigureDAGs(RF_ShutdownList_t **); - RF_DagHeader_t *rf_AllocDAGHeader(void); +RF_DagHeader_t *rf_AllocDAGHeader(void); - void rf_FreeDAGHeader(RF_DagHeader_t * dh); +void 
rf_FreeDAGHeader(RF_DagHeader_t *); - void *rf_AllocBuffer(RF_Raid_t * raidPtr, RF_DagHeader_t * dag_h, - RF_PhysDiskAddr_t * pda, RF_AllocListElem_t * allocList); +void *rf_AllocBuffer(RF_Raid_t *, RF_DagHeader_t *, RF_PhysDiskAddr_t *, + RF_AllocListElem_t *); - char *rf_NodeStatusString(RF_DagNode_t * node); +char *rf_NodeStatusString(RF_DagNode_t *); - void rf_PrintNodeInfoString(RF_DagNode_t * node); +void rf_PrintNodeInfoString(RF_DagNode_t *); - int rf_AssignNodeNums(RF_DagHeader_t * dag_h); +int rf_AssignNodeNums(RF_DagHeader_t *); - int rf_RecurAssignNodeNums(RF_DagNode_t * node, int num, int unvisited); +int rf_RecurAssignNodeNums(RF_DagNode_t *, int, int); - void rf_ResetDAGHeaderPointers(RF_DagHeader_t * dag_h, RF_DagHeader_t * newptr); +void rf_ResetDAGHeaderPointers(RF_DagHeader_t *, RF_DagHeader_t *); - void rf_RecurResetDAGHeaderPointers(RF_DagNode_t * node, RF_DagHeader_t * newptr); +void rf_RecurResetDAGHeaderPointers(RF_DagNode_t *, RF_DagHeader_t *); - void rf_PrintDAGList(RF_DagHeader_t * dag_h); +void rf_PrintDAGList(RF_DagHeader_t *); - int rf_ValidateDAG(RF_DagHeader_t * dag_h); +int rf_ValidateDAG(RF_DagHeader_t *); - void rf_redirect_asm(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); +void rf_redirect_asm(RF_Raid_t *, RF_AccessStripeMap_t *); - void rf_MapUnaccessedPortionOfStripe(RF_Raid_t * raidPtr, - RF_RaidLayout_t * layoutPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - RF_AccessStripeMapHeader_t ** new_asm_h, int *nRodNodes, char **sosBuffer, - char **eosBuffer, RF_AllocListElem_t * allocList); +void rf_MapUnaccessedPortionOfStripe(RF_Raid_t *, RF_RaidLayout_t *, + RF_AccessStripeMap_t *, RF_DagHeader_t *, RF_AccessStripeMapHeader_t **, + int *, char **, char **, RF_AllocListElem_t *); - int rf_PDAOverlap(RF_RaidLayout_t * layoutPtr, RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest); +int rf_PDAOverlap(RF_RaidLayout_t *, RF_PhysDiskAddr_t *, RF_PhysDiskAddr_t *); - void rf_GenerateFailedAccessASMs(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_PhysDiskAddr_t * failedPDA, - RF_DagHeader_t * dag_h, RF_AccessStripeMapHeader_t ** new_asm_h, - int *nXorBufs, char **rpBufPtr, char *overlappingPDAs, - RF_AllocListElem_t * allocList); +void rf_GenerateFailedAccessASMs(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_PhysDiskAddr_t *, RF_DagHeader_t *, RF_AccessStripeMapHeader_t **, + int *, char **, char *, RF_AllocListElem_t *); -/* flags used by RangeRestrictPDA */ -#define RF_RESTRICT_NOBUFFER 0 -#define RF_RESTRICT_DOBUFFER 1 +/* Flags used by RangeRestrictPDA. */ +#define RF_RESTRICT_NOBUFFER 0 +#define RF_RESTRICT_DOBUFFER 1 - void rf_RangeRestrictPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * src, - RF_PhysDiskAddr_t * dest, int dobuffer, int doraidaddr); +void rf_RangeRestrictPDA(RF_Raid_t *, RF_PhysDiskAddr_t *, RF_PhysDiskAddr_t *, + int, int); - int rf_compute_workload_shift(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda); - void rf_SelectMirrorDiskIdle(RF_DagNode_t * node); - void rf_SelectMirrorDiskPartition(RF_DagNode_t * node); +int rf_compute_workload_shift(RF_Raid_t *, RF_PhysDiskAddr_t *); +void rf_SelectMirrorDiskIdle(RF_DagNode_t *); +void rf_SelectMirrorDiskPartition(RF_DagNode_t *); -#endif /* !_RF__RF_DAGUTILS_H_ */ +#endif /* ! 
_RF__RF_DAGUTILS_H_ */ diff --git a/sys/dev/raidframe/rf_debugMem.c b/sys/dev/raidframe/rf_debugMem.c index 731986fee9e..c533aba562b 100644 --- a/sys/dev/raidframe/rf_debugMem.c +++ b/sys/dev/raidframe/rf_debugMem.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_debugMem.c,v 1.4 2000/01/11 18:02:21 peter Exp $ */ +/* $OpenBSD: rf_debugMem.c,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_debugMem.c,v 1.7 2000/01/07 03:40:59 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,18 +28,19 @@ * rights to redistribute these changes. */ -/* debugMem.c: memory usage debugging stuff. +/* + * debugMem.c: Memory usage debugging stuff. + * * Malloc, Calloc, and Free are #defined everywhere * to do_malloc, do_calloc, and do_free. * - * if RF_UTILITY is nonzero, it means were compiling one of the - * raidframe utility programs, such as rfctrl or smd. In this + * If RF_UTILITY is nonzero, it means we are compiling one of the + * RAIDframe utility programs, such as rfctrl or smd. In this * case, we eliminate all references to the threads package * and to the allocation list stuff. */ #include "rf_types.h" - #include "rf_threadstuff.h" #include "rf_options.h" #include "rf_debugMem.h" @@ -46,72 +48,74 @@ static long tot_mem_in_use = 0; -/* Hash table of information about memory allocations */ -#define RF_MH_TABLESIZE 1000 +/* Hash table of information about memory allocations. */ +#define RF_MH_TABLESIZE 1000 struct mh_struct { - void *address; - int size; - int line; - char *filen; - char allocated; + void *address; + int size; + int line; + char *filen; + char allocated; struct mh_struct *next; }; + static struct mh_struct *mh_table[RF_MH_TABLESIZE]; -RF_DECLARE_MUTEX(rf_debug_mem_mutex) - static int mh_table_initialized = 0; - static void memory_hash_insert(void *addr, int size, int line, char *filen); - static int memory_hash_remove(void *addr, int sz); +RF_DECLARE_MUTEX(rf_debug_mem_mutex); +static int mh_table_initialized = 0; + +void rf_memory_hash_insert(void *, int, int, char *); +int rf_memory_hash_remove(void *, int); -void -rf_record_malloc(p, size, line, filen) - void *p; - int size, line; - char *filen; +void +rf_record_malloc(void *p, int size, int line, char *filen) { RF_ASSERT(size != 0); - /* RF_LOCK_MUTEX(rf_debug_mem_mutex); */ - memory_hash_insert(p, size, line, filen); + /*RF_LOCK_MUTEX(rf_debug_mem_mutex);*/ + rf_memory_hash_insert(p, size, line, filen); tot_mem_in_use += size; - /* RF_UNLOCK_MUTEX(rf_debug_mem_mutex); */ + /*RF_UNLOCK_MUTEX(rf_debug_mem_mutex);*/ + if ((long) p == rf_memDebugAddress) { - printf("Allocate: debug address allocated from line %d file %s\n", line, filen); + printf("Allocate: debug address allocated from line %d file" + " %s\n", line, filen); } } -void -rf_unrecord_malloc(p, sz) - void *p; - int sz; +void +rf_unrecord_malloc(void *p, int sz) { - int size; + int size; - /* RF_LOCK_MUTEX(rf_debug_mem_mutex); */ - size = memory_hash_remove(p, sz); + /*RF_LOCK_MUTEX(rf_debug_mem_mutex);*/ + size = rf_memory_hash_remove(p, sz); tot_mem_in_use -= size; - /* RF_UNLOCK_MUTEX(rf_debug_mem_mutex); */ + /*RF_UNLOCK_MUTEX(rf_debug_mem_mutex);*/ if ((long) p == rf_memDebugAddress) { - printf("Free: Found debug address\n"); /* this is really only a - * flag line for gdb */ + /* This is really only a flag line for gdb. 
*/ + printf("Free: Found debug address\n"); } } -void -rf_print_unfreed() +void +rf_print_unfreed(void) { - int i, foundone = 0; + int i, foundone = 0; struct mh_struct *p; for (i = 0; i < RF_MH_TABLESIZE; i++) { for (p = mh_table[i]; p; p = p->next) if (p->allocated) { if (!foundone) - printf("\n\nThere are unfreed memory locations at program shutdown:\n"); + printf("\n\nThere are unfreed" + " memory locations at" + " program shutdown:\n"); foundone = 1; printf("Addr 0x%lx Size %d line %d file %s\n", - (long) p->address, p->size, p->line, p->filen); + (long) p->address, p->size, p->line, + p->filen); } } if (tot_mem_in_use) { @@ -119,16 +123,15 @@ rf_print_unfreed() } } -int -rf_ConfigureDebugMem(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureDebugMem(RF_ShutdownList_t **listp) { - int i, rc; + int i, rc; rc = rf_create_managed_mutex(listp, &rf_debug_mem_mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); return (rc); } if (rf_memDebug) { @@ -138,20 +141,18 @@ rf_ConfigureDebugMem(listp) } return (0); } -#define HASHADDR(_a_) ( (((unsigned long) _a_)>>3) % RF_MH_TABLESIZE ) -static void -memory_hash_insert(addr, size, line, filen) - void *addr; - int size, line; - char *filen; +#define HASHADDR(_a_) ( (((unsigned long) _a_)>>3) % RF_MH_TABLESIZE ) + +void +rf_memory_hash_insert(void *addr, int size, int line, char *filen) { unsigned long bucket = HASHADDR(addr); struct mh_struct *p; RF_ASSERT(mh_table_initialized); - /* search for this address in the hash table */ + /* Search for this address in the hash table. */ for (p = mh_table[bucket]; p && (p->address != addr); p = p->next); if (!p) { RF_Malloc(p, sizeof(struct mh_struct), (struct mh_struct *)); @@ -162,8 +163,11 @@ memory_hash_insert(addr, size, line, filen) p->allocated = 0; } if (p->allocated) { - printf("ERROR: reallocated address 0x%lx from line %d, file %s without intervening free\n", (long) addr, line, filen); - printf(" last allocated from line %d file %s\n", p->line, p->filen); + printf("ERROR: Reallocated address 0x%lx from line %d," + " file %s without intervening free\n", (long) addr, + line, filen); + printf(" Last allocated from line %d file %s\n", + p->line, p->filen); RF_ASSERT(0); } p->size = size; @@ -172,10 +176,8 @@ memory_hash_insert(addr, size, line, filen) p->allocated = 1; } -static int -memory_hash_remove(addr, sz) - void *addr; - int sz; +int +rf_memory_hash_remove(void *addr, int sz) { unsigned long bucket = HASHADDR(addr); struct mh_struct *p; @@ -183,17 +185,24 @@ memory_hash_remove(addr, sz) RF_ASSERT(mh_table_initialized); for (p = mh_table[bucket]; p && (p->address != addr); p = p->next); if (!p) { - printf("ERROR: freeing never-allocated address 0x%lx\n", (long) addr); + printf("ERROR: Freeing never-allocated address 0x%lx\n", + (long) addr); RF_PANIC(); } if (!p->allocated) { - printf("ERROR: freeing unallocated address 0x%lx. Last allocation line %d file %s\n", (long) addr, p->line, p->filen); + printf("ERROR: Freeing unallocated address 0x%lx." + " Last allocation line %d file %s\n", (long) addr, + p->line, p->filen); RF_PANIC(); } - if (sz > 0 && p->size != sz) { /* you can suppress this error by - * using a negative value as the size - * to free */ - printf("ERROR: incorrect size at free for address 0x%lx: is %d should be %d. 
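rf_debugMem.c tracks every allocation in a pointer-keyed hash table: the address is shifted right three bits and reduced modulo the table size (the HASHADDR macro), and each bucket chains records of size, file and line. The following userland sketch shows the same bookkeeping in simplified form; the names track_alloc and track_free are hypothetical, and records are dropped on free rather than kept for the shutdown report.

#include <stdio.h>
#include <stdlib.h>

#define TABLESIZE	1000
#define HASHADDR(a)	((((unsigned long)(a)) >> 3) % TABLESIZE)

struct track {
	void		*addr;
	int		 size;
	int		 line;
	const char	*file;
	struct track	*next;
};

static struct track *table[TABLESIZE];

/* Remember where an allocation came from. */
static void
track_alloc(void *p, int size, int line, const char *file)
{
	unsigned long b = HASHADDR(p);
	struct track *t = malloc(sizeof(*t));

	if (t == NULL)
		return;
	t->addr = p;
	t->size = size;
	t->line = line;
	t->file = file;
	t->next = table[b];
	table[b] = t;
}

/* Check a free against the recorded allocation and drop the record. */
static int
track_free(void *p, int size)
{
	unsigned long b = HASHADDR(p);
	struct track **tp, *t;

	for (tp = &table[b]; (t = *tp) != NULL; tp = &t->next) {
		if (t->addr != p)
			continue;
		if (size > 0 && t->size != size)
			printf("size mismatch, alloc at %s:%d\n",
			    t->file, t->line);
		*tp = t->next;
		free(t);
		return (0);
	}
	printf("freeing never-allocated address %p\n", p);
	return (-1);
}

int
main(void)
{
	char *p = malloc(64);

	track_alloc(p, 64, __LINE__, __FILE__);
	track_free(p, 64);
	free(p);
	return (0);
}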
Alloc at line %d of file %s\n", (unsigned long) addr, sz, p->size, p->line, p->filen); + if (sz > 0 && p->size != sz) { + /* + * This error can be suppressed by using a negative value + * as the size to free. + */ + printf("ERROR: Incorrect size at free for address 0x%lx:" + " is %d should be %d. Alloc at line %d of file %s\n", + (unsigned long) addr, sz, p->size, p->line, p->filen); RF_PANIC(); } p->allocated = 0; diff --git a/sys/dev/raidframe/rf_debugMem.h b/sys/dev/raidframe/rf_debugMem.h index d189fb46224..a22b9dc5917 100644 --- a/sys/dev/raidframe/rf_debugMem.h +++ b/sys/dev/raidframe/rf_debugMem.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_debugMem.h,v 1.4 2000/01/07 14:50:20 peter Exp $ */ +/* $OpenBSD: rf_debugMem.h,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_debugMem.h,v 1.7 1999/09/05 01:58:11 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,57 +29,53 @@ */ /* - * rf_debugMem.h -- memory leak debugging module + * rf_debugMem.h -- Memory leak debugging module. * - * IMPORTANT: if you put the lock/unlock mutex stuff back in here, you - * need to take it out of the routines in debugMem.c + * IMPORTANT: If you put the lock/unlock mutex stuff back in here, you + * need to take it out of the routines in debugMem.c * */ -#ifndef _RF__RF_DEBUGMEM_H_ -#define _RF__RF_DEBUGMEM_H_ +#ifndef _RF__RF_DEBUGMEM_H_ +#define _RF__RF_DEBUGMEM_H_ #include "rf_alloclist.h" -#ifdef _KERNEL +#ifdef _KERNEL #include <sys/types.h> #include <sys/malloc.h> -#define RF_Malloc(_p_, _size_, _cast_) \ - { \ - _p_ = _cast_ malloc((u_long)_size_, M_RAIDFRAME, M_WAITOK); \ - bzero((char *)_p_, _size_); \ - if (rf_memDebug) rf_record_malloc(_p_, _size_, __LINE__, __FILE__); \ - } +#define RF_Malloc(_p_,_size_,_cast_) do { \ + _p_ = _cast_ malloc((u_long)_size_, M_RAIDFRAME, M_WAITOK); \ + bzero((char *)_p_, _size_); \ + if (rf_memDebug) \ + rf_record_malloc(_p_, _size_, __LINE__, __FILE__); \ +} while (0) -#define RF_MallocAndAdd(__p_, __size_, __cast_, __alist_) \ - { \ - RF_Malloc(__p_, __size_, __cast_); \ - if (__alist_) rf_AddToAllocList(__alist_, __p_, __size_); \ - } +#define RF_MallocAndAdd(__p_,__size_,__cast_,__alist_) do { \ + RF_Malloc(__p_, __size_, __cast_); \ + if (__alist_) rf_AddToAllocList(__alist_, __p_, __size_); \ +} while (0) -#define RF_Calloc(_p_, _nel_, _elsz_, _cast_) \ - { \ - RF_Malloc( _p_, (_nel_) * (_elsz_), _cast_); \ - } +#define RF_Calloc(_p_,_nel_,_elsz_,_cast_) \ + RF_Malloc( _p_, (_nel_) * (_elsz_), _cast_); -#define RF_CallocAndAdd(__p,__nel,__elsz,__cast,__alist) \ - { \ - RF_Calloc(__p, __nel, __elsz, __cast); \ - if (__alist) rf_AddToAllocList(__alist, __p, (__nel)*(__elsz)); \ - } +#define RF_CallocAndAdd(__p,__nel,__elsz,__cast,__alist) do { \ + RF_Calloc(__p, __nel, __elsz, __cast); \ + if (__alist) \ + rf_AddToAllocList(__alist, __p, (__nel)*(__elsz)); \ +} while (0) -#define RF_Free(_p_, _sz_) \ - { \ - free((void *)(_p_), M_RAIDFRAME); \ - if (rf_memDebug) rf_unrecord_malloc(_p_, (u_int32_t) (_sz_)); \ - } +#define RF_Free(_p_,_sz_) do { \ + free((void *)(_p_), M_RAIDFRAME); \ + if (rf_memDebug) rf_unrecord_malloc(_p_, (u_int32_t) (_sz_)); \ +} while (0) -#endif /* _KERNEL */ +#endif /* _KERNEL */ -void rf_record_malloc(void *p, int size, int line, char *filen); -void rf_unrecord_malloc(void *p, int sz); -void rf_print_unfreed(void); -int rf_ConfigureDebugMem(RF_ShutdownList_t ** listp); +void rf_record_malloc(void *, int, int, char *); +void rf_unrecord_malloc(void *, int); +void rf_print_unfreed(void); +int 
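The rewritten RF_Malloc/RF_Free macros above are wrapped in do { ... } while (0) so that a multi-statement body expands to a single statement and still requires a trailing semicolon. The small example below shows why that matters, using a hypothetical ZALLOC macro rather than a RAIDframe name: without the wrapper, the expansion inside the if would leave the else dangling.

#include <stdlib.h>
#include <string.h>

#define ZALLOC(p, sz) do {						\
	(p) = malloc(sz);						\
	if ((p) != NULL)						\
		memset((p), 0, (sz));					\
} while (0)

int
main(void)
{
	char *buf;
	int want = 1;

	if (want)
		ZALLOC(buf, 128);	/* expands cleanly before the else */
	else
		buf = NULL;

	free(buf);
	return (0);
}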
rf_ConfigureDebugMem(RF_ShutdownList_t **); -#endif /* !_RF__RF_DEBUGMEM_H_ */ +#endif /* ! _RF__RF_DEBUGMEM_H_ */ diff --git a/sys/dev/raidframe/rf_debugprint.c b/sys/dev/raidframe/rf_debugprint.c index f6546c135f8..93b0f7b1694 100644 --- a/sys/dev/raidframe/rf_debugprint.c +++ b/sys/dev/raidframe/rf_debugprint.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_debugprint.c,v 1.2 1999/02/16 00:02:34 niklas Exp $ */ +/* $OpenBSD: rf_debugprint.c,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_debugprint.c,v 1.3 1999/02/05 00:06:08 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -43,48 +44,50 @@ #include <sys/param.h> struct RF_Entry_s { - char *cstring; - void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; + char *cstring; + void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; }; + /* space for 1k lines */ -#define BUFSHIFT 10 -#define BUFSIZE (1<<BUFSHIFT) -#define BUFMASK (BUFSIZE-1) +#define BUFSHIFT 10 +#define BUFSIZE (1<<BUFSHIFT) +#define BUFMASK (BUFSIZE-1) static struct RF_Entry_s rf_debugprint_buf[BUFSIZE]; static int rf_debugprint_index = 0; -RF_DECLARE_STATIC_MUTEX(rf_debug_print_mutex) - int rf_ConfigureDebugPrint(listp) - RF_ShutdownList_t **listp; +RF_DECLARE_STATIC_MUTEX(rf_debug_print_mutex); + +int +rf_ConfigureDebugPrint(RF_ShutdownList_t **listp) { - int rc; + int rc; rc = rf_create_managed_mutex(listp, &rf_debug_print_mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); return (rc); } + rf_clear_debug_print_buffer(); return (0); } -void -rf_clear_debug_print_buffer() +void +rf_clear_debug_print_buffer(void) { - int i; + int i; for (i = 0; i < BUFSIZE; i++) rf_debugprint_buf[i].cstring = NULL; rf_debugprint_index = 0; } -void -rf_debug_printf(s, a1, a2, a3, a4, a5, a6, a7, a8) - char *s; - void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; +void +rf_debug_printf(char *s, void *a1, void *a2, void *a3, void *a4, void *a5, + void *a6, void *a7, void *a8) { - int idx; + int idx; if (rf_debugPrintUseBuffer) { @@ -107,28 +110,36 @@ rf_debug_printf(s, a1, a2, a3, a4, a5, a6, a7, a8) } } -void -rf_print_debug_buffer() +void +rf_print_debug_buffer(void) { rf_spill_debug_buffer(NULL); } -void -rf_spill_debug_buffer(fname) - char *fname; +void +rf_spill_debug_buffer(char *fname) { - int i; + int i; if (!rf_debugPrintUseBuffer) return; RF_LOCK_MUTEX(rf_debug_print_mutex); - for (i = rf_debugprint_index + 1; i != rf_debugprint_index; i = (i + 1) & BUFMASK) + for (i = rf_debugprint_index + 1; i != rf_debugprint_index; + i = (i + 1) & BUFMASK) if (rf_debugprint_buf[i].cstring) - printf(rf_debugprint_buf[i].cstring, rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, rf_debugprint_buf[i].a3, - rf_debugprint_buf[i].a4, rf_debugprint_buf[i].a5, rf_debugprint_buf[i].a6, rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8); - printf(rf_debugprint_buf[i].cstring, rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, rf_debugprint_buf[i].a3, - rf_debugprint_buf[i].a4, rf_debugprint_buf[i].a5, rf_debugprint_buf[i].a6, rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8); + printf(rf_debugprint_buf[i].cstring, + rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, + rf_debugprint_buf[i].a3, rf_debugprint_buf[i].a4, + rf_debugprint_buf[i].a5, rf_debugprint_buf[i].a6, + rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8); + + printf(rf_debugprint_buf[i].cstring, + rf_debugprint_buf[i].a1, rf_debugprint_buf[i].a2, + rf_debugprint_buf[i].a3, 
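rf_debugprint.c keeps its messages in a power-of-two ring: the index is advanced and masked with BUFMASK, so the newest entries silently overwrite the oldest, and a spill walks the ring to replay them. A compact userland sketch of the same idea follows; the names ring_log and ring_spill are illustrative, and the replay here simply starts at the oldest slot rather than reproducing the driver's exact loop.

#include <stdio.h>

#define BUFSHIFT	4			/* 16 entries for the demo */
#define BUFSIZE		(1 << BUFSHIFT)
#define BUFMASK		(BUFSIZE - 1)

static const char *ring[BUFSIZE];
static int ring_idx;

/* Record a message; the power-of-two mask makes wrap-around cheap. */
static void
ring_log(const char *msg)
{
	ring[ring_idx] = msg;
	ring_idx = (ring_idx + 1) & BUFMASK;
}

/* Replay from the oldest surviving entry up to the newest. */
static void
ring_spill(void)
{
	int i, n;

	for (n = 0, i = ring_idx; n < BUFSIZE; n++, i = (i + 1) & BUFMASK)
		if (ring[i] != NULL)
			printf("%s\n", ring[i]);
}

int
main(void)
{
	char msgs[32][24];
	int i;

	for (i = 0; i < 20; i++) {		/* more entries than slots */
		snprintf(msgs[i], sizeof(msgs[i]), "event %d", i);
		ring_log(msgs[i]);
	}
	ring_spill();				/* prints events 4..19 */
	return (0);
}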
rf_debugprint_buf[i].a4, + rf_debugprint_buf[i].a5, rf_debugprint_buf[i].a6, + rf_debugprint_buf[i].a7, rf_debugprint_buf[i].a8); + RF_UNLOCK_MUTEX(rf_debug_print_mutex); } diff --git a/sys/dev/raidframe/rf_debugprint.h b/sys/dev/raidframe/rf_debugprint.h index a0d1168f863..e03d1716ba3 100644 --- a/sys/dev/raidframe/rf_debugprint.h +++ b/sys/dev/raidframe/rf_debugprint.h @@ -1,8 +1,10 @@ -/* $OpenBSD: rf_debugprint.h,v 1.2 1999/02/16 00:02:34 niklas Exp $ */ +/* $OpenBSD: rf_debugprint.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_debugprint.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ + /* * rf_debugprint.h */ + /* * Copyright (c) 1996 Carnegie-Mellon University. * All rights reserved. @@ -30,15 +32,14 @@ * rights to redistribute these changes. */ -#ifndef _RF__RF_DEBUGPRINT_H_ -#define _RF__RF_DEBUGPRINT_H_ +#ifndef _RF__RF_DEBUGPRINT_H_ +#define _RF__RF_DEBUGPRINT_H_ -int rf_ConfigureDebugPrint(RF_ShutdownList_t ** listp); -void rf_clear_debug_print_buffer(void); -void -rf_debug_printf(char *s, void *a1, void *a2, void *a3, void *a4, - void *a5, void *a6, void *a7, void *a8); -void rf_print_debug_buffer(void); -void rf_spill_debug_buffer(char *fname); +int rf_ConfigureDebugPrint(RF_ShutdownList_t **); +void rf_clear_debug_print_buffer(void); +void rf_debug_printf(char *, void *, void *, void *, void *, void *, void *, + void *, void *); +void rf_print_debug_buffer(void); +void rf_spill_debug_buffer(char *); -#endif /* !_RF__RF_DEBUGPRINT_H_ */ +#endif /* ! _RF__RF_DEBUGPRINT_H_ */ diff --git a/sys/dev/raidframe/rf_decluster.c b/sys/dev/raidframe/rf_decluster.c index 3cae0d27f59..3568a03f4a7 100644 --- a/sys/dev/raidframe/rf_decluster.c +++ b/sys/dev/raidframe/rf_decluster.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_decluster.c,v 1.4 2000/08/08 16:07:40 peter Exp $ */ +/* $OpenBSD: rf_decluster.c,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_decluster.c,v 1.5 2000/03/07 01:54:29 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,26 +28,26 @@ * rights to redistribute these changes. */ -/*---------------------------------------------------------------------- +/***************************************************************************** * - * rf_decluster.c -- code related to the declustered layout + * rf_decluster.c -- Code related to the declustered layout. * * Created 10-21-92 (MCH) * - * Nov 93: adding support for distributed sparing. This code is a little - * complex: the basic layout used is as follows: - * let F = (v-1)/GCD(r,v-1). The spare space for each set of - * F consecutive fulltables is grouped together and placed after - * that set of tables. - * +------------------------------+ - * | F fulltables | - * | Spare Space | - * | F fulltables | - * | Spare Space | - * | ... | - * +------------------------------+ + * Nov 93: Adding support for distributed sparing. This code is a little + * complex; the basic layout used is as follows: + * Let F = (v-1)/GCD(r,v-1). The spare space for each set of + * F consecutive fulltables is grouped together and placed after + * that set of tables. + * +-------------------------------+ + * | F fulltables | + * | Spare Space | + * | F fulltables | + * | Spare Space | + * | ... 
| + * +-------------------------------+ * - *--------------------------------------------------------------------*/ + *****************************************************************************/ #include "rf_types.h" #include "rf_raid.h" @@ -59,46 +60,45 @@ #include "rf_general.h" #include "rf_shutdown.h" -extern int rf_copyback_in_progress; /* debug only */ +extern int rf_copyback_in_progress; /* Debug only. */ -/* found in rf_kintf.c */ -int rf_GetSpareTableFromDaemon(RF_SparetWait_t * req); +/* Found in rf_kintf.c */ +int rf_GetSpareTableFromDaemon(RF_SparetWait_t *); -/* configuration code */ +/* Configuration code. */ -int -rf_ConfigureDeclustered( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) +int +rf_ConfigureDeclustered(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int b, v, k, r, lambda; /* block design params */ - int i, j; + int b, v, k, r, lambda; /* block design params */ + int i, j; RF_RowCol_t *first_avail_slot; RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk; RF_DeclusteredConfigInfo_t *info; - RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk, - extraPUsPerDisk; + RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, + numCompleteSpareRegionsPerDisk, extraPUsPerDisk; RF_StripeCount_t totSparePUsPerDisk; RF_SectorNum_t diskOffsetOfLastFullTableInSUs; RF_SectorCount_t SpareSpaceInSUs; - char *cfgBuf = (char *) (cfgPtr->layoutSpecific); + char *cfgBuf = (char *) (cfgPtr->layoutSpecific); RF_StripeNum_t l, SUID; SUID = l = 0; numCompleteSpareRegionsPerDisk = 0; - /* 1. create layout specific structure */ - RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); + /* 1. Create layout specific structure. */ + RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), + (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); if (info == NULL) return (ENOMEM); layoutPtr->layoutSpecificInfo = (void *) info; info->SpareTable = NULL; - /* 2. extract parameters from the config structure */ + /* 2. Extract parameters from the config structure. */ if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { - (void) bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN); + bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN); } cfgBuf += RF_SPAREMAP_NAME_LEN; @@ -115,132 +115,181 @@ rf_ConfigureDeclustered( raidPtr->noRotate = *((int *) cfgBuf); cfgBuf += sizeof(int); - /* the sparemaps are generated assuming that parity is rotated, so we + /* + * The sparemaps are generated assuming that parity is rotated, so we * issue a warning if both distributed sparing and no-rotate are on at - * the same time */ - if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) { - RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n"); + * the same time. + */ + if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && + raidPtr->noRotate) { + RF_ERRORMSG("Warning: distributed sparing specified without" + " parity rotation.\n"); } if (raidPtr->numCol != v) { - RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol); + RF_ERRORMSG2("RAID: config error: table element count (%d)" + " not equal to no. of cols (%d).\n", v, raidPtr->numCol); return (EINVAL); } - /* 3. set up the values used in the mapping code */ + /* 3. Set up the values used in the mapping code. 
*/ info->BlocksPerTable = b; info->Lambda = lambda; info->NumParityReps = info->groupSize = k; - info->SUsPerTable = b * (k - 1) * layoutPtr->SUsPerPU; /* b blks, k-1 SUs each */ + /* b blks, k-1 SUs each. */ + info->SUsPerTable = b * (k - 1) * layoutPtr->SUsPerPU; info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */ info->PUsPerBlock = k - 1; info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU; info->TableDepthInPUs = (b * k) / v; - info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */ + /* k repetitions. */ + info->FullTableDepthInPUs = info->TableDepthInPUs * k; - /* used only in distributed sparing case */ - info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1); /* (v-1)/gcd fulltables */ + /* Used only in distributed sparing case. */ + /* (v-1)/gcd fulltables. */ + info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1); info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion; - info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v - 1)) * layoutPtr->SUsPerPU; + info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / + (v - 1)) * layoutPtr->SUsPerPU; - /* check to make sure the block design is sufficiently small */ + /* Check to make sure the block design is sufficiently small. */ if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n", + if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + + info->SpareSpaceDepthPerRegionInSUs > + layoutPtr->stripeUnitsPerDisk) { + RF_ERRORMSG3("RAID: config error: Full Table depth" + " (%d) + Spare Space (%d) larger than disk size" + " (%d) (BD too big).\n", (int) info->FullTableDepthInPUs, (int) info->SpareSpaceDepthPerRegionInSUs, (int) layoutPtr->stripeUnitsPerDisk); return (EINVAL); } } else { - if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n", - (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU), \ + if (info->TableDepthInPUs * layoutPtr->SUsPerPU > + layoutPtr->stripeUnitsPerDisk) { + RF_ERRORMSG2("RAID: config error: Table depth (%d)" + " larger than disk size (%d) (BD too big).\n", + (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU), (int) layoutPtr->stripeUnitsPerDisk); return (EINVAL); } } - /* compute the size of each disk, and the number of tables in the last - * fulltable (which need not be complete) */ + /* + * Compute the size of each disk, and the number of tables in the last + * fulltable (which need not be complete). 
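The distributed-sparing layout groups the spare space after every F = (v-1)/GCD(r, v-1) consecutive fulltables, which is what the rf_gcd() call above computes for FullTablesPerSpareRegion. A tiny sketch of that arithmetic using Euclid's algorithm; the block-design parameters v and r below are invented for illustration only.

#include <stdio.h>

/* Euclid's algorithm. */
static int
gcd(int a, int b)
{
	int t;

	while (b != 0) {
		t = a % b;
		a = b;
		b = t;
	}
	return (a);
}

int
main(void)
{
	int v = 41, r = 8;	/* invented block-design parameters */
	int F = (v - 1) / gcd(r, v - 1);

	printf("spare space grouped after every %d fulltables\n", F);
	return (0);
}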
+ */ if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU; - spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs + - (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v - 1)); - info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU; - - numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs; + PUsPerDisk = layoutPtr->stripeUnitsPerDisk / + layoutPtr->SUsPerPU; + spareRegionDepthInPUs = + (info->TablesPerSpareRegion * info->TableDepthInPUs + + (info->TablesPerSpareRegion * info->TableDepthInPUs) / + (v - 1)); + info->SpareRegionDepthInSUs = + spareRegionDepthInPUs * layoutPtr->SUsPerPU; + + numCompleteSpareRegionsPerDisk = + PUsPerDisk / spareRegionDepthInPUs; info->NumCompleteSRs = numCompleteSpareRegionsPerDisk; extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs; - /* assume conservatively that we need the full amount of spare + /* + * Assume conservatively that we need the full amount of spare * space in one region in order to provide spares for the - * partial spare region at the end of the array. We set "i" - * to the number of tables in the partial spare region. This - * may actually include some fulltables. */ - extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); + * partial spare region at the end of the array. We set "i" + * to the number of tables in the partial spare region. This + * may actually include some fulltables. + */ + extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / + layoutPtr->SUsPerPU); if (extraPUsPerDisk <= 0) i = 0; else i = extraPUsPerDisk / info->TableDepthInPUs; - complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion / k) + i / k); - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; + complete_FT_count = raidPtr->numRow * + (numCompleteSpareRegionsPerDisk * + (info->TablesPerSpareRegion / k) + i / k); + info->FullTableLimitSUID = + complete_FT_count * info->SUsPerFullTable; info->ExtraTablesPerDisk = i % k; - /* note that in the last spare region, the spare space is - * complete even though data/parity space is not */ - totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); + /* + * Note that in the last spare region, the spare space is + * complete even though data/parity space is not. 
+ */ + totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * + (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); info->TotSparePUsPerDisk = totSparePUsPerDisk; layoutPtr->stripeUnitsPerDisk = - ((complete_FT_count / raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */ + ((complete_FT_count / raidPtr->numRow) * + info->FullTableDepthInPUs + /* data & parity space */ info->ExtraTablesPerDisk * info->TableDepthInPUs + - totSparePUsPerDisk /* spare space */ + totSparePUsPerDisk /* spare space */ ) * layoutPtr->SUsPerPU; layoutPtr->dataStripeUnitsPerDisk = - (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs) - * layoutPtr->SUsPerPU * (k - 1) / k; + (complete_FT_count * info->FullTableDepthInPUs + + info->ExtraTablesPerDisk * info->TableDepthInPUs) * + layoutPtr->SUsPerPU * (k - 1) / k; } else { - /* non-dist spare case: force each disk to contain an - * integral number of tables */ - layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU); + /* + * Non-dist spare case: force each disk to contain an + * integral number of tables. + */ + layoutPtr->stripeUnitsPerDisk /= + (info->TableDepthInPUs * layoutPtr->SUsPerPU); + layoutPtr->stripeUnitsPerDisk *= + (info->TableDepthInPUs * layoutPtr->SUsPerPU); - /* compute the number of tables in the last fulltable, which - * need not be complete */ + /* + * Compute the number of tables in the last fulltable, which + * need not be complete. + */ complete_FT_count = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow; + ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / + info->FullTableDepthInPUs) * raidPtr->numRow; - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; + info->FullTableLimitSUID = + complete_FT_count * info->SUsPerFullTable; info->ExtraTablesPerDisk = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k; + ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / + info->TableDepthInPUs) % k; } - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * + layoutPtr->sectorsPerStripeUnit; - /* find the disk offset of the stripe unit where the last fulltable - * starts */ + /* + * Find the disk offset of the stripe unit where the last fulltable + * starts. + */ numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow; - diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * + info->FullTableDepthInPUs * layoutPtr->SUsPerPU; if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs; + SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * + info->SpareSpaceDepthPerRegionInSUs; diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs; info->DiskOffsetOfLastSpareSpaceChunkInSUs = - diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; + diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * + info->TableDepthInPUs * layoutPtr->SUsPerPU; } info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs; info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk; - /* 4. 
create and initialize the lookup tables */ + /* 4. Create and initialize the lookup tables. */ info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList); if (info->LayoutTable == NULL) return (ENOMEM); info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList); if (info->OffsetTable == NULL) return (ENOMEM); - info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); + info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * + layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); if (info->BlockTable == NULL) return (ENOMEM); @@ -252,18 +301,20 @@ rf_ConfigureDeclustered( for (j = 0; j < k; j++) info->LayoutTable[i][j] = *cfgBuf++; - /* initialize offset table */ + /* Initialize the offset table. */ for (i = 0; i < b; i++) for (j = 0; j < k; j++) { - info->OffsetTable[i][j] = first_avail_slot[info->LayoutTable[i][j]]; + info->OffsetTable[i][j] = + first_avail_slot[info->LayoutTable[i][j]]; first_avail_slot[info->LayoutTable[i][j]]++; } - /* initialize block table */ + /* Initialize the block table. */ for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) { for (i = 0; i < b; i++) { for (j = 0; j < k; j++) { - info->BlockTable[(info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l] + info->BlockTable[(info->OffsetTable[i][j] * + layoutPtr->SUsPerPU) + l] [info->LayoutTable[i][j]] = SUID; } SUID++; @@ -272,85 +323,91 @@ rf_ConfigureDeclustered( rf_free_1d_array(first_avail_slot, v); - /* 5. set up the remaining redundant-but-useful parameters */ + /* 5. Set up the remaining redundant-but-useful parameters. */ - raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * info->ExtraTablesPerDisk) * - info->SUsPerTable * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k - 1); + raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * + info->ExtraTablesPerDisk) * info->SUsPerTable * + layoutPtr->sectorsPerStripeUnit; + layoutPtr->numStripe = (raidPtr->totalSectors / + layoutPtr->sectorsPerStripeUnit) / (k - 1); - /* strange evaluation order below to try and minimize overflow - * problems */ - - layoutPtr->dataSectorsPerStripe = (k - 1) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + /* + * Strange evaluation order below to try and minimize overflow + * problems. + */ + + layoutPtr->dataSectorsPerStripe = + (k - 1) * layoutPtr->sectorsPerStripeUnit; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << + raidPtr->logBytesPerSector; layoutPtr->numDataCol = k - 1; layoutPtr->numParityCol = 1; return (0); } -/* declustering with distributed sparing */ -static void rf_ShutdownDeclusteredDS(RF_ThreadArg_t); -static void -rf_ShutdownDeclusteredDS(arg) - RF_ThreadArg_t arg; + +/* Declustering with distributed sparing. 
*/ +void rf_ShutdownDeclusteredDS(RF_ThreadArg_t); +void +rf_ShutdownDeclusteredDS(RF_ThreadArg_t arg) { RF_DeclusteredConfigInfo_t *info; RF_Raid_t *raidPtr; raidPtr = (RF_Raid_t *) arg; - info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + info = + (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; if (info->SpareTable) rf_FreeSpareTable(raidPtr); } -int -rf_ConfigureDeclusteredDS( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) +int +rf_ConfigureDeclusteredDS(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { - int rc; + int rc; rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr); if (rc) return (rc); + rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr); if (rc) { - RF_ERRORMSG1("Got %d adding shutdown event for DeclusteredDS\n", rc); + RF_ERRORMSG1("Got %d adding shutdown event for" + " DeclusteredDS.\n", rc); rf_ShutdownDeclusteredDS(raidPtr); return (rc); } + return (0); } -void -rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidSector; - RF_RowCol_t *row; - RF_RowCol_t *col; - RF_SectorNum_t *diskSector; - int remap; +void +rf_MapSectorDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; RF_StripeNum_t BlockID, BlockOffset, RepIndex; RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeCount_t fulltable_depth = + info->FullTableDepthInPUs * layoutPtr->SUsPerPU; RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, + &fulltable_depth, &base_suid); - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ + /* Fulltable ID within array (across rows). */ + FullTableID = SUID / sus_per_fulltable; if (raidPtr->numRow == 1) - *row = 0; /* avoid a mod and a div in the common case */ + *row = 0; /* Avoid a mod and a div in the common case. */ else { *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on - * this disk */ + /* Convert to fulltable ID on this disk. */ + FullTableID /= raidPtr->numRow; } if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { SpareRegion = FullTableID / info->FullTablesPerSpareRegion; @@ -367,294 +424,331 @@ rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap) BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0); *col = info->LayoutTable[BlockID][BlockOffset]; - /* remap to distributed spare space if indicated */ + /* Remap to distributed spare space if indicated. 
*/ if (remap) { - RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || - (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal)); - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU); + RF_ASSERT(raidPtr->Disks[*row][*col].status == + rf_ds_reconstructing || + raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || + (rf_copyback_in_progress && + raidPtr->Disks[*row][*col].status == rf_ds_optimal)); + rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, + TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, + &outSU); } else { outSU = base_suid; - outSU += FullTableID * fulltable_depth; /* offs to strt of FT */ - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */ - outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */ + outSU += FullTableID * fulltable_depth; + /* Offset to start of FT. */ + outSU += SpareSpace; + /* Skip rsvd spare space. */ + outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; + /* Offset to start of table. */ + outSU += info->OffsetTable[BlockID][BlockOffset] * + layoutPtr->SUsPerPU; + /* Offset to the PU. */ } - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within - * a PU */ + outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); + /* offs to the SU within a PU */ - /* convert SUs to sectors, and, if not aligned to SU boundary, add in - * offset to sector. */ - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); + /* + * Convert SUs to sectors, and, if not aligned to SU boundary, add in + * offset to sector. + */ + *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + + (raidSector % layoutPtr->sectorsPerStripeUnit); RF_ASSERT(*col != -1); } - -/* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail */ -void -rf_MapParityDeclustered( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +/* + * Prototyping this inexplicably causes the compile of the layout table + * (rf_layout.c) to fail. 
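rf_MapSectorDeclustered resolves an array address by peeling off one level at a time with integer division and remainder: sector to stripe unit, stripe unit to fulltable, fulltable offset to table, and so on. The sketch below shows only that decomposition pattern with made-up sizes; it is not the real block-design mapping, which additionally indexes the layout and offset tables.

#include <stdio.h>

int
main(void)
{
	unsigned long sectorsPerSU = 64;	/* sectors per stripe unit */
	unsigned long SUsPerTable = 40;
	unsigned long tablesPerFulltable = 5;
	unsigned long SUsPerFulltable = SUsPerTable * tablesPerFulltable;

	unsigned long raidSector = 123456;
	unsigned long SUID = raidSector / sectorsPerSU;
	unsigned long sectorInSU = raidSector % sectorsPerSU;

	unsigned long fulltable = SUID / SUsPerFulltable;
	unsigned long ftOffset = SUID % SUsPerFulltable;
	unsigned long table = ftOffset / SUsPerTable;
	unsigned long tableOffset = ftOffset % SUsPerTable;

	printf("fulltable %lu, table %lu, SU %lu, sector-in-SU %lu\n",
	    fulltable, table, tableOffset, sectorInSU);
	return (0);
}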
+ */ +void +rf_MapParityDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; RF_StripeNum_t BlockID, BlockOffset, RepIndex; RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeCount_t fulltable_depth = + info->FullTableDepthInPUs * layoutPtr->SUsPerPU; RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, + &fulltable_depth, &base_suid); - /* compute row & (possibly) spare space exactly as before */ + /* Compute row & (possibly) spare space exactly as before. */ FullTableID = SUID / sus_per_fulltable; if (raidPtr->numRow == 1) - *row = 0; /* avoid a mod and a div in the common case */ + *row = 0; /* Avoid a mod and a div in the common case. */ else { *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on - * this disk */ + /* Convert to fulltable ID on this disk. */ + FullTableID /= raidPtr->numRow; } if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { SpareRegion = FullTableID / info->FullTablesPerSpareRegion; SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; } - /* compute BlockID and RepIndex exactly as before */ + /* Compute BlockID and RepIndex exactly as before. */ FullTableOffset = SUID % sus_per_fulltable; TableID = FullTableOffset / info->SUsPerTable; TableOffset = FullTableOffset - TableID * info->SUsPerTable; - /* TableOffset = FullTableOffset % info->SUsPerTable; */ - /* BlockID = (TableOffset / info->PUsPerBlock) % - * info->BlocksPerTable; */ + /*TableOffset = FullTableOffset % info->SUsPerTable;*/ + /*BlockID = (TableOffset / info->PUsPerBlock) % + *info->BlocksPerTable;*/ BlockID = TableOffset / info->PUsPerBlock; - /* BlockOffset = TableOffset % info->PUsPerBlock; */ + /*BlockOffset = TableOffset % info->PUsPerBlock;*/ BlockOffset = TableOffset - BlockID * info->PUsPerBlock; BlockID %= info->BlocksPerTable; - /* the parity block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID; + /* The parity block is in the position indicated by RepIndex. */ + RepIndex = (raidPtr->noRotate) ? + info->PUsPerBlock : info->PUsPerBlock - TableID; *col = info->LayoutTable[BlockID][RepIndex]; if (remap) { - RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || - (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal)); - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 
1 : 0, SpareRegion, col, &outSU); + RF_ASSERT(raidPtr->Disks[*row][*col].status == + rf_ds_reconstructing || + raidPtr->Disks[*row][*col].status == rf_ds_dist_spared || + (rf_copyback_in_progress && + raidPtr->Disks[*row][*col].status == rf_ds_optimal)); + rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, + TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, + &outSU); } else { - /* compute sector as before, except use RepIndex instead of - * BlockOffset */ + /* + * Compute sector as before, except use RepIndex instead of + * BlockOffset. + */ outSU = base_suid; outSU += FullTableID * fulltable_depth; outSU += SpareSpace; /* skip rsvd spare space */ outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; - outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU; + outSU += info->OffsetTable[BlockID][RepIndex] * + layoutPtr->SUsPerPU; } outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); + *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + + (raidSector % layoutPtr->sectorsPerStripeUnit); RF_ASSERT(*col != -1); } -/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address. - * the caller must _never_ attempt to modify this array. + +/* + * Return an array of ints identifying the disks that comprise the stripe + * containing the indicated address. + * The caller must _never_ attempt to modify this array. */ -void -rf_IdentifyStripeDeclustered( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) +void +rf_IdentifyStripeDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t **diskids, RF_RowCol_t *outRow) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeCount_t fulltable_depth = + info->FullTableDepthInPUs * layoutPtr->SUsPerPU; RF_StripeNum_t base_suid = 0; RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr); RF_StripeNum_t stripeID, FullTableID; - int tableOffset; + int tableOffset; - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, + &fulltable_depth, &base_suid); + /* Fulltable ID within array (across rows). */ + FullTableID = SUID / sus_per_fulltable; *outRow = FullTableID % raidPtr->numRow; - stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset - * into array */ - tableOffset = (stripeID % info->BlocksPerTable); /* find offset into - * block design table */ + /* Find stripe offset into array. */ + stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); + /* Find offset into block design table. */ + tableOffset = (stripeID % info->BlocksPerTable); *diskids = info->LayoutTable[tableOffset]; } -/* This returns the default head-separation limit, which is measured - * in "required units for reconstruction". Each time a disk fetches - * a unit, it bumps a counter. 
The head-sep code prohibits any disk + +/* + * This returns the default head-separation limit, measured in + * "required units for reconstruction". Each time a disk fetches + * a unit, it bumps a counter. The head-sep code prohibits any disk * from getting more than headSepLimit counter values ahead of any * other. * * We assume here that the number of floating recon buffers is already - * set. There are r stripes to be reconstructed in each table, and so + * set. There are r stripes to be reconstructed in each table, and so * if we have a total of B buffers, we can have at most B/r tables - * under recon at any one time. In each table, lambda units are required + * under recon at any one time. In each table, lambda units are required * from each disk, so given B buffers, the head sep limit has to be - * (lambda*B)/r units. We subtract one to avoid weird boundary cases. + * (lambda*B)/r units. We subtract one to avoid weird boundary cases. * - * for example, suppose were given 50 buffers, r=19, and lambda=4 as in - * the 20.5 design. There are 19 stripes/table to be reconstructed, so + * For example, suppose we are given 50 buffers, r=19, and lambda=4 as in + * the 20.5 design. There are 19 stripes/table to be reconstructed, so * we can have 50/19 tables concurrently under reconstruction, which means * we can allow the fastest disk to get 50/19 tables ahead of the slower - * disk. There are lambda "required units" for each disk, so the fastest + * disk. There are lambda "required units" for each disk, so the fastest * disk can get 4*50/19 = 10 counter values ahead of the slowest. * * If numBufsToAccumulate is not 1, we need to limit the head sep further * because multiple bufs will be required for each stripe under recon. */ -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitDeclustered( - RF_Raid_t * raidPtr) +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t *raidPtr) { - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - return (info->Lambda * raidPtr->numFloatingReconBufs / info->TableDepthInPUs / rf_numBufsToAccumulate); + return (info->Lambda * raidPtr->numFloatingReconBufs / + info->TableDepthInPUs / rf_numBufsToAccumulate); } -/* returns the default number of recon buffers to use. The value - * is somewhat arbitrary...it's intended to be large enough to allow - * for a reasonably large head-sep limit, but small enough that you - * don't use up all your system memory with buffers. + +/* + * Return the default number of recon buffers to use. The value + * is somewhat arbitrary... It's intended to be large enough to + * allow for a reasonably large head-sep limit, but small enough + * that you don't use up all your system memory with buffers. */ -int -rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t * raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr) { return (100 * rf_numBufsToAccumulate); } -/* sectors in the last fulltable of the array need to be handled - * specially since this fulltable can be incomplete. this function + +/* + * Sectors in the last fulltable of the array need to be handled + * specially since this fulltable can be incomplete. This function * changes the values of certain params to handle this. * - * the idea here is that MapSector et. al. figure out which disk the + * The idea here is that MapSector et. al. 
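The worked figures in the comment above can be checked directly: with B = 50 floating reconstruction buffers, r = 19 stripes per table and lambda = 4 required units per disk, the fastest disk may run lambda*B/r = 10 counter values ahead of the slowest. A trivial sketch of that arithmetic follows; the variable names are for the sketch only, and the actual rf_GetDefaultHeadSepLimitDeclustered divides by TableDepthInPUs and rf_numBufsToAccumulate.

#include <stdio.h>

int
main(void)
{
	int B = 50, r = 19, lambda = 4;	/* figures quoted in the comment */
	int bufsToAccumulate = 1;	/* >1 tightens the limit further */
	int limit = lambda * B / r / bufsToAccumulate;

	printf("head-sep limit: %d units\n", limit);	/* prints 10 */
	return (0);
}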
figure out which disk the * addressed unit lives on by computing the modulos of the unit number * with the number of units per fulltable, table, etc. In the last * fulltable, there are fewer units per fulltable, so we need to adjust * the number of user data units per fulltable to reflect this. * - * so, we (1) convert the fulltable size and depth parameters to + * So, we (1) convert the fulltable size and depth parameters to * the size of the partial fulltable at the end, (2) compute the * disk sector offset where this fulltable starts, and (3) convert * the users stripe unit number from an offset into the array to * an offset into the last fulltable. */ -void -rf_decluster_adjust_params( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t * SUID, - RF_StripeCount_t * sus_per_fulltable, - RF_StripeCount_t * fulltable_depth, - RF_StripeNum_t * base_suid) +void +rf_decluster_adjust_params(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t *SUID, + RF_StripeCount_t *sus_per_fulltable, RF_StripeCount_t *fulltable_depth, + RF_StripeNum_t *base_suid) { - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; if (*SUID >= info->FullTableLimitSUID) { - /* new full table size is size of last full table on disk */ - *sus_per_fulltable = info->ExtraTablesPerDisk * info->SUsPerTable; + /* New full table size is size of last full table on disk. */ + *sus_per_fulltable = + info->ExtraTablesPerDisk * info->SUsPerTable; - /* new full table depth is corresponding depth */ - *fulltable_depth = info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; + /* New full table depth is corresponding depth. */ + *fulltable_depth = + info->ExtraTablesPerDisk * info->TableDepthInPUs * + layoutPtr->SUsPerPU; - /* set up the new base offset */ + /* Set up the new base offset. */ *base_suid = info->DiskOffsetOfLastFullTableInSUs; - /* convert users array address to an offset into the last - * fulltable */ + /* + * Convert user's array address to an offset into the last + * fulltable. + */ *SUID -= info->FullTableLimitSUID; } } + /* - * map a stripe ID to a parity stripe ID. + * Map a stripe ID to a parity stripe ID. * See comment above RaidAddressToParityStripeID in layout.c. */ -void -rf_MapSIDToPSIDDeclustered( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) +void +rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID, + RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru) { RF_DeclusteredConfigInfo_t *info; info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; - *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable)) - * info->BlocksPerTable + (stripeID % info->BlocksPerTable); - *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU)) - / info->BlocksPerTable; + *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable)) * + info->BlocksPerTable + (stripeID % info->BlocksPerTable); + *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU)) / + info->BlocksPerTable; RF_ASSERT((*which_ru) < layoutPtr->SUsPerPU / layoutPtr->SUsPerRU); } + /* * Called from MapSector and MapParity to retarget an access at the spare unit. * Modifies the "col" and "outSU" parameters only. 
*/ -void -rf_remap_to_spare_space( - RF_RaidLayout_t * layoutPtr, - RF_DeclusteredConfigInfo_t * info, - RF_RowCol_t row, - RF_StripeNum_t FullTableID, - RF_StripeNum_t TableID, - RF_SectorNum_t BlockID, - RF_StripeNum_t base_suid, - RF_StripeNum_t SpareRegion, - RF_RowCol_t * outCol, - RF_StripeNum_t * outSU) +void +rf_remap_to_spare_space(RF_RaidLayout_t *layoutPtr, + RF_DeclusteredConfigInfo_t *info, RF_RowCol_t row, + RF_StripeNum_t FullTableID, RF_StripeNum_t TableID, RF_SectorNum_t BlockID, + RF_StripeNum_t base_suid, RF_StripeNum_t SpareRegion, RF_RowCol_t *outCol, + RF_StripeNum_t *outSU) { - RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, lastSROffset, - which_ft; + RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, + lastSROffset, which_ft; /* - * note that FullTableID and hence SpareRegion may have gotten - * tweaked by rf_decluster_adjust_params. We detect this by - * noticing that base_suid is not 0. - */ + * Note that FullTableID and hence SpareRegion may have gotten + * tweaked by rf_decluster_adjust_params. We detect this by + * noticing that base_suid is not 0. + */ if (base_suid == 0) { ftID = FullTableID; } else { /* - * There may be > 1.0 full tables in the last (i.e. partial) - * spare region. find out which of these we're in. - */ - lastSROffset = info->NumCompleteSRs * info->SpareRegionDepthInSUs; - which_ft = (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU); - - /* compute the actual full table ID */ - ftID = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + which_ft; + * There may be > 1.0 full tables in the last (i.e. partial) + * spare region. Find out which of these we are in. + */ + lastSROffset = info->NumCompleteSRs * + info->SpareRegionDepthInSUs; + which_ft = + (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / + (info->FullTableDepthInPUs * layoutPtr->SUsPerPU); + + /* Compute the actual full table ID. */ + ftID = info->DiskOffsetOfLastFullTableInSUs / + (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + + which_ft; SpareRegion = info->NumCompleteSRs; } - TableInSpareRegion = (ftID * info->NumParityReps + TableID) % info->TablesPerSpareRegion; + TableInSpareRegion = (ftID * info->NumParityReps + TableID) % + info->TablesPerSpareRegion; *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk; RF_ASSERT(*outCol != -1); spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ? 
- info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU : - (SpareRegion + 1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs; - *outSU = spareTableStartSU + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs; + info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * + info->TableDepthInPUs * layoutPtr->SUsPerPU : + (SpareRegion + 1) * info->SpareRegionDepthInSUs - + info->SpareSpaceDepthPerRegionInSUs; + *outSU = spareTableStartSU + + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs; if (*outSU >= layoutPtr->stripeUnitsPerDisk) { - printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n", (long) *outSU); + printf("rf_remap_to_spare_space: invalid remapped disk SU" + " offset %ld.\n", (long) *outSU); } } -int -rf_InstallSpareTable( - RF_Raid_t * raidPtr, - RF_RowCol_t frow, - RF_RowCol_t fcol) +int +rf_InstallSpareTable(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol) { - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; RF_SparetWait_t *req; - int retcode; + int retcode; RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *)); req->C = raidPtr->numCol; @@ -664,76 +758,89 @@ rf_InstallSpareTable( req->TablesPerSpareRegion = info->TablesPerSpareRegion; req->BlocksPerTable = info->BlocksPerTable; req->TableDepthInPUs = info->TableDepthInPUs; - req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs; + req->SpareSpaceDepthPerRegionInSUs = + info->SpareSpaceDepthPerRegionInSUs; retcode = rf_GetSpareTableFromDaemon(req); - RF_ASSERT(!retcode); /* XXX -- fix this to recover gracefully -- - * XXX */ + RF_ASSERT(!retcode); + /* XXX -- Fix this to recover gracefully. -- XXX */ + return (retcode); } + /* * Invoked via ioctl to install a spare table in the kernel. */ -int -rf_SetSpareTable(raidPtr, data) - RF_Raid_t *raidPtr; - void *data; +int +rf_SetSpareTable(RF_Raid_t *raidPtr, void *data) { - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; RF_SpareTableEntry_t **ptrs; - int i, retcode; + int i, retcode; - /* what we need to copyin is a 2-d array, so first copyin the user - * pointers to the rows in the table */ - RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **)); - retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); + /* + * What we need to copyin is a 2-d array, so first copyin the user + * pointers to the rows in the table. + */ + RF_Malloc(ptrs, info->TablesPerSpareRegion * + sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **)); + retcode = copyin((caddr_t) data, (caddr_t) ptrs, + info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); if (retcode) return (retcode); - /* now allocate kernel space for the row pointers */ - RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **)); + /* Now allocate kernel space for the row pointers. 
*/ + RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * + sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **)); - /* now allocate kernel space for each row in the table, and copy it in - * from user space */ + /* + * Now allocate kernel space for each row in the table, and copy it in + * from user space. */ for (i = 0; i < info->TablesPerSpareRegion; i++) { - RF_Malloc(info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *)); - retcode = copyin(ptrs[i], info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t)); + RF_Malloc(info->SpareTable[i], info->BlocksPerTable * + sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *)); + retcode = copyin(ptrs[i], info->SpareTable[i], + info->BlocksPerTable * sizeof(RF_SpareTableEntry_t)); if (retcode) { - info->SpareTable = NULL; /* blow off the memory - * we've allocated */ + /* Blow off the memory we have allocated. */ + info->SpareTable = NULL; return (retcode); } } - /* free up the temporary array we used */ - RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); + /* Free up the temporary array we used. */ + RF_Free(ptrs, info->TablesPerSpareRegion * + sizeof(RF_SpareTableEntry_t *)); return (0); } -RF_ReconUnitCount_t -rf_GetNumSpareRUsDeclustered(raidPtr) - RF_Raid_t *raidPtr; +RF_ReconUnitCount_t +rf_GetNumSpareRUsDeclustered(RF_Raid_t *raidPtr) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - return (((RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk); + return (((RF_DeclusteredConfigInfo_t *) + layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk); } -void -rf_FreeSpareTable(raidPtr) - RF_Raid_t *raidPtr; +void +rf_FreeSpareTable(RF_Raid_t *raidPtr) { - long i; + long i; RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; RF_SpareTableEntry_t **table = info->SpareTable; for (i = 0; i < info->TablesPerSpareRegion; i++) { - RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t)); + RF_Free(table[i], info->BlocksPerTable * + sizeof(RF_SpareTableEntry_t)); } - RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *)); + RF_Free(table, info->TablesPerSpareRegion * + sizeof(RF_SpareTableEntry_t *)); info->SpareTable = (RF_SpareTableEntry_t **) NULL; } diff --git a/sys/dev/raidframe/rf_decluster.h b/sys/dev/raidframe/rf_decluster.h index e4e3ac40c7d..b1b3cc9a3c9 100644 --- a/sys/dev/raidframe/rf_decluster.h +++ b/sys/dev/raidframe/rf_decluster.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_decluster.h,v 1.2 1999/02/16 00:02:35 niklas Exp $ */ +/* $OpenBSD: rf_decluster.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_decluster.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,17 +28,17 @@ * rights to redistribute these changes. */ -/*---------------------------------------------------------------------- +/***************************************************************************** * - * decluster.h -- header file for declustered layout code + * decluster.h -- Header file for declustered layout code. 
* * Adapted from raidSim version July 1994 * Created 10-21-92 (MCH) * - *--------------------------------------------------------------------*/ + *****************************************************************************/ -#ifndef _RF__RF_DECLUSTER_H_ -#define _RF__RF_DECLUSTER_H_ +#ifndef _RF__RF_DECLUSTER_H_ +#define _RF__RF_DECLUSTER_H_ #include "rf_types.h" @@ -47,95 +48,98 @@ * the associated inverse mapping. */ struct RF_SpareTableEntry_s { - u_int spareDisk; /* disk to which this block is spared */ - u_int spareBlockOffsetInSUs; /* offset into spare table for that - * disk */ + u_int spareDisk; /* Disk where this block is spared. */ + u_int spareBlockOffsetInSUs; /* + * Offset into spare table for that + * disk. + */ }; -#define RF_SPAREMAP_NAME_LEN 128 -/* this is the layout-specific info structure for the declustered layout. +#define RF_SPAREMAP_NAME_LEN 128 + +/* + * This is the layout-specific info structure for the declustered layout. */ struct RF_DeclusteredConfigInfo_s { - RF_StripeCount_t groupSize; /* no. of stripe units per parity - * stripe */ - RF_RowCol_t **LayoutTable; /* the block design table */ - RF_RowCol_t **OffsetTable; /* the sector offset table */ - RF_RowCol_t **BlockTable; /* the block membership table */ - RF_StripeCount_t SUsPerFullTable; /* stripe units per full table */ - RF_StripeCount_t SUsPerTable; /* stripe units per table */ - RF_StripeCount_t PUsPerBlock; /* parity units per block */ - RF_StripeCount_t SUsPerBlock; /* stripe units per block */ - RF_StripeCount_t BlocksPerTable; /* block design tuples per - * table */ - RF_StripeCount_t NumParityReps; /* tables per full table */ - RF_StripeCount_t TableDepthInPUs; /* PUs on one disk in 1 table */ - RF_StripeCount_t FullTableDepthInPUs; /* PUs on one disk in 1 - * fulltable */ - RF_StripeCount_t FullTableLimitSUID; /* SU where partial fulltables - * start */ - RF_StripeCount_t ExtraTablesPerDisk; /* # of tables in last - * fulltable */ - RF_SectorNum_t DiskOffsetOfLastFullTableInSUs; /* disk offs of partial - * ft, if any */ - RF_StripeCount_t numCompleteFullTablesPerDisk; /* ft identifier of - * partial ft, if any */ - u_int Lambda; /* the pair count in the block design */ + /* Number of stripe units per parity stripe. */ + RF_StripeCount_t groupSize; + /* The block design table. */ + RF_RowCol_t **LayoutTable; + RF_RowCol_t **OffsetTable; + /* The sector offset table. */ + RF_RowCol_t **BlockTable; + /* The block membership table. */ + RF_StripeCount_t SUsPerFullTable; + /* Stripe units per full table. */ + RF_StripeCount_t SUsPerTable; + /* Stripe units per table. */ + RF_StripeCount_t PUsPerBlock; + /* Parity units per block. */ + RF_StripeCount_t SUsPerBlock; + /* Stripe units per block. */ + RF_StripeCount_t BlocksPerTable; + /* Block design tuples per table. */ + RF_StripeCount_t NumParityReps; + /* Tables per full table. */ + RF_StripeCount_t TableDepthInPUs; + /* PUs on one disk in 1 table. */ + RF_StripeCount_t FullTableDepthInPUs; + /* PUs on one disk in 1 fulltable. */ + RF_StripeCount_t FullTableLimitSUID; + /* SU where partial fulltables start. */ + RF_StripeCount_t ExtraTablesPerDisk; + /* Number of tables in last fulltable. */ + RF_SectorNum_t DiskOffsetOfLastFullTableInSUs; + /* Disk offsets of partial fulltable, if any. */ + RF_StripeCount_t numCompleteFullTablesPerDisk; + /* Fulltable identifier of partial fulltable, if any. */ + u_int Lambda; + /* The pair count in the block design. 
*/ - /* these are used only in the distributed-sparing case */ - RF_StripeCount_t FullTablesPerSpareRegion; /* # of ft's comprising - * 1 spare region */ - RF_StripeCount_t TablesPerSpareRegion; /* # of tables */ - RF_SectorCount_t SpareSpaceDepthPerRegionInSUs; /* spare - * space/disk/region */ - RF_SectorCount_t SpareRegionDepthInSUs; /* # of units/disk/region */ - RF_SectorNum_t DiskOffsetOfLastSpareSpaceChunkInSUs; /* locates sp space - * after partial ft */ - RF_StripeCount_t TotSparePUsPerDisk; /* total number of spare PUs - * per disk */ - RF_StripeCount_t NumCompleteSRs; - RF_SpareTableEntry_t **SpareTable; /* remap table for spare space */ - char sparemap_fname[RF_SPAREMAP_NAME_LEN]; /* where to find - * sparemap. not used in - * kernel */ + /* These are used only in the distributed-sparing case. */ + RF_StripeCount_t FullTablesPerSpareRegion; + /* Number of fulltables comprising 1 spare region. */ + RF_StripeCount_t TablesPerSpareRegion; + /* Number of tables. */ + RF_SectorCount_t SpareSpaceDepthPerRegionInSUs; + /* Spare space/disk/region. */ + RF_SectorCount_t SpareRegionDepthInSUs; + /* Number of units/disk/region. */ + RF_SectorNum_t DiskOffsetOfLastSpareSpaceChunkInSUs; + /* Locates spare space after partial fulltable. */ + RF_StripeCount_t TotSparePUsPerDisk; + /* Total number of spare PUs per disk. */ + RF_StripeCount_t NumCompleteSRs; + RF_SpareTableEntry_t **SpareTable; + /* Remap table for spare space. */ + char sparemap_fname[RF_SPAREMAP_NAME_LEN]; + /* Where to find sparemap. Not used in kernel. */ }; -int -rf_ConfigureDeclustered(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int -rf_ConfigureDeclusteredDS(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); +int rf_ConfigureDeclustered(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +int rf_ConfigureDeclusteredDS(RF_ShutdownList_t **, RF_Raid_t *, + RF_Config_t *); -void -rf_MapSectorDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeDeclustered(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -int rf_InstallSpareTable(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol); -void rf_FreeSpareTable(RF_Raid_t * raidPtr); +void rf_MapSectorDeclustered(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapParityDeclustered(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_IdentifyStripeDeclustered(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t **, + RF_RowCol_t *); +void rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t *, RF_StripeNum_t, + RF_StripeNum_t *, RF_ReconUnitNum_t *); +int rf_InstallSpareTable(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); +void rf_FreeSpareTable(RF_Raid_t *); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t * raidPtr); -int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t * raidPtr); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t *); +int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *); -void -rf_decluster_adjust_params(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t * 
SUID, RF_StripeCount_t * sus_per_fulltable, - RF_StripeCount_t * fulltable_depth, RF_StripeNum_t * base_suid); -void -rf_remap_to_spare_space( - RF_RaidLayout_t * layoutPtr, - RF_DeclusteredConfigInfo_t * info, RF_RowCol_t row, RF_StripeNum_t FullTableID, - RF_StripeNum_t TableID, RF_SectorNum_t BlockID, RF_StripeNum_t base_suid, - RF_StripeNum_t SpareRegion, RF_RowCol_t * outCol, RF_StripeNum_t * outSU); -int rf_SetSpareTable(RF_Raid_t * raidPtr, void *data); -RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(RF_Raid_t * raidPtr); +void rf_decluster_adjust_params(RF_RaidLayout_t *, RF_StripeNum_t *, + RF_StripeCount_t *, RF_StripeCount_t *, RF_StripeNum_t *); +void rf_remap_to_spare_space(RF_RaidLayout_t *, RF_DeclusteredConfigInfo_t *, + RF_RowCol_t, RF_StripeNum_t, RF_StripeNum_t, RF_SectorNum_t, + RF_StripeNum_t, RF_StripeNum_t, RF_RowCol_t *, RF_StripeNum_t *); +int rf_SetSpareTable(RF_Raid_t *, void *); +RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(RF_Raid_t *); -#endif /* !_RF__RF_DECLUSTER_H_ */ +#endif /* ! _RF__RF_DECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_declusterPQ.c b/sys/dev/raidframe/rf_declusterPQ.c index 1fa4692a96b..605071fbe97 100644 --- a/sys/dev/raidframe/rf_declusterPQ.c +++ b/sys/dev/raidframe/rf_declusterPQ.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_declusterPQ.c,v 1.3 2002/08/09 15:10:20 tdeval Exp $ */ +/* $OpenBSD: rf_declusterPQ.c,v 1.4 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_declusterPQ.c,v 1.3 1999/02/05 00:06:09 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,13 +28,13 @@ * rights to redistribute these changes. */ -/*-------------------------------------------------- +/***************************************************************************** * rf_declusterPQ.c * - * mapping code for declustered P & Q or declustered EvenOdd - * much code borrowed from rf_decluster.c + * Mapping code for declustered P & Q or declustered EvenOdd. + * Much code borrowed from rf_decluster.c * - *--------------------------------------------------*/ + *****************************************************************************/ #include "rf_types.h" @@ -46,26 +47,24 @@ #include "rf_alloclist.h" #include "rf_general.h" -/* configuration code */ +/* Configuration code. */ -int -rf_ConfigureDeclusteredPQ( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) +int +rf_ConfigureDeclusteredPQ(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - int b, v, k, r, lambda; /* block design params */ - int i, j, l; - int *first_avail_slot; - int complete_FT_count, SUID; + int b, v, k, r, lambda; /* block design params */ + int i, j, l; + int *first_avail_slot; + int complete_FT_count, SUID; RF_DeclusteredConfigInfo_t *info; - int numCompleteFullTablesPerDisk; - int PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk = 0, - extraPUsPerDisk; - int totSparePUsPerDisk; - int diskOffsetOfLastFullTableInSUs, SpareSpaceInSUs; - char *cfgBuf = (char *) (cfgPtr->layoutSpecific); + int numCompleteFullTablesPerDisk; + int PUsPerDisk, spareRegionDepthInPUs, extraPUsPerDisk; + int numCompleteSpareRegionsPerDisk = 0; + int totSparePUsPerDisk; + int diskOffsetOfLastFullTableInSUs, SpareSpaceInSUs; + char *cfgBuf = (char *)(cfgPtr->layoutSpecific); cfgBuf += RF_SPAREMAP_NAME_LEN; @@ -88,137 +87,186 @@ rf_ConfigureDeclusteredPQ( #endif /* RAIDDEBUG */ return (EINVAL); } - /* 1. 
create layout specific structure */ - RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); + /* 1. Create layout specific structure. */ + RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), + (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList); if (info == NULL) return (ENOMEM); layoutPtr->layoutSpecificInfo = (void *) info; - /* the sparemaps are generated assuming that parity is rotated, so we - * issue a warning if both distributed sparing and no-rotate are on at - * the same time */ - if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) { - RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n"); + /* + * 2. The sparemaps are generated assuming that parity is rotated, so + * we issue a warning if both distributed sparing and no-rotate are on + * at the same time. + */ + if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) && + raidPtr->noRotate) { + RF_ERRORMSG("Warning: distributed sparing specified without" + " parity rotation.\n"); } if (raidPtr->numCol != v) { - RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol); + RF_ERRORMSG2("RAID: config error: table element count (%d)" + " not equal to no. of cols (%d).\n", v, raidPtr->numCol); return (EINVAL); } - /* 3. set up the values used in devRaidMap */ + /* 3. Set up the values used in devRaidMap. */ info->BlocksPerTable = b; info->NumParityReps = info->groupSize = k; info->PUsPerBlock = k - 2; /* PQ */ - info->SUsPerTable = b * info->PUsPerBlock * layoutPtr->SUsPerPU; /* b blks, k-1 SUs each */ - info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */ + info->SUsPerTable = b * info->PUsPerBlock * layoutPtr->SUsPerPU; + /* b blks, k-1 SUs each. */ + info->SUsPerFullTable = k * info->SUsPerTable; /* Rot k times. */ info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU; info->TableDepthInPUs = (b * k) / v; - info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */ + info->FullTableDepthInPUs = info->TableDepthInPUs * k; + /* k repetitions. */ - /* used only in distributed sparing case */ - info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1); /* (v-1)/gcd fulltables */ + /* Used only in distributed sparing case. */ + info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1); + /* (v-1)/gcd fulltables. */ info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion; - info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v - 1)) * layoutPtr->SUsPerPU; + info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion + / (v - 1)) * layoutPtr->SUsPerPU; - /* check to make sure the block design is sufficiently small */ + /* Check to make sure the block design is sufficiently small. 
*/ if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n", - (int) info->FullTableDepthInPUs, - (int) info->SpareSpaceDepthPerRegionInSUs, - (int) layoutPtr->stripeUnitsPerDisk); + if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + + info->SpareSpaceDepthPerRegionInSUs > + layoutPtr->stripeUnitsPerDisk) { + RF_ERRORMSG3("RAID: config error: Full Table depth" + " (%d) + Spare Space (%d) larger than disk size" + " (%d) (BD too big).\n", + (int)info->FullTableDepthInPUs, + (int)info->SpareSpaceDepthPerRegionInSUs, + (int)layoutPtr->stripeUnitsPerDisk); return (EINVAL); } } else { - if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) { - RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n", + if (info->TableDepthInPUs * layoutPtr->SUsPerPU > + layoutPtr->stripeUnitsPerDisk) { + RF_ERRORMSG2("RAID: config error: Table depth (%d)" + " larger than disk size (%d) (BD too big).\n", (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU), (int) layoutPtr->stripeUnitsPerDisk); return (EINVAL); } } - - /* compute the size of each disk, and the number of tables in the last - * fulltable (which need not be complete) */ + /* + * Compute the size of each disk, and the number of tables in the last + * fulltable (which need not be complete). + */ if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU; - spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs + - (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v - 1)); - info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU; + PUsPerDisk = layoutPtr->stripeUnitsPerDisk / + layoutPtr->SUsPerPU; + spareRegionDepthInPUs = (info->TablesPerSpareRegion * + info->TableDepthInPUs + (info->TablesPerSpareRegion * + info->TableDepthInPUs) / (v - 1)); + info->SpareRegionDepthInSUs = spareRegionDepthInPUs * + layoutPtr->SUsPerPU; - numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs; + numCompleteSpareRegionsPerDisk = PUsPerDisk / + spareRegionDepthInPUs; info->NumCompleteSRs = numCompleteSpareRegionsPerDisk; extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs; - /* assume conservatively that we need the full amount of spare + /* + * Assume conservatively that we need the full amount of spare * space in one region in order to provide spares for the * partial spare region at the end of the array. We set "i" * to the number of tables in the partial spare region. This - * may actually include some fulltables. */ - extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); + * may actually include some fulltables. 
+ */ + extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / + layoutPtr->SUsPerPU); if (extraPUsPerDisk <= 0) i = 0; else i = extraPUsPerDisk / info->TableDepthInPUs; - complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion / k) + i / k); - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; + complete_FT_count = raidPtr->numRow * + (numCompleteSpareRegionsPerDisk * + (info->TablesPerSpareRegion / k) + i / k); + info->FullTableLimitSUID = complete_FT_count * + info->SUsPerFullTable; info->ExtraTablesPerDisk = i % k; - /* note that in the last spare region, the spare space is - * complete even though data/parity space is not */ - totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); + /* + * Note that in the last spare region, the spare space is + * complete even though data/parity space is not. + */ + totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * + (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU); info->TotSparePUsPerDisk = totSparePUsPerDisk; layoutPtr->stripeUnitsPerDisk = - ((complete_FT_count / raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */ + ((complete_FT_count / raidPtr->numRow) * + info->FullTableDepthInPUs + /* data & parity space */ info->ExtraTablesPerDisk * info->TableDepthInPUs + totSparePUsPerDisk /* spare space */ ) * layoutPtr->SUsPerPU; layoutPtr->dataStripeUnitsPerDisk = - (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs) + (complete_FT_count * info->FullTableDepthInPUs + + info->ExtraTablesPerDisk * info->TableDepthInPUs) * layoutPtr->SUsPerPU * (k - 1) / k; } else { - /* non-dist spare case: force each disk to contain an - * integral number of tables */ - layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU); - - /* compute the number of tables in the last fulltable, which - * need not be complete */ - complete_FT_count = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow; - - info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable; - info->ExtraTablesPerDisk = - ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k; + /* + * Non-dist spare case: force each disk to contain an + * integral number of tables. + */ + layoutPtr->stripeUnitsPerDisk /= + (info->TableDepthInPUs * layoutPtr->SUsPerPU); + layoutPtr->stripeUnitsPerDisk *= + (info->TableDepthInPUs * layoutPtr->SUsPerPU); + + /* + * Compute the number of tables in the last fulltable, which + * need not be complete. + */ + complete_FT_count = ((layoutPtr->stripeUnitsPerDisk / + layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * + raidPtr->numRow; + + info->FullTableLimitSUID = complete_FT_count * + info->SUsPerFullTable; + info->ExtraTablesPerDisk = ((layoutPtr->stripeUnitsPerDisk / + layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k; } - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * + layoutPtr->sectorsPerStripeUnit; - /* find the disk offset of the stripe unit where the last fulltable - * starts */ + /* + * Find the disk offset of the stripe unit where the last + * fulltable starts. 
+ */ numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow; - diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * + info->FullTableDepthInPUs * layoutPtr->SUsPerPU; if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs; + SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * + info->SpareSpaceDepthPerRegionInSUs; diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs; info->DiskOffsetOfLastSpareSpaceChunkInSUs = - diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU; + diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * + info->TableDepthInPUs * layoutPtr->SUsPerPU; } info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs; info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk; - /* 4. create and initialize the lookup tables */ + /* 4. Create and initialize the lookup tables. */ info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList); if (info->LayoutTable == NULL) return (ENOMEM); + info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList); if (info->OffsetTable == NULL) return (ENOMEM); - info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); + + info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * + layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList); if (info->BlockTable == NULL) return (ENOMEM); @@ -230,18 +278,20 @@ rf_ConfigureDeclusteredPQ( for (j = 0; j < k; j++) info->LayoutTable[i][j] = *cfgBuf++; - /* initialize offset table */ + /* Initialize offset table. */ for (i = 0; i < b; i++) for (j = 0; j < k; j++) { - info->OffsetTable[i][j] = first_avail_slot[info->LayoutTable[i][j]]; + info->OffsetTable[i][j] = + first_avail_slot[info->LayoutTable[i][j]]; first_avail_slot[info->LayoutTable[i][j]]++; } - /* initialize block table */ + /* Initialize block table. */ for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) { for (i = 0; i < b; i++) { for (j = 0; j < k; j++) { - info->BlockTable[(info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l] + info->BlockTable[(info->OffsetTable[i][j] * + layoutPtr->SUsPerPU) + l] [info->LayoutTable[i][j]] = SUID; } SUID++; @@ -250,57 +300,63 @@ rf_ConfigureDeclusteredPQ( rf_free_1d_array(first_avail_slot, v); - /* 5. set up the remaining redundant-but-useful parameters */ - - raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * info->ExtraTablesPerDisk) * - info->SUsPerTable * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k - 2); + /* 5. Set up the remaining redundant-but-useful parameters. */ - /* strange evaluation order below to try and minimize overflow - * problems */ + raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * + info->ExtraTablesPerDisk) * info->SUsPerTable * + layoutPtr->sectorsPerStripeUnit; + layoutPtr->numStripe = (raidPtr->totalSectors / + layoutPtr->sectorsPerStripeUnit) / (k - 2); - layoutPtr->dataSectorsPerStripe = (k - 2) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + /* + * Strange evaluation order below to try and minimize overflow + * problems. 
+ */ + + layoutPtr->dataSectorsPerStripe = (k - 2) * + layoutPtr->sectorsPerStripeUnit; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << + raidPtr->logBytesPerSector; layoutPtr->numDataCol = k - 2; layoutPtr->numParityCol = 2; return (0); } -int -rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t * raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t *raidPtr) { - int def_decl; + int def_decl; def_decl = rf_GetDefaultNumFloatingReconBuffersDeclustered(raidPtr); return (RF_MAX(3 * raidPtr->numCol, def_decl)); } -void -rf_MapSectorDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapSectorDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; RF_StripeNum_t BlockID, BlockOffset, RepIndex; RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * + layoutPtr->SUsPerPU; RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0; - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, + &fulltable_depth, &base_suid); - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ + /* Fulltable ID within array (across rows). */ + FullTableID = SUID / sus_per_fulltable; *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this - * disk */ + + /* Convert to fulltable ID on this disk. */ + FullTableID /= raidPtr->numRow; + if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { SpareRegion = FullTableID / info->FullTablesPerSpareRegion; SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; @@ -313,71 +369,82 @@ rf_MapSectorDeclusteredPQ( BlockID %= info->BlocksPerTable; RF_ASSERT(BlockOffset < info->groupSize - 2); /* - TableIDs go from 0 .. GroupSize-1 inclusive. - PUsPerBlock is k-2. - We want the tableIDs to rotate from the - right, so use GroupSize - */ + * TableIDs go from 0 .. GroupSize-1 inclusive. + * PUsPerBlock is k-2. + * We want the tableIDs to rotate from the + * right, so use GroupSize. + */ RepIndex = info->groupSize - 1 - TableID; RF_ASSERT(RepIndex >= 0); if (!raidPtr->noRotate) { if (TableID == 0) - BlockOffset++; /* P on last drive, Q on first */ + /* P on last drive, Q on first. */ + BlockOffset++; else - BlockOffset += ((BlockOffset >= RepIndex) ? 2 : 0); /* skip over PQ */ + /* Skip over PQ. */ + BlockOffset += ((BlockOffset >= RepIndex) ? 2 : 0); + RF_ASSERT(BlockOffset < info->groupSize); *col = info->LayoutTable[BlockID][BlockOffset]; } - /* remap to distributed spare space if indicated */ + /* Remap to distributed spare space if indicated. */ if (remap) { - rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 
1 : 0, SpareRegion, col, &outSU); + rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, + TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, + &outSU); } else { outSU = base_suid; - outSU += FullTableID * fulltable_depth; /* offs to strt of FT */ - outSU += SpareSpace; /* skip rsvd spare space */ - outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */ - outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */ + outSU += FullTableID * fulltable_depth; + /* Offset to strt of FT. */ + outSU += SpareSpace; + /* Skip reserved spare space. */ + outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; + /* Offset to start of table. */ + outSU += info->OffsetTable[BlockID][BlockOffset] * + layoutPtr->SUsPerPU; + /* Offset to the PU. */ } - outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within - * a PU */ + outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); + /* Offset to the SU within a PU. */ - /* convert SUs to sectors, and, if not aligned to SU boundary, add in - * offset to sector */ - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); + /* + * Convert SUs to sectors, and, if not aligned to SU boundary, add in + * offset to sector. + */ + *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + + (raidSector % layoutPtr->sectorsPerStripeUnit); } -void -rf_MapParityDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapParityDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; RF_StripeNum_t BlockID, BlockOffset, RepIndex; RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * + layoutPtr->SUsPerPU; RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace = 0; - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, + &fulltable_depth, &base_suid); - /* compute row & (possibly) spare space exactly as before */ + /* Compute row & (possibly) spare space exactly as before. */ FullTableID = SUID / sus_per_fulltable; *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this - * disk */ + /* Convert to fulltable ID on this disk. */ + FullTableID /= raidPtr->numRow; if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { SpareRegion = FullTableID / info->FullTablesPerSpareRegion; SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; } - /* compute BlockID and RepIndex exactly as before */ + /* Compute BlockID and RepIndex exactly as before. 
*/ FullTableOffset = SUID % sus_per_fulltable; TableID = FullTableOffset / info->SUsPerTable; TableOffset = FullTableOffset - TableID * info->SUsPerTable; @@ -385,15 +452,18 @@ rf_MapParityDeclusteredPQ( BlockOffset = TableOffset - BlockID * info->PUsPerBlock; BlockID %= info->BlocksPerTable; - /* the parity block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID; + /* The parity block is in the position indicated by RepIndex. */ + RepIndex = (raidPtr->noRotate) ? + info->PUsPerBlock : info->groupSize - 1 - TableID; *col = info->LayoutTable[BlockID][RepIndex]; if (remap) RF_PANIC(); - /* compute sector as before, except use RepIndex instead of - * BlockOffset */ + /* + * Compute sector as before, except use RepIndex instead of + * BlockOffset. + */ outSU = base_suid; outSU += FullTableID * fulltable_depth; outSU += SpareSpace; /* skip rsvd spare space */ @@ -401,39 +471,38 @@ rf_MapParityDeclusteredPQ( outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU; outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); + *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + + (raidSector % layoutPtr->sectorsPerStripeUnit); } -void -rf_MapQDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapQDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit; RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset; RF_StripeNum_t BlockID, BlockOffset, RepIndex, RepIndexQ; RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * + layoutPtr->SUsPerPU; RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace = 0; - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, + &fulltable_depth, &base_suid); - /* compute row & (possibly) spare space exactly as before */ + /* Compute row & (possibly) spare space exactly as before. */ FullTableID = SUID / sus_per_fulltable; *row = FullTableID % raidPtr->numRow; - FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this - * disk */ + /* Convert to fulltable ID on this disk. */ + FullTableID /= raidPtr->numRow; if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { SpareRegion = FullTableID / info->FullTablesPerSpareRegion; SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs; } - /* compute BlockID and RepIndex exactly as before */ + /* Compute BlockID and RepIndex exactly as before. 
*/ FullTableOffset = SUID % sus_per_fulltable; TableID = FullTableOffset / info->SUsPerTable; TableOffset = FullTableOffset - TableID * info->SUsPerTable; @@ -441,16 +510,19 @@ rf_MapQDeclusteredPQ( BlockOffset = TableOffset - BlockID * info->PUsPerBlock; BlockID %= info->BlocksPerTable; - /* the q block is in the position indicated by RepIndex */ - RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID; + /* The q block is in the position indicated by RepIndex. */ + RepIndex = (raidPtr->noRotate) ? + info->PUsPerBlock : info->groupSize - 1 - TableID; RepIndexQ = ((RepIndex == (info->groupSize - 1)) ? 0 : RepIndex + 1); *col = info->LayoutTable[BlockID][RepIndexQ]; if (remap) RF_PANIC(); - /* compute sector as before, except use RepIndex instead of - * BlockOffset */ + /* + * Compute sector as before, except use RepIndex instead of + * BlockOffset. + */ outSU = base_suid; outSU += FullTableID * fulltable_depth; outSU += SpareSpace; /* skip rsvd spare space */ @@ -458,34 +530,38 @@ rf_MapQDeclusteredPQ( outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); outSU += info->OffsetTable[BlockID][RepIndexQ] * layoutPtr->SUsPerPU; - *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit); + *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + + (raidSector % layoutPtr->sectorsPerStripeUnit); } -/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address. - * the caller must _never_ attempt to modify this array. + +/* + * Returns an array of ints identifying the disks that comprise the stripe + * containing the indicated address. + * The caller must _never_ attempt to modify this array. */ -void -rf_IdentifyStripeDeclusteredPQ( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) +void +rf_IdentifyStripeDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t **diskids, RF_RowCol_t *outRow) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; + RF_DeclusteredConfigInfo_t *info = + (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable; - RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU; + RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * + layoutPtr->SUsPerPU; RF_StripeNum_t base_suid = 0; RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr); RF_StripeNum_t stripeID, FullTableID; - int tableOffset; + int tableOffset; - rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid); - FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array - * (across rows) */ + rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, + &fulltable_depth, &base_suid); + /* Fulltable ID within array (across rows). */ + FullTableID = SUID / sus_per_fulltable; *outRow = FullTableID % raidPtr->numRow; - stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset - * into array */ - tableOffset = (stripeID % info->BlocksPerTable); /* find offset into - * block design table */ + /* Find stripe offset into array. */ + stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); + /* Find offset into block design table. 
*/ + tableOffset = (stripeID % info->BlocksPerTable); *diskids = info->LayoutTable[tableOffset]; } diff --git a/sys/dev/raidframe/rf_declusterPQ.h b/sys/dev/raidframe/rf_declusterPQ.h index f83ec3d7d5d..9d9d6c421c3 100644 --- a/sys/dev/raidframe/rf_declusterPQ.h +++ b/sys/dev/raidframe/rf_declusterPQ.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_declusterPQ.h,v 1.2 1999/02/16 00:02:35 niklas Exp $ */ +/* $OpenBSD: rf_declusterPQ.h,v 1.3 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_declusterPQ.h,v 1.3 1999/02/05 00:06:09 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,26 +28,21 @@ * rights to redistribute these changes. */ -#ifndef _RF__RF_DECLUSTERPQ_H_ -#define _RF__RF_DECLUSTERPQ_H_ +#ifndef _RF__RF_DECLUSTERPQ_H_ +#define _RF__RF_DECLUSTERPQ_H_ #include "rf_types.h" -int -rf_ConfigureDeclusteredPQ(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t * raidPtr); -void -rf_MapSectorDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapQDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeDeclusteredPQ(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); +int rf_ConfigureDeclusteredPQ(RF_ShutdownList_t **, RF_Raid_t *, + RF_Config_t *); +int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t *); +void rf_MapSectorDeclusteredPQ(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapParityDeclusteredPQ(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapQDeclusteredPQ(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_IdentifyStripeDeclusteredPQ(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t **, + RF_RowCol_t *); -#endif /* !_RF__RF_DECLUSTERPQ_H_ */ +#endif /* ! _RF__RF_DECLUSTERPQ_H_ */ diff --git a/sys/dev/raidframe/rf_desc.h b/sys/dev/raidframe/rf_desc.h index 84b4cb109a8..1b673a9d8d3 100644 --- a/sys/dev/raidframe/rf_desc.h +++ b/sys/dev/raidframe/rf_desc.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_desc.h,v 1.5 2000/01/11 18:02:21 peter Exp $ */ +/* $OpenBSD: rf_desc.h,v 1.6 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_desc.h,v 1.5 2000/01/09 00:00:18 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,8 +28,8 @@ * rights to redistribute these changes. 
*/ -#ifndef _RF__RF_DESC_H_ -#define _RF__RF_DESC_H_ +#ifndef _RF__RF_DESC_H_ +#define _RF__RF_DESC_H_ #include "rf_archs.h" #include "rf_types.h" @@ -36,78 +37,115 @@ #include "rf_dag.h" struct RF_RaidReconDesc_s { - RF_Raid_t *raidPtr; /* raid device descriptor */ - RF_RowCol_t row; /* row of failed disk */ - RF_RowCol_t col; /* col of failed disk */ - int state; /* how far along the reconstruction operation - * has gotten */ - RF_RaidDisk_t *spareDiskPtr; /* describes target disk for recon - * (not used in dist sparing) */ - int numDisksDone; /* the number of surviving disks that have - * completed their work */ - RF_RowCol_t srow; /* row ID of the spare disk (not used in dist - * sparing) */ - RF_RowCol_t scol; /* col ID of the spare disk (not used in dist - * sparing) */ + RF_Raid_t *raidPtr; /* Raid device descriptor. */ + RF_RowCol_t row; /* Row of failed disk. */ + RF_RowCol_t col; /* Col of failed disk. */ + int state; /* + * How far along the reconstruction + * operation has gotten. + */ + RF_RaidDisk_t *spareDiskPtr; /* + * Describes target disk for recon. + * (not used in dist sparing) + */ + int numDisksDone; /* + * The number of surviving disks that + * have completed their work. + */ + RF_RowCol_t srow; /* + * Row ID of the spare disk. + * (not used in dist sparing) + */ + RF_RowCol_t scol; /* + * Col ID of the spare disk. + * (not used in dist sparing) + */ /* * Prevent recon from hogging CPU */ - RF_Etimer_t recon_exec_timer; - RF_uint64 reconExecTimerRunning; - RF_uint64 reconExecTicks; - RF_uint64 maxReconExecTicks; + RF_Etimer_t recon_exec_timer; + RF_uint64 reconExecTimerRunning; + RF_uint64 reconExecTicks; + RF_uint64 maxReconExecTicks; -#if RF_RECON_STATS > 0 - RF_uint64 hsStallCount; /* head sep stall count */ - RF_uint64 numReconExecDelays; - RF_uint64 numReconEventWaits; -#endif /* RF_RECON_STATS > 0 */ +#if RF_RECON_STATS > 0 + RF_uint64 hsStallCount; /* Head sep stall count. */ + RF_uint64 numReconExecDelays; + RF_uint64 numReconEventWaits; +#endif /* RF_RECON_STATS > 0 */ RF_RaidReconDesc_t *next; }; struct RF_RaidAccessDesc_s { - RF_Raid_t *raidPtr; /* raid device descriptor */ - RF_IoType_t type; /* read or write */ - RF_RaidAddr_t raidAddress; /* starting address in raid address - * space */ - RF_SectorCount_t numBlocks; /* number of blocks (sectors) to - * transfer */ - RF_StripeCount_t numStripes; /* number of stripes involved in - * access */ - caddr_t bufPtr; /* pointer to data buffer */ - RF_RaidAccessFlags_t flags; /* flags controlling operation */ - int state; /* index into states telling how far along the - * RAID operation has gotten */ - RF_AccessState_t *states; /* array of states to be run */ - int status; /* pass/fail status of the last operation */ - RF_DagList_t *dagArray; /* array of dag lists, one list per stripe */ - RF_AccessStripeMapHeader_t *asmap; /* the asm for this I/O */ - void *bp; /* buf pointer for this RAID acc. ignored - * outside the kernel */ - RF_DagHeader_t **paramDAG; /* allows the DAG to be returned to - * the caller after I/O completion */ - RF_AccessStripeMapHeader_t **paramASM; /* allows the ASM to be + RF_Raid_t *raidPtr; /* Raid device descriptor. */ + RF_IoType_t type; /* Read or write. */ + RF_RaidAddr_t raidAddress; /* + * Starting address in raid address + * space. + */ + RF_SectorCount_t numBlocks; /* + * Number of blocks (sectors) + * to transfer. + */ + RF_StripeCount_t numStripes; /* + * Number of stripes involved in + * access. + */ + caddr_t bufPtr; /* Pointer to data buffer. 
*/ + RF_RaidAccessFlags_t flags; /* Flags controlling operation. */ + int state; /* + * Index into states telling how far + * along the RAID operation has gotten. + */ + RF_AccessState_t *states; /* Array of states to be run. */ + int status; /* + * Pass/fail status of the last + * operation. + */ + RF_DagList_t *dagArray; /* + * Array of DAG lists, one list + * per stripe. + */ + RF_AccessStripeMapHeader_t *asmap; /* The asm for this I/O. */ + void *bp; /* + * Buffer pointer for this RAID acc. + * Ignored outside the kernel. + */ + RF_DagHeader_t **paramDAG; /* + * Allows the DAG to be returned to + * the caller after I/O completion. + */ + RF_AccessStripeMapHeader_t **paramASM; /* + * Allows the ASM to be * returned to the caller - * after I/O completion */ - RF_AccTraceEntry_t tracerec; /* perf monitoring information for a - * user access (not for dag stats) */ - void (*callbackFunc) (RF_CBParam_t); /* callback function for this - * I/O */ - void *callbackArg; /* arg to give to callback func */ + * after I/O completion. + */ + RF_AccTraceEntry_t tracerec; /* + * Perf monitoring information for a + * user access (not for dag stats). + */ + void (*callbackFunc) (RF_CBParam_t); + /* Callback function for this I/O. */ + void *callbackArg; /* Arg to give to callback func. */ - RF_AllocListElem_t *cleanupList; /* memory to be freed at the - * end of the access */ + RF_AllocListElem_t *cleanupList; /* + * Memory to be freed at the + * end of the access. + */ RF_RaidAccessDesc_t *next; RF_RaidAccessDesc_t *head; - int numPending; + int numPending; - RF_DECLARE_MUTEX(mutex) /* these are used to implement - * blocking I/O */ - RF_DECLARE_COND(cond) - int async_flag; + RF_DECLARE_MUTEX( mutex ); /* + * These are used to implement + * blocking I/O. + */ + RF_DECLARE_COND( cond ); + int async_flag; - RF_Etimer_t timer; /* used for timing this access */ + RF_Etimer_t timer; /* Used for timing this access. */ }; -#endif /* !_RF__RF_DESC_H_ */ + +#endif /* ! _RF__RF_DESC_H_ */ diff --git a/sys/dev/raidframe/rf_diskqueue.c b/sys/dev/raidframe/rf_diskqueue.c index d324f43fadc..e29d25b81a2 100644 --- a/sys/dev/raidframe/rf_diskqueue.c +++ b/sys/dev/raidframe/rf_diskqueue.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_diskqueue.c,v 1.6 2000/08/08 16:07:40 peter Exp $ */ +/* $OpenBSD: rf_diskqueue.c,v 1.7 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_diskqueue.c,v 1.13 2000/03/04 04:22:34 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,44 +28,44 @@ * rights to redistribute these changes. */ -/**************************************************************************** +/***************************************************************************** * - * rf_diskqueue.c -- higher-level disk queue code + * rf_diskqueue.c -- Higher-level disk queue code. * - * the routines here are a generic wrapper around the actual queueing - * routines. The code here implements thread scheduling, synchronization, + * The routines here are a generic wrapper around the actual queueing + * routines. The code here implements thread scheduling, synchronization, * and locking ops (see below) on top of the lower-level queueing code. * - * to support atomic RMW, we implement "locking operations". When a + * To support atomic RMW, we implement "locking operations". When a * locking op is dispatched to the lower levels of the driver, the * queue is locked, and no further I/Os are dispatched until the queue - * receives & completes a corresponding "unlocking operation". 
This + * receives & completes a corresponding "unlocking operation". This * code relies on the higher layers to guarantee that a locking op - * will always be eventually followed by an unlocking op. The model + * will always be eventually followed by an unlocking op. The model * is that the higher layers are structured so locking and unlocking * ops occur in pairs, i.e. an unlocking op cannot be generated until - * after a locking op reports completion. There is no good way to + * after a locking op reports completion. There is no good way to * check to see that an unlocking op "corresponds" to the op that - * currently has the queue locked, so we make no such attempt. Since + * currently has the queue locked, so we make no such attempt. Since * by definition there can be only one locking op outstanding on a * disk, this should not be a problem. * * In the kernel, we allow multiple I/Os to be concurrently dispatched - * to the disk driver. In order to support locking ops in this + * to the disk driver. In order to support locking ops in this * environment, when we decide to do a locking op, we stop dispatching * new I/Os and wait until all dispatched I/Os have completed before * dispatching the locking op. * * Unfortunately, the code is different in the 3 different operating - * states (user level, kernel, simulator). In the kernel, I/O is + * states (user level, kernel, simulator). In the kernel, I/O is * non-blocking, and we have no disk threads to dispatch for us. * Therefore, we have to dispatch new I/Os to the scsi driver at the - * time of enqueue, and also at the time of completion. At user + * time of enqueue, and also at the time of completion. At user * level, I/O is blocking, and so only the disk threads may dispatch - * I/Os. Thus at user level, all we can do at enqueue time is enqueue + * I/Os. Thus at user level, all we can do at enqueue time is enqueue * and wake up the disk thread to do the dispatch. 
* - ****************************************************************************/ + *****************************************************************************/ #include "rf_types.h" #include "rf_threadstuff.h" @@ -83,21 +84,33 @@ #include "rf_fifo.h" #include "rf_kintf.h" -static int init_dqd(RF_DiskQueueData_t *); -static void clean_dqd(RF_DiskQueueData_t *); -static void rf_ShutdownDiskQueueSystem(void *); - -#define Dprintf1(s,a) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) +int rf_init_dqd(RF_DiskQueueData_t *); +void rf_clean_dqd(RF_DiskQueueData_t *); +void rf_ShutdownDiskQueueSystem(void *); + +#define Dprintf1(s,a) \ + if (rf_queueDebug) \ + rf_debug_printf(s,(void *)((unsigned long)a), \ + NULL,NULL,NULL,NULL,NULL,NULL,NULL) +#define Dprintf2(s,a,b) \ + if (rf_queueDebug) \ + rf_debug_printf(s,(void *)((unsigned long)a), \ + (void *)((unsigned long)b), \ + NULL,NULL,NULL,NULL,NULL,NULL) +#define Dprintf3(s,a,b,c) \ + if (rf_queueDebug) \ + rf_debug_printf(s,(void *)((unsigned long)a), \ + (void *)((unsigned long)b), \ + (void *)((unsigned long)c), \ + NULL,NULL,NULL,NULL,NULL) /***************************************************************************** * - * the disk queue switch defines all the functions used in the + * The disk queue switch defines all the functions used in the * different queueing disciplines queue ID, init routine, enqueue - * routine, dequeue routine + * routine, dequeue routine. * - ****************************************************************************/ + *****************************************************************************/ static RF_DiskQueueSW_t diskqueuesw[] = { {"fifo", /* FIFO */ @@ -105,85 +118,85 @@ static RF_DiskQueueSW_t diskqueuesw[] = { rf_FifoEnqueue, rf_FifoDequeue, rf_FifoPeek, - rf_FifoPromote}, + rf_FifoPromote}, {"cvscan", /* cvscan */ rf_CvscanCreate, rf_CvscanEnqueue, rf_CvscanDequeue, rf_CvscanPeek, - rf_CvscanPromote}, + rf_CvscanPromote}, {"sstf", /* shortest seek time first */ rf_SstfCreate, rf_SstfEnqueue, rf_SstfDequeue, rf_SstfPeek, - rf_SstfPromote}, + rf_SstfPromote}, {"scan", /* SCAN (two-way elevator) */ rf_ScanCreate, rf_SstfEnqueue, rf_ScanDequeue, rf_ScanPeek, - rf_SstfPromote}, + rf_SstfPromote}, {"cscan", /* CSCAN (one-way elevator) */ rf_CscanCreate, rf_SstfEnqueue, rf_CscanDequeue, rf_CscanPeek, - rf_SstfPromote}, + rf_SstfPromote}, }; -#define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t)) +#define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t)) static RF_FreeList_t *rf_dqd_freelist; -#define RF_MAX_FREE_DQD 256 -#define RF_DQD_INC 16 -#define RF_DQD_INITIAL 64 +#define RF_MAX_FREE_DQD 256 +#define RF_DQD_INC 16 +#define RF_DQD_INITIAL 64 #include <sys/buf.h> -static int -init_dqd(dqd) - RF_DiskQueueData_t *dqd; +int +rf_init_dqd(RF_DiskQueueData_t *dqd) { - dqd->bp = (struct buf *) malloc(sizeof(struct buf), - M_RAIDFRAME, M_NOWAIT); + dqd->bp = (struct buf *) malloc(sizeof(struct buf), M_RAIDFRAME, + M_NOWAIT); if (dqd->bp == NULL) { return (ENOMEM); } - memset(dqd->bp, 0, sizeof(struct buf)); /* if you don't do it, nobody - * else will.. */ + /* If you don't do it, nobody else will... 
*/ + memset(dqd->bp, 0, sizeof(struct buf)); + return (0); } -static void -clean_dqd(dqd) - RF_DiskQueueData_t *dqd; +void +rf_clean_dqd(RF_DiskQueueData_t *dqd) { free(dqd->bp, M_RAIDFRAME); } -/* configures a single disk queue */ -int +/* Configure a single disk queue. */ +int rf_ConfigureDiskQueue( - RF_Raid_t * raidPtr, - RF_DiskQueue_t * diskqueue, - RF_RowCol_t r, /* row & col -- debug only. BZZT not any - * more... */ - RF_RowCol_t c, - RF_DiskQueueSW_t * p, - RF_SectorCount_t sectPerDisk, - dev_t dev, - int maxOutstanding, - RF_ShutdownList_t ** listp, - RF_AllocListElem_t * clList) + RF_Raid_t *raidPtr, + RF_DiskQueue_t *diskqueue, + /* row & col -- Debug only. BZZT not any more... */ + RF_RowCol_t r, + RF_RowCol_t c, + RF_DiskQueueSW_t *p, + RF_SectorCount_t sectPerDisk, + dev_t dev, + int maxOutstanding, + RF_ShutdownList_t **listp, + RF_AllocListElem_t *clList +) { - int rc; + int rc; diskqueue->row = r; diskqueue->col = c; @@ -202,58 +215,55 @@ rf_ConfigureDiskQueue( diskqueue->rf_cinfo = &raidPtr->raid_cinfo[r][c]; rc = rf_create_managed_mutex(listp, &diskqueue->mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); return (rc); } rc = rf_create_managed_cond(listp, &diskqueue->cond); if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); return (rc); } return (0); } -static void -rf_ShutdownDiskQueueSystem(ignored) - void *ignored; +void +rf_ShutdownDiskQueueSystem(void *ignored) { - RF_FREELIST_DESTROY_CLEAN(rf_dqd_freelist, next, (RF_DiskQueueData_t *), clean_dqd); + RF_FREELIST_DESTROY_CLEAN(rf_dqd_freelist, next, + (RF_DiskQueueData_t *), rf_clean_dqd); } -int -rf_ConfigureDiskQueueSystem(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp) { - int rc; + int rc; - RF_FREELIST_CREATE(rf_dqd_freelist, RF_MAX_FREE_DQD, - RF_DQD_INC, sizeof(RF_DiskQueueData_t)); + RF_FREELIST_CREATE(rf_dqd_freelist, RF_MAX_FREE_DQD, RF_DQD_INC, + sizeof(RF_DiskQueueData_t)); if (rf_dqd_freelist == NULL) return (ENOMEM); rc = rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); rf_ShutdownDiskQueueSystem(NULL); return (rc); } RF_FREELIST_PRIME_INIT(rf_dqd_freelist, RF_DQD_INITIAL, next, - (RF_DiskQueueData_t *), init_dqd); + (RF_DiskQueueData_t *), rf_init_dqd); return (0); } -int -rf_ConfigureDiskQueues( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) +int +rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_DiskQueue_t **diskQueues, *spareQueues; RF_DiskQueueSW_t *p; RF_RowCol_t r, c; - int rc, i; + int rc, i; raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs; @@ -264,29 +274,30 @@ rf_ConfigureDiskQueues( } } if (p == NULL) { - RF_ERRORMSG2("Unknown queue type \"%s\". Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType); + RF_ERRORMSG2("Unknown queue type \"%s\". 
Using %s\n", + cfgPtr->diskQueueType, diskqueuesw[0].queueType); p = &diskqueuesw[0]; } raidPtr->qType = p; - RF_CallocAndAdd(diskQueues, raidPtr->numRow, sizeof(RF_DiskQueue_t *), (RF_DiskQueue_t **), raidPtr->cleanupList); + RF_CallocAndAdd(diskQueues, raidPtr->numRow, sizeof(RF_DiskQueue_t *), + (RF_DiskQueue_t **), raidPtr->cleanupList); if (diskQueues == NULL) { return (ENOMEM); } raidPtr->Queues = diskQueues; for (r = 0; r < raidPtr->numRow; r++) { - RF_CallocAndAdd(diskQueues[r], raidPtr->numCol + - ((r == 0) ? RF_MAXSPARE : 0), - sizeof(RF_DiskQueue_t), (RF_DiskQueue_t *), + RF_CallocAndAdd(diskQueues[r], raidPtr->numCol + + ((r == 0) ? RF_MAXSPARE : 0), + sizeof(RF_DiskQueue_t), (RF_DiskQueue_t *), raidPtr->cleanupList); if (diskQueues[r] == NULL) return (ENOMEM); for (c = 0; c < raidPtr->numCol; c++) { rc = rf_ConfigureDiskQueue(raidPtr, &diskQueues[r][c], - r, c, p, - raidPtr->sectorsPerDisk, - raidPtr->Disks[r][c].dev, - cfgPtr->maxOutstandingDiskReqs, - listp, raidPtr->cleanupList); + r, c, p, raidPtr->sectorsPerDisk, + raidPtr->Disks[r][c].dev, + cfgPtr->maxOutstandingDiskReqs, listp, + raidPtr->cleanupList); if (rc) return (rc); } @@ -294,9 +305,8 @@ rf_ConfigureDiskQueues( spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; for (r = 0; r < raidPtr->numSpare; r++) { - rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r], - 0, raidPtr->numCol + r, p, - raidPtr->sectorsPerDisk, + rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r], 0, + raidPtr->numCol + r, p, raidPtr->sectorsPerDisk, raidPtr->Disks[0][raidPtr->numCol + r].dev, cfgPtr->maxOutstandingDiskReqs, listp, raidPtr->cleanupList); @@ -305,7 +315,9 @@ rf_ConfigureDiskQueues( } return (0); } -/* Enqueue a disk I/O + +/* + * Enqueue a disk I/O * * Unfortunately, we have to do things differently in the different * environments (simulator, user-level, kernel). @@ -314,31 +326,28 @@ rf_ConfigureDiskQueues( * In the kernel, I/O is non-blocking and so we'd like to have multiple * I/Os outstanding on the physical disks when possible. * - * when any request arrives at a queue, we have two choices: + * When any request arrives at a queue, we have two choices: * dispatch it to the lower levels * queue it up * - * kernel rules for when to do what: - * locking request: queue empty => dispatch and lock queue, - * else queue it - * unlocking req : always dispatch it - * normal req : queue empty => dispatch it & set priority - * queue not full & priority is ok => dispatch it - * else queue it + * Kernel rules for when to do what: + * locking request: Queue empty => dispatch and lock queue, + * else queue it. + * unlocking req : Always dispatch it. + * normal req : Queue empty => dispatch it & set priority. + * Queue not full & priority is ok => dispatch it + * else queue it. * - * user-level rules: - * always enqueue. In the special case of an unlocking op, enqueue + * User-level rules: + * Always enqueue. In the special case of an unlocking op, enqueue * in a special way that will cause the unlocking op to be the next * thing dequeued. * - * simulator rules: + * Simulator rules: * Do the same as at user level, with the sleeps and wakeups suppressed. 
*/ -void -rf_DiskIOEnqueue(queue, req, pri) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; - int pri; +void +rf_DiskIOEnqueue(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int pri) { RF_ETIMER_START(req->qtime); RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector); @@ -348,148 +357,208 @@ rf_DiskIOEnqueue(queue, req, pri) printf("Warning: Enqueueing zero-sector access\n"); } /* - * kernel + * Kernel. */ RF_LOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); - /* locking request */ + /* Locking request. */ if (RF_LOCKING_REQ(req)) { if (RF_QUEUE_EMPTY(queue)) { - Dprintf3("Dispatching pri %d locking op to r %d c %d (queue empty)\n", pri, queue->row, queue->col); + Dprintf3("Dispatching pri %d locking op to r %d c %d" + " (queue empty)\n", pri, queue->row, queue->col); RF_LOCK_QUEUE(queue); rf_DispatchKernelIO(queue, req); } else { - queue->queueLength++; /* increment count of number - * of requests waiting in this - * queue */ - Dprintf3("Enqueueing pri %d locking op to r %d c %d (queue not empty)\n", pri, queue->row, queue->col); + /* + * Increment count of number of requests waiting + * in this queue. + */ + queue->queueLength++; + Dprintf3("Enqueueing pri %d locking op to r %d c %d" + " (queue not empty)\n", pri, queue->row, + queue->col); req->queue = (void *) queue; (queue->qPtr->Enqueue) (queue->qHdr, req, pri); } - } - /* unlocking request */ - else - if (RF_UNLOCKING_REQ(req)) { /* we'll do the actual unlock - * when this I/O completes */ - Dprintf3("Dispatching pri %d unlocking op to r %d c %d\n", pri, queue->row, queue->col); + } else { + /* Unlocking request. */ + if (RF_UNLOCKING_REQ(req)) { + /* + * We'll do the actual unlock when this + * I/O completes. + */ + Dprintf3("Dispatching pri %d unlocking op to r %d" + " c %d\n", pri, queue->row, queue->col); RF_ASSERT(RF_QUEUE_LOCKED(queue)); rf_DispatchKernelIO(queue, req); - } - /* normal request */ - else + } else { + /* Normal request. */ if (RF_OK_TO_DISPATCH(queue, req)) { - Dprintf3("Dispatching pri %d regular op to r %d c %d (ok to dispatch)\n", pri, queue->row, queue->col); + Dprintf3("Dispatching pri %d regular op to" + " r %d c %d (ok to dispatch)\n", pri, + queue->row, queue->col); rf_DispatchKernelIO(queue, req); } else { - queue->queueLength++; /* increment count of - * number of requests - * waiting in this queue */ - Dprintf3("Enqueueing pri %d regular op to r %d c %d (not ok to dispatch)\n", pri, queue->row, queue->col); + /* + * Increment count of number of requests + * waiting in this queue. + */ + queue->queueLength++; + Dprintf3("Enqueueing pri %d regular op to" + " r %d c %d (not ok to dispatch)\n", pri, + queue->row, queue->col); req->queue = (void *) queue; (queue->qPtr->Enqueue) (queue->qHdr, req, pri); } + } + } RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); } -/* get the next set of I/Os started, kernel version only */ -void -rf_DiskIOComplete(queue, req, status) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; - int status; +/* Get the next set of I/Os started, kernel version only. */ +void +rf_DiskIOComplete(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int status) { - int done = 0; + int done = 0; RF_LOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); - /* unlock the queue: (1) after an unlocking req completes (2) after a - * locking req fails */ + /* + * Unlock the queue: + * (1) after an unlocking req completes. + * (2) after a locking req fails. 
+ */ if (RF_UNLOCKING_REQ(req) || (RF_LOCKING_REQ(req) && status)) { - Dprintf2("DiskIOComplete: unlocking queue at r %d c %d\n", queue->row, queue->col); - RF_ASSERT(RF_QUEUE_LOCKED(queue) && (queue->unlockingOp == NULL)); + Dprintf2("DiskIOComplete: unlocking queue at r %d c %d\n", + queue->row, queue->col); + RF_ASSERT(RF_QUEUE_LOCKED(queue) && + (queue->unlockingOp == NULL)); RF_UNLOCK_QUEUE(queue); } queue->numOutstanding--; RF_ASSERT(queue->numOutstanding >= 0); - /* dispatch requests to the disk until we find one that we can't. */ - /* no reason to continue once we've filled up the queue */ - /* no reason to even start if the queue is locked */ + /* + * Dispatch requests to the disk until we find one that we can't. + * No reason to continue once we've filled up the queue. + * No reason to even start if the queue is locked. + */ while (!done && !RF_QUEUE_FULL(queue) && !RF_QUEUE_LOCKED(queue)) { if (queue->nextLockingOp) { req = queue->nextLockingOp; queue->nextLockingOp = NULL; - Dprintf3("DiskIOComplete: a pri %d locking req was pending at r %d c %d\n", req->priority, queue->row, queue->col); + Dprintf3("DiskIOComplete: a pri %d locking req was" + " pending at r %d c %d\n", req->priority, + queue->row, queue->col); } else { req = (queue->qPtr->Dequeue) (queue->qHdr); if (req != NULL) { - Dprintf3("DiskIOComplete: extracting pri %d req from queue at r %d c %d\n", req->priority, queue->row, queue->col); + Dprintf3("DiskIOComplete: extracting pri %d" + " req from queue at r %d c %d\n", + req->priority, queue->row, queue->col); } else { - Dprintf1("DiskIOComplete: no more requests to extract.\n", ""); + Dprintf1("DiskIOComplete: no more requests" + " to extract.\n", ""); } } if (req) { - queue->queueLength--; /* decrement count of number - * of requests waiting in this - * queue */ + /* + * Decrement count of number of requests waiting + * in this queue. + */ + queue->queueLength--; RF_ASSERT(queue->queueLength >= 0); } if (!req) done = 1; - else + else { if (RF_LOCKING_REQ(req)) { - if (RF_QUEUE_EMPTY(queue)) { /* dispatch it */ - Dprintf3("DiskIOComplete: dispatching pri %d locking req to r %d c %d (queue empty)\n", req->priority, queue->row, queue->col); + if (RF_QUEUE_EMPTY(queue)) { + /* Dispatch it. */ + Dprintf3("DiskIOComplete: dispatching" + " pri %d locking req to r %d c %d" + " (queue empty)\n", req->priority, + queue->row, queue->col); RF_LOCK_QUEUE(queue); rf_DispatchKernelIO(queue, req); done = 1; - } else { /* put it aside to wait for - * the queue to drain */ - Dprintf3("DiskIOComplete: postponing pri %d locking req to r %d c %d\n", req->priority, queue->row, queue->col); + } else { + /* + * Put it aside to wait for + * the queue to drain. + */ + Dprintf3("DiskIOComplete: postponing" + " pri %d locking req to r %d" + " c %d\n", req->priority, + queue->row, queue->col); RF_ASSERT(queue->nextLockingOp == NULL); queue->nextLockingOp = req; done = 1; } - } else - if (RF_UNLOCKING_REQ(req)) { /* should not happen: - * unlocking ops should - * not get queued */ - RF_ASSERT(RF_QUEUE_LOCKED(queue)); /* support it anyway for - * the future */ - Dprintf3("DiskIOComplete: dispatching pri %d unl req to r %d c %d (SHOULD NOT SEE THIS)\n", req->priority, queue->row, queue->col); + } else { + if (RF_UNLOCKING_REQ(req)) { + /* + * Should not happen: + * Unlocking ops should not get queued. + */ + /* Support it anyway for the future. 
*/ + RF_ASSERT(RF_QUEUE_LOCKED(queue)); + Dprintf3("DiskIOComplete: dispatching" + " pri %d unl req to r %d c %d" + " (SHOULD NOT SEE THIS)\n", + req->priority, queue->row, + queue->col); rf_DispatchKernelIO(queue, req); done = 1; - } else + } else { if (RF_OK_TO_DISPATCH(queue, req)) { - Dprintf3("DiskIOComplete: dispatching pri %d regular req to r %d c %d (ok to dispatch)\n", req->priority, queue->row, queue->col); + Dprintf3("DiskIOComplete:" + " dispatching pri %d" + " regular req to r %d" + " c %d (ok to dispatch)\n", + req->priority, queue->row, + queue->col); rf_DispatchKernelIO(queue, req); - } else { /* we can't dispatch it, - * so just re-enqueue - * it. */ - /* potential trouble here if - * disk queues batch reqs */ - Dprintf3("DiskIOComplete: re-enqueueing pri %d regular req to r %d c %d\n", req->priority, queue->row, queue->col); + } else { + /* + * We can't dispatch it, + * so just re-enqueue + * it. + */ + /* + * Potential trouble here if + * disk queues batch reqs. + */ + Dprintf3("DiskIOComplete:" + " re-enqueueing pri %d" + " regular req to r %d" + " c %d\n", req->priority, + queue->row, queue->col); queue->queueLength++; - (queue->qPtr->Enqueue) (queue->qHdr, req, req->priority); + (queue->qPtr->Enqueue) + (queue->qHdr, req, + req->priority); done = 1; } + } + } + } } RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); } -/* promotes accesses tagged with the given parityStripeID from low priority - * to normal priority. This promotion is optional, meaning that a queue - * need not implement it. If there is no promotion routine associated with + +/* Promote accesses tagged with the given parityStripeID from low priority + * to normal priority. This promotion is optional, meaning that a queue + * need not implement it. If there is no promotion routine associated with * a queue, this routine does nothing and returns -1. 
*/ -int -rf_DiskIOPromote(queue, parityStripeID, which_ru) - RF_DiskQueue_t *queue; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; +int +rf_DiskIOPromote(RF_DiskQueue_t *queue, RF_StripeNum_t parityStripeID, + RF_ReconUnitNum_t which_ru) { - int retval; + int retval; if (!queue->qPtr->Promote) return (-1); @@ -501,23 +570,25 @@ rf_DiskIOPromote(queue, parityStripeID, which_ru) RF_DiskQueueData_t * rf_CreateDiskQueueData( - RF_IoType_t typ, - RF_SectorNum_t ssect, - RF_SectorCount_t nsect, - caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, - RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - void *raidPtr, - RF_DiskQueueDataFlags_t flags, - void *kb_proc) + RF_IoType_t typ, + RF_SectorNum_t ssect, + RF_SectorCount_t nsect, + caddr_t buf, + RF_StripeNum_t parityStripeID, + RF_ReconUnitNum_t which_ru, + int (*wakeF) (void *, int), + void *arg, + RF_DiskQueueData_t *next, + RF_AccTraceEntry_t *tracerec, + void *raidPtr, + RF_DiskQueueDataFlags_t flags, + void *kb_proc +) { RF_DiskQueueData_t *p; - RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), init_dqd); + RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), + rf_init_dqd); p->sectorOffset = ssect + rf_protectedSectors; p->numSector = nsect; @@ -540,26 +611,28 @@ rf_CreateDiskQueueData( RF_DiskQueueData_t * rf_CreateDiskQueueDataFull( - RF_IoType_t typ, - RF_SectorNum_t ssect, - RF_SectorCount_t nsect, - caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, - RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - int priority, - int (*AuxFunc) (void *,...), - caddr_t buf2, - void *raidPtr, - RF_DiskQueueDataFlags_t flags, - void *kb_proc) + RF_IoType_t typ, + RF_SectorNum_t ssect, + RF_SectorCount_t nsect, + caddr_t buf, + RF_StripeNum_t parityStripeID, + RF_ReconUnitNum_t which_ru, + int (*wakeF) (void *, int), + void *arg, + RF_DiskQueueData_t *next, + RF_AccTraceEntry_t *tracerec, + int priority, + int (*AuxFunc) (void *,...), + caddr_t buf2, + void *raidPtr, + RF_DiskQueueDataFlags_t flags, + void *kb_proc +) { RF_DiskQueueData_t *p; - RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), init_dqd); + RF_FREELIST_GET_INIT(rf_dqd_freelist, p, next, (RF_DiskQueueData_t *), + rf_init_dqd); p->sectorOffset = ssect + rf_protectedSectors; p->numSector = nsect; @@ -580,9 +653,8 @@ rf_CreateDiskQueueDataFull( return (p); } -void -rf_FreeDiskQueueData(p) - RF_DiskQueueData_t *p; +void +rf_FreeDiskQueueData(RF_DiskQueueData_t *p) { - RF_FREELIST_FREE_CLEAN(rf_dqd_freelist, p, next, clean_dqd); + RF_FREELIST_FREE_CLEAN(rf_dqd_freelist, p, next, rf_clean_dqd); } diff --git a/sys/dev/raidframe/rf_diskqueue.h b/sys/dev/raidframe/rf_diskqueue.h index fb8e09927bd..d22e791b4f6 100644 --- a/sys/dev/raidframe/rf_diskqueue.h +++ b/sys/dev/raidframe/rf_diskqueue.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_diskqueue.h,v 1.4 2000/08/08 16:07:40 peter Exp $ */ +/* $OpenBSD: rf_diskqueue.h,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_diskqueue.h,v 1.5 2000/02/13 04:53:57 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,17 +28,17 @@ * rights to redistribute these changes. 
*/ -/***************************************************************************************** +/***************************************************************************** * - * rf_diskqueue.h -- header file for disk queues + * rf_diskqueue.h -- Header file for disk queues. * - * see comments in rf_diskqueue.c + * See comments in rf_diskqueue.c * - ****************************************************************************************/ + *****************************************************************************/ -#ifndef _RF__RF_DISKQUEUE_H_ -#define _RF__RF_DISKQUEUE_H_ +#ifndef _RF__RF_DISKQUEUE_H_ +#define _RF__RF_DISKQUEUE_H_ #include "rf_threadstuff.h" #include "rf_acctrace.h" @@ -46,168 +47,206 @@ #include "rf_etimer.h" -#if defined(__NetBSD__) +#if defined(__NetBSD__) #include "rf_netbsd.h" -#elif defined(__OpenBSD__) +#elif defined(__OpenBSD__) #include "rf_openbsd.h" #endif -#define RF_IO_NORMAL_PRIORITY 1 -#define RF_IO_LOW_PRIORITY 0 +#define RF_IO_NORMAL_PRIORITY 1 +#define RF_IO_LOW_PRIORITY 0 -/* the data held by a disk queue entry */ +/* The data held by a disk queue entry. */ struct RF_DiskQueueData_s { - RF_SectorNum_t sectorOffset; /* sector offset into the disk */ - RF_SectorCount_t numSector; /* number of sectors to read/write */ - RF_IoType_t type; /* read/write/nop */ - caddr_t buf; /* buffer pointer */ - RF_StripeNum_t parityStripeID; /* the RAID parity stripe ID this - * access is for */ - RF_ReconUnitNum_t which_ru; /* which RU within this parity stripe */ - int priority; /* the priority of this request */ - int (*CompleteFunc) (void *, int); /* function to be called upon - * completion */ - int (*AuxFunc) (void *,...); /* function called upon - * completion of the first I/O - * of a Read_Op_Write pair */ - void *argument; /* argument to be passed to CompleteFunc */ - RF_Raid_t *raidPtr; /* needed for simulation */ - RF_AccTraceEntry_t *tracerec; /* perf mon only */ - RF_Etimer_t qtime; /* perf mon only - time request is in queue */ - long entryTime; + RF_SectorNum_t sectorOffset; /* Sector offset into the disk. */ + RF_SectorCount_t numSector; /* Number of sectors to read/write. */ + RF_IoType_t type; /* Read/write/nop. */ + caddr_t buf; /* Buffer pointer. */ + RF_StripeNum_t parityStripeID; + /* + * The RAID parity stripe ID this + * access is for. + */ + RF_ReconUnitNum_t which_ru; /* Which RU within this parity stripe */ + int priority; /* The priority of this request. */ + int (*CompleteFunc) (void *, int); + /* + * Function to be called upon + * completion. + */ + int (*AuxFunc) (void *,...); + /* + * Function called upon completion + * of the first I/O of a Read_Op_Write + * pair. + */ + void *argument; /* + * Argument to be passed to + * CompleteFunc. + */ + RF_Raid_t *raidPtr; /* Needed for simulation. */ + RF_AccTraceEntry_t *tracerec; /* Perf mon only. */ + RF_Etimer_t qtime; /* + * Perf mon only - time request is + * in queue. + */ + long entryTime; RF_DiskQueueData_t *next; RF_DiskQueueData_t *prev; - caddr_t buf2; /* for read-op-write */ - dev_t dev; /* the device number for in-kernel version */ - RF_DiskQueue_t *queue; /* the disk queue to which this req is - * targeted */ - RF_DiskQueueDataFlags_t flags; /* flags controlling operation */ - - struct proc *b_proc; /* the b_proc from the original bp passed into - * the driver for this I/O */ - struct buf *bp; /* a bp to use to get this I/O done */ + caddr_t buf2; /* For read-op-write. */ + dev_t dev; /* + * The device number for in-kernel + * version. 
+ */ + RF_DiskQueue_t *queue; /* + * The disk queue to which this req + * is targeted. + */ + RF_DiskQueueDataFlags_t flags; /* Flags controlling operation. */ + + struct proc *b_proc; /* + * The b_proc from the original bp + * passed into the driver for this I/O. + */ + struct buf *bp; /* A bp to use to get this I/O done. */ }; -#define RF_LOCK_DISK_QUEUE 0x01 -#define RF_UNLOCK_DISK_QUEUE 0x02 +#define RF_LOCK_DISK_QUEUE 0x01 +#define RF_UNLOCK_DISK_QUEUE 0x02 -/* note: "Create" returns type-specific queue header pointer cast to (void *) */ +/* + * Note: "Create" returns type-specific queue header pointer cast to (void *). + */ struct RF_DiskQueueSW_s { RF_DiskQueueType_t queueType; - void *(*Create) (RF_SectorCount_t, RF_AllocListElem_t *, RF_ShutdownList_t **); /* creation routine -- - * one call per queue in - * system */ - void (*Enqueue) (void *, RF_DiskQueueData_t *, int); /* enqueue routine */ - RF_DiskQueueData_t *(*Dequeue) (void *); /* dequeue routine */ - RF_DiskQueueData_t *(*Peek) (void *); /* peek at head of queue */ - - /* the rest are optional: they improve performance, but the driver - * will deal with it if they don't exist */ - int (*Promote) (void *, RF_StripeNum_t, RF_ReconUnitNum_t); /* promotes priority of - * tagged accesses */ + void *(*Create) (RF_SectorCount_t, RF_AllocListElem_t *, + RF_ShutdownList_t **); + /* + * Creation routine -- one call per + * queue in system. + */ + void (*Enqueue) (void *, RF_DiskQueueData_t *, int); + /* Enqueue routine. */ + RF_DiskQueueData_t *(*Dequeue) (void *); + /* Dequeue routine. */ + RF_DiskQueueData_t *(*Peek) (void *); + /* Peek at head of queue. */ + + /* + * The rest are optional: they improve performance, but the driver + * will deal with it if they don't exist. + */ + int (*Promote) (void *, RF_StripeNum_t, RF_ReconUnitNum_t); + /* + * Promotes priority of tagged + * accesses. + */ }; struct RF_DiskQueue_s { - RF_DiskQueueSW_t *qPtr; /* access point to queue functions */ - void *qHdr; /* queue header, of whatever type */ - RF_DECLARE_MUTEX(mutex) /* mutex locking data structures */ - RF_DECLARE_COND(cond) /* condition variable for - * synchronization */ - long numOutstanding; /* number of I/Os currently outstanding on - * disk */ - long maxOutstanding; /* max # of I/Os that can be outstanding on a - * disk (in-kernel only) */ - int curPriority; /* the priority of accs all that are currently - * outstanding */ - long queueLength; /* number of requests in queue */ - RF_DiskQueueData_t *nextLockingOp; /* a locking op that has - * arrived at the head of the - * queue & is waiting for - * drainage */ - RF_DiskQueueData_t *unlockingOp; /* used at user level to - * communicate unlocking op - * b/w user (or dag exec) & - * disk threads */ - int numWaiting; /* number of threads waiting on this variable. - * user-level only */ - RF_DiskQueueFlags_t flags; /* terminate, locked */ - RF_Raid_t *raidPtr; /* associated array */ - dev_t dev; /* device number for kernel version */ - RF_SectorNum_t last_deq_sector; /* last sector number dequeued or - * dispatched */ - int row, col; /* debug only */ - struct raidcinfo *rf_cinfo; /* disks component info.. */ + RF_DiskQueueSW_t *qPtr; /* Access point to queue functions. */ + void *qHdr; /* Queue header, of whatever type. */ + RF_DECLARE_MUTEX(mutex); /* Mutex locking data structures. */ + RF_DECLARE_COND(cond); /* + * Condition variable for + * synchronization. + */ + long numOutstanding; + /* + * Number of I/Os currently + * outstanding on disk. 
+ */ + long maxOutstanding; + /* + * Max number of I/Os that can be + * outstanding on a disk. + * (in-kernel only) + */ + int curPriority; /* + * The priority of accs all that are + * currently outstanding. + */ + long queueLength; /* Number of requests in queue. */ + RF_DiskQueueData_t *nextLockingOp; + /* + * A locking op that has arrived at + * the head of the queue & is waiting + * for drainage. + */ + RF_DiskQueueData_t *unlockingOp;/* + * Used at user level to communicate + * unlocking op b/w user (or dag exec) + * & disk threads. + */ + int numWaiting; /* + * Number of threads waiting on + * this variable. + * (user-level only) + */ + RF_DiskQueueFlags_t flags; /* Terminate, locked. */ + RF_Raid_t *raidPtr; /* Associated array. */ + dev_t dev; /* Device number for kernel version. */ + RF_SectorNum_t last_deq_sector; + /* + * Last sector number dequeued or + * dispatched. + */ + int row, col; /* Debug only. */ + struct raidcinfo *rf_cinfo; /* Disks component info... */ }; -#define RF_DQ_LOCKED 0x02 /* no new accs allowed until queue is - * explicitly unlocked */ - -/* macros setting & returning information about queues and requests */ -#define RF_QUEUE_LOCKED(_q) ((_q)->flags & RF_DQ_LOCKED) -#define RF_QUEUE_EMPTY(_q) (((_q)->numOutstanding == 0) && ((_q)->nextLockingOp == NULL) && !RF_QUEUE_LOCKED(_q)) -#define RF_QUEUE_FULL(_q) ((_q)->numOutstanding == (_q)->maxOutstanding) - -#define RF_LOCK_QUEUE(_q) (_q)->flags |= RF_DQ_LOCKED -#define RF_UNLOCK_QUEUE(_q) (_q)->flags &= ~RF_DQ_LOCKED - -#define RF_LOCK_QUEUE_MUTEX(_q_,_wh_) RF_LOCK_MUTEX((_q_)->mutex) -#define RF_UNLOCK_QUEUE_MUTEX(_q_,_wh_) RF_UNLOCK_MUTEX((_q_)->mutex) - -#define RF_LOCKING_REQ(_r) ((_r)->flags & RF_LOCK_DISK_QUEUE) -#define RF_UNLOCKING_REQ(_r) ((_r)->flags & RF_UNLOCK_DISK_QUEUE) - -/* whether it is ok to dispatch a regular request */ -#define RF_OK_TO_DISPATCH(_q_,_r_) \ - (RF_QUEUE_EMPTY(_q_) || \ - (!RF_QUEUE_FULL(_q_) && ((_r_)->priority >= (_q_)->curPriority))) - -int rf_ConfigureDiskQueueSystem(RF_ShutdownList_t ** listp); - -void rf_TerminateDiskQueues(RF_Raid_t * raidPtr); - -int -rf_ConfigureDiskQueues(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); - -void rf_DiskIOEnqueue(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req, int pri); - - -void rf_DiskIOComplete(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req, int status); - -int -rf_DiskIOPromote(RF_DiskQueue_t * queue, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); - -RF_DiskQueueData_t * -rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect, - RF_SectorCount_t nsect, caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - void *raidPtr, RF_DiskQueueDataFlags_t flags, - void *kb_proc); - -RF_DiskQueueData_t * -rf_CreateDiskQueueDataFull(RF_IoType_t typ, RF_SectorNum_t ssect, - RF_SectorCount_t nsect, caddr_t buf, - RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, RF_DiskQueueData_t * next, - RF_AccTraceEntry_t * tracerec, - int priority, int (*AuxFunc) (void *,...), - caddr_t buf2, void *raidPtr, - RF_DiskQueueDataFlags_t flags, void *kb_proc); - -void -rf_FreeDiskQueueData(RF_DiskQueueData_t * p); - -int -rf_ConfigureDiskQueue(RF_Raid_t *, RF_DiskQueue_t *, RF_RowCol_t, - RF_RowCol_t, RF_DiskQueueSW_t *, - RF_SectorCount_t, dev_t, int, - RF_ShutdownList_t **, - RF_AllocListElem_t *); - -#endif /* !_RF__RF_DISKQUEUE_H_ */ 
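[Editorial aside, not part of the patch: the RF_QUEUE_EMPTY / RF_QUEUE_FULL / RF_OK_TO_DISPATCH macros being reformatted in this hunk encode the dispatch policy that rf_DiskIOEnqueue() and rf_DiskIOComplete() apply before handing a normal request to rf_DispatchKernelIO(). The following minimal, self-contained sketch restates that policy; the struct and function names are hypothetical stand-ins, not RAIDframe symbols.]

	#include <stddef.h>

	/* Hypothetical stand-in for the fields of RF_DiskQueue_t that the
	 * dispatch macros consult. */
	struct sketch_queue {
		long	numOutstanding;	/* I/Os currently issued to the disk */
		long	maxOutstanding;	/* in-kernel cap on outstanding I/Os */
		int	curPriority;	/* priority of the I/Os already issued */
		int	locked;		/* stands in for RF_DQ_LOCKED in flags */
		void	*nextLockingOp;	/* pending locking op, if any */
	};

	/*
	 * Mirrors RF_OK_TO_DISPATCH(): an empty, unlocked queue with no
	 * pending locking op always dispatches; otherwise the queue must
	 * not be full and the request may not lower the priority of what
	 * is already outstanding.
	 */
	static int
	sketch_ok_to_dispatch(const struct sketch_queue *q, int req_priority)
	{
		int empty = (q->numOutstanding == 0) &&
		    (q->nextLockingOp == NULL) && !q->locked;
		int full = (q->numOutstanding == q->maxOutstanding);

		return (empty || (!full && (req_priority >= q->curPriority)));
	}

[When this test fails, the driver instead enqueues the request through the per-discipline Enqueue routine and bumps queueLength, exactly as in the rf_DiskIOEnqueue()/rf_DiskIOComplete() hunks earlier in this patch. End of aside; the patch resumes below.]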
+ +/* No new accs allowed until queue is explicitly unlocked. */ +#define RF_DQ_LOCKED 0x02 + +/* Macros setting & returning information about queues and requests. */ +#define RF_QUEUE_LOCKED(_q) ((_q)->flags & RF_DQ_LOCKED) +#define RF_QUEUE_EMPTY(_q) (((_q)->numOutstanding == 0) && \ + ((_q)->nextLockingOp == NULL) && \ + !RF_QUEUE_LOCKED(_q)) +#define RF_QUEUE_FULL(_q) ((_q)->numOutstanding == \ + (_q)->maxOutstanding) + +#define RF_LOCK_QUEUE(_q) (_q)->flags |= RF_DQ_LOCKED +#define RF_UNLOCK_QUEUE(_q) (_q)->flags &= ~RF_DQ_LOCKED + +#define RF_LOCK_QUEUE_MUTEX(_q_,_wh_) RF_LOCK_MUTEX((_q_)->mutex) +#define RF_UNLOCK_QUEUE_MUTEX(_q_,_wh_) RF_UNLOCK_MUTEX((_q_)->mutex) + +#define RF_LOCKING_REQ(_r) ((_r)->flags & RF_LOCK_DISK_QUEUE) +#define RF_UNLOCKING_REQ(_r) ((_r)->flags & RF_UNLOCK_DISK_QUEUE) + +/* Whether it is ok to dispatch a regular request. */ +#define RF_OK_TO_DISPATCH(_q_,_r_) \ + (RF_QUEUE_EMPTY(_q_) || \ + ( !RF_QUEUE_FULL(_q_) && ((_r_)->priority >= (_q_)->curPriority))) + +int rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **); + +void rf_TerminateDiskQueues(RF_Raid_t *); + +int rf_ConfigureDiskQueues(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); + +void rf_DiskIOEnqueue(RF_DiskQueue_t *, RF_DiskQueueData_t *, int); + +void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int); + +int rf_DiskIOPromote(RF_DiskQueue_t *, RF_StripeNum_t, RF_ReconUnitNum_t); + +RF_DiskQueueData_t *rf_CreateDiskQueueData(RF_IoType_t, RF_SectorNum_t, + RF_SectorCount_t, caddr_t, RF_StripeNum_t, RF_ReconUnitNum_t, + int (*) (void *, int), void *, RF_DiskQueueData_t *, + RF_AccTraceEntry_t *, void *, RF_DiskQueueDataFlags_t, void *); + +RF_DiskQueueData_t *rf_CreateDiskQueueDataFull(RF_IoType_t, RF_SectorNum_t, + RF_SectorCount_t, caddr_t, RF_StripeNum_t, RF_ReconUnitNum_t, + int (*) (void *, int), void *, RF_DiskQueueData_t *, + RF_AccTraceEntry_t *, int, int (*) (void *,...), caddr_t, void *, + RF_DiskQueueDataFlags_t, void *); + +void rf_FreeDiskQueueData(RF_DiskQueueData_t *); + +int rf_ConfigureDiskQueue(RF_Raid_t *, RF_DiskQueue_t *, RF_RowCol_t, + RF_RowCol_t, RF_DiskQueueSW_t *, RF_SectorCount_t, dev_t, int, + RF_ShutdownList_t **, RF_AllocListElem_t *); + +#endif /* ! _RF__RF_DISKQUEUE_H_ */ diff --git a/sys/dev/raidframe/rf_disks.c b/sys/dev/raidframe/rf_disks.c index 80aaffd87d8..f9ba8433298 100644 --- a/sys/dev/raidframe/rf_disks.c +++ b/sys/dev/raidframe/rf_disks.c @@ -1,6 +1,7 @@ -/* $OpenBSD: rf_disks.c,v 1.6 2000/08/08 16:07:40 peter Exp $ */ +/* $OpenBSD: rf_disks.c,v 1.7 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_disks.c,v 1.31 2000/06/02 01:17:14 oster Exp $ */ -/*- + +/* * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * @@ -17,8 +18,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
@@ -63,7 +64,7 @@ */ /*************************************************************** - * rf_disks.c -- code to perform operations on the actual disks + * rf_disks.c -- Code to perform operations on the actual disks. ***************************************************************/ #include "rf_types.h" @@ -87,59 +88,56 @@ #include <sys/proc.h> #include <sys/ioctl.h> #include <sys/fcntl.h> -#ifdef __NETBSD__ +#ifdef __NETBSD__ #include <sys/vnode.h> -#endif +#endif /* __NETBSD__ */ -static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); -static void rf_print_label_status( RF_Raid_t *, int, int, char *, - RF_ComponentLabel_t *); -static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, - RF_ComponentLabel_t *, int, int ); +int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); +void rf_print_label_status(RF_Raid_t *, int, int, char *, + RF_ComponentLabel_t *); +int rf_check_label_vitals(RF_Raid_t *, int, int, char *, + RF_ComponentLabel_t *, int, int); -#define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) -#define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) +#define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) +#define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) /**************************************************************************** * - * initialize the disks comprising the array + * Initialize the disks comprising the array. * - * We want the spare disks to have regular row,col numbers so that we can - * easily substitue a spare for a failed disk. But, the driver code assumes - * throughout that the array contains numRow by numCol _non-spare_ disks, so - * it's not clear how to fit in the spares. This is an unfortunate holdover - * from raidSim. The quick and dirty fix is to make row zero bigger than the - * rest, and put all the spares in it. This probably needs to get changed + * We want the spare disks to have regular row,col numbers so that we can + * easily substitue a spare for a failed disk. But, the driver code assumes + * throughout that the array contains numRow by numCol _non-spare_ disks, so + * it's not clear how to fit in the spares. This is an unfortunate holdover + * from raidSim. The quick and dirty fix is to make row zero bigger than the + * rest, and put all the spares in it. This probably needs to get changed * eventually. 
* ****************************************************************************/ -int -rf_ConfigureDisks( listp, raidPtr, cfgPtr ) - RF_ShutdownList_t **listp; - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; +int +rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidDisk_t **disks; RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; RF_RowCol_t r, c; - int bs, ret; + int bs, ret; unsigned i, count, foundone = 0, numFailuresThisRow; - int force; + int force; force = cfgPtr->force; - + ret = rf_AllocDiskStructures(raidPtr, cfgPtr); if (ret) goto fail; disks = raidPtr->Disks; - + for (r = 0; r < raidPtr->numRow; r++) { numFailuresThisRow = 0; for (c = 0; c < raidPtr->numCol; c++) { - ret = rf_ConfigureDisk(raidPtr, - &cfgPtr->devnames[r][c][0], - &disks[r][c], r, c); + ret = rf_ConfigureDisk(raidPtr, + &cfgPtr->devnames[r][c][0], &disks[r][c], r, c); if (ret) goto fail; @@ -156,22 +154,27 @@ rf_ConfigureDisks( listp, raidPtr, cfgPtr ) } else { if (disks[r][c].numBlocks < min_numblks) min_numblks = disks[r][c].numBlocks; - DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", + DPRINTF7("Disk at row %d col %d: dev %s" + " numBlocks %ld blockSize %d (%ld MB)\n", r, c, disks[r][c].devname, (long int) disks[r][c].numBlocks, disks[r][c].blockSize, (long int) disks[r][c].numBlocks * - disks[r][c].blockSize / 1024 / 1024); + disks[r][c].blockSize / 1024 / 1024); } } - /* XXX fix for n-fault tolerant */ - /* XXX this should probably check to see how many failures - we can handle for this configuration! */ + /* XXX Fix for n-fault tolerant. */ + /* + * XXX This should probably check to see how many failures + * we can handle for this configuration ! + */ if (numFailuresThisRow > 0) raidPtr->status[r] = rf_rs_degraded; } - /* all disks must be the same size & have the same block size, bs must - * be a power of 2 */ + /* + * All disks must be the same size & have the same block size, bs must + * be a power of 2. 
+ */ bs = 0; for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) { for (c = 0; !foundone && c < raidPtr->numCol; c++) { @@ -182,7 +185,8 @@ rf_ConfigureDisks( listp, raidPtr, cfgPtr ) } } if (!foundone) { - RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); + RF_ERRORMSG("RAIDFRAME: Did not find any live disks in" + " the array.\n"); ret = EINVAL; goto fail; } @@ -190,32 +194,36 @@ rf_ConfigureDisks( listp, raidPtr, cfgPtr ) if (bs & i) count++; if (count != 1) { - RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); + RF_ERRORMSG1("Error: block size on disks (%d) must be a" + " power of 2.\n", bs); ret = EINVAL; goto fail; } - if (rf_CheckLabels( raidPtr, cfgPtr )) { + if (rf_CheckLabels(raidPtr, cfgPtr)) { printf("raid%d: There were fatal errors\n", raidPtr->raidid); if (force != 0) { printf("raid%d: Fatal errors being ignored.\n", - raidPtr->raidid); + raidPtr->raidid); } else { ret = EINVAL; goto fail; - } + } } for (r = 0; r < raidPtr->numRow; r++) { for (c = 0; c < raidPtr->numCol; c++) { if (disks[r][c].status == rf_ds_optimal) { if (disks[r][c].blockSize != bs) { - RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c); + RF_ERRORMSG2("Error: block size of" + " disk at r %d c %d different from" + " disk at r 0 c 0.\n", r, c); ret = EINVAL; goto fail; } if (disks[r][c].numBlocks != min_numblks) { - RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n", + RF_ERRORMSG3("WARNING: truncating disk" + " at r %d c %d to %d blocks.\n", r, c, (int) min_numblks); disks[r][c].numBlocks = min_numblks; } @@ -230,71 +238,76 @@ rf_ConfigureDisks( listp, raidPtr, cfgPtr ) return (0); fail: - rf_UnconfigureVnodes( raidPtr ); + rf_UnconfigureVnodes(raidPtr); return (ret); } /**************************************************************************** - * set up the data structures describing the spare disks in the array - * recall from the above comment that the spare disk descriptors are stored + * Set up the data structures describing the spare disks in the array. + * Recall from the above comment that the spare disk descriptors are stored * in row zero, which is specially expanded to hold them. ****************************************************************************/ -int -rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr ) - RF_ShutdownList_t ** listp; - RF_Raid_t * raidPtr; - RF_Config_t * cfgPtr; +int +rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr) { - int i, ret; + int i, ret; unsigned int bs; RF_RaidDisk_t *disks; - int num_spares_done; + int num_spares_done; num_spares_done = 0; - /* The space for the spares should have already been allocated by - * ConfigureDisks() */ + /* + * The space for the spares should have already been allocated by + * ConfigureDisks(). + */ disks = &raidPtr->Disks[0][raidPtr->numCol]; for (i = 0; i < raidPtr->numSpare; i++) { ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], - &disks[i], 0, raidPtr->numCol + i); + &disks[i], 0, raidPtr->numCol + i); if (ret) goto fail; if (disks[i].status != rf_ds_optimal) { - RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", - &cfgPtr->spare_names[i][0]); + RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", + &cfgPtr->spare_names[i][0]); } else { - disks[i].status = rf_ds_spare; /* change status to - * spare */ - DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i, - disks[i].devname, + /* Change status to spare. 
*/ + disks[i].status = rf_ds_spare; + DPRINTF6("Spare Disk %d: dev %s numBlocks %ld" + " blockSize %d (%ld MB).\n", i, disks[i].devname, (long int) disks[i].numBlocks, disks[i].blockSize, - (long int) disks[i].numBlocks * - disks[i].blockSize / 1024 / 1024); + (long int) disks[i].numBlocks * + disks[i].blockSize / 1024 / 1024); } num_spares_done++; } - /* check sizes and block sizes on spare disks */ + /* Check sizes and block sizes on spare disks. */ bs = 1 << raidPtr->logBytesPerSector; for (i = 0; i < raidPtr->numSpare; i++) { if (disks[i].blockSize != bs) { - RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); + RF_ERRORMSG3("Block size of %d on spare disk %s is" + " not the same as on other disks (%d).\n", + disks[i].blockSize, disks[i].devname, bs); ret = EINVAL; goto fail; } if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { - RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", - disks[i].devname, disks[i].blockSize, - (long int) raidPtr->sectorsPerDisk); + RF_ERRORMSG3("Spare disk %s (%d blocks) is too small" + " to serve as a spare (need %ld blocks).\n", + disks[i].devname, disks[i].blockSize, + (long int) raidPtr->sectorsPerDisk); ret = EINVAL; goto fail; } else if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { - RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk); + RF_ERRORMSG2("Warning: truncating spare disk" + " %s to %ld blocks.\n", disks[i].devname, + (long int) raidPtr->sectorsPerDisk); disks[i].numBlocks = raidPtr->sectorsPerDisk; } @@ -304,38 +317,37 @@ rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr ) fail: - /* Release the hold on the main components. We've failed to allocate - * a spare, and since we're failing, we need to free things.. - - XXX failing to allocate a spare is *not* that big of a deal... - We *can* survive without it, if need be, esp. if we get hot - adding working. - If we don't fail out here, then we need a way to remove this spare... - that should be easier to do here than if we are "live"... + /* + * Release the hold on the main components. We've failed to allocate + * a spare, and since we're failing, we need to free things... + * + * XXX Failing to allocate a spare is *not* that big of a deal... + * We *can* survive without it, if need be, esp. if we get hot + * adding working. + * If we don't fail out here, then we need a way to remove this spare... + * That should be easier to do here than if we are "live"... */ - rf_UnconfigureVnodes( raidPtr ); - + rf_UnconfigureVnodes(raidPtr); + return (ret); } -static int -rf_AllocDiskStructures(raidPtr, cfgPtr) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; +int +rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) { RF_RaidDisk_t **disks; int ret; int r; - RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), - (RF_RaidDisk_t **), raidPtr->cleanupList); + RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), + (RF_RaidDisk_t **), raidPtr->cleanupList); if (disks == NULL) { ret = ENOMEM; goto fail; } raidPtr->Disks = disks; - /* get space for the device-specific stuff... */ + /* Get space for the device-specific stuff... 
*/ RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, sizeof(struct raidcinfo *), (struct raidcinfo **), raidPtr->cleanupList); @@ -345,19 +357,20 @@ rf_AllocDiskStructures(raidPtr, cfgPtr) } for (r = 0; r < raidPtr->numRow; r++) { - /* We allocate RF_MAXSPARE on the first row so that we - have room to do hot-swapping of spares */ - RF_CallocAndAdd(disks[r], raidPtr->numCol - + ((r == 0) ? RF_MAXSPARE : 0), - sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), - raidPtr->cleanupList); + /* + * We allocate RF_MAXSPARE on the first row so that we + * have room to do hot-swapping of spares. + */ + RF_CallocAndAdd(disks[r], raidPtr->numCol + + ((r == 0) ? RF_MAXSPARE : 0), sizeof(RF_RaidDisk_t), + (RF_RaidDisk_t *), raidPtr->cleanupList); if (disks[r] == NULL) { ret = ENOMEM; goto fail; } - /* get more space for device specific stuff.. */ - RF_CallocAndAdd(raidPtr->raid_cinfo[r], - raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), + /* Get more space for device specific stuff... */ + RF_CallocAndAdd(raidPtr->raid_cinfo[r], raidPtr->numCol + + ((r == 0) ? raidPtr->numSpare : 0), sizeof(struct raidcinfo), (struct raidcinfo *), raidPtr->cleanupList); if (raidPtr->raid_cinfo[r] == NULL) { @@ -366,23 +379,21 @@ rf_AllocDiskStructures(raidPtr, cfgPtr) } } return(0); -fail: - rf_UnconfigureVnodes( raidPtr ); +fail: + rf_UnconfigureVnodes(raidPtr); return(ret); } -/* configure a single disk during auto-configuration at boot */ +/* Configure a single disk during auto-configuration at boot. */ int -rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; - RF_AutoConfig_t *auto_config; +rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, + RF_AutoConfig_t *auto_config) { RF_RaidDisk_t **disks; RF_RaidDisk_t *diskPtr; - RF_RowCol_t r, c; + RF_RowCol_t r, c; RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; int bs, ret; int numFailuresThisRow; @@ -392,9 +403,9 @@ rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) int mod_counter; int mod_counter_found; -#if DEBUG +#if DEBUG printf("Starting autoconfiguration of RAID set...\n"); -#endif +#endif /* DEBUG */ force = cfgPtr->force; ret = rf_AllocDiskStructures(raidPtr, cfgPtr); @@ -403,14 +414,14 @@ rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) disks = raidPtr->Disks; - /* assume the parity will be fine.. */ + /* Assume the parity will be fine... */ parity_good = RF_RAID_CLEAN; - /* Check for mod_counters that are too low */ + /* Check for mod_counters that are too low. */ mod_counter_found = 0; ac = auto_config; while(ac!=NULL) { - if (mod_counter_found==0) { + if (mod_counter_found == 0) { mod_counter = ac->clabel->mod_counter; mod_counter_found = 1; } else { @@ -418,7 +429,7 @@ rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) mod_counter = ac->clabel->mod_counter; } } - ac->flag = 0; /* clear the general purpose flag */ + ac->flag = 0; /* Clear the general purpose flag. */ ac = ac->next; } @@ -427,163 +438,184 @@ rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) for (c = 0; c < raidPtr->numCol; c++) { diskPtr = &disks[r][c]; - /* find this row/col in the autoconfig */ -#if DEBUG - printf("Looking for %d,%d in autoconfig\n",r,c); -#endif + /* Find this row/col in the autoconfig. */ +#if DEBUG + printf("Looking for %d,%d in autoconfig.\n", r, c); +#endif /* DEBUG */ ac = auto_config; while(ac!=NULL) { - if (ac->clabel==NULL) { - /* big-time bad news. */ + if (ac->clabel == NULL) { + /* Big-time bad news. 
*/ goto fail; } if ((ac->clabel->row == r) && (ac->clabel->column == c) && (ac->clabel->mod_counter == mod_counter)) { - /* it's this one... */ - /* flag it as 'used', so we don't - free it later. */ + /* It's this one... */ + /* + * Flag it as 'used', so we don't + * free it later. + */ ac->flag = 1; -#if DEBUG - printf("Found: %s at %d,%d\n", - ac->devname,r,c); -#endif - +#if DEBUG + printf("Found: %s at %d,%d.\n", + ac->devname, r, c); +#endif /* DEBUG */ + break; } - ac=ac->next; + ac = ac->next; } - if (ac==NULL) { - /* we didn't find an exact match with a - correct mod_counter above... can we - find one with an incorrect mod_counter - to use instead? (this one, if we find - it, will be marked as failed once the - set configures) - */ + if (ac == NULL) { + /* + * We didn't find an exact match with a + * correct mod_counter above... Can we + * find one with an incorrect mod_counter + * to use instead ? (This one, if we find + * it, will be marked as failed once the + * set configures) + */ ac = auto_config; while(ac!=NULL) { - if (ac->clabel==NULL) { - /* big-time bad news. */ + if (ac->clabel == NULL) { + /* Big-time bad news. */ goto fail; } if ((ac->clabel->row == r) && (ac->clabel->column == c)) { - /* it's this one... - flag it as 'used', so we - don't free it later. */ + /* + * It's this one... + * Flag it as 'used', so we + * don't free it later. + */ ac->flag = 1; -#if DEBUG - printf("Found(low mod_counter): %s at %d,%d\n", - ac->devname,r,c); -#endif - +#if DEBUG + printf("Found(low mod_counter)" + ": %s at %d,%d.\n", + ac->devname, r, c); +#endif /* DEBUG */ + break; } - ac=ac->next; + ac = ac->next; } } if (ac!=NULL) { - /* Found it. Configure it.. */ + /* Found it. Configure it... */ diskPtr->blockSize = ac->clabel->blockSize; diskPtr->numBlocks = ac->clabel->numBlocks; - /* Note: rf_protectedSectors is already - factored into numBlocks here */ + /* + * Note: rf_protectedSectors is already + * factored into numBlocks here. + */ raidPtr->raid_cinfo[r][c].ci_vp = ac->vp; raidPtr->raid_cinfo[r][c].ci_dev = ac->dev; memcpy(&raidPtr->raid_cinfo[r][c].ci_label, - ac->clabel, sizeof(*ac->clabel)); - sprintf(diskPtr->devname, "/dev/%s", - ac->devname); - - /* note the fact that this component was - autoconfigured. You'll need this info - later. Trust me :) */ + ac->clabel, sizeof(*ac->clabel)); + sprintf(diskPtr->devname, "/dev/%s", + ac->devname); + + /* + * Note the fact that this component was + * autoconfigured. You'll need this info + * later. Trust me :) + */ diskPtr->auto_configured = 1; diskPtr->dev = ac->dev; - - /* - * we allow the user to specify that + + /* + * We allow the user to specify that * only a fraction of the disks should - * be used this is just for debug: it - * speeds up the parity scan + * be used. This is just for debug: it + * speeds up the parity scan. */ - diskPtr->numBlocks = diskPtr->numBlocks * + diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100; - /* XXX these will get set multiple times, - but since we're autoconfiguring, they'd - better be always the same each time! - If not, this is the least of your worries */ + /* + * XXX These will get set multiple times, + * but since we're autoconfiguring, they'd + * better be always the same each time ! + * If not, this is the least of your worries. 
+ */ bs = diskPtr->blockSize; min_numblks = diskPtr->numBlocks; - /* this gets done multiple times, but that's - fine -- the serial number will be the same - for all components, guaranteed */ - raidPtr->serial_number = - ac->clabel->serial_number; - /* check the last time the label - was modified */ - if (ac->clabel->mod_counter != - mod_counter) { - /* Even though we've filled in all - of the above, we don't trust - this component since it's - modification counter is not - in sync with the rest, and we really - consider it to be failed. */ + /* + * This gets done multiple times, but that's + * fine -- the serial number will be the same + * for all components, guaranteed. + */ + raidPtr->serial_number = + ac->clabel->serial_number; + /* + * Check the last time the label + * was modified. + */ + if (ac->clabel->mod_counter != mod_counter) { + /* + * Even though we've filled in all + * of the above, we don't trust + * this component since it's + * modification counter is not + * in sync with the rest, and we really + * consider it to be failed. + */ disks[r][c].status = rf_ds_failed; numFailuresThisRow++; } else { - if (ac->clabel->clean != - RF_RAID_CLEAN) { + if (ac->clabel->clean != RF_RAID_CLEAN) + { parity_good = RF_RAID_DIRTY; } } } else { - /* Didn't find it at all!! - Component must really be dead */ + /* + * Didn't find it at all !!! + * Component must really be dead. + */ disks[r][c].status = rf_ds_failed; - sprintf(disks[r][c].devname,"component%d", - r * raidPtr->numCol + c); + sprintf(disks[r][c].devname, "component%d", + r * raidPtr->numCol + c); numFailuresThisRow++; } } - /* XXX fix for n-fault tolerant */ - /* XXX this should probably check to see how many failures - we can handle for this configuration! */ + /* XXX Fix for n-fault tolerant. */ + /* + * XXX This should probably check to see how many failures + * we can handle for this configuration ! + */ if (numFailuresThisRow > 0) raidPtr->status[r] = rf_rs_degraded; } - /* close the device for the ones that didn't get used */ + /* Close the device for the ones that didn't get used. */ ac = auto_config; - while(ac!=NULL) { + while(ac != NULL) { if (ac->flag == 0) { VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); vput(ac->vp); ac->vp = NULL; -#if DEBUG +#if DEBUG printf("Released %s from auto-config set.\n", - ac->devname); -#endif + ac->devname); +#endif /* DEBUG */ } ac = ac->next; } raidPtr->mod_counter = mod_counter; - /* note the state of the parity, if any */ + /* Note the state of the parity, if any. */ raidPtr->parity_good = parity_good; raidPtr->sectorsPerDisk = min_numblks; raidPtr->logBytesPerSector = ffs(bs) - 1; @@ -592,35 +624,31 @@ rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) return (0); fail: - - rf_UnconfigureVnodes( raidPtr ); + + rf_UnconfigureVnodes(raidPtr); return (ret); } -/* configure a single disk in the array */ -int -rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) - RF_Raid_t *raidPtr; - char *buf; - RF_RaidDisk_t *diskPtr; - RF_RowCol_t row; - RF_RowCol_t col; +/* Configure a single disk in the array. */ +int +rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr, + RF_RowCol_t row, RF_RowCol_t col) { - char *p; - int retcode; + char *p; + int retcode; struct partinfo dpart; struct vnode *vp; struct vattr va; struct proc *proc; - int error; + int error; retcode = 0; p = rf_find_non_white(buf); if (p[strlen(p) - 1] == '\n') { - /* strip off the newline */ + /* Strip off the newline. 
*/ p[strlen(p) - 1] = '\0'; } (void) strcpy(diskPtr->devname, p); @@ -639,9 +667,9 @@ rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) error = raidlookup(diskPtr->devname, proc, &vp); if (error) { - printf("raidlookup on device: %s failed!\n", diskPtr->devname); + printf("raidlookup on device: %s failed !\n", diskPtr->devname); if (error == ENXIO) { - /* the component isn't there... must be dead :-( */ + /* The component isn't there... Must be dead :-( */ diskPtr->status = rf_ds_failed; } else { return (error); @@ -652,8 +680,8 @@ rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { return (error); } - error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, - FREAD, proc->p_ucred, proc); + error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, FREAD, + proc->p_ucred, proc); if (error) { return (error); } @@ -661,103 +689,92 @@ rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; diskPtr->partitionSize = dpart.part->p_size; - + raidPtr->raid_cinfo[row][col].ci_vp = vp; raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev; - /* This component was not automatically configured */ + /* This component was not automatically configured. */ diskPtr->auto_configured = 0; diskPtr->dev = va.va_rdev; - /* we allow the user to specify that only a fraction of the - * disks should be used this is just for debug: it speeds up - * the parity scan */ - diskPtr->numBlocks = diskPtr->numBlocks * - rf_sizePercentage / 100; + /* + * We allow the user to specify that only a fraction of the + * disks should be used. This is just for debug: it speeds up + * the parity scan. + */ + diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage + / 100; } return (0); } -static void -rf_print_label_status( raidPtr, row, column, dev_name, ci_label ) - RF_Raid_t *raidPtr; - int row; - int column; - char *dev_name; - RF_ComponentLabel_t *ci_label; +void +rf_print_label_status(RF_Raid_t *raidPtr, int row, int column, char *dev_name, + RF_ComponentLabel_t *ci_label) { - printf("raid%d: Component %s being configured at row: %d col: %d\n", - raidPtr->raidid, dev_name, row, column ); + printf("raid%d: Component %s being configured at row: %d col: %d\n", + raidPtr->raidid, dev_name, row, column); printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", - ci_label->row, ci_label->column, - ci_label->num_rows, ci_label->num_columns); + ci_label->row, ci_label->column, ci_label->num_rows, + ci_label->num_columns); printf(" Version: %d Serial Number: %d Mod Counter: %d\n", - ci_label->version, ci_label->serial_number, - ci_label->mod_counter); + ci_label->version, ci_label->serial_number, ci_label->mod_counter); printf(" Clean: %s Status: %d\n", - ci_label->clean ? "Yes" : "No", ci_label->status ); + ci_label->clean ? 
"Yes" : "No", ci_label->status); } -static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label, - serial_number, mod_counter ) - RF_Raid_t *raidPtr; - int row; - int column; - char *dev_name; - RF_ComponentLabel_t *ci_label; - int serial_number; - int mod_counter; +int +rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column, char *dev_name, + RF_ComponentLabel_t *ci_label, int serial_number, int mod_counter) { int fatal_error = 0; if (serial_number != ci_label->serial_number) { - printf("%s has a different serial number: %d %d\n", - dev_name, serial_number, ci_label->serial_number); + printf("%s has a different serial number: %d %d.\n", + dev_name, serial_number, ci_label->serial_number); fatal_error = 1; } if (mod_counter != ci_label->mod_counter) { - printf("%s has a different modfication count: %d %d\n", - dev_name, mod_counter, ci_label->mod_counter); + printf("%s has a different modfication count: %d %d.\n", + dev_name, mod_counter, ci_label->mod_counter); } - + if (row != ci_label->row) { - printf("Row out of alignment for: %s\n", dev_name); + printf("Row out of alignment for: %s.\n", dev_name); fatal_error = 1; } if (column != ci_label->column) { - printf("Column out of alignment for: %s\n", dev_name); + printf("Column out of alignment for: %s.\n", dev_name); fatal_error = 1; } if (raidPtr->numRow != ci_label->num_rows) { - printf("Number of rows do not match for: %s\n", dev_name); + printf("Number of rows do not match for: %s.\n", dev_name); fatal_error = 1; } if (raidPtr->numCol != ci_label->num_columns) { - printf("Number of columns do not match for: %s\n", dev_name); + printf("Number of columns do not match for: %s.\n", dev_name); fatal_error = 1; } if (ci_label->clean == 0) { - /* it's not clean, but that's not fatal */ - printf("%s is not clean!\n", dev_name); + /* It's not clean, but that's not fatal. */ + printf("%s is not clean !\n", dev_name); } return(fatal_error); } -/* - - rf_CheckLabels() - check all the component labels for consistency. - Return an error if there is anything major amiss. - +/* + * + * rf_CheckLabels() - Check all the component labels for consistency. + * Return an error if there is anything major amiss. + * */ -int -rf_CheckLabels( raidPtr, cfgPtr ) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; +int +rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) { - int r,c; + int r, c; char *dev_name; RF_ComponentLabel_t *ci_label; int serial_number = 0; @@ -782,32 +799,32 @@ rf_CheckLabels( raidPtr, cfgPtr ) too_fatal = 0; force = cfgPtr->force; - /* - We're going to try to be a little intelligent here. If one - component's label is bogus, and we can identify that it's the - *only* one that's gone, we'll mark it as "failed" and allow - the configuration to proceed. This will be the *only* case - that we'll proceed if there would be (otherwise) fatal errors. - - Basically we simply keep a count of how many components had - what serial number. If all but one agree, we simply mark - the disagreeing component as being failed, and allow - things to come up "normally". - - We do this first for serial numbers, and then for "mod_counter". - + /* + * We're going to try to be a little intelligent here. If one + * component's label is bogus, and we can identify that it's the + * *only* one that's gone, we'll mark it as "failed" and allow + * the configuration to proceed. This will be the *only* case + * that we'll proceed if there would be (otherwise) fatal errors. 
+ * + * Basically we simply keep a count of how many components had + * what serial number. If all but one agree, we simply mark + * the disagreeing component as being failed, and allow + * things to come up "normally". + * + * We do this first for serial numbers, and then for "mod_counter". + * */ num_ser = 0; num_mod = 0; - for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) { + for (r = 0; r < raidPtr->numRow && !fatal_error; r++) { for (c = 0; c < raidPtr->numCol; c++) { ci_label = &raidPtr->raid_cinfo[r][c].ci_label; - found=0; - for(i=0;i<num_ser;i++) { + found = 0; + for(i = 0; i < num_ser; i++) { if (ser_values[i] == ci_label->serial_number) { ser_count[i]++; - found=1; + found = 1; break; } } @@ -815,51 +832,52 @@ rf_CheckLabels( raidPtr, cfgPtr ) ser_values[num_ser] = ci_label->serial_number; ser_count[num_ser] = 1; num_ser++; - if (num_ser>2) { + if (num_ser > 2) { fatal_error = 1; break; } } - found=0; - for(i=0;i<num_mod;i++) { + found = 0; + for(i = 0; i < num_mod; i++) { if (mod_values[i] == ci_label->mod_counter) { mod_count[i]++; - found=1; + found = 1; break; } } if (!found) { - mod_values[num_mod] = ci_label->mod_counter; + mod_values[num_mod] = ci_label->mod_counter; mod_count[num_mod] = 1; num_mod++; - if (num_mod>2) { + if (num_mod > 2) { fatal_error = 1; break; } } } } -#if DEBUG +#if DEBUG printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid); - for(i=0;i<num_ser;i++) { + for(i = 0; i < num_ser; i++) { printf("%d %d\n", ser_values[i], ser_count[i]); } printf("raid%d: Summary of mod counters:\n", raidPtr->raidid); - for(i=0;i<num_mod;i++) { + for(i = 0; i < num_mod; i++) { printf("%d %d\n", mod_values[i], mod_count[i]); } -#endif +#endif /* DEBUG */ serial_number = ser_values[0]; if (num_ser == 2) { if ((ser_count[0] == 1) || (ser_count[1] == 1)) { - /* Locate the maverick component */ + /* Locate the maverick component. */ if (ser_count[1] > ser_count[0]) { serial_number = ser_values[1]; - } + } for (r = 0; r < raidPtr->numRow; r++) { for (c = 0; c < raidPtr->numCol; c++) { - ci_label = &raidPtr->raid_cinfo[r][c].ci_label; - if (serial_number != + ci_label = + &raidPtr->raid_cinfo[r][c].ci_label; + if (serial_number != ci_label->serial_number) { hosed_row = r; hosed_column = c; @@ -867,14 +885,16 @@ rf_CheckLabels( raidPtr, cfgPtr ) } } } - printf("Hosed component: %s\n", - &cfgPtr->devnames[hosed_row][hosed_column][0]); + printf("Hosed component: %s.\n", + &cfgPtr->devnames[hosed_row][hosed_column][0]); if (!force) { - /* we'll fail this component, as if there are - other major errors, we arn't forcing things - and we'll abort the config anyways */ + /* + * We'll fail this component, as if there are + * other major errors, we aren't forcing things + * and we'll abort the config anyways. + */ raidPtr->Disks[hosed_row][hosed_column].status - = rf_ds_failed; + = rf_ds_failed; raidPtr->numFailures++; raidPtr->status[hosed_row] = rf_rs_degraded; } @@ -882,47 +902,56 @@ rf_CheckLabels( raidPtr, cfgPtr ) too_fatal = 1; } if (cfgPtr->parityConfig == '0') { - /* We've identified two different serial numbers. - RAID 0 can't cope with that, so we'll punt */ + /* + * We've identified two different serial numbers. + * RAID 0 can't cope with that, so we'll punt. + */ too_fatal = 1; } - } + } - /* record the serial number for later. If we bail later, setting - this doesn't matter, otherwise we've got the best guess at the - correct serial number */ + /* + * Record the serial number for later. 
If we bail later, setting + * this doesn't matter, otherwise we've got the best guess at the + * correct serial number. + */ raidPtr->serial_number = serial_number; mod_number = mod_values[0]; if (num_mod == 2) { if ((mod_count[0] == 1) || (mod_count[1] == 1)) { - /* Locate the maverick component */ + /* Locate the maverick component. */ if (mod_count[1] > mod_count[0]) { mod_number = mod_values[1]; } else if (mod_count[1] < mod_count[0]) { mod_number = mod_values[0]; } else { - /* counts of different modification values - are the same. Assume greater value is - the correct one, all other things - considered */ + /* + * Counts of different modification values + * are the same. Assume greater value is + * the correct one, all other things + * considered. + */ if (mod_values[0] > mod_values[1]) { mod_number = mod_values[0]; } else { mod_number = mod_values[1]; } - + } - for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) { + for (r = 0; r < raidPtr->numRow && !too_fatal; r++) { for (c = 0; c < raidPtr->numCol; c++) { - ci_label = &raidPtr->raid_cinfo[r][c].ci_label; - if (mod_number != + ci_label = + &raidPtr->raid_cinfo[r][c].ci_label; + if (mod_number != ci_label->mod_counter) { - if ( ( hosed_row == r ) && - ( hosed_column == c )) { - /* same one. Can - deal with it. */ + if ((hosed_row == r) && + (hosed_column == c)) { + /* + * Same one. Can + * deal with it. + */ } else { hosed_row = r; hosed_column = c; @@ -934,52 +963,64 @@ rf_CheckLabels( raidPtr, cfgPtr ) } } } - printf("Hosed component: %s\n", - &cfgPtr->devnames[hosed_row][hosed_column][0]); + printf("Hosed component: %s.\n", + &cfgPtr->devnames[hosed_row][hosed_column][0]); if (!force) { - /* we'll fail this component, as if there are - other major errors, we arn't forcing things - and we'll abort the config anyways */ - if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) { - raidPtr->Disks[hosed_row][hosed_column].status - = rf_ds_failed; + /* + * We'll fail this component, as if there are + * other major errors, we aren't forcing things + * and we'll abort the config anyways. + */ + if (raidPtr + ->Disks[hosed_row][hosed_column].status != + rf_ds_failed) { + raidPtr->Disks[hosed_row] + [hosed_column].status = + rf_ds_failed; raidPtr->numFailures++; - raidPtr->status[hosed_row] = rf_rs_degraded; + raidPtr->status[hosed_row] = + rf_rs_degraded; } } } else { too_fatal = 1; } if (cfgPtr->parityConfig == '0') { - /* We've identified two different mod counters. - RAID 0 can't cope with that, so we'll punt */ + /* + * We've identified two different mod counters. + * RAID 0 can't cope with that, so we'll punt. + */ too_fatal = 1; } - } + } raidPtr->mod_counter = mod_number; if (too_fatal) { - /* we've had both a serial number mismatch, and a mod_counter - mismatch -- and they involved two different components!! - Bail -- make things fail so that the user must force - the issue... */ + /* + * We've had both a serial number mismatch, and a mod_counter + * mismatch -- and they involved two different components !!! + * Bail -- make things fail so that the user must force + * the issue... 
+ */ hosed_row = -1; hosed_column = -1; } if (num_ser > 2) { - printf("raid%d: Too many different serial numbers!\n", - raidPtr->raidid); + printf("raid%d: Too many different serial numbers !\n", + raidPtr->raidid); } if (num_mod > 2) { - printf("raid%d: Too many different mod counters!\n", - raidPtr->raidid); + printf("raid%d: Too many different mod counters !\n", + raidPtr->raidid); } - /* we start by assuming the parity will be good, and flee from - that notion at the slightest sign of trouble */ + /* + * We start by assuming the parity will be good, and flee from + * that notion at the slightest sign of trouble. + */ parity_good = RF_RAID_CLEAN; for (r = 0; r < raidPtr->numRow; r++) { @@ -988,15 +1029,14 @@ rf_CheckLabels( raidPtr, cfgPtr ) ci_label = &raidPtr->raid_cinfo[r][c].ci_label; if ((r == hosed_row) && (c == hosed_column)) { - printf("raid%d: Ignoring %s\n", - raidPtr->raidid, dev_name); - } else { - rf_print_label_status( raidPtr, r, c, - dev_name, ci_label ); - if (rf_check_label_vitals( raidPtr, r, c, - dev_name, ci_label, - serial_number, - mod_number )) { + printf("raid%d: Ignoring %s.\n", + raidPtr->raidid, dev_name); + } else { + rf_print_label_status(raidPtr, r, c, dev_name, + ci_label); + if (rf_check_label_vitals(raidPtr, r, c, + dev_name, ci_label, serial_number, + mod_number)) { fatal_error = 1; } if (ci_label->clean != RF_RAID_CLEAN) { @@ -1009,16 +1049,14 @@ rf_CheckLabels( raidPtr, cfgPtr ) parity_good = RF_RAID_DIRTY; } - /* we note the state of the parity */ + /* We note the state of the parity. */ raidPtr->parity_good = parity_good; - return(fatal_error); + return(fatal_error); } int -rf_add_hot_spare(raidPtr, sparePtr) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *sparePtr; +rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr) { RF_RaidDisk_t *disks; RF_DiskQueue_t *spareQueues; @@ -1027,78 +1065,77 @@ rf_add_hot_spare(raidPtr, sparePtr) int spare_number; #if 0 - printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare); - printf("Num col: %d\n",raidPtr->numCol); + printf("Just in rf_add_hot_spare: %d.\n", raidPtr->numSpare); + printf("Num col: %d.\n", raidPtr->numCol); #endif if (raidPtr->numSpare >= RF_MAXSPARE) { - RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); + RF_ERRORMSG1("Too many spares: %d.\n", raidPtr->numSpare); return(EINVAL); } RF_LOCK_MUTEX(raidPtr->mutex); - /* the beginning of the spares... */ + /* The beginning of the spares... 
*/ disks = &raidPtr->Disks[0][raidPtr->numCol]; spare_number = raidPtr->numSpare; ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, - &disks[spare_number], 0, - raidPtr->numCol + spare_number); + &disks[spare_number], 0, raidPtr->numCol + spare_number); if (ret) goto fail; if (disks[spare_number].status != rf_ds_optimal) { - RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", - sparePtr->component_name); - ret=EINVAL; + RF_ERRORMSG1("Warning: spare disk %s failed TUR.\n", + sparePtr->component_name); + ret = EINVAL; goto fail; } else { disks[spare_number].status = rf_ds_spare; - DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number, - disks[spare_number].devname, - (long int) disks[spare_number].numBlocks, - disks[spare_number].blockSize, - (long int) disks[spare_number].numBlocks * - disks[spare_number].blockSize / 1024 / 1024); + DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d" + " (%ld MB).\n", spare_number, disks[spare_number].devname, + (long int) disks[spare_number].numBlocks, + disks[spare_number].blockSize, + (long int) disks[spare_number].numBlocks * + disks[spare_number].blockSize / 1024 / 1024); } - - /* check sizes and block sizes on the spare disk */ + + /* Check sizes and block sizes on the spare disk. */ bs = 1 << raidPtr->logBytesPerSector; if (disks[spare_number].blockSize != bs) { - RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); + RF_ERRORMSG3("Block size of %d on spare disk %s is not" + " the same as on other disks (%d).\n", + disks[spare_number].blockSize, + disks[spare_number].devname, bs); ret = EINVAL; goto fail; } if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { - RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", - disks[spare_number].devname, - disks[spare_number].blockSize, - (long int) raidPtr->sectorsPerDisk); + RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve" + " as a spare (need %ld blocks).\n", + disks[spare_number].devname, disks[spare_number].blockSize, + (long int) raidPtr->sectorsPerDisk); ret = EINVAL; goto fail; } else { - if (disks[spare_number].numBlocks > + if (disks[spare_number].numBlocks > raidPtr->sectorsPerDisk) { - RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname, - (long int) raidPtr->sectorsPerDisk); - + RF_ERRORMSG2("Warning: truncating spare disk %s to %ld" + " blocks.\n", disks[spare_number].devname, + (long int) raidPtr->sectorsPerDisk); + disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; } } spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; - ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], - 0, raidPtr->numCol + spare_number, - raidPtr->qType, - raidPtr->sectorsPerDisk, - raidPtr->Disks[0][raidPtr->numCol + - spare_number].dev, - raidPtr->maxOutstanding, - &raidPtr->shutdownList, - raidPtr->cleanupList); - + ret = rf_ConfigureDiskQueue(raidPtr, &spareQueues[spare_number], + 0, raidPtr->numCol + spare_number, raidPtr->qType, + raidPtr->sectorsPerDisk, raidPtr->Disks[0][raidPtr->numCol + + spare_number].dev, raidPtr->maxOutstanding, + &raidPtr->shutdownList, raidPtr->cleanupList); + raidPtr->numSpare++; RF_UNLOCK_MUTEX(raidPtr->mutex); @@ -1110,29 +1147,26 @@ fail: } int -rf_remove_hot_spare(raidPtr,sparePtr) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *sparePtr; +rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr) { int 
spare_number; - - if (raidPtr->numSpare==0) { - printf("No spares to remove!\n"); + if (raidPtr->numSpare == 0) { + printf("No spares to remove !\n"); return(EINVAL); } spare_number = sparePtr->column; - return(EINVAL); /* XXX not implemented yet */ + return(EINVAL); /* XXX Not implemented yet. */ #if 0 if (spare_number < 0 || spare_number > raidPtr->numSpare) { return(EINVAL); } - /* verify that this spare isn't in use... */ + /* Verify that this spare isn't in use... */ - /* it's gone.. */ + /* It's gone... */ raidPtr->numSpare--; @@ -1141,34 +1175,32 @@ rf_remove_hot_spare(raidPtr,sparePtr) } int -rf_delete_component(raidPtr,component) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *component; +rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component) { RF_RaidDisk_t *disks; - if ((component->row < 0) || + if ((component->row < 0) || (component->row >= raidPtr->numRow) || - (component->column < 0) || + (component->column < 0) || (component->column >= raidPtr->numCol)) { return(EINVAL); } disks = &raidPtr->Disks[component->row][component->column]; - /* 1. This component must be marked as 'failed' */ + /* 1. This component must be marked as 'failed'. */ return(EINVAL); /* Not implemented yet. */ } int -rf_incorporate_hot_spare(raidPtr,component) - RF_Raid_t *raidPtr; - RF_SingleComponent_t *component; +rf_incorporate_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *component) { - /* Issues here include how to 'move' this in if there is IO - taking place (e.g. component queues and such) */ + /* + * Issues here include how to 'move' this in if there is IO + * taking place (e.g. component queues and such). + */ return(EINVAL); /* Not implemented yet. */ } diff --git a/sys/dev/raidframe/rf_disks.h b/sys/dev/raidframe/rf_disks.h index bb15d365b5e..f6ea2e666d2 100644 --- a/sys/dev/raidframe/rf_disks.h +++ b/sys/dev/raidframe/rf_disks.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_disks.h,v 1.4 2000/08/08 16:07:40 peter Exp $ */ +/* $OpenBSD: rf_disks.h,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_disks.h,v 1.8 2000/03/27 03:25:17 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,20 +29,20 @@ */ /* - * rf_disks.h -- header file for code related to physical disks + * rf_disks.h -- Header file for code related to physical disks. */ -#ifndef _RF__RF_DISKS_H_ -#define _RF__RF_DISKS_H_ +#ifndef _RF__RF_DISKS_H_ +#define _RF__RF_DISKS_H_ #include <sys/types.h> #include "rf_archs.h" #include "rf_types.h" -#if defined(__NetBSD__) +#if defined(__NetBSD__) #include "rf_netbsd.h" -#elif defined(__OpenBSD__) +#elif defined(__OpenBSD__) #include "rf_openbsd.h" #endif @@ -50,69 +51,86 @@ * IF YOU ADD A STATE, CHECK TO SEE IF YOU NEED TO MODIFY RF_DEAD_DISK() BELOW. */ enum RF_DiskStatus_e { - rf_ds_optimal, /* no problems */ - rf_ds_failed, /* reconstruction ongoing */ - rf_ds_reconstructing, /* reconstruction complete to spare, dead disk - * not yet replaced */ - rf_ds_dist_spared, /* reconstruction complete to distributed - * spare space, dead disk not yet replaced */ - rf_ds_spared, /* reconstruction complete to distributed - * spare space, dead disk not yet replaced */ - rf_ds_spare, /* an available spare disk */ - rf_ds_used_spare /* a spare which has been used, and hence is - * not available */ + rf_ds_optimal, /* No problems. */ + rf_ds_failed, /* Reconstruction ongoing. */ + rf_ds_reconstructing, /* + * Reconstruction complete to spare, dead disk + * not yet replaced. 
+ */ + rf_ds_dist_spared, /* + * Reconstruction complete to distributed + * spare space, dead disk not yet replaced. + */ + rf_ds_spared, /* + * Reconstruction complete to distributed + * spare space, dead disk not yet replaced. + */ + rf_ds_spare, /* An available spare disk. */ + rf_ds_used_spare /* + * A spare which has been used, and hence is + * not available. + */ }; typedef enum RF_DiskStatus_e RF_DiskStatus_t; struct RF_RaidDisk_s { - char devname[56]; /* name of device file */ - RF_DiskStatus_t status; /* whether it is up or down */ - RF_RowCol_t spareRow; /* if in status "spared", this identifies the - * spare disk */ - RF_RowCol_t spareCol; /* if in status "spared", this identifies the - * spare disk */ - RF_SectorCount_t numBlocks; /* number of blocks, obtained via READ - * CAPACITY */ - int blockSize; - RF_SectorCount_t partitionSize; /* The *actual* and *full* size of - the partition, from the disklabel */ - int auto_configured;/* 1 if this component was autoconfigured. - 0 otherwise. */ - dev_t dev; + char devname[56]; /* Name of device file. */ + RF_DiskStatus_t status; /* Whether it is up or down. */ + RF_RowCol_t spareRow; /* + * If in status "spared", this + * identifies the spare disk. + */ + RF_RowCol_t spareCol; /* + * If in status "spared", this + * identifies the spare disk. + */ + RF_SectorCount_t numBlocks; /* + * Number of blocks, obtained via + * READ CAPACITY. + */ + int blockSize; + RF_SectorCount_t partitionSize; /* + * The *actual* and *full* size of + * the partition, from the disklabel. + */ + int auto_configured; /* + * 1 if this component was + * autoconfigured. 0 otherwise. + */ + dev_t dev; }; + /* * An RF_DiskOp_t ptr is really a pointer to a UAGT_CCB, but I want - * to isolate the cam layer from all other layers, so I typecast to/from + * to isolate the CAM layer from all other layers, so I typecast to/from * RF_DiskOp_t * (i.e. void *) at the interfaces. */ typedef void RF_DiskOp_t; -/* if a disk is in any of these states, it is inaccessible */ -#define RF_DEAD_DISK(_dstat_) (((_dstat_) == rf_ds_spared) || \ - ((_dstat_) == rf_ds_reconstructing) || ((_dstat_) == rf_ds_failed) || \ - ((_dstat_) == rf_ds_dist_spared)) +/* If a disk is in any of these states, it is inaccessible. 
*/ +#define RF_DEAD_DISK(_dstat_) \ + (((_dstat_) == rf_ds_spared) || \ + ((_dstat_) == rf_ds_reconstructing) || \ + ((_dstat_) == rf_ds_failed) || \ + ((_dstat_) == rf_ds_dist_spared)) -#ifdef _KERNEL -#if defined(__NetBSD__) +#ifdef _KERNEL +#if defined(__NetBSD__) #include "rf_netbsd.h" -#elif defined(__OpenBSD__) +#elif defined(__OpenBSD__) #include "rf_openbsd.h" #endif -int rf_ConfigureDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_ConfigureDisk(RF_Raid_t * raidPtr, char *buf, RF_RaidDisk_t * diskPtr, - RF_RowCol_t row, RF_RowCol_t col); -int rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, - RF_AutoConfig_t *auto_config); -int rf_CheckLabels( RF_Raid_t *, RF_Config_t *); -int rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); -int rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); -int rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component); -int rf_incorporate_hot_spare(RF_Raid_t *raidPtr, - RF_SingleComponent_t *component); -#endif /* _KERNEL */ +int rf_ConfigureDisks(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +int rf_ConfigureSpareDisks(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +int rf_ConfigureDisk(RF_Raid_t *, char *, RF_RaidDisk_t *, + RF_RowCol_t, RF_RowCol_t); +int rf_AutoConfigureDisks(RF_Raid_t *, RF_Config_t *, RF_AutoConfig_t *); +int rf_CheckLabels(RF_Raid_t *, RF_Config_t *); +int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *); +int rf_remove_hot_spare(RF_Raid_t *, RF_SingleComponent_t *); +int rf_delete_component(RF_Raid_t *, RF_SingleComponent_t *); +int rf_incorporate_hot_spare(RF_Raid_t *, RF_SingleComponent_t *); +#endif /* _KERNEL */ -#endif /* !_RF__RF_DISKS_H_ */ +#endif /* !_RF__RF_DISKS_H_ */ diff --git a/sys/dev/raidframe/rf_driver.c b/sys/dev/raidframe/rf_driver.c index 57580f7b390..3a5e00f74b3 100644 --- a/sys/dev/raidframe/rf_driver.c +++ b/sys/dev/raidframe/rf_driver.c @@ -1,6 +1,7 @@ -/* $OpenBSD: rf_driver.c,v 1.10 2002/08/09 15:10:20 tdeval Exp $ */ +/* $OpenBSD: rf_driver.c,v 1.11 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_driver.c,v 1.37 2000/06/04 02:05:13 oster Exp $ */ -/*- + +/* * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * @@ -17,8 +18,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. @@ -40,8 +41,9 @@ * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. * - * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II, - * Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka + * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, + * William V. 
Courtright II, Robby Findler, Daniel Stodolsky, + * Rachad Youssef, Jim Zelenka * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright @@ -64,20 +66,21 @@ * rights to redistribute these changes. */ -/****************************************************************************** +/***************************************************************************** * - * rf_driver.c -- main setup, teardown, and access routines for the RAID driver + * rf_driver.c -- Main setup, teardown, and access routines for the RAID + * driver * - * all routines are prefixed with rf_ (raidframe), to avoid conficts. + * All routines are prefixed with rf_ (RAIDframe), to avoid conficts. * - ******************************************************************************/ + *****************************************************************************/ #include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> #include <sys/ioctl.h> #include <sys/fcntl.h> -#ifdef __NetBSD__ +#ifdef __NetBSD__ #include <sys/vnode.h> #endif @@ -122,51 +125,61 @@ /* rad == RF_RaidAccessDesc_t */ static RF_FreeList_t *rf_rad_freelist; -#define RF_MAX_FREE_RAD 128 -#define RF_RAD_INC 16 -#define RF_RAD_INITIAL 32 +#define RF_MAX_FREE_RAD 128 +#define RF_RAD_INC 16 +#define RF_RAD_INITIAL 32 -/* debug variables */ -char rf_panicbuf[2048]; /* a buffer to hold an error msg when we panic */ +/* Debug variables. */ +char rf_panicbuf[2048]; /* + * A buffer to hold an error msg when we panic. + */ -/* main configuration routines */ +/* Main configuration routines. */ static int raidframe_booted = 0; -static void rf_ConfigureDebug(RF_Config_t * cfgPtr); -static void set_debug_option(char *name, long val); -static void rf_UnconfigureArray(void); -static int init_rad(RF_RaidAccessDesc_t *); -static void clean_rad(RF_RaidAccessDesc_t *); -static void rf_ShutdownRDFreeList(void *); -static int rf_ConfigureRDFreeList(RF_ShutdownList_t **); - -RF_DECLARE_MUTEX(rf_printf_mutex) /* debug only: avoids interleaved - * printfs by different stripes */ - -#define SIGNAL_QUIESCENT_COND(_raid_) wakeup(&((_raid_)->accesses_suspended)) -#define WAIT_FOR_QUIESCENCE(_raid_) \ - tsleep(&((_raid_)->accesses_suspended),PRIBIO,"raidframe quiesce", 0); - -#define IO_BUF_ERR(bp, err) { \ - bp->b_flags |= B_ERROR; \ - bp->b_resid = bp->b_bcount; \ - bp->b_error = err; \ - biodone(bp); \ -} - -static int configureCount = 0; /* number of active configurations */ -static int isconfigged = 0; /* is basic raidframe (non per-array) - * stuff configged */ -RF_DECLARE_STATIC_MUTEX(configureMutex) /* used to lock the configuration - * stuff */ -static RF_ShutdownList_t *globalShutdown; /* non array-specific stuff */ -static int rf_ConfigureRDFreeList(RF_ShutdownList_t ** listp); - -/* called at system boot time */ +void rf_ConfigureDebug(RF_Config_t *); +void rf_set_debug_option(char *, long); +void rf_UnconfigureArray(void); +int rf_init_rad(RF_RaidAccessDesc_t *); +void rf_clean_rad(RF_RaidAccessDesc_t *); +void rf_ShutdownRDFreeList(void *); +int rf_ConfigureRDFreeList(RF_ShutdownList_t **); + +RF_DECLARE_MUTEX(rf_printf_mutex); /* + * Debug only: Avoids interleaved + * printfs by different stripes. 
+ */ + +#define SIGNAL_QUIESCENT_COND(_raid_) wakeup(&((_raid_)->accesses_suspended)) +#define WAIT_FOR_QUIESCENCE(_raid_) \ + tsleep(&((_raid_)->accesses_suspended), PRIBIO, "RAIDframe quiesce", 0); + +#define IO_BUF_ERR(bp, err) \ +do { \ + bp->b_flags |= B_ERROR; \ + bp->b_resid = bp->b_bcount; \ + bp->b_error = err; \ + biodone(bp); \ +} while (0) + +static int configureCount = 0; /* Number of active configurations. */ +static int isconfigged = 0; /* + * Is basic RAIDframe (non per-array) + * stuff configured ? + */ +RF_DECLARE_STATIC_MUTEX(configureMutex); /* + * Used to lock the + * configuration stuff. + */ +static RF_ShutdownList_t *globalShutdown; /* Non array-specific stuff. */ +int rf_ConfigureRDFreeList(RF_ShutdownList_t **); + + +/* Called at system boot time. */ int -rf_BootRaidframe() +rf_BootRaidframe(void) { - int rc; + int rc; if (raidframe_booted) return (EBUSY); @@ -174,8 +187,8 @@ rf_BootRaidframe() rc = rf_mutex_init(&configureMutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n", + __FILE__, __LINE__, rc); RF_PANIC(); } configureCount = 0; @@ -183,16 +196,18 @@ rf_BootRaidframe() globalShutdown = NULL; return (0); } + + /* - * This function is really just for debugging user-level stuff: it - * frees up all memory, other RAIDframe resources which might otherwise + * This function is really just for debugging user-level stuff: It + * frees up all memory, other RAIDframe resources that might otherwise * be kept around. This is used with systems like "sentinel" to detect * memory leaks. */ -int -rf_UnbootRaidframe() +int +rf_UnbootRaidframe(void) { - int rc; + int rc; RF_LOCK_MUTEX(configureMutex); if (configureCount) { @@ -203,58 +218,65 @@ rf_UnbootRaidframe() RF_UNLOCK_MUTEX(configureMutex); rc = rf_mutex_destroy(&configureMutex); if (rc) { - RF_ERRORMSG3("Unable to destroy mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to destroy mutex file %s line %d" + " rc=%d.\n", __FILE__, __LINE__, rc); RF_PANIC(); } return (0); } + + /* - * Called whenever an array is shutdown + * Called whenever an array is shutdown. */ -static void -rf_UnconfigureArray() +void +rf_UnconfigureArray(void) { - int rc; + int rc; RF_LOCK_MUTEX(configureMutex); - if (--configureCount == 0) { /* if no active configurations, shut - * everything down */ + if (--configureCount == 0) { /* + * If no active configurations, shut + * everything down. + */ isconfigged = 0; rc = rf_ShutdownList(&globalShutdown); if (rc) { - RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown, rc=%d\n", rc); + RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown," + " rc=%d.\n", rc); } /* - * We must wait until now, because the AllocList module - * uses the DebugMem module. - */ + * We must wait until now, because the AllocList module + * uses the DebugMem module. + */ if (rf_memDebug) rf_print_unfreed(); } RF_UNLOCK_MUTEX(configureMutex); } + + /* * Called to shut down an array. */ -int -rf_Shutdown(raidPtr) - RF_Raid_t *raidPtr; +int +rf_Shutdown(RF_Raid_t *raidPtr) { if (!raidPtr->valid) { - RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n"); + RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe" + " driver. 
Aborting shutdown.\n"); return (EINVAL); } /* - * wait for outstanding IOs to land - * As described in rf_raid.h, we use the rad_freelist lock - * to protect the per-array info about outstanding descs - * since we need to do freelist locking anyway, and this - * cuts down on the amount of serialization we've got going - * on. - */ + * Wait for outstanding IOs to land. + * As described in rf_raid.h, we use the rad_freelist lock + * to protect the per-array info about outstanding descs, + * since we need to do freelist locking anyway, and this + * cuts down on the amount of serialization we've got going + * on. + */ RF_FREELIST_DO_LOCK(rf_rad_freelist); if (raidPtr->waitShutdown) { RF_FREELIST_DO_UNLOCK(rf_rad_freelist); @@ -286,67 +308,73 @@ rf_Shutdown(raidPtr) return (0); } -#define DO_INIT_CONFIGURE(f) { \ - rc = f (&globalShutdown); \ - if (rc) { \ - RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ - rf_ShutdownList(&globalShutdown); \ - configureCount--; \ - RF_UNLOCK_MUTEX(configureMutex); \ - return(rc); \ - } \ -} - -#define DO_RAID_FAIL() { \ - rf_UnconfigureVnodes(raidPtr); \ - rf_ShutdownList(&raidPtr->shutdownList); \ - rf_UnconfigureArray(); \ -} - -#define DO_RAID_INIT_CONFIGURE(f) { \ - rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \ - if (rc) { \ - RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ - DO_RAID_FAIL(); \ - return(rc); \ - } \ -} - -#define DO_RAID_MUTEX(_m_) { \ - rc = rf_create_managed_mutex(&raidPtr->shutdownList, (_m_)); \ - if (rc) { \ - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", \ - __FILE__, __LINE__, rc); \ - DO_RAID_FAIL(); \ - return(rc); \ - } \ -} - -#define DO_RAID_COND(_c_) { \ - rc = rf_create_managed_cond(&raidPtr->shutdownList, (_c_)); \ - if (rc) { \ - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", \ - __FILE__, __LINE__, rc); \ - DO_RAID_FAIL(); \ - return(rc); \ - } \ -} +#define DO_INIT_CONFIGURE(f) \ +do { \ + rc = f (&globalShutdown); \ + if (rc) { \ + RF_ERRORMSG2("RAIDFRAME: failed %s with %d.\n", \ + RF_STRING(f), rc); \ + rf_ShutdownList(&globalShutdown); \ + configureCount--; \ + RF_UNLOCK_MUTEX(configureMutex); \ + return(rc); \ + } \ +} while (0) + +#define DO_RAID_FAIL() \ +do { \ + rf_UnconfigureVnodes(raidPtr); \ + rf_ShutdownList(&raidPtr->shutdownList); \ + rf_UnconfigureArray(); \ +} while (0) + +#define DO_RAID_INIT_CONFIGURE(f) \ +do { \ + rc = (f)(&raidPtr->shutdownList, raidPtr, cfgPtr); \ + if (rc) { \ + RF_ERRORMSG2("RAIDFRAME: failed %s with %d.\n", \ + RF_STRING(f), rc); \ + DO_RAID_FAIL(); \ + return(rc); \ + } \ +} while (0) + +#define DO_RAID_MUTEX(_m_) \ +do { \ + rc = rf_create_managed_mutex(&raidPtr->shutdownList, (_m_)); \ + if (rc) { \ + RF_ERRORMSG3("Unable to init mutex file %s line %d" \ + " rc=%d.\n", __FILE__, __LINE__, rc); \ + DO_RAID_FAIL(); \ + return(rc); \ + } \ +} while (0) + +#define DO_RAID_COND(_c_) \ +do { \ + rc = rf_create_managed_cond(&raidPtr->shutdownList, (_c_)); \ + if (rc) { \ + RF_ERRORMSG3("Unable to init cond file %s line %d" \ + " rc=%d.\n", __FILE__, __LINE__, rc); \ + DO_RAID_FAIL(); \ + return(rc); \ + } \ +} while (0) -int -rf_Configure(raidPtr, cfgPtr, ac) - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; - RF_AutoConfig_t *ac; +int +rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac) { RF_RowCol_t row, col; - int i, rc; + int i, rc; - /* XXX This check can probably be removed now, since - RAIDFRAME_CONFIGURRE now checks to make sure that the - RAID set is not already valid - */ + /* + * XXX 
This check can probably be removed now, since + * RAIDFRAME_CONFIGURE now checks to make sure that the + * RAID set is not already valid. + */ if (raidPtr->valid) { - RF_ERRORMSG("RAIDframe configuration not shut down. Aborting configure.\n"); + RF_ERRORMSG("RAIDframe configuration not shut down." + " Aborting configure.\n"); return (EINVAL); } RF_LOCK_MUTEX(configureMutex); @@ -354,14 +382,14 @@ rf_Configure(raidPtr, cfgPtr, ac) if (isconfigged == 0) { rc = rf_create_managed_mutex(&globalShutdown, &rf_printf_mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d" + " rc=%d.\n", __FILE__, __LINE__, rc); rf_ShutdownList(&globalShutdown); return (rc); } - /* initialize globals */ + /* Initialize globals. */ #ifdef RAIDDEBUG - printf("RAIDFRAME: protectedSectors is %ld\n", + printf("RAIDFRAME: protectedSectors is %ld.\n", rf_protectedSectors); #endif /* RAIDDEBUG */ @@ -370,8 +398,8 @@ rf_Configure(raidPtr, cfgPtr, ac) DO_INIT_CONFIGURE(rf_ConfigureAllocList); /* - * Yes, this does make debugging general to the whole - * system instead of being array specific. Bummer, drag. + * Yes, this does make debugging general to the whole + * system instead of being array specific. Bummer, drag. */ rf_ConfigureDebug(cfgPtr); DO_INIT_CONFIGURE(rf_ConfigureDebugMem); @@ -395,8 +423,10 @@ rf_Configure(raidPtr, cfgPtr, ac) RF_UNLOCK_MUTEX(configureMutex); DO_RAID_MUTEX(&raidPtr->mutex); - /* set up the cleanup list. Do this after ConfigureDebug so that - * value of memDebug will be set */ + /* + * Set up the cleanup list. Do this after ConfigureDebug so that + * value of memDebug will be set. + */ rf_MakeAllocList(raidPtr->cleanupList); if (raidPtr->cleanupList == NULL) { @@ -404,11 +434,10 @@ rf_Configure(raidPtr, cfgPtr, ac) return (ENOMEM); } rc = rf_ShutdownCreate(&raidPtr->shutdownList, - (void (*) (void *)) rf_FreeAllocList, - raidPtr->cleanupList); + (void (*) (void *)) rf_FreeAllocList, raidPtr->cleanupList); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d.\n", __FILE__, __LINE__, rc); DO_RAID_FAIL(); return (rc); } @@ -416,15 +445,17 @@ rf_Configure(raidPtr, cfgPtr, ac) raidPtr->numCol = cfgPtr->numCol; raidPtr->numSpare = cfgPtr->numSpare; - /* XXX we don't even pretend to support more than one row in the - * kernel... */ + /* + * XXX We don't even pretend to support more than one row in the + * kernel... + */ if (raidPtr->numRow != 1) { RF_ERRORMSG("Only one row supported in kernel.\n"); DO_RAID_FAIL(); return (EINVAL); } - RF_CallocAndAdd(raidPtr->status, raidPtr->numRow, sizeof(RF_RowStatus_t), - (RF_RowStatus_t *), raidPtr->cleanupList); + RF_CallocAndAdd(raidPtr->status, raidPtr->numRow, + sizeof(RF_RowStatus_t), (RF_RowStatus_t *), raidPtr->cleanupList); if (raidPtr->status == NULL) { DO_RAID_FAIL(); return (ENOMEM); @@ -455,17 +486,21 @@ rf_Configure(raidPtr, cfgPtr, ac) DO_RAID_MUTEX(&raidPtr->recon_done_proc_mutex); - if (ac!=NULL) { - /* We have an AutoConfig structure.. Don't do the - normal disk configuration... call the auto config - stuff */ + if (ac != NULL) { + /* + * We have an AutoConfig structure... Don't do the + * normal disk configuration... call the auto config + * stuff. 
+ */ rf_AutoConfigureDisks(raidPtr, cfgPtr, ac); } else { DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks); DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks); } - /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev - * no. is set */ + /* + * Do this after ConfigureDisks & ConfigureSpareDisks to be sure + * devno is set. + */ DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues); DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout); @@ -475,8 +510,8 @@ rf_Configure(raidPtr, cfgPtr, ac) for (row = 0; row < raidPtr->numRow; row++) { for (col = 0; col < raidPtr->numCol; col++) { /* - * XXX better distribution - */ + * XXX Better distribution. + */ raidPtr->hist_diskreq[row][col] = 0; } } @@ -487,8 +522,10 @@ rf_Configure(raidPtr, cfgPtr, ac) raidPtr->recon_in_progress = 0; raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs; - /* autoconfigure and root_partition will actually get filled in - after the config is done */ + /* + * Autoconfigure and root_partition will actually get filled in + * after the config is done. + */ raidPtr->autoconfigure = 0; raidPtr->root_partition = 0; raidPtr->last_unit = raidPtr->raidid; @@ -503,21 +540,20 @@ rf_Configure(raidPtr, cfgPtr, ac) return (0); } -static int -init_rad(desc) - RF_RaidAccessDesc_t *desc; +int +rf_init_rad(RF_RaidAccessDesc_t *desc) { - int rc; + int rc; rc = rf_mutex_init(&desc->mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n", __FILE__, __LINE__, rc); return (rc); } rc = rf_cond_init(&desc->cond); if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d.\n", __FILE__, __LINE__, rc); rf_mutex_destroy(&desc->mutex); return (rc); @@ -525,26 +561,24 @@ init_rad(desc) return (0); } -static void -clean_rad(desc) - RF_RaidAccessDesc_t *desc; +void +rf_clean_rad(RF_RaidAccessDesc_t *desc) { rf_mutex_destroy(&desc->mutex); rf_cond_destroy(&desc->cond); } -static void -rf_ShutdownRDFreeList(ignored) - void *ignored; +void +rf_ShutdownRDFreeList(void *ignored) { - RF_FREELIST_DESTROY_CLEAN(rf_rad_freelist, next, (RF_RaidAccessDesc_t *), clean_rad); + RF_FREELIST_DESTROY_CLEAN(rf_rad_freelist, next, + (RF_RaidAccessDesc_t *), rf_clean_rad); } -static int -rf_ConfigureRDFreeList(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureRDFreeList(RF_ShutdownList_t **listp) { - int rc; + int rc; RF_FREELIST_CREATE(rf_rad_freelist, RF_MAX_FREE_RAD, RF_RAD_INC, sizeof(RF_RaidAccessDesc_t)); @@ -553,41 +587,44 @@ rf_ConfigureRDFreeList(listp) } rc = rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d.\n", __FILE__, __LINE__, rc); rf_ShutdownRDFreeList(NULL); return (rc); } RF_FREELIST_PRIME_INIT(rf_rad_freelist, RF_RAD_INITIAL, next, - (RF_RaidAccessDesc_t *), init_rad); + (RF_RaidAccessDesc_t *), rf_init_rad); return (0); } RF_RaidAccessDesc_t * rf_AllocRaidAccDesc( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_RaidAddr_t raidAddress, - RF_SectorCount_t numBlocks, - caddr_t bufPtr, - void *bp, - RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - void (*cbF) (struct buf *), - void *cbA, - RF_AccessState_t * states) + RF_Raid_t *raidPtr, + RF_IoType_t type, + RF_RaidAddr_t raidAddress, + RF_SectorCount_t numBlocks, + caddr_t bufPtr, + void *bp, + RF_DagHeader_t 
**paramDAG, + RF_AccessStripeMapHeader_t **paramASM, + RF_RaidAccessFlags_t flags, + void (*cbF) (struct buf *), + void *cbA, + RF_AccessState_t *states +) { RF_RaidAccessDesc_t *desc; - RF_FREELIST_GET_INIT_NOUNLOCK(rf_rad_freelist, desc, next, (RF_RaidAccessDesc_t *), init_rad); + RF_FREELIST_GET_INIT_NOUNLOCK(rf_rad_freelist, desc, next, + (RF_RaidAccessDesc_t *), rf_init_rad); if (raidPtr->waitShutdown) { /* - * Actually, we're shutting the array down. Free the desc - * and return NULL. - */ + * Actually, we're shutting the array down. Free the desc + * and return NULL. + */ RF_FREELIST_DO_UNLOCK(rf_rad_freelist); - RF_FREELIST_FREE_CLEAN(rf_rad_freelist, desc, next, clean_rad); + RF_FREELIST_FREE_CLEAN(rf_rad_freelist, desc, next, + rf_clean_rad); return (NULL); } raidPtr->nAccOutstanding++; @@ -617,7 +654,7 @@ rf_AllocRaidAccDesc( return (desc); } -void +void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc) { RF_Raid_t *raidPtr = desc->raidPtr; @@ -625,39 +662,45 @@ rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc) RF_ASSERT(desc); rf_FreeAllocList(desc->cleanupList); - RF_FREELIST_FREE_CLEAN_NOUNLOCK(rf_rad_freelist, desc, next, clean_rad); + RF_FREELIST_FREE_CLEAN_NOUNLOCK(rf_rad_freelist, desc, next, rf_clean_rad); raidPtr->nAccOutstanding--; if (raidPtr->waitShutdown) { RF_SIGNAL_COND(raidPtr->outstandingCond); } RF_FREELIST_DO_UNLOCK(rf_rad_freelist); } -/********************************************************************* + + +/******************************************************************** * Main routine for performing an access. - * Accesses are retried until a DAG can not be selected. This occurs + * Accesses are retried until a DAG can not be selected. This occurs * when either the DAG library is incomplete or there are too many * failures in a parity group. ********************************************************************/ -int +int rf_DoAccess( - RF_Raid_t * raidPtr, - RF_IoType_t type, - int async_flag, - RF_RaidAddr_t raidAddress, - RF_SectorCount_t numBlocks, - caddr_t bufPtr, - void *bp_in, - RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - RF_RaidAccessDesc_t ** paramDesc, - void (*cbF) (struct buf *), - void *cbA) -/* -type should be read or write -async_flag should be RF_TRUE or RF_FALSE -bp_in is a buf pointer. void * to facilitate ignoring it outside the kernel -*/ + RF_Raid_t *raidPtr, + RF_IoType_t type, /* Should be read or write. */ + int async_flag, /* + * Should be RF_TRUE + * or RF_FALSE. + */ + RF_RaidAddr_t raidAddress, + RF_SectorCount_t numBlocks, + caddr_t bufPtr, + void *bp_in, /* + * It's a buf pointer. + * void * to facilitate + * ignoring it outside + * the kernel. + */ + RF_DagHeader_t **paramDAG, + RF_AccessStripeMapHeader_t **paramASM, + RF_RaidAccessFlags_t flags, + RF_RaidAccessDesc_t **paramDesc, + void (*cbF) (struct buf *), + void *cbA +) { RF_RaidAccessDesc_t *desc; caddr_t lbufPtr = bufPtr; @@ -666,17 +709,18 @@ bp_in is a buf pointer. void * to facilitate ignoring it outside the kernel raidAddress += rf_raidSectorOffset; if (!raidPtr->valid) { - RF_ERRORMSG("RAIDframe driver not successfully configured. Rejecting access.\n"); + RF_ERRORMSG("RAIDframe driver not successfully configured." 
+ " Rejecting access.\n"); IO_BUF_ERR(bp, EINVAL); return (EINVAL); } if (rf_accessDebug) { - printf("logBytes is: %d %d %d\n", raidPtr->raidid, + printf("logBytes is: %d %d %d.\n", raidPtr->raidid, raidPtr->logBytesPerSector, (int) rf_RaidAddressToByte(raidPtr, numBlocks)); - printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid, + printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx.\n", raidPtr->raidid, (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress, (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress), (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1), @@ -686,7 +730,7 @@ bp_in is a buf pointer. void * to facilitate ignoring it outside the kernel } if (raidAddress + numBlocks > raidPtr->totalSectors) { - printf("DoAccess: raid addr %lu too large to access %lu sectors. Max legal addr is %lu\n", + printf("DoAccess: raid addr %lu too large to access %lu sectors. Max legal addr is %lu.\n", (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors); IO_BUF_ERR(bp, ENOSPC); @@ -707,15 +751,15 @@ bp_in is a buf pointer. void * to facilitate ignoring it outside the kernel return (0); } -/* force the array into reconfigured mode without doing reconstruction */ -int -rf_SetReconfiguredMode(raidPtr, row, col) - RF_Raid_t *raidPtr; - int row; - int col; + + +/* Force the array into reconfigured mode without doing reconstruction. */ +int +rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int row, int col) { if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { - printf("Can't set reconfigured mode in dedicated-spare array\n"); + printf("Can't set reconfigured mode in dedicated-spare" + " array.\n"); RF_PANIC(); } RF_LOCK_MUTEX(raidPtr->mutex); @@ -723,8 +767,10 @@ rf_SetReconfiguredMode(raidPtr, row, col) raidPtr->Disks[row][col].status = rf_ds_dist_spared; raidPtr->status[row] = rf_rs_reconfigured; rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); - /* install spare table only if declustering + distributed sparing - * architecture. */ + /* + * Install spare table only if declustering + distributed sparing + * architecture. + */ if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED) rf_InstallSpareTable(raidPtr, row, col); RF_UNLOCK_MUTEX(raidPtr->mutex); @@ -734,14 +780,10 @@ rf_SetReconfiguredMode(raidPtr, row, col) extern int fail_row, fail_col, fail_time; extern int delayed_recon; -int -rf_FailDisk( - RF_Raid_t * raidPtr, - int frow, - int fcol, - int initRecon) +int +rf_FailDisk(RF_Raid_t *raidPtr, int frow, int fcol, int initRecon) { - printf("raid%d: Failing disk r%d c%d\n", raidPtr->raidid, frow, fcol); + printf("raid%d: Failing disk r%d c%d.\n", raidPtr->raidid, frow, fcol); RF_LOCK_MUTEX(raidPtr->mutex); raidPtr->numFailures++; raidPtr->Disks[frow][fcol].status = rf_ds_failed; @@ -752,16 +794,17 @@ rf_FailDisk( rf_ReconstructFailedDisk(raidPtr, frow, fcol); return (0); } -/* releases a thread that is waiting for the array to become quiesced. - * access_suspend_mutex should be locked upon calling this + + +/* + * Releases a thread that is waiting for the array to become quiesced. + * access_suspend_mutex should be locked upon calling this. 
*/ -void -rf_SignalQuiescenceLock(raidPtr, reconDesc) - RF_Raid_t *raidPtr; - RF_RaidReconDesc_t *reconDesc; +void +rf_SignalQuiescenceLock(RF_Raid_t *raidPtr, RF_RaidReconDesc_t *reconDesc) { if (rf_quiesceDebug) { - printf("raid%d: Signalling quiescence lock\n", + printf("raid%d: Signalling quiescence lock.\n", raidPtr->raidid); } raidPtr->access_suspend_release = 1; @@ -770,13 +813,17 @@ rf_SignalQuiescenceLock(raidPtr, reconDesc) SIGNAL_QUIESCENT_COND(raidPtr); } } -/* suspends all new requests to the array. No effect on accesses that are in flight. */ -int -rf_SuspendNewRequestsAndWait(raidPtr) - RF_Raid_t *raidPtr; + + +/* + * Suspends all new requests to the array. No effect on accesses that are + * in flight. + */ +int +rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr) { if (rf_quiesceDebug) - printf("Suspending new reqs\n"); + printf("Suspending new reqs.\n"); RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); raidPtr->accesses_suspended++; @@ -785,25 +832,26 @@ rf_SuspendNewRequestsAndWait(raidPtr) if (raidPtr->waiting_for_quiescence) { raidPtr->access_suspend_release = 0; while (!raidPtr->access_suspend_release) { - printf("Suspending: Waiting for Quiescence\n"); + printf("Suspending: Waiting for Quiescence.\n"); WAIT_FOR_QUIESCENCE(raidPtr); raidPtr->waiting_for_quiescence = 0; } } - printf("Quiescence reached..\n"); + printf("Quiescence reached...\n"); RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); return (raidPtr->waiting_for_quiescence); } -/* wake up everyone waiting for quiescence to be released */ -void -rf_ResumeNewRequests(raidPtr) - RF_Raid_t *raidPtr; + + +/* Wake up everyone waiting for quiescence to be released. */ +void +rf_ResumeNewRequests(RF_Raid_t *raidPtr) { RF_CallbackDesc_t *t, *cb; if (rf_quiesceDebug) - printf("Resuming new reqs\n"); + printf("Resuming new reqs.\n"); RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); raidPtr->accesses_suspended--; @@ -821,16 +869,16 @@ rf_ResumeNewRequests(raidPtr) rf_FreeCallbackDesc(t); } } -/***************************************************************************************** + + +/***************************************************************************** * - * debug routines + * Debug routines. * - ****************************************************************************************/ + *****************************************************************************/ -static void -set_debug_option(name, val) - char *name; - long val; +void +rf_set_debug_option(char *name, long val) { RF_DebugName_t *p; @@ -845,51 +893,52 @@ set_debug_option(name, val) } -/* would like to use sscanf here, but apparently not available in kernel */ +/* Would like to use sscanf here, but apparently not available in kernel. */ /*ARGSUSED*/ -static void -rf_ConfigureDebug(cfgPtr) - RF_Config_t *cfgPtr; +void +rf_ConfigureDebug(RF_Config_t *cfgPtr) { - char *val_p, *name_p, *white_p; - long val; - int i; + char *val_p, *name_p, *white_p; + long val; + int i; rf_ResetDebugOptions(); for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) { name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]); - white_p = rf_find_white(name_p); /* skip to start of 2nd - * word */ + white_p = rf_find_white(name_p); /* + * Skip to start of 2nd + * word. 
+ */ val_p = rf_find_non_white(white_p); if (*val_p == '0' && *(val_p + 1) == 'x') val = rf_htoi(val_p + 2); else val = rf_atoi(val_p); *white_p = '\0'; - set_debug_option(name_p, val); + rf_set_debug_option(name_p, val); } } -/* performance monitoring stuff */ -#if !defined(_KERNEL) && !defined(SIMULATE) + +/* Performance monitoring stuff. */ + +#if !defined(_KERNEL) && !defined(SIMULATE) /* - * Throughput stats currently only used in user-level RAIDframe + * Throughput stats currently only used in user-level RAIDframe. */ -static int -rf_InitThroughputStats( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) +int +rf_InitThroughputStats(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { - int rc; + int rc; - /* these used by user-level raidframe only */ + /* These used by user-level RAIDframe only. */ rc = rf_create_managed_mutex(listp, &raidPtr->throughputstats.mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n", + __FILE__, __LINE__, rc); return (rc); } raidPtr->throughputstats.sum_io_us = 0; @@ -898,8 +947,8 @@ rf_InitThroughputStats( return (0); } -void -rf_StartThroughputStats(RF_Raid_t * raidPtr) +void +rf_StartThroughputStats(RF_Raid_t *raidPtr) { RF_LOCK_MUTEX(raidPtr->throughputstats.mutex); raidPtr->throughputstats.num_ios++; @@ -909,8 +958,8 @@ rf_StartThroughputStats(RF_Raid_t * raidPtr) RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); } -static void -rf_StopThroughputStats(RF_Raid_t * raidPtr) +void +rf_StopThroughputStats(RF_Raid_t *raidPtr) { struct timeval diff; @@ -918,25 +967,28 @@ rf_StopThroughputStats(RF_Raid_t * raidPtr) raidPtr->throughputstats.num_out_ios--; if (raidPtr->throughputstats.num_out_ios == 0) { RF_GETTIME(raidPtr->throughputstats.stop); - RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start, &raidPtr->throughputstats.stop, &diff); + RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start, + &raidPtr->throughputstats.stop, &diff); raidPtr->throughputstats.sum_io_us += RF_TIMEVAL_TO_US(diff); } RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); } -static void -rf_PrintThroughputStats(RF_Raid_t * raidPtr) +void +rf_PrintThroughputStats(RF_Raid_t *raidPtr) { RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0); if (raidPtr->throughputstats.sum_io_us != 0) { - printf("[Througphut: %8.2f IOs/second]\n", raidPtr->throughputstats.num_ios - / (raidPtr->throughputstats.sum_io_us / 1000000.0)); + printf("[Througphut: %8.2f IOs/second]\n", + raidPtr->throughputstats.num_ios / + (raidPtr->throughputstats.sum_io_us / 1000000.0)); } } -#endif /* !_KERNEL && !SIMULATE */ -void -rf_StartUserStats(RF_Raid_t * raidPtr) +#endif /* !_KERNEL && !SIMULATE */ + +void +rf_StartUserStats(RF_Raid_t *raidPtr) { RF_GETTIME(raidPtr->userstats.start); raidPtr->userstats.sum_io_us = 0; @@ -944,51 +996,63 @@ rf_StartUserStats(RF_Raid_t * raidPtr) raidPtr->userstats.num_sect_moved = 0; } -void -rf_StopUserStats(RF_Raid_t * raidPtr) +void +rf_StopUserStats(RF_Raid_t *raidPtr) { RF_GETTIME(raidPtr->userstats.stop); } -void -rf_UpdateUserStats(raidPtr, rt, numsect) - RF_Raid_t *raidPtr; - int rt; /* resp time in us */ - int numsect; /* number of sectors for this access */ +void +rf_UpdateUserStats( + RF_Raid_t *raidPtr, + int rt, /* Response time in us. */ + int numsect /* Number of sectors for this access. 
*/ +) { raidPtr->userstats.sum_io_us += rt; raidPtr->userstats.num_ios++; raidPtr->userstats.num_sect_moved += numsect; } -void -rf_PrintUserStats(RF_Raid_t * raidPtr) +void +rf_PrintUserStats(RF_Raid_t *raidPtr) { long elapsed_us, mbs, mbs_frac; struct timeval diff; - RF_TIMEVAL_DIFF(&raidPtr->userstats.start, &raidPtr->userstats.stop, &diff); + RF_TIMEVAL_DIFF(&raidPtr->userstats.start, &raidPtr->userstats.stop, + &diff); elapsed_us = RF_TIMEVAL_TO_US(diff); - /* 2000 sectors per megabyte, 10000000 microseconds per second */ + /* 2000 sectors per megabyte, 10000000 microseconds per second. */ if (elapsed_us) - mbs = (raidPtr->userstats.num_sect_moved / 2000) / (elapsed_us / 1000000); + mbs = (raidPtr->userstats.num_sect_moved / 2000) / + (elapsed_us / 1000000); else mbs = 0; - /* this computes only the first digit of the fractional mb/s moved */ + /* This computes only the first digit of the fractional mb/s moved. */ if (elapsed_us) { - mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) / (elapsed_us / 1000000)) - - (mbs * 10); + mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) / + (elapsed_us / 1000000)) - (mbs * 10); } else { mbs_frac = 0; } - printf("Number of I/Os: %ld\n", raidPtr->userstats.num_ios); - printf("Elapsed time (us): %ld\n", elapsed_us); - printf("User I/Os per second: %ld\n", RF_DB0_CHECK(raidPtr->userstats.num_ios, (elapsed_us / 1000000))); - printf("Average user response time: %ld us\n", RF_DB0_CHECK(raidPtr->userstats.sum_io_us, raidPtr->userstats.num_ios)); - printf("Total sectors moved: %ld\n", raidPtr->userstats.num_sect_moved); - printf("Average access size (sect): %ld\n", RF_DB0_CHECK(raidPtr->userstats.num_sect_moved, raidPtr->userstats.num_ios)); - printf("Achieved data rate: %ld.%ld MB/sec\n", mbs, mbs_frac); + printf("Number of I/Os: %ld\n", + raidPtr->userstats.num_ios); + printf("Elapsed time (us): %ld\n", + elapsed_us); + printf("User I/Os per second: %ld\n", + RF_DB0_CHECK(raidPtr->userstats.num_ios, (elapsed_us / 1000000))); + printf("Average user response time: %ld us\n", + RF_DB0_CHECK(raidPtr->userstats.sum_io_us, + raidPtr->userstats.num_ios)); + printf("Total sectors moved: %ld\n", + raidPtr->userstats.num_sect_moved); + printf("Average access size (sect): %ld\n", + RF_DB0_CHECK(raidPtr->userstats.num_sect_moved, + raidPtr->userstats.num_ios)); + printf("Achieved data rate: %ld.%ld MB/sec\n", + mbs, mbs_frac); } diff --git a/sys/dev/raidframe/rf_driver.h b/sys/dev/raidframe/rf_driver.h index 6e29c3c6d6c..9df7c907f2d 100644 --- a/sys/dev/raidframe/rf_driver.h +++ b/sys/dev/raidframe/rf_driver.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_driver.h,v 1.3 2000/08/08 16:07:41 peter Exp $ */ +/* $OpenBSD: rf_driver.h,v 1.4 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_driver.h,v 1.4 2000/02/13 04:53:57 oster Exp $ */ + /* * rf_driver.h */ @@ -30,55 +31,43 @@ * rights to redistribute these changes. 
*/ -#ifndef _RF__RF_DRIVER_H_ -#define _RF__RF_DRIVER_H_ +#ifndef _RF__RF_DRIVER_H_ +#define _RF__RF_DRIVER_H_ #include "rf_threadstuff.h" #include "rf_types.h" -#if defined(__NetBSD__) +#if defined(__NetBSD__) #include "rf_netbsd.h" -#elif defined(__OpenBSD__) +#elif defined(__OpenBSD__) #include "rf_openbsd.h" #endif -#if _KERNEL -RF_DECLARE_EXTERN_MUTEX(rf_printf_mutex) -int rf_BootRaidframe(void); -int rf_UnbootRaidframe(void); -int rf_Shutdown(RF_Raid_t * raidPtr); -int rf_Configure(RF_Raid_t * raidPtr, RF_Config_t * cfgPtr, - RF_AutoConfig_t *ac); -RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_RaidAddr_t raidAddress, - RF_SectorCount_t numBlocks, - caddr_t bufPtr, - void *bp, RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - void (*cbF) (struct buf *), - void *cbA, - RF_AccessState_t * states); -void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc); -int rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag, - RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, - caddr_t bufPtr, void *bp_in, RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, - RF_RaidAccessDesc_t ** paramDesc, - void (*cbF) (struct buf *), void *cbA); -int rf_SetReconfiguredMode(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); -int rf_FailDisk(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol, - int initRecon); -void rf_SignalQuiescenceLock(RF_Raid_t * raidPtr, - RF_RaidReconDesc_t * reconDesc); -int rf_SuspendNewRequestsAndWait(RF_Raid_t * raidPtr); -void rf_ResumeNewRequests(RF_Raid_t * raidPtr); -void rf_StartThroughputStats(RF_Raid_t * raidPtr); -void rf_StartUserStats(RF_Raid_t * raidPtr); -void rf_StopUserStats(RF_Raid_t * raidPtr); -void rf_UpdateUserStats(RF_Raid_t * raidPtr, int rt, int numsect); -void rf_PrintUserStats(RF_Raid_t * raidPtr); -#endif /* _KERNEL */ -#endif /* !_RF__RF_DRIVER_H_ */ +#if _KERNEL +RF_DECLARE_EXTERN_MUTEX(rf_printf_mutex); +int rf_BootRaidframe(void); +int rf_UnbootRaidframe(void); +int rf_Shutdown(RF_Raid_t *); +int rf_Configure(RF_Raid_t *, RF_Config_t *, RF_AutoConfig_t *); +RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(RF_Raid_t *, RF_IoType_t, + RF_RaidAddr_t, RF_SectorCount_t, caddr_t, void *, RF_DagHeader_t **, + RF_AccessStripeMapHeader_t **, RF_RaidAccessFlags_t, + void (*) (struct buf *), void *, RF_AccessState_t *); +void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *); +int rf_DoAccess(RF_Raid_t *, RF_IoType_t, int, RF_RaidAddr_t, + RF_SectorCount_t, caddr_t, void *, RF_DagHeader_t **, + RF_AccessStripeMapHeader_t **, RF_RaidAccessFlags_t, + RF_RaidAccessDesc_t **, void (*) (struct buf *), void *); +int rf_SetReconfiguredMode(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); +int rf_FailDisk(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t, int); +void rf_SignalQuiescenceLock(RF_Raid_t *, RF_RaidReconDesc_t *); +int rf_SuspendNewRequestsAndWait(RF_Raid_t *); +void rf_ResumeNewRequests(RF_Raid_t *); +void rf_StartThroughputStats(RF_Raid_t *); +void rf_StartUserStats(RF_Raid_t *); +void rf_StopUserStats(RF_Raid_t *); +void rf_UpdateUserStats(RF_Raid_t *, int, int); +void rf_PrintUserStats(RF_Raid_t *); +#endif /* _KERNEL */ + +#endif /* !_RF__RF_DRIVER_H_ */ diff --git a/sys/dev/raidframe/rf_engine.c b/sys/dev/raidframe/rf_engine.c index f988f0de05a..458ab8c00b3 100644 --- a/sys/dev/raidframe/rf_engine.c +++ b/sys/dev/raidframe/rf_engine.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_engine.c,v 1.12 2002/07/15 18:42:06 art Exp $ */ +/* $OpenBSD: 
rf_engine.c,v 1.13 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_engine.c,v 1.10 2000/08/20 16:51:03 thorpej Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,16 +30,16 @@ /**************************************************************************** * * - * engine.c -- code for DAG execution engine * + * engine.c -- Code for DAG execution engine. * * * * Modified to work as follows (holland): * * A user-thread calls into DispatchDAG, which fires off the nodes that * - * are direct successors to the header node. DispatchDAG then returns, * - * and the rest of the I/O continues asynchronously. As each node * - * completes, the node execution function calls FinishNode(). FinishNode * + * are direct successors to the header node. DispatchDAG then returns, * + * and the rest of the I/O continues asynchronously. As each node * + * completes, the node execution function calls FinishNode(). FinishNode * * scans the list of successors to the node and increments the antecedent * - * counts. Each node that becomes enabled is placed on a central node * - * queue. A dedicated dag-execution thread grabs nodes off of this * + * counts. Each node that becomes enabled is placed on a central node * + * queue. A dedicated dag-execution thread grabs nodes off of this * * queue and fires them. * * * * NULL nodes are never fired. * @@ -49,8 +50,8 @@ * If a node fails, the dag either rolls forward to the completion or * * rolls back, undoing previously-completed nodes and fails atomically. * * The direction of recovery is determined by the location of the failed * - * node in the graph. If the failure occurred before the commit node in * - * the graph, backward recovery is used. Otherwise, forward recovery is * + * node in the graph. If the failure occurred before the commit node in * + * the graph, backward recovery is used. Otherwise, forward recovery is * * used. * * * ****************************************************************************/ @@ -67,56 +68,67 @@ #include "rf_shutdown.h" #include "rf_raid.h" -void DAGExecutionThread(RF_ThreadArg_t arg); -#ifdef RAID_AUTOCONFIG -void DAGExecutionThread_pre(RF_ThreadArg_t arg); +int rf_BranchDone(RF_DagNode_t *); +int rf_NodeReady(RF_DagNode_t *); +void rf_FireNode(RF_DagNode_t *); +void rf_FireNodeArray(int, RF_DagNode_t **); +void rf_FireNodeList(RF_DagNode_t *); +void rf_PropagateResults(RF_DagNode_t *, int); +void rf_ProcessNode(RF_DagNode_t *, int); + +void rf_DAGExecutionThread(RF_ThreadArg_t); +#ifdef RAID_AUTOCONFIG #define RF_ENGINE_PID 10 +void rf_DAGExecutionThread_pre(RF_ThreadArg_t); extern pid_t lastpid; #endif /* RAID_AUTOCONFIG */ void **rf_hook_cookies; extern int numraid; -#define DO_INIT(_l_,_r_) { \ - int _rc; \ - _rc = rf_create_managed_mutex(_l_,&(_r_)->node_queue_mutex); \ - if (_rc) { \ - return(_rc); \ - } \ - _rc = rf_create_managed_cond(_l_,&(_r_)->node_queue_cond); \ - if (_rc) { \ - return(_rc); \ - } \ -} +#define DO_INIT(_l_,_r_) \ +do { \ + int _rc; \ + _rc = rf_create_managed_mutex(_l_, &(_r_)->node_queue_mutex); \ + if (_rc) { \ + return(_rc); \ + } \ + _rc = rf_create_managed_cond(_l_, &(_r_)->node_queue_cond); \ + if (_rc) { \ + return(_rc); \ + } \ +} while (0) -/* synchronization primitives for this file. DO_WAIT should be enclosed in a while loop. */ +/* + * Synchronization primitives for this file. DO_WAIT should be enclosed + * in a while loop. + */ /* - * XXX Is this spl-ing really necessary? + * XXX Is this spl-ing really necessary ? 
*/ -#define DO_LOCK(_r_) \ -do { \ - ks = splbio(); \ - RF_LOCK_MUTEX((_r_)->node_queue_mutex); \ +#define DO_LOCK(_r_) \ +do { \ + ks = splbio(); \ + RF_LOCK_MUTEX((_r_)->node_queue_mutex); \ } while (0) -#define DO_UNLOCK(_r_) \ -do { \ - RF_UNLOCK_MUTEX((_r_)->node_queue_mutex); \ - splx(ks); \ +#define DO_UNLOCK(_r_) \ +do { \ + RF_UNLOCK_MUTEX((_r_)->node_queue_mutex); \ + splx(ks); \ } while (0) -#define DO_WAIT(_r_) \ +#define DO_WAIT(_r_) \ RF_WAIT_COND((_r_)->node_queue, (_r_)->node_queue_mutex) /* XXX RF_SIGNAL_COND? */ -#define DO_SIGNAL(_r_) \ +#define DO_SIGNAL(_r_) \ RF_BROADCAST_COND((_r_)->node_queue) void rf_ShutdownEngine(void *); -void -rf_ShutdownEngine(arg) - void *arg; +void +rf_ShutdownEngine(void *arg) { RF_Raid_t *raidPtr; @@ -125,14 +137,12 @@ rf_ShutdownEngine(arg) DO_SIGNAL(raidPtr); } -int -rf_ConfigureEngine( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) +int +rf_ConfigureEngine(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { - int rc; - char raidname[16]; + int rc; + char raidname[16]; DO_INIT(listp, raidPtr); @@ -143,9 +153,11 @@ rf_ConfigureEngine( if (rc) return (rc); - /* we create the execution thread only once per system boot. no need + /* + * We create the execution thread only once per system boot. No need * to check return code b/c the kernel panics if it can't create the - * thread. */ + * thread. + */ if (rf_engineDebug) { printf("raid%d: Creating engine thread\n", raidPtr->raidid); } @@ -157,94 +169,105 @@ rf_ConfigureEngine( return (ENOMEM); bzero(rf_hook_cookies, numraid * sizeof(void *)); } -#ifdef RAID_AUTOCONFIG +#ifdef RAID_AUTOCONFIG if (initproc == NULL) { rf_hook_cookies[raidPtr->raidid] = - startuphook_establish(DAGExecutionThread_pre, raidPtr); + startuphook_establish(rf_DAGExecutionThread_pre, + raidPtr); } else { #endif /* RAID_AUTOCONFIG */ snprintf(&raidname[0], 16, "raid%d", raidPtr->raidid); if (RF_CREATE_THREAD(raidPtr->engine_thread, - DAGExecutionThread, raidPtr, &raidname[0])) { - RF_ERRORMSG("RAIDFRAME: Unable to create engine thread\n"); + rf_DAGExecutionThread, raidPtr, &raidname[0])) { + RF_ERRORMSG("RAIDFRAME: Unable to create engine" + " thread\n"); return (ENOMEM); } if (rf_engineDebug) { - printf("raid%d: Created engine thread\n", raidPtr->raidid); + printf("raid%d: Created engine thread\n", + raidPtr->raidid); } RF_THREADGROUP_STARTED(&raidPtr->engine_tg); -#ifdef RAID_AUTOCONFIG +#ifdef RAID_AUTOCONFIG } #endif - /* XXX something is missing here... */ -#ifdef debug - printf("Skipping the WAIT_START!!\n"); + /* XXX Something is missing here... */ +#ifdef debug + printf("Skipping the WAIT_START !!!\n"); #endif - /* engine thread is now running and waiting for work */ + /* Engine thread is now running and waiting for work. 
*/ if (rf_engineDebug) { - printf("raid%d: Engine thread running and waiting for events\n", raidPtr->raidid); + printf("raid%d: Engine thread running and waiting for events\n", + raidPtr->raidid); } rc = rf_ShutdownCreate(listp, rf_ShutdownEngine, raidPtr); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); rf_ShutdownEngine(NULL); } return (rc); } -static int -BranchDone(RF_DagNode_t * node) +int +rf_BranchDone(RF_DagNode_t *node) { - int i; + int i; - /* return true if forward execution is completed for a node and it's - * succedents */ + /* + * Return true if forward execution is completed for a node and it's + * succedents. + */ switch (node->status) { case rf_wait: - /* should never be called in this state */ + /* Should never be called in this state. */ RF_PANIC(); break; case rf_fired: - /* node is currently executing, so we're not done */ + /* Node is currently executing, so we're not done. */ return (RF_FALSE); case rf_good: - for (i = 0; i < node->numSuccedents; i++) /* for each succedent */ - if (!BranchDone(node->succedents[i])) /* recursively check - * branch */ + /* For each succedent. */ + for (i = 0; i < node->numSuccedents; i++) + /* Recursively check branch. */ + if (!rf_BranchDone(node->succedents[i])) return RF_FALSE; - return RF_TRUE; /* node and all succedent branches aren't in - * fired state */ + + return RF_TRUE; /* + * Node and all succedent branches aren't in + * fired state. + */ break; case rf_bad: - /* succedents can't fire */ + /* Succedents can't fire. */ return (RF_TRUE); case rf_recover: - /* should never be called in this state */ + /* Should never be called in this state. */ RF_PANIC(); break; case rf_undone: case rf_panic: - /* XXX need to fix this case */ - /* for now, assume that we're done */ + /* XXX Need to fix this case. */ + /* For now, assume that we're done. */ return (RF_TRUE); break; default: - /* illegal node status */ + /* Illegal node status. */ RF_PANIC(); break; } } -static int -NodeReady(RF_DagNode_t * node) +int +rf_NodeReady(RF_DagNode_t *node) { - int ready; + int ready; switch (node->dagHdr->status) { case rf_enable: case rf_rollForward: - if ((node->status == rf_wait) && (node->numAntecedents == node->numAntDone)) + if ((node->status == rf_wait) && + (node->numAntecedents == node->numAntDone)) ready = RF_TRUE; else ready = RF_FALSE; @@ -253,13 +276,15 @@ NodeReady(RF_DagNode_t * node) RF_ASSERT(node->numSuccDone <= node->numSuccedents); RF_ASSERT(node->numSuccFired <= node->numSuccedents); RF_ASSERT(node->numSuccFired <= node->numSuccDone); - if ((node->status == rf_good) && (node->numSuccDone == node->numSuccedents)) + if ((node->status == rf_good) && + (node->numSuccDone == node->numSuccedents)) ready = RF_TRUE; else ready = RF_FALSE; break; default: - printf("Execution engine found illegal DAG status in NodeReady\n"); + printf("Execution engine found illegal DAG status" + " in rf_NodeReady\n"); RF_PANIC(); break; } @@ -268,30 +293,32 @@ NodeReady(RF_DagNode_t * node) } - -/* user context and dag-exec-thread context: - * Fire a node. The node's status field determines which function, do or undo, +/* + * User context and dag-exec-thread context: + * Fire a node. The node's status field determines which function, do or undo, * to be fired. 
* This routine assumes that the node's status field has alread been set to * "fired" or "recover" to indicate the direction of execution. */ -static void -FireNode(RF_DagNode_t * node) +void +rf_FireNode(RF_DagNode_t *node) { switch (node->status) { case rf_fired: - /* fire the do function of a node */ + /* Fire the do function of a node. */ if (rf_engineDebug) { - printf("raid%d: Firing node 0x%lx (%s)\n", - node->dagHdr->raidPtr->raidid, - (unsigned long) node, node->name); + printf("raid%d: Firing node 0x%lx (%s)\n", + node->dagHdr->raidPtr->raidid, + (unsigned long) node, node->name); } if (node->flags & RF_DAGNODE_FLAG_YIELD) { #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) /* thread_block(); */ /* printf("Need to block the thread here...\n"); */ - /* XXX thread_block is actually mentioned in - * /usr/include/vm/vm_extern.h */ + /* + * XXX thread_block is actually mentioned in + * /usr/include/vm/vm_extern.h + */ #else thread_block(); #endif @@ -299,22 +326,24 @@ FireNode(RF_DagNode_t * node) (*(node->doFunc)) (node); break; case rf_recover: - /* fire the undo function of a node */ + /* Fire the undo function of a node. */ if (rf_engineDebug) { - printf("raid%d: Firing (undo) node 0x%lx (%s)\n", - node->dagHdr->raidPtr->raidid, - (unsigned long) node, node->name); + printf("raid%d: Firing (undo) node 0x%lx (%s)\n", + node->dagHdr->raidPtr->raidid, + (unsigned long) node, node->name); } if (node->flags & RF_DAGNODE_FLAG_YIELD) { #if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) /* thread_block(); */ /* printf("Need to block the thread here...\n"); */ - /* XXX thread_block is actually mentioned in - * /usr/include/vm/vm_extern.h */ + /* + * XXX thread_block is actually mentioned in + * /usr/include/vm/vm_extern.h + */ #else thread_block(); #endif - } + } (*(node->undoFunc)) (node); break; default: @@ -324,26 +353,25 @@ FireNode(RF_DagNode_t * node) } - -/* user context: +/* + * User context: * Attempt to fire each node in a linear array. * The entire list is fired atomically. */ -static void -FireNodeArray( - int numNodes, - RF_DagNode_t ** nodeList) +void +rf_FireNodeArray(int numNodes, RF_DagNode_t **nodeList) { RF_DagStatus_t dstat; RF_DagNode_t *node; - int i, j; + int i, j; - /* first, mark all nodes which are ready to be fired */ + /* First, mark all nodes which are ready to be fired. */ for (i = 0; i < numNodes; i++) { node = nodeList[i]; dstat = node->dagHdr->status; - RF_ASSERT((node->status == rf_wait) || (node->status == rf_good)); - if (NodeReady(node)) { + RF_ASSERT((node->status == rf_wait) || + (node->status == rf_good)); + if (rf_NodeReady(node)) { if ((dstat == rf_enable) || (dstat == rf_rollForward)) { RF_ASSERT(node->status == rf_wait); if (node->commitNode) @@ -354,99 +382,107 @@ FireNodeArray( } else { RF_ASSERT(dstat == rf_rollBackward); RF_ASSERT(node->status == rf_good); - RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node - * per graph */ + /* Only one commit node per graph. */ + RF_ASSERT(node->commitNode == RF_FALSE); node->status = rf_recover; } } } - /* now, fire the nodes */ + /* Now, fire the nodes. */ for (i = 0; i < numNodes; i++) { - if ((nodeList[i]->status == rf_fired) || (nodeList[i]->status == rf_recover)) - FireNode(nodeList[i]); + if ((nodeList[i]->status == rf_fired) || + (nodeList[i]->status == rf_recover)) + rf_FireNode(nodeList[i]); } } -/* user context: +/* + * User context: * Attempt to fire each node in a linked list. * The entire list is fired atomically. 
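+ * ("Fired atomically" means the list is walked twice: every eligible node
+ * is first marked rf_fired (or rf_recover), and only then are the marked
+ * nodes' do/undo functions actually invoked.)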
*/ -static void -FireNodeList(RF_DagNode_t * nodeList) +void +rf_FireNodeList(RF_DagNode_t *nodeList) { RF_DagNode_t *node, *next; RF_DagStatus_t dstat; - int j; + int j; if (nodeList) { - /* first, mark all nodes which are ready to be fired */ + /* First, mark all nodes which are ready to be fired. */ for (node = nodeList; node; node = next) { next = node->next; dstat = node->dagHdr->status; - RF_ASSERT((node->status == rf_wait) || (node->status == rf_good)); - if (NodeReady(node)) { - if ((dstat == rf_enable) || (dstat == rf_rollForward)) { + RF_ASSERT((node->status == rf_wait) || + (node->status == rf_good)); + if (rf_NodeReady(node)) { + if ((dstat == rf_enable) || + (dstat == rf_rollForward)) { RF_ASSERT(node->status == rf_wait); if (node->commitNode) node->dagHdr->numCommits++; node->status = rf_fired; - for (j = 0; j < node->numAntecedents; j++) - node->antecedents[j]->numSuccFired++; + for (j = 0; j < node->numAntecedents; + j++) + node->antecedents[j] + ->numSuccFired++; } else { RF_ASSERT(dstat == rf_rollBackward); RF_ASSERT(node->status == rf_good); - RF_ASSERT(node->commitNode == RF_FALSE); /* only one commit node - * per graph */ + /* Only one commit node per graph. */ + RF_ASSERT(node->commitNode == RF_FALSE); node->status = rf_recover; } } } - /* now, fire the nodes */ + /* Now, fire the nodes. */ for (node = nodeList; node; node = next) { next = node->next; - if ((node->status == rf_fired) || (node->status == rf_recover)) - FireNode(node); + if ((node->status == rf_fired) || + (node->status == rf_recover)) + rf_FireNode(node); } } } -/* interrupt context: - * for each succedent - * propagate required results from node to succedent - * increment succedent's numAntDone - * place newly-enable nodes on node queue for firing + + +/* + * Interrupt context: + * For each succedent, + * propagate required results from node to succedent. + * increment succedent's numAntDone. + * place newly-enable nodes on node queue for firing. * * To save context switches, we don't place NIL nodes on the node queue, - * but rather just process them as if they had fired. Note that NIL nodes + * but rather just process them as if they had fired. Note that NIL nodes * that are the direct successors of the header will actually get fired by * DispatchDAG, which is fine because no context switches are involved. * * Important: when running at user level, this can be called by any * disk thread, and so the increment and check of the antecedent count - * must be locked. I used the node queue mutex and locked down the + * must be locked. I used the node queue mutex and locked down the * entire function, but this is certainly overkill. */ -static void -PropagateResults( - RF_DagNode_t * node, - int context) +void +rf_PropagateResults(RF_DagNode_t *node, int context) { RF_DagNode_t *s, *a; RF_Raid_t *raidPtr; - int i, ks; - RF_DagNode_t *finishlist = NULL; /* a list of NIL nodes to be - * finished */ - RF_DagNode_t *skiplist = NULL; /* list of nodes with failed truedata - * antecedents */ - RF_DagNode_t *firelist = NULL; /* a list of nodes to be fired */ + int i, ks; + /* A list of NIL nodes to be finished. */ + RF_DagNode_t *finishlist = NULL; + /* List of nodes with failed truedata antecedents. */ + RF_DagNode_t *skiplist = NULL; + RF_DagNode_t *firelist = NULL; /* A list of nodes to be fired. */ RF_DagNode_t *q = NULL, *qh = NULL, *next; - int j, skipNode; + int j, skipNode; raidPtr = node->dagHdr->raidPtr; DO_LOCK(raidPtr); - /* debug - validate fire counts */ + /* Debug - validate fire counts. 
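+	 * (Each antecedent must have fired at least as many successors as
+	 * have completed, and no more than it actually has.)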
*/ for (i = 0; i < node->numAntecedents; i++) { a = *(node->antecedents + i); RF_ASSERT(a->numSuccFired >= a->numSuccDone); @@ -462,40 +498,60 @@ PropagateResults( RF_ASSERT(s->status == rf_wait); (s->numAntDone)++; if (s->numAntDone == s->numAntecedents) { - /* look for NIL nodes */ + /* Look for NIL nodes. */ if (s->doFunc == rf_NullNodeFunc) { - /* don't fire NIL nodes, just process - * them */ + /* + * Don't fire NIL nodes, just process + * them. + */ s->next = finishlist; finishlist = s; } else { - /* look to see if the node is to be - * skipped */ + /* + * Look to see if the node is to be + * skipped. + */ skipNode = RF_FALSE; for (j = 0; j < s->numAntecedents; j++) - if ((s->antType[j] == rf_trueData) && (s->antecedents[j]->status == rf_bad)) + if ((s->antType[j] == + rf_trueData) && + (s->antecedents[j]->status + == rf_bad)) skipNode = RF_TRUE; if (skipNode) { - /* this node has one or more + /* + * This node has one or more * failed true data - * dependencies, so skip it */ + * dependencies, so skip it. + */ s->next = skiplist; skiplist = s; - } else - /* add s to list of nodes (q) - * to execute */ - if (context != RF_INTR_CONTEXT) { - /* we only have to + } else { + /* + * Add s to list of nodes (q) + * to execute. + */ + if (context != RF_INTR_CONTEXT) + { + /* + * We only have to * enqueue if we're at - * intr context */ - s->next = firelist; /* put node on a list to - * be fired after we - * unlock */ + * intr context. + */ + /* + * Put node on a list to + * be fired after we + * unlock. + */ + s->next = firelist; firelist = s; - } else { /* enqueue the node for - * the dag exec thread - * to fire */ - RF_ASSERT(NodeReady(s)); + } else { + /* + * Enqueue the node for + * the dag exec thread + * to fire. + */ + RF_ASSERT(rf_NodeReady(s)); if (q) { q->next = s; q = s; @@ -504,12 +560,16 @@ PropagateResults( qh->next = NULL; } } + } } } } if (q) { - /* xfer our local list of nodes to the node queue */ + /* + * Transfer our local list of nodes to the node + * queue. + */ q->next = raidPtr->node_queue; raidPtr->node_queue = qh; DO_SIGNAL(raidPtr); @@ -528,7 +588,7 @@ PropagateResults( rf_FinishNode(skiplist, context); } for (; finishlist; finishlist = next) { - /* NIL nodes: no need to fire them */ + /* NIL nodes: no need to fire them. */ next = finishlist->next; finishlist->status = rf_good; for (i = 0; i < finishlist->numAntecedents; i++) { @@ -537,18 +597,20 @@ PropagateResults( if (finishlist->commitNode) finishlist->dagHdr->numCommits++; /* - * Okay, here we're calling rf_FinishNode() on nodes that - * have the null function as their work proc. Such a node - * could be the terminal node in a DAG. If so, it will - * cause the DAG to complete, which will in turn free - * memory used by the DAG, which includes the node in - * question. Thus, we must avoid referencing the node - * at all after calling rf_FinishNode() on it. - */ - rf_FinishNode(finishlist, context); /* recursive call */ + * Okay, here we're calling rf_FinishNode() on nodes + * that have the null function as their work proc. + * Such a node could be the terminal node in a DAG. + * If so, it will cause the DAG to complete, which will + * in turn free memory used by the DAG, which includes + * the node in question. + * Thus, we must avoid referencing the node at all + * after calling rf_FinishNode() on it. + */ + /* Recursive call. */ + rf_FinishNode(finishlist, context); } - /* fire all nodes in firelist */ - FireNodeList(firelist); + /* Fire all nodes in firelist. 
*/ + rf_FireNodeList(firelist); break; case rf_rollBackward: @@ -560,22 +622,32 @@ PropagateResults( if (a->numSuccDone == a->numSuccFired) { if (a->undoFunc == rf_NullNodeFunc) { - /* don't fire NIL nodes, just process - * them */ + /* + * Don't fire NIL nodes, just process + * them. + */ a->next = finishlist; finishlist = a; } else { if (context != RF_INTR_CONTEXT) { - /* we only have to enqueue if - * we're at intr context */ - a->next = firelist; /* put node on a list to - * be fired after we - * unlock */ + /* + * We only have to enqueue if + * we're at intr context. + */ + /* + * Put node on a list to + * be fired after we + * unlock. + */ + a->next = firelist; firelist = a; - } else { /* enqueue the node for - * the dag exec thread - * to fire */ - RF_ASSERT(NodeReady(a)); + } else { + /* + * Enqueue the node for + * the dag exec thread + * to fire. + */ + RF_ASSERT(rf_NodeReady(a)); if (q) { q->next = a; q = a; @@ -588,47 +660,50 @@ PropagateResults( } } if (q) { - /* xfer our local list of nodes to the node queue */ + /* + * Transfer our local list of nodes to the node + * queue. + */ q->next = raidPtr->node_queue; raidPtr->node_queue = qh; DO_SIGNAL(raidPtr); } DO_UNLOCK(raidPtr); - for (; finishlist; finishlist = next) { /* NIL nodes: no need to - * fire them */ + for (; finishlist; finishlist = next) { + /* NIL nodes: no need to fire them. */ next = finishlist->next; finishlist->status = rf_good; /* - * Okay, here we're calling rf_FinishNode() on nodes that - * have the null function as their work proc. Such a node - * could be the first node in a DAG. If so, it will - * cause the DAG to complete, which will in turn free - * memory used by the DAG, which includes the node in - * question. Thus, we must avoid referencing the node - * at all after calling rf_FinishNode() on it. - */ - rf_FinishNode(finishlist, context); /* recursive call */ + * Okay, here we're calling rf_FinishNode() on nodes + * that have the null function as their work proc. + * Such a node could be the first node in a DAG. + * If so, it will cause the DAG to complete, which will + * in turn free memory used by the DAG, which includes + * the node in question. + * Thus, we must avoid referencing the node at all + * after calling rf_FinishNode() on it. + */ + rf_FinishNode(finishlist, context); + /* Recursive call. */ } - /* fire all nodes in firelist */ - FireNodeList(firelist); + /* Fire all nodes in firelist. */ + rf_FireNodeList(firelist); break; default: - printf("Engine found illegal DAG status in PropagateResults()\n"); + printf("Engine found illegal DAG status in" + " rf_PropagateResults()\n"); RF_PANIC(); break; } } - /* - * Process a fired node which has completed + * Process a fired node which has completed. */ -static void -ProcessNode( - RF_DagNode_t * node, - int context) +void +rf_ProcessNode(RF_DagNode_t *node, int context) { RF_Raid_t *raidPtr; @@ -636,79 +711,82 @@ ProcessNode( switch (node->status) { case rf_good: - /* normal case, don't need to do anything */ + /* Normal case, don't need to do anything. */ break; case rf_bad: - if ((node->dagHdr->numCommits > 0) || (node->dagHdr->numCommitNodes == 0)) { - node->dagHdr->status = rf_rollForward; /* crossed commit - * barrier */ + if ((node->dagHdr->numCommits > 0) || + (node->dagHdr->numCommitNodes == 0)) { + /* Crossed commit barrier. 
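+				 * (At least one commit node has already
+				 * fired, or the DAG has no commit nodes at
+				 * all, so recovery must roll forward; cf.
+				 * the header comment at the top of this
+				 * file.)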
*/ + node->dagHdr->status = rf_rollForward; if (rf_engineDebug || 1) { - printf("raid%d: node (%s) returned fail, rolling forward\n", raidPtr->raidid, node->name); + printf("raid%d: node (%s) returned fail," + " rolling forward\n", raidPtr->raidid, + node->name); } } else { - node->dagHdr->status = rf_rollBackward; /* never reached commit - * barrier */ + /* Never reached commit barrier. */ + node->dagHdr->status = rf_rollBackward; if (rf_engineDebug || 1) { - printf("raid%d: node (%s) returned fail, rolling backward\n", raidPtr->raidid, node->name); + printf("raid%d: node (%s) returned fail," + " rolling backward\n", raidPtr->raidid, + node->name); } } break; case rf_undone: - /* normal rollBackward case, don't need to do anything */ + /* Normal rollBackward case, don't need to do anything. */ break; case rf_panic: - /* an undo node failed!!! */ - printf("UNDO of a node failed!!!/n"); + /* An undo node failed !!! */ + printf("UNDO of a node failed !!!/n"); break; default: - printf("node finished execution with an illegal status!!!\n"); + printf("node finished execution with an illegal status !!!\n"); RF_PANIC(); break; } - /* enqueue node's succedents (antecedents if rollBackward) for - * execution */ - PropagateResults(node, context); + /* + * Enqueue node's succedents (antecedents if rollBackward) for + * execution. + */ + rf_PropagateResults(node, context); } - -/* user context or dag-exec-thread context: +/* + * User context or dag-exec-thread context: * This is the first step in post-processing a newly-completed node. * This routine is called by each node execution function to mark the node * as complete and fire off any successors that have been enabled. */ -int -rf_FinishNode( - RF_DagNode_t * node, - int context) +int +rf_FinishNode(RF_DagNode_t *node, int context) { - /* as far as I can tell, retcode is not used -wvcii */ - int retcode = RF_FALSE; + /* As far as I can tell, retcode is not used -wvcii. */ + int retcode = RF_FALSE; node->dagHdr->numNodesCompleted++; - ProcessNode(node, context); + rf_ProcessNode(node, context); return (retcode); } -/* user context: - * submit dag for execution, return non-zero if we have to wait for completion. - * if and only if we return non-zero, we'll cause cbFunc to get invoked with +/* + * User context: + * Submit dag for execution, return non-zero if we have to wait for completion. + * If and only if we return non-zero, we'll cause cbFunc to get invoked with * cbArg when the DAG has completed. * - * for now we always return 1. If the DAG does not cause any I/O, then the callback - * may get invoked before DispatchDAG returns. There's code in state 5 of ContinueRaidAccess - * to handle this. + * For now we always return 1. If the DAG does not cause any I/O, then the + * callback may get invoked before DispatchDAG returns. There's code in state + * 5 of ContinueRaidAccess to handle this. * - * All we do here is fire the direct successors of the header node. The - * DAG execution thread does the rest of the dag processing. + * All we do here is fire the direct successors of the header node. The DAG + * execution thread does the rest of the dag processing. 
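+ *
+ * A caller is thus expected to do roughly the following (sketch only;
+ * descriptor bookkeeping omitted):
+ *
+ *	if (rf_DispatchDAG(dag_h, cbFunc, cbArg))
+ *		return;	(cbFunc(cbArg) will be invoked once the DAG completes)
+ *	(a zero return would mean no callback -- not reached while this
+ *	 function always returns 1)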
*/ -int -rf_DispatchDAG( - RF_DagHeader_t * dag, - void (*cbFunc) (void *), - void *cbArg) +int +rf_DispatchDAG(RF_DagHeader_t *dag, void (*cbFunc) (void *), void *cbArg) { RF_Raid_t *raidPtr; @@ -723,29 +801,35 @@ rf_DispatchDAG( if (rf_engineDebug) { printf("raid%d: Entering DispatchDAG\n", raidPtr->raidid); } - raidPtr->dags_in_flight++; /* debug only: blow off proper - * locking */ + raidPtr->dags_in_flight++; /* + * Debug only: blow off proper + * locking. + */ dag->cbFunc = cbFunc; dag->cbArg = cbArg; dag->numNodesCompleted = 0; dag->status = rf_enable; - FireNodeArray(dag->numSuccedents, dag->succedents); + rf_FireNodeArray(dag->numSuccedents, dag->succedents); return (1); } -/* dedicated kernel thread: - * the thread that handles all DAG node firing. - * To minimize locking and unlocking, we grab a copy of the entire node queue and then set the - * node queue to NULL before doing any firing of nodes. This way we only have to release the - * lock once. Of course, it's probably rare that there's more than one node in the queue at - * any one time, but it sometimes happens. + + +/* + * Dedicated kernel thread: + * The thread that handles all DAG node firing. + * To minimize locking and unlocking, we grab a copy of the entire node queue + * and then set the node queue to NULL before doing any firing of nodes. + * This way we only have to release the lock once. Of course, it's probably + * rare that there's more than one node in the queue at any one time, but it + * sometimes happens. * - * In the kernel, this thread runs at spl0 and is not swappable. I copied these + * In the kernel, this thread runs at spl0 and is not swappable. I copied these * characteristics from the aio_completion_thread. */ -#ifdef RAID_AUTOCONFIG -void -DAGExecutionThread_pre(RF_ThreadArg_t arg) +#ifdef RAID_AUTOCONFIG +void +rf_DAGExecutionThread_pre(RF_ThreadArg_t arg) { RF_Raid_t *raidPtr; char raidname[16]; @@ -760,12 +844,12 @@ DAGExecutionThread_pre(RF_ThreadArg_t arg) lastpid = RF_ENGINE_PID + raidPtr->raidid - 1; len = sprintf(&raidname[0], "raid%d", raidPtr->raidid); -#ifdef DIAGNOSTIC +#ifdef DIAGNOSTIC if (len >= sizeof(raidname)) panic("raidname expansion too long."); -#endif /* DIAGNOSTIC */ +#endif /* DIAGNOSTIC */ - if (RF_CREATE_THREAD(raidPtr->engine_thread, DAGExecutionThread, + if (RF_CREATE_THREAD(raidPtr->engine_thread, rf_DAGExecutionThread, raidPtr, &raidname[0])) { RF_ERRORMSG("RAIDFRAME: Unable to create engine thread\n"); return; @@ -779,13 +863,13 @@ DAGExecutionThread_pre(RF_ThreadArg_t arg) } #endif /* RAID_AUTOCONFIG */ -void -DAGExecutionThread(RF_ThreadArg_t arg) +void +rf_DAGExecutionThread(RF_ThreadArg_t arg) { RF_DagNode_t *nd, *local_nq, *term_nq, *fire_nq; RF_Raid_t *raidPtr; - int ks; - int s; + int ks; + int s; raidPtr = (RF_Raid_t *) arg; @@ -796,7 +880,7 @@ DAGExecutionThread(RF_ThreadArg_t arg) if (rf_engineDebug) { printf("raid%d: Engine thread is running\n", raidPtr->raidid); } - /* XXX what to put here XXX */ + /* XXX What to put here ? XXX */ s = splbio(); @@ -815,7 +899,7 @@ DAGExecutionThread(RF_ThreadArg_t arg) raidPtr->node_queue = NULL; DO_UNLOCK(raidPtr); - /* first, strip out the terminal nodes */ + /* First, strip out the terminal nodes. */ while (local_nq) { nd = local_nq; local_nq = local_nq->next; @@ -823,26 +907,34 @@ DAGExecutionThread(RF_ThreadArg_t arg) case rf_enable: case rf_rollForward: if (nd->numSuccedents == 0) { - /* end of the dag, add to - * callback list */ + /* + * End of the dag, add to + * callback list. 
+ */ nd->next = term_nq; term_nq = nd; } else { - /* not the end, add to the - * fire queue */ + /* + * Not the end, add to the + * fire queue. + */ nd->next = fire_nq; fire_nq = nd; } break; case rf_rollBackward: if (nd->numAntecedents == 0) { - /* end of the dag, add to the - * callback list */ + /* + * End of the dag, add to the + * callback list. + */ nd->next = term_nq; term_nq = nd; } else { - /* not the end, add to the - * fire queue */ + /* + * Not the end, add to the + * fire queue. + */ nd->next = fire_nq; fire_nq = nd; } @@ -853,18 +945,20 @@ DAGExecutionThread(RF_ThreadArg_t arg) } } - /* execute callback of dags which have reached the - * terminal node */ + /* + * Execute callback of dags which have reached the + * terminal node. + */ while (term_nq) { nd = term_nq; term_nq = term_nq->next; nd->next = NULL; (nd->dagHdr->cbFunc) (nd->dagHdr->cbArg); - raidPtr->dags_in_flight--; /* debug only */ + raidPtr->dags_in_flight--; /* Debug only. */ } - /* fire remaining nodes */ - FireNodeList(fire_nq); + /* Fire remaining nodes. */ + rf_FireNodeList(fire_nq); DO_LOCK(raidPtr); } diff --git a/sys/dev/raidframe/rf_engine.h b/sys/dev/raidframe/rf_engine.h index dd21e705369..ae3ae2dd293 100644 --- a/sys/dev/raidframe/rf_engine.h +++ b/sys/dev/raidframe/rf_engine.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_engine.h,v 1.3 2002/08/05 22:11:27 tdeval Exp $ */ +/* $OpenBSD: rf_engine.h,v 1.4 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_engine.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,24 +28,23 @@ * rights to redistribute these changes. */ -/********************************************************** - * * - * engine.h -- header file for execution engine functions * - * * - **********************************************************/ +/*********************************************************** + * * + * engine.h -- Header file for execution engine functions. * + * * + ***********************************************************/ -#ifndef _RF__RF_ENGINE_H_ -#define _RF__RF_ENGINE_H_ +#ifndef _RF__RF_ENGINE_H_ +#define _RF__RF_ENGINE_H_ extern void **rf_hook_cookies; -int -rf_ConfigureEngine(RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, RF_Config_t * cfgPtr); +int rf_ConfigureEngine(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); -int rf_FinishNode(RF_DagNode_t * node, int context); /* return finished node - * to engine */ + /* Return finished node to engine. */ +int rf_FinishNode(RF_DagNode_t *, int); -int rf_DispatchDAG(RF_DagHeader_t * dag, void (*cbFunc) (void *), void *cbArg); /* execute dag */ + /* Execute dag. */ +int rf_DispatchDAG(RF_DagHeader_t *, void (*) (void *), void *); -#endif /* !_RF__RF_ENGINE_H_ */ +#endif /* !_RF__RF_ENGINE_H_ */ diff --git a/sys/dev/raidframe/rf_etimer.h b/sys/dev/raidframe/rf_etimer.h index 7531ade1471..197433740a1 100644 --- a/sys/dev/raidframe/rf_etimer.h +++ b/sys/dev/raidframe/rf_etimer.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_etimer.h,v 1.5 2000/01/07 14:50:21 peter Exp $ */ +/* $OpenBSD: rf_etimer.h,v 1.6 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_etimer.h,v 1.4 1999/08/13 03:26:55 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,8 +28,8 @@ * rights to redistribute these changes. 
*/ -#ifndef _RF__RF_TIMER_H_ -#define _RF__RF_TIMER_H_ +#ifndef _RF__RF_TIMER_H_ +#define _RF__RF_TIMER_H_ #include "rf_options.h" @@ -42,34 +43,34 @@ struct RF_Etimer_s { struct timeval diff; }; -#if defined(_KERNEL) +#if defined(_KERNEL) #include <sys/kernel.h> -#define RF_ETIMER_START(_t_) \ - { \ - int s; \ - bzero(&(_t_), sizeof (_t_)); \ - s = splclock(); \ - (_t_).st = mono_time; \ - splx(s); \ - } +#define RF_ETIMER_START(_t_) \ + do { \ + int s; \ + bzero(&(_t_), sizeof (_t_)); \ + s = splclock(); \ + (_t_).st = mono_time; \ + splx(s); \ + } while (0) -#define RF_ETIMER_STOP(_t_) \ - { \ - int s; \ - s = splclock(); \ - (_t_).et = mono_time; \ - splx(s); \ - } +#define RF_ETIMER_STOP(_t_) \ + do { \ + int s; \ + s = splclock(); \ + (_t_).et = mono_time; \ + splx(s); \ + } while (0) -#define RF_ETIMER_EVAL(_t_) \ - { \ - RF_TIMEVAL_DIFF(&(_t_).st, &(_t_).et, &(_t_).diff) \ - } +#define RF_ETIMER_EVAL(_t_) \ + do { \ + RF_TIMEVAL_DIFF(&(_t_).st, &(_t_).et, &(_t_).diff); \ + } while (0) -#define RF_ETIMER_VAL_US(_t_) (RF_TIMEVAL_TO_US((_t_).diff)) -#define RF_ETIMER_VAL_MS(_t_) (RF_TIMEVAL_TO_US((_t_).diff)/1000) +#define RF_ETIMER_VAL_US(_t_) (RF_TIMEVAL_TO_US((_t_).diff)) +#define RF_ETIMER_VAL_MS(_t_) (RF_TIMEVAL_TO_US((_t_).diff)/1000) -#endif /* _KERNEL */ +#endif /* _KERNEL */ -#endif /* !_RF__RF_TIMER_H_ */ +#endif /* !_RF__RF_TIMER_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd.c b/sys/dev/raidframe/rf_evenodd.c index 52118285fc8..da372aca9ed 100644 --- a/sys/dev/raidframe/rf_evenodd.c +++ b/sys/dev/raidframe/rf_evenodd.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_evenodd.c,v 1.4 2000/01/11 18:02:22 peter Exp $ */ +/* $OpenBSD: rf_evenodd.c,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */ /* $NetBSD: rf_evenodd.c,v 1.4 2000/01/07 03:40:59 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,15 +28,15 @@ * rights to redistribute these changes. */ -/***************************************************************************************** +/***************************************************************************** * * rf_evenodd.c -- implements EVENODD array architecture * - ****************************************************************************************/ + *****************************************************************************/ #include "rf_archs.h" -#if RF_INCLUDE_EVENODD > 0 +#if RF_INCLUDE_EVENODD > 0 #include "rf_types.h" #include "rf_raid.h" @@ -61,148 +62,172 @@ #include "rf_engine.h" typedef struct RF_EvenOddConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} RF_EvenOddConfigInfo_t; - -int -rf_ConfigureEvenOdd(listp, raidPtr, cfgPtr) - RF_ShutdownList_t **listp; - RF_Raid_t *raidPtr; - RF_Config_t *cfgPtr; + RF_RowCol_t **stripeIdentifier; /* + * Filled in at config time & used by + * IdentifyStripe. 
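+					 * stripeIdentifier[stripeID % numCol]
+					 * lists the physical column of each
+					 * stripe unit in that stripe, as
+					 * built by rf_ConfigureEvenOdd()
+					 * below.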
+ */ +} RF_EvenOddConfigInfo_t; + +int +rf_ConfigureEvenOdd(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_EvenOddConfigInfo_t *info; RF_RowCol_t i, j, startdisk; - RF_MallocAndAdd(info, sizeof(RF_EvenOddConfigInfo_t), (RF_EvenOddConfigInfo_t *), raidPtr->cleanupList); + RF_MallocAndAdd(info, sizeof(RF_EvenOddConfigInfo_t), + (RF_EvenOddConfigInfo_t *), raidPtr->cleanupList); layoutPtr->layoutSpecificInfo = (void *) info; RF_ASSERT(raidPtr->numRow == 1); - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); + info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, + raidPtr->numCol, raidPtr->cleanupList); startdisk = 0; for (i = 0; i < raidPtr->numCol; i++) { for (j = 0; j < raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; + info->stripeIdentifier[i][j] = (startdisk + j) % + raidPtr->numCol; } if ((startdisk -= 2) < 0) startdisk += raidPtr->numCol; } - /* fill in the remaining layout parameters */ + /* Fill in the remaining layout parameters. */ layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; - layoutPtr->numDataCol = raidPtr->numCol - 2; /* ORIG: + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << + raidPtr->logBytesPerSector; + layoutPtr->numDataCol = raidPtr->numCol - 2; /* + * ORIG: * layoutPtr->numDataCol - * = raidPtr->numCol-1; */ -#if RF_EO_MATRIX_DIM > 17 + * = raidPtr->numCol-1; + */ +#if RF_EO_MATRIX_DIM > 17 if (raidPtr->numCol <= 17) { - printf("Number of stripe units in a parity stripe is smaller than 17. Please\n"); - printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n"); - printf("be 17 to increase performance. \n"); + printf("Number of stripe units in a parity stripe is smaller" + " than 17. Please\ndefine the macro RF_EO_MATRIX_DIM in" + " file rf_evenodd_dagfuncs.h to\nbe 17 to increase" + " performance.\n"); return (EINVAL); } -#elif RF_EO_MATRIX_DIM == 17 +#elif RF_EO_MATRIX_DIM == 17 if (raidPtr->numCol > 17) { - printf("Number of stripe units in a parity stripe is bigger than 17. Please\n"); - printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n"); - printf("be 257 for encoding and decoding functions to work. \n"); + printf("Number of stripe units in a parity stripe is bigger" + " than 17. 
Please\ndefine the macro RF_EO_MATRIX_DIM in" + " file rf_evenodd_dagfuncs.h to\nbe 257 for encoding and" + " decoding functions to work.\n"); return (EINVAL); } #endif - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * + layoutPtr->sectorsPerStripeUnit; layoutPtr->numParityCol = 2; layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * + layoutPtr->sectorsPerStripeUnit; - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * + layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; return (0); } -int -rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t * raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t *raidPtr) { return (20); } -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t * raidPtr) +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t *raidPtr) { return (10); } -void -rf_IdentifyStripeEvenOdd( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) +void +rf_IdentifyStripeEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t **diskids, RF_RowCol_t *outRow) { - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_EvenOddConfigInfo_t *info = (RF_EvenOddConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, + addr); + RF_EvenOddConfigInfo_t *info = + (RF_EvenOddConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; *outRow = 0; *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; } -/* The layout of stripe unit on the disks are: c0 c1 c2 c3 c4 - 0 1 2 E P - 5 E P 3 4 - P 6 7 8 E - 10 11 E P 9 - E P 12 13 14 - .... - We use the MapSectorRAID5 to map data information because the routine can be shown to map exactly - the layout of data stripe unit as shown above although we have 2 redundant information now. - But for E and P, we use rf_MapEEvenOdd and rf_MapParityEvenOdd which are different method from raid-5. -*/ +/* + * The layout of stripe unit on the disks are: c0 c1 c2 c3 c4 + * + * 0 1 2 E P + * 5 E P 3 4 + * P 6 7 8 E + * 10 11 E P 9 + * E P 12 13 14 + * .... + * + * We use the MapSectorRAID5 to map data information because the routine can + * be shown to map exactly the layout of data stripe unit as shown above, + * although we have 2 redundant information now. + * But for E and P, we use rf_MapEEvenOdd and rf_MapParityEvenOdd which are + * different method from raid-5. 
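+ *
+ * A worked example, assuming numCol = 5 as in the table above (so
+ * numDataCol = 3): the stripe holding SUIDs 3..5 has
+ * endSUIDofthisStrip = 5, which places P on column (5 + 2) % 5 = 2 and
+ * E on column (5 + 1) % 5 = 1 -- the second row of the table.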
+ */ -void +void rf_MapParityEvenOdd( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) + RF_Raid_t *raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t *row, + RF_RowCol_t *col, + RF_SectorNum_t *diskSector, + int remap +) { RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_StripeNum_t endSUIDofthisStrip = (SUID / raidPtr->Layout.numDataCol + 1) * raidPtr->Layout.numDataCol - 1; + RF_StripeNum_t endSUIDofthisStrip = + (SUID / raidPtr->Layout.numDataCol + 1) * + raidPtr->Layout.numDataCol - 1; *row = 0; *col = (endSUIDofthisStrip + 2) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * + raidPtr->Layout.sectorsPerStripeUnit + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void +void rf_MapEEvenOdd( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) + RF_Raid_t *raidPtr, + RF_RaidAddr_t raidSector, + RF_RowCol_t *row, + RF_RowCol_t *col, + RF_SectorNum_t *diskSector, + int remap +) { RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - RF_StripeNum_t endSUIDofthisStrip = (SUID / raidPtr->Layout.numDataCol + 1) * raidPtr->Layout.numDataCol - 1; + RF_StripeNum_t endSUIDofthisStrip = + (SUID / raidPtr->Layout.numDataCol + 1) * + raidPtr->Layout.numDataCol - 1; *row = 0; *col = (endSUIDofthisStrip + 1) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * + raidPtr->Layout.sectorsPerStripeUnit + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void +void rf_EODagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) + RF_Raid_t *raidPtr, + RF_IoType_t type, + RF_AccessStripeMap_t *asmap, + RF_VoidFuncPtr *createFunc +) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); unsigned ndfail = asmap->numDataFailed; @@ -211,34 +236,47 @@ rf_EODagSelect( RF_ASSERT(RF_IO_IS_R_OR_W(type)); if (ntfail > 2) { - RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); + RF_ERRORMSG("more than two disks failed in a single group !" + " Aborting I/O operation.\n"); /* *infoFunc = */ *createFunc = NULL; return; } - /* ok, we can do this I/O */ + /* Ok, we can do this I/O. */ if (type == RF_IO_TYPE_READ) { switch (ndfail) { case 0: - /* fault free read */ - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */ + /* Fault free read. */ + *createFunc = (RF_VoidFuncPtr) + rf_CreateFaultFreeReadDAG; /* Same as raid 5. */ break; case 1: - /* lost a single data unit */ - /* two cases: (1) parity is not lost. do a normal raid - * 5 reconstruct read. (2) parity is lost. do a - * reconstruct read using "e". */ - if (ntfail == 2) { /* also lost redundancy */ - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) - *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateReadDAG; + /* Lost a single data unit. */ + /* + * Two cases: + * (1) Parity is not lost. Do a normal raid 5 + * reconstruct read. + * (2) Parity is lost. Do a reconstruct read using "e". + */ + if (ntfail == 2) { /* Also lost redundancy. 
*/ + if (asmap->failedPDAs[1]->type == + RF_PDA_TYPE_PARITY) + *createFunc = (RF_VoidFuncPtr) + rf_EO_110_CreateReadDAG; else - *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateReadDAG; + *createFunc = (RF_VoidFuncPtr) + rf_EO_101_CreateReadDAG; } else { - /* P and E are ok. But is there a failure in - * some unaccessed data unit? */ - if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) - *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateReadDAG; + /* + * P and E are ok. But is there a failure in + * some unaccessed data unit ? + */ + if (rf_NumFailedDataUnitsInStripe(raidPtr, + asmap) == 2) + *createFunc = (RF_VoidFuncPtr) + rf_EO_200_CreateReadDAG; else - *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateReadDAG; + *createFunc = (RF_VoidFuncPtr) + rf_EO_100_CreateReadDAG; } break; case 2: @@ -248,12 +286,16 @@ rf_EODagSelect( } return; } - /* a write */ + /* A write. */ switch (ntfail) { - case 0: /* fault free */ + case 0: /* Fault free. */ if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { + (((asmap->numStripeUnitsAccessed <= + (layoutPtr->numDataCol / 2)) && + (layoutPtr->numDataCol != 1)) || + (asmap->parityInfo->next != NULL) || + (asmap->qInfo->next != NULL) || + rf_CheckStripeForFailures(raidPtr, asmap))) { *createFunc = (RF_VoidFuncPtr) rf_EOCreateSmallWriteDAG; } else { @@ -261,85 +303,129 @@ rf_EODagSelect( } break; - case 1: /* single disk fault */ + case 1: /* Single disk fault. */ if (npfail == 1) { - RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like - * normal mode raid5 - * write. */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || (asmap->parityInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateSmallWriteDAG; + RF_ASSERT((asmap->failedPDAs[0]->type == + RF_PDA_TYPE_PARITY) || + (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); + if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { + /* + * q died, treat like normal mode raid5 + * write. + */ + if (((asmap->numStripeUnitsAccessed <= + (layoutPtr->numDataCol / 2)) || + (asmap->numStripeUnitsAccessed == 1)) || + (asmap->parityInfo->next != NULL) || + rf_NumFailedDataUnitsInStripe(raidPtr, + asmap)) + *createFunc = (RF_VoidFuncPtr) + rf_EO_001_CreateSmallWriteDAG; else - *createFunc = (RF_VoidFuncPtr) rf_EO_001_CreateLargeWriteDAG; - } else {/* parity died, small write only updating Q */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || (asmap->qInfo->next != NULL) || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateSmallWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_EO_001_CreateLargeWriteDAG; + } else { + /* Parity died, small write only updating Q. 
*/ + if (((asmap->numStripeUnitsAccessed <= + (layoutPtr->numDataCol / 2)) || + (asmap->numStripeUnitsAccessed == 1)) || + (asmap->qInfo->next != NULL) || + rf_NumFailedDataUnitsInStripe(raidPtr, + asmap)) + *createFunc = (RF_VoidFuncPtr) + rf_EO_010_CreateSmallWriteDAG; else - *createFunc = (RF_VoidFuncPtr) rf_EO_010_CreateLargeWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_EO_010_CreateLargeWriteDAG; } - } else { /* data missing. Do a P reconstruct write if + } else { /* + * Data missing. Do a P reconstruct write if * only a single data unit is lost in the * stripe, otherwise a reconstruct write which - * employnig both P and E units. */ - if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) { + * is employing both P and E units. + */ + if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) + { if (asmap->numStripeUnitsAccessed == 1) - *createFunc = (RF_VoidFuncPtr) rf_EO_200_CreateWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_EO_200_CreateWriteDAG; else - *createFunc = NULL; /* No direct support for - * this case now, like - * that in Raid-5 */ + /* + * No direct support for this case now, + * like that in Raid-5. + */ + *createFunc = NULL; } else { - if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* No direct support for - * this case now, like - * that in Raid-5 */ + if (asmap->numStripeUnitsAccessed != 1 && + asmap->failedPDAs[0]->numSector != + layoutPtr->sectorsPerStripeUnit) + /* + * No direct support for this case now, + * like that in Raid-5. + */ + *createFunc = NULL; else - *createFunc = (RF_VoidFuncPtr) rf_EO_100_CreateWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_EO_100_CreateWriteDAG; } } break; - case 2: /* two disk faults */ + case 2: /* Two disk faults. */ switch (npfail) { - case 2: /* both p and q dead */ + case 2: /* Both p and q dead. */ *createFunc = (RF_VoidFuncPtr) rf_EO_011_CreateWriteDAG; break; - case 1: /* either p or q and dead data */ - RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); - RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); + case 1: /* Either p or q and dead data. */ + RF_ASSERT(asmap->failedPDAs[0]->type == + RF_PDA_TYPE_DATA); + RF_ASSERT((asmap->failedPDAs[1]->type == + RF_PDA_TYPE_PARITY) || + (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) { - if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* In both PQ and - * EvenOdd, no direct - * support for this case - * now, like that in - * Raid-5 */ + if (asmap->numStripeUnitsAccessed != 1 && + asmap->failedPDAs[0]->numSector != + layoutPtr->sectorsPerStripeUnit) + /* + * In both PQ and EvenOdd, no direct + * support for this case now, like that + * in Raid-5. + */ + *createFunc = NULL; else - *createFunc = (RF_VoidFuncPtr) rf_EO_101_CreateWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_EO_101_CreateWriteDAG; } else { - if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit) - *createFunc = NULL; /* No direct support for - * this case, like that - * in Raid-5 */ + if (asmap->numStripeUnitsAccessed != 1 && + asmap->failedPDAs[0]->numSector != + layoutPtr->sectorsPerStripeUnit) + /* + * No direct support for this case, + * like that in Raid-5. 
+ */ + *createFunc = NULL; else - *createFunc = (RF_VoidFuncPtr) rf_EO_110_CreateWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_EO_110_CreateWriteDAG; } break; - case 0: /* double data loss */ - /* if(asmap->failedPDAs[0]->numSector + + case 0: /* Double data loss. */ + /* + * if(asmap->failedPDAs[0]->numSector + * asmap->failedPDAs[1]->numSector == 2 * * layoutPtr->sectorsPerStripeUnit ) createFunc = - * rf_EOCreateLargeWriteDAG; else */ - *createFunc = NULL; /* currently, in Evenodd, No + * rf_EOCreateLargeWriteDAG; else + */ + *createFunc = NULL; /* + * Currently, in Evenodd, no * support for simultaneous - * access of both failed SUs */ + * access of both failed SUs. + */ break; } break; - default: /* more than 2 disk faults */ + default: /* More than 2 disk faults. */ *createFunc = NULL; RF_PANIC(); } @@ -347,34 +433,37 @@ rf_EODagSelect( } -int -rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_PhysDiskAddr_t *parityPDA; - int correct_it; - RF_RaidAccessFlags_t flags; +int +rf_VerifyParityEvenOdd( + RF_Raid_t *raidPtr, + RF_RaidAddr_t raidAddr, + RF_PhysDiskAddr_t *parityPDA, + int correct_it, + RF_RaidAccessFlags_t flags +) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); - RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); + RF_RaidAddr_t startAddr = + rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); RF_SectorCount_t numsector = parityPDA->numSector; - int numbytes = rf_RaidAddressToByte(raidPtr, numsector); - int bytesPerStripe = numbytes * layoutPtr->numDataCol; - RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ + int numbytes = rf_RaidAddressToByte(raidPtr, numsector); + int bytesPerStripe = numbytes * layoutPtr->numDataCol; + RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* Read, write dag. */ RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; RF_AccessStripeMapHeader_t *asm_h; RF_AccessStripeMap_t *asmap; RF_AllocListElem_t *alloclist; RF_PhysDiskAddr_t *pda; - char *pbuf, *buf, *end_p, *p; - char *redundantbuf2; - int redundantTwoErr = 0, redundantOneErr = 0; - int parity_cant_correct = RF_FALSE, red2_cant_correct = RF_FALSE, - parity_corrected = RF_FALSE, red2_corrected = RF_FALSE; - int i, retcode; + char *pbuf, *buf, *end_p, *p; + char *redundantbuf2; + int redundantTwoErr = 0, redundantOneErr = 0; + int parity_cant_correct = RF_FALSE, red2_cant_correct = RF_FALSE, + parity_corrected = RF_FALSE, red2_corrected = RF_FALSE; + int i, retcode; RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru); - int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; + RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, + raidAddr, &which_ru); + int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; RF_AccTraceEntry_t tracerec; RF_MCPair_t *mcpair; @@ -382,32 +471,40 @@ rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) mcpair = rf_AllocMCPair(); rf_MakeAllocList(alloclist); - RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); - RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make - * sure buffer is zeroed */ + RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + + layoutPtr->numParityCol), (char *), alloclist); + /* Use calloc to make sure buffer is zeroed. 
*/ + RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); end_p = buf + bytesPerStripe; - RF_CallocAndAdd(redundantbuf2, 1, numbytes, (char *), alloclist); /* use calloc to make - * sure buffer is zeroed */ + /* Use calloc to make sure buffer is zeroed. */ + RF_CallocAndAdd(redundantbuf2, 1, numbytes, (char *), alloclist); - rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); + rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, + rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", alloclist, flags, + RF_IO_NORMAL_PRIORITY); blockNode = rd_dag_h->succedents[0]; unblockNode = blockNode->succedents[0]->succedents[0]; - /* map the stripe and fill in the PDAs in the dag */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); + /* Map the stripe and fill in the PDAs in the dag. */ + asm_h = rf_MapAccess(raidPtr, startAddr, + layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); asmap = asm_h->stripeMap; - for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { + for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; + i++, pda = pda->next) { RF_ASSERT(pda); rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); RF_ASSERT(pda->numSector != 0); if (rf_TryToRedirectPDA(raidPtr, pda, 0)) - goto out; /* no way to verify parity if disk is - * dead. return w/ good status */ + /* + * No way to verify parity if disk is dead. + * Return w/ good status. + */ + goto out; blockNode->succedents[i]->params[0].p = pda; blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + blockNode->succedents[i]->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); } RF_ASSERT(!asmap->parityInfo->next); @@ -415,19 +512,23 @@ rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) RF_ASSERT(asmap->parityInfo->numSector != 0); if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) goto out; - blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; + blockNode->succedents[layoutPtr->numDataCol]->params[0].p = + asmap->parityInfo; RF_ASSERT(!asmap->qInfo->next); rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->qInfo, 0, 1); RF_ASSERT(asmap->qInfo->numSector != 0); if (rf_TryToRedirectPDA(raidPtr, asmap->qInfo, 1)) goto out; - /* if disk is dead, b/c no reconstruction is implemented right now, - * the function "rf_TryToRedirectPDA" always return one, which cause - * go to out and return w/ good status */ - blockNode->succedents[layoutPtr->numDataCol + 1]->params[0].p = asmap->qInfo; - - /* fire off the DAG */ + /* + * If disk is dead, b/c no reconstruction is implemented right now, + * the function "rf_TryToRedirectPDA" always return one, which causes + * go to out and return w/ good status. + */ + blockNode->succedents[layoutPtr->numDataCol + 1]->params[0].p = + asmap->qInfo; + + /* Fire off the DAG. 
*/ bzero((char *) &tracerec, sizeof(tracerec)); rd_dag_h->tracerec = &tracerec; @@ -443,15 +544,19 @@ rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) RF_WAIT_COND(mcpair->cond, mcpair->mutex); RF_UNLOCK_MUTEX(mcpair->mutex); if (rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); + RF_ERRORMSG("Unable to verify parity: can't read" + " the stripe\n"); retcode = RF_PARITY_COULD_NOT_VERIFY; goto out; } for (p = buf, i = 0; p < end_p; p += numbytes, i++) { - rf_e_encToBuf(raidPtr, i, p, RF_EO_MATRIX_DIM - 2, redundantbuf2, numsector); - /* the corresponding columes in EvenOdd encoding Matrix for - * these p pointers which point to the databuffer in a full - * stripe are sequentially from 0 to layoutPtr->numDataCol-1 */ + rf_e_encToBuf(raidPtr, i, p, RF_EO_MATRIX_DIM - 2, + redundantbuf2, numsector); + /* + * The corresponding columns in EvenOdd encoding Matrix for + * these p pointers that point to the databuffer in a full + * stripe are sequential from 0 to layoutPtr->numDataCol-1. + */ rf_bxor(p, pbuf, numbytes, NULL); } RF_ASSERT(i == layoutPtr->numDataCol); @@ -459,8 +564,10 @@ rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) for (i = 0; i < numbytes; i++) { if (pbuf[i] != buf[bytesPerStripe + i]) { if (!correct_it) { - RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", - i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]); + RF_ERRORMSG3("Parity verify error: byte %d of" + " parity is 0x%x should be 0x%x\n", i, + (u_char) buf[bytesPerStripe + i], + (u_char) pbuf[i]); } } redundantOneErr = 1; @@ -470,8 +577,11 @@ rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) for (i = 0; i < numbytes; i++) { if (redundantbuf2[i] != buf[bytesPerStripe + numbytes + i]) { if (!correct_it) { - RF_ERRORMSG3("Parity verify error: byte %d of second redundant information is 0x%x should be 0x%x\n", - i, (u_char) buf[bytesPerStripe + numbytes + i], (u_char) redundantbuf2[i]); + RF_ERRORMSG3("Parity verify error: byte %d of" + " second redundant information is 0x%x" + " should be 0x%x\n", i, + (u_char) buf[bytesPerStripe + numbytes + i], + (u_char) redundantbuf2[i]); } redundantTwoErr = 1; break; @@ -480,15 +590,17 @@ rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) if (redundantOneErr || redundantTwoErr) retcode = RF_PARITY_BAD; - /* correct the first redundant disk, ie parity if it is error */ + /* Correct the first redundant disk, ie parity if it is error. 
*/ if (redundantOneErr && correct_it) { - wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); + wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", alloclist, + flags, RF_IO_NORMAL_PRIORITY); wrBlock = wr_dag_h->succedents[0]; wrUnblock = wrBlock->succedents[0]->succedents[0]; wrBlock->succedents[0]->params[0].p = asmap->parityInfo; wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wrBlock->succedents[0]->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); bzero((char *) &tracerec, sizeof(tracerec)); wr_dag_h->tracerec = &tracerec; if (rf_verifyParityDebug) { @@ -497,13 +609,14 @@ rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) } RF_LOCK_MUTEX(mcpair->mutex); mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); + rf_DispatchDAG(wr_dag_h, + (void (*) (void *)) rf_MCPairWakeupFunc, (void *) mcpair); while (!mcpair->flag) RF_WAIT_COND(mcpair->cond, mcpair->mutex); RF_UNLOCK_MUTEX(mcpair->mutex); if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); + RF_ERRORMSG("Unable to correct parity in VerifyParity:" + " can't write the stripe\n"); parity_cant_correct = RF_TRUE; } else { parity_corrected = RF_TRUE; @@ -511,28 +624,33 @@ rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags) rf_FreeDAG(wr_dag_h); } if (redundantTwoErr && correct_it) { - wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, redundantbuf2, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, + redundantbuf2, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnred2", alloclist, flags, RF_IO_NORMAL_PRIORITY); wrBlock = wr_dag_h->succedents[0]; wrUnblock = wrBlock->succedents[0]->succedents[0]; wrBlock->succedents[0]->params[0].p = asmap->qInfo; wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wrBlock->succedents[0]->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); bzero((char *) &tracerec, sizeof(tracerec)); wr_dag_h->tracerec = &tracerec; if (rf_verifyParityDebug) { - printf("Dag of write new second redundant information in parity verify :\n"); + printf("Dag of write new second redundant information" + " in parity verify :\n"); rf_PrintDAGList(wr_dag_h); } RF_LOCK_MUTEX(mcpair->mutex); mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); + rf_DispatchDAG(wr_dag_h, + (void (*) (void *)) rf_MCPairWakeupFunc, (void *) mcpair); while (!mcpair->flag) RF_WAIT_COND(mcpair->cond, mcpair->mutex); RF_UNLOCK_MUTEX(mcpair->mutex); if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct second redundant information in VerifyParity: can't write the stripe\n"); + RF_ERRORMSG("Unable to correct second redundant" + " information in VerifyParity: can't write the" + " stripe\n"); red2_cant_correct = RF_TRUE; } else { red2_corrected = RF_TRUE; @@ -553,4 +671,4 @@ out: rf_FreeMCPair(mcpair); return (retcode); } -#endif /* RF_INCLUDE_EVENODD > 0 */ +#endif /* RF_INCLUDE_EVENODD > 0 */ diff --git a/sys/dev/raidframe/rf_evenodd.h b/sys/dev/raidframe/rf_evenodd.h index 63b0d75c9b9..34b5fc4f664 100644 --- 
a/sys/dev/raidframe/rf_evenodd.h +++ b/sys/dev/raidframe/rf_evenodd.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_evenodd.h,v 1.2 1999/02/16 00:02:44 niklas Exp $ */ +/* $OpenBSD: rf_evenodd.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_evenodd.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ + /* * Copyright (c) 1995, 1996 Carnegie-Mellon University. * All rights reserved. @@ -27,29 +28,22 @@ * rights to redistribute these changes. */ -#ifndef _RF__RF_EVENODD_H_ -#define _RF__RF_EVENODD_H_ +#ifndef _RF__RF_EVENODD_H_ +#define _RF__RF_EVENODD_H_ -/* extern declerations of the failure mode functions. */ -int -rf_ConfigureEvenOdd(RF_ShutdownList_t ** shutdownListp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t * raidPtr); -void -rf_IdentifyStripeEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outrow); -void -rf_MapParityEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapEEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_EODagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -int -rf_VerifyParityEvenOdd(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); +/* Extern declarations of the failure mode functions. */ +int rf_ConfigureEvenOdd(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +int rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t *); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t *); +void rf_IdentifyStripeEvenOdd(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t **, RF_RowCol_t *); +void rf_MapParityEvenOdd(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapEEvenOdd(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_EODagSelect(RF_Raid_t *, RF_IoType_t, RF_AccessStripeMap_t *, + RF_VoidFuncPtr *); +int rf_VerifyParityEvenOdd(RF_Raid_t *, RF_RaidAddr_t, RF_PhysDiskAddr_t *, + int, RF_RaidAccessFlags_t); -#endif /* !_RF__RF_EVENODD_H_ */ +#endif /* !_RF__RF_EVENODD_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd_dagfuncs.c b/sys/dev/raidframe/rf_evenodd_dagfuncs.c index 7617b8241a3..d619b1ca50e 100644 --- a/sys/dev/raidframe/rf_evenodd_dagfuncs.c +++ b/sys/dev/raidframe/rf_evenodd_dagfuncs.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_evenodd_dagfuncs.c,v 1.6 2000/08/08 16:07:41 peter Exp $ */ +/* $OpenBSD: rf_evenodd_dagfuncs.c,v 1.7 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_evenodd_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,7 +29,7 @@ */ /* - * Code for RAID-EVENODD architecture. + * Code for RAID-EVENODD architecture. 
*/ #include "rf_types.h" @@ -47,153 +48,199 @@ #include "rf_evenodd.h" #include "rf_evenodd_dagfuncs.h" -/* These redundant functions are for small write */ -RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"}; -RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"}; -/* These redundant functions are for degraded read */ -RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"}; -RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"}; -/********************************************************************************************** - * the following encoding node functions is used in EO_000_CreateLargeWriteDAG - **********************************************************************************************/ -int -rf_RegularPEFunc(node) - RF_DagNode_t *node; +/* These redundant functions are for small write. */ +RF_RedFuncs_t rf_EOSmallWritePFuncs = { + rf_RegularXorFunc, "Regular Old-New P", + rf_SimpleXorFunc, "Simple Old-New P" +}; +RF_RedFuncs_t rf_EOSmallWriteEFuncs = { + rf_RegularONEFunc, "Regular Old-New E", + rf_SimpleONEFunc, "Regular Old-New E" +}; +/* These redundant functions are for degraded read. */ +RF_RedFuncs_t rf_eoPRecoveryFuncs = { + rf_RecoveryXorFunc, "Recovery Xr", + rf_RecoveryXorFunc, "Recovery Xr" +}; +RF_RedFuncs_t rf_eoERecoveryFuncs = { + rf_RecoveryEFunc, "Recovery E Func", + rf_RecoveryEFunc, "Recovery E Func" +}; + + +/***************************************************************************** + * The following encoding node functions is used in + * EO_000_CreateLargeWriteDAG. + *****************************************************************************/ +int +rf_RegularPEFunc(RF_DagNode_t *node) { rf_RegularESubroutine(node, node->results[1]); - rf_RegularXorFunc(node);/* does the wakeup here! */ + rf_RegularXorFunc(node); /* Do the wakeup here ! */ #if 1 return (0); /* XXX This was missing... GO */ #endif } -/************************************************************************************************ - * For EO_001_CreateSmallWriteDAG, there are (i)RegularONEFunc() and (ii)SimpleONEFunc() to - * be used. The previous case is when write access at least sectors of full stripe unit. - * The later function is used when the write access two stripe units but with total sectors - * less than sectors per SU. In this case, the access of parity and 'E' are shown as disconnected - * areas in their stripe unit and parity write and 'E' write are both devided into two distinct - * writes( totally four). This simple old-new write and regular old-new write happen as in RAID-5 - ************************************************************************************************/ - -/* Algorithm: - 1. Store the difference of old data and new data in the Rod buffer. - 2. then encode this buffer into the buffer which already have old 'E' information inside it, - the result can be shown to be the new 'E' information. - 3. xor the Wnd buffer into the difference buffer to recover the original old data. - Here we have another alternative: to allocate a temporary buffer for storing the difference of - old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach - take the same speed as the previous, and need more memory. 
-*/ -int -rf_RegularONEFunc(node) - RF_DagNode_t *node; +/***************************************************************************** + * For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and + * (ii) SimpleONEFunc() to be used. The previous case is when write accesses + * at least sectors of full stripe unit. + * The later function is used when the write accesses two stripe units but + * with total sectors less than sectors per SU. In this case, the access of + * parity and 'E' are shown as disconnected areas in their stripe unit and + * parity write and 'E' write are both divided into two distinct writes + * (totally four). This simple old-new write and regular old-new write happen + * as in RAID-5. + *****************************************************************************/ + +/* + * Algorithm: + * 1. Store the difference of old data and new data in the Rod buffer. + * 2. Then encode this buffer into the buffer that already have old 'E' + * information inside it, the result can be shown to be the new 'E' + * information. + * 3. Xor the Wnd buffer into the difference buffer to recover the original + * old data. + * Here we have another alternative: to allocate a temporary buffer for + * storing the difference of old data and new data, then encode temp buf + * into old 'E' buf to form new 'E', but this approach takes the same speed + * as the previous, and needs more memory. + */ +int +rf_RegularONEFunc(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - int EpdaIndex = (node->numParams - 1) / 2 - 1; /* the parameter of node - * where you can find - * e-pda */ - int i, k, retcode = 0; - int suoffset, length; + int EpdaIndex = (node->numParams - 1) / 2 - 1; /* + * The parameter of node + * where you can find + * e-pda. + */ + int i, k, retcode = 0; + int suoffset, length; RF_RowCol_t scol; - char *srcbuf, *destbuf; + char *srcbuf, *destbuf; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; - RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p; - int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); /* generally zero */ + RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) + node->params[EpdaIndex].p; + /* Generally zero. */ + int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q); RF_ASSERT(ESUOffset == 0); RF_ETIMER_START(timer); - /* Xor the Wnd buffer into Rod buffer, the difference of old data and - * new data is stored in Rod buffer */ + /* + * Xor the Wnd buffer into Rod buffer. The difference of old data and + * new data is stored in Rod buffer. + */ for (k = 0; k < EpdaIndex; k += 2) { - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); - retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp); + length = rf_RaidAddressToByte(raidPtr, + ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); + retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, + node->params[k + 1].p, length, node->dagHdr->bp); } - /* Start to encoding the buffer storing the difference of old data and - * new data into 'E' buffer */ + /* + * Start to encode the buffer, storing the difference of old data and + * new data into 'E' buffer. 
+ */ for (i = 0; i < EpdaIndex; i += 2) - if (node->params[i + 1].p != node->results[0]) { /* results[0] is buf ptr - * of E */ + if (node->params[i + 1].p != node->results[0]) { + /* results[0] is buf ptr of E. */ pda = (RF_PhysDiskAddr_t *) node->params[i].p; srcbuf = (char *) node->params[i + 1].p; scol = rf_EUCol(layoutPtr, pda->raidAddress); - suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); + suoffset = rf_StripeUnitOffset(layoutPtr, + pda->startSector); + destbuf = ((char *) node->results[0]) + + rf_RaidAddressToByte(raidPtr, suoffset); + rf_e_encToBuf(raidPtr, scol, srcbuf, + RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); } - /* Recover the original old data to be used by parity encoding - * function in XorNode */ + /* + * Recover the original old data to be used by parity encoding + * function in XorNode. + */ for (k = 0; k < EpdaIndex; k += 2) { - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); - retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp); + length = rf_RaidAddressToByte(raidPtr, + ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); + retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, + node->params[k + 1].p, length, node->dagHdr->bp); } RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer); rf_GenericWakeupFunc(node, 0); #if 1 - return (0); /* XXX this was missing.. GO */ + return (0); /* XXX This was missing... GO */ #endif } -int -rf_SimpleONEFunc(node) - RF_DagNode_t *node; +int +rf_SimpleONEFunc(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; - int retcode = 0; - char *srcbuf, *destbuf; + int retcode = 0; + char *srcbuf, *destbuf; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - int length; + int length; RF_RowCol_t scol; RF_Etimer_t timer; - RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q); + RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == + RF_PDA_TYPE_Q); if (node->dagHdr->status == rf_enable) { RF_ETIMER_START(timer); - length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector); /* this is a pda of - * writeDataNodes */ - /* bxor to buffer of readDataNodes */ - retcode = rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp); - /* find out the corresponding colume in encoding matrix for - * write colume to be encoded into redundant disk 'E' */ + /* This is a pda of writeDataNodes. */ + length = rf_RaidAddressToByte(raidPtr, + ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector); + /* bxor to buffer of readDataNodes. */ + retcode = rf_bxor(node->params[5].p, node->params[1].p, + length, node->dagHdr->bp); + /* + * Find out the corresponding column in encoding matrix for + * write column to be encoded into redundant disk 'E'. + */ scol = rf_EUCol(layoutPtr, pda->raidAddress); srcbuf = node->params[1].p; destbuf = node->params[3].p; - /* Start encoding process */ - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); - rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp); + /* Start encoding process. 
*/ + rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, + destbuf, pda->numSector); + rf_bxor(node->params[5].p, node->params[1].p, length, + node->dagHdr->bp); RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer); } - return (rf_GenericWakeupFunc(node, retcode)); /* call wake func + return (rf_GenericWakeupFunc(node, retcode)); /* + * Call wake func * explicitly since no - * I/O in this node */ + * I/O in this node. + */ } -/****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write ********/ -void -rf_RegularESubroutine(node, ebuf) - RF_DagNode_t *node; - char *ebuf; +/* + * Called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) + * in f.f. large write. + */ +void +rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; RF_PhysDiskAddr_t *pda; - int i, suoffset; + int i, suoffset; RF_RowCol_t scol; - char *srcbuf, *destbuf; + char *srcbuf, *destbuf; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; @@ -205,7 +252,8 @@ rf_RegularESubroutine(node, ebuf) scol = rf_EUCol(layoutPtr, pda->raidAddress); srcbuf = (char *) node->params[i + 1].p; destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset); - rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); + rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, + destbuf, pda->numSector); } RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); @@ -213,44 +261,43 @@ rf_RegularESubroutine(node, ebuf) } -/******************************************************************************************* - * Used in EO_001_CreateLargeWriteDAG - ******************************************************************************************/ -int -rf_RegularEFunc(node) - RF_DagNode_t *node; +/***************************************************************************** + * Used in EO_001_CreateLargeWriteDAG. + *****************************************************************************/ +int +rf_RegularEFunc(RF_DagNode_t *node) { rf_RegularESubroutine(node, node->results[0]); rf_GenericWakeupFunc(node, 0); #if 1 - return (0); /* XXX this was missing?.. GO */ + return (0); /* XXX This was missing... GO */ #endif } -/******************************************************************************************* - * This degraded function allow only two case: - * 1. when write access the full failed stripe unit, then the access can be more than - * one tripe units. - * 2. when write access only part of the failed SU, we assume accesses of more than - * one stripe unit is not allowed so that the write can be dealt with like a - * large write. - * The following function is based on these assumptions. So except in the second case, - * it looks the same as a large write encodeing function. But this is not exactly the - * normal way for doing a degraded write, since raidframe have to break cases of access - * other than the above two into smaller accesses. We may have to change - * DegrESubroutin in the future. - *******************************************************************************************/ -void -rf_DegrESubroutine(node, ebuf) - RF_DagNode_t *node; - char *ebuf; + + +/***************************************************************************** + * This degraded function allow only two cases: + * 1. 
When write accesses the full failed stripe unit, then the access can + * be more than one stripe unit. + * 2. When write accesses only part of the failed SU, we assume accesses of + * more than one stripe unit are not allowed so that the write can be + * dealt with like a large write. + * The following function is based on these assumptions. So except in the + * second case, it looks the same as a large write encoding function. But + * this is not exactly the normal way of doing a degraded write, since + * RAIDframe has to break cases of accesses other than the above two into + * smaller accesses. We may have to change DegrESubroutin in the future. + *****************************************************************************/ +void +rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; RF_PhysDiskAddr_t *pda; - int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); + int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); RF_RowCol_t scol; - char *srcbuf, *destbuf; + char *srcbuf, *destbuf; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; @@ -271,90 +318,94 @@ rf_DegrESubroutine(node, ebuf) } -/************************************************************************************** - * This function is used in case where one data disk failed and both redundant disks - * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk - * failed in the stripe but not accessed at this time, then we should, instead, use - * the rf_EOWriteDoubleRecoveryFunc(). - **************************************************************************************/ -int -rf_Degraded_100_EOFunc(node) - RF_DagNode_t *node; +/***************************************************************************** + * This function is used in case where one data disk failed and both redundant + * disks are alive. It is used in the EO_100_CreateWriteDAG. Note: if there is + * another disk failed in the stripe but not accessed at this time, then we + * should, instead, use the rf_EOWriteDoubleRecoveryFunc(). + *****************************************************************************/ +int +rf_Degraded_100_EOFunc(RF_DagNode_t *node) { rf_DegrESubroutine(node, node->results[1]); - rf_RecoveryXorFunc(node); /* does the wakeup here! */ + rf_RecoveryXorFunc(node); /* Does the wakeup here ! */ #if 1 - return (0); /* XXX this was missing... SHould these be - * void functions??? GO */ + return (0); /* XXX This was missing... Should these be + * void functions ??? GO */ #endif } -/************************************************************************************** - * This function is to encode one sector in one of the data disks to the E disk. - * However, in evenodd this function can also be used as decoding function to recover - * data from dead disk in the case of parity failure and a single data failure. 
- **************************************************************************************/ -void -rf_e_EncOneSect( - RF_RowCol_t srcLogicCol, - char *srcSecbuf, - RF_RowCol_t destLogicCol, - char *destSecbuf, - int bytesPerSector) + + +/***************************************************************************** + * This function is to encode one sector in one of the data disks to the E + * disk. However, in evenodd this function can also be used as decoding + * function to recover data from dead disk in the case of parity failure and + * a single data failure. + *****************************************************************************/ +void +rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf, + RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector) { - int S_index; /* index of the EU in the src col which need - * be Xored into all EUs in a dest sector */ - int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; - RF_RowCol_t j, indexInDest, /* row index of an encoding unit in - * the destination colume of encoding - * matrix */ - indexInSrc; /* row index of an encoding unit in the source - * colume used for recovery */ - int bytesPerEU = bytesPerSector / numRowInEncMatix; - -#if RF_EO_MATRIX_DIM > 17 - int shortsPerEU = bytesPerEU / sizeof(short); + int S_index; /* + * Index of the EU in the src col which need + * be Xored into all EUs in a dest sector. + */ + int numRowInEncMatrix = (RF_EO_MATRIX_DIM) - 1; + RF_RowCol_t j, indexInDest; /* + * Row index of an encoding unit in + * the destination column of encoding + * matrix. + */ + RF_RowCol_t indexInSrc; /* + * Row index of an encoding unit in the source + * column used for recovery. + */ + int bytesPerEU = bytesPerSector / numRowInEncMatrix; + +#if RF_EO_MATRIX_DIM > 17 + int shortsPerEU = bytesPerEU / sizeof(short); short *destShortBuf, *srcShortBuf1, *srcShortBuf2; short temp1; -#elif RF_EO_MATRIX_DIM == 17 - int longsPerEU = bytesPerEU / sizeof(long); - long *destLongBuf, *srcLongBuf1, *srcLongBuf2; +#elif RF_EO_MATRIX_DIM == 17 + int longsPerEU = bytesPerEU / sizeof(long); + long *destLongBuf, *srcLongBuf1, *srcLongBuf2; long temp1; #endif -#if RF_EO_MATRIX_DIM > 17 +#if RF_EO_MATRIX_DIM > 17 RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1); RF_ASSERT(bytesPerEU % sizeof(short) == 0); -#elif RF_EO_MATRIX_DIM == 17 +#elif RF_EO_MATRIX_DIM == 17 RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4); RF_ASSERT(bytesPerEU % sizeof(long) == 0); #endif S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); -#if RF_EO_MATRIX_DIM > 17 +#if RF_EO_MATRIX_DIM > 17 srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU); -#elif RF_EO_MATRIX_DIM == 17 +#elif RF_EO_MATRIX_DIM == 17 srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU); #endif - for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) { + for (indexInDest = 0; indexInDest < numRowInEncMatrix; indexInDest++) { indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); -#if RF_EO_MATRIX_DIM > 17 +#if RF_EO_MATRIX_DIM > 17 destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU); srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU); for (j = 0; j < shortsPerEU; j++) { temp1 = destShortBuf[j] ^ srcShortBuf1[j]; - /* note: S_index won't be at the end row for any src - * col! */ + /* Note: S_index won't be at the end row for any src + * col ! */ if (indexInSrc != RF_EO_MATRIX_DIM - 1) destShortBuf[j] = (srcShortBuf2[j]) ^ temp1; /* if indexInSrc is at the end row, ie. 
- * RF_EO_MATRIX_DIM -1, then all elements are zero! */ + * RF_EO_MATRIX_DIM -1, then all elements are zero ! */ else destShortBuf[j] = temp1; } -#elif RF_EO_MATRIX_DIM == 17 +#elif RF_EO_MATRIX_DIM == 17 destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU); srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU); for (j = 0; j < longsPerEU; j++) { @@ -368,16 +419,11 @@ rf_e_EncOneSect( } } -void -rf_e_encToBuf( - RF_Raid_t * raidPtr, - RF_RowCol_t srcLogicCol, - char *srcbuf, - RF_RowCol_t destLogicCol, - char *destbuf, - int numSector) +void +rf_e_encToBuf(RF_Raid_t *raidPtr, RF_RowCol_t srcLogicCol, char *srcbuf, + RF_RowCol_t destLogicCol, char *destbuf, int numSector) { - int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); + int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); for (i = 0; i < numSector; i++) { rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector); @@ -385,25 +431,26 @@ rf_e_encToBuf( destbuf += bytesPerSector; } } -/************************************************************************************** + + +/***************************************************************************** * when parity die and one data die, We use second redundant information, 'E', * to recover the data in dead disk. This function is used in the recovery node of * for EO_110_CreateReadDAG - **************************************************************************************/ -int -rf_RecoveryEFunc(node) - RF_DagNode_t *node; + *****************************************************************************/ +int +rf_RecoveryEFunc(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; - RF_RowCol_t scol, /* source logical column */ - fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress); /* logical column of + RF_RowCol_t scol; /* source logical column */ + RF_RowCol_t fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress); /* logical column of * failed SU */ - int i; + int i; RF_PhysDiskAddr_t *pda; - int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - char *srcbuf, *destbuf; + int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); + char *srcbuf, *destbuf; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; @@ -429,62 +476,61 @@ rf_RecoveryEFunc(node) } return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */ } -/************************************************************************************** + + +/***************************************************************************** * This function is used in the case where one data and the parity have filed. - * (in EO_110_CreateWriteDAG ) - **************************************************************************************/ -int -rf_EO_DegradedWriteEFunc(RF_DagNode_t * node) + * (in EO_110_CreateWriteDAG) + *****************************************************************************/ +int +rf_EO_DegradedWriteEFunc(RF_DagNode_t *node) { rf_DegrESubroutine(node, node->results[0]); rf_GenericWakeupFunc(node, 0); #if 1 - return (0); /* XXX Yet another one!! GO */ + return (0); /* XXX Yet another one !!! 
GO */ #endif } -/************************************************************************************** - * THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES - **************************************************************************************/ +/***************************************************************************** + * THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES. + *****************************************************************************/ -void -rf_doubleEOdecode( - RF_Raid_t * raidPtr, - char **rrdbuf, - char **dest, - RF_RowCol_t * fcol, - char *pbuf, - char *ebuf) +void +rf_doubleEOdecode(RF_Raid_t *raidPtr, char **rrdbuf, char **dest, + RF_RowCol_t *fcol, char *pbuf, char *ebuf) { - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int i, j, k, f1, f2, row; - int rrdrow, erow, count = 0; - int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); - int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout); + int i, j, k, f1, f2, row; + int rrdrow, erow, count = 0; + int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); + int numRowInEncMatrix = (RF_EO_MATRIX_DIM) - 1; #if 0 - int pcol = (RF_EO_MATRIX_DIM) - 1; + int pcol = (RF_EO_MATRIX_DIM) - 1; #endif - int ecol = (RF_EO_MATRIX_DIM) - 2; - int bytesPerEU = bytesPerSector / numRowInEncMatix; - int numDataCol = layoutPtr->numDataCol; -#if RF_EO_MATRIX_DIM > 17 - int shortsPerEU = bytesPerEU / sizeof(short); - short *rrdbuf_current, *pbuf_current, *ebuf_current; - short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; + int ecol = (RF_EO_MATRIX_DIM) - 2; + int bytesPerEU = bytesPerSector / numRowInEncMatrix; + int numDataCol = layoutPtr->numDataCol; +#if RF_EO_MATRIX_DIM > 17 + int shortsPerEU = bytesPerEU / sizeof(short); + short *rrdbuf_current, *pbuf_current, *ebuf_current; + short *dest_smaller, *dest_smaller_current; + short *dest_larger, *dest_larger_current; short *temp; - short *P; + short *P; RF_ASSERT(bytesPerEU % sizeof(short) == 0); RF_Malloc(P, bytesPerEU, (short *)); RF_Malloc(temp, bytesPerEU, (short *)); -#elif RF_EO_MATRIX_DIM == 17 - int longsPerEU = bytesPerEU / sizeof(long); - long *rrdbuf_current, *pbuf_current, *ebuf_current; - long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; +#elif RF_EO_MATRIX_DIM == 17 + int longsPerEU = bytesPerEU / sizeof(long); + long *rrdbuf_current, *pbuf_current, *ebuf_current; + long *dest_smaller, *dest_smaller_current; + long *dest_larger, *dest_larger_current; long *temp; - long *P; + long *P; RF_ASSERT(bytesPerEU % sizeof(long) == 0); RF_Malloc(P, bytesPerEU, (long *)); @@ -495,15 +541,17 @@ rf_doubleEOdecode( bzero((char *) P, bytesPerEU); bzero((char *) temp, bytesPerEU); RF_ASSERT(*P == 0); - /* calculate the 'P' parameter, which, not parity, is the Xor of all - * elements in the last two column, ie. 'E' and 'parity' colume, see - * the Ref. paper by Blaum, et al 1993 */ - for (i = 0; i < numRowInEncMatix; i++) + /* + * Calculate the 'P' parameter, which, not parity, is the Xor of all + * elements in the last two column, ie. 'E' and 'parity' columns, see + * the Ref. paper by Blaum, et al 1993. 
+ */ + for (i = 0; i < numRowInEncMatrix; i++) for (k = 0; k < longsPerEU; k++) { -#if RF_EO_MATRIX_DIM > 17 +#if RF_EO_MATRIX_DIM > 17 ebuf_current = ((short *) ebuf) + i * shortsPerEU + k; pbuf_current = ((short *) pbuf) + i * shortsPerEU + k; -#elif RF_EO_MATRIX_DIM == 17 +#elif RF_EO_MATRIX_DIM == 17 ebuf_current = ((long *) ebuf) + i * longsPerEU + k; pbuf_current = ((long *) pbuf) + i * longsPerEU + k; #endif @@ -512,20 +560,20 @@ rf_doubleEOdecode( } RF_ASSERT(fcol[0] != fcol[1]); if (fcol[0] < fcol[1]) { -#if RF_EO_MATRIX_DIM > 17 +#if RF_EO_MATRIX_DIM > 17 dest_smaller = (short *) (dest[0]); dest_larger = (short *) (dest[1]); -#elif RF_EO_MATRIX_DIM == 17 +#elif RF_EO_MATRIX_DIM == 17 dest_smaller = (long *) (dest[0]); dest_larger = (long *) (dest[1]); #endif f1 = fcol[0]; f2 = fcol[1]; } else { -#if RF_EO_MATRIX_DIM > 17 +#if RF_EO_MATRIX_DIM > 17 dest_smaller = (short *) (dest[1]); dest_larger = (short *) (dest[0]); -#elif RF_EO_MATRIX_DIM == 17 +#elif RF_EO_MATRIX_DIM == 17 dest_smaller = (long *) (dest[1]); dest_larger = (long *) (dest[0]); #endif @@ -533,93 +581,107 @@ rf_doubleEOdecode( f2 = fcol[0]; } row = (RF_EO_MATRIX_DIM) - 1; - while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) { -#if RF_EO_MATRIX_DIM > 17 + while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != + ((RF_EO_MATRIX_DIM) - 1)) { +#if RF_EO_MATRIX_DIM > 17 dest_larger_current = dest_larger + row * shortsPerEU; dest_smaller_current = dest_smaller + row * shortsPerEU; -#elif RF_EO_MATRIX_DIM == 17 +#elif RF_EO_MATRIX_DIM == 17 dest_larger_current = dest_larger + row * longsPerEU; dest_smaller_current = dest_smaller + row * longsPerEU; #endif - /** Do the diagonal recovery. Initially, temp[k] = (failed 1), - which is the failed data in the colume which has smaller col index. **/ - /* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */ + /* + * Do the diagonal recovery. Initially, temp[k] = (failed 1), + * which is the failed data in the column that has smaller + * col index. + */ + /* Step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */ for (j = 0; j < numDataCol; j++) { if (j == f1 || j == f2) continue; rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM); if (rrdrow != (RF_EO_MATRIX_DIM) - 1) { -#if RF_EO_MATRIX_DIM > 17 - rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU; +#if RF_EO_MATRIX_DIM > 17 + rrdbuf_current = (short *) (rrdbuf[j]) + + rrdrow * shortsPerEU; for (k = 0; k < shortsPerEU; k++) temp[k] ^= *(rrdbuf_current + k); -#elif RF_EO_MATRIX_DIM == 17 - rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU; +#elif RF_EO_MATRIX_DIM == 17 + rrdbuf_current = (long *) (rrdbuf[j]) + + rrdrow * longsPerEU; for (k = 0; k < longsPerEU; k++) temp[k] ^= *(rrdbuf_current + k); #endif } } - /* step 2: ^E(erow,m-2), If erow is at the buttom row, don't - * Xor into it E(erow,m-2) = (principle diagonal) ^ (failed - * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal - * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle - * diagonal) ^ (failed 2) */ + /* + * Step 2: ^E(erow,m-2), If erow is at the bottom row, don't + * Xor into it. E(erow,m-2) = (principle diagonal) ^ (failed + * 1) ^ (failed 2) ^ (SUM of nonfailed in-diagonal + * A(rrdrow,0..m-3)) + * After this step, temp[k] = (principle diagonal) ^ (failed 2). 
+ */ erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM)); if (erow != (RF_EO_MATRIX_DIM) - 1) { -#if RF_EO_MATRIX_DIM > 17 +#if RF_EO_MATRIX_DIM > 17 ebuf_current = (short *) ebuf + shortsPerEU * erow; for (k = 0; k < shortsPerEU; k++) temp[k] ^= *(ebuf_current + k); -#elif RF_EO_MATRIX_DIM == 17 +#elif RF_EO_MATRIX_DIM == 17 ebuf_current = (long *) ebuf + longsPerEU * erow; for (k = 0; k < longsPerEU; k++) temp[k] ^= *(ebuf_current + k); #endif } - /* step 3: ^P to obtain the failed data (failed 2). P can be - * proved to be actually (principle diagonal) After this - * step, temp[k] = (failed 2), the failed data to be recovered */ -#if RF_EO_MATRIX_DIM > 17 + /* + * Step 3: ^P to obtain the failed data (failed 2). P can be + * proved to be actually (principal diagonal). After this + * step, temp[k] = (failed 2), the failed data to be recovered. + */ +#if RF_EO_MATRIX_DIM > 17 for (k = 0; k < shortsPerEU; k++) temp[k] ^= P[k]; - /* Put the data to the destination buffer */ + /* Put the data into the destination buffer. */ for (k = 0; k < shortsPerEU; k++) dest_larger_current[k] = temp[k]; -#elif RF_EO_MATRIX_DIM == 17 +#elif RF_EO_MATRIX_DIM == 17 for (k = 0; k < longsPerEU; k++) temp[k] ^= P[k]; - /* Put the data to the destination buffer */ + /* Put the data into the destination buffer. */ for (k = 0; k < longsPerEU; k++) dest_larger_current[k] = temp[k]; #endif - /** THE FOLLOWING DO THE HORIZONTAL XOR **/ - /* step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data - * columes */ + /* THE FOLLOWING DO THE HORIZONTAL XOR. */ + /* + * Step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data + * columns. + */ for (j = 0; j < numDataCol; j++) { if (j == f1 || j == f2) continue; -#if RF_EO_MATRIX_DIM > 17 - rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU; +#if RF_EO_MATRIX_DIM > 17 + rrdbuf_current = (short *) (rrdbuf[j]) + + row * shortsPerEU; for (k = 0; k < shortsPerEU; k++) temp[k] ^= *(rrdbuf_current + k); -#elif RF_EO_MATRIX_DIM == 17 - rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU; +#elif RF_EO_MATRIX_DIM == 17 + rrdbuf_current = (long *) (rrdbuf[j]) + + row * longsPerEU; for (k = 0; k < longsPerEU; k++) temp[k] ^= *(rrdbuf_current + k); #endif } - /* step 2: ^A(row,m-1) */ - /* step 3: Put the data to the destination buffer */ -#if RF_EO_MATRIX_DIM > 17 + /* Step 2: ^A(row,m-1) */ + /* Step 3: Put the data into the destination buffer. */ +#if RF_EO_MATRIX_DIM > 17 pbuf_current = (short *) pbuf + shortsPerEU * row; for (k = 0; k < shortsPerEU; k++) temp[k] ^= *(pbuf_current + k); for (k = 0; k < shortsPerEU; k++) dest_smaller_current[k] = temp[k]; -#elif RF_EO_MATRIX_DIM == 17 +#elif RF_EO_MATRIX_DIM == 17 pbuf_current = (long *) pbuf + longsPerEU * row; for (k = 0; k < longsPerEU; k++) temp[k] ^= *(pbuf_current + k); @@ -628,50 +690,56 @@ rf_doubleEOdecode( #endif count++; } - /* Check if all Encoding Unit in the data buffer have been decoded, - * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number, - * this algorithm will covered all buffer */ - RF_ASSERT(count == numRowInEncMatix); + /* + * Check if all Encoding Unit in the data buffer have been decoded ? + * According to EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime + * number, this algorithm will covered all buffer. 
+ */ + RF_ASSERT(count == numRowInEncMatrix); RF_Free((char *) P, bytesPerEU); RF_Free((char *) temp, bytesPerEU); } -/*************************************************************************************** -* This function is called by double degragded read -* EO_200_CreateReadDAG -* -***************************************************************************************/ -int -rf_EvenOddDoubleRecoveryFunc(node) - RF_DagNode_t *node; +/***************************************************************************** + * This function is called by double degraded read EO_200_CreateReadDAG. + *****************************************************************************/ +int +rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node) { - int ndataParam = 0; - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; + int ndataParam = 0; + int np = node->numParams; + RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) + node->params[np - 1].p; RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int i, prm, sector, nresults = node->numResults; + int i, prm, sector, nresults = node->numResults; RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; unsigned sosAddr; - int two = 0, mallc_one = 0, mallc_two = 0; /* flags to indicate if - * memory is allocated */ - int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); + int two = 0, mallc_one = 0, mallc_two = 0; /* + * Flags to indicate if + * memory is allocated. + */ + int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1, - npda; - RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol; - char **buf, *ebuf, *pbuf, *dest[2]; - long *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff; + npda; + RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], + numDataCol = layoutPtr->numDataCol; + char **buf, *ebuf, *pbuf, *dest[2]; + long *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff; RF_SectorNum_t startSector, endSector; RF_Etimer_t timer; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_ETIMER_START(timer); - /* Find out the number of parameters which are pdas for data - * information */ + /* + * Find out the number of parameters that are pdas for data + * information. + */ for (i = 0; i <= np; i++) - if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) { + if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != + RF_PDA_TYPE_DATA) { ndataParam = i; break; } @@ -682,8 +750,9 @@ rf_EvenOddDoubleRecoveryFunc(node) RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *)); } if (asmap->failedPDAs[1] && - (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) { - RF_ASSERT(0); /* currently, no support for this situation */ + (asmap->failedPDAs[1]->numSector + + asmap->failedPDAs[0]->numSector) < secPerSU) { + RF_ASSERT(0); /* Currently, no support for this situation. */ ppda = node->params[np - 6].p; ppda2 = node->params[np - 5].p; RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY); @@ -699,13 +768,15 @@ rf_EvenOddDoubleRecoveryFunc(node) RF_ASSERT(psuoff == esuoff); } /* - the followings have three goals: - 1. determine the startSector to begin decoding and endSector to end decoding. - 2. determine the colume numbers of the two failed disks. - 3. determine the offset and end offset of the access within each failed stripe unit. - */ + * The followings have three goals: + * 1. 
Determine the startSector to begin decoding and endSector + * to end decoding. + * 2. Determine the column numbers of the two failed disks. + * 3. Determine the offset and end offset of the access within + * each failed stripe unit. + */ if (nresults == 1) { - /* find the startSector to begin decoding */ + /* Find the startSector to begin decoding. */ pda = node->results[0]; bzero(pda->bufPtr, bytesPerSector * pda->numSector); fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector); @@ -713,16 +784,20 @@ rf_EvenOddDoubleRecoveryFunc(node) startSector = fsuoff[0]; endSector = fsuend[0]; - /* find out the column of failed disk being accessed */ + /* Find out the column of failed disk being accessed. */ fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress); - /* find out the other failed colume not accessed */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + /* Find out the other failed column not accessed. */ + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, + asmap->raidAddress); for (i = 0; i < numDataCol; i++) { npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) + (raidPtr->Layout.map->MapSector) (raidPtr, + npda.raidAddress, &(npda.row), &(npda.col), + &(npda.startSector), 0); + /* Skip over dead disks. */ + if (RF_DEAD_DISK(raidPtr + ->Disks[npda.row][npda.col].status)) if (i != fcol[0]) break; } @@ -734,67 +809,87 @@ rf_EvenOddDoubleRecoveryFunc(node) bzero(pda0->bufPtr, bytesPerSector * pda0->numSector); pda1 = node->results[1]; bzero(pda1->bufPtr, bytesPerSector * pda1->numSector); - /* determine the failed colume numbers of the two failed - * disks. */ + /* + * Determine the failed column numbers of the two failed + * disks. + */ fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress); fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress); - /* determine the offset and end offset of the access within - * each failed stripe unit. */ + /* + * Determine the offset and end offset of the access within + * each failed stripe unit. + */ fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector); fsuend[0] = fsuoff[0] + pda0->numSector; fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector); fsuend[1] = fsuoff[1] + pda1->numSector; - /* determine the startSector to begin decoding */ + /* Determine the startSector to begin decoding. */ startSector = RF_MIN(pda0->startSector, pda1->startSector); - /* determine the endSector to end decoding */ + /* Determine the endSector to end decoding. */ endSector = RF_MAX(fsuend[0], fsuend[1]); } /* - assign the beginning sector and the end sector for each parameter - find out the corresponding colume # for each parameter - */ + * Assign the beginning sector and the end sector for each parameter. + * Find out the corresponding column # for each parameter. + */ for (prm = 0; prm < ndataParam; prm++) { pda = node->params[prm].p; suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector); suend[prm] = suoff[prm] + pda->numSector; prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress); } - /* 'sector' is the sector for the current decoding algorithm. For each - * sector in the failed SU, find out the corresponding parameters that - * cover the current sector and that are needed for decoding of this - * sector in failed SU. 2. Find out if sector is in the shadow of any - * accessed failed SU. 
If not, malloc a temporary space of a sector in - * size. */ + /* + * 'sector' is the sector for the current decoding algorithm. For each + * sector in the failed SU + * 1. Find out the corresponding parameters that cover the current + * sector and that are needed for the decoding of this sector in + * failed SU. + * 2. Find out if sector is in the shadow of any accessed failed SU. + * If not, malloc a temporary space of a sector in size. + */ for (sector = startSector; sector < endSector; sector++) { if (nresults == 2) - if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1])) + if (!(fsuoff[0] <= sector && sector < fsuend[0]) && + !(fsuoff[1] <= sector && sector < fsuend[1])) continue; for (prm = 0; prm < ndataParam; prm++) if (suoff[prm] <= sector && sector < suend[prm]) - buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr + - rf_RaidAddressToByte(raidPtr, sector - suoff[prm]); - /* find out if sector is in the shadow of any accessed failed + buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) + node->params[prm].p)->bufPtr + + rf_RaidAddressToByte(raidPtr, + sector - suoff[prm]); + /* + * Find out if sector is in the shadow of any accessed failed * SU. If yes, assign dest[0], dest[1] to point at suitable - * position of the buffer corresponding to failed SUs. if no, + * position of the buffer corresponding to failed SUs. If no, * malloc a temporary space of a sector in size for - * destination of decoding. */ + * destination of decoding. + */ RF_ASSERT(nresults == 1 || nresults == 2); if (nresults == 1) { - dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); - /* Always malloc temp buffer to dest[1] */ + dest[0] = ((RF_PhysDiskAddr_t *) + node->results[0])->bufPtr + + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); + /* Always malloc temp buffer to dest[1]. */ RF_Malloc(dest[1], bytesPerSector, (char *)); bzero(dest[1], bytesPerSector); mallc_two = 1; } else { if (fsuoff[0] <= sector && sector < fsuend[0]) - dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); + dest[0] = ((RF_PhysDiskAddr_t *) + node->results[0])->bufPtr + + rf_RaidAddressToByte(raidPtr, + sector - fsuoff[0]); else { RF_Malloc(dest[0], bytesPerSector, (char *)); bzero(dest[0], bytesPerSector); mallc_one = 1; } if (fsuoff[1] <= sector && sector < fsuend[1]) - dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]); + dest[1] = ((RF_PhysDiskAddr_t *) + node->results[1])->bufPtr + + rf_RaidAddressToByte(raidPtr, + sector - fsuoff[1]); else { RF_Malloc(dest[1], bytesPerSector, (char *)); bzero(dest[1], bytesPerSector); @@ -802,15 +897,19 @@ rf_EvenOddDoubleRecoveryFunc(node) } RF_ASSERT(mallc_one == 0 || mallc_two == 0); } - pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff); - ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff); + pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, + sector - psuoff); + ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, + sector - esuoff); /* - * After finish finding all needed sectors, call doubleEOdecode function for decoding - * one sector to destination. - */ + * After finish finding all needed sectors, call doubleEOdecode + * function for decoding one sector to destination. 
+ */ rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); - /* free all allocated memory, and mark flag to indicate no - * memory is being allocated */ + /* + * Free all allocated memory, and mark flag to indicate no + * memory is being allocated. + */ if (mallc_one == 1) RF_Free(dest[0], bytesPerSector); if (mallc_two == 1) @@ -830,27 +929,29 @@ rf_EvenOddDoubleRecoveryFunc(node) } rf_GenericWakeupFunc(node, 0); #if 1 - return (0); /* XXX is this even close!!?!?!!? GO */ + return (0); /* XXX Is this even close !!?!?!!? GO */ #endif } -/* currently, only access of one of the two failed SU is allowed in this function. - * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into - * many accesses of single stripe unit. +/* + * Currently, only access of one of the two failed SU is allowed in this + * function. Also, asmap->numStripeUnitsAccessed is limited to be one, + * the RAIDframe will break large access into many accesses of single + * stripe unit. */ -int -rf_EOWriteDoubleRecoveryFunc(node) - RF_DagNode_t *node; +int +rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node) { - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; + int np = node->numParams; + RF_AccessStripeMap_t *asmap = + (RF_AccessStripeMap_t *) node->params[np - 1].p; RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; - RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout); RF_SectorNum_t sector; RF_RowCol_t col, scol; - int prm, i, j; + int prm, i, j; RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; unsigned sosAddr; unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); @@ -858,57 +959,71 @@ rf_EOWriteDoubleRecoveryFunc(node) RF_SectorNum_t startSector, endSector; RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda; RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol; - char **buf; /* buf[0], buf[1], buf[2], ...etc. point to + char **buf; /* + * buf[0], buf[1], buf[2], ... etc, point to * buffer storing data read from col0, col1, - * col2 */ - char *ebuf, *pbuf, *dest[2], *olddata[2]; + * col2. + */ + char *ebuf, *pbuf, *dest[2], *olddata[2]; RF_Etimer_t timer; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; - RF_ASSERT(asmap->numDataFailed == 1); /* currently only support this + RF_ASSERT(asmap->numDataFailed == 1); /* + * Currently only support this * case, the other failed SU - * is not being accessed */ + * is not being accessed. + */ RF_ETIMER_START(timer); RF_Malloc(buf, numDataCol * sizeof(char *), (char **)); - ppda = node->results[0];/* Instead of being buffers, node->results[0] - * and [1] are Ppda and Epda */ + ppda = node->results[0]; /* + * Instead of being buffers, + * node->results[0] and [1] + * are Ppda and Epda. + */ epda = node->results[1]; fpda = asmap->failedPDAs[0]; - /* First, recovery the failed old SU using EvenOdd double decoding */ - /* determine the startSector and endSector for decoding */ + /* First, recovery the failed old SU using EvenOdd double decoding. */ + /* Determine the startSector and endSector for decoding. 
*/ startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector); endSector = startSector + fpda->numSector; - /* Assign buf[col] pointers to point to each non-failed colume and + /* + * Assign buf[col] pointers to point to each non-failed column and * initialize the pbuf and ebuf to point at the beginning of each - * source buffers and destination buffers */ + * source buffers and destination buffers. */ for (prm = 0; prm < numDataCol - 2; prm++) { pda = (RF_PhysDiskAddr_t *) node->params[prm].p; col = rf_EUCol(layoutPtr, pda->raidAddress); buf[col] = pda->bufPtr; } - /* pbuf and ebuf: they will change values as double recovery decoding - * goes on */ + /* + * pbuf and ebuf: They will change values as double recovery decoding + * goes on. + */ pbuf = ppda->bufPtr; ebuf = epda->bufPtr; - /* find out the logical colume numbers in the encoding matrix of the - * two failed columes */ + /* + * Find out the logical column numbers in the encoding matrix of the + * two failed columns. + */ fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress); - /* find out the other failed colume not accessed this time */ - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + /* Find out the other failed column not accessed this time. */ + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, + asmap->raidAddress); for (i = 0; i < numDataCol; i++) { npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ + (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, + &(npda.row), &(npda.col), &(npda.startSector), 0); + /* Skip over dead disks. */ if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) if (i != fcol[0]) break; } RF_ASSERT(i < numDataCol); fcol[1] = i; - /* assign temporary space to put recovered failed SU */ + /* Assign temporary space to put recovered failed SU. */ numbytes = fpda->numSector * bytesPerSector; RF_Malloc(olddata[0], numbytes, (char *)); RF_Malloc(olddata[1], numbytes, (char *)); @@ -916,9 +1031,11 @@ rf_EOWriteDoubleRecoveryFunc(node) dest[1] = olddata[1]; bzero(olddata[0], numbytes); bzero(olddata[1], numbytes); - /* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] + /* + * Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] * have already pointed at the beginning of each source buffers and - * destination buffers */ + * destination buffers. + */ for (sector = startSector, i = 0; sector < endSector; sector++, i++) { rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); for (j = 0; j < numDataCol; j++) @@ -929,10 +1046,13 @@ rf_EOWriteDoubleRecoveryFunc(node) ebuf += bytesPerSector; pbuf += bytesPerSector; } - /* after recovery, the buffer pointed by olddata[0] is the old failed + /* + * After recovery, the buffer pointed by olddata[0] is the old failed * data. With new writing data and this old data, use small write to - * calculate the new redundant informations */ - /* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of + * calculate the new redundant informations. + */ + /* + * node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[ * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol @@ -942,20 +1062,27 @@ rf_EOWriteDoubleRecoveryFunc(node) * ie. 
PDAPerDisk = 1 then node->params[numDataCol] must be the new * data to be written to the failed disk. We first bxor the new data * into the old recovered data, then do the same things as small - * write. */ + * write. + */ - rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp); - /* do new 'E' calculation */ - /* find out the corresponding colume in encoding matrix for write - * colume to be encoded into redundant disk 'E' */ + rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, + olddata[0], numbytes, node->dagHdr->bp); + /* Do new 'E' calculation. */ + /* + * Find out the corresponding column in encoding matrix for write + * column to be encoded into redundant disk 'E'. + */ scol = rf_EUCol(layoutPtr, fpda->raidAddress); - /* olddata[0] now is source buffer pointer; epda->bufPtr is the dest - * buffer pointer */ - rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector); + /* + * olddata[0] now is source buffer pointer; epda->bufPtr is the dest + * buffer pointer. + */ + rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, + epda->bufPtr, fpda->numSector); - /* do new 'P' calculation */ + /* Do new 'P' calculation. */ rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp); - /* Free the allocated buffer */ + /* Free the allocated buffer. */ RF_Free(olddata[0], numbytes); RF_Free(olddata[1], numbytes); RF_Free(buf, numDataCol * sizeof(char *)); diff --git a/sys/dev/raidframe/rf_evenodd_dagfuncs.h b/sys/dev/raidframe/rf_evenodd_dagfuncs.h index c00a1d82530..294d1cfea6b 100644 --- a/sys/dev/raidframe/rf_evenodd_dagfuncs.h +++ b/sys/dev/raidframe/rf_evenodd_dagfuncs.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_evenodd_dagfuncs.h,v 1.2 1999/02/16 00:02:45 niklas Exp $ */ +/* $OpenBSD: rf_evenodd_dagfuncs.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_evenodd_dagfuncs.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ + /* * rf_evenodd_dagfuncs.h */ @@ -30,8 +31,8 @@ * rights to redistribute these changes. 
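The small-write update at the end of rf_EOWriteDoubleRecoveryFunc() above follows the usual read-modify-write pattern once the failed stripe unit has been reconstructed: the new data is XORed into the recovered old data to form a delta, the delta is folded into P, and the same delta is re-encoded into the E column. The sketch below only illustrates that arithmetic; the buffer names are hypothetical, plain XOR loops stand in for rf_bxor(), and the E update is shown as an XOR placeholder even though the driver re-encodes the delta with rf_e_encToBuf().

#include <stddef.h>

/* XOR nbytes of src into dst, the way rf_bxor() combines whole buffers. */
static void
xor_into(char *dst, const char *src, size_t nbytes)
{
        size_t i;

        for (i = 0; i < nbytes; i++)
                dst[i] ^= src[i];
}

/* Hypothetical small-write update for one rewritten stripe unit. */
static void
small_write_update(char *olddata, const char *newdata,
    char *pbuf, char *ebuf, size_t nbytes)
{
        xor_into(olddata, newdata, nbytes);     /* delta = D_old ^ D_new */
        xor_into(pbuf, olddata, nbytes);        /* P_new = P_old ^ delta */
        xor_into(ebuf, olddata, nbytes);        /* placeholder for the real
                                                 * E-column re-encoding */
}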
*/ -#ifndef _RF__RF_EVENODD_DAGFUNCS_H_ -#define _RF__RF_EVENODD_DAGFUNCS_H_ +#ifndef _RF__RF_EVENODD_DAGFUNCS_H_ +#define _RF__RF_EVENODD_DAGFUNCS_H_ extern RF_RedFuncs_t rf_EOSmallWriteEFuncs; extern RF_RedFuncs_t rf_EOSmallWritePFuncs; @@ -39,42 +40,40 @@ extern RF_RedFuncs_t rf_eoERecoveryFuncs; extern RF_RedFuncs_t rf_eoPRecoveryFuncs; extern RF_RedFuncs_t rf_eoERecoveryFuncs; -int rf_RegularPEFunc(RF_DagNode_t * node); -int rf_RegularONEFunc(RF_DagNode_t * node); -int rf_SimpleONEFunc(RF_DagNode_t * node); -void rf_RegularESubroutine(RF_DagNode_t * node, char *ebuf); -int rf_RegularEFunc(RF_DagNode_t * node); -void rf_DegrESubroutine(RF_DagNode_t * node, char *ebuf); -int rf_Degraded_100_EOFunc(RF_DagNode_t * node); -void -rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf, - RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector); -void -rf_e_encToBuf(RF_Raid_t * raidPtr, RF_RowCol_t srcLogicCol, - char *srcbuf, RF_RowCol_t destLogicCol, char *destbuf, int numSector); -int rf_RecoveryEFunc(RF_DagNode_t * node); -int rf_EO_DegradedWriteEFunc(RF_DagNode_t * node); -void -rf_doubleEOdecode(RF_Raid_t * raidPtr, char **rrdbuf, char **dest, - RF_RowCol_t * fcol, char *pbuf, char *ebuf); -int rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t * node); -int rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t * node); +int rf_RegularPEFunc(RF_DagNode_t *); +int rf_RegularONEFunc(RF_DagNode_t *); +int rf_SimpleONEFunc(RF_DagNode_t *); +void rf_RegularESubroutine(RF_DagNode_t *, char *); +int rf_RegularEFunc(RF_DagNode_t *); +void rf_DegrESubroutine(RF_DagNode_t *, char *); +int rf_Degraded_100_EOFunc(RF_DagNode_t *); +void rf_e_EncOneSect(RF_RowCol_t, char *, RF_RowCol_t, char *, int); +void rf_e_encToBuf(RF_Raid_t *, RF_RowCol_t, char *, RF_RowCol_t, char *, int); +int rf_RecoveryEFunc(RF_DagNode_t *); +int rf_EO_DegradedWriteEFunc(RF_DagNode_t *); +void rf_doubleEOdecode(RF_Raid_t *, char **, char **, RF_RowCol_t *, + char *, char *); +int rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *); +int rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *); -#define rf_EUCol(_layoutPtr_, _addr_ ) \ -( (_addr_)%( (_layoutPtr_)->dataSectorsPerStripe ) )/((_layoutPtr_)->sectorsPerStripeUnit) +#define rf_EUCol(_layoutPtr_,_addr_) \ + ((_addr_) % ((_layoutPtr_)->dataSectorsPerStripe)) / \ + ((_layoutPtr_)->sectorsPerStripeUnit) -#define rf_EO_Mod( _int1_, _int2_ ) \ -( ((_int1_) < 0)? (((_int1_)+(_int2_))%(_int2_)) : (_int1_)%(_int2_) ) +#define rf_EO_Mod(_int1_,_int2_) \ + (((_int1_) < 0) ? (((_int1_) + (_int2_)) % (_int2_)) \ + : ((_int1_) % (_int2_))) -#define rf_OffsetOfNextEUBoundary(_offset_, sec_per_eu) ((_offset_)/(sec_per_eu) + 1)*(sec_per_eu) +#define rf_OffsetOfNextEUBoundary(_offset_, sec_per_eu) \ + ((_offset_) / (sec_per_eu) + 1) * (sec_per_eu) -#define RF_EO_MATRIX_DIM 17 +#define RF_EO_MATRIX_DIM 17 /* * RF_EO_MATRIX_DIM should be a prime number: and "bytesPerSector" should be - * dividable by ( RF_EO_MATRIX_DIM - 1) to fully encode and utilize the space - * in a sector, this number could also be 17. Tha later case doesn't apply + * divisible by (RF_EO_MATRIX_DIM - 1) to fully encode and utilize the space + * in a sector, this number could also be 17. That later case doesn't apply * for disk array larger than 17 columns totally. 
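Since rf_EUCol(), rf_EO_Mod() and rf_OffsetOfNextEUBoundary() are pure arithmetic, a worked example may help. Assume a hypothetical layout with sectorsPerStripeUnit = 32 and four data columns, so dataSectorsPerStripe = 128; the functions below mirror the macros one for one and are stand-ins, not RAIDframe code. Note also that with the usual 512-byte sector, bytesPerSector is divisible by RF_EO_MATRIX_DIM - 1 = 16, as the comment above requires.

#include <stdio.h>

#define SEC_PER_SU              32      /* hypothetical sectorsPerStripeUnit */
#define DATA_SEC_PER_STRIPE     (4 * SEC_PER_SU)

static long
eu_col(long addr)                       /* mirrors rf_EUCol() */
{
        return ((addr % DATA_SEC_PER_STRIPE) / SEC_PER_SU);
}

static long
eo_mod(long a, long b)                  /* mirrors rf_EO_Mod() */
{
        return ((a < 0) ? (a + b) % b : a % b);
}

static long
next_eu_boundary(long off, long sec_per_eu)  /* mirrors rf_OffsetOfNextEUBoundary() */
{
        return ((off / sec_per_eu + 1) * sec_per_eu);
}

int
main(void)
{
        printf("%ld\n", eu_col(200));                   /* 200 % 128 = 72, 72 / 32 = 2 */
        printf("%ld\n", eo_mod(-3, 17));                /* (-3 + 17) % 17 = 14 */
        printf("%ld\n", next_eu_boundary(70, 32));      /* (70 / 32 + 1) * 32 = 96 */
        return (0);
}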
*/ -#endif /* !_RF__RF_EVENODD_DAGFUNCS_H_ */ +#endif /* !_RF__RF_EVENODD_DAGFUNCS_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd_dags.c b/sys/dev/raidframe/rf_evenodd_dags.c index 512b73b257c..a5010f8af12 100644 --- a/sys/dev/raidframe/rf_evenodd_dags.c +++ b/sys/dev/raidframe/rf_evenodd_dags.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_evenodd_dags.c,v 1.2 1999/02/16 00:02:45 niklas Exp $ */ +/* $OpenBSD: rf_evenodd_dags.c,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_evenodd_dags.c,v 1.2 1999/02/05 00:06:11 oster Exp $ */ + /* * rf_evenodd_dags.c */ @@ -32,7 +33,7 @@ #include "rf_archs.h" -#if RF_INCLUDE_EVENODD > 0 +#if RF_INCLUDE_EVENODD > 0 #include "rf_types.h" #include "rf_raid.h" @@ -57,16 +58,20 @@ */ RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG) { - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs); + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + &rf_eoPRecoveryFuncs); } + /* * Lost data + E. * Use P to reconstruct missing data. */ RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG) { - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs); + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + &rf_eoPRecoveryFuncs); } + /* * Lost data + P. * Make E look like P, and use Eor for Xor, and we can @@ -75,12 +80,14 @@ RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG) RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG) { RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the DegradedReadDAG code */ + /* Swap P and E pointers to fake out the DegradedReadDAG code. */ temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoERecoveryFuncs); + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + &rf_eoERecoveryFuncs); } + /* * Lost two data. */ @@ -88,34 +95,45 @@ RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG) { rf_EO_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList); } + /* * Lost two data. */ RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG) { - rf_EOCreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList); + rf_EOCreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, + allocList); } + RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG) { if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) + asmap->failedPDAs[0]->numSector != + raidPtr->Layout.sectorsPerStripeUnit) RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, (int (*) (RF_DagNode_t *)) rf_Degraded_100_EOFunc, RF_TRUE); + rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, + flags, allocList, 2, + (int (*) (RF_DagNode_t *)) rf_Degraded_100_EOFunc, RF_TRUE); } + /* * E is dead. Small write. */ RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG) { - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWritePFuncs, NULL); + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, &rf_EOSmallWritePFuncs, NULL); } + /* * E is dead. Large write. */ RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG) { - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularPFunc, RF_TRUE); + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, 1, rf_RegularPFunc, RF_TRUE); } + /* * P is dead. Small write. 
* Swap E + P, use single-degraded stuff. @@ -123,12 +141,14 @@ RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG) RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG) { RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the DegradedReadDAG code */ + /* Swap P and E pointers to fake out the DegradedReadDAG code. */ temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWriteEFuncs, NULL); + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, &rf_EOSmallWriteEFuncs, NULL); } + /* * P is dead. Large write. * Swap E + P, use single-degraded stuff. @@ -136,54 +156,71 @@ RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG) RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG) { RF_PhysDiskAddr_t *temp; - /* swap P and E pointers to fake out the code */ + /* Swap P and E pointers to fake out the code. */ temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularEFunc, RF_FALSE); + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, 1, rf_RegularEFunc, RF_FALSE); } + RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG) { - rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); + rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, RF_IO_TYPE_WRITE); } + RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG) { RF_PhysDiskAddr_t *temp; if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) { + asmap->failedPDAs[0]->numSector != + raidPtr->Layout.sectorsPerStripeUnit) { RF_PANIC(); } - /* swap P and E to fake out parity code */ + /* Swap P and E to fake out parity code. */ temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, (int (*) (RF_DagNode_t *)) rf_EO_DegradedWriteEFunc, RF_FALSE); - /* is the regular E func the right one to call? */ + rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, + flags, allocList, 1, + (int (*) (RF_DagNode_t *)) rf_EO_DegradedWriteEFunc, RF_FALSE); + /* Is the regular E func the right one to call ? 
*/ } + RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG) { if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) + asmap->failedPDAs[0]->numSector != + raidPtr->Layout.sectorsPerStripeUnit) RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE); + rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, + flags, allocList, 1, rf_RecoveryXorFunc, RF_TRUE); } + RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead) { rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList, "Re", "EvenOddRecovery", rf_EvenOddDoubleRecoveryFunc); } + RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG) { - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_EOSmallWriteEFuncs); + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, &rf_pFuncs, &rf_EOSmallWriteEFuncs); } + RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG) { - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, rf_RegularPEFunc, RF_FALSE); + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, 2, rf_RegularPEFunc, RF_FALSE); } + RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG) { - rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Re", "We", "EOWrDDRecovery", rf_EOWriteDoubleRecoveryFunc); + rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, + "Re", "We", "EOWrDDRecovery", rf_EOWriteDoubleRecoveryFunc); } -#endif /* RF_INCLUDE_EVENODD > 0 */ + +#endif /* RF_INCLUDE_EVENODD > 0 */ diff --git a/sys/dev/raidframe/rf_evenodd_dags.h b/sys/dev/raidframe/rf_evenodd_dags.h index b2df9a3c7a4..874b377ae8d 100644 --- a/sys/dev/raidframe/rf_evenodd_dags.h +++ b/sys/dev/raidframe/rf_evenodd_dags.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_evenodd_dags.h,v 1.2 1999/02/16 00:02:45 niklas Exp $ */ +/* $OpenBSD: rf_evenodd_dags.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_evenodd_dags.h,v 1.2 1999/02/05 00:06:11 oster Exp $ */ + /* * rf_evenodd_dags.h */ @@ -30,16 +31,17 @@ * rights to redistribute these changes. */ -#ifndef _RF__RF_EVENODD_DAGS_H_ -#define _RF__RF_EVENODD_DAGS_H_ +#ifndef _RF__RF_EVENODD_DAGS_H_ +#define _RF__RF_EVENODD_DAGS_H_ #include "rf_types.h" -#if RF_UTILITY == 0 +#if RF_UTILITY == 0 #include "rf_dag.h" -/* extern decl's of the failure mode EO functions. - * swiped from rf_pqdeg.h +/* + * Extern decl's of the failure mode EO functions. + * Swiped from rf_pqdeg.h */ RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG); @@ -59,6 +61,6 @@ RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead); RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG); -#endif /* RF_UTILITY == 0 */ +#endif /* RF_UTILITY == 0 */ -#endif /* !_RF__RF_EVENODD_DAGS_H_ */ +#endif /* !_RF__RF_EVENODD_DAGS_H_ */ diff --git a/sys/dev/raidframe/rf_fifo.c b/sys/dev/raidframe/rf_fifo.c index 4e37d7e36a0..9a391a8f259 100644 --- a/sys/dev/raidframe/rf_fifo.c +++ b/sys/dev/raidframe/rf_fifo.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_fifo.c,v 1.5 2000/08/08 16:07:41 peter Exp $ */ +/* $OpenBSD: rf_fifo.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_fifo.c,v 1.5 2000/03/04 03:27:13 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -48,30 +49,27 @@ #include "rf_raid.h" #include "rf_types.h" -/* just malloc a header, zero it (via calloc), and return it */ +/* Just malloc a header, zero it (via calloc), and return it. */ /*ARGSUSED*/ -void * -rf_FifoCreate(sectPerDisk, clList, listp) - RF_SectorCount_t sectPerDisk; - RF_AllocListElem_t *clList; - RF_ShutdownList_t **listp; +void * +rf_FifoCreate(RF_SectorCount_t sectPerDisk, RF_AllocListElem_t *clList, + RF_ShutdownList_t **listp) { RF_FifoHeader_t *q; - RF_CallocAndAdd(q, 1, sizeof(RF_FifoHeader_t), (RF_FifoHeader_t *), clList); + RF_CallocAndAdd(q, 1, sizeof(RF_FifoHeader_t), (RF_FifoHeader_t *), + clList); q->hq_count = q->lq_count = 0; return ((void *) q); } -void -rf_FifoEnqueue(q_in, elem, priority) - void *q_in; - RF_DiskQueueData_t *elem; - int priority; +void +rf_FifoEnqueue(void *q_in, RF_DiskQueueData_t *elem, int priority) { RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_ASSERT(priority == RF_IO_NORMAL_PRIORITY || priority == RF_IO_LOW_PRIORITY); + RF_ASSERT(priority == RF_IO_NORMAL_PRIORITY || + priority == RF_IO_LOW_PRIORITY); elem->next = NULL; if (priority == RF_IO_NORMAL_PRIORITY) { @@ -87,8 +85,8 @@ rf_FifoEnqueue(q_in, elem, priority) } else { RF_ASSERT(elem->next == NULL); if (rf_fifoDebug) { - printf("raid%d: fifo: ENQ lopri\n", - elem->raidPtr->raidid); + printf("raid%d: fifo: ENQ lopri\n", + elem->raidPtr->raidid); } if (!q->lq_tail) { RF_ASSERT(q->lq_count == 0 && q->lq_head == NULL); @@ -101,7 +99,7 @@ rf_FifoEnqueue(q_in, elem, priority) q->lq_count++; } if ((q->hq_count + q->lq_count) != elem->queue->queueLength) { - printf("Queue lengths differ!: %d %d %d\n", + printf("Queue lengths differ ! : %d %d %d\n", q->hq_count, q->lq_count, (int) elem->queue->queueLength); printf("%d %d %d %d\n", (int) elem->queue->numOutstanding, @@ -113,8 +111,7 @@ rf_FifoEnqueue(q_in, elem, priority) } RF_DiskQueueData_t * -rf_FifoDequeue(q_in) - void *q_in; +rf_FifoDequeue(void *q_in) { RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; RF_DiskQueueData_t *nd; @@ -138,17 +135,19 @@ rf_FifoDequeue(q_in) nd->next = NULL; q->lq_count--; if (rf_fifoDebug) { - printf("raid%d: fifo: DEQ lopri %lx\n", - nd->raidPtr->raidid, (long) nd); + printf("raid%d: fifo: DEQ lopri %lx\n", + nd->raidPtr->raidid, (long) nd); } } else { - RF_ASSERT(q->hq_count == 0 && q->lq_count == 0 && q->hq_tail == NULL && q->lq_tail == NULL); + RF_ASSERT(q->hq_count == 0 && q->lq_count == 0 && + q->hq_tail == NULL && q->lq_tail == NULL); nd = NULL; } return (nd); } -/* Return ptr to item at head of queue. Used to examine request +/* + * Return ptr to item at head of queue. Used to examine request * info without actually dequeueing the request. */ RF_DiskQueueData_t * @@ -165,43 +164,48 @@ rf_FifoPeek(void *q_in) headElement = q->lq_head; return (headElement); } -/* We sometimes need to promote a low priority access to a regular priority access. - * Currently, this is only used when the user wants to write a stripe which is currently - * under reconstruction. - * This routine will promote all accesses tagged with the indicated parityStripeID from - * the low priority queue to the end of the normal priority queue. + +/* + * We sometimes need to promote a low priority access to a regular priority + * access. Currently, this is only used when the user wants to write a stripe + * that is currently under reconstruction. + * This routine will promote all accesses tagged with the indicated + * parityStripeID from the low priority queue to the end of the normal + * priority queue. 
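rf_fifo.c keeps two singly-linked lists per disk queue, one for normal-priority and one for low-priority requests, and rf_FifoDequeue() always drains the normal-priority list first. The toy model below uses none of the driver's types (RF_DiskQueueData_t carries far more state) and is only meant to show the enqueue/dequeue policy in isolation.

#include <stddef.h>

struct req {
        struct req      *next;
        int              lopri;         /* 0 = normal, 1 = low priority */
};

struct twoq {
        struct req      *hq_head, *hq_tail;     /* normal-priority requests */
        struct req      *lq_head, *lq_tail;     /* low-priority requests */
};

static void
twoq_enqueue(struct twoq *q, struct req *r)
{
        struct req **head = r->lopri ? &q->lq_head : &q->hq_head;
        struct req **tail = r->lopri ? &q->lq_tail : &q->hq_tail;

        r->next = NULL;
        if (*tail)
                (*tail)->next = r;
        else
                *head = r;
        *tail = r;
}

static struct req *
twoq_dequeue(struct twoq *q)
{
        struct req *r;

        if (q->hq_head) {               /* normal priority first */
                r = q->hq_head;
                if ((q->hq_head = r->next) == NULL)
                        q->hq_tail = NULL;
        } else if (q->lq_head) {        /* then low priority */
                r = q->lq_head;
                if ((q->lq_head = r->next) == NULL)
                        q->lq_tail = NULL;
        } else
                r = NULL;
        return (r);
}

rf_FifoPromote(), shown next, relinks matching entries from the low-priority list onto the tail of the normal-priority list when a stripe they touch must be written while under reconstruction.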
* We assume the queue is locked upon entry. */ -int -rf_FifoPromote(q_in, parityStripeID, which_ru) - void *q_in; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; +int +rf_FifoPromote(void *q_in, RF_StripeNum_t parityStripeID, + RF_ReconUnitNum_t which_ru) { RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_DiskQueueData_t *lp = q->lq_head, *pt = NULL; /* lp = lo-pri queue - * pointer, pt = trailer */ - int retval = 0; + /* lp = lo-pri queue pointer, pt = trailer */ + RF_DiskQueueData_t *lp = q->lq_head, *pt = NULL; + int retval = 0; while (lp) { - /* search for the indicated parity stripe in the low-pri queue */ - if (lp->parityStripeID == parityStripeID && lp->which_ru == which_ru) { + /* + * Search for the indicated parity stripe in the low-pri queue. + */ + if (lp->parityStripeID == parityStripeID && + lp->which_ru == which_ru) { /* printf("FifoPromote: promoting access for psid - * %ld\n",parityStripeID); */ + * %ld\n", parityStripeID); */ if (pt) - pt->next = lp->next; /* delete an entry other - * than the first */ + /* Delete an entry other than the first. */ + pt->next = lp->next; else - q->lq_head = lp->next; /* delete the head entry */ + /* Delete the head entry. */ + q->lq_head = lp->next; if (!q->lq_head) - q->lq_tail = NULL; /* we deleted the only - * entry */ + /* We deleted the only entry. */ + q->lq_tail = NULL; else if (lp == q->lq_tail) - q->lq_tail = pt; /* we deleted the tail - * entry */ + /* We deleted the tail entry. */ + q->lq_tail = pt; lp->next = NULL; q->lq_count--; @@ -210,17 +214,18 @@ rf_FifoPromote(q_in, parityStripeID, which_ru) q->hq_tail->next = lp; q->hq_tail = lp; } - /* append to hi-priority queue */ + /* Append to hi-priority queue. */ else { q->hq_head = q->hq_tail = lp; } q->hq_count++; + /* Deal with this later, if ever. */ /* UpdateShortestSeekFinishTimeForced(lp->requestPtr, - * lp->diskState); *//* deal with this later, if ever */ + * lp->diskState); */ - lp = (pt) ? pt->next : q->lq_head; /* reset low-pri pointer - * and continue */ + /* Reset low-pri pointer and continue. */ + lp = (pt) ? pt->next : q->lq_head; retval++; } else { @@ -229,8 +234,10 @@ rf_FifoPromote(q_in, parityStripeID, which_ru) } } - /* sanity check. delete this if you ever put more than one entry in - * the low-pri queue */ + /* + * Sanity check. Delete this if you ever put more than one entry in + * the low-pri queue. + */ RF_ASSERT(retval == 0 || retval == 1); return (retval); } diff --git a/sys/dev/raidframe/rf_fifo.h b/sys/dev/raidframe/rf_fifo.h index f23d7dcb573..74b4ddb76d5 100644 --- a/sys/dev/raidframe/rf_fifo.h +++ b/sys/dev/raidframe/rf_fifo.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_fifo.h,v 1.2 1999/02/16 00:02:46 niklas Exp $ */ +/* $OpenBSD: rf_fifo.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_fifo.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -34,8 +35,8 @@ */ -#ifndef _RF__RF_FIFO_H_ -#define _RF__RF_FIFO_H_ +#ifndef _RF__RF_FIFO_H_ +#define _RF__RF_FIFO_H_ #include "rf_archs.h" #include "rf_types.h" @@ -44,19 +45,14 @@ typedef struct RF_FifoHeader_s { RF_DiskQueueData_t *hq_head, *hq_tail; /* high priority requests */ RF_DiskQueueData_t *lq_head, *lq_tail; /* low priority requests */ - int hq_count, lq_count; /* debug only */ -} RF_FifoHeader_t; - -extern void * -rf_FifoCreate(RF_SectorCount_t sectPerDisk, - RF_AllocListElem_t * clList, RF_ShutdownList_t ** listp); -extern void -rf_FifoEnqueue(void *q_in, RF_DiskQueueData_t * elem, - int priority); -extern RF_DiskQueueData_t *rf_FifoDequeue(void *q_in); -extern RF_DiskQueueData_t *rf_FifoPeek(void *q_in); -extern int -rf_FifoPromote(void *q_in, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); - -#endif /* !_RF__RF_FIFO_H_ */ + int hq_count, lq_count; /* debug only */ +} RF_FifoHeader_t; + +extern void *rf_FifoCreate(RF_SectorCount_t, RF_AllocListElem_t *, + RF_ShutdownList_t **); +extern void rf_FifoEnqueue(void *, RF_DiskQueueData_t *, int); +extern RF_DiskQueueData_t *rf_FifoDequeue(void *); +extern RF_DiskQueueData_t *rf_FifoPeek(void *); +extern int rf_FifoPromote(void *, RF_StripeNum_t, RF_ReconUnitNum_t); + +#endif /* !_RF__RF_FIFO_H_ */ diff --git a/sys/dev/raidframe/rf_freelist.h b/sys/dev/raidframe/rf_freelist.h index c2535109ec3..aba35f48874 100644 --- a/sys/dev/raidframe/rf_freelist.h +++ b/sys/dev/raidframe/rf_freelist.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_freelist.h,v 1.2 1999/02/16 00:02:47 niklas Exp $ */ +/* $OpenBSD: rf_freelist.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_freelist.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ + /* * rf_freelist.h */ @@ -31,7 +32,7 @@ */ /* - * rf_freelist.h -- code to manage counted freelists + * rf_freelist.h -- Code to manage counted freelists. * * Keep an arena of fixed-size objects. When a new object is needed, * allocate it as necessary. When an object is freed, either put it @@ -39,630 +40,677 @@ * size. 
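The counted freelist described above is a bounded object cache: a get first pops the free list and otherwise allocates obj_inc objects at once, returning one and parking the rest; a free parks the object unless free_cnt has already reached max_free_cnt, in which case it is really released. Below is a single-threaded sketch of that policy, with calloc/free standing in for RF_Calloc/RF_Free and the locking, statistics and some bookkeeping left out; the names are illustrative, not the header's.

#include <stdlib.h>

struct fl_obj {
        struct fl_obj   *next;          /* link field; payload would follow */
};

struct fl {
        struct fl_obj   *objlist;       /* parked free objects */
        int              free_cnt;
        int              max_free_cnt;  /* cap on parked objects */
        int              obj_inc;       /* growth step on a miss */
        size_t           obj_size;
};

static void *
fl_get(struct fl *fl)
{
        struct fl_obj *obj, *extra;
        int i;

        if (fl->objlist != NULL) {              /* hit: pop the head */
                obj = fl->objlist;
                fl->objlist = obj->next;
                fl->free_cnt--;
                return (obj);
        }
        obj = calloc(1, fl->obj_size);          /* miss: grow by obj_inc */
        for (i = 1; obj != NULL && i < fl->obj_inc; i++) {
                if ((extra = calloc(1, fl->obj_size)) == NULL)
                        break;
                extra->next = fl->objlist;      /* park the extras */
                fl->objlist = extra;
                fl->free_cnt++;
        }
        return (obj);
}

static void
fl_put(struct fl *fl, void *p)
{
        struct fl_obj *obj = p;

        if (fl->free_cnt >= fl->max_free_cnt) { /* arena full: really free */
                free(obj);
                return;
        }
        obj->next = fl->objlist;                /* otherwise park it */
        fl->objlist = obj;
        fl->free_cnt++;
}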
*/ -#ifndef _RF__RF_FREELIST_H_ -#define _RF__RF_FREELIST_H_ +#ifndef _RF__RF_FREELIST_H_ +#define _RF__RF_FREELIST_H_ #include "rf_types.h" #include "rf_debugMem.h" #include "rf_general.h" #include "rf_threadstuff.h" -#define RF_FREELIST_STATS 0 +#define RF_FREELIST_STATS 0 -#if RF_FREELIST_STATS > 0 +#if RF_FREELIST_STATS > 0 typedef struct RF_FreeListStats_s { - char *file; - int line; - int allocations; - int frees; - int max_free; - int grows; - int outstanding; - int max_outstanding; -} RF_FreeListStats_t; -#define RF_FREELIST_STAT_INIT(_fl_) { \ - bzero((char *)&((_fl_)->stats), sizeof(RF_FreeListStats_t)); \ - (_fl_)->stats.file = __FILE__; \ - (_fl_)->stats.line = __LINE__; \ -} - -#define RF_FREELIST_STAT_ALLOC(_fl_) { \ - (_fl_)->stats.allocations++; \ - (_fl_)->stats.outstanding++; \ - if ((_fl_)->stats.outstanding > (_fl_)->stats.max_outstanding) \ - (_fl_)->stats.max_outstanding = (_fl_)->stats.outstanding; \ -} - -#define RF_FREELIST_STAT_FREE_UPDATE(_fl_) { \ - if ((_fl_)->free_cnt > (_fl_)->stats.max_free) \ - (_fl_)->stats.max_free = (_fl_)->free_cnt; \ -} - -#define RF_FREELIST_STAT_FREE(_fl_) { \ - (_fl_)->stats.frees++; \ - (_fl_)->stats.outstanding--; \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ -} - -#define RF_FREELIST_STAT_GROW(_fl_) { \ - (_fl_)->stats.grows++; \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ -} - -#define RF_FREELIST_STAT_REPORT(_fl_) { \ - printf("Freelist at %s %d (%s)\n", (_fl_)->stats.file, (_fl_)->stats.line, RF_STRING(_fl_)); \ - printf(" %d allocations, %d frees\n", (_fl_)->stats.allocations, (_fl_)->stats.frees); \ - printf(" %d grows\n", (_fl_)->stats.grows); \ - printf(" %d outstanding\n", (_fl_)->stats.outstanding); \ - printf(" %d free (max)\n", (_fl_)->stats.max_free); \ - printf(" %d outstanding (max)\n", (_fl_)->stats.max_outstanding); \ -} - -#else /* RF_FREELIST_STATS > 0 */ - -#define RF_FREELIST_STAT_INIT(_fl_) -#define RF_FREELIST_STAT_ALLOC(_fl_) -#define RF_FREELIST_STAT_FREE_UPDATE(_fl_) -#define RF_FREELIST_STAT_FREE(_fl_) -#define RF_FREELIST_STAT_GROW(_fl_) -#define RF_FREELIST_STAT_REPORT(_fl_) - -#endif /* RF_FREELIST_STATS > 0 */ + char *file; + int line; + int allocations; + int frees; + int max_free; + int grows; + int outstanding; + int max_outstanding; +} RF_FreeListStats_t; + +#define RF_FREELIST_STAT_INIT(_fl_) \ +do { \ + bzero((char *)&((_fl_)->stats), sizeof(RF_FreeListStats_t)); \ + (_fl_)->stats.file = __FILE__; \ + (_fl_)->stats.line = __LINE__; \ +} while (0) + +#define RF_FREELIST_STAT_ALLOC(_fl_) \ +do { \ + (_fl_)->stats.allocations++; \ + (_fl_)->stats.outstanding++; \ + if ((_fl_)->stats.outstanding > (_fl_)->stats.max_outstanding) \ + (_fl_)->stats.max_outstanding = \ + (_fl_)->stats.outstanding; \ +} while (0) + +#define RF_FREELIST_STAT_FREE_UPDATE(_fl_) \ +do { \ + if ((_fl_)->free_cnt > (_fl_)->stats.max_free) \ + (_fl_)->stats.max_free = (_fl_)->free_cnt; \ +} while (0) + +#define RF_FREELIST_STAT_FREE(_fl_) \ +do { \ + (_fl_)->stats.frees++; \ + (_fl_)->stats.outstanding--; \ + RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ +} while (0) + +#define RF_FREELIST_STAT_GROW(_fl_) \ +do { \ + (_fl_)->stats.grows++; \ + RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ +} while (0) + +#define RF_FREELIST_STAT_REPORT(_fl_) \ +do { \ + printf("Freelist at %s %d (%s)\n", (_fl_)->stats.file, \ + (_fl_)->stats.line, RF_STRING(_fl_)); \ + printf(" %d allocations, %d frees\n", \ + (_fl_)->stats.allocations, (_fl_)->stats.frees); \ + printf(" %d grows\n", (_fl_)->stats.grows); \ + printf(" %d outstanding\n", (_fl_)->stats.outstanding); \ 
+ printf(" %d free (max)\n", (_fl_)->stats.max_free); \ + printf(" %d outstanding (max)\n", \ + (_fl_)->stats.max_outstanding); \ +} while (0) + +#else /* RF_FREELIST_STATS > 0 */ + +#define RF_FREELIST_STAT_INIT(_fl_) +#define RF_FREELIST_STAT_ALLOC(_fl_) +#define RF_FREELIST_STAT_FREE_UPDATE(_fl_) +#define RF_FREELIST_STAT_FREE(_fl_) +#define RF_FREELIST_STAT_GROW(_fl_) +#define RF_FREELIST_STAT_REPORT(_fl_) + +#endif /* RF_FREELIST_STATS > 0 */ struct RF_FreeList_s { - void *objlist; /* list of free obj */ - int free_cnt; /* how many free obj */ - int max_free_cnt; /* max free arena size */ - int obj_inc; /* how many to allocate at a time */ - int obj_size; /* size of objects */ - RF_DECLARE_MUTEX(lock) -#if RF_FREELIST_STATS > 0 - RF_FreeListStats_t stats; /* statistics */ -#endif /* RF_FREELIST_STATS > 0 */ + void *objlist; /* List of free obj. */ + int free_cnt; /* How many free obj. */ + int max_free_cnt; /* Max free arena size. */ + int obj_inc; /* How many to allocate at a time. */ + int obj_size; /* Size of objects. */ + RF_DECLARE_MUTEX(lock); +#if RF_FREELIST_STATS > 0 + RF_FreeListStats_t stats; /* Statistics. */ +#endif /* RF_FREELIST_STATS > 0 */ }; + /* - * fl = freelist - * maxcnt = max number of items in arena - * inc = how many to allocate at a time - * size = size of object + * fl = FreeList. + * maxcnt = Max number of items in arena. + * inc = How many to allocate at a time. + * size = Size of object. */ -#define RF_FREELIST_CREATE(_fl_,_maxcnt_,_inc_,_size_) { \ - int rc; \ - RF_ASSERT((_inc_) > 0); \ - RF_Malloc(_fl_, sizeof(RF_FreeList_t), (RF_FreeList_t *)); \ - (_fl_)->objlist = NULL; \ - (_fl_)->free_cnt = 0; \ - (_fl_)->max_free_cnt = _maxcnt_; \ - (_fl_)->obj_inc = _inc_; \ - (_fl_)->obj_size = _size_; \ - rc = rf_mutex_init(&(_fl_)->lock); \ - if (rc) { \ - RF_Free(_fl_, sizeof(RF_FreeList_t)); \ - _fl_ = NULL; \ - } \ - RF_FREELIST_STAT_INIT(_fl_); \ -} +#define RF_FREELIST_CREATE(_fl_,_maxcnt_,_inc_,_size_) \ +do { \ + int rc; \ + RF_ASSERT((_inc_) > 0); \ + RF_Malloc(_fl_, sizeof(RF_FreeList_t), (RF_FreeList_t *)); \ + (_fl_)->objlist = NULL; \ + (_fl_)->free_cnt = 0; \ + (_fl_)->max_free_cnt = _maxcnt_; \ + (_fl_)->obj_inc = _inc_; \ + (_fl_)->obj_size = _size_; \ + rc = rf_mutex_init(&(_fl_)->lock); \ + if (rc) { \ + RF_Free(_fl_, sizeof(RF_FreeList_t)); \ + _fl_ = NULL; \ + } \ + RF_FREELIST_STAT_INIT(_fl_); \ +} while (0) /* - * fl = freelist - * cnt = number to prime with - * nextp = name of "next" pointer in obj - * cast = object cast + * fl = FreeList. + * cnt = Number to prime with. + * nextp = Name of "next" pointer in obj. + * cast = Object cast. 
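A mechanical change running through all of these macros is the switch from bare { ... } bodies to do { ... } while (0). The reason is the classic one: a brace-only, statement-like macro followed by a semicolon breaks when it is used as the body of an if that has an else. A minimal illustration follows; RESET_PAIR is made up for the example and is not a RAIDframe macro.

/* Expands to "{ ... };" -- the stray ';' ends the if, so the else below
 * has no if to attach to and the function fails to compile. */
#define RESET_PAIR_BAD(a, b)    { (a) = 0; (b) = 0; }

/* Behaves like a single statement and still wants its trailing ';'. */
#define RESET_PAIR(a, b)                                                \
do {                                                                    \
        (a) = 0;                                                        \
        (b) = 0;                                                        \
} while (0)

static int
reset_if_needed(int cond, int x, int y)
{
        if (cond)
                RESET_PAIR(x, y);       /* fine */
        else
                x = y = 1;              /* syntax error with RESET_PAIR_BAD */
        return (x + y);
}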
*/ -#define RF_FREELIST_PRIME(_fl_,_cnt_,_nextp_,_cast_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - for(_i=0;_i<(_cnt_);_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - (_fl_)->free_cnt++; \ - } \ - else { \ - break; \ - } \ - } \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -#define RF_FREELIST_MUTEX_OF(_fl_) ((_fl_)->lock) - -#define RF_FREELIST_DO_UNLOCK(_fl_) { \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} - -#define RF_FREELIST_DO_LOCK(_fl_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ -} +#define RF_FREELIST_PRIME(_fl_,_cnt_,_nextp_,_cast_) \ +do { \ + void *_p; \ + int _i; \ + RF_LOCK_MUTEX((_fl_)->lock); \ + for (_i = 0; _i < (_cnt_); _i++) { \ + RF_Calloc(_p, 1, (_fl_)->obj_size, (void *)); \ + if (_p) { \ + (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ + (_fl_)->objlist = _p; \ + (_fl_)->free_cnt++; \ + } \ + else { \ + break; \ + } \ + } \ + RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ + RF_UNLOCK_MUTEX((_fl_)->lock); \ +} while (0) + +#define RF_FREELIST_MUTEX_OF(_fl_) ((_fl_)->lock) + +#define RF_FREELIST_DO_UNLOCK(_fl_) RF_UNLOCK_MUTEX((_fl_)->lock) + +#define RF_FREELIST_DO_LOCK(_fl_) RF_LOCK_MUTEX((_fl_)->lock) /* - * fl = freelist - * cnt = number to prime with - * nextp = name of "next" pointer in obj - * cast = object cast - * init = func to call to init obj + * fl = FreeList. + * cnt = Number to prime with. + * nextp = Name of "next" pointer in obj. + * cast = Object cast. + * init = Func to call to init obj. */ -#define RF_FREELIST_PRIME_INIT(_fl_,_cnt_,_nextp_,_cast_,_init_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - for(_i=0;_i<(_cnt_);_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_init_ (_cast_ _p)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - } \ - if (_p) { \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - (_fl_)->free_cnt++; \ - } \ - else { \ - break; \ - } \ - } \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} +#define RF_FREELIST_PRIME_INIT(_fl_,_cnt_,_nextp_,_cast_,_init_) \ +do { \ + void *_p; \ + int _i; \ + RF_LOCK_MUTEX((_fl_)->lock); \ + for (_i = 0; _i < (_cnt_); _i++) { \ + RF_Calloc(_p, 1, (_fl_)->obj_size, (void *)); \ + if (_init_(_cast_ _p)) { \ + RF_Free(_p, (_fl_)->obj_size); \ + _p = NULL; \ + } \ + if (_p) { \ + (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ + (_fl_)->objlist = _p; \ + (_fl_)->free_cnt++; \ + } \ + else { \ + break; \ + } \ + } \ + RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ + RF_UNLOCK_MUTEX((_fl_)->lock); \ +} while (0) /* - * fl = freelist - * cnt = number to prime with - * nextp = name of "next" pointer in obj - * cast = object cast - * init = func to call to init obj - * arg = arg to init obj func + * fl = FreeList. + * cnt = Number to prime with. + * nextp = Name of "next" pointer in obj. + * cast = Object cast. + * init = Func to call to init obj. + * arg = Arg to init obj func. 
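The _INIT/_ARG variants take a constructor hook: it is handed a freshly zeroed object (plus an optional extra argument), and a non-zero return means the freshly allocated object is released (in the GET variants the caller then sees NULL). The matching _clean_ hooks used by the FREE_CLEAN and DESTROY_CLEAN macros further down undo whatever the constructor did. A hypothetical pair written against that contract is sketched below; struct foo and its fields are invented for the example, and RF_Malloc/RF_Free are the allocation wrappers pulled in by this header's includes.

struct foo {
        struct foo      *f_next;        /* link field handed to the macros */
        char            *f_scratch;
};

static int
foo_init(struct foo *f)
{
        RF_Malloc(f->f_scratch, 128, (char *));
        return (f->f_scratch == NULL);  /* non-zero means "init failed" */
}

static void
foo_clean(struct foo *f)
{
        if (f->f_scratch != NULL)
                RF_Free(f->f_scratch, 128);
}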
*/ -#define RF_FREELIST_PRIME_INIT_ARG(_fl_,_cnt_,_nextp_,_cast_,_init_,_arg_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - for(_i=0;_i<(_cnt_);_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_init_ (_cast_ _p,_arg_)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - } \ - if (_p) { \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - (_fl_)->free_cnt++; \ - } \ - else { \ - break; \ - } \ - } \ - RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} +#define RF_FREELIST_PRIME_INIT_ARG(_fl_,_cnt_,_nextp_,_cast_,_init_,_arg_) \ +do { \ + void *_p; \ + int _i; \ + RF_LOCK_MUTEX((_fl_)->lock); \ + for (_i = 0; _i < (_cnt_); _i++) { \ + RF_Calloc(_p, 1, (_fl_)->obj_size, (void *)); \ + if (_init_(_cast_ _p, _arg_)) { \ + RF_Free(_p, (_fl_)->obj_size); \ + _p = NULL; \ + } \ + if (_p) { \ + (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ + (_fl_)->objlist = _p; \ + (_fl_)->free_cnt++; \ + } \ + else { \ + break; \ + } \ + } \ + RF_FREELIST_STAT_FREE_UPDATE(_fl_); \ + RF_UNLOCK_MUTEX((_fl_)->lock); \ +} while (0) /* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * init = init obj func + * fl = FreeList. + * obj = Object to allocate. + * nextp = Name of "next" pointer in obj. + * cast = Cast of obj assignment. + * init = Init obj func. */ -#define RF_FREELIST_GET_INIT(_fl_,_obj_,_nextp_,_cast_,_init_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - } \ - else { \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. \ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - if (_init_ (_obj_)) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - _obj_ = NULL; \ - } \ - else { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - if (_init_ (_p)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - break; \ - } \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - } \ - else { \ - break; \ - } \ - } \ - } \ - } \ - RF_FREELIST_STAT_GROW(_fl_); \ - } \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} +#define RF_FREELIST_GET_INIT(_fl_,_obj_,_nextp_,_cast_,_init_) \ +do { \ + void *_p; \ + int _i; \ + RF_LOCK_MUTEX((_fl_)->lock); \ + RF_ASSERT(sizeof(*(_obj_)) == ((_fl_)->obj_size)); \ + if (_fl_->objlist) { \ + _obj_ = _cast_((_fl_)->objlist); \ + (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ + (_fl_)->free_cnt--; \ + } \ + else { \ + /* \ + * Allocate one at a time so we can free \ + * one at a time without cleverness when arena \ + * is full. 
\ + */ \ + RF_Calloc(_obj_, 1, (_fl_)->obj_size, _cast_); \ + if (_obj_) { \ + if (_init_(_obj_)) { \ + RF_Free(_obj_, (_fl_)->obj_size); \ + _obj_ = NULL; \ + } \ + else { \ + for (_i = 1; _i < (_fl_)->obj_inc; \ + _i++) { \ + RF_Calloc(_p, 1, \ + (_fl_)->obj_size, \ + (void *)); \ + if (_p) { \ + if (_init_(_p)) { \ + RF_Free(_p, \ + (_fl_)->obj_size); \ + _p = NULL; \ + break; \ + } \ + (_cast_(_p))->_nextp_ = \ + (_fl_)->objlist; \ + (_fl_)->objlist = _p; \ + } \ + else { \ + break; \ + } \ + } \ + } \ + } \ + RF_FREELIST_STAT_GROW(_fl_); \ + } \ + RF_FREELIST_STAT_ALLOC(_fl_); \ + RF_UNLOCK_MUTEX((_fl_)->lock); \ +} while (0) /* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * init = init obj func - * arg = arg to init obj func + * fl = FreeList. + * obj = Object to allocate. + * nextp = Name of "next" pointer in obj. + * cast = Cast of obj assignment. + * init = Init obj func. + * arg = Arg to init obj func. */ -#define RF_FREELIST_GET_INIT_ARG(_fl_,_obj_,_nextp_,_cast_,_init_,_arg_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - } \ - else { \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. \ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - if (_init_ (_obj_,_arg_)) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - _obj_ = NULL; \ - } \ - else { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - if (_init_ (_p,_arg_)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - break; \ - } \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - } \ - else { \ - break; \ - } \ - } \ - } \ - } \ - RF_FREELIST_STAT_GROW(_fl_); \ - } \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} +#define RF_FREELIST_GET_INIT_ARG(_fl_,_obj_,_nextp_,_cast_,_init_,_arg_) \ +do { \ + void *_p; \ + int _i; \ + RF_LOCK_MUTEX((_fl_)->lock); \ + RF_ASSERT(sizeof(*(_obj_)) == ((_fl_)->obj_size)); \ + if (_fl_->objlist) { \ + _obj_ = _cast_((_fl_)->objlist); \ + (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ + (_fl_)->free_cnt--; \ + } \ + else { \ + /* \ + * Allocate one at a time so we can free \ + * one at a time without cleverness when arena \ + * is full. \ + */ \ + RF_Calloc(_obj_, 1, (_fl_)->obj_size, _cast_); \ + if (_obj_) { \ + if (_init_(_obj_, _arg_)) { \ + RF_Free(_obj_, (_fl_)->obj_size); \ + _obj_ = NULL; \ + } \ + else { \ + for (_i = 1; _i < (_fl_)->obj_inc; \ + _i++) { \ + RF_Calloc(_p, 1, \ + (_fl_)->obj_size, (void *)); \ + if (_p) { \ + if (_init_(_p, _arg_)) \ + { \ + RF_Free(_p, \ + (_fl_)->obj_size); \ + _p = NULL; \ + break; \ + } \ + (_cast_(_p))->_nextp_ = \ + (_fl_)->objlist; \ + (_fl_)->objlist = _p; \ + } \ + else { \ + break; \ + } \ + } \ + } \ + } \ + RF_FREELIST_STAT_GROW(_fl_); \ + } \ + RF_FREELIST_STAT_ALLOC(_fl_); \ + RF_UNLOCK_MUTEX((_fl_)->lock); \ +} while (0) /* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * init = init obj func + * fl = FreeList. + * obj = Object to allocate. + * nextp = Name of "next" pointer in obj. + * cast = Cast of obj assignment. + * init = Init obj func. 
*/ -#define RF_FREELIST_GET_INIT_NOUNLOCK(_fl_,_obj_,_nextp_,_cast_,_init_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - } \ - else { \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. \ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - if (_init_ (_obj_)) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - _obj_ = NULL; \ - } \ - else { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - if (_init_ (_p)) { \ - RF_Free(_p,(_fl_)->obj_size); \ - _p = NULL; \ - break; \ - } \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - } \ - else { \ - break; \ - } \ - } \ - } \ - } \ - RF_FREELIST_STAT_GROW(_fl_); \ - } \ - RF_FREELIST_STAT_ALLOC(_fl_); \ -} +#define RF_FREELIST_GET_INIT_NOUNLOCK(_fl_,_obj_,_nextp_,_cast_,_init_) \ +do { \ + void *_p; \ + int _i; \ + RF_LOCK_MUTEX((_fl_)->lock); \ + RF_ASSERT(sizeof(*(_obj_)) == ((_fl_)->obj_size)); \ + if (_fl_->objlist) { \ + _obj_ = _cast_((_fl_)->objlist); \ + (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ + (_fl_)->free_cnt--; \ + } \ + else { \ + /* \ + * Allocate one at a time so we can free \ + * one at a time without cleverness when arena \ + * is full. \ + */ \ + RF_Calloc(_obj_, 1, (_fl_)->obj_size, _cast_); \ + if (_obj_) { \ + if (_init_(_obj_)) { \ + RF_Free(_obj_, (_fl_)->obj_size); \ + _obj_ = NULL; \ + } \ + else { \ + for (_i = 1; _i < (_fl_)->obj_inc; \ + _i++) { \ + RF_Calloc(_p, 1, \ + (_fl_)->obj_size, \ + (void *)); \ + if (_p) { \ + if (_init_(_p)) { \ + RF_Free(_p, \ + (_fl_)->obj_size); \ + _p = NULL; \ + break; \ + } \ + (_cast_(_p))->_nextp_ = \ + (_fl_)->objlist; \ + (_fl_)->objlist = _p; \ + } \ + else { \ + break; \ + } \ + } \ + } \ + } \ + RF_FREELIST_STAT_GROW(_fl_); \ + } \ + RF_FREELIST_STAT_ALLOC(_fl_); \ +} while (0) /* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment + * fl = FreeList. + * obj = Object to allocate. + * nextp = Name of "next" pointer in obj. + * cast = Cast of obj assignment. */ -#define RF_FREELIST_GET(_fl_,_obj_,_nextp_,_cast_) { \ - void *_p; \ - int _i; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - } \ - else { \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. 
\ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - } \ - else { \ - break; \ - } \ - } \ - } \ - RF_FREELIST_STAT_GROW(_fl_); \ - } \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} +#define RF_FREELIST_GET(_fl_,_obj_,_nextp_,_cast_) \ +do { \ + void *_p; \ + int _i; \ + RF_LOCK_MUTEX((_fl_)->lock); \ + RF_ASSERT(sizeof(*(_obj_)) == ((_fl_)->obj_size)); \ + if (_fl_->objlist) { \ + _obj_ = _cast_((_fl_)->objlist); \ + (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ + (_fl_)->free_cnt--; \ + } \ + else { \ + /* \ + * Allocate one at a time so we can free \ + * one at a time without cleverness when arena \ + * is full. \ + */ \ + RF_Calloc(_obj_, 1, (_fl_)->obj_size, _cast_); \ + if (_obj_) { \ + for (_i = 1; _i < (_fl_)->obj_inc; _i++) { \ + RF_Calloc(_p, 1, (_fl_)->obj_size, \ + (void *)); \ + if (_p) { \ + (_cast_(_p))->_nextp_ = \ + (_fl_)->objlist; \ + (_fl_)->objlist = _p; \ + } \ + else { \ + break; \ + } \ + } \ + } \ + RF_FREELIST_STAT_GROW(_fl_); \ + } \ + RF_FREELIST_STAT_ALLOC(_fl_); \ + RF_UNLOCK_MUTEX((_fl_)->lock); \ +} while (0) /* - * fl = freelist - * obj = object to allocate - * nextp = name of "next" pointer in obj - * cast = cast of obj assignment - * num = num objs to return + * fl = FreeList. + * obj = Object to allocate. + * nextp = Name of "next" pointer in obj. + * cast = Cast of obj assignment. + * num = Num objs to return. */ -#define RF_FREELIST_GET_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \ - void *_p, *_l, *_f; \ - int _i, _n; \ - _l = _f = NULL; \ - _n = 0; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \ - for(_n=0;_n<_num_;_n++) { \ - if (_fl_->objlist) { \ - _obj_ = _cast_((_fl_)->objlist); \ - (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ - (_fl_)->free_cnt--; \ - } \ - else { \ - /* \ - * Allocate one at a time so we can free \ - * one at a time without cleverness when arena \ - * is full. \ - */ \ - RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \ - if (_obj_) { \ - for(_i=1;_i<(_fl_)->obj_inc;_i++) { \ - RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \ - if (_p) { \ - (_cast_(_p))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _p; \ - } \ - else { \ - break; \ - } \ - } \ - } \ - RF_FREELIST_STAT_GROW(_fl_); \ - } \ - if (_f == NULL) \ - _f = _obj_; \ - if (_obj_) { \ - (_cast_(_obj_))->_nextp_ = _l; \ - _l = _obj_; \ - RF_FREELIST_STAT_ALLOC(_fl_); \ - } \ - else { \ - (_cast_(_f))->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = _l; \ - _n = _num_; \ - } \ - } \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} +#define RF_FREELIST_GET_N(_fl_,_obj_,_nextp_,_cast_,_num_) \ +do { \ + void *_p, *_l, *_f; \ + int _i, _n; \ + _l = _f = NULL; \ + _n = 0; \ + RF_LOCK_MUTEX((_fl_)->lock); \ + RF_ASSERT(sizeof(*(_obj_)) == ((_fl_)->obj_size)); \ + for (_n = 0; _n < _num_; _n++) { \ + if (_fl_->objlist) { \ + _obj_ = _cast_((_fl_)->objlist); \ + (_fl_)->objlist = (void *)((_obj_)->_nextp_); \ + (_fl_)->free_cnt--; \ + } \ + else { \ + /* \ + * Allocate one at a time so we can free \ + * one at a time without cleverness when arena \ + * is full. 
\ + */ \ + RF_Calloc(_obj_, 1, (_fl_)->obj_size, _cast_); \ + if (_obj_) { \ + for (_i = 1; _i < (_fl_)->obj_inc; \ + _i++) { \ + RF_Calloc(_p, 1, \ + (_fl_)->obj_size, \ + (void *)); \ + if (_p) { \ + (_cast_(_p))->_nextp_ = \ + (_fl_)->objlist; \ + (_fl_)->objlist = _p; \ + } \ + else { \ + break; \ + } \ + } \ + } \ + RF_FREELIST_STAT_GROW(_fl_); \ + } \ + if (_f == NULL) \ + _f = _obj_; \ + if (_obj_) { \ + (_cast_(_obj_))->_nextp_ = _l; \ + _l = _obj_; \ + RF_FREELIST_STAT_ALLOC(_fl_); \ + } \ + else { \ + (_cast_(_f))->_nextp_ = (_fl_)->objlist; \ + (_fl_)->objlist = _l; \ + _n = _num_; \ + } \ + } \ + RF_UNLOCK_MUTEX((_fl_)->lock); \ +} while (0) /* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj + * fl = FreeList. + * obj = Object to free. + * nextp = Name of "next" pointer in obj. */ -#define RF_FREELIST_FREE(_fl_,_obj_,_nextp_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} +#define RF_FREELIST_FREE(_fl_,_obj_,_nextp_) \ +do { \ + RF_LOCK_MUTEX((_fl_)->lock); \ + if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ + RF_Free(_obj_, (_fl_)->obj_size); \ + } \ + else { \ + RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ + (_obj_)->_nextp_ = (_fl_)->objlist; \ + (_fl_)->objlist = (void *)(_obj_); \ + (_fl_)->free_cnt++; \ + } \ + RF_FREELIST_STAT_FREE(_fl_); \ + RF_UNLOCK_MUTEX((_fl_)->lock); \ +} while (0) /* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * num = num to free (debugging) + * fl = FreeList. + * obj = Object to free. + * nextp = Name of "next" pointer in obj. + * num = Num to free (debugging). */ -#define RF_FREELIST_FREE_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \ - void *_no; \ - int _n; \ - _n = 0; \ - RF_LOCK_MUTEX((_fl_)->lock); \ - while(_obj_) { \ - _no = (_cast_(_obj_))->_nextp_; \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - _n++; \ - _obj_ = _no; \ - RF_FREELIST_STAT_FREE(_fl_); \ - } \ - RF_ASSERT(_n==(_num_)); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} +#define RF_FREELIST_FREE_N(_fl_,_obj_,_nextp_,_cast_,_num_) \ +do { \ + void *_no; \ + int _n; \ + _n = 0; \ + RF_LOCK_MUTEX((_fl_)->lock); \ + while(_obj_) { \ + _no = (_cast_(_obj_))->_nextp_; \ + if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ + RF_Free(_obj_, (_fl_)->obj_size); \ + } \ + else { \ + RF_ASSERT((_fl_)->free_cnt < \ + (_fl_)->max_free_cnt); \ + (_obj_)->_nextp_ = (_fl_)->objlist; \ + (_fl_)->objlist = (void *)(_obj_); \ + (_fl_)->free_cnt++; \ + } \ + _n++; \ + _obj_ = _no; \ + RF_FREELIST_STAT_FREE(_fl_); \ + } \ + RF_ASSERT(_n==(_num_)); \ + RF_UNLOCK_MUTEX((_fl_)->lock); \ +} while (0) /* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * clean = undo for init + * fl = FreeList. + * obj = Object to free. + * nextp = Name of "next" pointer in obj. + * clean = Undo for init. 
*/ -#define RF_FREELIST_FREE_CLEAN(_fl_,_obj_,_nextp_,_clean_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - _clean_ (_obj_); \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} +#define RF_FREELIST_FREE_CLEAN(_fl_,_obj_,_nextp_,_clean_) \ +do { \ + RF_LOCK_MUTEX((_fl_)->lock); \ + if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ + _clean_(_obj_); \ + RF_Free(_obj_, (_fl_)->obj_size); \ + } \ + else { \ + RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ + (_obj_)->_nextp_ = (_fl_)->objlist; \ + (_fl_)->objlist = (void *)(_obj_); \ + (_fl_)->free_cnt++; \ + } \ + RF_FREELIST_STAT_FREE(_fl_); \ + RF_UNLOCK_MUTEX((_fl_)->lock); \ +} while (0) /* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * clean = undo for init - * arg = arg for undo func + * fl = FreeList. + * obj = Object to free. + * nextp = Name of "next" pointer in obj. + * clean = Undo for init. + * arg = Arg for undo func. */ -#define RF_FREELIST_FREE_CLEAN_ARG(_fl_,_obj_,_nextp_,_clean_,_arg_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - _clean_ (_obj_,_arg_); \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ - RF_UNLOCK_MUTEX((_fl_)->lock); \ -} +#define RF_FREELIST_FREE_CLEAN_ARG(_fl_,_obj_,_nextp_,_clean_,_arg_) \ +do { \ + RF_LOCK_MUTEX((_fl_)->lock); \ + if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ + _clean_(_obj_, _arg_); \ + RF_Free(_obj_, (_fl_)->obj_size); \ + } \ + else { \ + RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ + (_obj_)->_nextp_ = (_fl_)->objlist; \ + (_fl_)->objlist = (void *)(_obj_); \ + (_fl_)->free_cnt++; \ + } \ + RF_FREELIST_STAT_FREE(_fl_); \ + RF_UNLOCK_MUTEX((_fl_)->lock); \ +} while (0) /* - * fl = freelist - * obj = object to free - * nextp = name of "next" pointer in obj - * clean = undo for init + * fl = FreeList. + * obj = Object to free. + * nextp = Name of "next" pointer in obj. + * clean = Undo for init. */ -#define RF_FREELIST_FREE_CLEAN_NOUNLOCK(_fl_,_obj_,_nextp_,_clean_) { \ - RF_LOCK_MUTEX((_fl_)->lock); \ - if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ - _clean_ (_obj_); \ - RF_Free(_obj_,(_fl_)->obj_size); \ - } \ - else { \ - RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ - (_obj_)->_nextp_ = (_fl_)->objlist; \ - (_fl_)->objlist = (void *)(_obj_); \ - (_fl_)->free_cnt++; \ - } \ - RF_FREELIST_STAT_FREE(_fl_); \ -} +#define RF_FREELIST_FREE_CLEAN_NOUNLOCK(_fl_,_obj_,_nextp_,_clean_) \ +do { \ + RF_LOCK_MUTEX((_fl_)->lock); \ + if ((_fl_)->free_cnt == (_fl_)->max_free_cnt) { \ + _clean_(_obj_); \ + RF_Free(_obj_, (_fl_)->obj_size); \ + } \ + else { \ + RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \ + (_obj_)->_nextp_ = (_fl_)->objlist; \ + (_fl_)->objlist = (void *)(_obj_); \ + (_fl_)->free_cnt++; \ + } \ + RF_FREELIST_STAT_FREE(_fl_); \ +} while (0) /* - * fl = freelist - * nextp = name of "next" pointer in obj - * cast = cast to object type + * fl = FreeList. + * nextp = Name of "next" pointer in obj. + * cast = Cast to object type. 
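Taken together, the macros are used roughly as follows. The object type, field names and counts here are hypothetical; only the macro names and argument order come from this header (RF_FREELIST_DESTROY appears just below), and in the driver such calls typically sit in a module's configure and shutdown paths.

struct widget {
        struct widget   *w_next;        /* link field handed to the macros */
        int              w_state;
};

static RF_FreeList_t *widget_freelist;

void
widget_pool_init(void)
{
        /* Cache at most 64 free widgets, grow 8 at a time. */
        RF_FREELIST_CREATE(widget_freelist, 64, 8, sizeof(struct widget));
        if (widget_freelist == NULL)
                return;                         /* mutex init failed */
        RF_FREELIST_PRIME(widget_freelist, 16, w_next, (struct widget *));
}

struct widget *
widget_get(void)
{
        struct widget *w;

        RF_FREELIST_GET(widget_freelist, w, w_next, (struct widget *));
        return (w);                             /* NULL if allocation failed */
}

void
widget_put(struct widget *w)
{
        RF_FREELIST_FREE(widget_freelist, w, w_next);
}

void
widget_pool_shutdown(void)
{
        RF_FREELIST_DESTROY(widget_freelist, w_next, (struct widget *));
}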
*/ -#define RF_FREELIST_DESTROY(_fl_,_nextp_,_cast_) { \ - void *_cur, *_next; \ - RF_FREELIST_STAT_REPORT(_fl_); \ - rf_mutex_destroy(&((_fl_)->lock)); \ - for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \ - _next = (_cast_ _cur)->_nextp_; \ - RF_Free(_cur,(_fl_)->obj_size); \ - } \ - RF_Free(_fl_,sizeof(RF_FreeList_t)); \ -} +#define RF_FREELIST_DESTROY(_fl_,_nextp_,_cast_) \ +do { \ + void *_cur, *_next; \ + RF_FREELIST_STAT_REPORT(_fl_); \ + rf_mutex_destroy(&((_fl_)->lock)); \ + for (_cur = (_fl_)->objlist; _cur; _cur = _next) { \ + _next = (_cast_ _cur)->_nextp_; \ + RF_Free(_cur, (_fl_)->obj_size); \ + } \ + RF_Free(_fl_, sizeof(RF_FreeList_t)); \ +} while (0) /* - * fl = freelist - * nextp = name of "next" pointer in obj - * cast = cast to object type - * clean = func to undo obj init + * fl = FreeList. + * nextp = Name of "next" pointer in obj. + * cast = Cast to object type. + * clean = Func to undo obj init. */ -#define RF_FREELIST_DESTROY_CLEAN(_fl_,_nextp_,_cast_,_clean_) { \ - void *_cur, *_next; \ - RF_FREELIST_STAT_REPORT(_fl_); \ - rf_mutex_destroy(&((_fl_)->lock)); \ - for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \ - _next = (_cast_ _cur)->_nextp_; \ - _clean_ (_cur); \ - RF_Free(_cur,(_fl_)->obj_size); \ - } \ - RF_Free(_fl_,sizeof(RF_FreeList_t)); \ -} +#define RF_FREELIST_DESTROY_CLEAN(_fl_,_nextp_,_cast_,_clean_) \ +do { \ + void *_cur, *_next; \ + RF_FREELIST_STAT_REPORT(_fl_); \ + rf_mutex_destroy(&((_fl_)->lock)); \ + for (_cur = (_fl_)->objlist; _cur; _cur = _next) { \ + _next = (_cast_ _cur)->_nextp_; \ + _clean_(_cur); \ + RF_Free(_cur, (_fl_)->obj_size); \ + } \ + RF_Free(_fl_, sizeof(RF_FreeList_t)); \ +} while (0) /* - * fl = freelist - * nextp = name of "next" pointer in obj - * cast = cast to object type - * clean = func to undo obj init - * arg = arg for undo func + * fl = FreeList. + * nextp = Name of "next" pointer in obj. + * cast = Cast to object type. + * clean = Func to undo obj init. + * arg = Arg for undo func. */ -#define RF_FREELIST_DESTROY_CLEAN_ARG(_fl_,_nextp_,_cast_,_clean_,_arg_) { \ - void *_cur, *_next; \ - RF_FREELIST_STAT_REPORT(_fl_); \ - rf_mutex_destroy(&((_fl_)->lock)); \ - for(_cur=(_fl_)->objlist;_cur;_cur=_next) { \ - _next = (_cast_ _cur)->_nextp_; \ - _clean_ (_cur,_arg_); \ - RF_Free(_cur,(_fl_)->obj_size); \ - } \ - RF_Free(_fl_,sizeof(RF_FreeList_t)); \ -} - -#endif /* !_RF__RF_FREELIST_H_ */ +#define RF_FREELIST_DESTROY_CLEAN_ARG(_fl_,_nextp_,_cast_,_clean_,_arg_) \ +do { \ + void *_cur, *_next; \ + RF_FREELIST_STAT_REPORT(_fl_); \ + rf_mutex_destroy(&((_fl_)->lock)); \ + for (_cur = (_fl_)->objlist; _cur; _cur = _next) { \ + _next = (_cast_ _cur)->_nextp_; \ + _clean_(_cur, _arg_); \ + RF_Free(_cur, (_fl_)->obj_size); \ + } \ + RF_Free(_fl_, sizeof(RF_FreeList_t)); \ +} while (0) + +#endif /* !_RF__RF_FREELIST_H_ */ diff --git a/sys/dev/raidframe/rf_general.h b/sys/dev/raidframe/rf_general.h index c69d97088a8..12d40fa82b2 100644 --- a/sys/dev/raidframe/rf_general.h +++ b/sys/dev/raidframe/rf_general.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_general.h,v 1.4 2000/08/08 16:07:41 peter Exp $ */ +/* $OpenBSD: rf_general.h,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_general.h,v 1.5 2000/03/03 02:04:48 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,76 +29,86 @@ */ /* - * rf_general.h -- some general-use definitions + * rf_general.h -- Some general-use definitions. 
*/ /*#define NOASSERT*/ -#ifndef _RF__RF_GENERAL_H_ -#define _RF__RF_GENERAL_H_ +#ifndef _RF__RF_GENERAL_H_ +#define _RF__RF_GENERAL_H_ -/* error reporting and handling */ +/* Error reporting and handling. */ -#ifdef _KERNEL -#include<sys/systm.h> /* printf, sprintf, and friends */ +#ifdef _KERNEL +#include <sys/systm.h> /* printf, sprintf, and friends. */ #endif -#define RF_ERRORMSG(s) printf((s)) -#define RF_ERRORMSG1(s,a) printf((s),(a)) -#define RF_ERRORMSG2(s,a,b) printf((s),(a),(b)) -#define RF_ERRORMSG3(s,a,b,c) printf((s),(a),(b),(c)) +#define RF_ERRORMSG(s) printf((s)) +#define RF_ERRORMSG1(s,a) printf((s), (a)) +#define RF_ERRORMSG2(s,a,b) printf((s), (a), (b)) +#define RF_ERRORMSG3(s,a,b,c) printf((s), (a), (b), (c)) extern char rf_panicbuf[]; -#define RF_PANIC() {sprintf(rf_panicbuf,"raidframe error at line %d file %s",__LINE__,__FILE__); panic(rf_panicbuf);} - -#ifdef _KERNEL -#ifdef RF_ASSERT -#undef RF_ASSERT -#endif /* RF_ASSERT */ -#ifndef NOASSERT -#define RF_ASSERT(_x_) { \ - if (!(_x_)) { \ - sprintf(rf_panicbuf, \ - "raidframe error at line %d file %s (failed asserting %s)\n", \ - __LINE__, __FILE__, #_x_); \ - panic(rf_panicbuf); \ - } \ -} -#else /* !NOASSERT */ -#define RF_ASSERT(x) {/*noop*/} -#endif /* !NOASSERT */ -#else /* _KERNEL */ -#define RF_ASSERT(x) {/*noop*/} -#endif /* _KERNEL */ - -/* random stuff */ -#define RF_MAX(a,b) (((a) > (b)) ? (a) : (b)) -#define RF_MIN(a,b) (((a) < (b)) ? (a) : (b)) - -/* divide-by-zero check */ -#define RF_DB0_CHECK(a,b) ( ((b)==0) ? 0 : (a)/(b) ) - -/* get time of day */ -#define RF_GETTIME(_t) microtime(&(_t)) +#define RF_PANIC() \ +do { \ + sprintf(rf_panicbuf, "RAIDframe error at line %d file %s", \ + __LINE__, __FILE__); \ + panic(rf_panicbuf); \ +} while (0) + +#ifdef _KERNEL +#ifdef RF_ASSERT +#undef RF_ASSERT +#endif /* RF_ASSERT */ +#ifndef NOASSERT +#define RF_ASSERT(_x_) \ +do { \ + if (!(_x_)) { \ + sprintf(rf_panicbuf, "RAIDframe error at line %d" \ + " file %s (failed asserting %s)\n", __LINE__, \ + __FILE__, #_x_); \ + panic(rf_panicbuf); \ + } \ +} while (0) +#else /* !NOASSERT */ +#define RF_ASSERT(x) {/*noop*/} +#endif /* !NOASSERT */ +#else /* _KERNEL */ +#define RF_ASSERT(x) {/*noop*/} +#endif /* _KERNEL */ + +/* Random stuff. */ +#define RF_MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define RF_MIN(a,b) (((a) < (b)) ? (a) : (b)) + +/* Divide-by-zero check. */ +#define RF_DB0_CHECK(a,b) (((b)==0) ? 0 : (a)/(b)) + +/* Get time of day. */ +#define RF_GETTIME(_t) microtime(&(_t)) /* - * zero memory- not all bzero calls go through here, only - * those which in the kernel may have a user address + * Zero memory - Not all bzero calls go through here, only + * those which in the kernel may have a user address. */ -#define RF_BZERO(_bp,_b,_l) bzero(_b,_l) /* XXX This is likely - * incorrect. GO */ - - -#define RF_UL(x) ((unsigned long) (x)) -#define RF_PGMASK RF_UL(NBPG-1) -#define RF_BLIP(x) (NBPG - (RF_UL(x) & RF_PGMASK)) /* bytes left in page */ -#define RF_PAGE_ALIGNED(x) ((RF_UL(x) & RF_PGMASK) == 0) - -#ifdef __STDC__ -#define RF_STRING(_str_) #_str_ -#else /* __STDC__ */ -#define RF_STRING(_str_) "_str_" -#endif /* __STDC__ */ - -#endif /* !_RF__RF_GENERAL_H_ */ +#define RF_BZERO(_bp,_b,_l) bzero(_b, _l) /* + * XXX This is likely + * incorrect. GO + */ + +#define RF_UL(x) ((unsigned long)(x)) +#define RF_PGMASK RF_UL(NBPG-1) +#define RF_BLIP(x) (NBPG - (RF_UL(x) & RF_PGMASK)) /* + * Bytes left + * in page. 
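RF_BLIP() and RF_PAGE_ALIGNED() rely on NBPG being a power of two: masking with NBPG - 1 gives the offset within the page, so NBPG minus that offset is the number of bytes left before the next page boundary. A quick standalone check with a hypothetical 4 KB page (PAGE stands in for NBPG):

#include <stdio.h>

#define PAGE            4096UL
#define PGMASK          (PAGE - 1)
#define BLIP(x)         (PAGE - ((unsigned long)(x) & PGMASK))
#define PAGE_ALIGNED(x) (((unsigned long)(x) & PGMASK) == 0)

int
main(void)
{
        printf("%lu\n", BLIP(0x1ff0));          /* offset 0xff0, 16 bytes left */
        printf("%d\n", PAGE_ALIGNED(0x3000));   /* 1 */
        printf("%d\n", PAGE_ALIGNED(0x3001));   /* 0 */
        return (0);
}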
+ */ +#define RF_PAGE_ALIGNED(x) ((RF_UL(x) & RF_PGMASK) == 0) + +#ifdef __STDC__ +#define RF_STRING(_str_) #_str_ +#else /* __STDC__ */ +#define RF_STRING(_str_) "_str_" +#endif /* __STDC__ */ + +#endif /* !_RF__RF_GENERAL_H_ */ diff --git a/sys/dev/raidframe/rf_geniq.c b/sys/dev/raidframe/rf_geniq.c index fd824846f6c..a895fd43abd 100644 --- a/sys/dev/raidframe/rf_geniq.c +++ b/sys/dev/raidframe/rf_geniq.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_geniq.c,v 1.4 2001/07/09 17:20:07 fgsch Exp $ */ +/* $OpenBSD: rf_geniq.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_geniq.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,23 +28,22 @@ * rights to redistribute these changes. */ -/* rf_geniq.c - * code which implements Reed-Solomon encoding for RAID level 6 +/* + * rf_geniq.c -- Code which implements Reed-Solomon encoding for RAID level 6. */ -#define RF_UTILITY 1 +#define RF_UTILITY 1 #include "rf_pqdeg.h" /* - five bit lfsr - poly - feedback connections - - val = value; -*/ -int -lsfr_shift(val, poly) - unsigned val, poly; + * five bit lfsr + * poly - feedback connections + * + * val = value; + */ +int +lsfr_shift(unsigned val, unsigned poly) { unsigned new; unsigned int i; @@ -54,19 +54,21 @@ lsfr_shift(val, poly) for (i = 1; i <= 4; i++) { bit = (val >> (i - 1)) & 1; - if (poly & (1 << i)) /* there is a feedback connection */ + if (poly & (1 << i)) /* There is a feedback connection. */ new = new | ((bit ^ high) << i); else new = new | (bit << i); } return new; } -/* generate Q matricies for the data */ + + +/* Generate Q matrices for the data. */ RF_ua32_t rf_qfor[32]; -void -main() +int +main(void) { unsigned int i, j, l, a, b; unsigned int val; @@ -82,20 +84,20 @@ main() printf(" * GENERATED FILE -- DO NOT EDIT\n"); printf(" */\n"); printf("\n"); - printf("#ifndef _RF__RF_INVERTQ_H_\n"); - printf("#define _RF__RF_INVERTQ_H_\n"); + printf("#ifndef\t_RF__RF_INVERTQ_H_\n"); + printf("#define\t_RF__RF_INVERTQ_H_\n"); printf("\n"); printf("/*\n"); printf(" * rf_geniq.c must include rf_archs.h before including\n"); printf(" * this file (to get VPATH magic right with the way we\n"); - printf(" * generate this file in kernel trees)\n"); + printf(" * generate this file in kernel trees).\n"); printf(" */\n"); printf("/* #include \"rf_archs.h\" */\n"); printf("\n"); - printf("#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)\n"); + printf("#if\t(RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)\n"); printf("\n"); - printf("#define RF_Q_COLS 32\n"); - printf("RF_ua32_t rf_rn = {\n"); + printf("#define\tRF_Q_COLS\t32\n"); + printf("RF_ua32_t rf_rn = {"); k[0] = 1; for (j = 0; j < 31; j++) k[j + 1] = lsfr_shift(k[j], 5); @@ -105,7 +107,7 @@ main() printf("RF_ua32_t rf_qfor[32] = {\n"); for (i = 0; i < 32; i++) { - printf("/* i = %d */ { 0, ", i); + printf("/* i = %d */\t{0, ", i); rf_qfor[i][0] = 0; for (j = 1; j < 32; j++) { val = j; @@ -117,47 +119,58 @@ main() printf("},\n"); } printf("};\n"); - printf("#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)]\n"); + printf("#define\tRF_Q_DATA_COL(col_num)\trf_rn[col_num]," + " rf_qfor[28-(col_num)]\n"); /* generate the inverse tables. (i,j,p,q) */ /* The table just stores a. 
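
The rf_rn table that this generator prints (it also appears verbatim in the rf_invertq.h diff later in this change) is the orbit of 1 under lsfr_shift(k, 5), a 5-bit Galois LFSR step, which behaves as multiplication by x in GF(2^5) reduced by x^5 + x^2 + 1. A standalone re-implementation of that step, for illustration only; gf32_mul_x() is an invented name and is not the (partly elided) body of lsfr_shift():

#include <stdio.h>

/* Multiply a 5-bit GF(2^5) element by x, reducing by x^5 + x^2 + 1. */
static unsigned
gf32_mul_x(unsigned val)
{
	unsigned high = (val >> 4) & 1;

	val = (val << 1) & 0x1f;
	if (high)
		val ^= 0x05;		/* x^5 == x^2 + 1. */
	return (val);
}

int
main(void)
{
	unsigned k = 1;
	int j;

	/* Prints 1 2 4 8 16 5 10 20 13 26 ... and is back at 1 after 31 steps. */
	for (j = 0; j < 32; j++) {
		printf("%u%s", k, j == 31 ? "\n" : " ");
		k = gf32_mul_x(k);
	}
	return (0);
}

A primitive polynomial gives period 31, so the sequence walks every nonzero 5-bit value exactly once before repeating, matching the 32-entry rf_rn table that starts and ends with 1.
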
Get b back from the parity */ - printf("#ifdef _KERNEL\n"); - printf("RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */\n"); - printf("#elif defined(NO_PQ)\n"); + printf("#ifdef\t_KERNEL\n"); + printf("RF_ua1024_t rf_qinv[1];\t/* don't compile monster table" + " into kernel */\n"); + printf("#elif\tdefined(NO_PQ)\n"); printf("RF_ua1024_t rf_qinv[29*29];\n"); - printf("#else /* !_KERNEL && NO_PQ */\n"); + printf("#else\t/* !_KERNEL && NO_PQ */\n"); printf("RF_ua1024_t rf_qinv[29*29] = {\n"); for (i = 0; i < 29; i++) { for (j = 0; j < 29; j++) { - printf("/* i %d, j %d */{ ", i, j); + printf("/* i %d, j %d */\t{", i, j); if (i == j) for (l = 0; l < 1023; l++) printf("0, "); else { for (p = 0; p < 32; p++) for (q = 0; q < 32; q++) { - /* What are a, b such that a ^ - * b = p; and qfor[(28-i)][a + /* + * What are a, b such that a ^ + * b == p; and qfor[(28-i)][a * ^ rf_rn[i+1]] ^ * qfor[(28-j)][b ^ - * rf_rn[j+1]] = q. Solve by - * guessing a. Then testing. */ + * rf_rn[j+1]] == q. Solve by + * guessing a. Then testing. + */ for (a = 0; a < 32; a++) { b = a ^ p; - if ((rf_qfor[28 - i][a ^ k[i + 1]] ^ rf_qfor[28 - j][b ^ k[j + 1]]) == q) + if ((rf_qfor[28 - i] + [a ^ k[i + 1]] ^ + rf_qfor[28 - j] + [b ^ k[j + 1]]) == + q) break; } if (a == 32) - printf("unable to solve %d %d %d %d\n", i, j, p, q); - printf("%d,", a); + printf("unable to solve" + " %d %d %d %d\n", + i, j, p, q); + printf("%d, ", a); } } printf("},\n"); } } printf("};\n"); - printf("\n#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */\n\n"); - printf("#endif /* !_KERNEL && NO_PQ */\n"); - printf("#endif /* !_RF__RF_INVERTQ_H_ */\n"); + printf("\n#endif\t/* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */" + "\n\n"); + printf("#endif\t/* !_KERNEL && NO_PQ */\n"); + printf("#endif\t/* !_RF__RF_INVERTQ_H_ */\n"); exit(0); } diff --git a/sys/dev/raidframe/rf_hist.h b/sys/dev/raidframe/rf_hist.h index 70104aa4f90..5f6d984bfa7 100644 --- a/sys/dev/raidframe/rf_hist.h +++ b/sys/dev/raidframe/rf_hist.h @@ -1,9 +1,10 @@ -/* $OpenBSD: rf_hist.h,v 1.2 1999/02/16 00:02:51 niklas Exp $ */ +/* $OpenBSD: rf_hist.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_hist.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ + /* * rf_hist.h * - * Histgram operations for RAIDframe stats + * Histgram operations for RAIDframe stats. */ /* * Copyright (c) 1995 Carnegie-Mellon University. @@ -32,26 +33,26 @@ * rights to redistribute these changes. 
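
RF_HIST_ADD() above scales a microsecond value down to milliseconds and drops it into one of RF_HIST_NUM_BUCKETS fixed-width buckets, with everything at or beyond RF_HIST_MAX_VAL accumulated in the last bucket. A standalone sketch of the same bucketing; hist_add(), the plain unsigned counters, and the driver in main() are invented for illustration:

#include <stdio.h>

#define RF_HIST_RESOLUTION	5
#define RF_HIST_MIN_VAL		0
#define RF_HIST_MAX_VAL		1000
#define RF_HIST_RANGE		(RF_HIST_MAX_VAL - RF_HIST_MIN_VAL)
#define RF_HIST_NUM_BUCKETS	(RF_HIST_RANGE / RF_HIST_RESOLUTION + 1)

static void
hist_add(unsigned hist[], unsigned us)
{
	unsigned ms = us / 1000;	/* Microseconds -> milliseconds. */

	if (ms >= RF_HIST_MAX_VAL)
		hist[RF_HIST_NUM_BUCKETS - 1]++;	/* Overflow bucket. */
	else
		hist[(ms - RF_HIST_MIN_VAL) / RF_HIST_RESOLUTION]++;
}

int
main(void)
{
	unsigned hist[RF_HIST_NUM_BUCKETS] = { 0 };

	hist_add(hist, 12345);		/* 12 ms   -> bucket 2.    */
	hist_add(hist, 734000);		/* 734 ms  -> bucket 146.  */
	hist_add(hist, 2500000);	/* 2500 ms -> last bucket. */
	printf("%u %u %u\n", hist[2], hist[146], hist[RF_HIST_NUM_BUCKETS - 1]);
	return (0);
}

With these constants that is 200 regular 5 ms buckets (indices 0-199) plus an overflow bucket at index 200.
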
*/ -#ifndef _RF__RF_HIST_H_ -#define _RF__RF_HIST_H_ +#ifndef _RF__RF_HIST_H_ +#define _RF__RF_HIST_H_ #include "rf_types.h" -#define RF_HIST_RESOLUTION 5 -#define RF_HIST_MIN_VAL 0 -#define RF_HIST_MAX_VAL 1000 -#define RF_HIST_RANGE (RF_HIST_MAX_VAL - RF_HIST_MIN_VAL) -#define RF_HIST_NUM_BUCKETS (RF_HIST_RANGE / RF_HIST_RESOLUTION + 1) +#define RF_HIST_RESOLUTION 5 +#define RF_HIST_MIN_VAL 0 +#define RF_HIST_MAX_VAL 1000 +#define RF_HIST_RANGE (RF_HIST_MAX_VAL - RF_HIST_MIN_VAL) +#define RF_HIST_NUM_BUCKETS (RF_HIST_RANGE / RF_HIST_RESOLUTION + 1) typedef RF_uint32 RF_Hist_t; -#define RF_HIST_ADD(_hist_,_val_) { \ - RF_Hist_t val; \ - val = ((RF_Hist_t)(_val_)) / 1000; \ - if (val >= RF_HIST_MAX_VAL) \ - _hist_[RF_HIST_NUM_BUCKETS-1]++; \ - else \ - _hist_[(val - RF_HIST_MIN_VAL) / RF_HIST_RESOLUTION]++; \ -} +#define RF_HIST_ADD(_hist_,_val_) do { \ + RF_Hist_t val; \ + val = ((RF_Hist_t)(_val_)) / 1000; \ + if (val >= RF_HIST_MAX_VAL) \ + _hist_[RF_HIST_NUM_BUCKETS-1]++; \ + else \ + _hist_[(val - RF_HIST_MIN_VAL) / RF_HIST_RESOLUTION]++; \ +} while (0) -#endif /* !_RF__RF_HIST_H_ */ +#endif /* !_RF__RF_HIST_H_ */ diff --git a/sys/dev/raidframe/rf_interdecluster.c b/sys/dev/raidframe/rf_interdecluster.c index e2e395f1a0d..7b0068663f8 100644 --- a/sys/dev/raidframe/rf_interdecluster.c +++ b/sys/dev/raidframe/rf_interdecluster.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_interdecluster.c,v 1.3 2000/01/11 18:02:22 peter Exp $ */ +/* $OpenBSD: rf_interdecluster.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_interdecluster.c,v 1.4 2000/01/07 03:41:00 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,11 +28,11 @@ * rights to redistribute these changes. */ -/************************************************************ +/************************************************************* * - * rf_interdecluster.c -- implements interleaved declustering + * rf_interdecluster.c -- Implements interleaved declustering. * - ************************************************************/ + *************************************************************/ #include "rf_types.h" @@ -48,34 +49,33 @@ #include "rf_dagdegwr.h" typedef struct RF_InterdeclusterConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time and used - * by IdentifyStripe */ - RF_StripeCount_t numSparingRegions; - RF_StripeCount_t stripeUnitsPerSparingRegion; - RF_SectorNum_t mirrorStripeOffset; -} RF_InterdeclusterConfigInfo_t; - -int -rf_ConfigureInterDecluster( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) + /* Filled in at config time and used by IdentifyStripe. */ + RF_RowCol_t **stripeIdentifier; + RF_StripeCount_t numSparingRegions; + RF_StripeCount_t stripeUnitsPerSparingRegion; + RF_SectorNum_t mirrorStripeOffset; +} RF_InterdeclusterConfigInfo_t; + +int +rf_ConfigureInterDecluster(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_StripeCount_t num_used_stripeUnitsPerDisk; RF_InterdeclusterConfigInfo_t *info; RF_RowCol_t i, tmp, SUs_per_region; - /* create an Interleaved Declustering configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_InterdeclusterConfigInfo_t), (RF_InterdeclusterConfigInfo_t *), - raidPtr->cleanupList); + /* Create an Interleaved Declustering configuration structure. 
*/ + RF_MallocAndAdd(info, sizeof(RF_InterdeclusterConfigInfo_t), + (RF_InterdeclusterConfigInfo_t *), raidPtr->cleanupList); if (info == NULL) return (ENOMEM); layoutPtr->layoutSpecificInfo = (void *) info; - /* fill in the config structure. */ + /* Fill in the config structure. */ SUs_per_region = raidPtr->numCol * (raidPtr->numCol - 1); - info->stripeIdentifier = rf_make_2d_array(SUs_per_region, 2, raidPtr->cleanupList); + info->stripeIdentifier = rf_make_2d_array(SUs_per_region, 2, + raidPtr->cleanupList); if (info->stripeIdentifier == NULL) return (ENOMEM); for (i = 0; i < SUs_per_region; i++) { @@ -84,25 +84,35 @@ rf_ConfigureInterDecluster( info->stripeIdentifier[i][1] = (i + 1 + tmp) % raidPtr->numCol; } - /* no spare tables */ + /* No spare tables. */ RF_ASSERT(raidPtr->numRow == 1); - /* fill in the remaining layout parameters */ + /* Fill in the remaining layout parameters. */ - /* total number of stripes should a multiple of 2*numCol: Each sparing + /* + * Total number of stripes should a multiple of 2*numCol: Each sparing * region consists of 2*numCol stripes: n-1 primary copy, n-1 - * secondary copy and 2 for spare .. */ - num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % - (2 * raidPtr->numCol)); - info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol); - /* this is in fact the number of stripe units (that are primary data - * copies) in the sparing region */ - info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); - info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol + 1); - layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + * secondary copy and 2 for spare... + */ + num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - + (layoutPtr->stripeUnitsPerDisk % (2 * raidPtr->numCol)); + info->numSparingRegions = num_used_stripeUnitsPerDisk / + (2 * raidPtr->numCol); + /* + * This is in fact the number of stripe units (that are primary data + * copies) in the sparing region. 
+ */ + info->stripeUnitsPerSparingRegion = raidPtr->numCol * + (raidPtr->numCol - 1); + info->mirrorStripeOffset = info->numSparingRegions * + (raidPtr->numCol + 1); + layoutPtr->numStripe = info->numSparingRegions * + info->stripeUnitsPerSparingRegion; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << + raidPtr->logBytesPerSector; layoutPtr->numDataCol = 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * + layoutPtr->sectorsPerStripeUnit; layoutPtr->numParityCol = 1; layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; @@ -113,48 +123,48 @@ rf_ConfigureInterDecluster( raidPtr->totalSectors = (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; + layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / + layoutPtr->sectorsPerStripeUnit; return (0); } -int -rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t * raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t *raidPtr) { return (30); } -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t * raidPtr) +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t *raidPtr) { return (raidPtr->sectorsPerDisk); } -RF_ReconUnitCount_t -rf_GetNumSpareRUsInterDecluster( - RF_Raid_t * raidPtr) +RF_ReconUnitCount_t +rf_GetNumSpareRUsInterDecluster(RF_Raid_t *raidPtr) { - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) + raidPtr->Layout.layoutSpecificInfo; return (2 * ((RF_ReconUnitCount_t) info->numSparingRegions)); - /* the layout uses two stripe units per disk as spare within each - * sparing region */ + /* + * The layout uses two stripe units per disk as spare within each + * sparing region. + */ } -/* Maps to the primary copy of the data, i.e. the first mirror pair */ -void -rf_MapSectorInterDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) + +/* Maps to the primary copy of the data, i.e. the first mirror pair. 
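
Concrete numbers may make the sizing arithmetic above easier to follow. A standalone sketch, assuming numCol = 5 and 1003 stripe units per disk (both sample values invented for illustration); the variable names are local to this example:

#include <stdio.h>

int
main(void)
{
	int numCol = 5;			/* Disks in the array. */
	int stripeUnitsPerDisk = 1003;
	int num_used, numSparingRegions, SUsPerSparingRegion;
	int mirrorStripeOffset, numStripe;

	/* Round down to a multiple of 2*numCol stripe units per disk. */
	num_used = stripeUnitsPerDisk - (stripeUnitsPerDisk % (2 * numCol));
	numSparingRegions = num_used / (2 * numCol);
	SUsPerSparingRegion = numCol * (numCol - 1);
	mirrorStripeOffset = numSparingRegions * (numCol + 1);
	numStripe = numSparingRegions * SUsPerSparingRegion;

	/* Prints: used=1000 regions=100 SUs/region=20 mirror_off=600 stripes=2000 */
	printf("used=%d regions=%d SUs/region=%d mirror_off=%d stripes=%d\n",
	    num_used, numSparingRegions, SUsPerSparingRegion,
	    mirrorStripeOffset, numStripe);
	return (0);
}

So the trailing 3 stripe units on each disk go unused, and every sparing region contributes numCol * (numCol - 1) = 20 primary-copy stripe units to the array.
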
*/ +void +rf_MapSectorInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t* diskSector, int remap) { - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) + raidPtr->Layout.layoutSpecificInfo; RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; RF_StripeNum_t su_offset_into_disk, mirror_su_offset_into_disk; RF_StripeNum_t sparing_region_id, index_within_region; - int col_before_remap; + int col_before_remap; *row = 0; sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; @@ -164,53 +174,64 @@ rf_MapSectorInterDecluster( col_before_remap = index_within_region / (raidPtr->numCol - 1); if (!remap) { - *col = col_before_remap;; - *diskSector = (su_offset_into_disk + ((raidPtr->numCol - 1) * sparing_region_id)) * - raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *col = col_before_remap; + *diskSector = (su_offset_into_disk + ((raidPtr->numCol - 1) * + sparing_region_id)) * raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } else { - /* remap sector to spare space... */ - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); - *col = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol; + /* Remap sector to spare space... */ + *diskSector = sparing_region_id * (raidPtr->numCol + 1) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidPtr->numCol - 1) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *col = (index_within_region + 1 + mirror_su_offset_into_disk) % + raidPtr->numCol; *col = (*col + 1) % raidPtr->numCol; if (*col == col_before_remap) *col = (*col + 1) % raidPtr->numCol; } } + /* Maps to the second copy of the mirror pair. 
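
The remap branches place a failed stripe unit's data, and its mirror, into per-region spare stripe units. A standalone evaluation of the two *diskSector formulas (the data remap above, and the parity remap in rf_MapParityInterDecluster just below), assuming numCol = 5, 32 sectors per stripe unit, sparing region 3 and a 7-sector offset into the stripe unit; all sample values are invented for illustration:

#include <stdio.h>

int
main(void)
{
	long numCol = 5, sectPerSU = 32;
	long region = 3, offset = 7;	/* offset == raidSector % sectPerSU */
	long data_spare, parity_spare;

	/* Spare-area base for this sparing region, as in the remap branches. */
	data_spare = region * (numCol + 1) * sectPerSU;
	data_spare += (numCol - 1) * sectPerSU;		/* Data spare SU. */
	data_spare += offset;

	parity_spare = region * (numCol + 1) * sectPerSU;
	parity_spare += numCol * sectPerSU;		/* Parity spare SU. */
	parity_spare += offset;

	/* Prints: data=711 parity=743 */
	printf("data=%ld parity=%ld\n", data_spare, parity_spare);
	return (0);
}
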
*/ -void -rf_MapParityInterDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapParityInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; - RF_StripeNum_t sparing_region_id, index_within_region, mirror_su_offset_into_disk; + RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) + raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t sparing_region_id, index_within_region; + RF_StripeNum_t mirror_su_offset_into_disk; RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; - int col_before_remap; + int col_before_remap; sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; index_within_region = SUID % info->stripeUnitsPerSparingRegion; mirror_su_offset_into_disk = index_within_region / raidPtr->numCol; - col_before_remap = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol; + col_before_remap = (index_within_region + 1 + + mirror_su_offset_into_disk) % raidPtr->numCol; *row = 0; if (!remap) { *col = col_before_remap; - *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += sparing_region_id * (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += mirror_su_offset_into_disk * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *diskSector = info->mirrorStripeOffset * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += sparing_region_id * (raidPtr->numCol - 1) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += mirror_su_offset_into_disk * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } else { - /* remap parity to spare space ... */ - *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; - *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + /* Remap parity to spare space... 
*/ + *diskSector = sparing_region_id * (raidPtr->numCol + 1) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidPtr->numCol) * + raidPtr->Layout.sectorsPerStripeUnit; + *diskSector += (raidSector % + raidPtr->Layout.sectorsPerStripeUnit); *col = index_within_region / (raidPtr->numCol - 1); *col = (*col + 1) % raidPtr->numCol; if (*col == col_before_remap) @@ -218,14 +239,12 @@ rf_MapParityInterDecluster( } } -void -rf_IdentifyStripeInterDecluster( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) +void +rf_IdentifyStripeInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t **diskids, RF_RowCol_t *outRow) { - RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) + raidPtr->Layout.layoutSpecificInfo; RF_StripeNum_t SUID; SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; @@ -235,45 +254,46 @@ rf_IdentifyStripeInterDecluster( *diskids = info->stripeIdentifier[SUID]; } -void -rf_MapSIDToPSIDInterDecluster( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) +void +rf_MapSIDToPSIDInterDecluster( RF_RaidLayout_t *layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru) { *which_ru = 0; *psID = stripeID; } -/****************************************************************************** - * select a graph to perform a single-stripe access + + +/***************************************************************************** + * Select a graph to perform a single-stripe access. * - * Parameters: raidPtr - description of the physical array - * type - type of operation (read or write) requested - * asmap - logical & physical addresses for this access - * createFunc - name of function to use to create the graph + * Parameters: raidPtr - Description of the physical array. + * type - Type of operation (read or write) requested. + * asmap - Logical & physical addresses for this access. + * createFunc - Name of function to use to create the graph. *****************************************************************************/ -void -rf_RAIDIDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) +void +rf_RAIDIDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc) { RF_ASSERT(RF_IO_IS_R_OR_W(type)); if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); + RF_ERRORMSG("Multiple disks failed in a single group !" + " Aborting I/O operation.\n"); *createFunc = NULL; return; } - *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; + *createFunc = (type == RF_IO_TYPE_READ) + ? 
(RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG + : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; if (type == RF_IO_TYPE_READ) { if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateMirrorPartitionReadDAG; else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateRaidOneDegradedReadDAG; } else *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; } diff --git a/sys/dev/raidframe/rf_interdecluster.h b/sys/dev/raidframe/rf_interdecluster.h index ac9388b45ae..328d1595348 100644 --- a/sys/dev/raidframe/rf_interdecluster.h +++ b/sys/dev/raidframe/rf_interdecluster.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_interdecluster.h,v 1.2 1999/02/16 00:02:52 niklas Exp $ */ +/* $OpenBSD: rf_interdecluster.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_interdecluster.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -31,30 +32,23 @@ * header file for Interleaved Declustering */ -#ifndef _RF__RF_INTERDECLUSTER_H_ -#define _RF__RF_INTERDECLUSTER_H_ +#ifndef _RF__RF_INTERDECLUSTER_H_ +#define _RF__RF_INTERDECLUSTER_H_ -int -rf_ConfigureInterDecluster(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t * raidPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster(RF_Raid_t * raidPtr); -void -rf_MapSectorInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeInterDecluster(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDInterDecluster(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAIDIDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); +int rf_ConfigureInterDecluster(RF_ShutdownList_t **, RF_Raid_t *, + RF_Config_t *); +int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t *); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t *); +RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster(RF_Raid_t *); +void rf_MapSectorInterDecluster(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapParityInterDecluster(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_IdentifyStripeInterDecluster(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t **, RF_RowCol_t *); +void rf_MapSIDToPSIDInterDecluster(RF_RaidLayout_t *, RF_StripeNum_t, + RF_StripeNum_t *, RF_ReconUnitNum_t *); +void rf_RAIDIDagSelect(RF_Raid_t *, RF_IoType_t, RF_AccessStripeMap_t *, + RF_VoidFuncPtr *); -#endif /* !_RF__RF_INTERDECLUSTER_H_ */ +#endif /* !_RF__RF_INTERDECLUSTER_H_ */ diff --git a/sys/dev/raidframe/rf_invertq.c b/sys/dev/raidframe/rf_invertq.c index 224e331fe39..d0462e5ffe7 100644 --- a/sys/dev/raidframe/rf_invertq.c +++ b/sys/dev/raidframe/rf_invertq.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_invertq.c,v 1.2 1999/02/16 00:02:53 niklas Exp $ */ +/* $OpenBSD: rf_invertq.c,v 1.3 2002/12/16 07:01:04 tdeval 
Exp $ */ /* $NetBSD: rf_invertq.c,v 1.3 1999/02/05 00:06:12 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. diff --git a/sys/dev/raidframe/rf_invertq.h b/sys/dev/raidframe/rf_invertq.h index 35d387ae70a..99d4347b5e6 100644 --- a/sys/dev/raidframe/rf_invertq.h +++ b/sys/dev/raidframe/rf_invertq.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_invertq.h,v 1.2 1999/02/16 00:02:53 niklas Exp $ */ +/* $OpenBSD: rf_invertq.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_invertq.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ + /* * rf_invertq.h */ @@ -7,19 +8,19 @@ * This is normally a generated file. Not so for Net- and OpenBSD. */ -#ifndef _RF__RF_INVERTQ_H_ -#define _RF__RF_INVERTQ_H_ +#ifndef _RF__RF_INVERTQ_H_ +#define _RF__RF_INVERTQ_H_ /* * rf_geniq.c must include rf_archs.h before including * this file (to get VPATH magic right with the way we - * generate this file in kernel trees) + * generate this file in kernel trees). */ /* #include "rf_archs.h" */ -#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) +#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) -#define RF_Q_COLS 32 +#define RF_Q_COLS 32 RF_ua32_t rf_rn = { 1, 2, 4, 8, 16, 5, 10, 20, 13, 26, 17, 7, 14, 28, 29, 31, 27, 19, 3, 6, 12, 24, 21, 15, 30, 25, 23, 11, 22, 9, 18, 1,}; RF_ua32_t rf_qfor[32] = { @@ -56,9 +57,8 @@ RF_ua32_t rf_qfor[32] = { /* i = 30 */ {0, 18, 1, 19, 2, 16, 3, 17, 4, 22, 5, 23, 6, 20, 7, 21, 8, 26, 9, 27, 10, 24, 11, 25, 12, 30, 13, 31, 14, 28, 15, 29,}, /* i = 31 */ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,}, }; -#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)] +#define RF_Q_DATA_COL(col_num) rf_rn[col_num], rf_qfor[28-(col_num)] RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */ -#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > - * 0) */ -#endif /* !_RF__RF_INVERTQ_H_ */ +#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */ +#endif /* !_RF__RF_INVERTQ_H_ */ diff --git a/sys/dev/raidframe/rf_kintf.h b/sys/dev/raidframe/rf_kintf.h index 9b71fea32dc..13fbd4e5878 100644 --- a/sys/dev/raidframe/rf_kintf.h +++ b/sys/dev/raidframe/rf_kintf.h @@ -1,9 +1,10 @@ -/* $OpenBSD: rf_kintf.h,v 1.7 2001/12/29 21:51:18 tdeval Exp $ */ +/* $OpenBSD: rf_kintf.h,v 1.8 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_kintf.h,v 1.15 2000/10/20 02:24:45 oster Exp $ */ + /* * rf_kintf.h * - * RAIDframe exported kernel interface + * RAIDframe exported kernel interface. */ /* * Copyright (c) 1995 Carnegie-Mellon University. @@ -32,27 +33,27 @@ * rights to redistribute these changes. 
*/ -#ifndef _RF__RF_KINTF_H_ -#define _RF__RF_KINTF_H_ +#ifndef _RF__RF_KINTF_H_ +#define _RF__RF_KINTF_H_ #include "rf_types.h" -int rf_GetSpareTableFromDaemon(RF_SparetWait_t * req); -void raidstart(RF_Raid_t * raidPtr); -int rf_DispatchKernelIO(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req); +int rf_GetSpareTableFromDaemon(RF_SparetWait_t *); +void raidstart(RF_Raid_t *raidPtr); +int rf_DispatchKernelIO(RF_DiskQueue_t *, RF_DiskQueueData_t *); -int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *); -int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *); +int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *); +int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *); -#define RF_NORMAL_COMPONENT_UPDATE 0 -#define RF_FINAL_COMPONENT_UPDATE 1 +#define RF_NORMAL_COMPONENT_UPDATE 0 +#define RF_FINAL_COMPONENT_UPDATE 1 void rf_update_component_labels(RF_Raid_t *, int); -int raidlookup(char *, struct proc *, struct vnode **); -int raidmarkclean(dev_t dev, struct vnode *b_vp, int); -int raidmarkdirty(dev_t dev, struct vnode *b_vp, int); +int raidlookup(char *, struct proc *, struct vnode **); +int raidmarkclean(dev_t dev, struct vnode *b_vp, int); +int raidmarkdirty(dev_t dev, struct vnode *b_vp, int); void raid_init_component_label(RF_Raid_t *, RF_ComponentLabel_t *); void rf_print_component_label(RF_ComponentLabel_t *); void rf_UnconfigureVnodes( RF_Raid_t * ); void rf_close_component( RF_Raid_t *, struct vnode *, int); void rf_disk_unbusy(RF_RaidAccessDesc_t *); -#endif /* _RF__RF_KINTF_H_ */ +#endif /* _RF__RF_KINTF_H_ */ diff --git a/sys/dev/raidframe/rf_layout.c b/sys/dev/raidframe/rf_layout.c index bc83f0f7ea5..5b803863920 100644 --- a/sys/dev/raidframe/rf_layout.c +++ b/sys/dev/raidframe/rf_layout.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_layout.c,v 1.5 2002/08/09 15:10:20 tdeval Exp $ */ +/* $OpenBSD: rf_layout.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_layout.c,v 1.6 2000/04/17 19:35:12 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,7 +28,8 @@ * rights to redistribute these changes. */ -/* rf_layout.c -- driver code dealing with layout and mapping issues +/* + * rf_layout.c -- Driver code dealing with layout and mapping issues. 
*/ #include "rf_types.h" @@ -44,71 +46,77 @@ #include "rf_raid4.h" #include "rf_raid5.h" #include "rf_states.h" -#if RF_INCLUDE_RAID5_RS > 0 +#if RF_INCLUDE_RAID5_RS > 0 #include "rf_raid5_rotatedspare.h" -#endif /* RF_INCLUDE_RAID5_RS > 0 */ -#if RF_INCLUDE_CHAINDECLUSTER > 0 +#endif /* RF_INCLUDE_RAID5_RS > 0 */ +#if RF_INCLUDE_CHAINDECLUSTER > 0 #include "rf_chaindecluster.h" -#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ -#if RF_INCLUDE_INTERDECLUSTER > 0 +#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ +#if RF_INCLUDE_INTERDECLUSTER > 0 #include "rf_interdecluster.h" -#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ -#if RF_INCLUDE_PARITYLOGGING > 0 +#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ +#if RF_INCLUDE_PARITYLOGGING > 0 #include "rf_paritylogging.h" -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ -#if RF_INCLUDE_EVENODD > 0 +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#if RF_INCLUDE_EVENODD > 0 #include "rf_evenodd.h" -#endif /* RF_INCLUDE_EVENODD > 0 */ +#endif /* RF_INCLUDE_EVENODD > 0 */ #include "rf_general.h" #include "rf_driver.h" #include "rf_parityscan.h" #include "rf_reconbuffer.h" #include "rf_reconutil.h" -/*********************************************************************** +/***************************************************************************** * - * the layout switch defines all the layouts that are supported. - * fields are: layout ID, init routine, shutdown routine, map - * sector, map parity, identify stripe, dag selection, map stripeid - * to parity stripe id (optional), num faults tolerated, special - * flags. + * The layout switch defines all the layouts that are supported. + * Fields are: layout ID, init routine, shutdown routine, map sector, + * map parity, identify stripe, dag selection, map stripeid + * to parity stripe id (optional), num faults tolerated, + * special flags. * - ***********************************************************************/ - -static RF_AccessState_t DefaultStates[] = {rf_QuiesceState, - rf_IncrAccessesCountState, rf_MapState, rf_LockState, rf_CreateDAGState, - rf_ExecuteDAGState, rf_ProcessDAGState, rf_DecrAccessesCountState, -rf_CleanupState, rf_LastState}; -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && !defined(_KERNEL) -/* XXX Gross hack to shutup gcc -- it complains that DefaultStates is not -used when compiling this in userland.. I hate to burst it's bubble, but -DefaultStates is used all over the place here in the initialization of -lots of data structures. GO */ + *****************************************************************************/ + +static RF_AccessState_t DefaultStates[] = { + rf_QuiesceState, rf_IncrAccessesCountState, rf_MapState, + rf_LockState, rf_CreateDAGState, rf_ExecuteDAGState, + rf_ProcessDAGState, rf_DecrAccessesCountState, + rf_CleanupState, rf_LastState +}; + +#if (defined(__NetBSD__) || defined(__OpenBSD__)) && !defined(_KERNEL) +/* + * XXX Gross hack to shutup gcc -- It complains that DefaultStates is not + * used when compiling this in userland... I hate to burst it's bubble, but + * DefaultStates is used all over the place here in the initialization of + * lots of data structures. GO + */ RF_AccessState_t *NothingAtAll = DefaultStates; #endif -#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) -/* XXX Remove static so GCC doesn't complain about these being unused! */ -int distSpareYes = 1; -int distSpareNo = 0; +#if (defined(__NetBSD__) || defined(__OpenBSD__)) && defined(_KERNEL) +/* XXX Remove static so GCC doesn't complain about these being unused ! 
*/ +int distSpareYes = 1; +int distSpareNo = 0; #else static int distSpareYes = 1; static int distSpareNo = 0; #endif -#ifdef _KERNEL -#define RF_NK2(a,b) -#else /* _KERNEL */ -#define RF_NK2(a,b) a,b, -#endif /* _KERNEL */ - -#if RF_UTILITY > 0 -#define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) -#else /* RF_UTILITY > 0 */ -#define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p -#endif /* RF_UTILITY > 0 */ + +#ifdef _KERNEL +#define RF_NK2(a,b) +#else /* _KERNEL */ +#define RF_NK2(a,b) a,b, +#endif /* !_KERNEL */ + +#if RF_UTILITY > 0 +#define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) +#else /* RF_UTILITY > 0 */ +#define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p +#endif /* RF_UTILITY > 0 */ static RF_LayoutSW_t mapsw[] = { - /* parity declustering */ + /* Parity declustering. */ {'T', "Parity declustering", RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo) RF_NU( @@ -127,7 +135,7 @@ static RF_LayoutSW_t mapsw[] = { 0) }, - /* parity declustering with distributed sparing */ + /* Parity declustering with distributed sparing. */ {'D', "Distributed sparing parity declustering", RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareYes) RF_NU( @@ -146,13 +154,14 @@ static RF_LayoutSW_t mapsw[] = { RF_DISTRIBUTE_SPARE | RF_BD_DECLUSTERED) }, -#if RF_INCLUDE_DECL_PQ > 0 - /* declustered P+Q */ +#if RF_INCLUDE_DECL_PQ > 0 + /* Declustered P+Q. */ {'Q', "Declustered P+Q", RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo) RF_NU( rf_ConfigureDeclusteredPQ, - rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ, + rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, + rf_MapQDeclusteredPQ, rf_IdentifyStripeDeclusteredPQ, rf_PQDagSelect, rf_MapSIDToPSIDDeclustered, @@ -165,10 +174,10 @@ static RF_LayoutSW_t mapsw[] = { DefaultStates, 0) }, -#endif /* RF_INCLUDE_DECL_PQ > 0 */ +#endif /* RF_INCLUDE_DECL_PQ > 0 */ -#if RF_INCLUDE_RAID5_RS > 0 - /* RAID 5 with rotated sparing */ +#if RF_INCLUDE_RAID5_RS > 0 + /* RAID 5 with rotated sparing. */ {'R', "RAID Level 5 rotated sparing", RF_NK2(rf_MakeLayoutSpecificNULL, NULL) RF_NU( @@ -186,15 +195,16 @@ static RF_LayoutSW_t mapsw[] = { DefaultStates, RF_DISTRIBUTE_SPARE) }, -#endif /* RF_INCLUDE_RAID5_RS > 0 */ +#endif /* RF_INCLUDE_RAID5_RS > 0 */ -#if RF_INCLUDE_CHAINDECLUSTER > 0 - /* Chained Declustering */ +#if RF_INCLUDE_CHAINDECLUSTER > 0 + /* Chained Declustering. */ {'C', "Chained Declustering", RF_NK2(rf_MakeLayoutSpecificNULL, NULL) RF_NU( rf_ConfigureChainDecluster, - rf_MapSectorChainDecluster, rf_MapParityChainDecluster, NULL, + rf_MapSectorChainDecluster, rf_MapParityChainDecluster, + NULL, rf_IdentifyStripeChainDecluster, rf_RAIDCDagSelect, rf_MapSIDToPSIDChainDecluster, @@ -207,15 +217,16 @@ static RF_LayoutSW_t mapsw[] = { DefaultStates, 0) }, -#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ +#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */ -#if RF_INCLUDE_INTERDECLUSTER > 0 - /* Interleaved Declustering */ +#if RF_INCLUDE_INTERDECLUSTER > 0 + /* Interleaved Declustering. 
*/ {'I', "Interleaved Declustering", RF_NK2(rf_MakeLayoutSpecificNULL, NULL) RF_NU( rf_ConfigureInterDecluster, - rf_MapSectorInterDecluster, rf_MapParityInterDecluster, NULL, + rf_MapSectorInterDecluster, rf_MapParityInterDecluster, + NULL, rf_IdentifyStripeInterDecluster, rf_RAIDIDagSelect, rf_MapSIDToPSIDInterDecluster, @@ -228,10 +239,10 @@ static RF_LayoutSW_t mapsw[] = { DefaultStates, RF_DISTRIBUTE_SPARE) }, -#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ +#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */ -#if RF_INCLUDE_RAID0 > 0 - /* RAID level 0 */ +#if RF_INCLUDE_RAID0 > 0 + /* RAID level 0. */ {'0', "RAID Level 0", RF_NK2(rf_MakeLayoutSpecificNULL, NULL) RF_NU( @@ -249,10 +260,10 @@ static RF_LayoutSW_t mapsw[] = { DefaultStates, 0) }, -#endif /* RF_INCLUDE_RAID0 > 0 */ +#endif /* RF_INCLUDE_RAID0 > 0 */ -#if RF_INCLUDE_RAID1 > 0 - /* RAID level 1 */ +#if RF_INCLUDE_RAID1 > 0 + /* RAID level 1. */ {'1', "RAID Level 1", RF_NK2(rf_MakeLayoutSpecificNULL, NULL) RF_NU( @@ -270,10 +281,10 @@ static RF_LayoutSW_t mapsw[] = { DefaultStates, 0) }, -#endif /* RF_INCLUDE_RAID1 > 0 */ +#endif /* RF_INCLUDE_RAID1 > 0 */ -#if RF_INCLUDE_RAID4 > 0 - /* RAID level 4 */ +#if RF_INCLUDE_RAID4 > 0 + /* RAID level 4. */ {'4', "RAID Level 4", RF_NK2(rf_MakeLayoutSpecificNULL, NULL) RF_NU( @@ -291,10 +302,10 @@ static RF_LayoutSW_t mapsw[] = { DefaultStates, 0) }, -#endif /* RF_INCLUDE_RAID4 > 0 */ +#endif /* RF_INCLUDE_RAID4 > 0 */ -#if RF_INCLUDE_RAID5 > 0 - /* RAID level 5 */ +#if RF_INCLUDE_RAID5 > 0 + /* RAID level 5. */ {'5', "RAID Level 5", RF_NK2(rf_MakeLayoutSpecificNULL, NULL) RF_NU( @@ -312,10 +323,10 @@ static RF_LayoutSW_t mapsw[] = { DefaultStates, 0) }, -#endif /* RF_INCLUDE_RAID5 > 0 */ +#endif /* RF_INCLUDE_RAID5 > 0 */ -#if RF_INCLUDE_EVENODD > 0 - /* Evenodd */ +#if RF_INCLUDE_EVENODD > 0 + /* Evenodd. */ {'E', "EvenOdd", RF_NK2(rf_MakeLayoutSpecificNULL, NULL) RF_NU( @@ -327,37 +338,38 @@ static RF_LayoutSW_t mapsw[] = { NULL, NULL, NULL, NULL, - NULL, /* no reconstruction, yet */ + NULL, /* No reconstruction, yet. */ rf_VerifyParityEvenOdd, 2, DefaultStates, 0) }, -#endif /* RF_INCLUDE_EVENODD > 0 */ +#endif /* RF_INCLUDE_EVENODD > 0 */ -#if RF_INCLUDE_EVENODD > 0 - /* Declustered Evenodd */ +#if RF_INCLUDE_EVENODD > 0 + /* Declustered Evenodd. */ {'e', "Declustered EvenOdd", RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo) RF_NU( rf_ConfigureDeclusteredPQ, - rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ, + rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, + rf_MapQDeclusteredPQ, rf_IdentifyStripeDeclusteredPQ, rf_EODagSelect, rf_MapSIDToPSIDRAID5, rf_GetDefaultHeadSepLimitDeclustered, rf_GetDefaultNumFloatingReconBuffersPQ, NULL, NULL, - NULL, /* no reconstruction, yet */ + NULL, /* No reconstruction, yet. */ rf_VerifyParityEvenOdd, 2, DefaultStates, 0) }, -#endif /* RF_INCLUDE_EVENODD > 0 */ +#endif /* RF_INCLUDE_EVENODD > 0 */ -#if RF_INCLUDE_PARITYLOGGING > 0 - /* parity logging */ +#if RF_INCLUDE_PARITYLOGGING > 0 + /* Parity logging. */ {'L', "Parity logging", RF_NK2(rf_MakeLayoutSpecificNULL, NULL) RF_NU( @@ -375,9 +387,9 @@ static RF_LayoutSW_t mapsw[] = { DefaultStates, 0) }, -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - /* end-of-list marker */ + /* End-of-list marker. */ {'\0', NULL, RF_NK2(NULL, NULL) RF_NU( @@ -402,7 +414,7 @@ rf_GetLayout(RF_ParityConfig_t parityConfig) { RF_LayoutSW_t *p; - /* look up the specific layout */ + /* Look up the specific layout. 
*/ for (p = &mapsw[0]; p->parityConfig; p++) if (p->parityConfig == parityConfig) break; @@ -411,26 +423,25 @@ rf_GetLayout(RF_ParityConfig_t parityConfig) RF_ASSERT(p->parityConfig == parityConfig); return (p); } -#if RF_UTILITY == 0 -/***************************************************************************************** + +#if RF_UTILITY == 0 +/***************************************************************************** * - * ConfigureLayout -- + * ConfigureLayout * - * read the configuration file and set up the RAID layout parameters. After reading - * common params, invokes the layout-specific configuration routine to finish - * the configuration. + * Read the configuration file and set up the RAID layout parameters. + * After reading common params, invokes the layout-specific configuration + * routine to finish the configuration. * - ****************************************************************************************/ -int -rf_ConfigureLayout( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) + *****************************************************************************/ +int +rf_ConfigureLayout(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_ParityConfig_t parityConfig; RF_LayoutSW_t *p; - int retval; + int retval; layoutPtr->sectorsPerStripeUnit = cfgPtr->sectPerSU; layoutPtr->SUsPerPU = cfgPtr->SUsPerPU; @@ -438,13 +449,13 @@ rf_ConfigureLayout( parityConfig = cfgPtr->parityConfig; if (layoutPtr->sectorsPerStripeUnit <= 0) { - RF_ERRORMSG2("raid%d: Invalid sectorsPerStripeUnit: %d\n", - raidPtr->raidid, - (int)layoutPtr->sectorsPerStripeUnit ); - return (EINVAL); + RF_ERRORMSG2("raid%d: Invalid sectorsPerStripeUnit: %d.\n", + raidPtr->raidid, (int)layoutPtr->sectorsPerStripeUnit); + return (EINVAL); } - layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; + layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / + layoutPtr->sectorsPerStripeUnit; p = rf_GetLayout(parityConfig); if (p == NULL) { @@ -454,20 +465,23 @@ rf_ConfigureLayout( RF_ASSERT(p->parityConfig == parityConfig); layoutPtr->map = p; - /* initialize the specific layout */ + /* Initialize the specific layout. 
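
rf_GetLayout() above walks the layout switch linearly and relies on the '\0' parityConfig of the end-of-list marker as its terminator, with the NULL result handled by the caller in rf_ConfigureLayout(). A minimal standalone sketch of the same lookup pattern; struct layout_sw and the layouts[] table are stand-ins, not the real RF_LayoutSW_t and mapsw[]:

#include <stdio.h>

struct layout_sw {
	char		 parityConfig;
	const char	*configName;
};

static struct layout_sw layouts[] = {
	{ '0', "RAID Level 0" },
	{ '1', "RAID Level 1" },
	{ '5', "RAID Level 5" },
	{ '\0', NULL }			/* End-of-list marker. */
};

static struct layout_sw *
get_layout(char parityConfig)
{
	struct layout_sw *p;

	for (p = &layouts[0]; p->parityConfig; p++)
		if (p->parityConfig == parityConfig)
			break;
	if (p->parityConfig == '\0')
		return (NULL);		/* Unknown layout. */
	return (p);
}

int
main(void)
{
	struct layout_sw *p = get_layout('5');

	printf("%s\n", p ? p->configName : "not found");
	return (0);
}
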
*/ retval = (p->Configure) (listp, raidPtr, cfgPtr); if (retval) return (retval); - layoutPtr->dataBytesPerStripe = layoutPtr->dataSectorsPerStripe << raidPtr->logBytesPerSector; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + layoutPtr->dataBytesPerStripe = layoutPtr->dataSectorsPerStripe << + raidPtr->logBytesPerSector; + raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * + layoutPtr->sectorsPerStripeUnit; if (rf_forceNumFloatingReconBufs >= 0) { raidPtr->numFloatingReconBufs = rf_forceNumFloatingReconBufs; } else { - raidPtr->numFloatingReconBufs = rf_GetDefaultNumFloatingReconBuffers(raidPtr); + raidPtr->numFloatingReconBufs = + rf_GetDefaultNumFloatingReconBuffers(raidPtr); } if (rf_forceHeadSepLimit >= 0) { @@ -479,12 +493,12 @@ rf_ConfigureLayout( #ifdef RAIDDEBUG if (raidPtr->headSepLimit >= 0) { printf("RAIDFRAME(%s): Using %ld floating recon bufs" - " with head sep limit %ld\n", layoutPtr->map->configName, + " with head sep limit %ld.\n", layoutPtr->map->configName, (long) raidPtr->numFloatingReconBufs, (long) raidPtr->headSepLimit); } else { printf("RAIDFRAME(%s): Using %ld floating recon bufs" - " with no head sep limit\n", layoutPtr->map->configName, + " with no head sep limit.\n", layoutPtr->map->configName, (long) raidPtr->numFloatingReconBufs); } #endif /* RAIDDEBUG */ @@ -492,28 +506,28 @@ rf_ConfigureLayout( return (0); } -/* typically there is a 1-1 mapping between stripes and parity stripes. - * however, the declustering code supports packing multiple stripes into +/* + * Typically there is a 1-1 mapping between stripes and parity stripes. + * However, the declustering code supports packing multiple stripes into * a single parity stripe, so as to increase the size of the reconstruction - * unit without affecting the size of the stripe unit. This routine finds - * the parity stripe identifier associated with a stripe ID. There is also + * unit without affecting the size of the stripe unit. This routine finds + * the parity stripe identifier associated with a stripe ID. There is also * a RaidAddressToParityStripeID macro in layout.h */ -RF_StripeNum_t -rf_MapStripeIDToParityStripeID(layoutPtr, stripeID, which_ru) - RF_RaidLayout_t *layoutPtr; - RF_StripeNum_t stripeID; - RF_ReconUnitNum_t *which_ru; +RF_StripeNum_t +rf_MapStripeIDToParityStripeID(RF_RaidLayout_t *layoutPtr, + RF_StripeNum_t stripeID, RF_ReconUnitNum_t *which_ru) { RF_StripeNum_t parityStripeID; - /* quick exit in the common case of SUsPerPU==1 */ + /* Quick exit in the common case of SUsPerPU == 1. */ if ((layoutPtr->SUsPerPU == 1) || !layoutPtr->map->MapSIDToPSID) { *which_ru = 0; return (stripeID); } else { - (layoutPtr->map->MapSIDToPSID) (layoutPtr, stripeID, &parityStripeID, which_ru); + (layoutPtr->map->MapSIDToPSID) (layoutPtr, stripeID, + &parityStripeID, which_ru); } return (parityStripeID); } -#endif /* RF_UTILITY == 0 */ +#endif /* RF_UTILITY == 0 */ diff --git a/sys/dev/raidframe/rf_layout.h b/sys/dev/raidframe/rf_layout.h index a368fc8663a..beb62689a96 100644 --- a/sys/dev/raidframe/rf_layout.h +++ b/sys/dev/raidframe/rf_layout.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_layout.h,v 1.4 2000/08/08 16:07:42 peter Exp $ */ +/* $OpenBSD: rf_layout.h,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_layout.h,v 1.4 2000/05/23 00:44:38 thorpej Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,324 +28,465 @@ * rights to redistribute these changes. 
*/ -/* rf_layout.h -- header file defining layout data structures +/* + * rf_layout.h -- Header file defining layout data structures. */ -#ifndef _RF__RF_LAYOUT_H_ -#define _RF__RF_LAYOUT_H_ +#ifndef _RF__RF_LAYOUT_H_ +#define _RF__RF_LAYOUT_H_ #include "rf_types.h" #include "rf_archs.h" #include "rf_alloclist.h" -#ifndef _KERNEL +#ifndef _KERNEL #include <stdio.h> #endif -/***************************************************************************************** +/***************************************************************************** * * This structure identifies all layout-specific operations and parameters. * - ****************************************************************************************/ + *****************************************************************************/ typedef struct RF_LayoutSW_s { - RF_ParityConfig_t parityConfig; - const char *configName; - -#ifndef _KERNEL - /* layout-specific parsing */ - int (*MakeLayoutSpecific) (FILE * fp, RF_Config_t * cfgPtr, void *arg); - void *makeLayoutSpecificArg; -#endif /* !_KERNEL */ - -#if RF_UTILITY == 0 - /* initialization routine */ - int (*Configure) (RF_ShutdownList_t ** shutdownListp, RF_Raid_t * raidPtr, RF_Config_t * cfgPtr); - - /* routine to map RAID sector address -> physical (row, col, offset) */ - void (*MapSector) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); - - /* routine to map RAID sector address -> physical (r,c,o) of parity - * unit */ - void (*MapParity) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); - - /* routine to map RAID sector address -> physical (r,c,o) of Q unit */ - void (*MapQ) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, RF_RowCol_t * row, - RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); - - /* routine to identify the disks comprising a stripe */ - void (*IdentifyStripe) (RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); - - /* routine to select a dag */ - void (*SelectionFunc) (RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr *); -#if 0 - void (**createFunc) (RF_Raid_t *, - RF_AccessStripeMap_t *, - RF_DagHeader_t *, void *, - RF_RaidAccessFlags_t, -/**INDENT** Warning@88: Extra ) */ - RF_AllocListElem_t *)); + RF_ParityConfig_t parityConfig; + const char *configName; + +#ifndef _KERNEL + /* Layout-specific parsing. */ + int (*MakeLayoutSpecific) + (FILE *, RF_Config_t *, void *); + void *makeLayoutSpecificArg; +#endif /* !_KERNEL */ + +#if RF_UTILITY == 0 + /* Initialization routine. */ + int (*Configure) + (RF_ShutdownList_t **, RF_Raid_t *, + RF_Config_t *); + + /* Routine to map RAID sector address -> physical (row, col, offset). */ + void (*MapSector) + (RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); + /* + * Routine to map RAID sector address -> physical (r,c,o) of parity + * unit. + */ + void (*MapParity) + (RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); + + /* Routine to map RAID sector address -> physical (r,c,o) of Q unit. */ + void (*MapQ) + (RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t *, + RF_RowCol_t *, RF_SectorNum_t *, int); + + /* Routine to identify the disks comprising a stripe. */ + void (*IdentifyStripe) + (RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t **, + RF_RowCol_t *); + + /* Routine to select a dag. 
*/ + void (*SelectionFunc) + (RF_Raid_t *, RF_IoType_t, + RF_AccessStripeMap_t *, RF_VoidFuncPtr *); +#if 0 + void (**createFunc) + (RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, + RF_RaidAccessFlags_t, RF_AllocListElem_t *); #endif - /* map a stripe ID to a parity stripe ID. This is typically the - * identity mapping */ - void (*MapSIDToPSID) (RF_RaidLayout_t * layoutPtr, RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, RF_ReconUnitNum_t * which_ru); + /* + * Map a stripe ID to a parity stripe ID. This is typically the + * identity mapping. + */ + void (*MapSIDToPSID) + (RF_RaidLayout_t *, RF_StripeNum_t, + RF_StripeNum_t *, RF_ReconUnitNum_t *); - /* get default head separation limit (may be NULL) */ - RF_HeadSepLimit_t(*GetDefaultHeadSepLimit) (RF_Raid_t * raidPtr); + /* Get default head separation limit (may be NULL). */ + RF_HeadSepLimit_t (*GetDefaultHeadSepLimit) (RF_Raid_t *); - /* get default num recon buffers (may be NULL) */ - int (*GetDefaultNumFloatingReconBuffers) (RF_Raid_t * raidPtr); + /* Get default num recon buffers (may be NULL). */ + int (*GetDefaultNumFloatingReconBuffers) + (RF_Raid_t *); - /* get number of spare recon units (may be NULL) */ - RF_ReconUnitCount_t(*GetNumSpareRUs) (RF_Raid_t * raidPtr); + /* Get number of spare recon units (may be NULL). */ + RF_ReconUnitCount_t (*GetNumSpareRUs) (RF_Raid_t *); - /* spare table installation (may be NULL) */ - int (*InstallSpareTable) (RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol); + /* Spare table installation (may be NULL). */ + int (*InstallSpareTable) + (RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); - /* recon buffer submission function */ - int (*SubmitReconBuffer) (RF_ReconBuffer_t * rbuf, int keep_it, - int use_committed); + /* Recon buffer submission function. */ + int (*SubmitReconBuffer) + (RF_ReconBuffer_t *, int, int); /* - * verify that parity information for a stripe is correct - * see rf_parityscan.h for return vals - */ - int (*VerifyParity) (RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); + * Verify that parity information for a stripe is correct. + * See rf_parityscan.h for return vals. + */ + int (*VerifyParity) + (RF_Raid_t *, RF_RaidAddr_t, + RF_PhysDiskAddr_t *, int, + RF_RaidAccessFlags_t); - /* number of faults tolerated by this mapping */ - int faultsTolerated; + /* Number of faults tolerated by this mapping. */ + int faultsTolerated; - /* states to step through in an access. Must end with "LastState". The - * default is DefaultStates in rf_layout.c */ - RF_AccessState_t *states; + /* + * States to step through in an access. Must end with "LastState". The + * default is DefaultStates in rf_layout.c . + */ + RF_AccessState_t *states; RF_AccessStripeMapFlags_t flags; -#endif /* RF_UTILITY == 0 */ -} RF_LayoutSW_t; -/* enables remapping to spare location under dist sparing */ -#define RF_REMAP 1 -#define RF_DONT_REMAP 0 +#endif /* RF_UTILITY == 0 */ +} RF_LayoutSW_t; + +/* Enables remapping to spare location under dist sparing. */ +#define RF_REMAP 1 +#define RF_DONT_REMAP 0 /* - * Flags values for RF_AccessStripeMapFlags_t + * Flags values for RF_AccessStripeMapFlags_t. */ -#define RF_NO_STRIPE_LOCKS 0x0001 /* suppress stripe locks */ -#define RF_DISTRIBUTE_SPARE 0x0002 /* distribute spare space in archs - * that support it */ -#define RF_BD_DECLUSTERED 0x0004 /* declustering uses block designs */ +#define RF_NO_STRIPE_LOCKS 0x0001 /* Suppress stripe locks. 
*/ +#define RF_DISTRIBUTE_SPARE 0x0002 /* + * Distribute spare space in + * archs that support it. + */ +#define RF_BD_DECLUSTERED 0x0004 /* + * Declustering uses block + * designs. + */ /************************************************************************* * - * this structure forms the layout component of the main Raid - * structure. It describes everything needed to define and perform + * This structure forms the layout component of the main Raid + * structure. It describes everything needed to define and perform * the mapping of logical RAID addresses <-> physical disk addresses. * *************************************************************************/ struct RF_RaidLayout_s { - /* configuration parameters */ - RF_SectorCount_t sectorsPerStripeUnit; /* number of sectors in one - * stripe unit */ - RF_StripeCount_t SUsPerPU; /* stripe units per parity unit */ - RF_StripeCount_t SUsPerRU; /* stripe units per reconstruction - * unit */ - - /* redundant-but-useful info computed from the above, used in all - * layouts */ - RF_StripeCount_t numStripe; /* total number of stripes in the - * array */ - RF_SectorCount_t dataSectorsPerStripe; - RF_StripeCount_t dataStripeUnitsPerDisk; - u_int bytesPerStripeUnit; - u_int dataBytesPerStripe; - RF_StripeCount_t numDataCol; /* number of SUs of data per stripe - * (name here is a la RAID4) */ - RF_StripeCount_t numParityCol; /* number of SUs of parity per stripe. - * Always 1 for now */ - RF_StripeCount_t numParityLogCol; /* number of SUs of parity log - * per stripe. Always 1 for - * now */ - RF_StripeCount_t stripeUnitsPerDisk; - - RF_LayoutSW_t *map; /* ptr to struct holding mapping fns and - * information */ - void *layoutSpecificInfo; /* ptr to a structure holding - * layout-specific params */ + /* Configuration parameters. */ + RF_SectorCount_t sectorsPerStripeUnit; + /* + * Number of sectors in one + * stripe unit. + */ + RF_StripeCount_t SUsPerPU; /* + * Stripe units per parity unit. + */ + RF_StripeCount_t SUsPerRU; /* + * Stripe units per + * reconstruction unit. + */ + + /* + * Redundant-but-useful info computed from the above, used in all + * layouts. + */ + RF_StripeCount_t numStripe; /* + * Total number of stripes + * in the array. + */ + RF_SectorCount_t dataSectorsPerStripe; + RF_StripeCount_t dataStripeUnitsPerDisk; + u_int bytesPerStripeUnit; + u_int dataBytesPerStripe; + RF_StripeCount_t numDataCol; /* + * Number of SUs of data per + * stripe. + * (name here is a la RAID4) + */ + RF_StripeCount_t numParityCol; /* + * Number of SUs of parity + * per stripe. + * Always 1 for now. + */ + RF_StripeCount_t numParityLogCol; + /* + * Number of SUs of parity log + * per stripe. + * Always 1 for now. + */ + RF_StripeCount_t stripeUnitsPerDisk; + + RF_LayoutSW_t *map; /* + * Pointer to struct holding + * mapping fns and information. + */ + void *layoutSpecificInfo; + /* Pointer to a struct holding + * layout-specific params. + */ }; -/***************************************************************************************** + +/***************************************************************************** * - * The mapping code returns a pointer to a list of AccessStripeMap structures, which - * describes all the mapping information about an access. The list contains one - * AccessStripeMap structure per stripe touched by the access. Each element in the list - * contains a stripe identifier and a pointer to a list of PhysDiskAddr structuress. 
Each - * element in this latter list describes the physical location of a stripe unit accessed + * The mapping code returns a pointer to a list of AccessStripeMap + * structures, which describes all the mapping information about an access. + * The list contains one AccessStripeMap structure per stripe touched by + * the access. Each element in the list contains a stripe identifier and + * a pointer to a list of PhysDiskAddr structuress. Each element in this + * latter list describes the physical location of a stripe unit accessed * within the corresponding stripe. * - ****************************************************************************************/ + *****************************************************************************/ -#define RF_PDA_TYPE_DATA 0 -#define RF_PDA_TYPE_PARITY 1 -#define RF_PDA_TYPE_Q 2 +#define RF_PDA_TYPE_DATA 0 +#define RF_PDA_TYPE_PARITY 1 +#define RF_PDA_TYPE_Q 2 struct RF_PhysDiskAddr_s { - RF_RowCol_t row, col; /* disk identifier */ - RF_SectorNum_t startSector; /* sector offset into the disk */ - RF_SectorCount_t numSector; /* number of sectors accessed */ - int type; /* used by higher levels: currently, data, - * parity, or q */ - caddr_t bufPtr; /* pointer to buffer supplying/receiving data */ - RF_RaidAddr_t raidAddress; /* raid address corresponding to this - * physical disk address */ - RF_PhysDiskAddr_t *next; + RF_RowCol_t row, col; /* Disk identifier. */ + RF_SectorNum_t startSector; /* + * Sector offset into the disk. + */ + RF_SectorCount_t numSector; /* + * Number of sectors accessed. + */ + int type; /* + * Used by higher levels: + * currently data, parity, + * or q. + */ + caddr_t bufPtr; /* + * Pointer to buffer + * supplying/receiving data. + */ + RF_RaidAddr_t raidAddress; /* + * Raid address corresponding + * to this physical disk + * address. + */ + RF_PhysDiskAddr_t *next; }; -#define RF_MAX_FAILED_PDA RF_MAXCOL +#define RF_MAX_FAILED_PDA RF_MAXCOL struct RF_AccessStripeMap_s { - RF_StripeNum_t stripeID;/* the stripe index */ - RF_RaidAddr_t raidAddress; /* the starting raid address within - * this stripe */ - RF_RaidAddr_t endRaidAddress; /* raid address one sector past the - * end of the access */ - RF_SectorCount_t totalSectorsAccessed; /* total num sectors - * identified in physInfo list */ - RF_StripeCount_t numStripeUnitsAccessed; /* total num elements in - * physInfo list */ - int numDataFailed; /* number of failed data disks accessed */ - int numParityFailed;/* number of failed parity disks accessed (0 - * or 1) */ - int numQFailed; /* number of failed Q units accessed (0 or 1) */ - RF_AccessStripeMapFlags_t flags; /* various flags */ + RF_StripeNum_t stripeID; /* The stripe index. */ + RF_RaidAddr_t raidAddress; /* + * The starting raid address + * within this stripe. + */ + RF_RaidAddr_t endRaidAddress;/* + * Raid address one sector past + * the end of the access. + */ + RF_SectorCount_t totalSectorsAccessed; + /* + * Total num sectors + * identified in physInfo list. + */ + RF_StripeCount_t numStripeUnitsAccessed; + /* + * Total num elements in + * physInfo list. + */ + int numDataFailed; /* + * Number of failed data disks + * accessed. + */ + int numParityFailed; + /* + * Number of failed parity + * disks accessed (0 or 1). + */ + int numQFailed; /* + * Number of failed Q units + * accessed (0 or 1). + */ + RF_AccessStripeMapFlags_t flags; /* Various flags. 
*/ #if 0 - RF_PhysDiskAddr_t *failedPDA; /* points to the PDA that has failed */ - RF_PhysDiskAddr_t *failedPDAtwo; /* points to the second PDA - * that has failed, if any */ + RF_PhysDiskAddr_t *failedPDA; /* + * Points to the PDA that + * has failed. + */ + RF_PhysDiskAddr_t *failedPDAtwo; /* + * Points to the second PDA + * that has failed, if any. + */ #else - int numFailedPDAs; /* number of failed phys addrs */ - RF_PhysDiskAddr_t *failedPDAs[RF_MAX_FAILED_PDA]; /* array of failed phys - * addrs */ + int numFailedPDAs; /* + * Number of failed phys addrs. + */ + RF_PhysDiskAddr_t *failedPDAs[RF_MAX_FAILED_PDA]; + /* + * Array of failed phys addrs. + */ #endif - RF_PhysDiskAddr_t *physInfo; /* a list of PhysDiskAddr structs */ - RF_PhysDiskAddr_t *parityInfo; /* list of physical addrs for the - * parity (P of P + Q ) */ - RF_PhysDiskAddr_t *qInfo; /* list of physical addrs for the Q of - * P + Q */ - RF_LockReqDesc_t lockReqDesc; /* used for stripe locking */ - RF_RowCol_t origRow; /* the original row: we may redirect the acc - * to a different row */ - RF_AccessStripeMap_t *next; + RF_PhysDiskAddr_t *physInfo; /* + * A list of PhysDiskAddr + * structs. + */ + RF_PhysDiskAddr_t *parityInfo; /* + * List of physical addrs for + * the parity (P of P + Q). + */ + RF_PhysDiskAddr_t *qInfo; /* + * List of physical addrs for + * the Q of P + Q. + */ + RF_LockReqDesc_t lockReqDesc; /* Used for stripe locking. */ + RF_RowCol_t origRow; /* + * The original row: we may + * redirect the acc to a + * different row. + */ + RF_AccessStripeMap_t *next; }; -/* flag values */ -#define RF_ASM_REDIR_LARGE_WRITE 0x00000001 /* allows large-write creation - * code to redirect failed - * accs */ -#define RF_ASM_BAILOUT_DAG_USED 0x00000002 /* allows us to detect - * recursive calls to the - * bailout write dag */ -#define RF_ASM_FLAGS_LOCK_TRIED 0x00000004 /* we've acquired the lock on - * the first parity range in - * this parity stripe */ -#define RF_ASM_FLAGS_LOCK_TRIED2 0x00000008 /* we've acquired the lock on - * the 2nd parity range in - * this parity stripe */ -#define RF_ASM_FLAGS_FORCE_TRIED 0x00000010 /* we've done the force-recon - * call on this parity stripe */ -#define RF_ASM_FLAGS_RECON_BLOCKED 0x00000020 /* we blocked recon => we must - * unblock it later */ +/* Flag values. */ +#define RF_ASM_REDIR_LARGE_WRITE 0x00000001 /* + * Allows large-write + * creation code to + * redirect failed + * accs. + */ +#define RF_ASM_BAILOUT_DAG_USED 0x00000002 /* + * Allows us to detect + * recursive calls to + * the bailout write + * dag. + */ +#define RF_ASM_FLAGS_LOCK_TRIED 0x00000004 /* + * We've acquired the + * lock on the first + * parity range in + * this parity stripe. + */ +#define RF_ASM_FLAGS_LOCK_TRIED2 0x00000008 /* + * we've acquired the + * lock on the 2nd + * parity range in this + * parity stripe. + */ +#define RF_ASM_FLAGS_FORCE_TRIED 0x00000010 /* + * We've done the + * force-recon call on + * this parity stripe. + */ +#define RF_ASM_FLAGS_RECON_BLOCKED 0x00000020 /* + * We blocked recon + * => we must unblock + * it later. + */ struct RF_AccessStripeMapHeader_s { - RF_StripeCount_t numStripes; /* total number of stripes touched by - * this acc */ - RF_AccessStripeMap_t *stripeMap; /* pointer to the actual map. - * Also used for making lists */ + RF_StripeCount_t numStripes; /* + * Total number of stripes + * touched by this access. + */ + RF_AccessStripeMap_t *stripeMap; /* + * Pointer to the actual map. + * Also used for making lists. 
+ */ RF_AccessStripeMapHeader_t *next; }; -/***************************************************************************************** + + +/***************************************************************************** * - * various routines mapping addresses in the RAID address space. These work across - * all layouts. DON'T PUT ANY LAYOUT-SPECIFIC CODE HERE. + * Various routines mapping addresses in the RAID address space. These work + * across all layouts. DON'T PUT ANY LAYOUT-SPECIFIC CODE HERE. * - ****************************************************************************************/ + *****************************************************************************/ -/* return the identifier of the stripe containing the given address */ -#define rf_RaidAddressToStripeID(_layoutPtr_, _addr_) \ - ( ((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) / (_layoutPtr_)->numDataCol ) +/* Return the identifier of the stripe containing the given address. */ +#define rf_RaidAddressToStripeID(_layoutPtr_,_addr_) \ + (((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) / \ + (_layoutPtr_)->numDataCol) -/* return the raid address of the start of the indicates stripe ID */ -#define rf_StripeIDToRaidAddress(_layoutPtr_, _sid_) \ - ( ((_sid_) * (_layoutPtr_)->sectorsPerStripeUnit) * (_layoutPtr_)->numDataCol ) +/* Return the raid address of the start of the indicates stripe ID. */ +#define rf_StripeIDToRaidAddress(_layoutPtr_,_sid_) \ + (((_sid_) * (_layoutPtr_)->sectorsPerStripeUnit) * \ + (_layoutPtr_)->numDataCol) -/* return the identifier of the stripe containing the given stripe unit id */ -#define rf_StripeUnitIDToStripeID(_layoutPtr_, _addr_) \ - ( (_addr_) / (_layoutPtr_)->numDataCol ) +/* Return the identifier of the stripe containing the given stripe unit ID. */ +#define rf_StripeUnitIDToStripeID(_layoutPtr_,_addr_) \ + ((_addr_) / (_layoutPtr_)->numDataCol) -/* return the identifier of the stripe unit containing the given address */ -#define rf_RaidAddressToStripeUnitID(_layoutPtr_, _addr_) \ - ( ((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) ) +/* Return the identifier of the stripe unit containing the given address. */ +#define rf_RaidAddressToStripeUnitID(_layoutPtr_,_addr_) \ + (((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit)) -/* return the RAID address of next stripe boundary beyond the given address */ -#define rf_RaidAddressOfNextStripeBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->dataSectorsPerStripe)+1) * (_layoutPtr_)->dataSectorsPerStripe ) +/* Return the RAID address of next stripe boundary beyond the given address. */ +#define rf_RaidAddressOfNextStripeBoundary(_layoutPtr_,_addr_) \ + ((((_addr_) / (_layoutPtr_)->dataSectorsPerStripe) + 1) * \ + (_layoutPtr_)->dataSectorsPerStripe) -/* return the RAID address of the start of the stripe containing the given address */ -#define rf_RaidAddressOfPrevStripeBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->dataSectorsPerStripe)+0) * (_layoutPtr_)->dataSectorsPerStripe ) +/* + * Return the RAID address of the start of the stripe containing the + * given address. 
+ */ +#define rf_RaidAddressOfPrevStripeBoundary(_layoutPtr_,_addr_) \ + ((((_addr_) / (_layoutPtr_)->dataSectorsPerStripe) + 0) * \ + (_layoutPtr_)->dataSectorsPerStripe) -/* return the RAID address of next stripe unit boundary beyond the given address */ -#define rf_RaidAddressOfNextStripeUnitBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->sectorsPerStripeUnit)+1L)*(_layoutPtr_)->sectorsPerStripeUnit ) +/* + * Return the RAID address of next stripe unit boundary beyond the + * given address. + */ +#define rf_RaidAddressOfNextStripeUnitBoundary(_layoutPtr_,_addr_) \ + ((((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) + 1L) * \ + (_layoutPtr_)->sectorsPerStripeUnit) -/* return the RAID address of the start of the stripe unit containing RAID address _addr_ */ -#define rf_RaidAddressOfPrevStripeUnitBoundary(_layoutPtr_, _addr_) \ - ( (((_addr_)/(_layoutPtr_)->sectorsPerStripeUnit)+0)*(_layoutPtr_)->sectorsPerStripeUnit ) +/* + * Return the RAID address of the start of the stripe unit containing + * RAID address _addr_. + */ +#define rf_RaidAddressOfPrevStripeUnitBoundary(_layoutPtr_,_addr_) \ + ((((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) + 0) * \ + (_layoutPtr_)->sectorsPerStripeUnit) -/* returns the offset into the stripe. used by RaidAddressStripeAligned */ -#define rf_RaidAddressStripeOffset(_layoutPtr_, _addr_) \ - ( (_addr_) % ((_layoutPtr_)->dataSectorsPerStripe) ) +/* Returns the offset into the stripe. Used by RaidAddressStripeAligned. */ +#define rf_RaidAddressStripeOffset(_layoutPtr_,_addr_) \ + ((_addr_) % (_layoutPtr_)->dataSectorsPerStripe) -/* returns the offset into the stripe unit. */ -#define rf_StripeUnitOffset(_layoutPtr_, _addr_) \ - ( (_addr_) % ((_layoutPtr_)->sectorsPerStripeUnit) ) +/* Returns the offset into the stripe unit. */ +#define rf_StripeUnitOffset(_layoutPtr_,_addr_) \ + ((_addr_) % (_layoutPtr_)->sectorsPerStripeUnit) -/* returns nonzero if the given RAID address is stripe-aligned */ -#define rf_RaidAddressStripeAligned( __layoutPtr__, __addr__ ) \ - ( rf_RaidAddressStripeOffset(__layoutPtr__, __addr__) == 0 ) +/* Returns nonzero if the given RAID address is stripe-aligned. */ +#define rf_RaidAddressStripeAligned(__layoutPtr__,__addr__) \ + (rf_RaidAddressStripeOffset(__layoutPtr__, __addr__) == 0) -/* returns nonzero if the given address is stripe-unit aligned */ -#define rf_StripeUnitAligned( __layoutPtr__, __addr__ ) \ - ( rf_StripeUnitOffset(__layoutPtr__, __addr__) == 0 ) +/* Returns nonzero if the given address is stripe-unit aligned. */ +#define rf_StripeUnitAligned(__layoutPtr__,__addr__) \ + (rf_StripeUnitOffset(__layoutPtr__, __addr__) == 0) -/* convert an address expressed in RAID blocks to/from an addr expressed in bytes */ -#define rf_RaidAddressToByte(_raidPtr_, _addr_) \ - ( (_addr_) << ( (_raidPtr_)->logBytesPerSector ) ) +/* + * Convert an address expressed in RAID blocks to/from an addr expressed + * in bytes. + */ +#define rf_RaidAddressToByte(_raidPtr_,_addr_) \ + ((_addr_) << (_raidPtr_)->logBytesPerSector) -#define rf_ByteToRaidAddress(_raidPtr_, _addr_) \ - ( (_addr_) >> ( (_raidPtr_)->logBytesPerSector ) ) +#define rf_ByteToRaidAddress(_raidPtr_,_addr_) \ + ((_addr_) >> (_raidPtr_)->logBytesPerSector) -/* convert a raid address to/from a parity stripe ID. Conversion to raid address is easy, - * since we're asking for the address of the first sector in the parity stripe. Conversion to a - * parity stripe ID is more complex, since stripes are not contiguously allocated in - * parity stripes. 
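/*
 * Editor's note -- illustrative sketch only, not part of the commit above.
 * A tiny standalone program that mirrors the address arithmetic of the
 * rf_RaidAddressTo* / rf_RaidAddressOf* macros shown here, using invented
 * example geometry (32 sectors per stripe unit, 4 data columns).  It does
 * not depend on any RAIDframe header and compiles with any C compiler.
 */
#include <stdio.h>

int
main(void)
{
	long sectorsPerStripeUnit = 32;	/* Example value, not from the diff. */
	long numDataCol = 4;		/* Example value, not from the diff. */
	long dataSectorsPerStripe = sectorsPerStripeUnit * numDataCol;
	long addr = 300;		/* An arbitrary RAID address (sector). */

	/* Same formulas as the macros above. */
	long suID = addr / sectorsPerStripeUnit;
	long stripeID = suID / numDataCol;
	long prevSUBoundary = (addr / sectorsPerStripeUnit) * sectorsPerStripeUnit;
	long nextSUBoundary = (addr / sectorsPerStripeUnit + 1) * sectorsPerStripeUnit;
	long stripeOffset = addr % dataSectorsPerStripe;

	printf("addr %ld: SU %ld, stripe %ld, SU range [%ld,%ld), stripe offset %ld\n",
	    addr, suID, stripeID, prevSUBoundary, nextSUBoundary, stripeOffset);
	return (0);
}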
+/* + * Convert a raid address to/from a parity stripe ID. Conversion to raid + * address is easy, since we're asking for the address of the first sector + * in the parity stripe. Conversion to a parity stripe ID is more complex, + * since stripes are not contiguously allocated in parity stripes. */ -#define rf_RaidAddressToParityStripeID(_layoutPtr_, _addr_, _ru_num_) \ - rf_MapStripeIDToParityStripeID( (_layoutPtr_), rf_RaidAddressToStripeID( (_layoutPtr_), (_addr_) ), (_ru_num_) ) +#define rf_RaidAddressToParityStripeID(_layoutPtr_,_addr_,_ru_num_) \ + rf_MapStripeIDToParityStripeID((_layoutPtr_), \ + rf_RaidAddressToStripeID((_layoutPtr_), (_addr_)), (_ru_num_)) -#define rf_ParityStripeIDToRaidAddress(_layoutPtr_, _psid_) \ - ( (_psid_) * (_layoutPtr_)->SUsPerPU * (_layoutPtr_)->numDataCol * (_layoutPtr_)->sectorsPerStripeUnit ) +#define rf_ParityStripeIDToRaidAddress(_layoutPtr_,_psid_) \ + ((_psid_) * (_layoutPtr_)->SUsPerPU * \ + (_layoutPtr_)->numDataCol * (_layoutPtr_)->sectorsPerStripeUnit) -RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t parityConfig); -int -rf_ConfigureLayout(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -RF_StripeNum_t -rf_MapStripeIDToParityStripeID(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_ReconUnitNum_t * which_ru); +RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t); +int rf_ConfigureLayout(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +RF_StripeNum_t rf_MapStripeIDToParityStripeID(RF_RaidLayout_t *, + RF_StripeNum_t, RF_ReconUnitNum_t *); -#endif /* !_RF__RF_LAYOUT_H_ */ +#endif /* !_RF__RF_LAYOUT_H_ */ diff --git a/sys/dev/raidframe/rf_map.c b/sys/dev/raidframe/rf_map.c index db5d6c7fd1c..bc36e17aaa8 100644 --- a/sys/dev/raidframe/rf_map.c +++ b/sys/dev/raidframe/rf_map.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_map.c,v 1.4 2000/08/08 16:07:42 peter Exp $ */ +/* $OpenBSD: rf_map.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_map.c,v 1.5 2000/06/29 00:22:27 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,11 +28,11 @@ * rights to redistribute these changes. */ -/************************************************************************** +/***************************************************************************** * - * map.c -- main code for mapping RAID addresses to physical disk addresses + * map.c -- Main code for mapping RAID addresses to physical disk addresses. * - **************************************************************************/ + *****************************************************************************/ #include "rf_types.h" #include "rf_threadstuff.h" @@ -41,67 +42,74 @@ #include "rf_freelist.h" #include "rf_shutdown.h" -static void rf_FreePDAList(RF_PhysDiskAddr_t * start, RF_PhysDiskAddr_t * end, int count); -static void -rf_FreeASMList(RF_AccessStripeMap_t * start, RF_AccessStripeMap_t * end, - int count); +void rf_FreePDAList(RF_PhysDiskAddr_t *, RF_PhysDiskAddr_t *, int); +void rf_FreeASMList(RF_AccessStripeMap_t *, RF_AccessStripeMap_t *, int); -/***************************************************************************************** +/***************************************************************************** * - * MapAccess -- main 1st order mapping routine. + * MapAccess -- Main 1st order mapping routine. * - * Maps an access in the RAID address space to the corresponding set of physical disk - * addresses. The result is returned as a list of AccessStripeMap structures, one per - * stripe accessed. 
Each ASM structure contains a pointer to a list of PhysDiskAddr - * structures, which describe the physical locations touched by the user access. Note - * that this routine returns only static mapping information, i.e. the list of physical - * addresses returned does not necessarily identify the set of physical locations that - * will actually be read or written. + * Maps an access in the RAID address space to the corresponding set of + * physical disk addresses. The result is returned as a list of + * AccessStripeMap structures, one per stripe accessed. Each ASM structure + * contains a pointer to a list of PhysDiskAddr structures, which describe + * the physical locations touched by the user access. Note that this routine + * returns only static mapping information, i.e. the list of physical + * addresses returned does not necessarily identify the set of physical + * locations that will actually be read or written. * - * The routine also maps the parity. The physical disk location returned always - * indicates the entire parity unit, even when only a subset of it is being accessed. - * This is because an access that is not stripe unit aligned but that spans a stripe - * unit boundary may require access two distinct portions of the parity unit, and we - * can't yet tell which portion(s) we'll actually need. We leave it up to the algorithm + * The routine also maps the parity. The physical disk location returned + * always indicates the entire parity unit, even when only a subset of it + * is being accessed. This is because an access that is not stripe unit + * aligned but that spans a stripe unit boundary may require access two + * distinct portions of the parity unit, and we can't yet tell which + * portion(s) we'll actually need. We leave it up to the algorithm * selection code to decide what subset of the parity unit to access. * - * Note that addresses in the RAID address space must always be maintained as - * longs, instead of ints. + * Note that addresses in the RAID address space must always be maintained + * as longs, instead of ints. * - * This routine returns NULL if numBlocks is 0 + * This routine returns NULL if numBlocks is 0. * - ****************************************************************************************/ + *****************************************************************************/ RF_AccessStripeMapHeader_t * -rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddress; /* starting address in RAID address - * space */ - RF_SectorCount_t numBlocks; /* number of blocks in RAID address - * space to access */ - caddr_t buffer; /* buffer to supply/receive data */ - int remap; /* 1 => remap addresses to spare space */ +rf_MapAccess( + RF_Raid_t *raidPtr, + RF_RaidAddr_t raidAddress, /* + * Starting address in RAID address + * space. + */ + RF_SectorCount_t numBlocks, /* + * Number of blocks in RAID address + * space to access. + */ + caddr_t buffer, /* Buffer to supply/receive data. */ + int remap /* + * 1 => remap addresses to spare space. + */ +) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_AccessStripeMapHeader_t *asm_hdr = NULL; RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL; - int faultsTolerated = layoutPtr->map->faultsTolerated; - RF_RaidAddr_t startAddress = raidAddress; /* we'll change - * raidAddress along the - * way */ + int faultsTolerated = layoutPtr->map->faultsTolerated; + /* We'll change raidAddress along the way. 
*/ + RF_RaidAddr_t startAddress = raidAddress; RF_RaidAddr_t endAddress = raidAddress + numBlocks; RF_RaidDisk_t **disks = raidPtr->Disks; RF_PhysDiskAddr_t *pda_p, *pda_q; RF_StripeCount_t numStripes = 0; - RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress, nextStripeUnitAddress; + RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress; + RF_RaidAddr_t nextStripeUnitAddress; RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr; RF_StripeCount_t totStripes; RF_StripeNum_t stripeID, lastSID, SUID, lastSUID; RF_AccessStripeMap_t *asmList, *t_asm; RF_PhysDiskAddr_t *pdaList, *t_pda; - /* allocate all the ASMs and PDAs up front */ + /* Allocate all the ASMs and PDAs up front. */ lastRaidAddr = raidAddress + numBlocks - 1; stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress); lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr); @@ -110,18 +118,21 @@ rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap) lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr); asmList = rf_AllocASMList(totStripes); - pdaList = rf_AllocPDAList(lastSUID - SUID + 1 + faultsTolerated * totStripes); /* may also need pda(s) - * per stripe for parity */ + pdaList = rf_AllocPDAList(lastSUID - SUID + 1 + + faultsTolerated * totStripes); /* + * May also need pda(s) + * per stripe for parity. + */ if (raidAddress + numBlocks > raidPtr->totalSectors) { - RF_ERRORMSG1("Unable to map access because offset (%d) was invalid\n", - (int) raidAddress); + RF_ERRORMSG1("Unable to map access because offset (%d)" + " was invalid\n", (int) raidAddress); return (NULL); } if (rf_mapDebug) rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks); for (; raidAddress < endAddress;) { - /* make the next stripe structure */ + /* Make the next stripe structure. */ RF_ASSERT(asmList); t_asm = asmList; asmList = asmList->next; @@ -134,20 +145,24 @@ rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap) } numStripes++; - /* map SUs from current location to the end of the stripe */ - asm_p->stripeID = /* rf_RaidAddressToStripeID(layoutPtr, - raidAddress) */ stripeID++; - stripeRealEndAddress = rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress); + /* Map SUs from current location to the end of the stripe. */ + asm_p->stripeID = + /* rf_RaidAddressToStripeID(layoutPtr, raidAddress) */ + stripeID++; + stripeRealEndAddress = + rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress); stripeEndAddress = RF_MIN(endAddress, stripeRealEndAddress); asm_p->raidAddress = raidAddress; asm_p->endRaidAddress = stripeEndAddress; - /* map each stripe unit in the stripe */ + /* Map each stripe unit in the stripe. */ pda_p = NULL; - startAddrWithinStripe = raidAddress; /* Raid addr of start of - * portion of access - * that is within this - * stripe */ + /* + * Raid addr of start of portion of access that is within this + * stripe. + */ + startAddrWithinStripe = raidAddress; + for (; raidAddress < stripeEndAddress;) { RF_ASSERT(pdaList); t_pda = pdaList; @@ -161,52 +176,75 @@ rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap) } pda_p->type = RF_PDA_TYPE_DATA; - (layoutPtr->map->MapSector) (raidPtr, raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); - - /* mark any failures we find. 
failedPDA is don't-care - * if there is more than one failure */ - pda_p->raidAddress = raidAddress; /* the RAID address - * corresponding to this - * physical disk address */ - nextStripeUnitAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, raidAddress); - pda_p->numSector = RF_MIN(endAddress, nextStripeUnitAddress) - raidAddress; + (layoutPtr->map->MapSector) (raidPtr, raidAddress, + &(pda_p->row), &(pda_p->col), + &(pda_p->startSector), remap); + + /* + * Mark any failures we find. + * failedPDA is don't-care if there is more than + * one failure. + */ + /* + * The RAID address corresponding to this physical + * disk address. + */ + pda_p->raidAddress = raidAddress; + nextStripeUnitAddress = + rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, + raidAddress); + pda_p->numSector = RF_MIN(endAddress, + nextStripeUnitAddress) - raidAddress; RF_ASSERT(pda_p->numSector != 0); rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 0); - pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr, (raidAddress - startAddress)); + pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr, + (raidAddress - startAddress)); asm_p->totalSectorsAccessed += pda_p->numSector; asm_p->numStripeUnitsAccessed++; - asm_p->origRow = pda_p->row; /* redundant but + asm_p->origRow = pda_p->row; /* + * Redundant but * harmless to do this * in every loop - * iteration */ + * iteration. + */ raidAddress = RF_MIN(endAddress, nextStripeUnitAddress); } - /* Map the parity. At this stage, the startSector and + /* + * Map the parity. At this stage, the startSector and * numSector fields for the parity unit are always set to * indicate the entire parity unit. We may modify this after - * mapping the data portion. */ + * mapping the data portion. + */ switch (faultsTolerated) { case 0: break; - case 1: /* single fault tolerant */ + case 1: /* Single fault tolerant. */ RF_ASSERT(pdaList); t_pda = pdaList; pdaList = pdaList->next; bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); pda_p = asm_p->parityInfo = t_pda; pda_p->type = RF_PDA_TYPE_PARITY; - (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); + (layoutPtr->map->MapParity) (raidPtr, + rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, + startAddrWithinStripe), &(pda_p->row), + &(pda_p->col), &(pda_p->startSector), remap); pda_p->numSector = layoutPtr->sectorsPerStripeUnit; - /* raidAddr may be needed to find unit to redirect to */ - pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); + /* + * raidAddr may be needed to find unit to redirect to. + */ + pda_p->raidAddress = + rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, + startAddrWithinStripe); rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); - rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); + rf_ASMParityAdjust(asm_p->parityInfo, + startAddrWithinStripe, endAddress, + layoutPtr, asm_p); break; - case 2: /* two fault tolerant */ + case 2: /* Two fault tolerant. 
*/ RF_ASSERT(pdaList && pdaList->next); t_pda = pdaList; pdaList = pdaList->next; @@ -218,24 +256,38 @@ rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap) bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t)); pda_q = asm_p->qInfo = t_pda; pda_q->type = RF_PDA_TYPE_Q; - (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); - (layoutPtr->map->MapQ) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), - &(pda_q->row), &(pda_q->col), &(pda_q->startSector), remap); - pda_q->numSector = pda_p->numSector = layoutPtr->sectorsPerStripeUnit; - /* raidAddr may be needed to find unit to redirect to */ - pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); - pda_q->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); - /* failure mode stuff */ + (layoutPtr->map->MapParity) (raidPtr, + rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, + startAddrWithinStripe), &(pda_p->row), + &(pda_p->col), &(pda_p->startSector), remap); + (layoutPtr->map->MapQ) (raidPtr, + rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, + startAddrWithinStripe), &(pda_q->row), + &(pda_q->col), &(pda_q->startSector), remap); + pda_q->numSector = pda_p->numSector = + layoutPtr->sectorsPerStripeUnit; + /* + * raidAddr may be needed to find unit to redirect to. + */ + pda_p->raidAddress = + rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, + startAddrWithinStripe); + pda_q->raidAddress = + rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, + startAddrWithinStripe); + /* Failure mode stuff. */ rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); rf_ASMCheckStatus(raidPtr, pda_q, asm_p, disks, 1); - rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); - rf_ASMParityAdjust(asm_p->qInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); + rf_ASMParityAdjust(asm_p->parityInfo, + startAddrWithinStripe, endAddress, + layoutPtr, asm_p); + rf_ASMParityAdjust(asm_p->qInfo, startAddrWithinStripe, + endAddress, layoutPtr, asm_p); break; } } RF_ASSERT(asmList == NULL && pdaList == NULL); - /* make the header structure */ + /* Make the header structure. */ asm_hdr = rf_AllocAccessStripeMapHeader(); RF_ASSERT(numStripes == totStripes); asm_hdr->numStripes = numStripes; @@ -245,25 +297,24 @@ rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap) rf_PrintAccessStripeMap(asm_hdr); return (asm_hdr); } -/***************************************************************************************** + +/***************************************************************************** * This routine walks through an ASM list and marks the PDAs that have failed. * It's called only when a disk failure causes an in-flight DAG to fail. - * The parity may consist of two components, but we want to use only one failedPDA - * pointer. Thus we set failedPDA to point to the first parity component, and rely - * on the rest of the code to do the right thing with this. - ****************************************************************************************/ - -void -rf_MarkFailuresInASMList(raidPtr, asm_h) - RF_Raid_t *raidPtr; - RF_AccessStripeMapHeader_t *asm_h; + * The parity may consist of two components, but we want to use only one + * failedPDA pointer. Thus we set failedPDA to point to the first parity + * component, and rely on the rest of the code to do the right thing with this. 
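/*
 * Editor's note -- illustrative sketch only, not part of the commit above.
 * A self-contained model of the bookkeeping rf_MarkFailuresInASMList() does:
 * walk a singly-linked list of per-stripe-unit records and collect the ones
 * that hit a failed disk into a small array plus a counter.  The "failed"
 * flag below stands in for the RF_DEAD_DISK() check against the real disk
 * status array; the struct and field names are invented for the example.
 */
#include <stdio.h>
#include <string.h>

#define MAX_FAILED	8

struct pda {
	int		col;	/* Which disk column this unit lives on. */
	int		failed;	/* 1 if that disk is dead (stand-in check). */
	struct pda	*next;
};

int
main(void)
{
	struct pda c = { 2, 0, NULL }, b = { 1, 1, &c }, a = { 0, 0, &b };
	struct pda *failedPDAs[MAX_FAILED], *p;
	int numFailed = 0;

	memset(failedPDAs, 0, sizeof(failedPDAs));
	for (p = &a; p != NULL; p = p->next) {
		if (p->failed && numFailed < MAX_FAILED)
			failedPDAs[numFailed++] = p;
	}
	printf("%d failed unit(s); first failed column: %d\n",
	    numFailed, numFailed ? failedPDAs[0]->col : -1);
	return (0);
}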
+ *****************************************************************************/ +void +rf_MarkFailuresInASMList(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asm_h) { RF_RaidDisk_t **disks = raidPtr->Disks; RF_AccessStripeMap_t *asmap; RF_PhysDiskAddr_t *pda; for (asmap = asm_h->stripeMap; asmap; asmap = asmap->next) { - asmap->numDataFailed = asmap->numParityFailed = asmap->numQFailed = 0; + asmap->numDataFailed = asmap->numParityFailed = + asmap->numQFailed = 0; asmap->numFailedPDAs = 0; bzero((char *) asmap->failedPDAs, RF_MAX_FAILED_PDA * sizeof(RF_PhysDiskAddr_t *)); @@ -288,14 +339,14 @@ rf_MarkFailuresInASMList(raidPtr, asm_h) } } } -/***************************************************************************************** + +/***************************************************************************** * - * DuplicateASM -- duplicates an ASM and returns the new one + * DuplicateASM -- Duplicates an ASM and returns the new one. * - ****************************************************************************************/ + *****************************************************************************/ RF_AccessStripeMap_t * -rf_DuplicateASM(asmap) - RF_AccessStripeMap_t *asmap; +rf_DuplicateASM(RF_AccessStripeMap_t *asmap) { RF_AccessStripeMap_t *new_asm; RF_PhysDiskAddr_t *pda, *new_pda, *t_pda; @@ -309,8 +360,8 @@ rf_DuplicateASM(asmap) new_asm->parityInfo = NULL; new_asm->next = NULL; - for (pda = asmap->physInfo; pda; pda = pda->next) { /* copy the physInfo - * list */ + for (pda = asmap->physInfo; pda; pda = pda->next) { + /* Copy the physInfo list. */ t_pda = rf_AllocPhysDiskAddr(); bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t)); t_pda->next = NULL; @@ -324,8 +375,8 @@ rf_DuplicateASM(asmap) if (pda == asmap->failedPDAs[0]) new_asm->failedPDAs[0] = t_pda; } - for (pda = asmap->parityInfo; pda; pda = pda->next) { /* copy the parityInfo - * list */ + for (pda = asmap->parityInfo; pda; pda = pda->next) { + /* Copy the parityInfo list. */ t_pda = rf_AllocPhysDiskAddr(); bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t)); t_pda->next = NULL; @@ -341,14 +392,14 @@ rf_DuplicateASM(asmap) } return (new_asm); } -/***************************************************************************************** + +/***************************************************************************** * - * DuplicatePDA -- duplicates a PDA and returns the new one + * DuplicatePDA -- Duplicates a PDA and returns the new one. * - ****************************************************************************************/ + *****************************************************************************/ RF_PhysDiskAddr_t * -rf_DuplicatePDA(pda) - RF_PhysDiskAddr_t *pda; +rf_DuplicatePDA(RF_PhysDiskAddr_t *pda) { RF_PhysDiskAddr_t *new; @@ -356,47 +407,50 @@ rf_DuplicatePDA(pda) bcopy((char *) pda, (char *) new, sizeof(RF_PhysDiskAddr_t)); return (new); } -/***************************************************************************************** + +/***************************************************************************** * - * routines to allocate and free list elements. All allocation routines zero the - * structure before returning it. + * Routines to allocate and free list elements. All allocation routines zero + * the structure before returning it. * - * FreePhysDiskAddr is static. It should never be called directly, because + * FreePhysDiskAddr is static. It should never be called directly, because * FreeAccessStripeMap takes care of freeing the PhysDiskAddr list. 
* - ****************************************************************************************/ + *****************************************************************************/ static RF_FreeList_t *rf_asmhdr_freelist; -#define RF_MAX_FREE_ASMHDR 128 -#define RF_ASMHDR_INC 16 -#define RF_ASMHDR_INITIAL 32 +#define RF_MAX_FREE_ASMHDR 128 +#define RF_ASMHDR_INC 16 +#define RF_ASMHDR_INITIAL 32 static RF_FreeList_t *rf_asm_freelist; -#define RF_MAX_FREE_ASM 192 -#define RF_ASM_INC 24 -#define RF_ASM_INITIAL 64 +#define RF_MAX_FREE_ASM 192 +#define RF_ASM_INC 24 +#define RF_ASM_INITIAL 64 static RF_FreeList_t *rf_pda_freelist; -#define RF_MAX_FREE_PDA 192 -#define RF_PDA_INC 24 -#define RF_PDA_INITIAL 64 - -/* called at shutdown time. So far, all that is necessary is to release all the free lists */ -static void rf_ShutdownMapModule(void *); -static void -rf_ShutdownMapModule(ignored) - void *ignored; +#define RF_MAX_FREE_PDA 192 +#define RF_PDA_INC 24 +#define RF_PDA_INITIAL 64 + +/* + * Called at shutdown time. So far, all that is necessary is to release + * all the free lists. + */ +void rf_ShutdownMapModule(void *); +void +rf_ShutdownMapModule(void *ignored) { - RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); + RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, + (RF_AccessStripeMapHeader_t *)); RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *)); RF_FREELIST_DESTROY(rf_asm_freelist, next, (RF_AccessStripeMap_t *)); } -int -rf_ConfigureMapModule(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureMapModule(RF_ShutdownList_t **listp) { - int rc; + int rc; RF_FREELIST_CREATE(rf_asmhdr_freelist, RF_MAX_FREE_ASMHDR, RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t)); @@ -406,20 +460,23 @@ rf_ConfigureMapModule(listp) RF_FREELIST_CREATE(rf_asm_freelist, RF_MAX_FREE_ASM, RF_ASM_INC, sizeof(RF_AccessStripeMap_t)); if (rf_asm_freelist == NULL) { - RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); + RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, + (RF_AccessStripeMapHeader_t *)); return (ENOMEM); } - RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA, - RF_PDA_INC, sizeof(RF_PhysDiskAddr_t)); + RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA, RF_PDA_INC, + sizeof(RF_PhysDiskAddr_t)); if (rf_pda_freelist == NULL) { - RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); - RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *)); + RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, + (RF_AccessStripeMapHeader_t *)); + RF_FREELIST_DESTROY(rf_pda_freelist, next, + (RF_PhysDiskAddr_t *)); return (ENOMEM); } rc = rf_ShutdownCreate(listp, rf_ShutdownMapModule, NULL); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); rf_ShutdownMapModule(NULL); return (rc); } @@ -434,26 +491,25 @@ rf_ConfigureMapModule(listp) } RF_AccessStripeMapHeader_t * -rf_AllocAccessStripeMapHeader() +rf_AllocAccessStripeMapHeader(void) { RF_AccessStripeMapHeader_t *p; - RF_FREELIST_GET(rf_asmhdr_freelist, p, next, (RF_AccessStripeMapHeader_t *)); + RF_FREELIST_GET(rf_asmhdr_freelist, p, next, + (RF_AccessStripeMapHeader_t *)); bzero((char *) p, sizeof(RF_AccessStripeMapHeader_t)); return (p); } - -void -rf_FreeAccessStripeMapHeader(p) - RF_AccessStripeMapHeader_t *p; +void +rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t *p) { RF_FREELIST_FREE(rf_asmhdr_freelist, p, 
next); } RF_PhysDiskAddr_t * -rf_AllocPhysDiskAddr() +rf_AllocPhysDiskAddr(void) { RF_PhysDiskAddr_t *p; @@ -462,39 +518,43 @@ rf_AllocPhysDiskAddr() return (p); } -/* allocates a list of PDAs, locking the free list only once - * when we have to call calloc, we do it one component at a time to simplify - * the process of freeing the list at program shutdown. This should not be + +/* + * Allocates a list of PDAs, locking the free list only once. + * When we have to call calloc, we do it one component at a time to simplify + * the process of freeing the list at program shutdown. This should not be * much of a performance hit, because it should be very infrequently executed. */ RF_PhysDiskAddr_t * -rf_AllocPDAList(count) - int count; +rf_AllocPDAList(int count) { RF_PhysDiskAddr_t *p = NULL; - RF_FREELIST_GET_N(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *), count); + RF_FREELIST_GET_N(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *), + count); return (p); } -void -rf_FreePhysDiskAddr(p) - RF_PhysDiskAddr_t *p; +void +rf_FreePhysDiskAddr(RF_PhysDiskAddr_t *p) { RF_FREELIST_FREE(rf_pda_freelist, p, next); } -static void -rf_FreePDAList(l_start, l_end, count) - RF_PhysDiskAddr_t *l_start, *l_end; /* pointers to start and end - * of list */ - int count; /* number of elements in list */ +void +rf_FreePDAList( + /* Pointers to start and end of list. */ + RF_PhysDiskAddr_t *l_start, + RF_PhysDiskAddr_t *l_end, + int count /* Number of elements in list. */ +) { - RF_FREELIST_FREE_N(rf_pda_freelist, l_start, next, (RF_PhysDiskAddr_t *), count); + RF_FREELIST_FREE_N(rf_pda_freelist, l_start, next, + (RF_PhysDiskAddr_t *), count); } RF_AccessStripeMap_t * -rf_AllocAccessStripeMapComponent() +rf_AllocAccessStripeMapComponent(void) { RF_AccessStripeMap_t *p; @@ -503,47 +563,47 @@ rf_AllocAccessStripeMapComponent() return (p); } -/* this is essentially identical to AllocPDAList. I should combine the two. - * when we have to call calloc, we do it one component at a time to simplify - * the process of freeing the list at program shutdown. This should not be + +/* + * This is essentially identical to AllocPDAList. I should combine the two. + * When we have to call calloc, we do it one component at a time to simplify + * the process of freeing the list at program shutdown. This should not be * much of a performance hit, because it should be very infrequently executed. 
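/*
 * Editor's note -- illustrative sketch only, not part of the commit above.
 * A minimal user-land illustration of the free-list idea behind the
 * RF_FREELIST_* macros used here: keep a LIFO of recycled nodes threaded
 * through their `next' pointers and fall back to malloc() when it is empty.
 * The real macros also bound the list size, prime it at configure time and
 * free it at shutdown; this sketch deliberately leaves all of that out.
 */
#include <stdlib.h>
#include <stdio.h>

struct node {
	int		payload;
	struct node	*next;
};

static struct node *freelist;

static struct node *
node_get(void)
{
	struct node *n;

	if (freelist != NULL) {
		n = freelist;
		freelist = n->next;
	} else
		n = malloc(sizeof(*n));
	return (n);
}

static void
node_put(struct node *n)
{
	n->next = freelist;
	freelist = n;
}

int
main(void)
{
	struct node *n = node_get();

	if (n == NULL)
		return (1);
	n->payload = 42;
	printf("payload %d\n", n->payload);
	node_put(n);
	n = node_get();		/* Comes back off the free list. */
	node_put(n);
	return (0);
}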
*/ RF_AccessStripeMap_t * -rf_AllocASMList(count) - int count; +rf_AllocASMList(int count) { RF_AccessStripeMap_t *p = NULL; - RF_FREELIST_GET_N(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *), count); + RF_FREELIST_GET_N(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *), + count); return (p); } -void -rf_FreeAccessStripeMapComponent(p) - RF_AccessStripeMap_t *p; +void +rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t *p) { RF_FREELIST_FREE(rf_asm_freelist, p, next); } -static void -rf_FreeASMList(l_start, l_end, count) - RF_AccessStripeMap_t *l_start, *l_end; - int count; +void +rf_FreeASMList(RF_AccessStripeMap_t *l_start, RF_AccessStripeMap_t *l_end, + int count) { - RF_FREELIST_FREE_N(rf_asm_freelist, l_start, next, (RF_AccessStripeMap_t *), count); + RF_FREELIST_FREE_N(rf_asm_freelist, l_start, next, + (RF_AccessStripeMap_t *), count); } -void -rf_FreeAccessStripeMap(hdr) - RF_AccessStripeMapHeader_t *hdr; +void +rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t *hdr) { RF_AccessStripeMap_t *p, *pt = NULL; RF_PhysDiskAddr_t *pdp, *trailer, *pdaList = NULL, *pdaEnd = NULL; - int count = 0, t, asm_count = 0; + int count = 0, t, asm_count = 0; for (p = hdr->stripeMap; p; p = p->next) { - /* link the 3 pda lists into the accumulating pda list */ + /* Link the 3 pda lists into the accumulating pda list. */ if (!pdaList) pdaList = p->qInfo; @@ -585,7 +645,7 @@ rf_FreeAccessStripeMap(hdr) asm_count++; } - /* debug only */ + /* Debug only. */ for (t = 0, pdp = pdaList; pdp; pdp = pdp->next) t++; RF_ASSERT(t == count); @@ -595,54 +655,70 @@ rf_FreeAccessStripeMap(hdr) rf_FreeASMList(hdr->stripeMap, pt, asm_count); rf_FreeAccessStripeMapHeader(hdr); } -/* We can't use the large write optimization if there are any failures in the stripe. - * In the declustered layout, there is no way to immediately determine what disks - * constitute a stripe, so we actually have to hunt through the stripe looking for failures. - * The reason we map the parity instead of just using asm->parityInfo->col is because - * the latter may have been already redirected to a spare drive, which would - * mess up the computation of the stripe offset. + +/* + * We can't use the large write optimization if there are any failures in the + * stripe. + * In the declustered layout, there is no way to immediately determine what + * disks constitute a stripe, so we actually have to hunt through the stripe + * looking for failures. + * The reason we map the parity instead of just using asm->parityInfo->col is + * because the latter may have been already redirected to a spare drive, which + * would mess up the computation of the stripe offset. * * ASSUMES AT MOST ONE FAILURE IN THE STRIPE. */ -int -rf_CheckStripeForFailures(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; +int +rf_CheckStripeForFailures(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap) { RF_RowCol_t trow, tcol, prow, pcol, *diskids, row, i; RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_StripeCount_t stripeOffset; - int numFailures; + int numFailures; RF_RaidAddr_t sosAddr; RF_SectorNum_t diskOffset, poffset; RF_RowCol_t testrow; - /* quick out in the fault-free case. */ + /* Quick out in the fault-free case. 
*/ RF_LOCK_MUTEX(raidPtr->mutex); numFailures = raidPtr->numFailures; RF_UNLOCK_MUTEX(raidPtr->mutex); if (numFailures == 0) return (0); - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, + asmap->raidAddress); row = asmap->physInfo->row; - (layoutPtr->map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids, &testrow); - (layoutPtr->map->MapParity) (raidPtr, asmap->raidAddress, &prow, &pcol, &poffset, 0); /* get pcol */ - - /* this need not be true if we've redirected the access to a spare in - * another row RF_ASSERT(row == testrow); */ + (layoutPtr->map->IdentifyStripe) (raidPtr, asmap->raidAddress, + &diskids, &testrow); + (layoutPtr->map->MapParity) (raidPtr, asmap->raidAddress, + &prow, &pcol, &poffset, 0); /* get pcol */ + + /* + * This needs not be true if we've redirected the access to a spare in + * another row. + * RF_ASSERT(row == testrow); + */ stripeOffset = 0; for (i = 0; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++) { if (diskids[i] != pcol) { - if (RF_DEAD_DISK(raidPtr->Disks[testrow][diskids[i]].status)) { - if (raidPtr->status[testrow] != rf_rs_reconstructing) + if (RF_DEAD_DISK(raidPtr + ->Disks[testrow][diskids[i]].status)) { + if (raidPtr->status[testrow] != + rf_rs_reconstructing) return (1); - RF_ASSERT(raidPtr->reconControl[testrow]->fcol == diskids[i]); + RF_ASSERT( + raidPtr->reconControl[testrow]->fcol == + diskids[i]); layoutPtr->map->MapSector(raidPtr, - sosAddr + stripeOffset * layoutPtr->sectorsPerStripeUnit, + sosAddr + stripeOffset * + layoutPtr->sectorsPerStripeUnit, &trow, &tcol, &diskOffset, 0); - RF_ASSERT((trow == testrow) && (tcol == diskids[i])); - if (!rf_CheckRUReconstructed(raidPtr->reconControl[testrow]->reconMap, diskOffset)) + RF_ASSERT((trow == testrow) && + (tcol == diskids[i])); + if (!rf_CheckRUReconstructed(raidPtr + ->reconControl[testrow]->reconMap, + diskOffset)) return (1); asmap->flags |= RF_ASM_REDIR_LARGE_WRITE; return (0); @@ -652,22 +728,20 @@ rf_CheckStripeForFailures(raidPtr, asmap) } return (0); } -/* - return the number of failed data units in the stripe. -*/ -int -rf_NumFailedDataUnitsInStripe(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; +/* + * Return the number of failed data units in the stripe. + */ +int +rf_NumFailedDataUnitsInStripe(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_RowCol_t trow, tcol, row, i; RF_SectorNum_t diskOffset; RF_RaidAddr_t sosAddr; - int numFailures; + int numFailures; - /* quick out in the fault-free case. */ + /* Quick out in the fault-free case. 
*/ RF_LOCK_MUTEX(raidPtr->mutex); numFailures = raidPtr->numFailures; RF_UNLOCK_MUTEX(raidPtr->mutex); @@ -675,10 +749,12 @@ rf_NumFailedDataUnitsInStripe(raidPtr, asmap) return (0); numFailures = 0; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, + asmap->raidAddress); row = asmap->physInfo->row; for (i = 0; i < layoutPtr->numDataCol; i++) { - (layoutPtr->map->MapSector) (raidPtr, sosAddr + i * layoutPtr->sectorsPerStripeUnit, + (layoutPtr->map->MapSector) (raidPtr, sosAddr + i * + layoutPtr->sectorsPerStripeUnit, &trow, &tcol, &diskOffset, 0); if (RF_DEAD_DISK(raidPtr->Disks[trow][tcol].status)) numFailures++; @@ -688,58 +764,62 @@ rf_NumFailedDataUnitsInStripe(raidPtr, asmap) } -/***************************************************************************************** +/***************************************************************************** * - * debug routines + * Debug routines. * - ****************************************************************************************/ + *****************************************************************************/ -void -rf_PrintAccessStripeMap(asm_h) - RF_AccessStripeMapHeader_t *asm_h; +void +rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h) { rf_PrintFullAccessStripeMap(asm_h, 0); } -void -rf_PrintFullAccessStripeMap(asm_h, prbuf) - RF_AccessStripeMapHeader_t *asm_h; - int prbuf; /* flag to print buffer pointers */ +void +rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h, + int prbuf /* Flag to print buffer pointers. */) { - int i; + int i; RF_AccessStripeMap_t *asmap = asm_h->stripeMap; RF_PhysDiskAddr_t *p; printf("%d stripes total\n", (int) asm_h->numStripes); for (; asmap; asmap = asmap->next) { - /* printf("Num failures: %d\n",asmap->numDataFailed); */ - /* printf("Num sectors: - * %d\n",(int)asmap->totalSectorsAccessed); */ + /* printf("Num failures: %d\n", asmap->numDataFailed); */ + /* printf("Num sectors: %d\n", + * (int)asmap->totalSectorsAccessed); */ printf("Stripe %d (%d sectors), failures: %d data, %d parity: ", (int) asmap->stripeID, (int) asmap->totalSectorsAccessed, (int) asmap->numDataFailed, (int) asmap->numParityFailed); if (asmap->parityInfo) { - printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row, asmap->parityInfo->col, + printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row, + asmap->parityInfo->col, (int) asmap->parityInfo->startSector, (int) (asmap->parityInfo->startSector + - asmap->parityInfo->numSector - 1)); + asmap->parityInfo->numSector - 1)); if (prbuf) - printf(" b0x%lx", (unsigned long) asmap->parityInfo->bufPtr); + printf(" b0x%lx", + (unsigned long) asmap->parityInfo->bufPtr); if (asmap->parityInfo->next) { - printf(", r%d c%d s%d-%d", asmap->parityInfo->next->row, + printf(", r%d c%d s%d-%d", + asmap->parityInfo->next->row, asmap->parityInfo->next->col, (int) asmap->parityInfo->next->startSector, - (int) (asmap->parityInfo->next->startSector + - asmap->parityInfo->next->numSector - 1)); + (int) (asmap->parityInfo->next->startSector + + asmap->parityInfo->next->numSector - 1)); if (prbuf) - printf(" b0x%lx", (unsigned long) asmap->parityInfo->next->bufPtr); - RF_ASSERT(asmap->parityInfo->next->next == NULL); + printf(" b0x%lx", (unsigned long) + asmap->parityInfo->next->bufPtr); + RF_ASSERT(asmap->parityInfo->next->next + == NULL); } printf("]\n\t"); } for (i = 0, p = asmap->physInfo; p; p = p->next, i++) { - printf("SU r%d c%d s%d-%d ", p->row, p->col, (int) p->startSector, + printf("SU 
r%d c%d s%d-%d ", p->row, p->col, + (int) p->startSector, (int) (p->startSector + p->numSector - 1)); if (prbuf) printf("b0x%lx ", (unsigned long) p->bufPtr); @@ -748,26 +828,30 @@ rf_PrintFullAccessStripeMap(asm_h, prbuf) } printf("\n"); p = asm_h->stripeMap->failedPDAs[0]; - if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 1) + if (asm_h->stripeMap->numDataFailed + + asm_h->stripeMap->numParityFailed > 1) printf("[multiple failures]\n"); else - if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 0) - printf("\t[Failed PDA: r%d c%d s%d-%d]\n", p->row, p->col, - (int) p->startSector, (int) (p->startSector + p->numSector - 1)); + if (asm_h->stripeMap->numDataFailed + + asm_h->stripeMap->numParityFailed > 0) + printf("\t[Failed PDA: r%d c%d s%d-%d]\n", + p->row, p->col, (int) p->startSector, + (int) (p->startSector + p->numSector - 1)); } } -void -rf_PrintRaidAddressInfo(raidPtr, raidAddr, numBlocks) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_SectorCount_t numBlocks; +void +rf_PrintRaidAddressInfo(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, + RF_SectorCount_t numBlocks) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_RaidAddr_t ra, sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); + RF_RaidAddr_t ra, sosAddr = + rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); - printf("Raid addrs of SU boundaries from start of stripe to end of access:\n\t"); - for (ra = sosAddr; ra <= raidAddr + numBlocks; ra += layoutPtr->sectorsPerStripeUnit) { + printf("Raid addrs of SU boundaries from start of stripe to end" + " of access:\n\t"); + for (ra = sosAddr; ra <= raidAddr + numBlocks; + ra += layoutPtr->sectorsPerStripeUnit) { printf("%d (0x%x), ", (int) ra, (int) ra); } printf("\n"); @@ -775,76 +859,93 @@ rf_PrintRaidAddressInfo(raidPtr, raidAddr, numBlocks) (int) (raidAddr % layoutPtr->sectorsPerStripeUnit), (int) (raidAddr % layoutPtr->sectorsPerStripeUnit)); } + /* - given a parity descriptor and the starting address within a stripe, - range restrict the parity descriptor to touch only the correct stuff. -*/ -void + * Given a parity descriptor and the starting address within a stripe, + * range restrict the parity descriptor to touch only the correct stuff. + */ +void rf_ASMParityAdjust( - RF_PhysDiskAddr_t * toAdjust, - RF_StripeNum_t startAddrWithinStripe, - RF_SectorNum_t endAddress, - RF_RaidLayout_t * layoutPtr, - RF_AccessStripeMap_t * asm_p) + RF_PhysDiskAddr_t *toAdjust, + RF_StripeNum_t startAddrWithinStripe, + RF_SectorNum_t endAddress, + RF_RaidLayout_t *layoutPtr, + RF_AccessStripeMap_t *asm_p +) { RF_PhysDiskAddr_t *new_pda; - /* when we're accessing only a portion of one stripe unit, we want the + /* + * When we're accessing only a portion of one stripe unit, we want the * parity descriptor to identify only the chunk of parity associated - * with the data. When the access spans exactly one stripe unit + * with the data. When the access spans exactly one stripe unit * boundary and is less than a stripe unit in size, it uses two - * disjoint regions of the parity unit. When an access spans more + * disjoint regions of the parity unit. When an access spans more * than one stripe unit boundary, it uses all of the parity unit. - * + * * To better handle the case where stripe units are small, we may * eventually want to change the 2nd case so that if the SU size is * below some threshold, we just read/write the whole thing instead of - * breaking it up into two accesses. */ + * breaking it up into two accesses. 
+ */ if (asm_p->numStripeUnitsAccessed == 1) { - int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); + int x = (startAddrWithinStripe % + layoutPtr->sectorsPerStripeUnit); toAdjust->startSector += x; toAdjust->raidAddress += x; toAdjust->numSector = asm_p->physInfo->numSector; RF_ASSERT(toAdjust->numSector != 0); } else - if (asm_p->numStripeUnitsAccessed == 2 && asm_p->totalSectorsAccessed < layoutPtr->sectorsPerStripeUnit) { - int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); - - /* create a second pda and copy the parity map info - * into it */ + if (asm_p->numStripeUnitsAccessed == 2 && + asm_p->totalSectorsAccessed < + layoutPtr->sectorsPerStripeUnit) { + int x = (startAddrWithinStripe % + layoutPtr->sectorsPerStripeUnit); + + /* + * Create a second pda and copy the parity map info + * into it. + */ RF_ASSERT(toAdjust->next == NULL); new_pda = toAdjust->next = rf_AllocPhysDiskAddr(); - *new_pda = *toAdjust; /* structure assignment */ + *new_pda = *toAdjust; /* Structure assignment. */ new_pda->next = NULL; - /* adjust the start sector & number of blocks for the - * first parity pda */ + /* + * Adjust the start sector & number of blocks for the + * first parity pda. + */ toAdjust->startSector += x; toAdjust->raidAddress += x; - toAdjust->numSector = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, startAddrWithinStripe) - startAddrWithinStripe; + toAdjust->numSector = + rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, + startAddrWithinStripe) - startAddrWithinStripe; RF_ASSERT(toAdjust->numSector != 0); - /* adjust the second pda */ - new_pda->numSector = endAddress - rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, endAddress); + /* Adjust the second pda. */ + new_pda->numSector = endAddress - + rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, + endAddress); /* new_pda->raidAddress = - * rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, - * toAdjust->raidAddress); */ + * rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, + * toAdjust->raidAddress); */ RF_ASSERT(new_pda->numSector != 0); } } + /* - Check if a disk has been spared or failed. If spared, - redirect the I/O. - If it has been failed, record it in the asm pointer. - Fourth arg is whether data or parity. -*/ -void + * Check if a disk has been spared or failed. If spared, redirect the I/O. + * If it has been failed, record it in the asm pointer. + * Fourth arg is whether data or parity. + */ +void rf_ASMCheckStatus( - RF_Raid_t * raidPtr, - RF_PhysDiskAddr_t * pda_p, - RF_AccessStripeMap_t * asm_p, - RF_RaidDisk_t ** disks, - int parity) + RF_Raid_t *raidPtr, + RF_PhysDiskAddr_t *pda_p, + RF_AccessStripeMap_t *asm_p, + RF_RaidDisk_t **disks, + int parity +) { RF_DiskStatus_t dstatus; RF_RowCol_t frow, fcol; @@ -852,44 +953,54 @@ rf_ASMCheckStatus( dstatus = disks[pda_p->row][pda_p->col].status; if (dstatus == rf_ds_spared) { - /* if the disk has been spared, redirect access to the spare */ + /* If the disk has been spared, redirect access to the spare. */ frow = pda_p->row; fcol = pda_p->col; pda_p->row = disks[frow][fcol].spareRow; pda_p->col = disks[frow][fcol].spareCol; } else if (dstatus == rf_ds_dist_spared) { - /* ditto if disk has been spared to dist spare space */ + /* Ditto if disk has been spared to dist spare space. 
*/ RF_RowCol_t or = pda_p->row, oc = pda_p->col; RF_SectorNum_t oo = pda_p->startSector; if (pda_p->type == RF_PDA_TYPE_DATA) - raidPtr->Layout.map->MapSector(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); + raidPtr->Layout.map->MapSector(raidPtr, + pda_p->raidAddress, &pda_p->row, + &pda_p->col, &pda_p->startSector, RF_REMAP); else - raidPtr->Layout.map->MapParity(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); + raidPtr->Layout.map->MapParity(raidPtr, + pda_p->raidAddress, &pda_p->row, + &pda_p->col, &pda_p->startSector, RF_REMAP); if (rf_mapDebug) { - printf("Redirected r %d c %d o %d -> r%d c %d o %d\n", or, oc, (int) oo, - pda_p->row, pda_p->col, (int) pda_p->startSector); + printf("Redirected r %d c %d o %d -> r%d c %d" + " o %d\n", or, oc, (int) oo, pda_p->row, + pda_p->col, (int) pda_p->startSector); } } else if (RF_DEAD_DISK(dstatus)) { - /* if the disk is inaccessible, mark the - * failure */ + /* + * If the disk is inaccessible, mark the + * failure. + */ if (parity) asm_p->numParityFailed++; else { asm_p->numDataFailed++; #if 0 - /* XXX Do we really want this spewing - * out on the console? GO */ - printf("DATA_FAILED!\n"); + /* + * XXX Do we really want this spewing + * out on the console ? GO + */ + printf("DATA_FAILED !\n"); #endif } asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p; asm_p->numFailedPDAs++; #if 0 - switch (asm_p->numParityFailed + asm_p->numDataFailed) { + switch (asm_p->numParityFailed + + asm_p->numDataFailed) { case 1: asm_p->failedPDAs[0] = pda_p; break; @@ -900,8 +1011,10 @@ rf_ASMCheckStatus( } #endif } - /* the redirected access should never span a stripe unit boundary */ - RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress) == - rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress + pda_p->numSector - 1)); + /* The redirected access should never span a stripe unit boundary. */ + RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout, + pda_p->raidAddress) == + rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress + + pda_p->numSector - 1)); RF_ASSERT(pda_p->col != -1); } diff --git a/sys/dev/raidframe/rf_map.h b/sys/dev/raidframe/rf_map.h index 95b2d243083..fac621f6352 100644 --- a/sys/dev/raidframe/rf_map.h +++ b/sys/dev/raidframe/rf_map.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_map.h,v 1.2 1999/02/16 00:02:56 niklas Exp $ */ +/* $OpenBSD: rf_map.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_map.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,66 +30,57 @@ /* rf_map.h */ -#ifndef _RF__RF_MAP_H_ -#define _RF__RF_MAP_H_ +#ifndef _RF__RF_MAP_H_ +#define _RF__RF_MAP_H_ #include "rf_types.h" #include "rf_alloclist.h" #include "rf_raid.h" -/* mapping structure allocation and free routines */ -RF_AccessStripeMapHeader_t * -rf_MapAccess(RF_Raid_t * raidPtr, - RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, - caddr_t buffer, int remap); +/* Mapping structure allocation and free routines. 
*/ +RF_AccessStripeMapHeader_t *rf_MapAccess(RF_Raid_t *, RF_RaidAddr_t, + RF_SectorCount_t, caddr_t, int); -void -rf_MarkFailuresInASMList(RF_Raid_t * raidPtr, - RF_AccessStripeMapHeader_t * asm_h); +void rf_MarkFailuresInASMList(RF_Raid_t *, RF_AccessStripeMapHeader_t *); -RF_AccessStripeMap_t *rf_DuplicateASM(RF_AccessStripeMap_t * asmap); +RF_AccessStripeMap_t *rf_DuplicateASM(RF_AccessStripeMap_t *); -RF_PhysDiskAddr_t *rf_DuplicatePDA(RF_PhysDiskAddr_t * pda); +RF_PhysDiskAddr_t *rf_DuplicatePDA(RF_PhysDiskAddr_t *); -int rf_ConfigureMapModule(RF_ShutdownList_t ** listp); +int rf_ConfigureMapModule(RF_ShutdownList_t **); RF_AccessStripeMapHeader_t *rf_AllocAccessStripeMapHeader(void); -void rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t * p); +void rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t *); RF_PhysDiskAddr_t *rf_AllocPhysDiskAddr(void); -RF_PhysDiskAddr_t *rf_AllocPDAList(int count); +RF_PhysDiskAddr_t *rf_AllocPDAList(int); -void rf_FreePhysDiskAddr(RF_PhysDiskAddr_t * p); +void rf_FreePhysDiskAddr(RF_PhysDiskAddr_t *); RF_AccessStripeMap_t *rf_AllocAccessStripeMapComponent(void); -RF_AccessStripeMap_t *rf_AllocASMList(int count); +RF_AccessStripeMap_t *rf_AllocASMList(int); -void rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t * p); +void rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t *); -void rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t * hdr); +void rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t *); -int rf_CheckStripeForFailures(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); +int rf_CheckStripeForFailures(RF_Raid_t *, RF_AccessStripeMap_t *); -int rf_NumFailedDataUnitsInStripe(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); +int rf_NumFailedDataUnitsInStripe(RF_Raid_t *, RF_AccessStripeMap_t *); -void rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t * asm_h); +void rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t *); -void rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t * asm_h, int prbuf); +void rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t *, int); -void -rf_PrintRaidAddressInfo(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_SectorCount_t numBlocks); +void rf_PrintRaidAddressInfo(RF_Raid_t *, RF_RaidAddr_t, RF_SectorCount_t); -void -rf_ASMParityAdjust(RF_PhysDiskAddr_t * toAdjust, - RF_StripeNum_t startAddrWithinStripe, RF_SectorNum_t endAddress, - RF_RaidLayout_t * layoutPtr, RF_AccessStripeMap_t * asm_p); +void rf_ASMParityAdjust(RF_PhysDiskAddr_t *, RF_StripeNum_t, RF_SectorNum_t, + RF_RaidLayout_t *, RF_AccessStripeMap_t *); -void -rf_ASMCheckStatus(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda_p, - RF_AccessStripeMap_t * asm_p, RF_RaidDisk_t ** disks, int parity); +void rf_ASMCheckStatus(RF_Raid_t *, RF_PhysDiskAddr_t *, + RF_AccessStripeMap_t *, RF_RaidDisk_t **, int); -#endif /* !_RF__RF_MAP_H_ */ +#endif /* !_RF__RF_MAP_H_ */ diff --git a/sys/dev/raidframe/rf_mcpair.c b/sys/dev/raidframe/rf_mcpair.c index 5b39b182332..d189c1d1127 100644 --- a/sys/dev/raidframe/rf_mcpair.c +++ b/sys/dev/raidframe/rf_mcpair.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_mcpair.c,v 1.2 1999/02/16 00:02:56 niklas Exp $ */ +/* $OpenBSD: rf_mcpair.c,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_mcpair.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,9 +28,10 @@ * rights to redistribute these changes. */ -/* rf_mcpair.c - * an mcpair is a structure containing a mutex and a condition variable. 
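/*
 * Editor's note -- illustrative sketch only, not part of the commit above.
 * The in-kernel mcpair code below pairs a mutex and a condition variable
 * (plus a flag) and blocks with tsleep()/wakeup().  This is the same
 * pattern expressed with user-land pthreads, for readers less familiar
 * with the kernel primitives; it is an analogue, not the RAIDframe code.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int flag;

static void *
worker(void *arg)
{
	(void)arg;

	/* The "wakeup" side: set the flag and signal the waiter. */
	pthread_mutex_lock(&mtx);
	flag = 1;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&mtx);
	return (NULL);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);

	/* The "wait" side: sleep until the flag has been set. */
	pthread_mutex_lock(&mtx);
	while (!flag)
		pthread_cond_wait(&cond, &mtx);
	pthread_mutex_unlock(&mtx);

	pthread_join(t, NULL);
	printf("event received\n");
	return (0);
}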
- * it's used to block the current thread until some event occurs. +/* + * rf_mcpair.c + * An mcpair is a structure containing a mutex and a condition variable. + * It's used to block the current thread until some event occurs. */ #include "rf_types.h" @@ -43,78 +45,76 @@ static RF_FreeList_t *rf_mcpair_freelist; -#define RF_MAX_FREE_MCPAIR 128 -#define RF_MCPAIR_INC 16 -#define RF_MCPAIR_INITIAL 24 +#define RF_MAX_FREE_MCPAIR 128 +#define RF_MCPAIR_INC 16 +#define RF_MCPAIR_INITIAL 24 -static int init_mcpair(RF_MCPair_t *); -static void clean_mcpair(RF_MCPair_t *); -static void rf_ShutdownMCPair(void *); +int rf_init_mcpair(RF_MCPair_t *); +void rf_clean_mcpair(RF_MCPair_t *); +void rf_ShutdownMCPair(void *); -static int -init_mcpair(t) - RF_MCPair_t *t; +int +rf_init_mcpair(RF_MCPair_t *t) { - int rc; + int rc; rc = rf_mutex_init(&t->mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); return (rc); } rc = rf_cond_init(&t->cond); if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); rf_mutex_destroy(&t->mutex); return (rc); } return (0); } -static void -clean_mcpair(t) - RF_MCPair_t *t; +void +rf_clean_mcpair(RF_MCPair_t *t) { rf_mutex_destroy(&t->mutex); rf_cond_destroy(&t->cond); } -static void -rf_ShutdownMCPair(ignored) - void *ignored; +void +rf_ShutdownMCPair(void *ignored) { - RF_FREELIST_DESTROY_CLEAN(rf_mcpair_freelist, next, (RF_MCPair_t *), clean_mcpair); + RF_FREELIST_DESTROY_CLEAN(rf_mcpair_freelist, next, (RF_MCPair_t *), + rf_clean_mcpair); } -int -rf_ConfigureMCPair(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureMCPair(RF_ShutdownList_t **listp) { - int rc; + int rc; RF_FREELIST_CREATE(rf_mcpair_freelist, RF_MAX_FREE_MCPAIR, RF_MCPAIR_INC, sizeof(RF_MCPair_t)); rc = rf_ShutdownCreate(listp, rf_ShutdownMCPair, NULL); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); rf_ShutdownMCPair(NULL); return (rc); } RF_FREELIST_PRIME_INIT(rf_mcpair_freelist, RF_MCPAIR_INITIAL, next, - (RF_MCPair_t *), init_mcpair); + (RF_MCPair_t *), rf_init_mcpair); return (0); } RF_MCPair_t * -rf_AllocMCPair() +rf_AllocMCPair(void) { RF_MCPair_t *t; - RF_FREELIST_GET_INIT(rf_mcpair_freelist, t, next, (RF_MCPair_t *), init_mcpair); + RF_FREELIST_GET_INIT(rf_mcpair_freelist, t, next, (RF_MCPair_t *), + rf_init_mcpair); if (t) { t->flag = 0; t->next = NULL; @@ -122,27 +122,33 @@ rf_AllocMCPair() return (t); } -void -rf_FreeMCPair(t) - RF_MCPair_t *t; +void +rf_FreeMCPair(RF_MCPair_t *t) { - RF_FREELIST_FREE_CLEAN(rf_mcpair_freelist, t, next, clean_mcpair); + RF_FREELIST_FREE_CLEAN(rf_mcpair_freelist, t, next, rf_clean_mcpair); } -/* the callback function used to wake you up when you use an mcpair to wait for something */ -void -rf_MCPairWakeupFunc(mcpair) - RF_MCPair_t *mcpair; + +/* + * The callback function used to wake you up when you use an mcpair to wait + * for something. + */ +void +rf_MCPairWakeupFunc(RF_MCPair_t *mcpair) { RF_LOCK_MUTEX(mcpair->mutex); mcpair->flag = 1; #if 0 printf("MCPairWakeupFunc called!\n"); #endif - wakeup(&(mcpair->flag));/* XXX Does this do anything useful!! 
GO */ - /* XXX Looks like the following is needed to truly get the - * functionality they were looking for here... This could be a + wakeup(&(mcpair->flag)); /* XXX Does this do anything useful !!! GO */ + /* + * XXX + * Looks like the following is needed to truly get the + * functionality they were looking for here... This could be a * side-effect of my using a tsleep in the Net- and OpenBSD port - * though... XXX */ - wakeup(&(mcpair->cond));/* XXX XXX XXX GO */ + * though... + * XXX + */ + wakeup(&(mcpair->cond)); /* XXX XXX XXX GO */ RF_UNLOCK_MUTEX(mcpair->mutex); } diff --git a/sys/dev/raidframe/rf_mcpair.h b/sys/dev/raidframe/rf_mcpair.h index 493d4450d5b..8691f62f999 100644 --- a/sys/dev/raidframe/rf_mcpair.h +++ b/sys/dev/raidframe/rf_mcpair.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_mcpair.h,v 1.3 1999/07/30 14:45:32 peter Exp $ */ +/* $OpenBSD: rf_mcpair.h,v 1.4 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_mcpair.h,v 1.4 1999/03/14 21:53:31 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,27 +28,28 @@ * rights to redistribute these changes. */ -/* rf_mcpair.h - * see comments in rf_mcpair.c +/* + * rf_mcpair.h + * See comments in rf_mcpair.c */ -#ifndef _RF__RF_MCPAIR_H_ -#define _RF__RF_MCPAIR_H_ +#ifndef _RF__RF_MCPAIR_H_ +#define _RF__RF_MCPAIR_H_ #include "rf_types.h" #include "rf_threadstuff.h" struct RF_MCPair_s { - RF_DECLARE_MUTEX(mutex) - RF_DECLARE_COND(cond) - int flag; - RF_MCPair_t *next; + RF_DECLARE_MUTEX(mutex); + RF_DECLARE_COND (cond); + int flag; + RF_MCPair_t *next; }; -#define RF_WAIT_MCPAIR(_mcp) tsleep(&((_mcp)->flag), PRIBIO, "mcpair", 0) +#define RF_WAIT_MCPAIR(_mcp) tsleep(&((_mcp)->flag), PRIBIO, "mcpair", 0) -int rf_ConfigureMCPair(RF_ShutdownList_t ** listp); +int rf_ConfigureMCPair(RF_ShutdownList_t **); RF_MCPair_t *rf_AllocMCPair(void); -void rf_FreeMCPair(RF_MCPair_t * t); -void rf_MCPairWakeupFunc(RF_MCPair_t * t); +void rf_FreeMCPair(RF_MCPair_t *); +void rf_MCPairWakeupFunc(RF_MCPair_t *); -#endif /* !_RF__RF_MCPAIR_H_ */ +#endif /* !_RF__RF_MCPAIR_H_ */ diff --git a/sys/dev/raidframe/rf_memchunk.c b/sys/dev/raidframe/rf_memchunk.c index a9bea6f29ef..2febacde6d2 100644 --- a/sys/dev/raidframe/rf_memchunk.c +++ b/sys/dev/raidframe/rf_memchunk.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rf_memchunk.c,v 1.3 2000/01/07 14:50:21 peter Exp $ */ +/* $OpenBSD: rf_memchunk.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_memchunk.c,v 1.4 1999/08/13 03:41:56 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. 
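The mcpair pieces above (rf_AllocMCPair(), RF_WAIT_MCPAIR(), rf_MCPairWakeupFunc(), rf_FreeMCPair()) are normally used together: a caller allocates a pair, hands rf_MCPairWakeupFunc() to whatever completes asynchronously, then sleeps until the flag is set. A sketch of that calling pattern, ignoring the interrupt-level synchronization the real callers need (the submission step is illustrative; callers such as the parity verification code pass the wakeup function to rf_DispatchDAG() and friends):

	RF_MCPair_t *mc;

	mc = rf_AllocMCPair();			/* mc->flag starts out at 0 */
	/*
	 * Queue the asynchronous work here, passing rf_MCPairWakeupFunc
	 * as the completion callback and `mc' as its argument.
	 */
	while (!mc->flag)
		RF_WAIT_MCPAIR(mc);		/* tsleep()s on &mc->flag;
						 * re-test after every wakeup */
	rf_FreeMCPair(mc);
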
@@ -76,12 +76,12 @@ struct RF_ChunkHdr_s { static RF_ChunkHdr_t *chunklist, *chunk_hdr_free_list; static RF_ChunkDesc_t *chunk_desc_free_list; -RF_DECLARE_STATIC_MUTEX(chunkmutex) - static void rf_ShutdownMemChunk(void *); - static RF_ChunkDesc_t *NewMemChunk(int, char *); +RF_DECLARE_STATIC_MUTEX(chunkmutex); +void rf_ShutdownMemChunk(void *); +RF_ChunkDesc_t *rf_NewMemChunk(int, char *); - static void rf_ShutdownMemChunk(ignored) +void rf_ShutdownMemChunk(ignored) void *ignored; { RF_ChunkDesc_t *pt, *p; @@ -133,8 +133,8 @@ rf_ConfigureMemChunk(listp) * * free list is not currently used */ -static RF_ChunkDesc_t * -NewMemChunk(size, buf) +RF_ChunkDesc_t * +rf_NewMemChunk(size, buf) int size; char *buf; { @@ -176,7 +176,7 @@ rf_GetMemChunk(size) } if (!p) { RF_Malloc(buf, size, (char *)); - p = NewMemChunk(size, buf); + p = rf_NewMemChunk(size, buf); } RF_UNLOCK_MUTEX(chunkmutex); (void) bzero(p->buf, size); diff --git a/sys/dev/raidframe/rf_memchunk.h b/sys/dev/raidframe/rf_memchunk.h index d2585a48987..afd4555e63c 100644 --- a/sys/dev/raidframe/rf_memchunk.h +++ b/sys/dev/raidframe/rf_memchunk.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_memchunk.h,v 1.2 1999/02/16 00:02:57 niklas Exp $ */ +/* $OpenBSD: rf_memchunk.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_memchunk.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,22 +28,22 @@ * rights to redistribute these changes. */ -/* header file for rf_memchunk.c. See comments there */ +/* Header file for rf_memchunk.c. See comments there. */ -#ifndef _RF__RF_MEMCHUNK_H_ -#define _RF__RF_MEMCHUNK_H_ +#ifndef _RF__RF_MEMCHUNK_H_ +#define _RF__RF_MEMCHUNK_H_ #include "rf_types.h" struct RF_ChunkDesc_s { - int size; - int reuse_count; - char *buf; - RF_ChunkDesc_t *next; + int size; + int reuse_count; + char *buf; + RF_ChunkDesc_t *next; }; -int rf_ConfigureMemChunk(RF_ShutdownList_t ** listp); -RF_ChunkDesc_t *rf_GetMemChunk(int size); -void rf_ReleaseMemChunk(RF_ChunkDesc_t * chunk); +int rf_ConfigureMemChunk(RF_ShutdownList_t **); +RF_ChunkDesc_t *rf_GetMemChunk(int); +void rf_ReleaseMemChunk(RF_ChunkDesc_t *); -#endif /* !_RF__RF_MEMCHUNK_H_ */ +#endif /* !_RF__RF_MEMCHUNK_H_ */ diff --git a/sys/dev/raidframe/rf_netbsd.h b/sys/dev/raidframe/rf_netbsd.h index 0f3a18d3811..a19148d9d20 100644 --- a/sys/dev/raidframe/rf_netbsd.h +++ b/sys/dev/raidframe/rf_netbsd.h @@ -1,7 +1,7 @@ -/* $OpenBSD: rf_netbsd.h,v 1.5 2000/08/08 16:07:42 peter Exp $ */ +/* $OpenBSD: rf_netbsd.h,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_netbsd.h,v 1.12 2000/05/28 22:53:49 oster Exp $ */ -/*- +/* * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. * @@ -18,8 +18,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. @@ -37,104 +37,139 @@ * POSSIBILITY OF SUCH DAMAGE. 
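The memchunk module keeps previously allocated buffers on a size-keyed list so callers can grab a zeroed scratch buffer without going back to malloc every time. The consumer-side pattern implied by the prototypes in rf_memchunk.h, as a sketch (`len' is a hypothetical byte count):

	RF_ChunkDesc_t *cd;

	/* Get a descriptor whose cd->buf is a zeroed buffer of `len' bytes. */
	cd = rf_GetMemChunk(len);
	/* ... use cd->buf while the operation is in flight ... */
	rf_ReleaseMemChunk(cd);		/* hand the chunk back for reuse */
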
*/ -#ifndef _RF__RF_NETBSDSTUFF_H_ -#define _RF__RF_NETBSDSTUFF_H_ +#ifndef _RF__RF_NETBSDSTUFF_H_ +#define _RF__RF_NETBSDSTUFF_H_ -#ifdef _KERNEL +#ifdef _KERNEL #include <sys/fcntl.h> #include <sys/systm.h> #include <sys/namei.h> #include <sys/vnode.h> -#endif /* _KERNEL */ - -/* The per-component label information that the user can set */ +#endif /* _KERNEL */ + +/* The per-component label information that the user can set. */ typedef struct RF_ComponentInfo_s { - int row; /* the row number of this component */ - int column; /* the column number of this component */ - int serial_number; /* a user-specified serial number for this - RAID set */ + int row; /* The row number of this component. */ + int column; /* The column number of this component. */ + int serial_number; /* + * A user-specified serial number for this + * RAID set. + */ } RF_ComponentInfo_t; /* The per-component label information */ typedef struct RF_ComponentLabel_s { - int version; /* The version of this label. */ - int serial_number; /* a user-specified serial number for this - RAID set */ - int mod_counter; /* modification counter. Changed (usually - by incrementing) every time the label - is changed */ - int row; /* the row number of this component */ - int column; /* the column number of this component */ - int num_rows; /* number of rows in this RAID set */ - int num_columns; /* number of columns in this RAID set */ - int clean; /* 1 when clean, 0 when dirty */ - int status; /* rf_ds_optimal, rf_ds_dist_spared, whatever. */ - /* stuff that will be in version 2 of the label */ - int sectPerSU; /* Sectors per Stripe Unit */ - int SUsPerPU; /* Stripe Units per Parity Units */ - int SUsPerRU; /* Stripe Units per Reconstruction Units */ - int parityConfig; /* '0' == RAID0, '1' == RAID1, etc. */ - int maxOutstanding; /* maxOutstanding disk requests */ - int blockSize; /* size of component block. - (disklabel->d_secsize) */ - int numBlocks; /* number of blocks on this component. May - be smaller than the partition size. */ - int partitionSize; /* number of blocks on this *partition*. - Must exactly match the partition size - from the disklabel. */ - int future_use[33]; /* Future expansion */ - int autoconfigure; /* automatically configure this RAID set. - 0 == no, 1 == yes */ - int root_partition; /* Use this set as / - 0 == no, 1 == yes*/ - int last_unit; /* last unit number (e.g. 0 for /dev/raid0) - of this component. Used for autoconfigure - only. */ - int config_order; /* 0 .. n. The order in which the component - should be auto-configured. E.g. 0 is will - done first, (and would become raid0). - This may be in conflict with last_unit!!?! */ - /* Not currently used. */ - int future_use2[44]; /* More future expansion */ + int version; /* The version of this label. */ + int serial_number; /* + * A user-specified serial number for this + * RAID set. + */ + int mod_counter; /* + * Modification counter. Changed (usually + * by incrementing) every time the label + * is changed. + */ + int row; /* The row number of this component. */ + int column; /* The column number of this component. */ + int num_rows; /* Number of rows in this RAID set. */ + int num_columns; /* Number of columns in this RAID set. */ + int clean; /* 1 when clean, 0 when dirty. */ + int status; /* + * rf_ds_optimal, rf_ds_dist_spared, whatever. + */ + /* Stuff that will be in version 2 of the label. */ + int sectPerSU; /* Sectors per Stripe Unit. */ + int SUsPerPU; /* Stripe Units per Parity Units. 
*/ + int SUsPerRU; /* Stripe Units per Reconstruction Units. */ + int parityConfig; /* '0' == RAID0, '1' == RAID1, etc. */ + int maxOutstanding; /* maxOutstanding disk requests. */ + int blockSize; /* + * Size of component block. + * (disklabel->d_secsize) + */ + int numBlocks; /* + * Number of blocks on this component. + * May be smaller than the partition size. + */ + int partitionSize; /* + * Number of blocks on this *partition*. + * Must exactly match the partition size + * from the disklabel. + */ + int future_use[33]; /* Future expansion. */ + int autoconfigure; /* + * Automatically configure this RAID set. + * 0 == no, 1 == yes + */ + int root_partition; /* + * Use this set as : + * 0 == no, 1 == yes + */ + int last_unit; /* + * Last unit number (e.g. 0 for /dev/raid0) + * of this component. Used for autoconfigure + * only. + */ + int config_order; /* + * 0 .. n. The order in which the component + * should be auto-configured. E.g. 0 will be + * done first, (and would become raid0). + * This may be in conflict with last_unit !!?! + */ + /* Not currently used. */ + int fut_use2[44]; /* More future expansion. */ } RF_ComponentLabel_t; typedef struct RF_SingleComponent_s { - int row; - int column; - char component_name[50]; /* name of the component */ -} RF_SingleComponent_t; + int row; + int column; + char component_name[50]; /* Name of the component. */ +} RF_SingleComponent_t; -#ifdef _KERNEL +#ifdef _KERNEL - struct raidcinfo { - struct vnode *ci_vp; /* component device's vnode */ - dev_t ci_dev; /* component device's dev_t */ - RF_ComponentLabel_t ci_label; /* components RAIDframe label */ +struct raidcinfo { + struct vnode *ci_vp; /* Component device's vnode. */ + dev_t ci_dev; /* Component device's dev_t. */ + RF_ComponentLabel_t ci_label; /* Components RAIDframe label. */ #if 0 - size_t ci_size; /* size */ - char *ci_path; /* path to component */ - size_t ci_pathlen; /* length of component path */ + size_t ci_size; /* Size. */ + char *ci_path; /* Path to component. */ + size_t ci_pathlen; /* Length of component path. */ #endif - }; +}; -/* XXX probably belongs in a different .h file. */ +/* XXX Probably belongs in a different .h file. */ typedef struct RF_AutoConfig_s { - char devname[56]; /* the name of this component */ - int flag; /* a general-purpose flag */ - dev_t dev; /* the device for this component */ - struct vnode *vp; /* Mr. Vnode Pointer */ - RF_ComponentLabel_t *clabel; /* the label */ - struct RF_AutoConfig_s *next; /* the next autoconfig structure - in this set. */ + char devname[56]; /* + * The name of this component. + */ + int flag; /* A general-purpose flag. */ + dev_t dev; /* + * The device for this + * component. + */ + struct vnode *vp; /* Master Vnode Pointer. */ + RF_ComponentLabel_t *clabel; /* The label. */ + struct RF_AutoConfig_s *next; /* + * The next autoconfig + * structure in this set. + */ } RF_AutoConfig_t; typedef struct RF_ConfigSet_s { - struct RF_AutoConfig_s *ac; /* all of the autoconfig structures for - this config set. */ - int rootable; /* Set to 1 if this set can be root */ - struct RF_ConfigSet_s *next; + struct RF_AutoConfig_s *ac; /* + * All of the autoconfig + * structures for this + * config set. + */ + int rootable; /* + * Set to 1 if this set can + * be root. 
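The clean flag and mod_counter above drive the dirty-parity bookkeeping: the label is rewritten with clean set to 0 when the set may be written and with clean set to 1 on an orderly shutdown, bumping mod_counter each time (the assumption here is that autoconfiguration compares mod_counter values to pick the freshest label). A sketch of that life cycle — the helper names are made up; the real work is done by rf_markalldirty() and rf_update_component_labels() elsewhere in this diff:

	void
	example_mark_dirty(RF_ComponentLabel_t *cl)
	{
		cl->clean = 0;		/* writes in flight: parity may be stale */
		cl->mod_counter++;	/* label changed, bump the counter */
	}

	void
	example_mark_clean(RF_ComponentLabel_t *cl)
	{
		cl->clean = 1;		/* orderly shutdown: parity believed good */
		cl->mod_counter++;
	}
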
+ */ + struct RF_ConfigSet_s *next; } RF_ConfigSet_t; -#endif /* _KERNEL */ -#endif /* _RF__RF_NETBSDSTUFF_H_ */ +#endif /* _KERNEL */ +#endif /* _RF__RF_NETBSDSTUFF_H_ */ diff --git a/sys/dev/raidframe/rf_netbsdkintf.c b/sys/dev/raidframe/rf_netbsdkintf.c index 0ba733a4133..ff5d1e98b6b 100644 --- a/sys/dev/raidframe/rf_netbsdkintf.c +++ b/sys/dev/raidframe/rf_netbsdkintf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rf_netbsdkintf.c,v 1.10 2002/10/12 01:09:44 krw Exp $ */ +/* $OpenBSD: rf_netbsdkintf.c,v 1.11 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_netbsdkintf.c,v 1.93 2000/07/14 15:26:29 oster Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. @@ -1551,7 +1551,7 @@ rf_GetSpareTableFromDaemon(req) /* mpsleep unlocks the mutex */ while (!rf_sparet_resp_queue) { tsleep(&rf_sparet_resp_queue, PRIBIO, - "raidframe getsparetable", 0); + "RAIDframe getsparetable", 0); } req = rf_sparet_resp_queue; rf_sparet_resp_queue = req->next; diff --git a/sys/dev/raidframe/rf_nwayxor.c b/sys/dev/raidframe/rf_nwayxor.c index df7604650fb..fe35211806d 100644 --- a/sys/dev/raidframe/rf_nwayxor.c +++ b/sys/dev/raidframe/rf_nwayxor.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_nwayxor.c,v 1.3 2000/08/08 16:07:43 peter Exp $ */ +/* $OpenBSD: rf_nwayxor.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_nwayxor.c,v 1.4 2000/03/30 12:45:41 augustss Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,26 +28,25 @@ * rights to redistribute these changes. */ -/************************************************************ +/************************************************************* * - * nwayxor.c -- code to do N-way xors for reconstruction + * nwayxor.c -- Code to do N-way xors for reconstruction. * * nWayXorN xors N input buffers into the destination buffer. - * adapted from danner's longword_bxor code. + * Adapted from danner's longword_bxor code. * - ************************************************************/ + *************************************************************/ #include "rf_nwayxor.h" #include "rf_shutdown.h" static int callcount[10]; -static void rf_ShutdownNWayXor(void *); +void rf_ShutdownNWayXor(void *); -static void -rf_ShutdownNWayXor(ignored) - void *ignored; +void +rf_ShutdownNWayXor(void *ignored) { - int i; + int i; if (rf_showXorCallCounts == 0) return; @@ -56,11 +56,10 @@ rf_ShutdownNWayXor(ignored) printf("\n"); } -int -rf_ConfigureNWayXor(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureNWayXor(RF_ShutdownList_t **listp) { - int i, rc; + int i, rc; for (i = 0; i < 10; i++) callcount[i] = 0; @@ -68,11 +67,12 @@ rf_ConfigureNWayXor(listp) return (rc); } -void -rf_nWayXor1(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor1( + RF_ReconBuffer_t **src_rbs, + RF_ReconBuffer_t *dest_rb, + int len +) { unsigned long *src = (unsigned long *) src_rbs[0]->buffer; unsigned long *dest = (unsigned long *) dest_rb->buffer; @@ -102,11 +102,12 @@ rf_nWayXor1(src_rbs, dest_rb, len) } } -void -rf_nWayXor2(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor2( + RF_ReconBuffer_t **src_rbs, + RF_ReconBuffer_t *dest_rb, + int len +) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *a = dst; @@ -115,7 +116,7 @@ rf_nWayXor2(src_rbs, dest_rb, len) unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[2]++; - /* align dest to cache line */ + /* Align dest to cache line. 
*/ while ((((unsigned long) dst) & 0x1f)) { *dst++ = *a++ ^ *b++ ^ *c++; len--; @@ -135,7 +136,7 @@ rf_nWayXor2(src_rbs, dest_rb, len) b2 = b[2]; b3 = b[3]; - /* start dual issue */ + /* Start dual issue. */ a0 ^= b0; b0 = c[0]; @@ -167,39 +168,41 @@ rf_nWayXor2(src_rbs, dest_rb, len) len--; } } -/* note that first arg is not incremented but 2nd arg is */ -#define LOAD_FIRST(_dst,_b) \ - a0 = _dst[0]; len -= 4; \ - a1 = _dst[1]; \ - a2 = _dst[2]; \ - a3 = _dst[3]; \ - b0 = _b[0]; \ - b1 = _b[1]; \ - b2 = _b[2]; \ - b3 = _b[3]; _b += 4; - -/* note: arg is incremented */ -#define XOR_AND_LOAD_NEXT(_n) \ - a0 ^= b0; b0 = _n[0]; \ - a1 ^= b1; b1 = _n[1]; \ - a2 ^= b2; b2 = _n[2]; \ - a3 ^= b3; b3 = _n[3]; \ - _n += 4; - -/* arg is incremented */ -#define XOR_AND_STORE(_dst) \ - a0 ^= b0; _dst[0] = a0; \ - a1 ^= b1; _dst[1] = a1; \ - a2 ^= b2; _dst[2] = a2; \ - a3 ^= b3; _dst[3] = a3; \ - _dst += 4; - - -void -rf_nWayXor3(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; + +/* Note that first arg is not incremented but 2nd arg is. */ +#define LOAD_FIRST(_dst,_b) \ + a0 = _dst[0]; len -= 4; \ + a1 = _dst[1]; \ + a2 = _dst[2]; \ + a3 = _dst[3]; \ + b0 = _b[0]; \ + b1 = _b[1]; \ + b2 = _b[2]; \ + b3 = _b[3]; _b += 4; + +/* Note: arg is incremented. */ +#define XOR_AND_LOAD_NEXT(_n) \ + a0 ^= b0; b0 = _n[0]; \ + a1 ^= b1; b1 = _n[1]; \ + a2 ^= b2; b2 = _n[2]; \ + a3 ^= b3; b3 = _n[3]; \ + _n += 4; + +/* Arg is incremented. */ +#define XOR_AND_STORE(_dst) \ + a0 ^= b0; _dst[0] = a0; \ + a1 ^= b1; _dst[1] = a1; \ + a2 ^= b2; _dst[2] = a2; \ + a3 ^= b3; _dst[3] = a3; \ + _dst += 4; + + +void +rf_nWayXor3( + RF_ReconBuffer_t **src_rbs, + RF_ReconBuffer_t *dest_rb, + int len +) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; @@ -208,7 +211,7 @@ rf_nWayXor3(src_rbs, dest_rb, len) unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[3]++; - /* align dest to cache line */ + /* Align dest to cache line. */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++; len--; @@ -225,11 +228,12 @@ rf_nWayXor3(src_rbs, dest_rb, len) } } -void -rf_nWayXor4(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor4( + RF_ReconBuffer_t **src_rbs, + RF_ReconBuffer_t *dest_rb, + int len +) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; @@ -239,7 +243,7 @@ rf_nWayXor4(src_rbs, dest_rb, len) unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[4]++; - /* align dest to cache line */ + /* Align dest to cache line. */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; len--; @@ -257,11 +261,12 @@ rf_nWayXor4(src_rbs, dest_rb, len) } } -void -rf_nWayXor5(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor5( + RF_ReconBuffer_t **src_rbs, + RF_ReconBuffer_t *dest_rb, + int len +) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; @@ -272,7 +277,7 @@ rf_nWayXor5(src_rbs, dest_rb, len) unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[5]++; - /* align dest to cache line */ + /* Align dest to cache line. 
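All of the rf_nWayXorN() routines above share the same shape: XOR one longword at a time until the destination pointer reaches a 32-byte boundary, let the LOAD_FIRST / XOR_AND_LOAD_NEXT / XOR_AND_STORE macros handle four longwords per iteration, then finish the tail word by word. The same idea for the two-source case, written out without the macros (a standalone sketch, not the driver's code):

	/* dst ^= a ^ b; len is a count of longwords. */
	static void
	xor2_sketch(unsigned long *dst, const unsigned long *a,
	    const unsigned long *b, int len)
	{
		/* Alignment prologue: advance until dst sits on a 32-byte line. */
		while (((unsigned long)dst & 0x1f) && len > 0) {
			*dst++ ^= *a++ ^ *b++;
			len--;
		}
		/* Main loop, four longwords per iteration. */
		while (len >= 4) {
			dst[0] ^= a[0] ^ b[0];
			dst[1] ^= a[1] ^ b[1];
			dst[2] ^= a[2] ^ b[2];
			dst[3] ^= a[3] ^ b[3];
			dst += 4; a += 4; b += 4;
			len -= 4;
		}
		/* Tail. */
		while (len-- > 0)
			*dst++ ^= *a++ ^ *b++;
	}
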
*/ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; len--; @@ -291,11 +296,12 @@ rf_nWayXor5(src_rbs, dest_rb, len) } } -void -rf_nWayXor6(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor6( + RF_ReconBuffer_t **src_rbs, + RF_ReconBuffer_t *dest_rb, + int len +) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; @@ -307,7 +313,7 @@ rf_nWayXor6(src_rbs, dest_rb, len) unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[6]++; - /* align dest to cache line */ + /* Align dest to cache line. */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; len--; @@ -327,11 +333,12 @@ rf_nWayXor6(src_rbs, dest_rb, len) } } -void -rf_nWayXor7(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor7( + RF_ReconBuffer_t **src_rbs, + RF_ReconBuffer_t *dest_rb, + int len +) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; @@ -344,7 +351,7 @@ rf_nWayXor7(src_rbs, dest_rb, len) unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[7]++; - /* align dest to cache line */ + /* Align dest to cache line. */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; len--; @@ -365,11 +372,12 @@ rf_nWayXor7(src_rbs, dest_rb, len) } } -void -rf_nWayXor8(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor8( + RF_ReconBuffer_t **src_rbs, + RF_ReconBuffer_t *dest_rb, + int len +) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; @@ -383,7 +391,7 @@ rf_nWayXor8(src_rbs, dest_rb, len) unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[8]++; - /* align dest to cache line */ + /* Align dest to cache line. */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; len--; @@ -406,11 +414,12 @@ rf_nWayXor8(src_rbs, dest_rb, len) } -void -rf_nWayXor9(src_rbs, dest_rb, len) - RF_ReconBuffer_t **src_rbs; - RF_ReconBuffer_t *dest_rb; - int len; +void +rf_nWayXor9( + RF_ReconBuffer_t **src_rbs, + RF_ReconBuffer_t *dest_rb, + int len +) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; @@ -425,9 +434,10 @@ rf_nWayXor9(src_rbs, dest_rb, len) unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[9]++; - /* align dest to cache line */ + /* Align dest to cache line. 
*/ while ((((unsigned long) dst) & 0x1f)) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ + *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; len--; } while (len > 4) { @@ -443,7 +453,8 @@ rf_nWayXor9(src_rbs, dest_rb, len) XOR_AND_STORE(dst); } while (len) { - *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; + *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ + *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; len--; } } diff --git a/sys/dev/raidframe/rf_nwayxor.h b/sys/dev/raidframe/rf_nwayxor.h index e328696220c..046df6d8a84 100644 --- a/sys/dev/raidframe/rf_nwayxor.h +++ b/sys/dev/raidframe/rf_nwayxor.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_nwayxor.h,v 1.2 1999/02/16 00:03:00 niklas Exp $ */ +/* $OpenBSD: rf_nwayxor.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_nwayxor.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ + /* * rf_nwayxor.h */ @@ -29,26 +30,27 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ + /* - * rf_nwayxor.h -- types and prototypes for nwayxor module + * rf_nwayxor.h -- Types and prototypes for nwayxor module. */ -#ifndef _RF__RF_NWAYXOR_H_ -#define _RF__RF_NWAYXOR_H_ +#ifndef _RF__RF_NWAYXOR_H_ +#define _RF__RF_NWAYXOR_H_ #include "rf_types.h" #include "rf_raid.h" #include "rf_reconstruct.h" -int rf_ConfigureNWayXor(RF_ShutdownList_t ** listp); -void rf_nWayXor1(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor2(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor3(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor4(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor5(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor6(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor7(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor8(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); -void rf_nWayXor9(RF_ReconBuffer_t ** src_rbs, RF_ReconBuffer_t * dest_rb, int len); +int rf_ConfigureNWayXor(RF_ShutdownList_t **); +void rf_nWayXor1(RF_ReconBuffer_t **, RF_ReconBuffer_t *, int); +void rf_nWayXor2(RF_ReconBuffer_t **, RF_ReconBuffer_t *, int); +void rf_nWayXor3(RF_ReconBuffer_t **, RF_ReconBuffer_t *, int); +void rf_nWayXor4(RF_ReconBuffer_t **, RF_ReconBuffer_t *, int); +void rf_nWayXor5(RF_ReconBuffer_t **, RF_ReconBuffer_t *, int); +void rf_nWayXor6(RF_ReconBuffer_t **, RF_ReconBuffer_t *, int); +void rf_nWayXor7(RF_ReconBuffer_t **, RF_ReconBuffer_t *, int); +void rf_nWayXor8(RF_ReconBuffer_t **, RF_ReconBuffer_t *, int); +void rf_nWayXor9(RF_ReconBuffer_t **, RF_ReconBuffer_t *, int); -#endif /* !_RF__RF_NWAYXOR_H_ */ +#endif /* !_RF__RF_NWAYXOR_H_ */ diff --git a/sys/dev/raidframe/rf_openbsd.h b/sys/dev/raidframe/rf_openbsd.h index 17333cb9647..5b6a0e680bd 100644 --- a/sys/dev/raidframe/rf_openbsd.h +++ b/sys/dev/raidframe/rf_openbsd.h @@ -1,6 +1,6 @@ -/* $OpenBSD: rf_openbsd.h,v 1.4 2000/08/08 16:07:43 peter Exp $ */ +/* $OpenBSD: rf_openbsd.h,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */ -/*- +/* * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. * @@ -17,8 +17,8 @@ * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. @@ -36,7 +36,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/*- +/* * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. * @@ -53,8 +53,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. @@ -73,102 +73,139 @@ */ -#ifndef _RF__RF_OPENBSD_H_ -#define _RF__RF_OPENBSD_H_ +#ifndef _RF__RF_OPENBSD_H_ +#define _RF__RF_OPENBSD_H_ -#ifdef _KERNEL +#ifdef _KERNEL #include <sys/fcntl.h> #include <sys/systm.h> #include <sys/namei.h> #include <sys/vnode.h> -#endif /* _KERNEL */ - -/* The per-component label information that the user can set */ +#endif /* _KERNEL */ + +/* The per-component label information that the user can set. */ typedef struct RF_ComponentInfo_s { - int row; /* the row number of this component */ - int column; /* the column number of this component */ - int serial_number; /* a user-specified serial number for this - RAID set */ + int row; /* The row number of this component. */ + int column; /* The column number of this component. */ + int serial_number; /* + * A user-specified serial number for this + * RAID set. + */ } RF_ComponentInfo_t; -/* The per-component label information */ +/* The per-component label information. */ typedef struct RF_ComponentLabel_s { - int version; /* The version of this label. */ - int serial_number; /* a user-specified serial number for this - RAID set */ - int mod_counter; /* modification counter. Changed (usually - by incrementing) every time the label - is changed */ - int row; /* the row number of this component */ - int column; /* the column number of this component */ - int num_rows; /* number of rows in this RAID set */ - int num_columns; /* number of columns in this RAID set */ - int clean; /* 1 when clean, 0 when dirty */ - int status; /* rf_ds_optimal, rf_ds_dist_spared, whatever. */ - /* stuff that will be in version 2 of the label */ - int sectPerSU; /* Sectors per Stripe Unit */ - int SUsPerPU; /* Stripe Units per Parity Units */ - int SUsPerRU; /* Stripe Units per Reconstruction Units */ - int parityConfig; /* '0' == RAID0, '1' == RAID1, etc. */ - int maxOutstanding; /* maxOutstanding disk requests */ - int blockSize; /* size of component block. - (disklabel->d_secsize) */ - int numBlocks; /* number of blocks on this component. May - be smaller than the partition size. */ - int partitionSize; /* number of blocks on this *partition*. - Must exactly match the partition size - from the disklabel. 
*/ - int future_use[33]; /* Future expansion */ - int autoconfigure; /* automatically configure this RAID set. - 0 == no, 1 == yes */ - int root_partition; /* Use this set as / - 0 == no, 1 == yes*/ - int last_unit; /* last unit number (e.g. 0 for /dev/raid0) - of this component. Used for autoconfigure - only. */ - int config_order; /* 0 .. n. The order in which the component - should be auto-configured. E.g. 0 is will - done first, (and would become raid0). - This may be in conflict with last_unit!!?! */ - /* Not currently used. */ - int future_use2[44]; /* More future expansion */ + int version; /* The version of this label. */ + int serial_number; /* + * A user-specified serial number for this + * RAID set. + */ + int mod_counter; /* + * Modification counter. Changed (usually + * by incrementing) every time the label + * is changed. + */ + int row; /* The row number of this component. */ + int column; /* The column number of this component. */ + int num_rows; /* Number of rows in this RAID set. */ + int num_columns; /* Number of columns in this RAID set. */ + int clean; /* 1 when clean, 0 when dirty. */ + int status; /* + * rf_ds_optimal, rf_ds_dist_spared, whatever. + */ + /* Stuff that will be in version 2 of the label. */ + int sectPerSU; /* Sectors per Stripe Unit. */ + int SUsPerPU; /* Stripe Units per Parity Units. */ + int SUsPerRU; /* Stripe Units per Reconstruction Units. */ + int parityConfig; /* '0' == RAID0, '1' == RAID1, etc. */ + int maxOutstanding; /* maxOutstanding disk requests. */ + int blockSize; /* + * Size of component block. + * (disklabel->d_secsize) + */ + int numBlocks; /* + * Number of blocks on this component. May + * be smaller than the partition size. + */ + int partitionSize; /* + * Number of blocks on this *partition*. + * Must exactly match the partition size + * from the disklabel. + */ + int future_use[33]; /* Future expansion. */ + int autoconfigure; /* + * Automatically configure this RAID set. + * 0 == no, 1 == yes + */ + int root_partition; /* + * Use this set as : + * 0 == no, 1 == yes + */ + int last_unit; /* + * Last unit number (e.g. 0 for /dev/raid0) + * of this component. Used for autoconfigure + * only. + */ + int config_order; /* + * 0 .. n. The order in which the component + * should be auto-configured. E.g. 0 is will + * done first, (and would become raid0). + * This may be in conflict with last_unit !!?! + */ + /* Not currently used. */ + int fut_use2[44]; /* More future expansion. */ } RF_ComponentLabel_t; typedef struct RF_SingleComponent_s { - int row; - int column; - char component_name[50]; /* name of the component */ -} RF_SingleComponent_t; - -#ifdef _KERNEL + int row; + int column; + char component_name[50]; /* name of the component */ +} RF_SingleComponent_t; + +#ifdef _KERNEL struct raidcinfo { - struct vnode *ci_vp; /* component device's vnode */ - dev_t ci_dev; /* component device's dev_t */ - RF_ComponentLabel_t ci_label; /* components RAIDframe label */ + struct vnode *ci_vp; /* Component device's vnode. */ + dev_t ci_dev; /* Component device's dev_t. */ + RF_ComponentLabel_t ci_label; /* + * Components RAIDframe label. + */ #if 0 - size_t ci_size; /* size */ - char *ci_path; /* path to component */ - size_t ci_pathlen; /* length of component path */ + size_t ci_size; /* Size. */ + char *ci_path; /* Path to component. */ + size_t ci_pathlen; /* Length of component path. */ #endif }; -/* XXX probably belongs in a different .h file. */ +/* XXX Probably belongs in a different .h file. 
*/ typedef struct RF_AutoConfig_s { - char devname[56]; /* the name of this component */ - int flag; /* a general-purpose flag */ - dev_t dev; /* the device for this component */ - struct vnode *vp; /* Mr. Vnode Pointer */ - RF_ComponentLabel_t *clabel; /* the label */ - struct RF_AutoConfig_s *next; /* the next autoconfig structure - in this set. */ + char devname[56]; /* + * The name of this component. + */ + int flag; /* A general-purpose flag. */ + dev_t dev; /* + * The device for this + * component. + */ + struct vnode *vp; /* Master Vnode Pointer. */ + RF_ComponentLabel_t *clabel; /* The label. */ + struct RF_AutoConfig_s *next; /* + * The next autoconfig + * structure in this set. + */ } RF_AutoConfig_t; typedef struct RF_ConfigSet_s { - struct RF_AutoConfig_s *ac; /* all of the autoconfig structures for - this config set. */ - int rootable; /* Set to 1 if this set can be root */ - struct RF_ConfigSet_s *next; + struct RF_AutoConfig_s *ac; /* + * All of the autoconfig + * structures for this + * config set. + */ + int rootable; /* + * Set to 1 if this set can + * be root. + */ + struct RF_ConfigSet_s *next; } RF_ConfigSet_t; diff --git a/sys/dev/raidframe/rf_openbsdkintf.c b/sys/dev/raidframe/rf_openbsdkintf.c index 18964abf085..c8b446653d1 100644 --- a/sys/dev/raidframe/rf_openbsdkintf.c +++ b/sys/dev/raidframe/rf_openbsdkintf.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_openbsdkintf.c,v 1.20 2002/10/12 02:03:46 krw Exp $ */ +/* $OpenBSD: rf_openbsdkintf.c,v 1.21 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_netbsdkintf.c,v 1.109 2001/07/27 03:30:07 oster Exp $ */ + /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. @@ -78,9 +79,6 @@ * @(#)cd.c 8.2 (Berkeley) 11/16/93 */ - - - /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -108,11 +106,11 @@ * rights to redistribute these changes. */ -/*********************************************************** +/***************************************************************************** * - * rf_kintf.c -- the kernel interface routines for RAIDframe + * rf_kintf.c -- The kernel interface routines for RAIDframe. * - ***********************************************************/ + *****************************************************************************/ #include <sys/errno.h> @@ -156,51 +154,51 @@ int rf_kdebug_level = 0; -#ifdef RAIDDEBUG -#define db1_printf(a) do if (rf_kdebug_level > 0) printf a; while(0) +#ifdef RAIDDEBUG +#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while(0) #else /* RAIDDEBUG */ -#define db1_printf(a) (void)0 -#endif /* RAIDDEBUG */ +#define db1_printf(a) (void)0 +#endif /* ! RAIDDEBUG */ -static RF_Raid_t **raidPtrs; /* global raid device descriptors */ +static RF_Raid_t **raidPtrs; /* Global raid device descriptors. */ -RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) +RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex); -/* requests to install a spare table */ +/* Requests to install a spare table. */ static RF_SparetWait_t *rf_sparet_wait_queue; -/* responses from installation process */ +/* Responses from installation process. */ static RF_SparetWait_t *rf_sparet_resp_queue; -/* prototypes */ -void rf_KernelWakeupFunc(struct buf *); -void rf_InitBP(struct buf *, struct vnode *, unsigned, dev_t, - RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*)(struct buf *), - void *, int, struct proc *); +/* Prototypes. 
*/ +void rf_KernelWakeupFunc(struct buf *); +void rf_InitBP(struct buf *, struct vnode *, unsigned, dev_t, RF_SectorNum_t, + RF_SectorCount_t, caddr_t, void (*)(struct buf *), void *, int, + struct proc *); void raidinit(RF_Raid_t *); -void raidattach(int); -int raidsize(dev_t); -int raidopen(dev_t, int, int, struct proc *); -int raidclose(dev_t, int, int, struct proc *); -int raidioctl(dev_t, u_long, caddr_t, int, struct proc *); -int raidwrite(dev_t, struct uio *, int); -int raidread(dev_t, struct uio *, int); -void raidstrategy(struct buf *); -int raiddump(dev_t, daddr_t, caddr_t, size_t); +void raidattach(int); +int raidsize(dev_t); +int raidopen(dev_t, int, int, struct proc *); +int raidclose(dev_t, int, int, struct proc *); +int raidioctl(dev_t, u_long, caddr_t, int, struct proc *); +int raidwrite(dev_t, struct uio *, int); +int raidread(dev_t, struct uio *, int); +void raidstrategy(struct buf *); +int raiddump(dev_t, daddr_t, caddr_t, size_t); /* * Pilfered from ccd.c */ struct raidbuf { - struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */ - struct buf *rf_obp; /* ptr. to original I/O buf */ - int rf_flags; /* misc. flags */ - RF_DiskQueueData_t *req;/* the request that this was part of.. */ + struct buf rf_buf; /* New I/O buf. MUST BE FIRST!!! */ + struct buf *rf_obp; /* Ptr. to original I/O buf. */ + int rf_flags; /* Miscellaneous flags. */ + RF_DiskQueueData_t *req; /* The request that this was part of. */ }; -#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT) -#define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp) +#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT) +#define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp) /* * Some port (like i386) use a swapgeneric that wants to snoop around @@ -213,24 +211,24 @@ struct cfdriver raid_cd = { /* * XXX Not sure if the following should be replacing the raidPtrs above, - * or if it should be used in conjunction with that... + * or if it should be used in conjunction with that... */ struct raid_softc { - int sc_flags; /* flags */ - int sc_cflags; /* configuration flags */ - size_t sc_size; /* size of the raid device */ - char sc_xname[20]; /* XXX external name */ - struct disk sc_dkdev; /* generic disk device info */ - struct pool sc_cbufpool; /* component buffer pool */ - struct buf sc_q; /* used for the device queue */ + int sc_flags; /* Flags. */ + int sc_cflags; /* Configuration flags. */ + size_t sc_size; /* Size of the raid device. */ + char sc_xname[20]; /* XXX external name. */ + struct disk sc_dkdev; /* Generic disk device info. */ + struct pool sc_cbufpool; /* Component buffer pool. */ + struct buf sc_q; /* Used for the device queue. */ }; /* sc_flags */ -#define RAIDF_INITED 0x01 /* unit has been initialized */ -#define RAIDF_WLABEL 0x02 /* label area is writable */ -#define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ -#define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ -#define RAIDF_LOCKED 0x80 /* unit is locked */ +#define RAIDF_INITED 0x01 /* Unit has been initialized. */ +#define RAIDF_WLABEL 0x02 /* Label area is writable. */ +#define RAIDF_LABELLING 0x04 /* Unit is currently being labelled. */ +#define RAIDF_WANTED 0x40 /* Someone is waiting to obtain a lock. */ +#define RAIDF_LOCKED 0x80 /* Unit is locked. */ #define raidunit(x) DISKUNIT(x) int numraid = 0; @@ -240,11 +238,11 @@ int numraid = 0; * into the device tree. This is needed by some archs that look for * bootable devices in there. 
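RAIDF_LOCKED and RAIDF_WANTED above back the per-unit sleep lock that raidlock()/raidunlock() (declared above, bodies later in this file) take around configuration changes. The conventional shape of such a flag lock, as a sketch rather than this driver's exact code:

	int
	example_raidlock(struct raid_softc *rs)
	{
		int error;

		while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
			rs->sc_flags |= RAIDF_WANTED;
			error = tsleep(rs, PRIBIO | PCATCH, "raidlck", 0);
			if (error)
				return (error);
		}
		rs->sc_flags |= RAIDF_LOCKED;
		return (0);
	}

	void
	example_raidunlock(struct raid_softc *rs)
	{
		rs->sc_flags &= ~RAIDF_LOCKED;
		if ((rs->sc_flags & RAIDF_WANTED) != 0) {
			rs->sc_flags &= ~RAIDF_WANTED;
			wakeup(rs);
		}
	}
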
*/ -int rf_probe(struct device *, void *, void *); -void rf_attach(struct device *, struct device *, void *); -int rf_detach(struct device *, int); -int rf_activate(struct device *, enum devact); -void rf_zeroref(struct device *); +int rf_probe(struct device *, void *, void *); +void rf_attach(struct device *, struct device *, void *); +int rf_detach(struct device *, int); +int rf_activate(struct device *, enum devact); +void rf_zeroref(struct device *); struct cfattach raid_ca = { sizeof(struct raid_softc), rf_probe, rf_attach, @@ -255,132 +253,123 @@ struct cfattach raid_ca = { * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. * Be aware that large numbers can allow the driver to consume a lot of * kernel memory, especially on writes, and in degraded mode reads. - * - * For example: with a stripe width of 64 blocks (32k) and 5 disks, - * a single 64K write will typically require 64K for the old data, - * 64K for the old parity, and 64K for the new parity, for a total + * + * For example: with a stripe width of 64 blocks (32k) and 5 disks, + * a single 64K write will typically require 64K for the old data, + * 64K for the old parity, and 64K for the new parity, for a total * of 192K (if the parity buffer is not re-used immediately). * Even it if is used immedately, that's still 128K, which when multiplied * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. - * + * * Now in degraded mode, for example, a 64K read on the above setup may - * require data reconstruction, which will require *all* of the 4 remaining + * require data reconstruction, which will require *all* of the 4 remaining * disks to participate -- 4 * 32K/disk == 128K again. */ -#ifndef RAIDOUTSTANDING -#define RAIDOUTSTANDING 6 +#ifndef RAIDOUTSTANDING +#define RAIDOUTSTANDING 6 #endif -#define RAIDLABELDEV(dev) \ +#define RAIDLABELDEV(dev) \ (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) -/* declared here, and made public, for the benefit of KVM stuff.. */ -struct raid_softc *raid_softc; +/* Declared here, and made public, for the benefit of KVM stuff... 
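The RAIDOUTSTANDING comment above is per-request buffer arithmetic; here are its figures written out, using only the numbers the comment itself gives (a hypothetical helper, units in KB):

	static int
	example_degraded_memory_budget(void)
	{
		int old_data   = 64;		/* pre-read of the old data */
		int old_parity = 64;		/* pre-read of the old parity */
		int new_parity = 64;		/* newly computed parity */
		/* 192 KB per write if the parity buffer is not reused... */
		int per_write = old_data + old_parity + new_parity;
		/* ...128 KB if it is reused immediately. */
		int per_write_reused = per_write - new_parity;
		int scratch  = 10 * per_write_reused;	/* 1280 KB for 10 requests */
		int incoming = 10 * 64;			/* on top of 640 KB of data */

		return (scratch + incoming);		/* roughly 1.9 MB total */
	}
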
*/ +struct raid_softc *raid_softc; struct raid_softc **raid_scPtrs; -void rf_shutdown_hook(RF_ThreadArg_t); -void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, struct disklabel *); -void raidgetdisklabel(dev_t); -void raidmakedisklabel(struct raid_softc *); +void rf_shutdown_hook(RF_ThreadArg_t); +void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, struct disklabel *); +void raidgetdisklabel(dev_t); +void raidmakedisklabel(struct raid_softc *); -int raidlock(struct raid_softc *); -void raidunlock(struct raid_softc *); +int raidlock(struct raid_softc *); +void raidunlock(struct raid_softc *); -void rf_markalldirty(RF_Raid_t *); +void rf_markalldirty(RF_Raid_t *); struct device *raidrootdev; -int findblkmajor(struct device *dv); +int findblkmajor(struct device *dv); char *findblkname(int); void rf_ReconThread(struct rf_recon_req *); /* XXX what I want is: */ -/*void rf_ReconThread(RF_Raid_t *raidPtr); */ +/*void rf_ReconThread(RF_Raid_t *raidPtr);*/ void rf_RewriteParityThread(RF_Raid_t *raidPtr); void rf_CopybackThread(RF_Raid_t *raidPtr); void rf_ReconstructInPlaceThread(struct rf_recon_req *); -#ifdef RAID_AUTOCONFIG +#ifdef RAID_AUTOCONFIG void rf_buildroothack(void *); -int rf_reasonable_label(RF_ComponentLabel_t *); -#endif +int rf_reasonable_label(RF_ComponentLabel_t *); +#endif /* RAID_AUTOCONFIG */ RF_AutoConfig_t *rf_find_raid_components(void); RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); -int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); +int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); -int rf_set_autoconfig(RF_Raid_t *, int); -int rf_set_rootpartition(RF_Raid_t *, int); +int rf_set_autoconfig(RF_Raid_t *, int); +int rf_set_rootpartition(RF_Raid_t *, int); void rf_release_all_vps(RF_ConfigSet_t *); void rf_cleanup_config_set(RF_ConfigSet_t *); -int rf_have_enough_components(RF_ConfigSet_t *); -int rf_auto_config_set(RF_ConfigSet_t *, int *); - -#ifdef RAID_AUTOCONFIG -static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not - allow autoconfig to take place. - Note that this is overridden by having - RAID_AUTOCONFIG as an option in the - kernel config file. */ -#endif +int rf_have_enough_components(RF_ConfigSet_t *); +int rf_auto_config_set(RF_ConfigSet_t *, int *); + +#ifdef RAID_AUTOCONFIG +static int raidautoconfig = 0; /* + * Debugging, mostly. Set to 0 to not + * allow autoconfig to take place. + * Note that this is overridden by having + * RAID_AUTOCONFIG as an option in the + * kernel config file. 
+ */ +#endif /* RAID_AUTOCONFIG */ int -rf_probe(parent, match_, aux) - struct device *parent; - void *match_; - void *aux; +rf_probe(struct device *parent, void *match_, void *aux) { return 0; } void -rf_attach(parent, self, aux) - struct device *parent, *self; - void *aux; +rf_attach(struct device *parent, struct device *self, void *aux) { - /* struct raid_softc *raid = (void *)self; */ + /*struct raid_softc *raid = (void *)self;*/ } int -rf_detach(self, flags) - struct device *self; - int flags; +rf_detach(struct device *self, int flags) { return 0; } int -rf_activate(self, act) - struct device *self; - enum devact act; +rf_activate(struct device *self, enum devact act) { return 0; } void -rf_zeroref(self) - struct device *self; +rf_zeroref(struct device *self) { } void -raidattach(num) - int num; +raidattach(int num) { int raidID; int i, rc; -#ifdef RAID_AUTOCONFIG - RF_AutoConfig_t *ac_list; /* autoconfig list */ +#ifdef RAID_AUTOCONFIG + RF_AutoConfig_t *ac_list; /* Autoconfig list. */ RF_ConfigSet_t *config_sets; -#endif +#endif /* RAID_AUTOCONFIG */ db1_printf(("raidattach: Asked for %d units\n", num)); if (num <= 0) { -#ifdef DIAGNOSTIC +#ifdef DIAGNOSTIC panic("raidattach: count <= 0"); -#endif +#endif /* DIAGNOSTIC */ return; } @@ -396,27 +385,26 @@ raidattach(num) rc = rf_mutex_init(&rf_sparet_wait_mutex); if (rc) { - RF_PANIC(); - } + RF_PANIC(); + } rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; for (i = 0; i < num; i++) - raidPtrs[i] = NULL; + raidPtrs[i] = NULL; rc = rf_BootRaidframe(); if (rc == 0) - printf("Kernelized RAIDframe activated\n"); + printf("Kernelized RAIDframe activated\n"); else - panic("Serious error booting RAID!!"); + panic("Serious error booting RAID !!!"); /* - * Put together some datastructures like the CCD device does.. + * Put together some datastructures like the CCD device does... * This lets us lock the device and what-not when it gets opened. */ - + raid_softc = (struct raid_softc *) - malloc(num * sizeof(struct raid_softc), - M_RAIDFRAME, M_NOWAIT); + malloc(num * sizeof(struct raid_softc), M_RAIDFRAME, M_NOWAIT); if (raid_softc == NULL) { printf("WARNING: no memory for RAIDframe driver\n"); return; @@ -425,8 +413,8 @@ raidattach(num) bzero(raid_softc, num * sizeof (struct raid_softc)); raid_scPtrs = (struct raid_softc **) - malloc(num * sizeof(struct raid_softc *), - M_RAIDFRAME, M_NOWAIT); + malloc(num * sizeof(struct raid_softc *), M_RAIDFRAME, + M_NOWAIT); if (raid_scPtrs == NULL) { printf("WARNING: no memory for RAIDframe driver\n"); return; @@ -435,7 +423,7 @@ raidattach(num) bzero(raid_scPtrs, num * sizeof (struct raid_softc *)); raidrootdev = (struct device *)malloc(num * sizeof(struct device), - M_RAIDFRAME, M_NOWAIT); + M_RAIDFRAME, M_NOWAIT); if (raidrootdev == NULL) { panic("No memory for RAIDframe driver!!?!?!"); } @@ -464,38 +452,41 @@ raidattach(num) raid_cd.cd_devs = (void **) raid_scPtrs; raid_cd.cd_ndevs = num; -#ifdef RAID_AUTOCONFIG +#ifdef RAID_AUTOCONFIG raidautoconfig = 1; if (raidautoconfig) { - /* 1. locate all RAID components on the system */ + /* 1. Locate all RAID components on the system. */ #ifdef RAIDDEBUG printf("Searching for raid components...\n"); -#endif +#endif /* RAIDDEBUG */ ac_list = rf_find_raid_components(); - /* 2. sort them into their respective sets */ + /* 2. Sort them into their respective sets. */ config_sets = rf_create_auto_sets(ac_list); - /* 3. evaluate each set and configure the valid ones - This gets done in rf_buildroothack() */ + /* + * 3. 
Evaluate each set and configure the valid ones + * This gets done in rf_buildroothack(). + */ - /* schedule the creation of the thread to do the - "/ on RAID" stuff */ + /* + * Schedule the creation of the thread to do the + * "/ on RAID" stuff. + */ rf_buildroothack(config_sets); } -#endif +#endif /* RAID_AUTOCONFIG */ } -#ifdef RAID_AUTOCONFIG +#ifdef RAID_AUTOCONFIG void -rf_buildroothack(arg) - void *arg; +rf_buildroothack(void *arg) { RF_ConfigSet_t *config_sets = arg; RF_ConfigSet_t *cset; @@ -511,30 +502,34 @@ rf_buildroothack(arg) cset = config_sets; while(cset != NULL ) { next_cset = cset->next; - if (rf_have_enough_components(cset) && + if (rf_have_enough_components(cset) && cset->ac->clabel->autoconfigure==1) { retcode = rf_auto_config_set(cset,&raidID); if (!retcode) { if (cset->rootable) { rootID = raidID; #ifdef RAIDDEBUG - printf("eligible root device %d: raid%d\n", num_root, rootID); + printf("eligible root device %d:" + " raid%d\n", num_root, rootID); #endif /* RAIDDEBUG */ num_root++; } } else { /* The autoconfig didn't work :( */ #ifdef RAIDDEBUG - printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID); -#endif + printf("Autoconfig failed with code %d for" + " raid%d\n", retcode, raidID); +#endif /* RAIDDEBUG */ rf_release_all_vps(cset); } } else { - /* we're not autoconfiguring this set... - release the associated resources */ + /* + * We're not autoconfiguring this set... + * Release the associated resources. + */ rf_release_all_vps(cset); } - /* cleanup */ + /* Cleanup. */ rf_cleanup_config_set(cset); cset = next_cset; } @@ -548,26 +543,25 @@ rf_buildroothack(arg) if (majdev < 0) boothowto |= RB_ASKNAME; else { - rootdev = MAKEDISKDEV(majdev,rootID,0); + rootdev = MAKEDISKDEV(majdev,rootID,0); boothowto |= RB_DFLTROOT; } } else if (num_root > 1) { - /* we can't guess.. require the user to answer... */ + /* We can't guess... Require the user to answer... */ boothowto |= RB_ASKNAME; } } } -#endif +#endif /* RAID_AUTOCONFIG */ void -rf_shutdown_hook(arg) - RF_ThreadArg_t arg; +rf_shutdown_hook(RF_ThreadArg_t arg) { int unit; struct raid_softc *rs; RF_Raid_t *raidPtr; - /* Don't do it if we are not "safe" */ + /* Don't do it if we are not "safe". */ if (boothowto & RB_NOSYNC) return; @@ -575,7 +569,7 @@ rf_shutdown_hook(arg) unit = raidPtr->raidid; rs = &raid_softc[unit]; - /* Shutdown the system */ + /* Shutdown the system. */ if (rf_hook_cookies != NULL && rf_hook_cookies[unit] != NULL) rf_hook_cookies[unit] = NULL; @@ -595,8 +589,7 @@ rf_shutdown_hook(arg) } int -raidsize(dev) - dev_t dev; +raidsize(dev_t dev) { struct raid_softc *rs; struct disklabel *lp; @@ -631,11 +624,7 @@ raidsize(dev) } int -raiddump(dev, blkno, va, size) - dev_t dev; - daddr_t blkno; - caddr_t va; - size_t size; +raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) { /* Not implemented. */ return (ENXIO); @@ -643,17 +632,14 @@ raiddump(dev, blkno, va, size) /* ARGSUSED */ int -raidopen(dev, flags, fmt, p) - dev_t dev; - int flags, fmt; - struct proc *p; +raidopen(dev_t dev, int flags, int fmt, struct proc *p) { int unit = raidunit(dev); struct raid_softc *rs; struct disklabel *lp; int part,pmask; int error = 0; - + if (unit >= numraid) return (ENXIO); rs = &raid_softc[unit]; @@ -672,7 +658,7 @@ raidopen(dev, flags, fmt, p) if ((rs->sc_flags & RAIDF_INITED) && (rs->sc_dkdev.dk_openmask == 0)) raidgetdisklabel(dev); - /* make sure that this partition exists */ + /* Make sure that this partition exists. 
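The three autoconfiguration steps listed in raidattach() map directly onto the helpers declared earlier in this file; condensed, the loop that rf_buildroothack() runs over the config sets looks roughly like this (error handling and the root-device bookkeeping omitted):

	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *cset, *next;
	int raidID;

	ac_list = rf_find_raid_components();	/* 1. scan for components */
	cset = rf_create_auto_sets(ac_list);	/* 2. group them into sets */
	while (cset != NULL) {			/* 3. configure the valid ones */
		next = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			if (rf_auto_config_set(cset, &raidID) != 0)
				rf_release_all_vps(cset);
		} else
			rf_release_all_vps(cset);
		rf_cleanup_config_set(cset);
		cset = next;
	}
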
*/ if (part != RAW_PART) { db1_printf(("Not a raw partition..\n")); @@ -686,7 +672,7 @@ raidopen(dev, flags, fmt, p) } } - /* Prevent this unit from being unconfigured while open. */ + /* Prevent this unit from being unconfigured while opened. */ switch (fmt) { case S_IFCHR: rs->sc_dkdev.dk_copenmask |= pmask; @@ -699,15 +685,19 @@ raidopen(dev, flags, fmt, p) if ((rs->sc_dkdev.dk_openmask == 0) && ((rs->sc_flags & RAIDF_INITED) != 0)) { - /* First one... mark things as dirty... Note that we *MUST* - have done a configure before this. I DO NOT WANT TO BE - SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED - THAT THEY BELONG TOGETHER!!!!! */ - /* XXX should check to see if we're only open for reading - here... If so, we needn't do this, but then need some - other way of keeping track of what's happened.. */ + /* + * First one... Mark things as dirty... Note that we *MUST* + * have done a configure before this. I DO NOT WANT TO BE + * SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED + * THAT THEY BELONG TOGETHER!!!!! + */ + /* + * XXX should check to see if we're only open for reading + * here... If so, we needn't do this, but then need some + * other way of keeping track of what's happened... + */ - rf_markalldirty( raidPtrs[unit] ); + rf_markalldirty( raidPtrs[unit] ); } rs->sc_dkdev.dk_openmask = @@ -720,10 +710,7 @@ raidopen(dev, flags, fmt, p) /* ARGSUSED */ int -raidclose(dev, flags, fmt, p) - dev_t dev; - int flags, fmt; - struct proc *p; +raidclose(dev_t dev, int flags, int fmt, struct proc *p) { int unit = raidunit(dev); struct raid_softc *rs; @@ -754,10 +741,12 @@ raidclose(dev, flags, fmt, p) if ((rs->sc_dkdev.dk_openmask == 0) && ((rs->sc_flags & RAIDF_INITED) != 0)) { - /* Last one... device is not unconfigured yet. - Device shutdown has taken care of setting the - clean bits if RAIDF_INITED is not set - mark things as clean... */ + /* + * Last one... Device is not unconfigured yet. + * Device shutdown has taken care of setting the + * clean bits if RAIDF_INITED is not set. + * Mark things as clean... + */ db1_printf(("Last one on raid%d. Updating status.\n",unit)); rf_update_component_labels(raidPtrs[unit], RF_FINAL_COMPONENT_UPDATE); @@ -768,8 +757,7 @@ raidclose(dev, flags, fmt, p) } void -raidstrategy(bp) - struct buf *bp; +raidstrategy(struct buf *bp) { int s; @@ -838,10 +826,7 @@ raidstrategy_end: /* ARGSUSED */ int -raidread(dev, uio, flags) - dev_t dev; - struct uio *uio; - int flags; +raidread(dev_t dev, struct uio *uio, int flags) { int unit = raidunit(dev); struct raid_softc *rs; @@ -862,10 +847,7 @@ raidread(dev, uio, flags) /* ARGSUSED */ int -raidwrite(dev, uio, flags) - dev_t dev; - struct uio *uio; - int flags; +raidwrite(dev_t dev, struct uio *uio, int flags) { int unit = raidunit(dev); struct raid_softc *rs; @@ -881,12 +863,7 @@ raidwrite(dev, uio, flags) } int -raidioctl(dev, cmd, data, flag, p) - dev_t dev; - u_long cmd; - caddr_t data; - int flag; - struct proc *p; +raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) { int unit = raidunit(dev); int error = 0; @@ -965,19 +942,19 @@ raidioctl(dev, cmd, data, flag, p) if ((rs->sc_flags & RAIDF_INITED) == 0) return (ENXIO); } - + switch (cmd) { - /* Configure the system */ + /* Configure the system. */ case RAIDFRAME_CONFIGURE: if (raidPtr->valid) { - /* There is a valid RAID set running on this unit! */ + /* There is a valid RAID set running on this unit ! 
*/ printf("raid%d: Device already configured!\n",unit); return(EINVAL); } /* - * Copy-in the configuration information + * Copy-in the configuration information. * data points to a pointer to the configuration structure. */ u_cfg = *((RF_Config_t **)data); @@ -998,7 +975,7 @@ raidioctl(dev, cmd, data, flag, p) */ if (k_cfg->layoutSpecificSize) { if (k_cfg->layoutSpecificSize > 10000) { - /* sanity check */ + /* Sanity check. */ RF_Free(k_cfg, sizeof(RF_Config_t)); return (EINVAL); } @@ -1012,38 +989,40 @@ raidioctl(dev, cmd, data, flag, p) (caddr_t)specific_buf, k_cfg->layoutSpecificSize); if (retcode) { RF_Free(k_cfg, sizeof(RF_Config_t)); - RF_Free(specific_buf, + RF_Free(specific_buf, k_cfg->layoutSpecificSize); return (retcode); } } else specific_buf = NULL; k_cfg->layoutSpecific = specific_buf; - + /* * We should do some kind of sanity check on the * configuration. - * Store the sum of all the bytes in the last byte? + * Store the sum of all the bytes in the last byte ? */ /* * Clear the entire RAID descriptor, just to make sure - * there is no stale data left in the case of a - * reconfiguration + * there is no stale data left in the case of a + * reconfiguration. */ bzero((char *) raidPtr, sizeof(RF_Raid_t)); - /* configure the system */ + /* Configure the system. */ raidPtr->raidid = unit; retcode = rf_Configure(raidPtr, k_cfg, NULL); if (retcode == 0) { - /* allow this many simultaneous IO's to - this RAID device */ + /* + * Allow this many simultaneous IO's to + * this RAID device. + */ raidPtr->openings = RAIDOUTSTANDING; - + raidinit(raidPtr); rf_markalldirty(raidPtr); } @@ -1053,12 +1032,12 @@ raidioctl(dev, cmd, data, flag, p) RF_Free(specific_buf, k_cfg->layoutSpecificSize); } RF_Free(k_cfg, sizeof (RF_Config_t)); - + return (retcode); - + case RAIDFRAME_SHUTDOWN: - /* Shutdown the system */ - + /* Shutdown the system. */ + if ((error = raidlock(rs)) != 0) return (error); @@ -1093,14 +1072,18 @@ raidioctl(dev, cmd, data, flag, p) raidunlock(rs); return (retcode); - + case RAIDFRAME_GET_COMPONENT_LABEL: clabel_ptr = (RF_ComponentLabel_t **) data; - /* need to read the component label for the disk indicated - by row,column in clabel */ + /* + * We need to read the component label for the disk indicated + * by row,column in clabel. + */ - /* For practice, let's get it directly fromdisk, rather - than from the in-core copy */ + /* + * For practice, let's get it directly from disk, rather + * than from the in-core copy. + */ RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), (RF_ComponentLabel_t *)); if (clabel == NULL) @@ -1108,14 +1091,14 @@ raidioctl(dev, cmd, data, flag, p) bzero((char *) clabel, sizeof(RF_ComponentLabel_t)); - retcode = copyin( *clabel_ptr, clabel, + retcode = copyin( *clabel_ptr, clabel, sizeof(RF_ComponentLabel_t)); if (retcode) { RF_Free( clabel, sizeof(RF_ComponentLabel_t)); return(retcode); } - + row = clabel->row; column = clabel->column; @@ -1124,12 +1107,10 @@ raidioctl(dev, cmd, data, flag, p) return(EINVAL); } - raidread_component_label( - raidPtr->Disks[row][column].dev, - raidPtr->raid_cinfo[row][column].ci_vp, - clabel ); + raidread_component_label(raidPtr->Disks[row][column].dev, + raidPtr->raid_cinfo[row][column].ci_vp, clabel ); - retcode = copyout((caddr_t) clabel, + retcode = copyout((caddr_t) clabel, (caddr_t) *clabel_ptr, sizeof(RF_ComponentLabel_t)); RF_Free( clabel, sizeof(RF_ComponentLabel_t)); @@ -1139,10 +1120,11 @@ raidioctl(dev, cmd, data, flag, p) clabel = (RF_ComponentLabel_t *) data; /* XXX check the label for valid stuff... 
*/ - /* Note that some things *should not* get modified -- - the user should be re-initing the labels instead of - trying to patch things. - */ + /* + * Note that some things *should not* get modified -- + * the user should be re-initing the labels instead of + * trying to patch things. + */ #ifdef RAIDDEBUG printf("Got component label:\n"); @@ -1155,7 +1137,7 @@ raidioctl(dev, cmd, data, flag, p) printf("Num Columns: %d\n", clabel->num_columns); printf("Clean: %d\n", clabel->clean); printf("Status: %d\n", clabel->status); -#endif +#endif /* RAIDDEBUG */ row = clabel->row; column = clabel->column; @@ -1168,24 +1150,22 @@ raidioctl(dev, cmd, data, flag, p) /* XXX this isn't allowed to do anything for now :-) */ #if 0 - raidwrite_component_label( - raidPtr->Disks[row][column].dev, - raidPtr->raid_cinfo[row][column].ci_vp, - clabel ); + raidwrite_component_label(raidPtr->Disks[row][column].dev, + raidPtr->raid_cinfo[row][column].ci_vp, clabel ); #endif return (0); - case RAIDFRAME_INIT_LABELS: + case RAIDFRAME_INIT_LABELS: clabel = (RF_ComponentLabel_t *) data; - /* - we only want the serial number from - the above. We get all the rest of the information - from the config that was used to create this RAID - set. - */ + /* + * We only want the serial number from the above. + * We get all the rest of the information from + * the config that was used to create this RAID + * set. + */ raidPtr->serial_number = clabel->serial_number; - + raid_init_component_label(raidPtr, &ci_label); ci_label.serial_number = clabel->serial_number; @@ -1197,27 +1177,27 @@ raidioctl(dev, cmd, data, flag, p) ci_label.partitionSize = diskPtr->partitionSize; ci_label.column = column; - raidwrite_component_label( - raidPtr->Disks[row][column].dev, - raidPtr->raid_cinfo[row][column].ci_vp, + raidwrite_component_label( + raidPtr->Disks[row][column].dev, + raidPtr->raid_cinfo[row][column].ci_vp, &ci_label ); } } } return (retcode); - + case RAIDFRAME_REWRITEPARITY: if (raidPtr->Layout.map->faultsTolerated == 0) { - /* Parity for RAID 0 is trivially correct */ + /* Parity for RAID 0 is trivially correct. */ raidPtr->parity_good = RF_RAID_CLEAN; return(0); } - + if (raidPtr->parity_rewrite_in_progress == 1) { - /* Re-write is already in progress! */ + /* Re-write is already in progress ! */ return(EINVAL); } @@ -1251,14 +1231,14 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_DELETE_COMPONENT: componentPtr = (RF_SingleComponent_t *)data; - memcpy( &component, componentPtr, + memcpy( &component, componentPtr, sizeof(RF_SingleComponent_t)); retcode = rf_delete_component(raidPtr, &component); return(retcode); case RAIDFRAME_INCORPORATE_HOT_SPARE: componentPtr = (RF_SingleComponent_t *)data; - memcpy( &component, componentPtr, + memcpy( &component, componentPtr, sizeof(RF_SingleComponent_t)); retcode = rf_incorporate_hot_spare(raidPtr, &component); return(retcode); @@ -1266,17 +1246,17 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_REBUILD_IN_PLACE: if (raidPtr->Layout.map->faultsTolerated == 0) { - /* Can't do this on a RAID 0!! */ + /* Can't do this on a RAID 0 !! */ return(EINVAL); } if (raidPtr->recon_in_progress == 1) { - /* a reconstruct is already in progress! */ + /* A reconstruct is already in progress ! 
*/ return(EINVAL); } componentPtr = (RF_SingleComponent_t *) data; - memcpy( &component, componentPtr, + memcpy( &component, componentPtr, sizeof(RF_SingleComponent_t)); row = component.row; column = component.column; @@ -1345,7 +1325,7 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_RESET_ACCTOTALS: bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals)); return (0); - + case RAIDFRAME_GET_ACCTOTALS: totals = (RF_AccTotals_t *) data; *totals = raidPtr->acc_totals; @@ -1354,25 +1334,25 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_KEEP_ACCTOTALS: raidPtr->keep_acc_totals = *(int *)data; return (0); - + case RAIDFRAME_GET_SIZE: *(int *) data = raidPtr->totalSectors; return (0); - /* fail a disk & optionally start reconstruction */ + /* Fail a disk & optionally start reconstruction. */ case RAIDFRAME_FAIL_DISK: rr = (struct rf_recon_req *)data; - + if (rr->row < 0 || rr->row >= raidPtr->numRow || rr->col < 0 || rr->col >= raidPtr->numCol) return (EINVAL); db1_printf(("raid%d: Failing the disk: row: %d col: %d\n", - unit, rr->row, rr->col)); - + unit, rr->row, rr->col)); + /* * Make a copy of the recon request so that we don't - * rely on the user's buffer + * rely on the user's buffer. */ RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); if (rrcopy == NULL) @@ -1384,32 +1364,34 @@ raidioctl(dev, cmd, data, flag, p) rf_ReconThread, rrcopy,"raid_recon"); return (0); - + /* * Invoke a copyback operation after recon on whatever * disk needs it, if any. */ - case RAIDFRAME_COPYBACK: + case RAIDFRAME_COPYBACK: if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0!! */ + /* This makes no sense on a RAID 0 !! */ return(EINVAL); } - + if (raidPtr->copyback_in_progress == 1) { - /* Copyback is already in progress! */ + /* Copyback is already in progress ! */ return(EINVAL); } - + retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, rf_CopybackThread, raidPtr,"raid_copyback"); return (retcode); - /* Return the percentage completion of reconstruction */ + /* Return the percentage completion of reconstruction. */ case RAIDFRAME_CHECK_RECON_STATUS: if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0, so tell the - user it's done. */ + /* + * This makes no sense on a RAID 0, so tell the + * user it's done. + */ *(int *) data = 100; return(0); } @@ -1429,9 +1411,9 @@ raidioctl(dev, cmd, data, flag, p) progressInfo.completed = 100; progressInfo.total = 100; } else { - progressInfo.total = + progressInfo.total = raidPtr->reconControl[row]->numRUsTotal; - progressInfo.completed = + progressInfo.completed = raidPtr->reconControl[row]->numRUsComplete; progressInfo.remaining = progressInfo.total - progressInfo.completed; @@ -1443,14 +1425,16 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0, so tell the - user it's done. */ + /* + * This makes no sense on a RAID 0, so tell the + * user it's done. 
+ */ *(int *) data = 100; return(0); } if (raidPtr->parity_rewrite_in_progress == 1) { - *(int *) data = 100 * - raidPtr->parity_rewrite_stripes_done / + *(int *) data = 100 * + raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe; } else { *(int *) data = 100; @@ -1461,7 +1445,7 @@ raidioctl(dev, cmd, data, flag, p) progressInfoPtr = (RF_ProgressInfo_t **) data; if (raidPtr->parity_rewrite_in_progress == 1) { progressInfo.total = raidPtr->Layout.numStripe; - progressInfo.completed = + progressInfo.completed = raidPtr->parity_rewrite_stripes_done; progressInfo.remaining = progressInfo.total - progressInfo.completed; @@ -1477,7 +1461,7 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_CHECK_COPYBACK_STATUS: if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0 */ + /* This makes no sense on a RAID 0 !! */ *(int *) data = 100; return(0); } @@ -1493,7 +1477,7 @@ raidioctl(dev, cmd, data, flag, p) progressInfoPtr = (RF_ProgressInfo_t **) data; if (raidPtr->copyback_in_progress == 1) { progressInfo.total = raidPtr->Layout.numStripe; - progressInfo.completed = + progressInfo.completed = raidPtr->copyback_stripes_done; progressInfo.remaining = progressInfo.total - progressInfo.completed; @@ -1529,12 +1513,12 @@ raidioctl(dev, cmd, data, flag, p) waitreq = rf_sparet_wait_queue; rf_sparet_wait_queue = rf_sparet_wait_queue->next; RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - + *((RF_SparetWait_t *)data) = *waitreq; - + RF_Free(waitreq, sizeof *waitreq); return (0); - + case RAIDFRAME_ABORT_SPARET_WAIT: /* * Wakes up a process waiting on SPARET_WAIT and puts an @@ -1552,15 +1536,15 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_SEND_SPARET: /* * Used by the spare table daemon to deliver a spare table - * into the kernel + * into the kernel. */ - - /* Install the spare table */ + + /* Install the spare table. */ retcode = rf_SetSpareTable(raidPtr,*(void **)data); - + /* - * Respond to the requestor. the return status of the - * spare table installation is passed in the "fcol" field + * Respond to the requestor. The return status of the + * spare table installation is passed in the "fcol" field. */ RF_Malloc(waitreq, sizeof *waitreq, (RF_SparetWait_t *)); waitreq->fcol = retcode; @@ -1569,17 +1553,17 @@ raidioctl(dev, cmd, data, flag, p) rf_sparet_resp_queue = waitreq; wakeup(&rf_sparet_resp_queue); RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); - + return (retcode); #endif - /* fall through to the os-specific code below */ + /* Fall through to the os-specific code below. */ default: break; } - + if (!raidPtr->valid) return (EINVAL); - + /* * Add support for "regular" device ioctls here. */ @@ -1633,7 +1617,7 @@ raidioctl(dev, cmd, data, flag, p) case DIOCGPDINFO: raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); break; - + default: retcode = ENOTTY; } @@ -1642,35 +1626,34 @@ raidioctl(dev, cmd, data, flag, p) } /* - * raidinit -- complete the rest of the initialization for the + * raidinit -- Complete the rest of the initialization for the * RAIDframe device. */ void -raidinit(raidPtr) - RF_Raid_t *raidPtr; +raidinit(RF_Raid_t *raidPtr) { struct raid_softc *rs; struct cfdata *cf; - int unit; + int unit; unit = raidPtr->raidid; rs = &raid_softc[unit]; pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0, 0, 0, "raidpl", NULL); - + /* XXX should check return code first... */ rs->sc_flags |= RAIDF_INITED; - /* XXX doesn't check bounds.*/ + /* XXX doesn't check bounds. 
*/ sprintf(rs->sc_xname, "raid%d", unit); - rs->sc_dkdev.dk_name = rs->sc_xname; + rs->sc_dkdev.dk_name = rs->sc_xname; /* * disk_attach actually creates space for the CPU disklabel, among - * other things, so it's critical to call this *BEFORE* we - * try putzing with disklabels. + * other things, so it's critical to call this *BEFORE* we try + * putzing with disklabels. */ disk_attach(&rs->sc_dkdev); @@ -1699,18 +1682,17 @@ raidinit(raidPtr) } /* - * Wake up the daemon & tell it to get us a spare table + * Wake up the daemon & tell it to get us a spare table. * XXX - * The entries in the queues should be tagged with the raidPtr so that in the - * extremely rare case that two recons happen at once, we know for - * which device were requesting a spare table. + * The entries in the queues should be tagged with the raidPtr so that + * in the extremely rare case that two recons happen at once, we know + * which devices were requesting a spare table. * XXX - * + * * XXX This code is not currently used. GO */ int -rf_GetSpareTableFromDaemon(req) - RF_SparetWait_t *req; +rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) { int retcode; @@ -1719,39 +1701,38 @@ rf_GetSpareTableFromDaemon(req) rf_sparet_wait_queue = req; wakeup(&rf_sparet_wait_queue); - /* mpsleep unlocks the mutex */ + /* mpsleep unlocks the mutex. */ while (!rf_sparet_resp_queue) { tsleep(&rf_sparet_resp_queue, PRIBIO, - "raidframe getsparetable", 0); + "RAIDframe getsparetable", 0); } req = rf_sparet_resp_queue; rf_sparet_resp_queue = req->next; RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); retcode = req->fcol; - /* this is not the same req as we alloc'd */ + /* This is not the same req as we alloc'd. */ RF_Free(req, sizeof *req); return (retcode); } /* * A wrapper around rf_DoAccess that extracts appropriate info from the - * bp & passes it down. - * Any calls originating in the kernel must use non-blocking I/O - * do some extra sanity checking to return "appropriate" error values for - * certain conditions (to make some standard utilities work) - * + * bp and passes it down. + * Any calls originating in the kernel must use non-blocking I/O. + * Do some extra sanity checking to return "appropriate" error values for + * certain conditions (to make some standard utilities work). + * * Formerly known as: rf_DoAccessKernel */ void -raidstart(raidPtr) - RF_Raid_t *raidPtr; +raidstart(RF_Raid_t *raidPtr) { RF_SectorCount_t num_blocks, pb, sum; RF_RaidAddr_t raid_addr; int retcode; struct partition *pp; - daddr_t blocknum; + daddr_t blocknum; int unit; struct raid_softc *rs; int do_async; @@ -1760,10 +1741,10 @@ raidstart(raidPtr) unit = raidPtr->raidid; rs = &raid_softc[unit]; - /* quick check to see if anything has died recently */ + /* Quick check to see if anything has died recently. */ RF_LOCK_MUTEX(raidPtr->mutex); if (raidPtr->numNewFailures > 0) { - rf_update_component_labels(raidPtr, + rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); raidPtr->numNewFailures--; } @@ -1773,41 +1754,45 @@ raidstart(raidPtr) RF_LOCK_MUTEX(raidPtr->mutex); while (raidPtr->openings > 0) { RF_UNLOCK_MUTEX(raidPtr->mutex); - + bp = rs->sc_q.b_actf; if (bp == NULL) { - /* nothing more to do */ + /* Nothing more to do. */ return; } rs->sc_q.b_actf = bp->b_actf; - /* Ok, for the bp we have here, bp->b_blkno is relative to the - * partition.. Need to make it absolute to the underlying - * device.. */ - + /* + * Ok, for the bp we have here, bp->b_blkno is relative to the + * partition... We need to make it absolute to the underlying + * device... 
+ */ + blocknum = bp->b_blkno; if (DISKPART(bp->b_dev) != RAW_PART) { pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; blocknum += pp->p_offset; } - - db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, + + db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum)); - + db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); - - /* *THIS* is where we adjust what block we're going to... - * but DO NOT TOUCH bp->b_blkno!!! */ + + /* + * *THIS* is where we adjust what block we're going to... + * But DO NOT TOUCH bp->b_blkno !!! + */ raid_addr = blocknum; - + num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0; sum = raid_addr + num_blocks + pb; if (1 || rf_debugKernelAccess) { - db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", - (int) raid_addr, (int) sum, (int) num_blocks, - (int) pb, (int) bp->b_resid)); + db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d)" + " (%d)\n", (int)raid_addr, (int)sum, + (int)num_blocks, (int)pb, (int)bp->b_resid)); } if ((sum > raidPtr->totalSectors) || (sum < raid_addr) || (sum < num_blocks) || (sum < pb)) { @@ -1815,31 +1800,31 @@ raidstart(raidPtr) bp->b_flags |= B_ERROR; bp->b_resid = bp->b_bcount; /* db1_printf(("%s: Calling biodone on 0x%x\n", - __func__, bp)); */ + __func__, bp)); */ splassert(IPL_BIO); biodone(bp); RF_LOCK_MUTEX(raidPtr->mutex); continue; } /* - * XXX rf_DoAccess() should do this, not just DoAccessKernel() + * XXX rf_DoAccess() should do this, not just DoAccessKernel(). */ - + if (bp->b_bcount & raidPtr->sectorMask) { bp->b_error = EINVAL; bp->b_flags |= B_ERROR; bp->b_resid = bp->b_bcount; /* db1_printf(("%s: Calling biodone on 0x%x\n", - __func__, bp)); */ + __func__, bp)); */ splassert(IPL_BIO); biodone(bp); RF_LOCK_MUTEX(raidPtr->mutex); continue; - + } db1_printf(("Calling DoAccess..\n")); - - + + RF_LOCK_MUTEX(raidPtr->mutex); raidPtr->openings--; RF_UNLOCK_MUTEX(raidPtr->mutex); @@ -1848,19 +1833,23 @@ raidstart(raidPtr) * Everything is async. */ do_async = 1; - + disk_busy(&rs->sc_dkdev); - - /* XXX we're still at splbio() here... do we *really* - * need to be? */ - /* don't ever condition on bp->b_flags & B_WRITE. - * always condition on B_READ instead */ + /* + * XXX we're still at splbio() here... Do we *really* + * need to be ? + */ + + /* + * Don't ever condition on bp->b_flags & B_WRITE. + * Always condition on B_READ instead. + */ retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, do_async, raid_addr, num_blocks, - bp->b_data, bp, NULL, NULL, + bp->b_data, bp, NULL, NULL, RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL); RF_LOCK_MUTEX(raidPtr->mutex); @@ -1868,29 +1857,24 @@ raidstart(raidPtr) RF_UNLOCK_MUTEX(raidPtr->mutex); } -/* Invoke an I/O from kernel mode. Disk queue should be locked upon entry */ +/* Invoke an I/O from kernel mode. Disk queue should be locked upon entry. */ int -rf_DispatchKernelIO(queue, req) - RF_DiskQueue_t *queue; - RF_DiskQueueData_t *req; +rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) { int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; struct buf *bp; struct raidbuf *raidbp = NULL; struct raid_softc *rs; int unit; - int s; - - s=0; - /* s = splbio();*/ /* want to test this */ - + /*int s = splbio();*/ /* Want to test this. */ + /* * XXX along with the vnode, we also need the softc associated with - * this device.. + * this device... 
*/ req->queue = queue; - + unit = queue->raidPtr->raidid; db1_printf(("DispatchKernelIO unit: %d\n", unit)); @@ -1907,10 +1891,10 @@ rf_DispatchKernelIO(queue, req) #if 1 /* * XXX When there is a physical disk failure, someone is passing - * us a buffer that contains old stuff!! Attempt to deal with + * us a buffer that contains old stuff !! Attempt to deal with * this problem without taking a performance hit... - * (not sure where the real bug is. It's buried in RAIDframe - * somewhere) :-( GO ) + * (not sure where the real bug is; it's buried in RAIDframe + * somewhere) :-( GO ) */ if (bp->b_flags & B_ERROR) { bp->b_flags &= ~B_ERROR; @@ -1922,10 +1906,10 @@ rf_DispatchKernelIO(queue, req) raidbp = RAIDGETBUF(rs); - raidbp->rf_flags = 0; /* XXX not really used anywhere... */ + raidbp->rf_flags = 0; /* XXX not really used anywhere... */ /* - * context for raidiodone + * Context for raidiodone. */ raidbp->rf_obp = bp; raidbp->req = req; @@ -1939,7 +1923,7 @@ rf_DispatchKernelIO(queue, req) db1_printf(("rf_DispatchKernelIO: NOP to r %d c %d\n", queue->row, queue->col)); - /* XXX need to do something extra here.. */ + /* XXX need to do something extra here... */ /* * I'm leaving this in, as I've never actually seen it @@ -1948,11 +1932,11 @@ rf_DispatchKernelIO(queue, req) db1_printf(("WAKEUP CALLED\n")); queue->numOutstanding++; - /* XXX need to glue the original buffer into this?? */ + /* XXX need to glue the original buffer into this ?? */ rf_KernelWakeupFunc(&raidbp->rf_buf); break; - + case RF_IO_TYPE_READ: case RF_IO_TYPE_WRITE: if (req->tracerec) { @@ -1962,7 +1946,7 @@ rf_DispatchKernelIO(queue, req) rf_InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp, op | bp->b_flags, queue->rf_cinfo->ci_dev, req->sectorOffset, req->numSector, - req->buf, rf_KernelWakeupFunc, (void *)req, + req->buf, rf_KernelWakeupFunc, (void *)req, queue->raidPtr->logBytesPerSector, req->b_proc); if (rf_debugKernelAccess) { @@ -1990,12 +1974,12 @@ rf_DispatchKernelIO(queue, req) VOP_STRATEGY(&raidbp->rf_buf); break; - + default: panic("bad req->type in rf_DispatchKernelIO"); } db1_printf(("Exiting from DispatchKernelIO\n")); - /* splx(s); */ /* want to test this */ + /*splx(s);*/ /* want to test this */ return (0); } @@ -2004,8 +1988,7 @@ rf_DispatchKernelIO(queue, req) * kernel code. */ void -rf_KernelWakeupFunc(vbp) - struct buf *vbp; +rf_KernelWakeupFunc(struct buf *vbp) { RF_DiskQueueData_t *req = NULL; RF_DiskQueue_t *queue; @@ -2046,12 +2029,12 @@ rf_KernelWakeupFunc(vbp) RF_UNLOCK_MUTEX(rf_tracing_mutex); } - bp->b_bcount = raidbp->rf_buf.b_bcount;/* XXXX ?? */ + bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */ - unit = queue->raidPtr->raidid; /* *Much* simpler :-> */ + unit = queue->raidPtr->raidid; /* *Much* simpler :-> */ /* - * XXX Ok, let's get aggressive... If B_ERROR is set, let's go + * XXX Ok, let's get aggressive... If B_ERROR is set, let's go * ballistic, and mark the component as hosed... */ if (bp->b_flags & B_ERROR) { @@ -2086,29 +2069,29 @@ rf_KernelWakeupFunc(vbp) * Initialize a buf structure for doing an I/O in the kernel. 
*/ void -rf_InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg, - logBytesPerSector, b_proc) - struct buf *bp; - struct vnode *b_vp; - unsigned rw_flag; - dev_t dev; - RF_SectorNum_t startSect; - RF_SectorCount_t numSect; - caddr_t buf; - void (*cbFunc)(struct buf *); - void *cbArg; - int logBytesPerSector; - struct proc *b_proc; +rf_InitBP( + struct buf *bp, + struct vnode *b_vp, + unsigned rw_flag, + dev_t dev, + RF_SectorNum_t startSect, + RF_SectorCount_t numSect, + caddr_t buf, + void (*cbFunc)(struct buf *), + void *cbArg, + int logBytesPerSector, + struct proc *b_proc +) { - /* bp->b_flags = B_PHYS | rw_flag; */ - bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ + /*bp->b_flags = B_PHYS | rw_flag;*/ + bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too ??? */ bp->b_bcount = numSect << logBytesPerSector; bp->b_bufsize = bp->b_bcount; bp->b_error = 0; bp->b_dev = dev; bp->b_data = buf; bp->b_blkno = startSect; - bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ + bp->b_resid = bp->b_bcount; /* XXX is this right !??!?!! */ if (bp->b_bcount == 0) { panic("bp->b_bcount is zero in rf_InitBP!!"); } @@ -2119,21 +2102,19 @@ rf_InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg, } void -raidgetdefaultlabel(raidPtr, rs, lp) - RF_Raid_t *raidPtr; - struct raid_softc *rs; - struct disklabel *lp; +raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs, + struct disklabel *lp) { db1_printf(("Building a default label...\n")); bzero(lp, sizeof(*lp)); - /* fabricate a label... */ + /* Fabricate a label... */ lp->d_secperunit = raidPtr->totalSectors; lp->d_secsize = raidPtr->bytesPerSector; lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; lp->d_ntracks = 4 * raidPtr->numCol; - lp->d_ncylinders = raidPtr->totalSectors / - (lp->d_nsectors * lp->d_ntracks); + lp->d_ncylinders = raidPtr->totalSectors / + (lp->d_nsectors * lp->d_ntracks); lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); @@ -2154,12 +2135,11 @@ raidgetdefaultlabel(raidPtr, rs, lp) } /* - * Read the disklabel from the raid device. If one is not present, fake one - * up. + * Read the disklabel from the raid device. + * If one is not present, fake one up. */ void -raidgetdisklabel(dev) - dev_t dev; +raidgetdisklabel(dev_t dev) { int unit = raidunit(dev); struct raid_softc *rs = &raid_softc[unit]; @@ -2186,7 +2166,7 @@ raidgetdisklabel(dev) if (errstring) { printf("%s: %s\n", rs->sc_xname, errstring); return; - /*raidmakedisklabel(rs); */ + /*raidmakedisklabel(rs);*/ } /* @@ -2218,8 +2198,7 @@ raidgetdisklabel(dev) * that a disklabel isn't present. */ void -raidmakedisklabel(rs) - struct raid_softc *rs; +raidmakedisklabel(struct raid_softc *rs) { struct disklabel *lp = rs->sc_dkdev.dk_label; db1_printf(("Making a label..\n")); @@ -2243,10 +2222,11 @@ raidmakedisklabel(rs) * You'll find the original of this in ccd.c */ int -raidlookup(path, p, vpp) - char *path; - struct proc *p; - struct vnode **vpp; /* result */ +raidlookup( + char *path, + struct proc *p, + struct vnode **vpp /* result */ +) { struct nameidata nd; struct vnode *vp; @@ -2290,8 +2270,7 @@ raidlookup(path, p, vpp) * (Hmm... where have we seen this warning before :-> GO ) */ int -raidlock(rs) - struct raid_softc *rs; +raidlock(struct raid_softc *rs) { int error; @@ -2308,8 +2287,7 @@ raidlock(rs) * Unlock and wake up any waiters. 
*/ void -raidunlock(rs) - struct raid_softc *rs; +raidunlock(struct raid_softc *rs) { rs->sc_flags &= ~RAIDF_LOCKED; if ((rs->sc_flags & RAIDF_WANTED) != 0) { @@ -2317,12 +2295,12 @@ raidunlock(rs) wakeup(rs); } } - -#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ -#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ -int +#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ +#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ + +int raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) { RF_ComponentLabel_t clabel; @@ -2334,7 +2312,7 @@ raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) } -int +int raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) { RF_ComponentLabel_t clabel; @@ -2347,28 +2325,30 @@ raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) /* ARGSUSED */ int -raidread_component_label(dev, b_vp, clabel) - dev_t dev; - struct vnode *b_vp; - RF_ComponentLabel_t *clabel; +raidread_component_label(dev_t dev, struct vnode *b_vp, + RF_ComponentLabel_t *clabel) { struct buf *bp; int error; - - /* XXX should probably ensure that we don't try to do this if - someone has changed rf_protected_sectors. */ + + /* + * XXX should probably ensure that we don't try to do this if + * someone has changed rf_protected_sectors. + */ if (b_vp == NULL) { - /* For whatever reason, this component is not valid. - Don't try to read a component label from it. */ + /* + * For whatever reason, this component is not valid. + * Don't try to read a component label from it. + */ return(EINVAL); } - /* get a block of the appropriate size... */ + /* Get a block of the appropriate size... */ bp = geteblk((int)RF_COMPONENT_INFO_SIZE); bp->b_dev = dev; - /* get our ducks in a row for the read */ + /* Get our ducks in a row for the read. */ bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; bp->b_bcount = RF_COMPONENT_INFO_SIZE; bp->b_flags |= B_READ; @@ -2376,37 +2356,34 @@ raidread_component_label(dev, b_vp, clabel) (*bdevsw[major(bp->b_dev)].d_strategy)(bp); - error = biowait(bp); + error = biowait(bp); if (!error) { - memcpy(clabel, bp->b_data, - sizeof(RF_ComponentLabel_t)); + memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t)); #if 0 rf_print_component_label( clabel ); #endif - } else { + } else { db1_printf(("Failed to read RAID component label!\n")); } - brelse(bp); + brelse(bp); return(error); } /* ARGSUSED */ -int -raidwrite_component_label(dev, b_vp, clabel) - dev_t dev; - struct vnode *b_vp; - RF_ComponentLabel_t *clabel; +int +raidwrite_component_label(dev_t dev, struct vnode *b_vp, + RF_ComponentLabel_t *clabel) { struct buf *bp; int error; - /* get a block of the appropriate size... */ + /* Get a block of the appropriate size... */ bp = geteblk((int)RF_COMPONENT_INFO_SIZE); bp->b_dev = dev; - /* get our ducks in a row for the write */ + /* Get our ducks in a row for the write. 
*/ bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; bp->b_bcount = RF_COMPONENT_INFO_SIZE; bp->b_flags |= B_WRITE; @@ -2417,7 +2394,7 @@ raidwrite_component_label(dev, b_vp, clabel) memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); (*bdevsw[major(bp->b_dev)].d_strategy)(bp); - error = biowait(bp); + error = biowait(bp); brelse(bp); if (error) { printf("Failed to write RAID component info!\n"); @@ -2426,9 +2403,8 @@ raidwrite_component_label(dev, b_vp, clabel) return(error); } -void -rf_markalldirty(raidPtr) - RF_Raid_t *raidPtr; +void +rf_markalldirty(RF_Raid_t *raidPtr) { RF_ComponentLabel_t clabel; int r,c; @@ -2436,57 +2412,56 @@ rf_markalldirty(raidPtr) raidPtr->mod_counter++; for (r = 0; r < raidPtr->numRow; r++) { for (c = 0; c < raidPtr->numCol; c++) { - /* we don't want to touch (at all) a disk that has - failed */ + /* + * We don't want to touch (at all) a disk that has + * failed. + */ if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) { raidread_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - &clabel); + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, &clabel); if (clabel.status == rf_ds_spared) { - /* XXX do something special... - but whatever you do, don't - try to access it!! */ + /* + * XXX do something special... + * But whatever you do, don't + * try to access it !!! + */ } else { #if 0 - clabel.status = - raidPtr->Disks[r][c].status; - raidwrite_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - &clabel); + clabel.status = + raidPtr->Disks[r][c].status; + raidwrite_component_label( + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, + &clabel); #endif - raidmarkdirty( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); + raidmarkdirty( + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, + raidPtr->mod_counter); } } - } + } } - /* printf("Component labels marked dirty.\n"); */ + /*printf("Component labels marked dirty.\n");*/ #if 0 for( c = 0; c < raidPtr->numSpare ; c++) { sparecol = raidPtr->numCol + c; if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) { - /* - - XXX this is where we get fancy and map this spare - into it's correct spot in the array. - + /* + * XXX This is where we get fancy and map this spare + * into it's correct spot in the array. */ - /* - - we claim this disk is "optimal" if it's - rf_ds_used_spare, as that means it should be - directly substitutable for the disk it replaced. - We note that too... - + /* + * We claim this disk is "optimal" if it's + * rf_ds_used_spare, as that means it should be + * directly substitutable for the disk it replaced. + * We note that too... */ for(i=0;i<raidPtr->numRow;i++) { for(j=0;j<raidPtr->numCol;j++) { - if ((raidPtr->Disks[i][j].spareRow == + if ((raidPtr->Disks[i][j].spareRow == r) && (raidPtr->Disks[i][j].spareCol == sparecol)) { @@ -2496,27 +2471,25 @@ rf_markalldirty(raidPtr) } } } - - raidread_component_label( - raidPtr->Disks[r][sparecol].dev, - raidPtr->raid_cinfo[r][sparecol].ci_vp, - &clabel); - /* make sure status is noted */ - clabel.version = RF_COMPONENT_LABEL_VERSION; + + raidread_component_label( + raidPtr->Disks[r][sparecol].dev, + raidPtr->raid_cinfo[r][sparecol].ci_vp, &clabel); + /* Make sure status is noted. 
*/ + clabel.version = RF_COMPONENT_LABEL_VERSION; clabel.mod_counter = raidPtr->mod_counter; clabel.serial_number = raidPtr->serial_number; clabel.row = srow; clabel.column = scol; clabel.num_rows = raidPtr->numRow; clabel.num_columns = raidPtr->numCol; - clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/ + clabel.clean = RF_RAID_DIRTY; /* Changed in a bit. */ clabel.status = rf_ds_optimal; raidwrite_component_label( - raidPtr->Disks[r][sparecol].dev, - raidPtr->raid_cinfo[r][sparecol].ci_vp, - &clabel); - raidmarkclean( raidPtr->Disks[r][sparecol].dev, - raidPtr->raid_cinfo[r][sparecol].ci_vp); + raidPtr->Disks[r][sparecol].dev, + raidPtr->raid_cinfo[r][sparecol].ci_vp, &clabel); + raidmarkclean( raidPtr->Disks[r][sparecol].dev, + raidPtr->raid_cinfo[r][sparecol].ci_vp); } } @@ -2525,9 +2498,7 @@ rf_markalldirty(raidPtr) void -rf_update_component_labels(raidPtr, final) - RF_Raid_t *raidPtr; - int final; +rf_update_component_labels(RF_Raid_t *raidPtr, int final) { RF_ComponentLabel_t clabel; int sparecol; @@ -2538,8 +2509,10 @@ rf_update_component_labels(raidPtr, final) srow = -1; scol = -1; - /* XXX should do extra checks to make sure things really are clean, - rather than blindly setting the clean bit... */ + /* + * XXX should do extra checks to make sure things really are clean, + * rather than blindly setting the clean bit... + */ raidPtr->mod_counter++; @@ -2550,43 +2523,43 @@ rf_update_component_labels(raidPtr, final) raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, &clabel); - /* make sure status is noted */ + /* Make sure status is noted. */ clabel.status = rf_ds_optimal; - /* bump the counter */ + /* Bump the counter. */ clabel.mod_counter = raidPtr->mod_counter; - raidwrite_component_label( + raidwrite_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, &clabel); if (final == RF_FINAL_COMPONENT_UPDATE) { - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); + if (raidPtr->parity_good == + RF_RAID_CLEAN) { + raidmarkclean( + raidPtr->Disks[r][c].dev, + raidPtr-> + raid_cinfo[r][c].ci_vp, + raidPtr->mod_counter); } } - } - /* else we don't touch it.. */ - } + } + /* Else we don't touch it... */ + } } for( c = 0; c < raidPtr->numSpare ; c++) { sparecol = raidPtr->numCol + c; if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) { - /* - - we claim this disk is "optimal" if it's - rf_ds_used_spare, as that means it should be - directly substitutable for the disk it replaced. - We note that too... - + /* + * We claim this disk is "optimal" if it's + * rf_ds_used_spare, as that means it should be + * directly substitutable for the disk it replaced. + * We note that too... */ for(i=0;i<raidPtr->numRow;i++) { for(j=0;j<raidPtr->numCol;j++) { - if ((raidPtr->Disks[i][j].spareRow == + if ((raidPtr->Disks[i][j].spareRow == 0) && (raidPtr->Disks[i][j].spareCol == sparecol)) { @@ -2596,13 +2569,12 @@ rf_update_component_labels(raidPtr, final) } } } - - /* XXX shouldn't *really* need this... */ - raidread_component_label( - raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - &clabel); - /* make sure status is noted */ + + /* XXX Shouldn't *really* need this... */ + raidread_component_label( + raidPtr->Disks[0][sparecol].dev, + raidPtr->raid_cinfo[0][sparecol].ci_vp, &clabel); + /* Make sure status is noted. 
*/ raid_init_component_label(raidPtr, &clabel); @@ -2612,26 +2584,24 @@ rf_update_component_labels(raidPtr, final) clabel.status = rf_ds_optimal; raidwrite_component_label( - raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - &clabel); + raidPtr->Disks[0][sparecol].dev, + raidPtr->raid_cinfo[0][sparecol].ci_vp, &clabel); if (final == RF_FINAL_COMPONENT_UPDATE) { if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - raidPtr->mod_counter); + raidmarkclean(raidPtr-> + Disks[0][sparecol].dev, + raidPtr-> + raid_cinfo[0][sparecol].ci_vp, + raidPtr->mod_counter); } } } } - /* printf("Component labels updated\n"); */ + /*printf("Component labels updated\n");*/ } void -rf_close_component(raidPtr, vp, auto_configured) - RF_Raid_t *raidPtr; - struct vnode *vp; - int auto_configured; +rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) { struct proc *p; @@ -2642,8 +2612,8 @@ rf_close_component(raidPtr, vp, auto_configured) if (auto_configured == 1) { VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); vrele(vp); - - } else { + + } else { VOP_UNLOCK(vp, 0, p); (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); } @@ -2654,15 +2624,14 @@ rf_close_component(raidPtr, vp, auto_configured) void -rf_UnconfigureVnodes(raidPtr) - RF_Raid_t *raidPtr; +rf_UnconfigureVnodes(RF_Raid_t *raidPtr) { - int r,c; + int r,c; struct vnode *vp; int acd; - /* We take this opportunity to close the vnodes like we should.. */ + /* We take this opportunity to close the vnodes like we should... */ for (r = 0; r < raidPtr->numRow; r++) { for (c = 0; c < raidPtr->numCol; c++) { @@ -2686,11 +2655,10 @@ rf_UnconfigureVnodes(raidPtr) } -void -rf_ReconThread(req) - struct rf_recon_req *req; +void +rf_ReconThread(struct rf_recon_req *req) { - int s; + int s; RF_Raid_t *raidPtr; s = splbio(); @@ -2700,19 +2668,18 @@ rf_ReconThread(req) rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col, ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); - /* XXX get rid of this! we don't need it at all.. */ + /* XXX Get rid of this! we don't need it at all... */ RF_Free(req, sizeof(*req)); raidPtr->recon_in_progress = 0; splx(s); /* That's all... */ - kthread_exit(0); /* does not return */ + kthread_exit(0); /* Does not return. */ } void -rf_RewriteParityThread(raidPtr) - RF_Raid_t *raidPtr; +rf_RewriteParityThread(RF_Raid_t *raidPtr) { int retcode; int s; @@ -2723,27 +2690,28 @@ rf_RewriteParityThread(raidPtr) if (retcode) { printf("raid%d: Error re-writing parity!\n",raidPtr->raidid); } else { - /* set the clean bit! If we shutdown correctly, - the clean bit on each component label will get - set */ + /* + * Set the clean bit ! If we shutdown correctly, + * the clean bit on each component label will get + * set. + */ raidPtr->parity_good = RF_RAID_CLEAN; } raidPtr->parity_rewrite_in_progress = 0; splx(s); - /* Anyone waiting for us to stop? If so, inform them... */ + /* Anyone waiting for us to stop ? If so, inform them... */ if (raidPtr->waitShutdown) { wakeup(&raidPtr->parity_rewrite_in_progress); } /* That's all... */ - kthread_exit(0); /* does not return */ + kthread_exit(0); /* Does not return. */ } void -rf_CopybackThread(raidPtr) - RF_Raid_t *raidPtr; +rf_CopybackThread(RF_Raid_t *raidPtr) { int s; @@ -2754,18 +2722,17 @@ rf_CopybackThread(raidPtr) splx(s); /* That's all... */ - kthread_exit(0); /* does not return */ + kthread_exit(0); /* Does not return. 
*/ } void -rf_ReconstructInPlaceThread(req) - struct rf_recon_req *req; +rf_ReconstructInPlaceThread(struct rf_recon_req *req) { int retcode; int s; RF_Raid_t *raidPtr; - + s = splbio(); raidPtr = req->raidPtr; raidPtr->recon_in_progress = 1; @@ -2775,14 +2742,14 @@ rf_ReconstructInPlaceThread(req) splx(s); /* That's all... */ - kthread_exit(0); /* does not return */ + kthread_exit(0); /* Does not return. */ } RF_AutoConfig_t * -rf_find_raid_components() +rf_find_raid_components(void) { -#ifdef RAID_AUTOCONFIG +#ifdef RAID_AUTOCONFIG int major; struct vnode *vp; struct disklabel label; @@ -2793,32 +2760,34 @@ rf_find_raid_components() int good_one; RF_ComponentLabel_t *clabel; RF_AutoConfig_t *ac; -#endif +#endif /* RAID_AUTOCONFIG */ RF_AutoConfig_t *ac_list; - /* initialize the AutoConfig list */ + /* Initialize the AutoConfig list. */ ac_list = NULL; -#ifdef RAID_AUTOCONFIG - /* we begin by trolling through *all* the devices on the system */ +#ifdef RAID_AUTOCONFIG + /* We begin by trolling through *all* the devices on the system. */ - for (dv = alldevs.tqh_first; dv != NULL; - dv = dv->dv_list.tqe_next) { + for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) { - /* we are only interested in disks... */ + /* We are only interested in disks... */ if (dv->dv_class != DV_DISK) continue; - /* we don't care about floppies... */ + /* We don't care about floppies... */ if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) { continue; } - - /* need to find the device_name_to_block_device_major stuff */ + + /* + * We need to find the device_name_to_block_device_major + * stuff. + */ major = findblkmajor(dv); - /* get a vnode for the raw partition of this disk */ + /* Get a vnode for the raw partition of this disk. */ dev = MAKEDISKDEV(major, dv->dv_unit, RAW_PART); if (bdevvp(dev, &vp)) @@ -2827,34 +2796,40 @@ rf_find_raid_components() error = VOP_OPEN(vp, FREAD, NOCRED, 0); if (error) { - /* "Who cares." Continue looking - for something that exists*/ + /* + * "Who cares." Continue looking + * for something that exists. + */ vput(vp); continue; } /* Ok, the disk exists. Go get the disklabel. */ - error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label, + error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0); if (error) { /* * XXX can't happen - open() would - * have errored out (or faked up one) + * have errored out (or faked up one). */ printf("can't get label for dev %s%c (%d)!?!?\n", - dv->dv_xname, 'a' + RAW_PART, error); + dv->dv_xname, 'a' + RAW_PART, error); } - /* don't need this any more. We'll allocate it again - a little later if we really do... */ + /* + * We don't need this any more. We'll allocate it again + * a little later if we really do... + */ VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); vrele(vp); for (i=0; i < label.d_npartitions; i++) { - /* We only support partitions marked as RAID. */ - /* Aside on sparc/sparc64 where FS_RAID doesn't */ - /* fit in the SUN disklabel and we need to look */ - /* into each and every partition !!! */ + /* + * We only support partitions marked as RAID. + * Except on sparc/sparc64 where FS_RAID doesn't + * fit in the SUN disklabel and we need to look + * into each and every partition !!! 
+ */ #if !defined(__sparc__) && !defined(__sparc64__) && !defined(__sun3__) if (label.d_partitions[i].p_fstype != FS_RAID) continue; @@ -2877,37 +2852,38 @@ rf_find_raid_components() good_one = 0; - clabel = (RF_ComponentLabel_t *) - malloc(sizeof(RF_ComponentLabel_t), - M_RAIDFRAME, M_NOWAIT); + clabel = (RF_ComponentLabel_t *) + malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, + M_NOWAIT); if (clabel == NULL) { - /* XXX CLEANUP HERE */ + /* XXX CLEANUP HERE. */ printf("RAID auto config: out of memory!\n"); - return(NULL); /* XXX probably should panic? */ + return(NULL); /* XXX probably should panic ? */ } if (!raidread_component_label(dev, vp, clabel)) { - /* Got the label. Does it look reasonable? */ + /* Got the label. Does it look reasonable ? */ if (rf_reasonable_label(clabel) && - (clabel->partitionSize <= + (clabel->partitionSize <= label.d_partitions[i].p_size)) { #ifdef RAIDDEBUG - printf("Component on: %s%c: %d\n", - dv->dv_xname, 'a'+i, - label.d_partitions[i].p_size); + printf("Component on: %s%c: %d\n", + dv->dv_xname, 'a'+i, + label.d_partitions[i].p_size); rf_print_component_label(clabel); -#endif - /* if it's reasonable, add it, - else ignore it. */ +#endif /* RAIDDEBUG */ + /* + * If it's reasonable, add it, + * else ignore it. + */ ac = (RF_AutoConfig_t *) malloc(sizeof(RF_AutoConfig_t), - M_RAIDFRAME, - M_NOWAIT); + M_RAIDFRAME, M_NOWAIT); if (ac == NULL) { - /* XXX should panic?? */ + /* XXX should panic ??? */ return(NULL); } - + sprintf(ac->devname, "%s%c", dv->dv_xname, 'a'+i); ac->dev = dev; @@ -2916,75 +2892,68 @@ rf_find_raid_components() ac->next = ac_list; ac_list = ac; good_one = 1; - } + } } if (!good_one) { - /* cleanup */ + /* Cleanup. */ free(clabel, M_RAIDFRAME); VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); vrele(vp); } } } -#endif -return(ac_list); +#endif /* RAID_AUTOCONFIG */ + return(ac_list); } - -#ifdef RAID_AUTOCONFIG + +#ifdef RAID_AUTOCONFIG int -rf_reasonable_label(clabel) - RF_ComponentLabel_t *clabel; +rf_reasonable_label(RF_ComponentLabel_t *clabel) { - + if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || (clabel->version==RF_COMPONENT_LABEL_VERSION)) && ((clabel->clean == RF_RAID_CLEAN) || (clabel->clean == RF_RAID_DIRTY)) && - clabel->row >=0 && - clabel->column >= 0 && + clabel->row >=0 && + clabel->column >= 0 && clabel->num_rows > 0 && clabel->num_columns > 0 && - clabel->row < clabel->num_rows && + clabel->row < clabel->num_rows && clabel->column < clabel->num_columns && clabel->blockSize > 0 && clabel->numBlocks > 0) { - /* label looks reasonable enough... */ + /* Label looks reasonable enough... */ return(1); } return(0); } -#endif +#endif /* RAID_AUTOCONFIG */ void -rf_print_component_label(clabel) - RF_ComponentLabel_t *clabel; +rf_print_component_label(RF_ComponentLabel_t *clabel) { printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", - clabel->row, clabel->column, - clabel->num_rows, clabel->num_columns); + clabel->row, clabel->column, clabel->num_rows, clabel->num_columns); printf(" Version: %d Serial Number: %d Mod Counter: %d\n", - clabel->version, clabel->serial_number, - clabel->mod_counter); - printf(" Clean: %s Status: %d\n", - clabel->clean ? "Yes" : "No", clabel->status ); + clabel->version, clabel->serial_number, clabel->mod_counter); + printf(" Clean: %s Status: %d\n", clabel->clean ? 
"Yes" : "No", + clabel->status ); printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", - clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); + clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", - (char) clabel->parityConfig, clabel->blockSize, - clabel->numBlocks); + (char) clabel->parityConfig, clabel->blockSize, clabel->numBlocks); printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); - printf(" Contains root partition: %s\n", - clabel->root_partition ? "Yes" : "No" ); + printf(" Contains root partition: %s\n", clabel->root_partition ? + "Yes" : "No" ); printf(" Last configured as: raid%d\n", clabel->last_unit ); #if 0 printf(" Config order: %d\n", clabel->config_order); #endif - } RF_ConfigSet_t * -rf_create_auto_sets(ac_list) - RF_AutoConfig_t *ac_list; +rf_create_auto_sets(RF_AutoConfig_t *ac_list) { RF_AutoConfig_t *ac; RF_ConfigSet_t *config_sets; @@ -2994,33 +2963,37 @@ rf_create_auto_sets(ac_list) config_sets = NULL; - /* Go through the AutoConfig list, and figure out which components - belong to what sets. */ + /* + * Go through the AutoConfig list, and figure out which components + * belong to what sets. + */ ac = ac_list; while(ac!=NULL) { - /* we're going to putz with ac->next, so save it here - for use at the end of the loop */ + /* + * We're going to putz with ac->next, so save it here + * for use at the end of the loop. + */ ac_next = ac->next; if (config_sets == NULL) { - /* will need at least this one... */ + /* We will need at least this one... */ config_sets = (RF_ConfigSet_t *) - malloc(sizeof(RF_ConfigSet_t), - M_RAIDFRAME, M_NOWAIT); + malloc(sizeof(RF_ConfigSet_t), M_RAIDFRAME, + M_NOWAIT); if (config_sets == NULL) { panic("rf_create_auto_sets: No memory!"); } - /* this one is easy :) */ + /* This one is easy :) */ config_sets->ac = ac; config_sets->next = NULL; config_sets->rootable = 0; ac->next = NULL; } else { - /* which set does this component fit into? */ + /* Which set does this component fit into ? */ cset = config_sets; while(cset!=NULL) { if (rf_does_it_fit(cset, ac)) { - /* looks like it matches... */ + /* Looks like it matches... */ ac->next = cset->ac; cset->ac = ac; break; @@ -3028,10 +3001,10 @@ rf_create_auto_sets(ac_list) cset = cset->next; } if (cset==NULL) { - /* didn't find a match above... new set..*/ + /* Didn't find a match above... new set... */ cset = (RF_ConfigSet_t *) - malloc(sizeof(RF_ConfigSet_t), - M_RAIDFRAME, M_NOWAIT); + malloc(sizeof(RF_ConfigSet_t), + M_RAIDFRAME, M_NOWAIT); if (cset == NULL) { panic("rf_create_auto_sets: No memory!"); } @@ -3050,36 +3023,36 @@ rf_create_auto_sets(ac_list) } int -rf_does_it_fit(cset, ac) - RF_ConfigSet_t *cset; - RF_AutoConfig_t *ac; +rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) { RF_ComponentLabel_t *clabel1, *clabel2; - /* If this one matches the *first* one in the set, that's good - enough, since the other members of the set would have been - through here too... */ - /* note that we are not checking partitionSize here.. - - Note that we are also not checking the mod_counters here. - If everything else matches execpt the mod_counter, that's - good enough for this test. We will deal with the mod_counters - a little later in the autoconfiguration process. - - (clabel1->mod_counter == clabel2->mod_counter) && - - The reason we don't check for this is that failed disks - will have lower modification counts. 
If those disks are - not added to the set they used to belong to, then they will - form their own set, which may result in 2 different sets, - for example, competing to be configured at raid0, and - perhaps competing to be the root filesystem set. If the - wrong ones get configured, or both attempt to become /, - weird behaviour and or serious lossage will occur. Thus we - need to bring them into the fold here, and kick them out at - a later point. - - */ + /* + * If this one matches the *first* one in the set, that's good + * enough, since the other members of the set would have been + * through here too... + */ + /* + * Note that we are not checking partitionSize here... + * + * Note that we are also not checking the mod_counters here. + * If everything else matches except the mod_counter, that's + * good enough for this test. We will deal with the mod_counters + * a little later in the autoconfiguration process. + * + * (clabel1->mod_counter == clabel2->mod_counter) && + * + * The reason we don't check for this is that failed disks + * will have lower modification counts. If those disks are + * not added to the set they used to belong to, then they will + * form their own set, which may result in 2 different sets, + * for example, competing to be configured at raid0, and + * perhaps competing to be the root filesystem set. If the + * wrong ones get configured, or both attempt to become /, + * weird behaviour and or serious lossage will occur. Thus we + * need to bring them into the fold here, and kick them out at + * a later point. + */ clabel1 = cset->ac->clabel; clabel2 = ac->clabel; @@ -3098,19 +3071,17 @@ rf_does_it_fit(cset, ac) (clabel1->root_partition == clabel2->root_partition) && (clabel1->last_unit == clabel2->last_unit) && (clabel1->config_order == clabel2->config_order)) { - /* if it get's here, it almost *has* to be a match */ + /* If it get's here, it almost *has* to be a match. */ } else { - /* it's not consistent with somebody in the set.. - punt */ + /* It's not consistent with somebody in the set... Punt. */ return(0); } - /* all was fine.. it must fit... */ + /* All was fine.. It must fit... */ return(1); } int -rf_have_enough_components(cset) - RF_ConfigSet_t *cset; +rf_have_enough_components(RF_ConfigSet_t *cset) { RF_AutoConfig_t *ac; RF_AutoConfig_t *auto_config; @@ -3123,16 +3094,18 @@ rf_have_enough_components(cset) int mod_counter_found; int even_pair_failed; char parity_type; - - /* check to see that we have enough 'live' components - of this set. If so, we can configure it if necessary */ + + /* + * Check to see that we have enough 'live' components + * of this set. If so, we can configure it if necessary. + */ num_rows = cset->ac->clabel->num_rows; num_cols = cset->ac->clabel->num_columns; parity_type = cset->ac->clabel->parityConfig; - /* XXX Check for duplicate components!?!?!? */ + /* XXX Check for duplicate components !?!?!? */ /* Determine what the mod_counter is supposed to be for this set. */ @@ -3160,44 +3133,50 @@ rf_have_enough_components(cset) ac = auto_config; while(ac!=NULL) { if ((ac->clabel->row == r) && - (ac->clabel->column == c) && + (ac->clabel->column == c) && (ac->clabel->mod_counter == mod_counter)) { - /* it's this one... */ + /* It's this one... */ #ifdef RAIDDEBUG printf("Found: %s at %d,%d\n", - ac->devname,r,c); -#endif + ac->devname,r,c); +#endif /* RAIDDEBUG */ break; } ac=ac->next; } if (ac==NULL) { /* Didn't find one here! 
*/ - /* special case for RAID 1, especially - where there are more than 2 - components (where RAIDframe treats - things a little differently :( ) */ + /* + * Special case for RAID 1, especially + * where there are more than 2 + * components (where RAIDframe treats + * things a little differently :( ) + */ if (parity_type == '1') { - if (c%2 == 0) { /* even component */ + if (c%2 == 0) { /* Even component. */ even_pair_failed = 1; - } else { /* odd component. If - we're failed, and - so is the even - component, it's - "Good Night, Charlie" */ + } else { /* + * Odd component. + * If we're failed, + * and so is the even + * component, it's + * "Good Night, Charlie" + */ if (even_pair_failed == 1) { return(0); } } } else { - /* normal accounting */ + /* Normal accounting. */ num_missing++; } } if ((parity_type == '1') && (c%2 == 1)) { - /* Just did an even component, and we didn't - bail.. reset the even_pair_failed flag, - and go on to the next component.... */ + /* + * Just did an even component, and we didn't + * bail... Reset the even_pair_failed flag, + * and go on to the next component... + */ even_pair_failed = 0; } } @@ -3208,30 +3187,30 @@ rf_have_enough_components(cset) if (((clabel->parityConfig == '0') && (num_missing > 0)) || ((clabel->parityConfig == '4') && (num_missing > 1)) || ((clabel->parityConfig == '5') && (num_missing > 1))) { - /* XXX this needs to be made *much* more general */ - /* Too many failures */ + /* XXX This needs to be made *much* more general. */ + /* Too many failures. */ return(0); } - /* otherwise, all is well, and we've got enough to take a kick - at autoconfiguring this set */ + /* + * Otherwise, all is well, and we've got enough to take a kick + * at autoconfiguring this set. + */ return(1); } void -rf_create_configuration(ac,config,raidPtr) - RF_AutoConfig_t *ac; - RF_Config_t *config; - RF_Raid_t *raidPtr; +rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, + RF_Raid_t *raidPtr) { RF_ComponentLabel_t *clabel; int i; clabel = ac->clabel; - /* 1. Fill in the common stuff */ + /* 1. Fill in the common stuff. */ config->numRow = clabel->num_rows; config->numCol = clabel->num_columns; - config->numSpare = 0; /* XXX should this be set here? */ + config->numSpare = 0; /* XXX Should this be set here ? */ config->sectPerSU = clabel->sectPerSU; config->SUsPerPU = clabel->SUsPerPU; config->SUsPerRU = clabel->SUsPerRU; @@ -3239,13 +3218,15 @@ rf_create_configuration(ac,config,raidPtr) /* XXX... */ strcpy(config->diskQueueType,"fifo"); config->maxOutstandingDiskReqs = clabel->maxOutstanding; - config->layoutSpecificSize = 0; /* XXX ?? */ + config->layoutSpecificSize = 0; /* XXX ?? */ while(ac!=NULL) { - /* row/col values will be in range due to the checks - in reasonable_label() */ + /* + * row/col values will be in range due to the checks + * in reasonable_label(). 
+ */ strcpy(config->devnames[ac->clabel->row][ac->clabel->column], - ac->devname); + ac->devname); ac = ac->next; } @@ -3253,106 +3234,116 @@ rf_create_configuration(ac,config,raidPtr) config->debugVars[i][0] = NULL; } -#ifdef RAID_DEBUG_ALL -#ifdef RF_DBG_OPTION -#undef RF_DBG_OPTION -#endif +#ifdef RAID_DEBUG_ALL + +#ifdef RF_DBG_OPTION +#undef RF_DBG_OPTION +#endif /* RF_DBG_OPTION */ -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_val_) \ - snprintf(&(config->debugVars[i++][0]), 50, \ - "%s %ld", #_option_, _val_); -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_val_) \ - snprintf(&(config->debugVars[i++][0]), 50, \ - "%s %ld", "/**/_option_/**/", _val_); -#endif /* __STDC__ */ +#ifdef __STDC__ +#define RF_DBG_OPTION(_option_,_val_) do { \ + snprintf(&(config->debugVars[i++][0]), 50, "%s %ld", \ + #_option_, _val_); \ +} while (0) +#else /* __STDC__ */ +#define RF_DBG_OPTION(_option_,_val_) do { \ + snprintf(&(config->debugVars[i++][0]), 50, "%s %ld", \ + "/**/_option_/**/", _val_); \ +} while (0) +#endif /* __STDC__ */ i = 0; -/* RF_DBG_OPTION(accessDebug, 0) */ -/* RF_DBG_OPTION(accessTraceBufSize, 0) */ - RF_DBG_OPTION(cscanDebug, 1) /* debug CSCAN sorting */ - RF_DBG_OPTION(dagDebug, 1) -/* RF_DBG_OPTION(debugPrintUseBuffer, 0) */ - RF_DBG_OPTION(degDagDebug, 1) - RF_DBG_OPTION(disableAsyncAccs, 1) - RF_DBG_OPTION(diskDebug, 1) - RF_DBG_OPTION(enableAtomicRMW, 0) /* this debug var enables - * locking of the disk arm - * during small-write - * operations. Setting this - * variable to anything other - * than 0 will result in - * deadlock. (wvcii) */ - RF_DBG_OPTION(engineDebug, 1) - RF_DBG_OPTION(fifoDebug, 1) /* debug fifo queueing */ -/* RF_DBG_OPTION(floatingRbufDebug, 1) */ -/* RF_DBG_OPTION(forceHeadSepLimit, -1) */ -/* RF_DBG_OPTION(forceNumFloatingReconBufs, -1) */ /* wire down number of - * extra recon buffers - * to use */ -/* RF_DBG_OPTION(keepAccTotals, 1) */ /* turn on keep_acc_totals */ - RF_DBG_OPTION(lockTableSize, RF_DEFAULT_LOCK_TABLE_SIZE) - RF_DBG_OPTION(mapDebug, 1) - RF_DBG_OPTION(maxNumTraces, -1) - -/* RF_DBG_OPTION(memChunkDebug, 1) */ -/* RF_DBG_OPTION(memDebug, 1) */ -/* RF_DBG_OPTION(memDebugAddress, 1) */ -/* RF_DBG_OPTION(numBufsToAccumulate, 1) */ /* number of buffers to - * accumulate before - * doing XOR */ - RF_DBG_OPTION(prReconSched, 0) - RF_DBG_OPTION(printDAGsDebug, 1) - RF_DBG_OPTION(printStatesDebug, 1) - RF_DBG_OPTION(protectedSectors, 64L) /* # of sectors at start - * of disk to exclude - * from RAID address - * space */ - RF_DBG_OPTION(pssDebug, 1) - RF_DBG_OPTION(queueDebug, 1) - RF_DBG_OPTION(quiesceDebug, 1) - RF_DBG_OPTION(raidSectorOffset, 0) /* added to all incoming sectors - * to debug alignment problems */ - RF_DBG_OPTION(reconDebug, 1) - RF_DBG_OPTION(reconbufferDebug, 1) - RF_DBG_OPTION(scanDebug, 1) /* debug SCAN sorting */ - RF_DBG_OPTION(showXorCallCounts, 0) /* show n-way Xor call counts */ - RF_DBG_OPTION(shutdownDebug, 1) /* show shutdown calls */ - RF_DBG_OPTION(sizePercentage, 100) - RF_DBG_OPTION(sstfDebug, 1) /* turn on debugging info for sstf - * queueing */ - RF_DBG_OPTION(stripeLockDebug, 1) - RF_DBG_OPTION(suppressLocksAndLargeWrites, 0) - RF_DBG_OPTION(suppressTraceDelays, 0) - RF_DBG_OPTION(useMemChunks, 1) - RF_DBG_OPTION(validateDAGDebug, 1) - RF_DBG_OPTION(validateVisitedDebug, 1) /* XXX turn to zero by - * default? 
*/ - RF_DBG_OPTION(verifyParityDebug, 1) - RF_DBG_OPTION(debugKernelAccess, 1) /* DoAccessKernel debugging */ - -#if 0 /* RF_INCLUDE_PARITYLOGGING > 0 */ - RF_DBG_OPTION(forceParityLogReint, 0) - RF_DBG_OPTION(numParityRegions, 0) /* number of regions in the - * array */ - RF_DBG_OPTION(numReintegrationThreads, 1) - RF_DBG_OPTION(parityLogDebug, 1) /* if nonzero, enables debugging - * of parity logging */ - RF_DBG_OPTION(totalInCoreLogCapacity, 1024 * 1024) /* target bytes - * available for - * in-core - * logs */ -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ - -#endif /* RAID_DEBUG_ALL */ +/* RF_DBG_OPTION(accessDebug, 0); */ +/* RF_DBG_OPTION(accessTraceBufSize, 0); */ + RF_DBG_OPTION(cscanDebug, 1); /* Debug CSCAN sorting. */ + RF_DBG_OPTION(dagDebug, 1); +/* RF_DBG_OPTION(debugPrintUseBuffer, 0); */ + RF_DBG_OPTION(degDagDebug, 1); + RF_DBG_OPTION(disableAsyncAccs, 1); + RF_DBG_OPTION(diskDebug, 1); + RF_DBG_OPTION(enableAtomicRMW, 0); + /* + * This debug variable enables locking of the + * disk arm during small-write operations. + * Setting this variable to anything other than + * 0 will result in deadlock. (wvcii) + */ + RF_DBG_OPTION(engineDebug, 1); + RF_DBG_OPTION(fifoDebug, 1); /* Debug fifo queueing. */ +/* RF_DBG_OPTION(floatingRbufDebug, 1); */ +/* RF_DBG_OPTION(forceHeadSepLimit, -1); */ +/* RF_DBG_OPTION(forceNumFloatingReconBufs, -1); */ + /* + * Wire down the number of extra recon buffers + * to use. + */ +/* RF_DBG_OPTION(keepAccTotals, 1); */ + /* Turn on keep_acc_totals. */ + RF_DBG_OPTION(lockTableSize, RF_DEFAULT_LOCK_TABLE_SIZE); + RF_DBG_OPTION(mapDebug, 1); + RF_DBG_OPTION(maxNumTraces, -1); + +/* RF_DBG_OPTION(memChunkDebug, 1); */ +/* RF_DBG_OPTION(memDebug, 1); */ +/* RF_DBG_OPTION(memDebugAddress, 1); */ +/* RF_DBG_OPTION(numBufsToAccumulate, 1); */ + /* + * Number of buffers to accumulate before + * doing XOR. + */ + RF_DBG_OPTION(prReconSched, 0); + RF_DBG_OPTION(printDAGsDebug, 1); + RF_DBG_OPTION(printStatesDebug, 1); + RF_DBG_OPTION(protectedSectors, 64L); + /* + * Number of sectors at start of disk to exclude + * from RAID address space. + */ + RF_DBG_OPTION(pssDebug, 1); + RF_DBG_OPTION(queueDebug, 1); + RF_DBG_OPTION(quiesceDebug, 1); + RF_DBG_OPTION(raidSectorOffset, 0); + /* + * Value added to all incoming sectors to debug + * alignment problems. + */ + RF_DBG_OPTION(reconDebug, 1); + RF_DBG_OPTION(reconbufferDebug, 1); + RF_DBG_OPTION(scanDebug, 1); /* Debug SCAN sorting. */ + RF_DBG_OPTION(showXorCallCounts, 0); + /* Show n-way Xor call counts. */ + RF_DBG_OPTION(shutdownDebug, 1); /* Show shutdown calls. */ + RF_DBG_OPTION(sizePercentage, 100); + RF_DBG_OPTION(sstfDebug, 1); + /* Turn on debugging info for sstf queueing. */ + RF_DBG_OPTION(stripeLockDebug, 1); + RF_DBG_OPTION(suppressLocksAndLargeWrites, 0); + RF_DBG_OPTION(suppressTraceDelays, 0); + RF_DBG_OPTION(useMemChunks, 1); + RF_DBG_OPTION(validateDAGDebug, 1); + RF_DBG_OPTION(validateVisitedDebug, 1); + /* XXX turn to zero by default ? */ + RF_DBG_OPTION(verifyParityDebug, 1); + RF_DBG_OPTION(debugKernelAccess, 1); + /* DoAccessKernel debugging. */ + +#if RF_INCLUDE_PARITYLOGGING > 0 + RF_DBG_OPTION(forceParityLogReint, 0); + RF_DBG_OPTION(numParityRegions, 0); + /* Number of regions in the array. */ + RF_DBG_OPTION(numReintegrationThreads, 1); + RF_DBG_OPTION(parityLogDebug, 1); + /* If nonzero, enables debugging of parity logging. */ + RF_DBG_OPTION(totalInCoreLogCapacity, 1024 * 1024); + /* Target bytes available for in-core logs. 
*/ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ + +#endif /* RAID_DEBUG_ALL */ } int -rf_set_autoconfig(raidPtr, new_value) - RF_Raid_t *raidPtr; - int new_value; +rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) { RF_ComponentLabel_t clabel; struct vnode *vp; @@ -3362,7 +3353,7 @@ rf_set_autoconfig(raidPtr, new_value) raidPtr->autoconfigure = new_value; for(row=0; row<raidPtr->numRow; row++) { for(column=0; column<raidPtr->numCol; column++) { - if (raidPtr->Disks[row][column].status == + if (raidPtr->Disks[row][column].status == rf_ds_optimal) { dev = raidPtr->Disks[row][column].dev; vp = raidPtr->raid_cinfo[row][column].ci_vp; @@ -3376,9 +3367,7 @@ rf_set_autoconfig(raidPtr, new_value) } int -rf_set_rootpartition(raidPtr, new_value) - RF_Raid_t *raidPtr; - int new_value; +rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) { RF_ComponentLabel_t clabel; struct vnode *vp; @@ -3388,7 +3377,7 @@ rf_set_rootpartition(raidPtr, new_value) raidPtr->root_partition = new_value; for(row=0; row<raidPtr->numRow; row++) { for(column=0; column<raidPtr->numCol; column++) { - if (raidPtr->Disks[row][column].status == + if (raidPtr->Disks[row][column].status == rf_ds_optimal) { dev = raidPtr->Disks[row][column].dev; vp = raidPtr->raid_cinfo[row][column].ci_vp; @@ -3402,14 +3391,13 @@ rf_set_rootpartition(raidPtr, new_value) } void -rf_release_all_vps(cset) - RF_ConfigSet_t *cset; +rf_release_all_vps(RF_ConfigSet_t *cset) { RF_AutoConfig_t *ac; - + ac = cset->ac; while(ac!=NULL) { - /* Close the vp, and give it back */ + /* Close the vp, and give it back. */ if (ac->vp) { VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); vrele(ac->vp); @@ -3421,41 +3409,38 @@ rf_release_all_vps(cset) void -rf_cleanup_config_set(cset) - RF_ConfigSet_t *cset; +rf_cleanup_config_set(RF_ConfigSet_t *cset) { RF_AutoConfig_t *ac; RF_AutoConfig_t *next_ac; - + ac = cset->ac; while(ac!=NULL) { next_ac = ac->next; - /* nuke the label */ + /* Nuke the label. */ free(ac->clabel, M_RAIDFRAME); - /* cleanup the config structure */ + /* Cleanup the config structure. */ free(ac, M_RAIDFRAME); - /* "next.." */ + /* "next..." */ ac = next_ac; } - /* and, finally, nuke the config set */ + /* And, finally, nuke the config set. */ free(cset, M_RAIDFRAME); } void -raid_init_component_label(raidPtr, clabel) - RF_Raid_t *raidPtr; - RF_ComponentLabel_t *clabel; +raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) { - /* current version number */ - clabel->version = RF_COMPONENT_LABEL_VERSION; + /* Current version number. */ + clabel->version = RF_COMPONENT_LABEL_VERSION; clabel->serial_number = raidPtr->serial_number; clabel->mod_counter = raidPtr->mod_counter; clabel->num_rows = raidPtr->numRow; clabel->num_columns = raidPtr->numCol; - clabel->clean = RF_RAID_DIRTY; /* not clean */ - clabel->status = rf_ds_optimal; /* "It's good!" */ - + clabel->clean = RF_RAID_DIRTY; /* Not clean. */ + clabel->status = rf_ds_optimal; /* "It's good !" */ + clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; @@ -3463,7 +3448,7 @@ raid_init_component_label(raidPtr, clabel) clabel->blockSize = raidPtr->bytesPerSector; clabel->numBlocks = raidPtr->sectorsPerDisk; - /* XXX not portable */ + /* XXX Not portable. 
*/ clabel->parityConfig = raidPtr->Layout.map->parityConfig; clabel->maxOutstanding = raidPtr->maxOutstanding; clabel->autoconfigure = raidPtr->autoconfigure; @@ -3473,9 +3458,7 @@ raid_init_component_label(raidPtr, clabel) } int -rf_auto_config_set(cset,unit) - RF_ConfigSet_t *cset; - int *unit; +rf_auto_config_set(RF_ConfigSet_t *cset, int *unit) { RF_Raid_t *raidPtr; RF_Config_t *config; @@ -3487,78 +3470,79 @@ rf_auto_config_set(cset,unit) retcode = 0; *unit = -1; - /* 1. Create a config structure */ + /* 1. Create a config structure. */ - config = (RF_Config_t *)malloc(sizeof(RF_Config_t), - M_RAIDFRAME, - M_NOWAIT); + config = (RF_Config_t *)malloc(sizeof(RF_Config_t), M_RAIDFRAME, + M_NOWAIT); if (config==NULL) { printf("Out of mem!?!?\n"); - /* XXX do something more intelligent here. */ + /* XXX Do something more intelligent here. */ return(1); } memset(config, 0, sizeof(RF_Config_t)); - /* XXX raidID needs to be set correctly.. */ + /* XXX raidID needs to be set correctly... */ - /* - 2. Figure out what RAID ID this one is supposed to live at - See if we can get the same RAID dev that it was configured - on last time.. - */ + /* + * 2. Figure out what RAID ID this one is supposed to live at. + * See if we can get the same RAID dev that it was configured + * on last time... + */ raidID = cset->ac->clabel->last_unit; if ((raidID < 0) || (raidID >= numraid)) { - /* let's not wander off into lala land. */ + /* Let's not wander off into lala land. */ raidID = numraid - 1; } if (raidPtrs[raidID]->valid != 0) { - /* - Nope... Go looking for an alternative... - Start high so we don't immediately use raid0 if that's - not taken. - */ + /* + * Nope... Go looking for an alternative... + * Start high so we don't immediately use raid0 if that's + * not taken. + */ for(raidID = numraid - 1; raidID >= 0; raidID--) { if (raidPtrs[raidID]->valid == 0) { - /* can use this one! */ + /* We can use this one ! */ break; } } } if (raidID < 0) { - /* punt... */ + /* Punt... */ printf("Unable to auto configure this set!\n"); printf("(Out of RAID devs!)\n"); return(1); } raidPtr = raidPtrs[raidID]; - /* XXX all this stuff should be done SOMEWHERE ELSE! */ + /* XXX All this stuff should be done SOMEWHERE ELSE ! */ raidPtr->raidid = raidID; raidPtr->openings = RAIDOUTSTANDING; - /* 3. Build the configuration structure */ + /* 3. Build the configuration structure. */ rf_create_configuration(cset->ac, config, raidPtr); - /* 4. Do the configuration */ + /* 4. Do the configuration. */ retcode = rf_Configure(raidPtr, config, cset->ac); - + if (retcode == 0) { raidinit(raidPtrs[raidID]); rf_markalldirty(raidPtrs[raidID]); - raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ + raidPtrs[raidID]->autoconfigure = 1; /* XXX Do this here ? */ if (cset->ac->clabel->root_partition==1) { - /* everything configured just fine. Make a note - that this set is eligible to be root. */ + /* + * Everything configured just fine. Make a note + * that this set is eligible to be root. + */ cset->rootable = 1; - /* XXX do this here? */ - raidPtrs[raidID]->root_partition = 1; + /* XXX Do this here ? */ + raidPtrs[raidID]->root_partition = 1; } } @@ -3569,20 +3553,19 @@ rf_auto_config_set(cset,unit) (1 << raidPtrs[raidID]->logBytesPerSector) / 1024), raidPtrs[raidID]->root_partition ? " as root" : ""); - /* 5. Cleanup */ + /* 5. Cleanup. 
*/ free(config, M_RAIDFRAME); - + *unit = raidID; return(retcode); } void -rf_disk_unbusy(desc) - RF_RaidAccessDesc_t *desc; +rf_disk_unbusy(RF_RaidAccessDesc_t *desc) { struct buf *bp; bp = (struct buf *)desc->bp; - disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, + disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, (bp->b_bcount - bp->b_resid)); } diff --git a/sys/dev/raidframe/rf_options.c b/sys/dev/raidframe/rf_options.c index dd7a9957c8a..def8a552730 100644 --- a/sys/dev/raidframe/rf_options.c +++ b/sys/dev/raidframe/rf_options.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_options.c,v 1.2 1999/02/16 00:03:02 niklas Exp $ */ +/* $OpenBSD: rf_options.c,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_options.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */ + /* * rf_options.c */ @@ -37,40 +38,46 @@ #include "rf_general.h" #include "rf_options.h" -#ifdef RF_DBG_OPTION -#undef RF_DBG_OPTION -#endif /* RF_DBG_OPTION */ +#ifdef RF_DBG_OPTION +#undef RF_DBG_OPTION +#endif /* RF_DBG_OPTION */ -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) long rf_##_option_ = _defval_; -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) long rf_/**/_option_ = _defval_; -#endif /* __STDC__ */ +#ifdef __STDC__ +#define RF_DBG_OPTION(_option_,_defval_) \ + long rf_##_option_ = _defval_; +#else /* __STDC__ */ +#define RF_DBG_OPTION(_option_,_defval_) \ + long rf_/**/_option_ = _defval_; +#endif /* __STDC__ */ #include "rf_optnames.h" -#undef RF_DBG_OPTION +#undef RF_DBG_OPTION -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_##_option_ }, -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_/**/_option_ }, -#endif /* __STDC__ */ +#ifdef __STDC__ +#define RF_DBG_OPTION(_option_,_defval_) \ + {RF_STRING(_option_), &rf_##_option_}, +#else /* __STDC__ */ +#define RF_DBG_OPTION(_option_,_defval_) \ + {RF_STRING(_option_), &rf_/**/_option_}, +#endif /* __STDC__ */ RF_DebugName_t rf_debugNames[] = { #include "rf_optnames.h" {NULL, NULL} }; -#undef RF_DBG_OPTION +#undef RF_DBG_OPTION -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) rf_##_option_ = _defval_ ; -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) rf_/**/_option_ = _defval_ ; -#endif /* __STDC__ */ +#ifdef __STDC__ +#define RF_DBG_OPTION(_option_,_defval_) \ + rf_##_option_ = _defval_; +#else /* __STDC__ */ +#define RF_DBG_OPTION(_option_,_defval_) \ + rf_/**/_option_ = _defval_; +#endif /* __STDC__ */ -void -rf_ResetDebugOptions() +void +rf_ResetDebugOptions(void) { #include "rf_optnames.h" } diff --git a/sys/dev/raidframe/rf_options.h b/sys/dev/raidframe/rf_options.h index fa603b61419..e89a9734d89 100644 --- a/sys/dev/raidframe/rf_options.h +++ b/sys/dev/raidframe/rf_options.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_options.h,v 1.2 1999/02/16 00:03:02 niklas Exp $ */ +/* $OpenBSD: rf_options.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_options.h,v 1.3 1999/02/05 00:06:13 oster Exp $ */ + /* * rf_options.h */ @@ -30,29 +31,31 @@ * rights to redistribute these changes. 
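/*
 * Aside: a stand-alone sketch of the X-macro technique that rf_options.c
 * applies above, where RF_DBG_OPTION is redefined and rf_optnames.h is
 * re-included three times to (1) define the rf_* variables, (2) build the
 * rf_debugNames[] lookup table, and (3) reset defaults in
 * rf_ResetDebugOptions().  This sketch inlines a made-up two-entry list
 * (EXAMPLE_OPTS, exampleDebug, exampleSize) so it is self-contained; only
 * the pattern matches the driver.
 */
#include <stddef.h>

#define EXAMPLE_OPTS(OPT)			\
	OPT(exampleDebug, 0)			\
	OPT(exampleSize, 100)

/* Pass 1: define one long per option. */
#define OPT_DEFINE(_option_,_defval_)	long rf_##_option_ = _defval_;
EXAMPLE_OPTS(OPT_DEFINE)
#undef	OPT_DEFINE

/* Pass 2: build the name/pointer table used to look options up by name. */
#define OPT_ENTRY(_option_,_defval_)	{ #_option_, &rf_##_option_ },
struct example_opt { char *name; long *ptr; } example_opts[] = {
	EXAMPLE_OPTS(OPT_ENTRY)
	{ NULL, NULL }
};
#undef	OPT_ENTRY

/* Pass 3: reset every option to its default value. */
#define OPT_RESET(_option_,_defval_)	rf_##_option_ = _defval_;
void
example_reset_opts(void)
{
	EXAMPLE_OPTS(OPT_RESET)
}
#undef	OPT_RESET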
*/ -#ifndef _RF__RF_OPTIONS_H_ -#define _RF__RF_OPTIONS_H_ +#ifndef _RF__RF_OPTIONS_H_ +#define _RF__RF_OPTIONS_H_ -#define RF_DEFAULT_LOCK_TABLE_SIZE 256 +#define RF_DEFAULT_LOCK_TABLE_SIZE 256 typedef struct RF_DebugNames_s { - char *name; - long *ptr; -} RF_DebugName_t; + char *name; + long *ptr; +} RF_DebugName_t; extern RF_DebugName_t rf_debugNames[]; -#ifdef RF_DBG_OPTION -#undef RF_DBG_OPTION -#endif /* RF_DBG_OPTION */ - -#ifdef __STDC__ -#define RF_DBG_OPTION(_option_,_defval_) extern long rf_##_option_; -#else /* __STDC__ */ -#define RF_DBG_OPTION(_option_,_defval_) extern long rf_/**/_option_; -#endif /* __STDC__ */ +#ifdef RF_DBG_OPTION +#undef RF_DBG_OPTION +#endif /* RF_DBG_OPTION */ + +#ifdef __STDC__ +#define RF_DBG_OPTION(_option_,_defval_) \ + extern long rf_##_option_; +#else /* __STDC__ */ +#define RF_DBG_OPTION(_option_,_defval_) \ + extern long rf_/**/_option_; +#endif /* __STDC__ */ #include "rf_optnames.h" -void rf_ResetDebugOptions(void); +void rf_ResetDebugOptions(void); -#endif /* !_RF__RF_OPTIONS_H_ */ +#endif /* !_RF__RF_OPTIONS_H_ */ diff --git a/sys/dev/raidframe/rf_optnames.h b/sys/dev/raidframe/rf_optnames.h index 3575f1cd792..b4f837027bb 100644 --- a/sys/dev/raidframe/rf_optnames.h +++ b/sys/dev/raidframe/rf_optnames.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_optnames.h,v 1.4 2000/01/07 14:50:22 peter Exp $ */ +/* $OpenBSD: rf_optnames.h,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_optnames.h,v 1.6 1999/12/07 02:54:08 oster Exp $ */ + /* * rf_optnames.h */ @@ -36,25 +37,28 @@ RF_DBG_OPTION(accessDebug, 0) RF_DBG_OPTION(accessTraceBufSize, 0) -RF_DBG_OPTION(cscanDebug, 0) /* debug CSCAN sorting */ +RF_DBG_OPTION(cscanDebug, 0) /* Debug CSCAN sorting. */ RF_DBG_OPTION(dagDebug, 0) RF_DBG_OPTION(debugPrintUseBuffer, 0) RF_DBG_OPTION(degDagDebug, 0) RF_DBG_OPTION(disableAsyncAccs, 0) RF_DBG_OPTION(diskDebug, 0) -RF_DBG_OPTION(enableAtomicRMW, 0) /* this debug var enables locking of +RF_DBG_OPTION(enableAtomicRMW, 0) /* + * This debug var enables locking of * the disk arm during small-write - * operations. Setting this variable + * operations. Setting this variable * to anything other than 0 will - * result in deadlock. (wvcii) */ + * result in deadlock. (wvcii) + */ RF_DBG_OPTION(engineDebug, 0) -RF_DBG_OPTION(fifoDebug, 0) /* debug fifo queueing */ +RF_DBG_OPTION(fifoDebug, 0) /* Debug fifo queueing. */ RF_DBG_OPTION(floatingRbufDebug, 0) RF_DBG_OPTION(forceHeadSepLimit, -1) -RF_DBG_OPTION(forceNumFloatingReconBufs, -1) /* wire down number of - * extra recon buffers - * to use */ -RF_DBG_OPTION(keepAccTotals, 0) /* turn on keep_acc_totals */ +RF_DBG_OPTION(forceNumFloatingReconBufs, -1) /* + * Wire down number of extra + * recon buffers to use. + */ +RF_DBG_OPTION(keepAccTotals, 0) /* Turn on keep_acc_totals. */ RF_DBG_OPTION(lockTableSize, RF_DEFAULT_LOCK_TABLE_SIZE) RF_DBG_OPTION(mapDebug, 0) RF_DBG_OPTION(maxNumTraces, -1) @@ -62,44 +66,55 @@ RF_DBG_OPTION(maxNumTraces, -1) RF_DBG_OPTION(memChunkDebug, 0) RF_DBG_OPTION(memDebug, 0) RF_DBG_OPTION(memDebugAddress, 0) -RF_DBG_OPTION(numBufsToAccumulate, 1) /* number of buffers to - * accumulate before doing XOR */ +RF_DBG_OPTION(numBufsToAccumulate, 1) /* + * Number of buffers to accumulate + * before doing XOR. 
+ */ RF_DBG_OPTION(prReconSched, 0) RF_DBG_OPTION(printDAGsDebug, 0) RF_DBG_OPTION(printStatesDebug, 0) -RF_DBG_OPTION(protectedSectors, 64L) /* # of sectors at start of - * disk to exclude from RAID - * address space */ +RF_DBG_OPTION(protectedSectors, 64L) /* + * # of sectors at start of disk to + * exclude from RAID address space. + */ RF_DBG_OPTION(pssDebug, 0) RF_DBG_OPTION(queueDebug, 0) RF_DBG_OPTION(quiesceDebug, 0) -RF_DBG_OPTION(raidSectorOffset, 0) /* added to all incoming sectors to - * debug alignment problems */ +RF_DBG_OPTION(raidSectorOffset, 0) /* + * Added to all incoming sectors to + * debug alignment problems. + */ RF_DBG_OPTION(reconDebug, 0) RF_DBG_OPTION(reconbufferDebug, 0) -RF_DBG_OPTION(scanDebug, 0) /* debug SCAN sorting */ -RF_DBG_OPTION(showXorCallCounts, 0) /* show n-way Xor call counts */ -RF_DBG_OPTION(shutdownDebug, 0) /* show shutdown calls */ +RF_DBG_OPTION(scanDebug, 0) /* Debug SCAN sorting. */ +RF_DBG_OPTION(showXorCallCounts, 0) /* Show n-way Xor call counts. */ +RF_DBG_OPTION(shutdownDebug, 0) /* Show shutdown calls. */ RF_DBG_OPTION(sizePercentage, 100) -RF_DBG_OPTION(sstfDebug, 0) /* turn on debugging info for sstf queueing */ +RF_DBG_OPTION(sstfDebug, 0) /* + * Turn on debugging info for sstf + * queueing. + */ RF_DBG_OPTION(stripeLockDebug, 0) RF_DBG_OPTION(suppressLocksAndLargeWrites, 0) RF_DBG_OPTION(suppressTraceDelays, 0) RF_DBG_OPTION(useMemChunks, 1) RF_DBG_OPTION(validateDAGDebug, 0) -RF_DBG_OPTION(validateVisitedDebug, 1) /* XXX turn to zero by - * default? */ +RF_DBG_OPTION(validateVisitedDebug, 1) /* XXX Turn to zero by default ? */ RF_DBG_OPTION(verifyParityDebug, 0) -RF_DBG_OPTION(debugKernelAccess, 0) /* DoAccessKernel debugging */ +RF_DBG_OPTION(debugKernelAccess, 0) /* DoAccessKernel debugging. */ -#if RF_INCLUDE_PARITYLOGGING > 0 +#if RF_INCLUDE_PARITYLOGGING > 0 RF_DBG_OPTION(forceParityLogReint, 0) -RF_DBG_OPTION(numParityRegions, 0) /* number of regions in the array */ +RF_DBG_OPTION(numParityRegions, 0) /* Number of regions in the array. */ RF_DBG_OPTION(numReintegrationThreads, 1) -RF_DBG_OPTION(parityLogDebug, 0) /* if nonzero, enables debugging of - * parity logging */ -RF_DBG_OPTION(totalInCoreLogCapacity, 1024 * 1024) /* target bytes +RF_DBG_OPTION(parityLogDebug, 0) /* + * If nonzero, enables debugging of + * parity logging. + */ +RF_DBG_OPTION(totalInCoreLogCapacity, 1024 * 1024) /* + * Target bytes * available for in-core - * logs */ -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ + * logs. + */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylog.c b/sys/dev/raidframe/rf_paritylog.c index 657bb6db9fa..aa318e49cb3 100644 --- a/sys/dev/raidframe/rf_paritylog.c +++ b/sys/dev/raidframe/rf_paritylog.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_paritylog.c,v 1.4 2000/01/11 18:02:22 peter Exp $ */ +/* $OpenBSD: rf_paritylog.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,16 +28,16 @@ * rights to redistribute these changes. */ -/* Code for manipulating in-core parity logs - * +/* + * Code for manipulating in-core parity logs. */ #include "rf_archs.h" -#if RF_INCLUDE_PARITYLOGGING > 0 +#if RF_INCLUDE_PARITYLOGGING > 0 /* - * Append-only log for recording parity "update" and "overwrite" records + * Append-only log for recording parity "update" and "overwrite" records. 
*/ #include "rf_types.h" @@ -55,28 +56,49 @@ #include "rf_paritylogging.h" #include "rf_paritylogDiskMgr.h" -static RF_CommonLogData_t * -AllocParityLogCommonData(RF_Raid_t * raidPtr) +RF_CommonLogData_t *rf_AllocParityLogCommonData(RF_Raid_t *); +void rf_FreeParityLogCommonData(RF_CommonLogData_t *); +RF_ParityLogData_t *rf_AllocParityLogData(RF_Raid_t *); +void rf_FreeParityLogData(RF_ParityLogData_t *); +void rf_EnqueueParityLogData(RF_ParityLogData_t *, RF_ParityLogData_t **, + RF_ParityLogData_t **); +RF_ParityLogData_t *rf_DequeueParityLogData(RF_Raid_t *, RF_ParityLogData_t **, + RF_ParityLogData_t **, int); +void rf_RequeueParityLogData(RF_ParityLogData_t *, RF_ParityLogData_t **, + RF_ParityLogData_t **); +RF_ParityLogData_t *rf_DequeueMatchingLogData(RF_Raid_t *, + RF_ParityLogData_t **, RF_ParityLogData_t **); +RF_ParityLog_t *rf_AcquireParityLog(RF_ParityLogData_t *, int); +void rf_ReintLog(RF_Raid_t *, int, RF_ParityLog_t *); +void rf_FlushLog(RF_Raid_t *, RF_ParityLog_t *); +int rf_DumpParityLogToDisk(int, RF_ParityLogData_t *); + +RF_CommonLogData_t * +rf_AllocParityLogCommonData(RF_Raid_t *raidPtr) { RF_CommonLogData_t *common = NULL; - int rc; + int rc; - /* Return a struct for holding common parity log information from the - * free list (rf_parityLogDiskQueue.freeCommonList). If the free list - * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */ + /* + * Return a struct for holding common parity log information from the + * free list (rf_parityLogDiskQueue.freeCommonList). If the free list + * is empty, call RF_Malloc to create a new structure. NON-BLOCKING + */ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); if (raidPtr->parityLogDiskQueue.freeCommonList) { common = raidPtr->parityLogDiskQueue.freeCommonList; - raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; + raidPtr->parityLogDiskQueue.freeCommonList = + raidPtr->parityLogDiskQueue.freeCommonList->next; RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } else { RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); + RF_Malloc(common, sizeof(RF_CommonLogData_t), + (RF_CommonLogData_t *)); rc = rf_mutex_init(&common->mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); RF_Free(common, sizeof(RF_CommonLogData_t)); common = NULL; } @@ -85,14 +107,16 @@ AllocParityLogCommonData(RF_Raid_t * raidPtr) return (common); } -static void -FreeParityLogCommonData(RF_CommonLogData_t * common) +void +rf_FreeParityLogCommonData(RF_CommonLogData_t *common) { RF_Raid_t *raidPtr; - /* Insert a single struct for holding parity log information (data) + /* + * Insert a single struct for holding parity log information (data) * into the free list (rf_parityLogDiskQueue.freeCommonList). - * NON-BLOCKING */ + * NON-BLOCKING + */ raidPtr = common->raidPtr; RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); @@ -101,23 +125,27 @@ FreeParityLogCommonData(RF_CommonLogData_t * common) RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } -static RF_ParityLogData_t * -AllocParityLogData(RF_Raid_t * raidPtr) +RF_ParityLogData_t * +rf_AllocParityLogData(RF_Raid_t *raidPtr) { RF_ParityLogData_t *data = NULL; - /* Return a struct for holding parity log information from the free - * list (rf_parityLogDiskQueue.freeList). 
If the free list is empty, - * call RF_Malloc to create a new structure. NON-BLOCKING */ + /* + * Return a struct for holding parity log information from the free + * list (rf_parityLogDiskQueue.freeList). If the free list is empty, + * call RF_Malloc to create a new structure. NON-BLOCKING + */ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); if (raidPtr->parityLogDiskQueue.freeDataList) { data = raidPtr->parityLogDiskQueue.freeDataList; - raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; + raidPtr->parityLogDiskQueue.freeDataList = + raidPtr->parityLogDiskQueue.freeDataList->next; RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } else { RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); + RF_Malloc(data, sizeof(RF_ParityLogData_t), + (RF_ParityLogData_t *)); } data->next = NULL; data->prev = NULL; @@ -125,15 +153,17 @@ AllocParityLogData(RF_Raid_t * raidPtr) } -static void -FreeParityLogData(RF_ParityLogData_t * data) +void +rf_FreeParityLogData(RF_ParityLogData_t *data) { RF_ParityLogData_t *nextItem; RF_Raid_t *raidPtr; - /* Insert a linked list of structs for holding parity log information + /* + * Insert a linked list of structs for holding parity log information * (data) into the free list (parityLogDiskQueue.freeList). - * NON-BLOCKING */ + * NON-BLOCKING + */ raidPtr = data->common->raidPtr; RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); @@ -147,32 +177,35 @@ FreeParityLogData(RF_ParityLogData_t * data) } -static void -EnqueueParityLogData( - RF_ParityLogData_t * data, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail) +void +rf_EnqueueParityLogData(RF_ParityLogData_t *data, RF_ParityLogData_t **head, + RF_ParityLogData_t **tail) { RF_Raid_t *raidPtr; - /* Insert an in-core parity log (*data) into the head of a disk queue - * (*head, *tail). NON-BLOCKING */ + /* + * Insert an in-core parity log (*data) into the head of a disk queue + * (*head, *tail). NON-BLOCKING + */ raidPtr = data->common->raidPtr; if (rf_parityLogDebug) - printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); + printf("[enqueueing parity log data, region %d," + " raidAddress %d, numSector %d]\n", data->regionID, + (int) data->diskAddress.raidAddress, + (int) data->diskAddress.numSector); RF_ASSERT(data->prev == NULL); RF_ASSERT(data->next == NULL); RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); if (*head) { - /* insert into head of queue */ + /* Insert into head of queue. */ RF_ASSERT((*head)->prev == NULL); RF_ASSERT((*tail)->next == NULL); data->next = *head; (*head)->prev = data; *head = data; } else { - /* insert into empty list */ + /* Insert into empty list. */ RF_ASSERT(*head == NULL); RF_ASSERT(*tail == NULL); *head = data; @@ -183,25 +216,24 @@ EnqueueParityLogData( RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } -static RF_ParityLogData_t * -DequeueParityLogData( - RF_Raid_t * raidPtr, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail, - int ignoreLocks) +RF_ParityLogData_t * +rf_DequeueParityLogData(RF_Raid_t *raidPtr, RF_ParityLogData_t **head, + RF_ParityLogData_t **tail, int ignoreLocks) { RF_ParityLogData_t *data; - /* Remove and return an in-core parity log from the tail of a disk - * queue (*head, *tail). NON-BLOCKING */ + /* + * Remove and return an in-core parity log from the tail of a disk + * queue (*head, *tail). 
NON-BLOCKING + */ - /* remove from tail, preserving FIFO order */ + /* Remove from tail, preserving FIFO order. */ if (!ignoreLocks) RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); data = *tail; if (data) { if (*head == *tail) { - /* removing last item from queue */ + /* Removing last item from queue. */ *head = NULL; *tail = NULL; } else { @@ -213,7 +245,10 @@ DequeueParityLogData( data->next = NULL; data->prev = NULL; if (rf_parityLogDebug) - printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); + printf("[dequeueing parity log data, region %d," + " raidAddress %d, numSector %d]\n", data->regionID, + (int) data->diskAddress.raidAddress, + (int) data->diskAddress.numSector); } if (*head) { RF_ASSERT((*head)->prev == NULL); @@ -225,30 +260,33 @@ DequeueParityLogData( } -static void -RequeueParityLogData( - RF_ParityLogData_t * data, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail) +void +rf_RequeueParityLogData(RF_ParityLogData_t *data, RF_ParityLogData_t **head, + RF_ParityLogData_t **tail) { RF_Raid_t *raidPtr; - /* Insert an in-core parity log (*data) into the tail of a disk queue - * (*head, *tail). NON-BLOCKING */ + /* + * Insert an in-core parity log (*data) into the tail of a disk queue + * (*head, *tail). NON-BLOCKING + */ raidPtr = data->common->raidPtr; RF_ASSERT(data); if (rf_parityLogDebug) - printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); + printf("[requeueing parity log data, region %d," + " raidAddress %d, numSector %d]\n", data->regionID, + (int) data->diskAddress.raidAddress, + (int) data->diskAddress.numSector); RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); if (*tail) { - /* append to tail of list */ + /* Append to tail of list. */ data->prev = *tail; data->next = NULL; (*tail)->next = data; *tail = data; } else { - /* inserting into an empty list */ + /* Inserting into an empty list. */ *head = data; *tail = data; (*head)->prev = NULL; @@ -260,28 +298,25 @@ RequeueParityLogData( } RF_ParityLogData_t * -rf_CreateParityLogData( - RF_ParityRecordType_t operation, - RF_PhysDiskAddr_t * pda, - caddr_t bufPtr, - RF_Raid_t * raidPtr, +rf_CreateParityLogData(RF_ParityRecordType_t operation, RF_PhysDiskAddr_t *pda, + caddr_t bufPtr, RF_Raid_t *raidPtr, int (*wakeFunc) (RF_DagNode_t * node, int status), - void *wakeArg, - RF_AccTraceEntry_t * tracerec, - RF_Etimer_t startTime) + void *wakeArg, RF_AccTraceEntry_t *tracerec, RF_Etimer_t startTime) { RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; RF_CommonLogData_t *common; RF_PhysDiskAddr_t *diskAddress; - int boundary, offset = 0; + int boundary, offset = 0; - /* Return an initialized struct of info to be logged. Build one item + /* + * Return an initialized struct of info to be logged. Build one item * per physical disk address, one item per region. 
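/*
 * Aside: a toy sketch (the names and the 1000-sector region size are made
 * up) of the boundary split that rf_CreateParityLogData performs below.
 * A run of sectors is chopped so that every logged piece falls inside a
 * single parity region; region_of() stands in for the role played by
 * rf_MapRegionIDParityLogging in the driver.
 */
#include <stdio.h>

static int
region_of(long sector)
{
	return ((int)(sector / 1000));	/* Toy mapping: 1000-sector regions. */
}

static void
split_by_region(long start, long nsect)
{
	long boundary;

	while (nsect > 0) {
		/* Count leading sectors that stay in the first region. */
		boundary = 1;
		while (boundary < nsect &&
		    region_of(start + boundary) == region_of(start))
			boundary++;
		printf("log %ld sectors at %ld in region %d\n",
		    boundary, start, region_of(start));
		start += boundary;
		nsect -= boundary;
	}
}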
- * - * NON-BLOCKING */ + * + * NON-BLOCKING + */ diskAddress = pda; - common = AllocParityLogCommonData(raidPtr); + common = rf_AllocParityLogCommonData(raidPtr); RF_ASSERT(common); common->operation = operation; @@ -297,34 +332,38 @@ rf_CreateParityLogData( printf("[entering CreateParityLogData]\n"); while (diskAddress) { common->cnt++; - data = AllocParityLogData(raidPtr); + data = rf_AllocParityLogData(raidPtr); RF_ASSERT(data); data->common = common; data->next = NULL; data->prev = NULL; - data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); - if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { - /* disk address does not cross a region boundary */ + data->regionID = rf_MapRegionIDParityLogging(raidPtr, + diskAddress->startSector); + if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, + diskAddress->startSector + diskAddress->numSector - 1)) { + /* Disk address does not cross a region boundary. */ data->diskAddress = *diskAddress; data->bufOffset = offset; offset = offset + diskAddress->numSector; - EnqueueParityLogData(data, &resultHead, &resultTail); - /* adjust disk address */ + rf_EnqueueParityLogData(data, &resultHead, &resultTail); + /* Adjust disk address. */ diskAddress = diskAddress->next; } else { - /* disk address crosses a region boundary */ - /* find address where region is crossed */ + /* Disk address crosses a region boundary. */ + /* Find address where region is crossed. */ boundary = 0; - while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) + while (data->regionID == + rf_MapRegionIDParityLogging(raidPtr, + diskAddress->startSector + boundary)) boundary++; - /* enter data before the boundary */ + /* Enter data before the boundary. */ data->diskAddress = *diskAddress; data->diskAddress.numSector = boundary; data->bufOffset = offset; offset += boundary; - EnqueueParityLogData(data, &resultHead, &resultTail); - /* adjust disk address */ + rf_EnqueueParityLogData(data, &resultHead, &resultTail); + /* Adjust disk address. */ diskAddress->startSector += boundary; diskAddress->numSector -= boundary; } @@ -336,35 +375,35 @@ rf_CreateParityLogData( RF_ParityLogData_t * -rf_SearchAndDequeueParityLogData( - RF_Raid_t * raidPtr, - int regionID, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail, - int ignoreLocks) +rf_SearchAndDequeueParityLogData(RF_Raid_t *raidPtr, int regionID, + RF_ParityLogData_t **head, RF_ParityLogData_t **tail, int ignoreLocks) { RF_ParityLogData_t *w; - /* Remove and return an in-core parity log from a specified region + /* + * Remove and return an in-core parity log from a specified region * (regionID). If a matching log is not found, return NULL. - * - * NON-BLOCKING. */ - - /* walk backward through a list, looking for an entry with a matching - * region ID */ + * + * NON-BLOCKING + */ + + /* + * walk backward through a list, looking for an entry with a matching + * region ID. + */ if (!ignoreLocks) RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); w = (*tail); while (w) { if (w->regionID == regionID) { - /* remove an element from the list */ + /* Remove an element from the list. */ if (w == *tail) { if (*head == *tail) { - /* removing only element in the list */ + /* Removing only element in the list. */ *head = NULL; *tail = NULL; } else { - /* removing last item in the list */ + /* Removing last item in the list. 
*/ *tail = (*tail)->prev; (*tail)->next = NULL; RF_ASSERT((*head)->prev == NULL); @@ -372,14 +411,16 @@ rf_SearchAndDequeueParityLogData( } } else { if (w == *head) { - /* removing first item in the list */ + /* Removing first item in the list. */ *head = (*head)->next; (*head)->prev = NULL; RF_ASSERT((*head)->prev == NULL); RF_ASSERT((*tail)->next == NULL); } else { - /* removing an item from the middle of - * the list */ + /* + * Removing an item from the middle of + * the list. + */ w->prev->next = w->next; w->next->prev = w->prev; RF_ASSERT((*head)->prev == NULL); @@ -389,7 +430,11 @@ rf_SearchAndDequeueParityLogData( w->prev = NULL; w->next = NULL; if (rf_parityLogDebug) - printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); + printf("[dequeueing parity log data," + " region %d, raidAddress %d," + " numSector %d]\n", w->regionID, + (int) w->diskAddress.raidAddress, + (int) w->diskAddress.numSector); return (w); } else w = w->prev; @@ -399,93 +444,109 @@ rf_SearchAndDequeueParityLogData( return (NULL); } -static RF_ParityLogData_t * -DequeueMatchingLogData( - RF_Raid_t * raidPtr, - RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail) +RF_ParityLogData_t * +rf_DequeueMatchingLogData(RF_Raid_t *raidPtr, RF_ParityLogData_t **head, + RF_ParityLogData_t **tail) { RF_ParityLogData_t *logDataList, *logData; - int regionID; + int regionID; - /* Remove and return an in-core parity log from the tail of a disk - * queue (*head, *tail). Then remove all matching (identical + /* + * Remove and return an in-core parity log from the tail of a disk + * queue (*head, *tail). Then remove all matching (identical * regionIDs) logData and return as a linked list. - * - * NON-BLOCKING */ + * + * NON-BLOCKING + */ - logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); + logDataList = rf_DequeueParityLogData(raidPtr, head, tail, RF_TRUE); if (logDataList) { regionID = logDataList->regionID; logData = logDataList; - logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); + logData->next = rf_SearchAndDequeueParityLogData(raidPtr, + regionID, head, tail, RF_TRUE); while (logData->next) { logData = logData->next; - logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); + logData->next = + rf_SearchAndDequeueParityLogData(raidPtr, regionID, + head, tail, RF_TRUE); } } return (logDataList); } -static RF_ParityLog_t * -AcquireParityLog( - RF_ParityLogData_t * logData, - int finish) +RF_ParityLog_t * +rf_AcquireParityLog(RF_ParityLogData_t *logData, int finish) { RF_ParityLog_t *log = NULL; RF_Raid_t *raidPtr; - /* Grab a log buffer from the pool and return it. If no buffers are - * available, return NULL. NON-BLOCKING */ + /* + * Grab a log buffer from the pool and return it. If no buffers are + * available, return NULL. 
NON-BLOCKING + */ raidPtr = logData->common->raidPtr; RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); if (raidPtr->parityLogPool.parityLogs) { log = raidPtr->parityLogPool.parityLogs; - raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; + raidPtr->parityLogPool.parityLogs = + raidPtr->parityLogPool.parityLogs->next; log->regionID = logData->regionID; log->numRecords = 0; log->next = NULL; raidPtr->logsInUse++; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); + RF_ASSERT(raidPtr->logsInUse >= 0 && + raidPtr->logsInUse <= raidPtr->numParityLogs); } else { - /* no logs available, so place ourselves on the queue of work + /* + * No logs available, so place ourselves on the queue of work * waiting on log buffers this is done while * parityLogPool.mutex is held, to ensure synchronization with - * ReleaseParityLogs. */ + * ReleaseParityLogs. + */ if (rf_parityLogDebug) - printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); + printf("[blocked on log, region %d, finish %d]\n", + logData->regionID, finish); if (finish) - RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); + rf_RequeueParityLogData(logData, + &raidPtr->parityLogDiskQueue.logBlockHead, + &raidPtr->parityLogDiskQueue.logBlockTail); else - EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); + rf_EnqueueParityLogData(logData, + &raidPtr->parityLogDiskQueue.logBlockHead, + &raidPtr->parityLogDiskQueue.logBlockTail); } RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); return (log); } -void -rf_ReleaseParityLogs( - RF_Raid_t * raidPtr, - RF_ParityLog_t * firstLog) +void +rf_ReleaseParityLogs(RF_Raid_t *raidPtr, RF_ParityLog_t *firstLog) { RF_ParityLogData_t *logDataList; RF_ParityLog_t *log, *lastLog; - int cnt; + int cnt; - /* Insert a linked list of parity logs (firstLog) to the free list + /* + * Insert a linked list of parity logs (firstLog) to the free list * (parityLogPool.parityLogPool) - * - * NON-BLOCKING. */ + * + * NON-BLOCKING + */ RF_ASSERT(firstLog); - /* Before returning logs to global free list, service all requests - * which are blocked on logs. Holding mutexes for parityLogPool and - * parityLogDiskQueue forces synchronization with AcquireParityLog(). */ + /* + * Before returning logs to global free list, service all requests + * which are blocked on logs. Holding mutexes for parityLogPool and + * parityLogDiskQueue forces synchronization with rf_AcquireParityLog(). 
+ */ RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); + logDataList = rf_DequeueMatchingLogData(raidPtr, + &raidPtr->parityLogDiskQueue.logBlockHead, + &raidPtr->parityLogDiskQueue.logBlockTail); log = firstLog; if (firstLog) firstLog = firstLog->next; @@ -496,7 +557,8 @@ rf_ReleaseParityLogs( RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); if (rf_parityLogDebug) - printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); + printf("[finishing up buf-blocked log data," + " region %d]\n", logDataList->regionID); if (log == NULL) { log = firstLog; if (firstLog) { @@ -508,9 +570,11 @@ rf_ReleaseParityLogs( RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); if (log) - logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); + logDataList = rf_DequeueMatchingLogData(raidPtr, + &raidPtr->parityLogDiskQueue.logBlockHead, + &raidPtr->parityLogDiskQueue.logBlockTail); } - /* return remaining logs to pool */ + /* Return remaining logs to pool. */ if (log) { log->next = firstLog; firstLog = log; @@ -518,11 +582,13 @@ rf_ReleaseParityLogs( if (firstLog) { lastLog = firstLog; raidPtr->logsInUse--; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); + RF_ASSERT(raidPtr->logsInUse >= 0 && + raidPtr->logsInUse <= raidPtr->numParityLogs); while (lastLog->next) { lastLog = lastLog->next; raidPtr->logsInUse--; - RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); + RF_ASSERT(raidPtr->logsInUse >= 0 && + raidPtr->logsInUse <= raidPtr->numParityLogs); } lastLog->next = raidPtr->parityLogPool.parityLogs; raidPtr->parityLogPool.parityLogs = firstLog; @@ -538,26 +604,26 @@ rf_ReleaseParityLogs( RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } -static void -ReintLog( - RF_Raid_t * raidPtr, - int regionID, - RF_ParityLog_t * log) +void +rf_ReintLog(RF_Raid_t *raidPtr, int regionID, RF_ParityLog_t *log) { RF_ASSERT(log); - /* Insert an in-core parity log (log) into the disk queue of - * reintegration work. Set the flag (reintInProgress) for the + /* + * Insert an in-core parity log (log) into the disk queue of + * reintegration work. Set the flag (reintInProgress) for the * specified region (regionID) to indicate that reintegration is in - * progress for this region. NON-BLOCKING */ + * progress for this region. NON-BLOCKING + */ RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint - * complete */ + /* Cleared when reint complete. */ + raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; if (rf_parityLogDebug) - printf("[requesting reintegration of region %d]\n", log->regionID); - /* move record to reintegration queue */ + printf("[requesting reintegration of region %d]\n", + log->regionID); + /* Move record to reintegration queue. 
*/ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); log->next = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = log; @@ -566,19 +632,19 @@ ReintLog( RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); } -static void -FlushLog( - RF_Raid_t * raidPtr, - RF_ParityLog_t * log) +void +rf_FlushLog(RF_Raid_t *raidPtr, RF_ParityLog_t *log) { - /* insert a core log (log) into a list of logs + /* + * Insert a core log (log) into a list of logs * (parityLogDiskQueue.flushQueue) waiting to be written to disk. - * NON-BLOCKING */ + * NON-BLOCKING + */ RF_ASSERT(log); RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); RF_ASSERT(log->next == NULL); - /* move log to flush queue */ + /* Move log to flush queue. */ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); log->next = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = log; @@ -586,27 +652,27 @@ FlushLog( RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); } -static int -DumpParityLogToDisk( - int finish, - RF_ParityLogData_t * logData) +int +rf_DumpParityLogToDisk(int finish, RF_ParityLogData_t *logData) { - int i, diskCount, regionID = logData->regionID; + int i, diskCount, regionID = logData->regionID; RF_ParityLog_t *log; RF_Raid_t *raidPtr; raidPtr = logData->common->raidPtr; - /* Move a core log to disk. If the log disk is full, initiate + /* + * Move a core log to disk. If the log disk is full, initiate * reintegration. - * + * * Return (0) if we can enqueue the dump immediately, otherwise return * (1) to indicate we are blocked on reintegration and control of the * thread should be relinquished. - * - * Caller must hold regionInfo[regionID].mutex - * - * NON-BLOCKING */ + * + * Caller must hold regionInfo[regionID].mutex. + * + * NON-BLOCKING + */ if (rf_parityLogDebug) printf("[dumping parity log to disk, region %d]\n", regionID); @@ -614,200 +680,293 @@ DumpParityLogToDisk( RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); RF_ASSERT(log->next == NULL); - /* if reintegration is in progress, must queue work */ + /* If reintegration is in progress, must queue work. */ RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); if (raidPtr->regionInfo[regionID].reintInProgress) { - /* Can not proceed since this region is currently being + /* + * Can not proceed since this region is currently being * reintegrated. We can not block, so queue remaining work and - * return */ + * return. + */ if (rf_parityLogDebug) - printf("[region %d waiting on reintegration]\n", regionID); - /* XXX not sure about the use of finish - shouldn't this - * always be "Enqueue"? */ + printf("[region %d waiting on reintegration]\n", + regionID); + /* + * XXX Not sure about the use of finish - shouldn't this + * always be "Enqueue" ? + */ if (finish) - RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); + rf_RequeueParityLogData(logData, + &raidPtr->parityLogDiskQueue.reintBlockHead, + &raidPtr->parityLogDiskQueue.reintBlockTail); else - EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); + rf_EnqueueParityLogData(logData, + &raidPtr->parityLogDiskQueue.reintBlockHead, + &raidPtr->parityLogDiskQueue.reintBlockTail); RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); - return (1); /* relenquish control of this thread */ + return (1); /* Relenquish control of this thread. 
*/ } RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); raidPtr->regionInfo[regionID].coreLog = NULL; - if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) - /* IMPORTANT!! this loop bound assumes region disk holds an - * integral number of core logs */ + if ((raidPtr->regionInfo[regionID].diskCount) < + raidPtr->regionInfo[regionID].capacity) + /* + * IMPORTANT !!! This loop bound assumes region disk holds an + * integral number of core logs. + */ { - /* update disk map for this region */ + /* Update disk map for this region. */ diskCount = raidPtr->regionInfo[regionID].diskCount; for (i = 0; i < raidPtr->numSectorsPerLog; i++) { - raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; - raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; + raidPtr->regionInfo[regionID].diskMap[i + diskCount] + .operation = log->records[i].operation; + raidPtr->regionInfo[regionID].diskMap[i + diskCount] + .parityAddr = log->records[i].parityAddr; } log->diskOffset = diskCount; - raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; - FlushLog(raidPtr, log); + raidPtr->regionInfo[regionID].diskCount += + raidPtr->numSectorsPerLog; + rf_FlushLog(raidPtr, log); } else { - /* no room for log on disk, send it to disk manager and - * request reintegration */ - RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); - ReintLog(raidPtr, regionID, log); + /* + * No room for log on disk, send it to disk manager and + * request reintegration. + */ + RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == + raidPtr->regionInfo[regionID].capacity); + rf_ReintLog(raidPtr, regionID, log); } if (rf_parityLogDebug) - printf("[finished dumping parity log to disk, region %d]\n", regionID); + printf("[finished dumping parity log to disk, region %d]\n", + regionID); return (0); } -int -rf_ParityLogAppend( - RF_ParityLogData_t * logData, - int finish, - RF_ParityLog_t ** incomingLog, - int clearReintFlag) +int +rf_ParityLogAppend(RF_ParityLogData_t *logData, int finish, + RF_ParityLog_t **incomingLog, int clearReintFlag) { - int regionID, logItem, itemDone; + int regionID, logItem, itemDone; RF_ParityLogData_t *item; - int punt, done = RF_FALSE; + int punt, done = RF_FALSE; RF_ParityLog_t *log; RF_Raid_t *raidPtr; RF_Etimer_t timer; - int (*wakeFunc) (RF_DagNode_t * node, int status); - void *wakeArg; + int (*wakeFunc) (RF_DagNode_t * node, int status); + void *wakeArg; - /* Add parity to the appropriate log, one sector at a time. This + /* + * Add parity to the appropriate log, one sector at a time. This * routine is called is called by dag functions ParityLogUpdateFunc * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. - * - * Parity to be logged is contained in a linked-list (logData). When + * + * Parity to be logged is contained in a linked-list (logData). When * this routine returns, every sector in the list will be in one of * three places: 1) entered into the parity log 2) queued, waiting on - * reintegration 3) queued, waiting on a core log - * - * Blocked work is passed to the ParityLoggingDiskManager for completion. - * Later, as conditions which required the block are removed, the work - * reenters this routine with the "finish" parameter set to "RF_TRUE." - * - * NON-BLOCKING */ + * reintegration 3) queued, waiting on a core log. + * + * Blocked work is passed to the ParityLoggingDiskManager for + * completion. 
Later, as conditions which required the block are + * removed, the work reenters this routine with the "finish" parameter + * set to "RF_TRUE." + * + * NON-BLOCKING + */ raidPtr = logData->common->raidPtr; - /* lock the region for the first item in logData */ + /* Lock the region for the first item in logData. */ RF_ASSERT(logData != NULL); regionID = logData->regionID; RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); if (clearReintFlag) { - /* Enable flushing for this region. Holding both locks - * provides a synchronization barrier with DumpParityLogToDisk */ + /* + * Enable flushing for this region. Holding both locks + * provides a synchronization barrier with + * rf_DumpParityLogToDisk. + */ RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); + RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == + RF_TRUE); raidPtr->regionInfo[regionID].diskCount = 0; raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now - * enabled */ + /* Flushing is now enabled. */ + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } - /* process each item in logData */ + /* Process each item in logData. */ while (logData) { - /* remove an item from logData */ + /* Remove an item from logData. */ item = logData; logData = logData->next; item->next = NULL; item->prev = NULL; if (rf_parityLogDebug) - printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); + printf("[appending parity log data, region %d," + " raidAddress %d, numSector %d]\n", item->regionID, + (int) item->diskAddress.raidAddress, + (int) item->diskAddress.numSector); - /* see if we moved to a new region */ + /* See if we moved to a new region. */ if (regionID != item->regionID) { RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); regionID = item->regionID; RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); } - punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This + punt = RF_FALSE;/* + * Set to RF_TRUE if work is blocked. This * can happen in one of two ways: 1) no core - * log (AcquireParityLog) 2) waiting on - * reintegration (DumpParityLogToDisk) If punt - * is RF_TRUE, the dataItem was queued, so - * skip to next item. */ - - /* process item, one sector at a time, until all sectors - * processed or we punt */ + * log (rf_AcquireParityLog) 2) waiting on + * reintegration (rf_DumpParityLogToDisk). + * If punt is RF_TRUE, the dataItem was queued, + * so skip to next item. + */ + + /* + * Process item, one sector at a time, until all sectors + * processed or we punt. + */ if (item->diskAddress.numSector > 0) done = RF_FALSE; else RF_ASSERT(0); while (!punt && !done) { - /* verify that a core log exists for this region */ + /* Verify that a core log exists for this region. */ if (!raidPtr->regionInfo[regionID].coreLog) { - /* Attempt to acquire a parity log. If + /* + * Attempt to acquire a parity log. If * acquisition fails, queue remaining work in - * data item and move to nextItem. */ + * data item and move to nextItem. 
+ */ if (incomingLog) { if (*incomingLog) { - RF_ASSERT((*incomingLog)->next == NULL); - raidPtr->regionInfo[regionID].coreLog = *incomingLog; - raidPtr->regionInfo[regionID].coreLog->regionID = regionID; + RF_ASSERT((*incomingLog)->next + == NULL); + raidPtr->regionInfo[regionID] + .coreLog = *incomingLog; + raidPtr->regionInfo[regionID] + .coreLog->regionID = + regionID; *incomingLog = NULL; } else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); + raidPtr->regionInfo[regionID] + .coreLog = + rf_AcquireParityLog(item, + finish); } else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - /* Note: AcquireParityLog either returns a log - * or enqueues currentItem */ + raidPtr->regionInfo[regionID].coreLog = + rf_AcquireParityLog(item, finish); + /* + * Note: rf_AcquireParityLog either returns + * a log or enqueues currentItem. + */ } if (!raidPtr->regionInfo[regionID].coreLog) - punt = RF_TRUE; /* failed to find a core log */ + punt = RF_TRUE; /* Failed to find a core log. */ else { - RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); - /* verify that the log has room for new - * entries */ - /* if log is full, dump it to disk and grab a - * new log */ - if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { - /* log is full, dump it to disk */ - if (DumpParityLogToDisk(finish, item)) - punt = RF_TRUE; /* dump unsuccessful, - * blocked on - * reintegration */ + RF_ASSERT(raidPtr->regionInfo[regionID].coreLog + ->next == NULL); + /* + * Verify that the log has room for new + * entries. + */ + /* + * If log is full, dump it to disk and grab a + * new log. + */ + if (raidPtr->regionInfo[regionID].coreLog + ->numRecords == raidPtr->numSectorsPerLog) + { + /* Log is full, dump it to disk. */ + if (rf_DumpParityLogToDisk(finish, + item)) + /* + * Dump unsuccessful, blocked + * on reintegration. + */ + punt = RF_TRUE; else { - /* dump was successful */ + /* Dump was successful. */ if (incomingLog) { if (*incomingLog) { - RF_ASSERT((*incomingLog)->next == NULL); - raidPtr->regionInfo[regionID].coreLog = *incomingLog; - raidPtr->regionInfo[regionID].coreLog->regionID = regionID; - *incomingLog = NULL; + RF_ASSERT( + (*incomingLog)->next == + NULL); + raidPtr-> + regionInfo[regionID].coreLog = + *incomingLog; + raidPtr-> + regionInfo[regionID].coreLog-> + regionID = regionID; + *incomingLog = + NULL; } else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); + raidPtr-> + regionInfo[regionID].coreLog = + rf_AcquireParityLog(item, + finish); } else - raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); - /* if a core log is not + raidPtr->regionInfo + [regionID].coreLog = + rf_AcquireParityLog(item, + finish); + /* + * If a core log is not * available, must queue work - * and return */ - if (!raidPtr->regionInfo[regionID].coreLog) - punt = RF_TRUE; /* blocked on log - * availability */ + * and return. + */ + if (!raidPtr->regionInfo + [regionID].coreLog) + /* + * Blocked on log + * availability. + */ + punt = RF_TRUE; } } } - /* if we didn't punt on this item, attempt to add a - * sector to the core log */ + /* + * If we didn't punt on this item, attempt to add a + * sector to the core log. 
+ */ if (!punt) { - RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); - /* at this point, we have a core log with - * enough room for a sector */ - /* copy a sector into the log */ + RF_ASSERT(raidPtr->regionInfo[regionID].coreLog + ->next == NULL); + /* + * At this point, we have a core log with + * enough room for a sector. + */ + /* Copy a sector into the log. */ log = raidPtr->regionInfo[regionID].coreLog; - RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); + RF_ASSERT(log->numRecords < + raidPtr->numSectorsPerLog); logItem = log->numRecords++; - log->records[logItem].parityAddr = item->diskAddress; - RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); - RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); + log->records[logItem].parityAddr = + item->diskAddress; + RF_ASSERT(log->records[logItem].parityAddr + .startSector >= + raidPtr->regionInfo[regionID] + .parityStartAddr); + RF_ASSERT(log->records[logItem].parityAddr + .startSector < + raidPtr->regionInfo[regionID] + .parityStartAddr + + raidPtr->regionInfo[regionID] + .numSectorsParity); log->records[logItem].parityAddr.numSector = 1; - log->records[logItem].operation = item->common->operation; - bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector)); + log->records[logItem].operation = + item->common->operation; + bcopy((item->common->bufPtr + + (item->bufOffset++ * (1 << + item->common->raidPtr->logBytesPerSector))), + log->bufPtr + (logItem * (1 << + item->common->raidPtr->logBytesPerSector)), + (1 << item->common->raidPtr + ->logBytesPerSector)); item->diskAddress.numSector--; item->diskAddress.startSector++; if (item->diskAddress.numSector == 0) @@ -816,8 +975,10 @@ rf_ParityLogAppend( } if (!punt) { - /* Processed this item completely, decrement count of - * items to be processed. */ + /* + * Processed this item completely, decrement count of + * items to be processed. + */ RF_ASSERT(item->diskAddress.numSector == 0); RF_LOCK_MUTEX(item->common->mutex); item->common->cnt--; @@ -827,23 +988,27 @@ rf_ParityLogAppend( itemDone = RF_FALSE; RF_UNLOCK_MUTEX(item->common->mutex); if (itemDone) { - /* Finished processing all log data for this + /* + * Finished processing all log data for this * IO Return structs to free list and invoke - * wakeup function. */ - timer = item->common->startTime; /* grab initial value of - * timer */ + * wakeup function. + */ + /* Grab initial value of timer. 
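/*
 * Aside: a minimal sketch (simplified, made-up names) of the completion
 * accounting used here.  Every RF_ParityLogData_t item belonging to one
 * I/O shares a common record whose cnt field counts unfinished items; the
 * item that drops it to zero fires the caller's wakeup.  In the driver
 * the decrement happens under common->mutex.
 */
struct example_common {
	int	cnt;			/* Unfinished items for this I/O. */
	void	(*wake)(void *, int);	/* Caller's wakeFunc. */
	void	*arg;			/* Caller's wakeArg. */
};

static void
example_item_done(struct example_common *c)
{
	c->cnt--;
	if (c->cnt == 0)
		c->wake(c->arg, 0);	/* Last item: wake the waiter. */
}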
*/ + timer = item->common->startTime; RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); - item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); + item->common->tracerec->plog_us += + RF_ETIMER_VAL_US(timer); if (rf_parityLogDebug) - printf("[waking process for region %d]\n", item->regionID); + printf("[waking process for region" + " %d]\n", item->regionID); wakeFunc = item->common->wakeFunc; wakeArg = item->common->wakeArg; - FreeParityLogCommonData(item->common); - FreeParityLogData(item); + rf_FreeParityLogCommonData(item->common); + rf_FreeParityLogData(item); (wakeFunc) (wakeArg, 0); } else - FreeParityLogData(item); + rf_FreeParityLogData(item); } } RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); @@ -853,10 +1018,10 @@ rf_ParityLogAppend( } -void -rf_EnableParityLogging(RF_Raid_t * raidPtr) +void +rf_EnableParityLogging(RF_Raid_t *raidPtr) { - int regionID; + int regionID; for (regionID = 0; regionID < rf_numParityRegions; regionID++) { RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); @@ -866,4 +1031,4 @@ rf_EnableParityLogging(RF_Raid_t * raidPtr) if (rf_parityLogDebug) printf("[parity logging enabled]\n"); } -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylog.h b/sys/dev/raidframe/rf_paritylog.h index 43c5711c666..f50bfa0b0d6 100644 --- a/sys/dev/raidframe/rf_paritylog.h +++ b/sys/dev/raidframe/rf_paritylog.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_paritylog.h,v 1.2 1999/02/16 00:03:05 niklas Exp $ */ +/* $OpenBSD: rf_paritylog.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_paritylog.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,16 +28,16 @@ * rights to redistribute these changes. */ -/* header file for parity log - * +/* + * Header file for parity log. */ -#ifndef _RF__RF_PARITYLOG_H_ -#define _RF__RF_PARITYLOG_H_ +#ifndef _RF__RF_PARITYLOG_H_ +#define _RF__RF_PARITYLOG_H_ #include "rf_types.h" -#define RF_DEFAULT_NUM_SECTORS_PER_LOG 64 +#define RF_DEFAULT_NUM_SECTORS_PER_LOG 64 typedef int RF_RegionId_t; @@ -44,138 +45,187 @@ typedef enum RF_ParityRecordType_e { RF_STOP, RF_UPDATE, RF_OVERWRITE -} RF_ParityRecordType_t; +} RF_ParityRecordType_t; struct RF_CommonLogData_s { - RF_DECLARE_MUTEX(mutex) /* protects cnt */ - int cnt; /* when 0, time to call wakeFunc */ - RF_Raid_t *raidPtr; -/* int (*wakeFunc)(struct buf *); */ - int (*wakeFunc) (RF_DagNode_t * node, int status); - void *wakeArg; - RF_AccTraceEntry_t *tracerec; - RF_Etimer_t startTime; - caddr_t bufPtr; - RF_ParityRecordType_t operation; - RF_CommonLogData_t *next; + RF_DECLARE_MUTEX(mutex); /* Protects cnt. */ + int cnt; /* When 0, time to call wakeFunc. */ + RF_Raid_t *raidPtr; +/* int (*wakeFunc) (struct buf *); */ + int (*wakeFunc) (RF_DagNode_t *, int); + void *wakeArg; + RF_AccTraceEntry_t *tracerec; + RF_Etimer_t startTime; + caddr_t bufPtr; + RF_ParityRecordType_t operation; + RF_CommonLogData_t *next; }; struct RF_ParityLogData_s { - RF_RegionId_t regionID; /* this struct guaranteed to span a single - * region */ - int bufOffset; /* offset from common->bufPtr */ - RF_PhysDiskAddr_t diskAddress; - RF_CommonLogData_t *common; /* info shared by one or more - * parityLogData structs */ - RF_ParityLogData_t *next; - RF_ParityLogData_t *prev; + RF_RegionId_t regionID; /* + * This struct guaranteed to + * span a single region. + */ + int bufOffset; /* + * Offset from common->bufPtr. 
+ */ + RF_PhysDiskAddr_t diskAddress; + RF_CommonLogData_t *common; /* + * Info shared by one or more + * parityLogData structs. + */ + RF_ParityLogData_t *next; + RF_ParityLogData_t *prev; }; struct RF_ParityLogAppendQueue_s { - RF_DECLARE_MUTEX(mutex) + RF_DECLARE_MUTEX(mutex); }; struct RF_ParityLogRecord_s { - RF_PhysDiskAddr_t parityAddr; - RF_ParityRecordType_t operation; + RF_PhysDiskAddr_t parityAddr; + RF_ParityRecordType_t operation; }; struct RF_ParityLog_s { - RF_RegionId_t regionID; - int numRecords; - int diskOffset; - RF_ParityLogRecord_t *records; - caddr_t bufPtr; - RF_ParityLog_t *next; + RF_RegionId_t regionID; + int numRecords; + int diskOffset; + RF_ParityLogRecord_t *records; + caddr_t bufPtr; + RF_ParityLog_t *next; }; struct RF_ParityLogQueue_s { - RF_DECLARE_MUTEX(mutex) - RF_ParityLog_t *parityLogs; + RF_DECLARE_MUTEX(mutex); + RF_ParityLog_t *parityLogs; }; struct RF_RegionBufferQueue_s { - RF_DECLARE_MUTEX(mutex) - RF_DECLARE_COND(cond) - int bufferSize; - int totalBuffers; /* size of array 'buffers' */ - int availableBuffers; /* num available 'buffers' */ - int emptyBuffersIndex; /* stick next freed buffer here */ - int availBuffersIndex; /* grab next buffer from here */ - caddr_t *buffers; /* array buffers used to hold parity */ + RF_DECLARE_MUTEX(mutex); + RF_DECLARE_COND(cond); + int bufferSize; + int totalBuffers; /* Size of array 'buffers'. */ + int availableBuffers; /* Num available 'buffers'. */ + int emptyBuffersIndex; /* Stick next freed buffer here. */ + int availBuffersIndex; /* Grab next buffer from here. */ + caddr_t *buffers; /* Array buffers used to hold parity. */ }; -#define RF_PLOG_CREATED (1<<0)/* thread is created */ -#define RF_PLOG_RUNNING (1<<1)/* thread is running */ -#define RF_PLOG_TERMINATE (1<<2)/* thread is terminated (should exit) */ -#define RF_PLOG_SHUTDOWN (1<<3)/* thread is aware and exiting/exited */ +#define RF_PLOG_CREATED (1<<0) /* Thread is created. */ +#define RF_PLOG_RUNNING (1<<1) /* Thread is running. */ +#define RF_PLOG_TERMINATE (1<<2) /* Thread is terminated (should exit).*/ +#define RF_PLOG_SHUTDOWN (1<<3) /* Thread is aware and exiting/exited.*/ struct RF_ParityLogDiskQueue_s { - RF_DECLARE_MUTEX(mutex) /* protects all vars in this struct */ - RF_DECLARE_COND(cond) - int threadState; /* is thread running, should it shutdown (see - * above) */ - RF_ParityLog_t *flushQueue; /* list of parity logs to be flushed - * to log disk */ - RF_ParityLog_t *reintQueue; /* list of parity logs waiting to be - * reintegrated */ - RF_ParityLogData_t *bufHead; /* head of FIFO list of log data, - * waiting on a buffer */ - RF_ParityLogData_t *bufTail; /* tail of FIFO list of log data, - * waiting on a buffer */ - RF_ParityLogData_t *reintHead; /* head of FIFO list of log data, - * waiting on reintegration */ - RF_ParityLogData_t *reintTail; /* tail of FIFO list of log data, - * waiting on reintegration */ - RF_ParityLogData_t *logBlockHead; /* queue of work, blocked - * until a log is available */ - RF_ParityLogData_t *logBlockTail; - RF_ParityLogData_t *reintBlockHead; /* queue of work, blocked + RF_DECLARE_MUTEX(mutex); /* Protects all vars in this struct. */ + RF_DECLARE_COND(cond); + int threadState; /* + * Is thread running, should it + * shutdown ? (see above) + */ + RF_ParityLog_t *flushQueue; /* + * List of parity logs to be + * flushed to log disk. + */ + RF_ParityLog_t *reintQueue; /* + * List of parity logs waiting + * to be reintegrated. 
+ */ + RF_ParityLogData_t *bufHead; /* + * Head of FIFO list of log + * data, waiting on a buffer. + */ + RF_ParityLogData_t *bufTail; /* + * Tail of FIFO list of log + * data, waiting on a buffer. + */ + RF_ParityLogData_t *reintHead; /* + * Head of FIFO list of + * log data, waiting on + * reintegration. + */ + RF_ParityLogData_t *reintTail; /* + * Tail of FIFO list of + * log data, waiting on + * reintegration. + */ + RF_ParityLogData_t *logBlockHead; /* + * Queue of work, blocked + * until a log is available. + */ + RF_ParityLogData_t *logBlockTail; + RF_ParityLogData_t *reintBlockHead;/* + * Queue of work, blocked * until reintegration is - * complete */ - RF_ParityLogData_t *reintBlockTail; - RF_CommonLogData_t *freeCommonList; /* list of unused common data - * structs */ - RF_ParityLogData_t *freeDataList; /* list of unused log data - * structs */ + * complete. + */ + RF_ParityLogData_t *reintBlockTail; + RF_CommonLogData_t *freeCommonList;/* + * List of unused common + * data structs. + */ + RF_ParityLogData_t *freeDataList; /* + * List of unused log + * data structs. + */ }; struct RF_DiskMap_s { - RF_PhysDiskAddr_t parityAddr; - RF_ParityRecordType_t operation; + RF_PhysDiskAddr_t parityAddr; + RF_ParityRecordType_t operation; }; struct RF_RegionInfo_s { - RF_DECLARE_MUTEX(mutex) /* protects: diskCount, diskMap, - * loggingEnabled, coreLog */ - RF_DECLARE_MUTEX(reintMutex) /* protects: reintInProgress */ - int reintInProgress;/* flag used to suspend flushing operations */ - RF_SectorCount_t capacity; /* capacity of this region in sectors */ - RF_SectorNum_t regionStartAddr; /* starting disk address for this - * region */ - RF_SectorNum_t parityStartAddr; /* starting disk address for this - * region */ - RF_SectorCount_t numSectorsParity; /* number of parity sectors - * protected by this region */ - RF_SectorCount_t diskCount; /* num of sectors written to this - * region's disk log */ - RF_DiskMap_t *diskMap; /* in-core map of what's in this region's disk - * log */ - int loggingEnabled; /* logging enable for this region */ - RF_ParityLog_t *coreLog;/* in-core log for this region */ + RF_DECLARE_MUTEX(mutex); /* + * Protects: diskCount, diskMap, + * loggingEnabled, coreLog. + */ + RF_DECLARE_MUTEX(reintMutex); /* Protects: reintInProgress. */ + int reintInProgress; /* + * Flag used to suspend flushing + * operations. + */ + RF_SectorCount_t capacity; /* + * Capacity of this region + * in sectors. + */ + RF_SectorNum_t regionStartAddr; /* + * Starting disk address for + * this region. + */ + RF_SectorNum_t parityStartAddr; /* + * Starting disk address for + * this region. + */ + RF_SectorCount_t numSectorsParity; /* + * Number of parity sectors + * protected by this region. + */ + RF_SectorCount_t diskCount; /* + * Num of sectors written to + * this region's disk log. + */ + RF_DiskMap_t *diskMap; /* + * In-core map of what's in + * this region's disk log. + */ + int loggingEnabled; /* + * Logging enable for this + * region. + */ + RF_ParityLog_t *coreLog; /* + * In-core log for this region. 
+ */ }; -RF_ParityLogData_t * -rf_CreateParityLogData(RF_ParityRecordType_t operation, - RF_PhysDiskAddr_t * pda, caddr_t bufPtr, RF_Raid_t * raidPtr, - int (*wakeFunc) (RF_DagNode_t * node, int status), - void *wakeArg, RF_AccTraceEntry_t * tracerec, - RF_Etimer_t startTime); - RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(RF_Raid_t * raidPtr, - RF_RegionId_t regionID, RF_ParityLogData_t ** head, - RF_ParityLogData_t ** tail, int ignoreLocks); - void rf_ReleaseParityLogs(RF_Raid_t * raidPtr, RF_ParityLog_t * firstLog); - int rf_ParityLogAppend(RF_ParityLogData_t * logData, int finish, - RF_ParityLog_t ** incomingLog, int clearReintFlag); - void rf_EnableParityLogging(RF_Raid_t * raidPtr); - -#endif /* !_RF__RF_PARITYLOG_H_ */ +RF_ParityLogData_t *rf_CreateParityLogData(RF_ParityRecordType_t, + RF_PhysDiskAddr_t *, caddr_t, RF_Raid_t *, + int (*) (RF_DagNode_t *, int), void *, + RF_AccTraceEntry_t *, RF_Etimer_t); +RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(RF_Raid_t *, RF_RegionId_t, + RF_ParityLogData_t **, RF_ParityLogData_t **, int); +void rf_ReleaseParityLogs(RF_Raid_t *, RF_ParityLog_t *); +int rf_ParityLogAppend(RF_ParityLogData_t *, int, RF_ParityLog_t **, int); +void rf_EnableParityLogging(RF_Raid_t *); + +#endif /* !_RF__RF_PARITYLOG_H_ */ diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.c b/sys/dev/raidframe/rf_paritylogDiskMgr.c index 6914b19e535..65f699d4f5f 100644 --- a/sys/dev/raidframe/rf_paritylogDiskMgr.c +++ b/sys/dev/raidframe/rf_paritylogDiskMgr.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_paritylogDiskMgr.c,v 1.5 2000/08/08 16:07:43 peter Exp $ */ +/* $OpenBSD: rf_paritylogDiskMgr.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_paritylogDiskMgr.c,v 1.10 2000/01/15 01:57:57 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -26,13 +27,13 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ -/* Code for flushing and reintegration operations related to parity logging. - * +/* + * Code for flushing and reintegrating operations related to parity logging. */ #include "rf_archs.h" -#if RF_INCLUDE_PARITYLOGGING > 0 +#if RF_INCLUDE_PARITYLOGGING > 0 #include "rf_types.h" #include "rf_threadstuff.h" @@ -54,16 +55,30 @@ #include "rf_paritylogDiskMgr.h" -static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *); - -static caddr_t -AcquireReintBuffer(pool) - RF_RegionBufferQueue_t *pool; +caddr_t rf_AcquireReintBuffer(RF_RegionBufferQueue_t *); +void rf_ReleaseReintBuffer(RF_RegionBufferQueue_t *, caddr_t); +void rf_ReadRegionLog(RF_RegionId_t, RF_MCPair_t *, caddr_t, RF_Raid_t *, + RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **); +void rf_WriteCoreLog(RF_ParityLog_t *, RF_MCPair_t *, RF_Raid_t *, + RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **); +void rf_ReadRegionParity(RF_RegionId_t, RF_MCPair_t *, caddr_t, RF_Raid_t *, + RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **); +void rf_WriteRegionParity(RF_RegionId_t, RF_MCPair_t *, caddr_t, RF_Raid_t *, + RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **); +void rf_FlushLogsToDisk(RF_Raid_t *, RF_ParityLog_t *); +void rf_ReintegrateRegion(RF_Raid_t *, RF_RegionId_t, RF_ParityLog_t *); +void rf_ReintegrateLogs(RF_Raid_t *, RF_ParityLog_t *); + + +caddr_t +rf_AcquireReintBuffer(RF_RegionBufferQueue_t *pool) { caddr_t bufPtr = NULL; - /* Return a region buffer from the free list (pool). If the free list - * is empty, WAIT. 
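(Editor's note: illustrative aside, not part of the committed diff. rf_AcquireReintBuffer() and rf_ReleaseReintBuffer(), defined here, hand out buffers from the RF_RegionBufferQueue_s pool declared in rf_paritylog.h above: availBuffersIndex is where the next free buffer is taken from, emptyBuffersIndex is where a returned buffer is stored, and both wrap at totalBuffers. The sketch below is a single-threaded user-space analogue of that index bookkeeping only; it drops the RF_LOCK_MUTEX/RF_WAIT_COND blocking used in the driver, and the names TOTAL_BUFFERS, struct pool, pool_acquire and pool_release are made up.

	#include <stddef.h>

	#define TOTAL_BUFFERS	4		/* assumed pool size */

	struct pool {
		int	 availableBuffers;	/* count of free buffers */
		int	 availBuffersIndex;	/* grab next buffer from here */
		int	 emptyBuffersIndex;	/* stick next freed buffer here */
		void	*buffers[TOTAL_BUFFERS];
	};

	/* Analogue of rf_AcquireReintBuffer(); returns NULL instead of sleeping. */
	static void *
	pool_acquire(struct pool *p)
	{
		void *buf;

		if (p->availableBuffers == 0)
			return (NULL);
		buf = p->buffers[p->availBuffersIndex];
		p->availableBuffers--;
		if (++p->availBuffersIndex == TOTAL_BUFFERS)
			p->availBuffersIndex = 0;
		return (buf);
	}

	/* Analogue of rf_ReleaseReintBuffer(). */
	static void
	pool_release(struct pool *p, void *buf)
	{
		p->buffers[p->emptyBuffersIndex] = buf;
		p->availableBuffers++;
		if (++p->emptyBuffersIndex == TOTAL_BUFFERS)
			p->emptyBuffersIndex = 0;
	}

	int
	main(void)
	{
		static char mem[TOTAL_BUFFERS][512];
		struct pool p = { TOTAL_BUFFERS, 0, 0,
		    { mem[0], mem[1], mem[2], mem[3] } };
		void *b;

		b = pool_acquire(&p);	/* use the buffer, then return it */
		pool_release(&p, b);
		return (0);
	}

End of aside.)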
BLOCKING */ + /* + * Return a region buffer from the free list (pool). If the free list + * is empty, WAIT. BLOCKING + */ RF_LOCK_MUTEX(pool->mutex); if (pool->availableBuffers > 0) { @@ -74,20 +89,23 @@ AcquireReintBuffer(pool) pool->availBuffersIndex = 0; RF_UNLOCK_MUTEX(pool->mutex); } else { - RF_PANIC(); /* should never happen in correct config, - * single reint */ + RF_PANIC(); /* + * Should never happen in correct config, + * single reint. + */ RF_WAIT_COND(pool->cond, pool->mutex); } return (bufPtr); } -static void -ReleaseReintBuffer( - RF_RegionBufferQueue_t * pool, - caddr_t bufPtr) + +void +rf_ReleaseReintBuffer(RF_RegionBufferQueue_t *pool, caddr_t bufPtr) { - /* Insert a region buffer (bufPtr) into the free list (pool). - * NON-BLOCKING */ + /* + * Insert a region buffer (bufPtr) into the free list (pool). + * NON-BLOCKING + */ RF_LOCK_MUTEX(pool->mutex); pool->availableBuffers++; @@ -101,147 +119,133 @@ ReleaseReintBuffer( } - -static void -ReadRegionLog( - RF_RegionId_t regionID, - RF_MCPair_t * rrd_mcpair, - caddr_t regionBuffer, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** rrd_dag_h, - RF_AllocListElem_t ** rrd_alloclist, - RF_PhysDiskAddr_t ** rrd_pda) +void +rf_ReadRegionLog(RF_RegionId_t regionID, RF_MCPair_t *rrd_mcpair, + caddr_t regionBuffer, RF_Raid_t *raidPtr, RF_DagHeader_t **rrd_dag_h, + RF_AllocListElem_t **rrd_alloclist, RF_PhysDiskAddr_t **rrd_pda) { - /* Initiate the read a region log from disk. Once initiated, return + /* + * Initiate the read a region log from disk. Once initiated, return * to the calling routine. - * - * NON-BLOCKING */ + * + * NON-BLOCKING + */ RF_AccTraceEntry_t *tracerec; RF_DagNode_t *rrd_rdNode; - /* create DAG to read region log from disk */ + /* Create DAG to read region log from disk. */ rf_MakeAllocList(*rrd_alloclist); - *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, - rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rrl", *rrd_alloclist, - RF_DAG_FLAGS_NONE, - RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for the core log */ - /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ + *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, + rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rrl", *rrd_alloclist, + RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); + + /* Create and initialize PDA for the core log. */ + /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), + * (RF_PhysDiskAddr_t *)); */ *rrd_pda = rf_AllocPDAList(1); - rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), - &((*rrd_pda)->col), &((*rrd_pda)->startSector)); + rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), + &((*rrd_pda)->col), &((*rrd_pda)->startSector)); (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity; if ((*rrd_pda)->next) { (*rrd_pda)->next = NULL; printf("set rrd_pda->next to NULL\n"); } - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + /* Initialize DAG parameters. 
*/ + RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); (*rrd_dag_h)->tracerec = tracerec; rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; rrd_rdNode->params[0].p = *rrd_pda; -/* rrd_rdNode->params[1] = regionBuffer; */ + /* rrd_rdNode->params[1] = regionBuffer; */ rrd_rdNode->params[2].v = 0; - rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - /* launch region log read dag */ + /* Launch region log read dag. */ rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, (void *) rrd_mcpair); } - -static void -WriteCoreLog( - RF_ParityLog_t * log, - RF_MCPair_t * fwr_mcpair, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** fwr_dag_h, - RF_AllocListElem_t ** fwr_alloclist, - RF_PhysDiskAddr_t ** fwr_pda) +void +rf_WriteCoreLog(RF_ParityLog_t *log, RF_MCPair_t *fwr_mcpair, + RF_Raid_t *raidPtr, RF_DagHeader_t **fwr_dag_h, + RF_AllocListElem_t **fwr_alloclist, RF_PhysDiskAddr_t **fwr_pda) { RF_RegionId_t regionID = log->regionID; RF_AccTraceEntry_t *tracerec; RF_SectorNum_t regionOffset; RF_DagNode_t *fwr_wrNode; - /* Initiate the write of a core log to a region log disk. Once + /* + * Initiate the write of a core log to a region log disk. Once * initiated, return to the calling routine. - * - * NON-BLOCKING */ + * + * NON-BLOCKING + */ - /* create DAG to write a core log to a region log disk */ + /* Create DAG to write a core log to a region log disk. */ rf_MakeAllocList(*fwr_alloclist); - *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, - rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); + *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wcl", *fwr_alloclist, + RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); - /* create and initialize PDA for the region log */ - /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ + /* Create and initialize PDA for the region log. */ + /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), + * (RF_PhysDiskAddr_t *)); */ *fwr_pda = rf_AllocPDAList(1); regionOffset = log->diskOffset; - rf_MapLogParityLogging(raidPtr, regionID, regionOffset, - &((*fwr_pda)->row), &((*fwr_pda)->col), - &((*fwr_pda)->startSector)); + rf_MapLogParityLogging(raidPtr, regionID, regionOffset, + &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector)); (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + /* Initialize DAG parameters. */ + RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); (*fwr_dag_h)->tracerec = tracerec; fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; fwr_wrNode->params[0].p = *fwr_pda; -/* fwr_wrNode->params[1] = log->bufPtr; */ + /* fwr_wrNode->params[1] = log->bufPtr; */ fwr_wrNode->params[2].v = 0; - fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, 0); + fwr_wrNode->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - /* launch the dag to write the core log to disk */ + /* Launch the dag to write the core log to disk. 
*/ rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, (void *) fwr_mcpair); } -static void -ReadRegionParity( - RF_RegionId_t regionID, - RF_MCPair_t * prd_mcpair, - caddr_t parityBuffer, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** prd_dag_h, - RF_AllocListElem_t ** prd_alloclist, - RF_PhysDiskAddr_t ** prd_pda) +void +rf_ReadRegionParity(RF_RegionId_t regionID, RF_MCPair_t *prd_mcpair, + caddr_t parityBuffer, RF_Raid_t *raidPtr, RF_DagHeader_t **prd_dag_h, + RF_AllocListElem_t **prd_alloclist, RF_PhysDiskAddr_t **prd_pda) { - /* Initiate the read region parity from disk. Once initiated, return + /* + * Initiate the read region parity from disk. Once initiated, return * to the calling routine. - * - * NON-BLOCKING */ + * + * NON-BLOCKING + */ RF_AccTraceEntry_t *tracerec; RF_DagNode_t *prd_rdNode; - /* create DAG to read region parity from disk */ + /* Create DAG to read region parity from disk. */ rf_MakeAllocList(*prd_alloclist); - *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, - rf_DiskReadUndoFunc, "Rrp", - *prd_alloclist, RF_DAG_FLAGS_NONE, - RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for region parity */ - /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ + *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, + rf_DiskReadUndoFunc, "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, + RF_IO_NORMAL_PRIORITY); + + /* Create and initialize PDA for region parity. */ + /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), + * (RF_PhysDiskAddr_t *)); */ *prd_pda = rf_AllocPDAList(1); - rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), - &((*prd_pda)->col), &((*prd_pda)->startSector), - &((*prd_pda)->numSector)); + rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), + &((*prd_pda)->col), &((*prd_pda)->startSector), + &((*prd_pda)->numSector)); if (rf_parityLogDebug) printf("[reading %d sectors of parity from region %d]\n", (int) (*prd_pda)->numSector, regionID); @@ -249,86 +253,82 @@ ReadRegionParity( (*prd_pda)->next = NULL; printf("set prd_pda->next to NULL\n"); } - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + /* Initialize DAG parameters. */ + RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); (*prd_dag_h)->tracerec = tracerec; prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; prd_rdNode->params[0].p = *prd_pda; prd_rdNode->params[1].p = parityBuffer; prd_rdNode->params[2].v = 0; - prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, 0); + prd_rdNode->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); if (rf_validateDAGDebug) rf_ValidateDAG(*prd_dag_h); - /* launch region parity read dag */ + /* Launch region parity read dag. */ rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, (void *) prd_mcpair); } -static void -WriteRegionParity( - RF_RegionId_t regionID, - RF_MCPair_t * pwr_mcpair, - caddr_t parityBuffer, - RF_Raid_t * raidPtr, - RF_DagHeader_t ** pwr_dag_h, - RF_AllocListElem_t ** pwr_alloclist, - RF_PhysDiskAddr_t ** pwr_pda) +void +rf_WriteRegionParity(RF_RegionId_t regionID, RF_MCPair_t *pwr_mcpair, + caddr_t parityBuffer, RF_Raid_t *raidPtr, RF_DagHeader_t **pwr_dag_h, + RF_AllocListElem_t **pwr_alloclist, RF_PhysDiskAddr_t **pwr_pda) { - /* Initiate the write of region parity to disk. Once initiated, return + /* + * Initiate the write of region parity to disk. 
Once initiated, return * to the calling routine. - * - * NON-BLOCKING */ + * + * NON-BLOCKING + */ RF_AccTraceEntry_t *tracerec; RF_DagNode_t *pwr_wrNode; - /* create DAG to write region log from disk */ + /* Create DAG to write region log from disk. */ rf_MakeAllocList(*pwr_alloclist); - *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, - rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wrp", *pwr_alloclist, - RF_DAG_FLAGS_NONE, - RF_IO_NORMAL_PRIORITY); - - /* create and initialize PDA for region parity */ - /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t - * *)); */ + *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wrp", *pwr_alloclist, + RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); + + /* Create and initialize PDA for region parity. */ + /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), + * (RF_PhysDiskAddr_t *)); */ *pwr_pda = rf_AllocPDAList(1); - rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), - &((*pwr_pda)->col), &((*pwr_pda)->startSector), - &((*pwr_pda)->numSector)); + rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), + &((*pwr_pda)->col), &((*pwr_pda)->startSector), + &((*pwr_pda)->numSector)); - /* initialize DAG parameters */ - RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + /* Initialize DAG parameters. */ + RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); (*pwr_dag_h)->tracerec = tracerec; pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; pwr_wrNode->params[0].p = *pwr_pda; -/* pwr_wrNode->params[1] = parityBuffer; */ + /* pwr_wrNode->params[1] = parityBuffer; */ pwr_wrNode->params[2].v = 0; - pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, - 0, 0, 0); + pwr_wrNode->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); - /* launch the dag to write region parity to disk */ + /* Launch the dag to write region parity to disk. */ rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, (void *) pwr_mcpair); } -static void -FlushLogsToDisk( - RF_Raid_t * raidPtr, - RF_ParityLog_t * logList) +void +rf_FlushLogsToDisk(RF_Raid_t *raidPtr, RF_ParityLog_t *logList) { - /* Flush a linked list of core logs to the log disk. Logs contain the - * disk location where they should be written. Logs were written in + /* + * Flush a linked list of core logs to the log disk. Logs contain the + * disk location where they should be written. Logs were written in * FIFO order and that order must be preserved. - * - * Recommended optimizations: 1) allow multiple flushes to occur - * simultaneously 2) coalesce contiguous flush operations - * - * BLOCKING */ + * + * Recommended optimizations: + * 1) Allow multiple flushes to occur simultaneously. + * 2) Coalesce contiguous flush operations. + * + * BLOCKING + */ RF_ParityLog_t *log; RF_RegionId_t regionID; @@ -345,18 +345,20 @@ FlushLogsToDisk( while (log) { regionID = log->regionID; - /* create and launch a DAG to write the core log */ + /* Create and launch a DAG to write the core log. 
*/ if (rf_parityLogDebug) - printf("[initiating write of core log for region %d]\n", regionID); + printf("[initiating write of core log for region" + " %d]\n", regionID); fwr_mcpair->flag = RF_FALSE; - WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, - &fwr_alloclist, &fwr_pda); + rf_WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, + &fwr_alloclist, &fwr_pda); - /* wait for the DAG to complete */ + /* Wait for the DAG to complete. */ while (!fwr_mcpair->flag) RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex); if (fwr_dag_h->status != rf_enable) { - RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID); + RF_ERRORMSG1("Unable to write core log to disk" + " (region %d)\n", regionID); RF_ASSERT(0); } /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */ @@ -371,11 +373,9 @@ FlushLogsToDisk( rf_ReleaseParityLogs(raidPtr, logList); } -static void -ReintegrateRegion( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID, - RF_ParityLog_t * coreLog) +void +rf_ReintegrateRegion(RF_Raid_t *raidPtr, RF_RegionId_t regionID, + RF_ParityLog_t *coreLog) { RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair; RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h; @@ -383,101 +383,104 @@ ReintegrateRegion( RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda; caddr_t parityBuffer, regionBuffer = NULL; - /* Reintegrate a region (regionID). + /* + * Reintegrate a region (regionID). * - * 1. acquire region and parity buffers - * 2. read log from disk - * 3. read parity from disk - * 4. apply log to parity - * 5. apply core log to parity - * 6. write new parity to disk - * - * BLOCKING */ + * 1. Acquire region and parity buffers. + * 2. Read log from disk. + * 3. Read parity from disk. + * 4. Apply log to parity. + * 5. Apply core log to parity. + * 6. Write new parity to disk. + * + * BLOCKING + */ if (rf_parityLogDebug) printf("[reintegrating region %d]\n", regionID); - /* initiate read of region parity */ + /* Initiate read of region parity. */ if (rf_parityLogDebug) - printf("[initiating read of parity for region %d]\n",regionID); - parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); + printf("[initiating read of parity for region %d]\n", regionID); + parityBuffer = rf_AcquireReintBuffer(&raidPtr->parityBufferPool); prd_mcpair = rf_AllocMCPair(); RF_LOCK_MUTEX(prd_mcpair->mutex); prd_mcpair->flag = RF_FALSE; - ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, + rf_ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda); - /* if region log nonempty, initiate read */ + /* If region log nonempty, initiate read. */ if (raidPtr->regionInfo[regionID].diskCount > 0) { if (rf_parityLogDebug) printf("[initiating read of disk log for region %d]\n", - regionID); - regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); + regionID); + regionBuffer = + rf_AcquireReintBuffer(&raidPtr->regionBufferPool); rrd_mcpair = rf_AllocMCPair(); RF_LOCK_MUTEX(rrd_mcpair->mutex); rrd_mcpair->flag = RF_FALSE; - ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, - &rrd_dag_h, &rrd_alloclist, &rrd_pda); + rf_ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, + &rrd_dag_h, &rrd_alloclist, &rrd_pda); } - /* wait on read of region parity to complete */ + /* Wait on read of region parity to complete. 
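(Editor's note: illustrative aside, not part of the committed diff. Every read/write helper in this file follows the same handshake: allocate an RF_MCPair_t, clear its flag while holding its mutex, dispatch the DAG with rf_MCPairWakeupFunc as the completion callback, then sleep in a while (!flag) RF_WAIT_COND(...) loop, exactly as the parity read is awaited just below. The sketch is a user-space pthread analogue of that flag-plus-condition-variable completion pattern; struct mcpair and io_done are made-up names standing in for RF_MCPair_t and rf_MCPairWakeupFunc.

	#include <pthread.h>
	#include <stdio.h>

	struct mcpair {
		pthread_mutex_t	mutex;
		pthread_cond_t	cond;
		int		flag;
	};

	/* Completion callback: stands in for rf_MCPairWakeupFunc(). */
	static void *
	io_done(void *arg)
	{
		struct mcpair *mcp = arg;

		pthread_mutex_lock(&mcp->mutex);
		mcp->flag = 1;
		pthread_cond_signal(&mcp->cond);
		pthread_mutex_unlock(&mcp->mutex);
		return (NULL);
	}

	int
	main(void)
	{
		struct mcpair mcp = { PTHREAD_MUTEX_INITIALIZER,
		    PTHREAD_COND_INITIALIZER, 0 };
		pthread_t t;

		pthread_mutex_lock(&mcp.mutex);
		mcp.flag = 0;
		/* "Dispatch the DAG"; the thread models the asynchronous I/O. */
		pthread_create(&t, NULL, io_done, &mcp);
		while (!mcp.flag)
			pthread_cond_wait(&mcp.cond, &mcp.mutex);
		pthread_mutex_unlock(&mcp.mutex);
		pthread_join(t, NULL);
		printf("I/O complete\n");
		return (0);
	}

End of aside.)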
*/ while (!prd_mcpair->flag) { RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex); } RF_UNLOCK_MUTEX(prd_mcpair->mutex); if (prd_dag_h->status != rf_enable) { RF_ERRORMSG("Unable to read parity from disk\n"); - /* add code to fail the parity disk */ + /* Add code to fail the parity disk. */ RF_ASSERT(0); } - /* apply core log to parity */ + /* Apply core log to parity. */ /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */ if (raidPtr->regionInfo[regionID].diskCount > 0) { - /* wait on read of region log to complete */ + /* Wait on read of region log to complete. */ while (!rrd_mcpair->flag) RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex); RF_UNLOCK_MUTEX(rrd_mcpair->mutex); if (rrd_dag_h->status != rf_enable) { RF_ERRORMSG("Unable to read region log from disk\n"); - /* add code to fail the log disk */ + /* Add code to fail the log disk. */ RF_ASSERT(0); } - /* apply region log to parity */ + /* Apply region log to parity. */ /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */ - /* release resources associated with region log */ + /* Release resources associated with region log. */ /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */ rf_FreePhysDiskAddr(rrd_pda); rf_FreeDAG(rrd_dag_h); rf_FreeAllocList(rrd_alloclist); rf_FreeMCPair(rrd_mcpair); - ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); + rf_ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); } - /* write reintegrated parity to disk */ + /* Write reintegrated parity to disk. */ if (rf_parityLogDebug) printf("[initiating write of parity for region %d]\n", - regionID); + regionID); pwr_mcpair = rf_AllocMCPair(); RF_LOCK_MUTEX(pwr_mcpair->mutex); pwr_mcpair->flag = RF_FALSE; - WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, - &pwr_dag_h, &pwr_alloclist, &pwr_pda); + rf_WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, + &pwr_dag_h, &pwr_alloclist, &pwr_pda); while (!pwr_mcpair->flag) RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex); RF_UNLOCK_MUTEX(pwr_mcpair->mutex); if (pwr_dag_h->status != rf_enable) { RF_ERRORMSG("Unable to write parity to disk\n"); - /* add code to fail the parity disk */ + /* Add code to fail the parity disk. */ RF_ASSERT(0); } - /* release resources associated with read of old parity */ + /* Release resources associated with read of old parity. */ /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */ rf_FreePhysDiskAddr(prd_pda); rf_FreeDAG(prd_dag_h); rf_FreeAllocList(prd_alloclist); rf_FreeMCPair(prd_mcpair); - /* release resources associated with write of new parity */ - ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); + /* Release resources associated with write of new parity. */ + rf_ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */ rf_FreePhysDiskAddr(pwr_pda); rf_FreeDAG(pwr_dag_h); @@ -489,11 +492,8 @@ ReintegrateRegion( } - -static void -ReintegrateLogs( - RF_Raid_t * raidPtr, - RF_ParityLog_t * logList) +void +rf_ReintegrateLogs(RF_Raid_t *raidPtr, RF_ParityLog_t *logList) { RF_ParityLog_t *log, *freeLogList = NULL; RF_ParityLogData_t *logData, *logDataList; @@ -505,47 +505,55 @@ ReintegrateLogs( logList = logList->next; log->next = NULL; regionID = log->regionID; - ReintegrateRegion(raidPtr, regionID, log); + rf_ReintegrateRegion(raidPtr, regionID, log); log->numRecords = 0; - /* remove all items which are blocked on reintegration of this - * region */ + /* + * Remove all items which are blocked on reintegration of this + * region. 
+ */ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, - &raidPtr->parityLogDiskQueue.reintBlockHead, - &raidPtr->parityLogDiskQueue.reintBlockTail, - RF_TRUE); + logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, + &raidPtr->parityLogDiskQueue.reintBlockHead, + &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE); logDataList = logData; while (logData) { - logData->next = rf_SearchAndDequeueParityLogData( - raidPtr, regionID, - &raidPtr->parityLogDiskQueue.reintBlockHead, - &raidPtr->parityLogDiskQueue.reintBlockTail, - RF_TRUE); + logData->next = + rf_SearchAndDequeueParityLogData(raidPtr, regionID, + &raidPtr->parityLogDiskQueue.reintBlockHead, + &raidPtr->parityLogDiskQueue.reintBlockTail, + RF_TRUE); logData = logData->next; } RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - /* process blocked log data and clear reintInProgress flag for - * this region */ + /* + * Process blocked log data and clear reintInProgress flag for + * this region. + */ if (logDataList) rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); else { - /* Enable flushing for this region. Holding both + /* + * Enable flushing for this region. Holding both * locks provides a synchronization barrier with - * DumpParityLogToDisk */ + * DumpParityLogToDisk. + */ RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); raidPtr->regionInfo[regionID].diskCount = 0; - raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; + raidPtr->regionInfo[regionID].reintInProgress = + RF_FALSE; RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now - * enabled */ + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID] + .reintMutex); /* Flushing is now enabled. */ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); } - /* if log wasn't used, attach it to the list of logs to be - * returned */ + /* + * If log wasn't used, attach it to the list of logs to be + * returned. + */ if (log) { log->next = freeLogList; freeLogList = log; @@ -555,11 +563,14 @@ ReintegrateLogs( rf_ReleaseParityLogs(raidPtr, freeLogList); } -int -rf_ShutdownLogging(RF_Raid_t * raidPtr) +int +rf_ShutdownLogging(RF_Raid_t *raidPtr) { - /* shutdown parity logging 1) disable parity logging in all regions 2) - * reintegrate all regions */ + /* + * Shutdown parity logging: + * 1) Disable parity logging in all regions. + * 2) Reintegrate all regions. + */ RF_SectorCount_t diskCount; RF_RegionId_t regionID; @@ -567,57 +578,60 @@ rf_ShutdownLogging(RF_Raid_t * raidPtr) if (rf_parityLogDebug) printf("[shutting down parity logging]\n"); - /* Since parity log maps are volatile, we must reintegrate all - * regions. */ + /* + * Since parity log maps are volatile, we must reintegrate all + * regions. 
+ */ if (rf_forceParityLogReint) { for (regionID = 0; regionID < rf_numParityRegions; regionID++) { RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - raidPtr->regionInfo[regionID].loggingEnabled = - RF_FALSE; + raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE; log = raidPtr->regionInfo[regionID].coreLog; raidPtr->regionInfo[regionID].coreLog = NULL; diskCount = raidPtr->regionInfo[regionID].diskCount; RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); if (diskCount > 0 || log != NULL) - ReintegrateRegion(raidPtr, regionID, log); + rf_ReintegrateRegion(raidPtr, regionID, log); if (log != NULL) rf_ReleaseParityLogs(raidPtr, log); } } if (rf_parityLogDebug) { printf("[parity logging disabled]\n"); - printf("[should be done!]\n"); + printf("[should be done !]\n"); } return (0); } -int -rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) +int +rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr) { RF_ParityLog_t *reintQueue, *flushQueue; - int workNeeded, done = RF_FALSE; + int workNeeded, done = RF_FALSE; int s; - /* Main program for parity logging disk thread. This routine waits + /* + * Main program for parity logging disk thread. This routine waits * for work to appear in either the flush or reintegration queues and * is responsible for flushing core logs to the log disk as well as * reintegrating parity regions. - * - * BLOCKING */ + * + * BLOCKING + */ s = splbio(); RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); /* - * Inform our creator that we're running. Don't bother doing the - * mutex lock/unlock dance- we locked above, and we'll unlock - * below with nothing to do, yet. - */ + * Inform our creator that we're running. Don't bother doing the + * mutex lock/unlock dance: we locked above, and we'll unlock + * below with nothing to do, yet. + */ raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING; RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); - /* empty the work queues */ + /* Empty the work queues. */ flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL; reintQueue = raidPtr->parityLogDiskQueue.reintQueue; @@ -626,30 +640,36 @@ rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) while (!done) { while (workNeeded) { - /* First, flush all logs in the flush queue, freeing - * buffers Second, reintegrate all regions which are + /* + * First, flush all logs in the flush queue, freeing + * buffers. Second, reintegrate all regions that are * reported as full. Third, append queued log data * until blocked. - * - * Note: Incoming appends (ParityLogAppend) can block on - * either 1. empty buffer pool 2. region under - * reintegration To preserve a global FIFO ordering of + * + * Note: Incoming appends (ParityLogAppend) can block + * on either 1. empty buffer pool 2. region under + * reintegration. To preserve a global FIFO ordering of * appends, buffers are not released to the world * until those appends blocked on buffers are removed - * from the append queue. Similarly, regions which - * are reintegrated are not opened for general use - * until the append queue has been emptied. */ + * from the append queue. Similarly, regions that are + * reintegrated are not opened for general use until + * the append queue has been emptied. + */ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - /* empty flushQueue, using free'd log buffers to - * process bufTail */ + /* + * Empty flushQueue, using free'd log buffers to + * process bufTail. 
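(Editor's note: illustrative aside, not part of the committed diff. The manager loop below detaches flushQueue and reintQueue from parityLogDiskQueue while holding its mutex, resets the shared heads to NULL, processes the detached lists with the mutex released, and only sleeps on the condition variable once both queues come back empty. The sketch is a user-space pthread analogue of that detach-and-drain pattern; struct work, manager, qmutex, qcond and terminate are made-up names.

	#include <pthread.h>
	#include <stdio.h>
	#include <stddef.h>

	struct work {
		struct work	*next;
		int		 id;
	};

	static pthread_mutex_t	 qmutex = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t	 qcond = PTHREAD_COND_INITIALIZER;
	static struct work	*queue = NULL;
	static int		 terminate = 0;

	static void *
	manager(void *arg)
	{
		struct work *batch;

		(void)arg;
		pthread_mutex_lock(&qmutex);
		for (;;) {
			while (queue == NULL && !terminate)
				pthread_cond_wait(&qcond, &qmutex);
			batch = queue;		/* detach the whole list */
			queue = NULL;
			if (batch == NULL && terminate)
				break;
			pthread_mutex_unlock(&qmutex);
			for (; batch != NULL; batch = batch->next)
				printf("processing item %d\n", batch->id);
			pthread_mutex_lock(&qmutex);
		}
		pthread_mutex_unlock(&qmutex);
		return (NULL);
	}

	int
	main(void)
	{
		struct work w1 = { NULL, 1 }, w0 = { &w1, 0 };
		pthread_t t;

		pthread_create(&t, NULL, manager, NULL);
		pthread_mutex_lock(&qmutex);
		queue = &w0;			/* enqueue two items */
		pthread_cond_signal(&qcond);
		pthread_mutex_unlock(&qmutex);

		pthread_mutex_lock(&qmutex);
		terminate = 1;			/* ask the manager to exit */
		pthread_cond_signal(&qcond);
		pthread_mutex_unlock(&qmutex);
		pthread_join(t, NULL);
		return (0);
	}

End of aside.)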
+ */ if (flushQueue) - FlushLogsToDisk(raidPtr, flushQueue); + rf_FlushLogsToDisk(raidPtr, flushQueue); - /* empty reintQueue, flushing from reintTail as we go */ + /* + * Empty reintQueue, flushing from reintTail as we go. + */ if (reintQueue) - ReintegrateLogs(raidPtr, reintQueue); + rf_ReintegrateLogs(raidPtr, reintQueue); RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); flushQueue = raidPtr->parityLogDiskQueue.flushQueue; @@ -658,22 +678,27 @@ rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) raidPtr->parityLogDiskQueue.reintQueue = NULL; workNeeded = (flushQueue || reintQueue); } - /* no work is needed at this point */ + /* No work is needed at this point. */ if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) { - /* shutdown parity logging 1. disable parity logging - * in all regions 2. reintegrate all regions */ - done = RF_TRUE; /* thread disabled, no work needed */ + /* + * Shutdown parity logging: + * 1. Disable parity logging in all regions. + * 2. Reintegrate all regions. + */ + done = RF_TRUE; /* Thread disabled, no work needed. */ RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); rf_ShutdownLogging(raidPtr); } if (!done) { - /* thread enabled, no work needed, so sleep */ + /* Thread enabled, no work needed, so sleep. */ if (rf_parityLogDebug) - printf("[parity logging disk manager sleeping]\n"); - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, - raidPtr->parityLogDiskQueue.mutex); + printf("[parity logging disk manager" + " sleeping]\n"); + RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, + raidPtr->parityLogDiskQueue.mutex); if (rf_parityLogDebug) - printf("[parity logging disk manager just woke up]\n"); + printf("[parity logging disk manager just" + " woke up]\n"); flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL; reintQueue = raidPtr->parityLogDiskQueue.reintQueue; @@ -682,8 +707,8 @@ rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) } } /* - * Announce that we're done. - */ + * Announce that we're done. + */ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN; RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); @@ -692,9 +717,9 @@ rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) splx(s); /* - * In the Net- & OpenBSD kernel, the thread must exit; returning would - * cause the proc trampoline to attempt to return to userspace. - */ + * In the Net- & OpenBSD kernel, the thread must exit; returning would + * cause the proc trampoline to attempt to return to userspace. + */ kthread_exit(0); /* does not return */ } -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.h b/sys/dev/raidframe/rf_paritylogDiskMgr.h index 96e0ac7485f..cc8a01bd08e 100644 --- a/sys/dev/raidframe/rf_paritylogDiskMgr.h +++ b/sys/dev/raidframe/rf_paritylogDiskMgr.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_paritylogDiskMgr.h,v 1.2 1999/02/16 00:03:06 niklas Exp $ */ +/* $OpenBSD: rf_paritylogDiskMgr.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_paritylogDiskMgr.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,16 +28,16 @@ * rights to redistribute these changes. */ -/* header file for parity log disk mgr code - * +/* + * Header file for parity log disk mgr code. 
*/ -#ifndef _RF__RF_PARITYLOGDISKMGR_H_ -#define _RF__RF_PARITYLOGDISKMGR_H_ +#ifndef _RF__RF_PARITYLOGDISKMGR_H_ +#define _RF__RF_PARITYLOGDISKMGR_H_ #include "rf_types.h" -int rf_ShutdownLogging(RF_Raid_t * raidPtr); -int rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr); +int rf_ShutdownLogging(RF_Raid_t *); +int rf_ParityLoggingDiskManager(RF_Raid_t *); -#endif /* !_RF__RF_PARITYLOGDISKMGR_H_ */ +#endif /* !_RF__RF_PARITYLOGDISKMGR_H_ */ diff --git a/sys/dev/raidframe/rf_paritylogging.c b/sys/dev/raidframe/rf_paritylogging.c index c0a04bbd062..bc730bb6001 100644 --- a/sys/dev/raidframe/rf_paritylogging.c +++ b/sys/dev/raidframe/rf_paritylogging.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_paritylogging.c,v 1.5 2000/08/08 16:07:44 peter Exp $ */ +/* $OpenBSD: rf_paritylogging.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_paritylogging.c,v 1.10 2000/02/12 16:06:27 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,12 +30,12 @@ /* - parity logging configuration, dag selection, and mapping is implemented here + * Parity logging configuration, dag selection, and mapping is implemented here. */ #include "rf_archs.h" -#if RF_INCLUDE_PARITYLOGGING > 0 +#if RF_INCLUDE_PARITYLOGGING > 0 #include "rf_types.h" #include "rf_raid.h" @@ -55,25 +56,28 @@ #include "rf_shutdown.h" typedef struct RF_ParityLoggingConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} RF_ParityLoggingConfigInfo_t; - -static void FreeRegionInfo(RF_Raid_t * raidPtr, RF_RegionId_t regionID); -static void rf_ShutdownParityLogging(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg); -static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg); - -int -rf_ConfigureParityLogging( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) + RF_RowCol_t **stripeIdentifier; /* + * Filled in at config time & used by + * IdentifyStripe. + */ +} RF_ParityLoggingConfigInfo_t; + +void rf_FreeRegionInfo(RF_Raid_t *, RF_RegionId_t); +void rf_FreeParityLogQueue(RF_Raid_t *, RF_ParityLogQueue_t *); +void rf_FreeRegionBufferQueue(RF_RegionBufferQueue_t *); +void rf_ShutdownParityLogging(RF_ThreadArg_t); +void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t); +void rf_ShutdownParityLoggingPool(RF_ThreadArg_t); +void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t); +void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t); +void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t); + + +int +rf_ConfigureParityLogging(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { - int i, j, startdisk, rc; + int i, j, startdisk, rc; RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity; RF_SectorCount_t parityBufferCapacity, maxRegionParityRange; RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; @@ -85,17 +89,17 @@ rf_ConfigureParityLogging( return(EINVAL); /* - * We create multiple entries on the shutdown list here, since - * this configuration routine is fairly complicated in and of - * itself, and this makes backing out of a failed configuration - * much simpler. 
- */ + * We create multiple entries on the shutdown list here, since + * this configuration routine is fairly complicated in and of + * itself, and this makes backing out of a failed configuration + * much simpler. + */ raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG; - /* create a parity logging configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t), - (RF_ParityLoggingConfigInfo_t *), + /* Create a parity logging configuration structure. */ + RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t), + (RF_ParityLoggingConfigInfo_t *), raidPtr->cleanupList); if (info == NULL) return (ENOMEM); @@ -103,10 +107,12 @@ rf_ConfigureParityLogging( RF_ASSERT(raidPtr->numRow == 1); - /* the stripe identifier must identify the disks in each stripe, IN - * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ - info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol), - (raidPtr->numCol), + /* + * The stripe identifier must identify the disks in each stripe, IN + * THE ORDER THAT THEY APPEAR IN THE STRIPE. + */ + info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol), + (raidPtr->numCol), raidPtr->cleanupList); if (info->stripeIdentifier == NULL) return (ENOMEM); @@ -114,161 +120,168 @@ rf_ConfigureParityLogging( startdisk = 0; for (i = 0; i < (raidPtr->numCol); i++) { for (j = 0; j < (raidPtr->numCol); j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % - (raidPtr->numCol - 1); + info->stripeIdentifier[i][j] = (startdisk + j) % + (raidPtr->numCol - 1); } if ((--startdisk) < 0) startdisk = raidPtr->numCol - 1 - 1; } - /* fill in the remaining layout parameters */ + /* Fill in the remaining layout parameters. */ layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << - raidPtr->logBytesPerSector; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << + raidPtr->logBytesPerSector; layoutPtr->numParityCol = 1; layoutPtr->numParityLogCol = 1; - layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol - - layoutPtr->numParityLogCol; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * - layoutPtr->sectorsPerStripeUnit; + layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol - + layoutPtr->numParityLogCol; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * + layoutPtr->sectorsPerStripeUnit; layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * - layoutPtr->sectorsPerStripeUnit; - - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * - layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; - - /* configure parity log parameters - * - * parameter comment/constraints - * ------------------------------------------- - * numParityRegions* all regions (except possibly last) - * of equal size - * totalInCoreLogCapacity* amount of memory in bytes available - * for in-core logs (default 1 MB) - * numSectorsPerLog# capacity of an in-core log in sectors - * (1 * disk track) - * numParityLogs total number of in-core logs, - * should be at least numParityRegions - * regionLogCapacity size of a region log (except possibly - * last one) in sectors - * totalLogCapacity total amount of log space in sectors - * - * where '*' denotes a user settable parameter. 
- * Note that logs are fixed to be the size of a disk track, - * value #defined in rf_paritylog.h - * + raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * + layoutPtr->sectorsPerStripeUnit; + + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * + layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + + /* + * Configure parity log parameters. + * + * Parameter Comment/constraints + * ------------------------------------------------ + * numParityRegions* All regions (except possibly last) + * of equal size. + * totalInCoreLogCapacity* Amount of memory in bytes available + * for in-core logs (default 1 MB). + * numSectorsPerLog# Capacity of an in-core log in sectors + * (1 * disk track). + * numParityLogs Total number of in-core logs, + * should be at least numParityRegions. + * regionLogCapacity Size of a region log (except possibly + * last one) in sectors. + * totalLogCapacity Total amount of log space in sectors. + * + * Where '*' denotes a user settable parameter. + * Note that logs are fixed to be the size of a disk track, + * value #defined in rf_paritylog.h. + * */ - totalLogCapacity = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol; + totalLogCapacity = layoutPtr->stripeUnitsPerDisk * + layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol; raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; if (rf_parityLogDebug) printf("bytes per sector %d\n", raidPtr->bytesPerSector); - /* reduce fragmentation within a disk region by adjusting the number + /* + * Reduce fragmentation within a disk region by adjusting the number * of regions in an attempt to allow an integral number of logs to fit - * into a disk region */ + * into a disk region. + */ fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; if (fragmentation > 0) for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) { - if (((totalLogCapacity / (rf_numParityRegions + i)) % + if (((totalLogCapacity / (rf_numParityRegions + i)) % raidPtr->numSectorsPerLog) < fragmentation) { rf_numParityRegions++; raidPtr->regionLogCapacity = totalLogCapacity / - rf_numParityRegions; - fragmentation = raidPtr->regionLogCapacity % - raidPtr->numSectorsPerLog; + rf_numParityRegions; + fragmentation = raidPtr->regionLogCapacity % + raidPtr->numSectorsPerLog; } - if (((totalLogCapacity / (rf_numParityRegions - i)) % + if (((totalLogCapacity / (rf_numParityRegions - i)) % raidPtr->numSectorsPerLog) < fragmentation) { rf_numParityRegions--; raidPtr->regionLogCapacity = totalLogCapacity / - rf_numParityRegions; - fragmentation = raidPtr->regionLogCapacity % - raidPtr->numSectorsPerLog; + rf_numParityRegions; + fragmentation = raidPtr->regionLogCapacity % + raidPtr->numSectorsPerLog; } } - /* ensure integral number of regions per log */ - raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity / - raidPtr->numSectorsPerLog) * - raidPtr->numSectorsPerLog; - - raidPtr->numParityLogs = rf_totalInCoreLogCapacity / - (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog); - /* to avoid deadlock, must ensure that enough logs exist for each - * region to have one simultaneously */ + /* Ensure integral number of regions per log. 
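(Editor's note: illustrative aside, not part of the committed diff. Three pieces of integer arithmetic drive the sizing here: regionLogCapacity = totalLogCapacity / rf_numParityRegions, the loop above that nudges rf_numParityRegions up or down to shrink the remainder modulo numSectorsPerLog, and the truncation just below that keeps an integral number of in-core logs per region; numParityLogs then comes from dividing the in-core budget (default 1 MB per the comment above) by the size of one log. A standalone sketch of that arithmetic with made-up example values: 100000 log sectors, 32 regions and 512-byte sectors are assumptions for illustration only, while 64 sectors per log is RF_DEFAULT_NUM_SECTORS_PER_LOG.

	#include <stdio.h>

	int
	main(void)
	{
		long totalLogCapacity = 100000;	/* assumed, in sectors */
		long numParityRegions = 32;	/* assumed */
		long numSectorsPerLog = 64;	/* RF_DEFAULT_NUM_SECTORS_PER_LOG */
		long bytesPerSector = 512;	/* assumed */
		long totalInCoreLogCapacity = 1024 * 1024;	/* "default 1 MB" */

		long regionLogCapacity = totalLogCapacity / numParityRegions;
		long fragmentation = regionLogCapacity % numSectorsPerLog;

		/* Truncate to an integral number of logs per region. */
		long rounded = (regionLogCapacity / numSectorsPerLog) *
		    numSectorsPerLog;

		long numParityLogs = totalInCoreLogCapacity /
		    (bytesPerSector * numSectorsPerLog);
		if (numParityLogs < numParityRegions)	/* deadlock avoidance */
			numParityLogs = numParityRegions;

		printf("regionLogCapacity %ld -> %ld (%ld sectors lost/region)\n",
		    regionLogCapacity, rounded, fragmentation);
		printf("numParityLogs %ld\n", numParityLogs);
		return (0);
	}

End of aside.)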
*/ + raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity / + raidPtr->numSectorsPerLog) * raidPtr->numSectorsPerLog; + + raidPtr->numParityLogs = rf_totalInCoreLogCapacity / + (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog); + /* + * To avoid deadlock, must ensure that enough logs exist for each + * region to have one simultaneously. + */ if (raidPtr->numParityLogs < rf_numParityRegions) raidPtr->numParityLogs = rf_numParityRegions; - /* create region information structs */ + /* Create region information structs. */ printf("Allocating %d bytes for in-core parity region info\n", (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t))); - RF_Malloc(raidPtr->regionInfo, - (rf_numParityRegions * sizeof(RF_RegionInfo_t)), + RF_Malloc(raidPtr->regionInfo, + (rf_numParityRegions * sizeof(RF_RegionInfo_t)), (RF_RegionInfo_t *)); if (raidPtr->regionInfo == NULL) return (ENOMEM); - /* last region may not be full capacity */ + /* Last region may not be full capacity. */ lastRegionCapacity = raidPtr->regionLogCapacity; - while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity + + while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity + lastRegionCapacity > totalLogCapacity) - lastRegionCapacity = lastRegionCapacity - - raidPtr->numSectorsPerLog; + lastRegionCapacity = lastRegionCapacity - + raidPtr->numSectorsPerLog; - raidPtr->regionParityRange = raidPtr->sectorsPerDisk / - rf_numParityRegions; + raidPtr->regionParityRange = raidPtr->sectorsPerDisk / + rf_numParityRegions; maxRegionParityRange = raidPtr->regionParityRange; -/* i can't remember why this line is in the code -wvcii 6/30/95 */ -/* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0) - regionParityRange++; */ + /* I can't remember why this line is in the code -wvcii 6/30/95. */ + /* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0) + * regionParityRange++; */ - /* build pool of unused parity logs */ + /* Build pool of unused parity logs. 
*/ printf("Allocating %d bytes for %d parity logs\n", - raidPtr->numParityLogs * raidPtr->numSectorsPerLog * + raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector, raidPtr->numParityLogs); - RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * raidPtr->bytesPerSector, + RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * + raidPtr->numSectorsPerLog * raidPtr->bytesPerSector, (caddr_t)); if (raidPtr->parityLogBufferHeap == NULL) return (ENOMEM); lHeapPtr = raidPtr->parityLogBufferHeap; rc = rf_mutex_init(&raidPtr->parityLogPool.mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, __LINE__, rc); - RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * + RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); return (ENOMEM); } for (i = 0; i < raidPtr->numParityLogs; i++) { if (i == 0) { - RF_Calloc(raidPtr->parityLogPool.parityLogs, 1, + RF_Calloc(raidPtr->parityLogPool.parityLogs, 1, sizeof(RF_ParityLog_t), (RF_ParityLog_t *)); if (raidPtr->parityLogPool.parityLogs == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, - raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * + RF_Free(raidPtr->parityLogBufferHeap, + raidPtr->numParityLogs * + raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); return (ENOMEM); } l = raidPtr->parityLogPool.parityLogs; } else { - RF_Calloc(l->next, 1, sizeof(RF_ParityLog_t), + RF_Calloc(l->next, 1, sizeof(RF_ParityLog_t), (RF_ParityLog_t *)); if (l->next == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, - raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * + RF_Free(raidPtr->parityLogBufferHeap, + raidPtr->numParityLogs * + raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); - for (l = raidPtr->parityLogPool.parityLogs; + for (l = raidPtr->parityLogPool.parityLogs; l; l = next) { next = l->next; if (l->records) - RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); + RF_Free(l->records, + (raidPtr->numSectorsPerLog * + sizeof(RF_ParityLogRecord_t))); RF_Free(l, sizeof(RF_ParityLog_t)); } return (ENOMEM); @@ -276,23 +289,23 @@ rf_ConfigureParityLogging( l = l->next; } l->bufPtr = lHeapPtr; - lHeapPtr += raidPtr->numSectorsPerLog * - raidPtr->bytesPerSector; - RF_Malloc(l->records, (raidPtr->numSectorsPerLog * - sizeof(RF_ParityLogRecord_t)), + lHeapPtr += raidPtr->numSectorsPerLog * + raidPtr->bytesPerSector; + RF_Malloc(l->records, (raidPtr->numSectorsPerLog * + sizeof(RF_ParityLogRecord_t)), (RF_ParityLogRecord_t *)); if (l->records == NULL) { - RF_Free(raidPtr->parityLogBufferHeap, - raidPtr->numParityLogs * - raidPtr->numSectorsPerLog * + RF_Free(raidPtr->parityLogBufferHeap, + raidPtr->numParityLogs * + raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); - for (l = raidPtr->parityLogPool.parityLogs; - l; + for (l = raidPtr->parityLogPool.parityLogs; + l; l = next) { next = l->next; if (l->records) - RF_Free(l->records, - (raidPtr->numSectorsPerLog * + RF_Free(l->records, + (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); RF_Free(l, sizeof(RF_ParityLog_t)); } @@ -301,42 +314,42 @@ rf_ConfigureParityLogging( } rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr); if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to create shutdown entry file %s line %d" + " rc=%d\n", 
__FILE__, __LINE__, rc); rf_ShutdownParityLoggingPool(raidPtr); return (rc); } - /* build pool of region buffers */ + /* Build pool of region buffers. */ rc = rf_mutex_init(&raidPtr->regionBufferPool.mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, __LINE__, rc); return (ENOMEM); } rc = rf_cond_init(&raidPtr->regionBufferPool.cond); if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, __LINE__, rc); rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); return (ENOMEM); } - raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity * - raidPtr->bytesPerSector; - printf("regionBufferPool.bufferSize %d\n", + raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity * + raidPtr->bytesPerSector; + printf("regionBufferPool.bufferSize %d\n", raidPtr->regionBufferPool.bufferSize); - /* for now, only one region at a time may be reintegrated */ - raidPtr->regionBufferPool.totalBuffers = 1; + /* For now, only one region at a time may be reintegrated. */ + raidPtr->regionBufferPool.totalBuffers = 1; - raidPtr->regionBufferPool.availableBuffers = - raidPtr->regionBufferPool.totalBuffers; + raidPtr->regionBufferPool.availableBuffers = + raidPtr->regionBufferPool.totalBuffers; raidPtr->regionBufferPool.availBuffersIndex = 0; raidPtr->regionBufferPool.emptyBuffersIndex = 0; printf("Allocating %d bytes for regionBufferPool\n", - (int) (raidPtr->regionBufferPool.totalBuffers * + (int) (raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t))); - RF_Malloc(raidPtr->regionBufferPool.buffers, - raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t), + RF_Malloc(raidPtr->regionBufferPool.buffers, + raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t), (caddr_t *)); if (raidPtr->regionBufferPool.buffers == NULL) { rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); @@ -345,69 +358,69 @@ rf_ConfigureParityLogging( } for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) { printf("Allocating %d bytes for regionBufferPool#%d\n", - (int) (raidPtr->regionBufferPool.bufferSize * + (int) (raidPtr->regionBufferPool.bufferSize * sizeof(char)), i); - RF_Malloc(raidPtr->regionBufferPool.buffers[i], + RF_Malloc(raidPtr->regionBufferPool.buffers[i], raidPtr->regionBufferPool.bufferSize * sizeof(char), (caddr_t)); if (raidPtr->regionBufferPool.buffers[i] == NULL) { rf_mutex_destroy(&raidPtr->regionBufferPool.mutex); rf_cond_destroy(&raidPtr->regionBufferPool.cond); for (j = 0; j < i; j++) { - RF_Free(raidPtr->regionBufferPool.buffers[i], + RF_Free(raidPtr->regionBufferPool.buffers[i], raidPtr->regionBufferPool.bufferSize * sizeof(char)); } - RF_Free(raidPtr->regionBufferPool.buffers, - raidPtr->regionBufferPool.totalBuffers * + RF_Free(raidPtr->regionBufferPool.buffers, + raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t)); return (ENOMEM); } printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i, (long) raidPtr->regionBufferPool.buffers[i]); } - rc = rf_ShutdownCreate(listp, + rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingRegionBufferPool, raidPtr); if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to create shutdown entry file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); rf_ShutdownParityLoggingRegionBufferPool(raidPtr); return (rc); } - /* build pool of parity buffers */ + /* Build pool of parity 
buffers. */ parityBufferCapacity = maxRegionParityRange; rc = rf_mutex_init(&raidPtr->parityBufferPool.mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, __LINE__, rc); return (rc); } rc = rf_cond_init(&raidPtr->parityBufferPool.cond); if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, __LINE__, rc); rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); return (ENOMEM); } - raidPtr->parityBufferPool.bufferSize = parityBufferCapacity * - raidPtr->bytesPerSector; - printf("parityBufferPool.bufferSize %d\n", + raidPtr->parityBufferPool.bufferSize = parityBufferCapacity * + raidPtr->bytesPerSector; + printf("parityBufferPool.bufferSize %d\n", raidPtr->parityBufferPool.bufferSize); - /* for now, only one region at a time may be reintegrated */ - raidPtr->parityBufferPool.totalBuffers = 1; + /* For now, only one region at a time may be reintegrated. */ + raidPtr->parityBufferPool.totalBuffers = 1; - raidPtr->parityBufferPool.availableBuffers = - raidPtr->parityBufferPool.totalBuffers; + raidPtr->parityBufferPool.availableBuffers = + raidPtr->parityBufferPool.totalBuffers; raidPtr->parityBufferPool.availBuffersIndex = 0; raidPtr->parityBufferPool.emptyBuffersIndex = 0; printf("Allocating %d bytes for parityBufferPool of %d units\n", - (int) (raidPtr->parityBufferPool.totalBuffers * + (int) (raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t)), - raidPtr->parityBufferPool.totalBuffers ); - RF_Malloc(raidPtr->parityBufferPool.buffers, - raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t), + raidPtr->parityBufferPool.totalBuffers); + RF_Malloc(raidPtr->parityBufferPool.buffers, + raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t), (caddr_t *)); if (raidPtr->parityBufferPool.buffers == NULL) { rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); @@ -416,47 +429,47 @@ rf_ConfigureParityLogging( } for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) { printf("Allocating %d bytes for parityBufferPool#%d\n", - (int) (raidPtr->parityBufferPool.bufferSize * - sizeof(char)),i); - RF_Malloc(raidPtr->parityBufferPool.buffers[i], + (int) (raidPtr->parityBufferPool.bufferSize * + sizeof(char)), i); + RF_Malloc(raidPtr->parityBufferPool.buffers[i], raidPtr->parityBufferPool.bufferSize * sizeof(char), (caddr_t)); if (raidPtr->parityBufferPool.buffers == NULL) { rf_mutex_destroy(&raidPtr->parityBufferPool.mutex); rf_cond_destroy(&raidPtr->parityBufferPool.cond); for (j = 0; j < i; j++) { - RF_Free(raidPtr->parityBufferPool.buffers[i], - raidPtr->regionBufferPool.bufferSize * + RF_Free(raidPtr->parityBufferPool.buffers[i], + raidPtr->regionBufferPool.bufferSize * sizeof(char)); } - RF_Free(raidPtr->parityBufferPool.buffers, - raidPtr->regionBufferPool.totalBuffers * + RF_Free(raidPtr->parityBufferPool.buffers, + raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t)); return (ENOMEM); } printf("parityBufferPool.buffers[%d] = %lx\n", i, (long) raidPtr->parityBufferPool.buffers[i]); } - rc = rf_ShutdownCreate(listp, - rf_ShutdownParityLoggingParityBufferPool, + rc = rf_ShutdownCreate(listp, + rf_ShutdownParityLoggingParityBufferPool, raidPtr); if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to create shutdown entry file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); 
rf_ShutdownParityLoggingParityBufferPool(raidPtr); return (rc); } - /* initialize parityLogDiskQueue */ - rc = rf_create_managed_mutex(listp, + /* Initialize parityLogDiskQueue. */ + rc = rf_create_managed_mutex(listp, &raidPtr->parityLogDiskQueue.mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, __LINE__, rc); return (rc); } rc = rf_create_managed_cond(listp, &raidPtr->parityLogDiskQueue.cond); if (rc) { - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, __LINE__, rc); return (rc); } @@ -473,80 +486,79 @@ rf_ConfigureParityLogging( raidPtr->parityLogDiskQueue.freeDataList = NULL; raidPtr->parityLogDiskQueue.freeCommonList = NULL; - rc = rf_ShutdownCreate(listp, - rf_ShutdownParityLoggingDiskQueue, + rc = rf_ShutdownCreate(listp, + rf_ShutdownParityLoggingDiskQueue, raidPtr); if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to create shutdown entry file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); return (rc); } for (i = 0; i < rf_numParityRegions; i++) { rc = rf_mutex_init(&raidPtr->regionInfo[i].mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); for (j = 0; j < i; j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, - (rf_numParityRegions * + rf_FreeRegionInfo(raidPtr, j); + RF_Free(raidPtr->regionInfo, + (rf_numParityRegions * sizeof(RF_RegionInfo_t))); return (ENOMEM); } rc = rf_mutex_init(&raidPtr->regionInfo[i].reintMutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); rf_mutex_destroy(&raidPtr->regionInfo[i].mutex); for (j = 0; j < i; j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, - (rf_numParityRegions * + rf_FreeRegionInfo(raidPtr, j); + RF_Free(raidPtr->regionInfo, + (rf_numParityRegions * sizeof(RF_RegionInfo_t))); return (ENOMEM); } raidPtr->regionInfo[i].reintInProgress = RF_FALSE; - raidPtr->regionInfo[i].regionStartAddr = - raidPtr->regionLogCapacity * i; - raidPtr->regionInfo[i].parityStartAddr = - raidPtr->regionParityRange * i; + raidPtr->regionInfo[i].regionStartAddr = + raidPtr->regionLogCapacity * i; + raidPtr->regionInfo[i].parityStartAddr = + raidPtr->regionParityRange * i; if (i < rf_numParityRegions - 1) { - raidPtr->regionInfo[i].capacity = - raidPtr->regionLogCapacity; - raidPtr->regionInfo[i].numSectorsParity = - raidPtr->regionParityRange; + raidPtr->regionInfo[i].capacity = + raidPtr->regionLogCapacity; + raidPtr->regionInfo[i].numSectorsParity = + raidPtr->regionParityRange; } else { - raidPtr->regionInfo[i].capacity = - lastRegionCapacity; - raidPtr->regionInfo[i].numSectorsParity = - raidPtr->sectorsPerDisk - - raidPtr->regionParityRange * i; - if (raidPtr->regionInfo[i].numSectorsParity > + raidPtr->regionInfo[i].capacity = lastRegionCapacity; + raidPtr->regionInfo[i].numSectorsParity = + raidPtr->sectorsPerDisk - + raidPtr->regionParityRange * i; + if (raidPtr->regionInfo[i].numSectorsParity > maxRegionParityRange) - maxRegionParityRange = - raidPtr->regionInfo[i].numSectorsParity; + maxRegionParityRange = + raidPtr->regionInfo[i].numSectorsParity; } 
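/*
 * Illustrative sketch (not part of the original change): the per-region
 * layout computed in the loop above.  Region i logs into a slice of the
 * log disk starting at regionLogCapacity * i and covers parity sectors
 * starting at regionParityRange * i; the final region absorbs whatever
 * remains.  Names are simplified stand-ins.
 */
void
sk_region_layout(long i, long numRegions, long regionLogCapacity,
    long lastRegionCapacity, long regionParityRange, long sectorsPerDisk,
    long *logStart, long *logLen, long *parityStart, long *parityLen)
{
	*logStart = regionLogCapacity * i;
	*parityStart = regionParityRange * i;
	if (i < numRegions - 1) {
		*logLen = regionLogCapacity;
		*parityLen = regionParityRange;
	} else {
		/* Last region: leftover log space and leftover parity. */
		*logLen = lastRegionCapacity;
		*parityLen = sectorsPerDisk - regionParityRange * i;
	}
}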
raidPtr->regionInfo[i].diskCount = 0; - RF_ASSERT(raidPtr->regionInfo[i].capacity + - raidPtr->regionInfo[i].regionStartAddr <= + RF_ASSERT(raidPtr->regionInfo[i].capacity + + raidPtr->regionInfo[i].regionStartAddr <= totalLogCapacity); - RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + - raidPtr->regionInfo[i].numSectorsParity <= + RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + + raidPtr->regionInfo[i].numSectorsParity <= raidPtr->sectorsPerDisk); printf("Allocating %d bytes for region %d\n", (int) (raidPtr->regionInfo[i].capacity * sizeof(RF_DiskMap_t)), i); - RF_Malloc(raidPtr->regionInfo[i].diskMap, + RF_Malloc(raidPtr->regionInfo[i].diskMap, (raidPtr->regionInfo[i].capacity * - sizeof(RF_DiskMap_t)), + sizeof(RF_DiskMap_t)), (RF_DiskMap_t *)); if (raidPtr->regionInfo[i].diskMap == NULL) { rf_mutex_destroy(&raidPtr->regionInfo[i].mutex); rf_mutex_destroy(&raidPtr->regionInfo[i].reintMutex); for (j = 0; j < i; j++) - FreeRegionInfo(raidPtr, j); - RF_Free(raidPtr->regionInfo, - (rf_numParityRegions * + rf_FreeRegionInfo(raidPtr, j); + RF_Free(raidPtr->regionInfo, + (rf_numParityRegions * sizeof(RF_RegionInfo_t))); return (ENOMEM); } @@ -554,64 +566,71 @@ rf_ConfigureParityLogging( raidPtr->regionInfo[i].coreLog = NULL; } rc = rf_ShutdownCreate(listp, - rf_ShutdownParityLoggingRegionInfo, + rf_ShutdownParityLoggingRegionInfo, raidPtr); if (rc) { - RF_ERRORMSG3("Unable to create shutdown entry file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to create shutdown entry file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); rf_ShutdownParityLoggingRegionInfo(raidPtr); return (rc); } RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0); raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED; - rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle, - rf_ParityLoggingDiskManager, raidPtr,"rf_log"); + rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle, + rf_ParityLoggingDiskManager, raidPtr, "rf_log"); if (rc) { raidPtr->parityLogDiskQueue.threadState = 0; - RF_ERRORMSG3("Unable to create parity logging disk thread file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + RF_ERRORMSG3("Unable to create parity logging disk thread" + " file %s line %d rc=%d\n", + __FILE__, __LINE__, rc); return (ENOMEM); } - /* wait for thread to start */ + /* Wait for thread to start. 
*/ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) { - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, + RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex); } RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); rc = rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr); if (rc) { - RF_ERRORMSG1("Got rc=%d adding parity logging shutdown event\n", rc); + RF_ERRORMSG1("Got rc=%d adding parity logging shutdown" + " event.\n", rc); rf_ShutdownParityLogging(raidPtr); return (rc); } if (rf_parityLogDebug) { - printf(" size of disk log in sectors: %d\n", - (int) totalLogCapacity); - printf(" total number of parity regions is %d\n", (int) rf_numParityRegions); - printf(" nominal sectors of log per parity region is %d\n", (int) raidPtr->regionLogCapacity); - printf(" nominal region fragmentation is %d sectors\n", (int) fragmentation); - printf(" total number of parity logs is %d\n", raidPtr->numParityLogs); - printf(" parity log size is %d sectors\n", raidPtr->numSectorsPerLog); - printf(" total in-core log space is %d bytes\n", (int) rf_totalInCoreLogCapacity); + printf("\t\t\tsize of disk log in sectors: %d\n", + (int) totalLogCapacity); + printf("\t\t\ttotal number of parity regions is %d\n", + (int) rf_numParityRegions); + printf("\t\t\tnominal sectors of log per parity region is %d\n", + (int) raidPtr->regionLogCapacity); + printf("\t\t\tnominal region fragmentation is %d sectors\n", + (int) fragmentation); + printf("\t\t\ttotal number of parity logs is %d\n", + raidPtr->numParityLogs); + printf("\t\t\tparity log size is %d sectors\n", + raidPtr->numSectorsPerLog); + printf("\t\t\ttotal in-core log space is %d bytes\n", + (int) rf_totalInCoreLogCapacity); } rf_EnableParityLogging(raidPtr); return (0); } -static void -FreeRegionInfo( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID) + +void +rf_FreeRegionInfo(RF_Raid_t *raidPtr, RF_RegionId_t regionID) { RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - RF_Free(raidPtr->regionInfo[regionID].diskMap, - (raidPtr->regionInfo[regionID].capacity * + RF_Free(raidPtr->regionInfo[regionID].diskMap, + (raidPtr->regionInfo[regionID].capacity * sizeof(RF_DiskMap_t))); if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) { - rf_ReleaseParityLogs(raidPtr, + rf_ReleaseParityLogs(raidPtr, raidPtr->regionInfo[regionID].coreLog); raidPtr->regionInfo[regionID].coreLog = NULL; } else { @@ -624,10 +643,8 @@ FreeRegionInfo( } -static void -FreeParityLogQueue( - RF_Raid_t * raidPtr, - RF_ParityLogQueue_t * queue) +void +rf_FreeParityLogQueue(RF_Raid_t *raidPtr, RF_ParityLogQueue_t *queue) { RF_ParityLog_t *l1, *l2; @@ -636,7 +653,7 @@ FreeParityLogQueue( while (l1) { l2 = l1; l1 = l2->next; - RF_Free(l2->records, (raidPtr->numSectorsPerLog * + RF_Free(l2->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); RF_Free(l2, sizeof(RF_ParityLog_t)); } @@ -645,14 +662,14 @@ FreeParityLogQueue( } -static void -FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue) +void +rf_FreeRegionBufferQueue(RF_RegionBufferQueue_t *queue) { - int i; + int i; RF_LOCK_MUTEX(queue->mutex); if (queue->availableBuffers != queue->totalBuffers) { - printf("Attempt to free region queue which is still in use!\n"); + printf("Attempt to free region queue that is still in use !\n"); RF_ASSERT(0); } for (i = 0; i < queue->totalBuffers; i++) @@ -662,7 +679,8 @@ FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue) rf_mutex_destroy(&queue->mutex); } -static void 
+ +void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg) { RF_Raid_t *raidPtr; @@ -670,18 +688,19 @@ rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg) raidPtr = (RF_Raid_t *) arg; if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingRegionInfo\n", + printf("raid%d: ShutdownParityLoggingRegionInfo\n", raidPtr->raidid); } - /* free region information structs */ + /* Free region information structs. */ for (i = 0; i < rf_numParityRegions; i++) - FreeRegionInfo(raidPtr, i); - RF_Free(raidPtr->regionInfo, (rf_numParityRegions * + rf_FreeRegionInfo(raidPtr, i); + RF_Free(raidPtr->regionInfo, (rf_numParityRegions * sizeof(raidPtr->regionInfo))); raidPtr->regionInfo = NULL; } -static void + +void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg) { RF_Raid_t *raidPtr; @@ -690,26 +709,28 @@ rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg) if (rf_parityLogDebug) { printf("raid%d: ShutdownParityLoggingPool\n", raidPtr->raidid); } - /* free contents of parityLogPool */ - FreeParityLogQueue(raidPtr, &raidPtr->parityLogPool); - RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * + /* Free contents of parityLogPool. */ + rf_FreeParityLogQueue(raidPtr, &raidPtr->parityLogPool); + RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); } -static void + +void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg) { RF_Raid_t *raidPtr; raidPtr = (RF_Raid_t *) arg; if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLoggingRegionBufferPool\n", + printf("raid%d: ShutdownParityLoggingRegionBufferPool\n", raidPtr->raidid); } - FreeRegionBufferQueue(&raidPtr->regionBufferPool); + rf_FreeRegionBufferQueue(&raidPtr->regionBufferPool); } -static void + +void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg) { RF_Raid_t *raidPtr; @@ -719,10 +740,11 @@ rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg) printf("raid%d: ShutdownParityLoggingParityBufferPool\n", raidPtr->raidid); } - FreeRegionBufferQueue(&raidPtr->parityBufferPool); + rf_FreeRegionBufferQueue(&raidPtr->parityBufferPool); } -static void + +void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg) { RF_ParityLogData_t *d; @@ -734,27 +756,28 @@ rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg) printf("raid%d: ShutdownParityLoggingDiskQueue\n", raidPtr->raidid); } - /* free disk manager stuff */ + /* Free disk manager stuff. 
*/ RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL); RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL); RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL); RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL); while (raidPtr->parityLogDiskQueue.freeDataList) { d = raidPtr->parityLogDiskQueue.freeDataList; - raidPtr->parityLogDiskQueue.freeDataList = - raidPtr->parityLogDiskQueue.freeDataList->next; + raidPtr->parityLogDiskQueue.freeDataList = + raidPtr->parityLogDiskQueue.freeDataList->next; RF_Free(d, sizeof(RF_ParityLogData_t)); } while (raidPtr->parityLogDiskQueue.freeCommonList) { c = raidPtr->parityLogDiskQueue.freeCommonList; rf_mutex_destroy(&c->mutex); - raidPtr->parityLogDiskQueue.freeCommonList = - raidPtr->parityLogDiskQueue.freeCommonList->next; + raidPtr->parityLogDiskQueue.freeCommonList = + raidPtr->parityLogDiskQueue.freeCommonList->next; RF_Free(c, sizeof(RF_CommonLogData_t)); } } -static void + +void rf_ShutdownParityLogging(RF_ThreadArg_t arg) { RF_Raid_t *raidPtr; @@ -763,135 +786,129 @@ rf_ShutdownParityLogging(RF_ThreadArg_t arg) if (rf_parityLogDebug) { printf("raid%d: ShutdownParityLogging\n", raidPtr->raidid); } - /* shutdown disk thread */ - /* This has the desirable side-effect of forcing all regions to be - * reintegrated. This is necessary since all parity log maps are - * currently held in volatile memory. */ + /* Shutdown disk thread. */ + /* + * This has the desirable side-effect of forcing all regions to be + * reintegrated. This is necessary since all parity log maps are + * currently held in volatile memory. + */ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE; RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); /* - * pLogDiskThread will now terminate when queues are cleared - * now wait for it to be done - */ + * pLogDiskThread will now terminate when queues are cleared. + * Now wait for it to be done. + */ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) { - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, + RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex); } RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); if (rf_parityLogDebug) { - printf("raid%d: ShutdownParityLogging done (thread completed)\n", raidPtr->raidid); + printf("raid%d: ShutdownParityLogging done" + " (thread completed)\n", raidPtr->raidid); } } -int -rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr) + +int +rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t *raidPtr) { return (20); } -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr) + +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t *raidPtr) { return (10); } -/* return the region ID for a given RAID address */ -RF_RegionId_t -rf_MapRegionIDParityLogging( - RF_Raid_t * raidPtr, - RF_SectorNum_t address) + + +/* Return the region ID for a given RAID address. 
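/*
 * Illustrative sketch (not part of the original change): the
 * terminate/acknowledge handshake that rf_ShutdownParityLogging() performs
 * with the parity-logging disk thread.  The sk_lock/sk_unlock/sk_signal/
 * sk_wait stubs and the flag values stand in for the RF_ mutex and
 * condition primitives and are hypothetical.
 */
#define SK_PLOG_TERMINATE	0x04
#define SK_PLOG_SHUTDOWN	0x08

void	sk_lock(void);
void	sk_unlock(void);
void	sk_signal(void);
void	sk_wait(void);

void
sk_stop_log_thread(int *threadState)
{
	sk_lock();
	*threadState |= SK_PLOG_TERMINATE;	/* Ask the thread to drain and exit. */
	sk_unlock();
	sk_signal();				/* Wake the thread if it is idle. */

	sk_lock();
	while (!(*threadState & SK_PLOG_SHUTDOWN))
		sk_wait();	/* Thread raises SHUTDOWN once its queues are clear. */
	sk_unlock();
}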
*/ +RF_RegionId_t +rf_MapRegionIDParityLogging(RF_Raid_t *raidPtr, RF_SectorNum_t address) { RF_RegionId_t regionID; -/* regionID = address / (raidPtr->regionParityRange * raidPtr->Layout.numDataCol); */ + /* regionID = address / (raidPtr->regionParityRange * + * raidPtr->Layout.numDataCol); */ regionID = address / raidPtr->regionParityRange; if (regionID == rf_numParityRegions) { - /* last region may be larger than other regions */ + /* Last region may be larger than other regions. */ regionID--; } RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr); - RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr + + RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); RF_ASSERT(regionID < rf_numParityRegions); return (regionID); } -/* given a logical RAID sector, determine physical disk address of data */ -void -rf_MapSectorParityLogging( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +/* Given a logical RAID sector, determine physical disk address of data. */ +void +rf_MapSectorParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { - RF_StripeNum_t SUID = raidSector / + RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; *row = 0; /* *col = (SUID % (raidPtr->numCol - - * raidPtr->Layout.numParityLogCol)); */ + * raidPtr->Layout.numParityLogCol)); */ *col = SUID % raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * - raidPtr->Layout.sectorsPerStripeUnit + + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * + raidPtr->Layout.sectorsPerStripeUnit + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -/* given a logical RAID sector, determine physical disk address of parity */ -void -rf_MapParityParityLogging( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +/* Given a logical RAID sector, determine physical disk address of parity. */ +void +rf_MapParityParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { - RF_StripeNum_t SUID = raidSector / - raidPtr->Layout.sectorsPerStripeUnit; + RF_StripeNum_t SUID = raidSector / + raidPtr->Layout.sectorsPerStripeUnit; *row = 0; /* *col = - * raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%(raidPt - * r->numCol - raidPtr->Layout.numParityLogCol); */ + * raidPtr->Layout.numDataCol-(SUID / raidPtr->Layout.numDataCol) % + * (raidPtr->numCol - raidPtr->Layout.numParityLogCol); */ *col = raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * - raidPtr->Layout.sectorsPerStripeUnit + + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * + raidPtr->Layout.sectorsPerStripeUnit + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -/* given a regionID and sector offset, determine the physical disk address of the parity log */ -void -rf_MapLogParityLogging( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID, - RF_SectorNum_t regionOffset, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * startSector) +/* + * Given a regionID and sector offset, determine the physical disk address + * of the parity log. 
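/*
 * Illustrative sketch (not part of the original change): the address maps
 * above restated with plain integers.  For a logical RAID sector, the
 * region is sector / regionParityRange (the last region absorbs the
 * overflow), the data column is SUID % numDataCol (parity lives in column
 * numDataCol), and the on-disk offset is
 * (SUID / numDataCol) * sectorsPerSU + sector % sectorsPerSU,
 * where SUID = sector / sectorsPerSU.  Names are stand-ins.
 */
void
sk_map_sector(long sector, long sectorsPerSU, long numDataCol,
    long regionParityRange, long numParityRegions,
    long *region, long *col, long *diskSector)
{
	long suid = sector / sectorsPerSU;

	*region = sector / regionParityRange;
	if (*region == numParityRegions)
		(*region)--;		/* Last region may be larger. */

	*col = suid % numDataCol;	/* Data column; parity uses numDataCol. */
	*diskSector = (suid / numDataCol) * sectorsPerSU +
	    (sector % sectorsPerSU);
}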
+ */ +void +rf_MapLogParityLogging(RF_Raid_t *raidPtr, RF_RegionId_t regionID, + RF_SectorNum_t regionOffset, RF_RowCol_t *row, RF_RowCol_t *col, + RF_SectorNum_t *startSector) { *row = 0; *col = raidPtr->numCol - 1; - *startSector = raidPtr->regionInfo[regionID].regionStartAddr + regionOffset; + *startSector = + raidPtr->regionInfo[regionID].regionStartAddr + regionOffset; } -/* given a regionID, determine the physical disk address of the logged - parity for that region */ -void -rf_MapRegionParity( - RF_Raid_t * raidPtr, - RF_RegionId_t regionID, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * startSector, - RF_SectorCount_t * numSector) +/* + * Given a regionID, determine the physical disk address of the logged + * parity for that region. + */ +void +rf_MapRegionParity(RF_Raid_t *raidPtr, RF_RegionId_t regionID, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *startSector, + RF_SectorCount_t *numSector) { *row = 0; *col = raidPtr->numCol - 2; @@ -900,135 +917,181 @@ rf_MapRegionParity( } -/* given a logical RAID address, determine the participating disks in - the stripe */ -void -rf_IdentifyStripeParityLogging( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) +/* + * Given a logical RAID address, determine the participating disks in + * the stripe. + */ +void +rf_IdentifyStripeParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t **diskids, RF_RowCol_t *outRow) { - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, + RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *) - raidPtr->Layout.layoutSpecificInfo; + RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *) + raidPtr->Layout.layoutSpecificInfo; *outRow = 0; *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; } -void -rf_MapSIDToPSIDParityLogging( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) +void +rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t *layoutPtr, + RF_StripeNum_t stripeID, RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru) { *which_ru = 0; *psID = stripeID; } -/* select an algorithm for performing an access. Returns two pointers, +/* + * Select an algorithm for performing an access. Returns two pointers, * one to a function that will return information about the DAG, and * another to a function that will create the dag. */ -void -rf_ParityLoggingDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmp, - RF_VoidFuncPtr * createFunc) +void +rf_ParityLoggingDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t *asmp, RF_VoidFuncPtr *createFunc) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_PhysDiskAddr_t *failedPDA = NULL; RF_RowCol_t frow, fcol; RF_RowStatus_t rstat; - int prior_recon; + int prior_recon; RF_ASSERT(RF_IO_IS_R_OR_W(type)); if (asmp->numDataFailed + asmp->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); + RF_ERRORMSG("Multiple disks failed in a single group !" + " Aborting I/O operation.\n"); /* *infoFunc = */ *createFunc = NULL; return; } else if (asmp->numDataFailed + asmp->numParityFailed == 1) { - /* if under recon & already reconstructed, redirect + /* + * If under recon & already reconstructed, redirect * the access to the spare drive and eliminate the - * failure indication */ + * failure indication. 
+ */ failedPDA = asmp->failedPDAs[0]; frow = failedPDA->row; fcol = failedPDA->col; rstat = raidPtr->status[failedPDA->row]; - prior_recon = (rstat == rf_rs_reconfigured) || ( - (rstat == rf_rs_reconstructing) ? - rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); + prior_recon = (rstat == rf_rs_reconfigured) || + ((rstat == rf_rs_reconstructing) ? + rf_CheckRUReconstructed(raidPtr->reconControl[frow] + ->reconMap, failedPDA->startSector) : 0); if (prior_recon) { - RF_RowCol_t or = failedPDA->row, oc = failedPDA->col; + RF_RowCol_t or = failedPDA->row; + RF_RowCol_t oc = failedPDA->col; RF_SectorNum_t oo = failedPDA->startSector; - if (layoutPtr->map->flags & - RF_DISTRIBUTE_SPARE) { - /* redirect to dist spare space */ + if (layoutPtr->map->flags & + RF_DISTRIBUTE_SPARE) { + /* Redirect to dist spare space. */ if (failedPDA == asmp->parityInfo) { - /* parity has failed */ - (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - if (asmp->parityInfo->next) { /* redir 2nd component, - * if any */ - RF_PhysDiskAddr_t *p = asmp->parityInfo->next; - RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; + /* Parity has failed. */ + (layoutPtr->map->MapParity) + (raidPtr, + failedPDA->raidAddress, + &failedPDA->row, + &failedPDA->col, + &failedPDA->startSector, + RF_REMAP); + + if (asmp->parityInfo->next) { + /* + * Redir 2nd component, + * if any. + */ + RF_PhysDiskAddr_t *p = + asmp->parityInfo->next; + RF_SectorNum_t SUoffs = + p->startSector % + layoutPtr->sectorsPerStripeUnit; p->row = failedPDA->row; p->col = failedPDA->col; - p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + - SUoffs; /* cheating: - * startSector is not - * really a RAID address */ + /* + * Cheating: + * startSector is not + * really a RAID + * address. + */ + p->startSector = + rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, + failedPDA->startSector) + SUoffs; } } else - if (asmp->parityInfo->next && failedPDA == asmp->parityInfo->next) { - RF_ASSERT(0); /* should not ever - * happen */ + if (asmp->parityInfo->next && + failedPDA == + asmp->parityInfo->next) { + /* + * Should not ever + * happen. + */ + RF_ASSERT(0); } else { - - /* data has failed */ - (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - + /* Data has failed. */ + (layoutPtr->map + ->MapSector) + (raidPtr, + failedPDA->raidAddress, + &failedPDA->row, + &failedPDA->col, + &failedPDA->startSector, + RF_REMAP); } - } else { - /* redirect to dedicated spare space */ + } else { + /* Redirect to dedicated spare space. */ - failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; - failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; + failedPDA->row = + raidPtr->Disks[frow][fcol].spareRow; + failedPDA->col = + raidPtr->Disks[frow][fcol].spareCol; - /* the parity may have two distinct + /* + * The parity may have two distinct * components, both of which may need - * to be redirected */ + * to be redirected. 
+ */ if (asmp->parityInfo->next) { - if (failedPDA == asmp->parityInfo) { - failedPDA->next->row = failedPDA->row; - failedPDA->next->col = failedPDA->col; - } else - if (failedPDA == asmp->parityInfo->next) { /* paranoid: should never occur */ - asmp->parityInfo->row = failedPDA->row; - asmp->parityInfo->col = failedPDA->col; + if (failedPDA == + asmp->parityInfo) { + failedPDA->next->row = + failedPDA->row; + failedPDA->next->col = + failedPDA->col; + } else { + if (failedPDA == + asmp->parityInfo + ->next) { + /* + * Paranoid: + * Should never + * occur. + */ + asmp->parityInfo + ->row = + failedPDA->row; + asmp->parityInfo + ->col = + failedPDA->col; } + } } } RF_ASSERT(failedPDA->col != -1); if (rf_dagDebug || rf_mapDebug) { - printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - raidPtr->raidid, type, or, oc, (long) oo, failedPDA->row, failedPDA->col, (long) failedPDA->startSector); + printf("raid%d: Redirected type '%c'" + " r %d c %d o %ld -> r %d c %d" + " o %ld\n", raidPtr->raidid, + type, or, oc, (long) oo, + failedPDA->row, failedPDA->col, + (long) failedPDA->startSector); } asmp->numDataFailed = asmp->numParityFailed = 0; } @@ -1036,38 +1099,48 @@ rf_ParityLoggingDagSelect( if (type == RF_IO_TYPE_READ) { if (asmp->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; + *createFunc = + (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; + *createFunc = + (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; } else { - /* if mirroring, always use large writes. If the access + /* + * If mirroring, always use large writes. If the access * requires two distinct parity updates, always do a small - * write. If the stripe contains a failure but the access + * write. If the stripe contains a failure but the access * does not, do a small write. The first conditional * (numStripeUnitsAccessed <= numDataCol/2) uses a * less-than-or-equal rather than just a less-than because * when G is 3 or 4, numDataCol/2 is 1, and I want - * single-stripe-unit updates to use just one disk. */ + * single-stripe-unit updates to use just one disk. 
+ */ if ((asmp->numDataFailed + asmp->numParityFailed) == 0) { - if (((asmp->numStripeUnitsAccessed <= - (layoutPtr->numDataCol / 2)) && + if (((asmp->numStripeUnitsAccessed <= + (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmp->parityInfo->next != NULL) || + (asmp->parityInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmp)) { - *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingSmallWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateParityLoggingSmallWriteDAG; } else - *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingLargeWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateParityLoggingLargeWriteDAG; } else if (asmp->numParityFailed == 1) - *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateNonRedundantWriteDAG; else - if (asmp->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) + if (asmp->numStripeUnitsAccessed != 1 && + failedPDA->numSector != + layoutPtr->sectorsPerStripeUnit) *createFunc = NULL; else - *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateDegradedWriteDAG; } } -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_paritylogging.h b/sys/dev/raidframe/rf_paritylogging.h index 532da664940..6f6caaeb039 100644 --- a/sys/dev/raidframe/rf_paritylogging.h +++ b/sys/dev/raidframe/rf_paritylogging.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_paritylogging.h,v 1.2 1999/02/16 00:03:07 niklas Exp $ */ +/* $OpenBSD: rf_paritylogging.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_paritylogging.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,44 +28,29 @@ * rights to redistribute these changes. */ -/* header file for Parity Logging */ +/* Header file for Parity Logging. 
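/*
 * Illustrative sketch (not part of the original change): the fault-free
 * write-DAG choice made in rf_ParityLoggingDagSelect() above, reduced to a
 * predicate.  Nonzero selects the parity-logging small-write
 * (read-modify-write) DAG; zero selects the large-write DAG.  The function
 * name and flag parameters are hypothetical.
 */
int
sk_use_small_write(int stripeUnitsAccessed, int numDataCol,
    int parityHasTwoComponents, int stripeHasFailure)
{
	return ((stripeUnitsAccessed <= numDataCol / 2 && numDataCol != 1) ||
	    parityHasTwoComponents || stripeHasFailure);
}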
*/ -#ifndef _RF__RF_PARITYLOGGING_H_ -#define _RF__RF_PARITYLOGGING_H_ +#ifndef _RF__RF_PARITYLOGGING_H_ +#define _RF__RF_PARITYLOGGING_H_ -int -rf_ConfigureParityLogging(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr); -RF_RegionId_t -rf_MapRegionIDParityLogging(RF_Raid_t * raidPtr, - RF_SectorNum_t address); -void -rf_MapSectorParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, - int remap); -void -rf_MapParityParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, - int remap); -void -rf_MapLogParityLogging(RF_Raid_t * raidPtr, RF_RegionId_t regionID, - RF_SectorNum_t regionOffset, RF_RowCol_t * row, RF_RowCol_t * col, - RF_SectorNum_t * startSector); -void -rf_MapRegionParity(RF_Raid_t * raidPtr, RF_RegionId_t regionID, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * startSector, - RF_SectorCount_t * numSector); -void -rf_IdentifyStripeParityLogging(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_ParityLoggingDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); +int rf_ConfigureParityLogging(RF_ShutdownList_t **, RF_Raid_t *, + RF_Config_t *); +int rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t *); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t *); +RF_RegionId_t rf_MapRegionIDParityLogging(RF_Raid_t *, RF_SectorNum_t); +void rf_MapSectorParityLogging(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapParityParityLogging(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapLogParityLogging(RF_Raid_t *, RF_RegionId_t, RF_SectorNum_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *); +void rf_MapRegionParity(RF_Raid_t *, RF_RegionId_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, RF_SectorCount_t *); +void rf_IdentifyStripeParityLogging(RF_Raid_t *, RF_RaidAddr_t, RF_RowCol_t **, + RF_RowCol_t *); +void rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t *, RF_StripeNum_t, + RF_StripeNum_t *, RF_ReconUnitNum_t *); +void rf_ParityLoggingDagSelect(RF_Raid_t *, RF_IoType_t, + RF_AccessStripeMap_t *, RF_VoidFuncPtr *); -#endif /* !_RF__RF_PARITYLOGGING_H_ */ +#endif /* !_RF__RF_PARITYLOGGING_H_ */ diff --git a/sys/dev/raidframe/rf_parityloggingdags.c b/sys/dev/raidframe/rf_parityloggingdags.c index cd7cd3c6be6..fff383d5737 100644 --- a/sys/dev/raidframe/rf_parityloggingdags.c +++ b/sys/dev/raidframe/rf_parityloggingdags.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_parityloggingdags.c,v 1.3 2000/01/11 18:02:22 peter Exp $ */ +/* $OpenBSD: rf_parityloggingdags.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_parityloggingdags.c,v 1.4 2000/01/07 03:41:04 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,10 +30,10 @@ #include "rf_archs.h" -#if RF_INCLUDE_PARITYLOGGING > 0 +#if RF_INCLUDE_PARITYLOGGING > 0 /* - DAGs specific to parity logging are created here + * DAGs specific to parity logging are created here. 
*/ #include "rf_types.h" @@ -47,56 +48,57 @@ #include "rf_parityloggingdags.h" -/****************************************************************************** +/***************************************************************************** * - * creates a DAG to perform a large-write operation: + * Creates a DAG to perform a large-write operation: * * / Rod \ / Wnd \ * H -- NIL- Rod - NIL - Wnd ------ NIL - T * \ Rod / \ Xor - Lpo / * - * The writes are not done until the reads complete because if they were done in - * parallel, a failure on one of the reads could leave the parity in an inconsistent - * state, so that the retry with a new DAG would produce erroneous parity. + * The writes are not done until the reads complete because if they were done + * in parallel, a failure on one of the reads could leave the parity in an + * inconsistent state, so that the retry with a new DAG would produce + * erroneous parity. * - * Note: this DAG has the nasty property that none of the buffers allocated for reading - * old data can be freed until the XOR node fires. Need to fix this. + * Note: This DAG has the nasty property that none of the buffers allocated + * for reading old data can be freed until the XOR node fires. + * Need to fix this. * - * The last two arguments are the number of faults tolerated, and function for the - * redundancy calculation. The undo for the redundancy calc is assumed to be null + * The last two arguments are the number of faults tolerated, and function + * for the redundancy calculation. The undo for the redundancy calc is assumed + * to be null. * *****************************************************************************/ -void -rf_CommonCreateParityLoggingLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, +void +rf_CommonCreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr, + RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, int nfaults, int (*redFunc) (RF_DagNode_t *)) { - RF_DagNode_t *nodes, *wndNodes, *rodNodes = NULL, *syncNode, *xorNode, - *lpoNode, *blockNode, *unblockNode, *termNode; - int nWndNodes, nRodNodes, i; + RF_DagNode_t *nodes, *wndNodes, *rodNodes = NULL, *syncNode, *xorNode; + RF_DagNode_t *lpoNode, *blockNode, *unblockNode, *termNode; + int nWndNodes, nRodNodes, i; RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_AccessStripeMapHeader_t *new_asm_h[2]; - int nodeNum, asmNum; + int nodeNum, asmNum; RF_ReconUnitNum_t which_ru; - char *sosBuffer, *eosBuffer; + char *sosBuffer, *eosBuffer; RF_PhysDiskAddr_t *pda; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); + RF_StripeNum_t parityStripeID = + rf_RaidAddressToParityStripeID(&(raidPtr->Layout), + asmap->raidAddress, &which_ru); if (rf_dagDebug) printf("[Creating parity-logging large-write DAG]\n"); - RF_ASSERT(nfaults == 1);/* this arch only single fault tolerant */ + RF_ASSERT(nfaults == 1); /* This arch only single fault tolerant. */ dag_h->creator = "ParityLoggingLargeWriteDAG"; - /* alloc the Wnd nodes, the xor node, and the Lpo node */ + /* Alloc the Wnd nodes, the xor node, and the Lpo node. 
*/ nWndNodes = asmap->numStripeUnitsAccessed; - RF_CallocAndAdd(nodes, nWndNodes + 6, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + RF_CallocAndAdd(nodes, nWndNodes + 6, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); i = 0; wndNodes = &nodes[i]; i += nWndNodes; @@ -117,26 +119,40 @@ rf_CommonCreateParityLoggingLargeWriteDAG( dag_h->numCommits = 0; dag_h->numSuccedents = 1; - rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); + rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, + new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList); if (nRodNodes > 0) - RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); - - /* begin node initialization */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes + 1, 0, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWndNodes + 1, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes + 1, 0, 0, dag_h, "Nil", allocList); - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - - /* initialize the Rod nodes */ + RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); + + /* Begin node initialization. */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nRodNodes + 1, 0, 0, 0, dag_h, + "Nil", allocList); + rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, nWndNodes + 1, 0, 0, dag_h, + "Nil", allocList); + rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes + 1, + 0, 0, dag_h, "Nil", allocList); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + + /* Initialize the Rod nodes. */ for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) { if (new_asm_h[asmNum]) { pda = new_asm_h[asmNum]->stripeMap->physInfo; while (pda) { - rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList); + rf_InitNode(&rodNodes[nodeNum], rf_wait, + RF_FALSE, rf_DiskReadFunc, + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, + 1, 1, 4, 0, dag_h, "Rod", allocList); rodNodes[nodeNum].params[0].p = pda; rodNodes[nodeNum].params[1].p = pda->bufPtr; rodNodes[nodeNum].params[2].v = parityStripeID; - rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + rodNodes[nodeNum].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, which_ru); nodeNum++; pda = pda->next; } @@ -144,62 +160,78 @@ rf_CommonCreateParityLoggingLargeWriteDAG( } RF_ASSERT(nodeNum == nRodNodes); - /* initialize the wnd nodes */ + /* Initialize the wnd nodes. 
*/ pda = asmap->physInfo; for (i = 0; i < nWndNodes; i++) { - rf_InitNode(&wndNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList); + rf_InitNode(&wndNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, + rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, + dag_h, "Wnd", allocList); RF_ASSERT(pda != NULL); wndNodes[i].params[0].p = pda; wndNodes[i].params[1].p = pda->bufPtr; wndNodes[i].params[2].v = parityStripeID; - wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wndNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); pda = pda->next; } - /* initialize the redundancy node */ - rf_InitNode(xorNode, rf_wait, RF_TRUE, redFunc, rf_NullNodeUndoFunc, NULL, 1, 1, 2 * (nWndNodes + nRodNodes) + 1, 1, dag_h, "Xr ", allocList); + /* Initialize the redundancy node. */ + rf_InitNode(xorNode, rf_wait, RF_TRUE, redFunc, rf_NullNodeUndoFunc, + NULL, 1, 1, 2 * (nWndNodes + nRodNodes) + 1, 1, dag_h, + "Xr ", allocList); xorNode->flags |= RF_DAGNODE_FLAG_YIELD; for (i = 0; i < nWndNodes; i++) { - xorNode->params[2 * i + 0] = wndNodes[i].params[0]; /* pda */ - xorNode->params[2 * i + 1] = wndNodes[i].params[1]; /* buf ptr */ + /* pda */ + xorNode->params[2 * i + 0] = wndNodes[i].params[0]; + /* buf ptr */ + xorNode->params[2 * i + 1] = wndNodes[i].params[1]; } for (i = 0; i < nRodNodes; i++) { - xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0]; /* pda */ - xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1]; /* buf ptr */ + xorNode->params[2 * (nWndNodes + i) + 0] = + rodNodes[i].params[0]; /* pda */ + xorNode->params[2 * (nWndNodes + i) + 1] = + rodNodes[i].params[1]; /* buf ptr */ } - xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; /* xor node needs to get - * at RAID information */ + /* Xor node needs to get at RAID information. */ + xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr; - /* look for an Rod node that reads a complete SU. If none, alloc a + /* + * Look for an Rod node that reads a complete SU. If none, alloc a * buffer to receive the parity info. Note that we can't use a new * data buffer because it will not have gotten written when the xor - * occurs. */ + * occurs. + */ for (i = 0; i < nRodNodes; i++) - if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit) + if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p) + ->numSector == raidPtr->Layout.sectorsPerStripeUnit) break; if (i == nRodNodes) { - RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList); + RF_CallocAndAdd(xorNode->results[0], 1, + rf_RaidAddressToByte(raidPtr, + raidPtr->Layout.sectorsPerStripeUnit), (void *), + allocList); } else { xorNode->results[0] = rodNodes[i].params[1].p; } - /* initialize the Lpo node */ - rf_InitNode(lpoNode, rf_wait, RF_FALSE, rf_ParityLogOverwriteFunc, rf_ParityLogOverwriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpo", allocList); + /* Initialize the Lpo node. */ + rf_InitNode(lpoNode, rf_wait, RF_FALSE, rf_ParityLogOverwriteFunc, + rf_ParityLogOverwriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, + dag_h, "Lpo", allocList); lpoNode->params[0].p = asmap->parityInfo; lpoNode->params[1].p = xorNode->results[0]; - RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must - * describe entire - * parity unit */ + /* parityInfo must describe entire parity unit. 
*/ + RF_ASSERT(asmap->parityInfo->next == NULL); - /* connect nodes to form graph */ + /* Connect nodes to form graph. */ - /* connect dag header to block node */ + /* Connect dag header to block node. */ RF_ASSERT(dag_h->numSuccedents == 1); RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; - /* connect the block node to the Rod nodes */ + /* Connect the block node to the Rod nodes. */ RF_ASSERT(blockNode->numSuccedents == nRodNodes + 1); for (i = 0; i < nRodNodes; i++) { RF_ASSERT(rodNodes[i].numAntecedents == 1); @@ -208,28 +240,28 @@ rf_CommonCreateParityLoggingLargeWriteDAG( rodNodes[i].antType[0] = rf_control; } - /* connect the block node to the sync node */ + /* Connect the block node to the sync node. */ /* necessary if nRodNodes == 0 */ RF_ASSERT(syncNode->numAntecedents == nRodNodes + 1); blockNode->succedents[nRodNodes] = syncNode; syncNode->antecedents[0] = blockNode; syncNode->antType[0] = rf_control; - /* connect the Rod nodes to the syncNode */ + /* Connect the Rod nodes to the syncNode. */ for (i = 0; i < nRodNodes; i++) { rodNodes[i].succedents[0] = syncNode; syncNode->antecedents[1 + i] = &rodNodes[i]; syncNode->antType[1 + i] = rf_control; } - /* connect the sync node to the xor node */ + /* Connect the sync node to the xor node. */ RF_ASSERT(syncNode->numSuccedents == nWndNodes + 1); RF_ASSERT(xorNode->numAntecedents == 1); syncNode->succedents[0] = xorNode; xorNode->antecedents[0] = syncNode; - xorNode->antType[0] = rf_trueData; /* carry forward from sync */ + xorNode->antType[0] = rf_trueData; /* Carry forward from sync. */ - /* connect the sync node to the Wnd nodes */ + /* Connect the sync node to the Wnd nodes. */ for (i = 0; i < nWndNodes; i++) { RF_ASSERT(wndNodes->numAntecedents == 1); syncNode->succedents[1 + i] = &wndNodes[i]; @@ -237,14 +269,14 @@ rf_CommonCreateParityLoggingLargeWriteDAG( wndNodes[i].antType[0] = rf_control; } - /* connect the xor node to the Lpo node */ + /* Connect the xor node to the Lpo node. */ RF_ASSERT(xorNode->numSuccedents == 1); RF_ASSERT(lpoNode->numAntecedents == 1); xorNode->succedents[0] = lpoNode; lpoNode->antecedents[0] = xorNode; lpoNode->antType[0] = rf_trueData; - /* connect the Wnd nodes to the unblock node */ + /* Connect the Wnd nodes to the unblock node. */ RF_ASSERT(unblockNode->numAntecedents == nWndNodes + 1); for (i = 0; i < nWndNodes; i++) { RF_ASSERT(wndNodes->numSuccedents == 1); @@ -253,13 +285,13 @@ rf_CommonCreateParityLoggingLargeWriteDAG( unblockNode->antType[i] = rf_control; } - /* connect the Lpo node to the unblock node */ + /* Connect the Lpo node to the unblock node. */ RF_ASSERT(lpoNode->numSuccedents == 1); lpoNode->succedents[0] = unblockNode; unblockNode->antecedents[nWndNodes] = lpoNode; unblockNode->antType[nWndNodes] = rf_control; - /* connect unblock node to terminator */ + /* Connect unblock node to terminator. */ RF_ASSERT(unblockNode->numSuccedents == 1); RF_ASSERT(termNode->numAntecedents == 1); RF_ASSERT(termNode->numSuccedents == 0); @@ -269,72 +301,73 @@ rf_CommonCreateParityLoggingLargeWriteDAG( } - - -/****************************************************************************** +/***************************************************************************** * - * creates a DAG to perform a small-write operation (either raid 5 or pq), which is as follows: + * Creates a DAG to perform a small-write operation (either raid 5 or pq), + * which is as follows: * - * Header - * | - * Block - * / | ... 
\ \ - * / | \ \ - * Rod Rod Rod Rop - * | \ /| \ / | \/ | - * | | | /\ | - * Wnd Wnd Wnd X - * | \ / | - * | \ / | - * \ \ / Lpo - * \ \ / / - * +-> Unblock <-+ - * | - * T + * Header + * | + * Block + * / | ... \ \ + * / | \ \ + * Rod Rod Rod Rop + * | \ /| \ / | \/ | + * | | | /\ | + * Wnd Wnd Wnd X + * | \ / | + * | \ / | + * \ \ / Lpo + * \ \ / / + * +-> Unblock <-+ + * | + * T * * * R = Read, W = Write, X = Xor, o = old, n = new, d = data, p = parity. - * When the access spans a stripe unit boundary and is less than one SU in size, there will - * be two Rop -- X -- Wnp branches. I call this the "double-XOR" case. - * The second output from each Rod node goes to the X node. In the double-XOR - * case, there are exactly 2 Rod nodes, and each sends one output to one X node. + * When the access spans a stripe unit boundary and is less than one SU in + * size, there will be two Rop -- X -- Wnp branches. I call this the + * "double-XOR" case. + * The second output from each Rod node goes to the X node. In the double-XOR + * case, there are exactly 2 Rod nodes, and each sends one output to one X + * node. * There is one Rod -- Wnd -- T branch for each stripe unit being updated. * - * The block and unblock nodes are unused. See comment above CreateFaultFreeReadDAG. + * The block and unblock nodes are unused. See comment above + * CreateFaultFreeReadDAG. * - * Note: this DAG ignores all the optimizations related to making the RMWs atomic. - * it also has the nasty property that none of the buffers allocated for reading - * old data & parity can be freed until the XOR node fires. Need to fix this. + * Note: This DAG ignores all the optimizations related to making the RMWs + * atomic. + * It also has the nasty property that none of the buffers allocated + * for reading old data & parity can be freed until the XOR node fires. + * Need to fix this. * - * A null qfuncs indicates single fault tolerant + * A null qfuncs indicates single fault tolerant. *****************************************************************************/ -void -rf_CommonCreateParityLoggingSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) +void +rf_CommonCreateParityLoggingSmallWriteDAG(RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, + RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs) { RF_DagNode_t *xorNodes, *blockNode, *unblockNode, *nodes; RF_DagNode_t *readDataNodes, *readParityNodes; RF_DagNode_t *writeDataNodes, *lpuNodes; RF_DagNode_t *unlockDataNodes = NULL, *termNode; RF_PhysDiskAddr_t *pda = asmap->physInfo; - int numDataNodes = asmap->numStripeUnitsAccessed; - int numParityNodes = (asmap->parityInfo->next) ? 2 : 1; - int i, j, nNodes, totalNumNodes; + int numDataNodes = asmap->numStripeUnitsAccessed; + int numParityNodes = (asmap->parityInfo->next) ? 2 : 1; + int i, j, nNodes, totalNumNodes; RF_ReconUnitNum_t which_ru; - int (*func) (RF_DagNode_t * node), (*undoFunc) (RF_DagNode_t * node); - int (*qfunc) (RF_DagNode_t * node); - char *name, *qname; - RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru); - long nfaults = qfuncs ? 2 : 1; - int lu_flag = (rf_enableAtomicRMW) ? 
1 : 0; /* lock/unlock flag */ + int (*func) (RF_DagNode_t * node), (*undoFunc) (RF_DagNode_t * node); + int (*qfunc) (RF_DagNode_t * node); + char*name, *qname; + RF_StripeNum_t parityStripeID = + rf_RaidAddressToParityStripeID(&(raidPtr->Layout), + asmap->raidAddress, &which_ru); + long nfaults = qfuncs ? 2 : 1; + int lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* Lock/unlock flag. */ if (rf_dagDebug) printf("[Creating parity-logging small-write DAG]\n"); @@ -342,17 +375,24 @@ rf_CommonCreateParityLoggingSmallWriteDAG( RF_ASSERT(nfaults == 1); dag_h->creator = "ParityLoggingSmallWriteDAG"; - /* DAG creation occurs in three steps: 1. count the number of nodes in - * the DAG 2. create the nodes 3. initialize the nodes 4. connect the - * nodes */ - - /* Step 1. compute number of nodes in the graph */ - - /* number of nodes: a read and write for each data unit a redundancy - * computation node for each parity node a read and Lpu for each - * parity unit a block and unblock node (2) a terminator node if - * atomic RMW an unlock node for each data unit, redundancy unit */ - totalNumNodes = (2 * numDataNodes) + numParityNodes + (2 * numParityNodes) + 3; + /* + * DAG creation occurs in three steps: + * 1. Count the number of nodes in the DAG. + * 2. Create the nodes. + * 3. Initialize the nodes. + * 4. Connect the nodes. + */ + + /* Step 1. Compute number of nodes in the graph. */ + + /* + * Number of nodes: a read and write for each data unit, a redundancy + * computation node for each parity node, a read and Lpu for each + * parity unit, a block and unblock node (2), a terminator node if + * atomic RMW, an unlock node for each data and redundancy unit. + */ + totalNumNodes = (2 * numDataNodes) + numParityNodes + + (2 * numParityNodes) + 3; if (lu_flag) totalNumNodes += numDataNodes; @@ -362,8 +402,9 @@ rf_CommonCreateParityLoggingSmallWriteDAG( dag_h->numCommits = 0; dag_h->numSuccedents = 1; - /* Step 2. create the nodes */ - RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList); + /* Step 2. Create the nodes. */ + RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), + (RF_DagNode_t *), allocList); i = 0; blockNode = &nodes[i]; i += 1; @@ -387,78 +428,103 @@ rf_CommonCreateParityLoggingSmallWriteDAG( } RF_ASSERT(i == totalNumNodes); - /* Step 3. initialize the nodes */ - /* initialize block node (Nil) */ - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList); + /* Step 3. Initialize the nodes. */ + /* Initialize block node (Nil). */ + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, + "Nil", allocList); - /* initialize unblock node (Nil) */ - rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", allocList); + /* Initialize unblock node (Nil). */ + rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, + "Nil", allocList); - /* initialize terminatory node (Trm) */ - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); + /* Initialize terminatory node (Trm). */ + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList); - /* initialize nodes which read old data (Rod) */ + /* Initialize nodes which read old data (Rod). 
*/ for (i = 0; i < numDataNodes; i++) { - rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rod", allocList); + rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, + nNodes, 1, 4, 0, dag_h, "Rod", allocList); RF_ASSERT(pda != NULL); - readDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * data */ + /* Physical disk addr desc. */ + readDataNodes[i].params[0].p = pda; + readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, + pda, allocList); /* Buffer to hold old data. */ readDataNodes[i].params[2].v = parityStripeID; - readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru); + readDataNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, + 0, which_ru); pda = pda->next; readDataNodes[i].propList[0] = NULL; readDataNodes[i].propList[1] = NULL; } - /* initialize nodes which read old parity (Rop) */ + /* Initialize nodes which read old parity (Rop). */ pda = asmap->parityInfo; i = 0; for (i = 0; i < numParityNodes; i++) { RF_ASSERT(pda != NULL); - rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rop", allocList); + rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, + rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, + nNodes, 1, 4, 0, dag_h, "Rop", allocList); readParityNodes[i].params[0].p = pda; - readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old - * parity */ + readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, + pda, allocList); /* Buffer to hold old parity. */ readParityNodes[i].params[2].v = parityStripeID; - readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + readParityNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); readParityNodes[i].propList[0] = NULL; pda = pda->next; } - /* initialize nodes which write new data (Wnd) */ + /* Initialize nodes which write new data (Wnd). */ pda = asmap->physInfo; for (i = 0; i < numDataNodes; i++) { RF_ASSERT(pda != NULL); - rf_InitNode(&writeDataNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, nNodes, 4, 0, dag_h, "Wnd", allocList); - writeDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new - * data to be written */ + rf_InitNode(&writeDataNodes[i], rf_wait, RF_TRUE, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + rf_GenericWakeupFunc, 1, nNodes, 4, 0, dag_h, + "Wnd", allocList); + /* Physical disk addr desc. */ + writeDataNodes[i].params[0].p = pda; + /* Buffer holding new data to be written. 
*/ + writeDataNodes[i].params[1].p = pda->bufPtr; writeDataNodes[i].params[2].v = parityStripeID; - writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + writeDataNodes[i].params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); if (lu_flag) { - /* initialize node to unlock the disk queue */ - rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList); - unlockDataNodes[i].params[0].p = pda; /* physical disk addr - * desc */ - unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru); + /* Initialize node to unlock the disk queue. */ + rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, + rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, + rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, + "Und", allocList); + /* Physical disk addr desc. */ + unlockDataNodes[i].params[0].p = pda; + unlockDataNodes[i].params[1].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, + lu_flag, which_ru); } pda = pda->next; } - /* initialize nodes which compute new parity */ - /* we use the simple XOR func in the double-XOR case, and when we're - * accessing only a portion of one stripe unit. the distinction + /* Initialize nodes which compute new parity. */ + /* + * We use the simple XOR func in the double-XOR case, and when we're + * accessing only a portion of one stripe unit. The distinction * between the two is that the regular XOR func assumes that the * targbuf is a full SU in size, and examines the pda associated with * the buffer to decide where within the buffer to XOR the data, * whereas the simple XOR func just XORs the data into the start of - * the buffer. */ - if ((numParityNodes == 2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) { + * the buffer. + */ + if ((numParityNodes == 2) || ((numDataNodes == 1) && + (asmap->totalSectorsAccessed < + raidPtr->Layout.sectorsPerStripeUnit))) { func = pfuncs->simple; undoFunc = rf_NullNodeUndoFunc; name = pfuncs->SimpleName; @@ -475,12 +541,15 @@ rf_CommonCreateParityLoggingSmallWriteDAG( qname = qfuncs->RegularName; } } - /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} - * nodes, and raidPtr */ - if (numParityNodes == 2) { /* double-xor case */ + /* + * Initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} + * nodes, and raidPtr. + */ + if (numParityNodes == 2) { /* Double-XOR case. */ for (i = 0; i < numParityNodes; i++) { - rf_InitNode(&xorNodes[i], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for - * xor */ + rf_InitNode(&xorNodes[i], rf_wait, RF_TRUE, func, + undoFunc, NULL, 1, nNodes, 7, 1, dag_h, name, + allocList); /* No wakeup func for XOR. */ xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD; xorNodes[i].params[0] = readDataNodes[i].params[0]; xorNodes[i].params[1] = readDataNodes[i].params[1]; @@ -489,48 +558,57 @@ rf_CommonCreateParityLoggingSmallWriteDAG( xorNodes[i].params[4] = writeDataNodes[i].params[0]; xorNodes[i].params[5] = writeDataNodes[i].params[1]; xorNodes[i].params[6].p = raidPtr; - xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as - * target buf */ + /* Use old parity buf as target buf. 
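+			 * (In this double-XOR case each Xor node thus sees
+			 * seven params: [0]/[1] old data pda/buffer from Rod,
+			 * [2]/[3] old parity pda/buffer from Rop, [4]/[5] new
+			 * data pda/buffer from Wnd, and [6] the raidPtr; its
+			 * result overwrites the old parity buffer read by Rop.)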
*/ + xorNodes[i].results[0] = readParityNodes[i].params[1].p; } } else { - /* there is only one xor node in this case */ - rf_InitNode(&xorNodes[0], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList); + /* There is only one xor node in this case. */ + rf_InitNode(&xorNodes[0], rf_wait, RF_TRUE, func, undoFunc, + NULL, 1, nNodes, + (2 * (numDataNodes + numDataNodes + 1) + 1), 1, + dag_h, name, allocList); xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD; for (i = 0; i < numDataNodes + 1; i++) { - /* set up params related to Rod and Rop nodes */ - xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1]; /* buffer pointer */ + /* Set up params related to Rod and Rop nodes. */ + xorNodes[0].params[2 * i + 0] = + readDataNodes[i].params[0]; /* pda */ + xorNodes[0].params[2 * i + 1] = + readDataNodes[i].params[1]; /* Buffer pointer */ } for (i = 0; i < numDataNodes; i++) { - /* set up params related to Wnd and Wnp nodes */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0]; /* pda */ - xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1]; /* buffer pointer */ + /* Set up params related to Wnd and Wnp nodes. */ + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = + writeDataNodes[i].params[0]; /* pda */ + xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = + writeDataNodes[i].params[1]; /* Buffer pointer */ } - xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr; /* xor node needs to get - * at RAID information */ + xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = + raidPtr; /* Xor node needs to get at RAID information. */ xorNodes[0].results[0] = readParityNodes[0].params[1].p; } - /* initialize the log node(s) */ + /* Initialize the log node(s). */ pda = asmap->parityInfo; for (i = 0; i < numParityNodes; i++) { RF_ASSERT(pda); - rf_InitNode(&lpuNodes[i], rf_wait, RF_FALSE, rf_ParityLogUpdateFunc, rf_ParityLogUpdateUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpu", allocList); - lpuNodes[i].params[0].p = pda; /* PhysDiskAddr of parity */ - lpuNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer to - * parity */ + rf_InitNode(&lpuNodes[i], rf_wait, RF_FALSE, + rf_ParityLogUpdateFunc, rf_ParityLogUpdateUndoFunc, + rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpu", allocList); + lpuNodes[i].params[0].p = pda; /* PhysDiskAddr of parity. */ + /* Buffer pointer to parity. */ + lpuNodes[i].params[1].p = xorNodes[i].results[0]; pda = pda->next; } - /* Step 4. connect the nodes */ + /* Step 4. Connect the nodes. */ - /* connect header to block node */ + /* Connect header to block node. */ RF_ASSERT(dag_h->numSuccedents == 1); RF_ASSERT(blockNode->numAntecedents == 0); dag_h->succedents[0] = blockNode; - /* connect block node to read old data nodes */ + /* Connect block node to read old data nodes. */ RF_ASSERT(blockNode->numSuccedents == (numDataNodes + numParityNodes)); for (i = 0; i < numDataNodes; i++) { blockNode->succedents[i] = &readDataNodes[i]; @@ -539,7 +617,7 @@ rf_CommonCreateParityLoggingSmallWriteDAG( readDataNodes[i].antType[0] = rf_control; } - /* connect block node to read old parity nodes */ + /* Connect block node to read old parity nodes. 
*/ for (i = 0; i < numParityNodes; i++) { blockNode->succedents[numDataNodes + i] = &readParityNodes[i]; RF_ASSERT(readParityNodes[i].numAntecedents == 1); @@ -547,11 +625,13 @@ rf_CommonCreateParityLoggingSmallWriteDAG( readParityNodes[i].antType[0] = rf_control; } - /* connect read old data nodes to write new data nodes */ + /* Connect read old data nodes to write new data nodes. */ for (i = 0; i < numDataNodes; i++) { - RF_ASSERT(readDataNodes[i].numSuccedents == numDataNodes + numParityNodes); + RF_ASSERT(readDataNodes[i].numSuccedents == + numDataNodes + numParityNodes); for (j = 0; j < numDataNodes; j++) { - RF_ASSERT(writeDataNodes[j].numAntecedents == numDataNodes + numParityNodes); + RF_ASSERT(writeDataNodes[j].numAntecedents == + numDataNodes + numParityNodes); readDataNodes[i].succedents[j] = &writeDataNodes[j]; writeDataNodes[j].antecedents[i] = &readDataNodes[i]; if (i == j) @@ -561,34 +641,41 @@ rf_CommonCreateParityLoggingSmallWriteDAG( } } - /* connect read old data nodes to xor nodes */ + /* Connect read old data nodes to xor nodes. */ for (i = 0; i < numDataNodes; i++) for (j = 0; j < numParityNodes; j++) { - RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes); - readDataNodes[i].succedents[numDataNodes + j] = &xorNodes[j]; + RF_ASSERT(xorNodes[j].numAntecedents == + numDataNodes + numParityNodes); + readDataNodes[i].succedents[numDataNodes + j] = + &xorNodes[j]; xorNodes[j].antecedents[i] = &readDataNodes[i]; xorNodes[j].antType[i] = rf_trueData; } - /* connect read old parity nodes to write new data nodes */ + /* Connect read old parity nodes to write new data nodes. */ for (i = 0; i < numParityNodes; i++) { - RF_ASSERT(readParityNodes[i].numSuccedents == numDataNodes + numParityNodes); + RF_ASSERT(readParityNodes[i].numSuccedents == + numDataNodes + numParityNodes); for (j = 0; j < numDataNodes; j++) { readParityNodes[i].succedents[j] = &writeDataNodes[j]; - writeDataNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; - writeDataNodes[j].antType[numDataNodes + i] = rf_control; + writeDataNodes[j].antecedents[numDataNodes + i] = + &readParityNodes[i]; + writeDataNodes[j].antType[numDataNodes + i] = + rf_control; } } - /* connect read old parity nodes to xor nodes */ + /* Connect read old parity nodes to xor nodes. */ for (i = 0; i < numParityNodes; i++) for (j = 0; j < numParityNodes; j++) { - readParityNodes[i].succedents[numDataNodes + j] = &xorNodes[j]; - xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i]; + readParityNodes[i].succedents[numDataNodes + j] = + &xorNodes[j]; + xorNodes[j].antecedents[numDataNodes + i] = + &readParityNodes[i]; xorNodes[j].antType[numDataNodes + i] = rf_trueData; } - /* connect xor nodes to write new parity nodes */ + /* Connect xor nodes to write new parity nodes. */ for (i = 0; i < numParityNodes; i++) { RF_ASSERT(xorNodes[i].numSuccedents == 1); RF_ASSERT(lpuNodes[i].numAntecedents == 1); @@ -599,30 +686,32 @@ rf_CommonCreateParityLoggingSmallWriteDAG( for (i = 0; i < numDataNodes; i++) { if (lu_flag) { - /* connect write new data nodes to unlock nodes */ + /* Connect write new data nodes to unlock nodes. */ RF_ASSERT(writeDataNodes[i].numSuccedents == 1); RF_ASSERT(unlockDataNodes[i].numAntecedents == 1); writeDataNodes[i].succedents[0] = &unlockDataNodes[i]; unlockDataNodes[i].antecedents[0] = &writeDataNodes[i]; unlockDataNodes[i].antType[0] = rf_control; - /* connect unlock nodes to unblock node */ + /* Connect unlock nodes to unblock node. 
*/ RF_ASSERT(unlockDataNodes[i].numSuccedents == 1); - RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); + RF_ASSERT(unblockNode->numAntecedents == + (numDataNodes + (nfaults * numParityNodes))); unlockDataNodes[i].succedents[0] = unblockNode; unblockNode->antecedents[i] = &unlockDataNodes[i]; unblockNode->antType[i] = rf_control; } else { - /* connect write new data nodes to unblock node */ + /* Connect write new data nodes to unblock node. */ RF_ASSERT(writeDataNodes[i].numSuccedents == 1); - RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes))); + RF_ASSERT(unblockNode->numAntecedents == + (numDataNodes + (nfaults * numParityNodes))); writeDataNodes[i].succedents[0] = unblockNode; unblockNode->antecedents[i] = &writeDataNodes[i]; unblockNode->antType[i] = rf_control; } } - /* connect write new parity nodes to unblock node */ + /* Connect write new parity nodes to unblock node. */ for (i = 0; i < numParityNodes; i++) { RF_ASSERT(lpuNodes[i].numSuccedents == 1); lpuNodes[i].succedents[0] = unblockNode; @@ -630,7 +719,7 @@ rf_CommonCreateParityLoggingSmallWriteDAG( unblockNode->antType[numDataNodes + i] = rf_control; } - /* connect unblock node to terminator */ + /* Connect unblock node to terminator. */ RF_ASSERT(unblockNode->numSuccedents == 1); RF_ASSERT(termNode->numAntecedents == 1); RF_ASSERT(termNode->numSuccedents == 0); @@ -640,34 +729,26 @@ rf_CommonCreateParityLoggingSmallWriteDAG( } -void -rf_CreateParityLoggingSmallWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, - RF_RedFuncs_t * qfuncs) +void +rf_CreateParityLoggingSmallWriteDAG(RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, + RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs) { dag_h->creator = "ParityLoggingSmallWriteDAG"; - rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_xorFuncs, NULL); + rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr, asmap, dag_h, bp, + flags, allocList, &rf_xorFuncs, NULL); } -void -rf_CreateParityLoggingLargeWriteDAG( - RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, - RF_DagHeader_t * dag_h, - void *bp, - RF_RaidAccessFlags_t flags, - RF_AllocListElem_t * allocList, - int nfaults, +void +rf_CreateParityLoggingLargeWriteDAG(RF_Raid_t *raidPtr, + RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp, + RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, int nfaults, int (*redFunc) (RF_DagNode_t *)) { dag_h->creator = "ParityLoggingSmallWriteDAG"; - rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularXorFunc); + rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr, asmap, dag_h, bp, + flags, allocList, 1, rf_RegularXorFunc); } -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ diff --git a/sys/dev/raidframe/rf_parityloggingdags.h b/sys/dev/raidframe/rf_parityloggingdags.h index 91ee70a9487..8067e9d95ad 100644 --- a/sys/dev/raidframe/rf_parityloggingdags.h +++ b/sys/dev/raidframe/rf_parityloggingdags.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_parityloggingdags.h,v 1.2 1999/02/16 00:03:08 niklas Exp $ */ +/* $OpenBSD: rf_parityloggingdags.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_parityloggingdags.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ + /* * 
Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,31 +30,29 @@ /**************************************************************************** * * - * rf_parityloggingdags.h -- header file for parity logging dags * + * rf_parityloggingdags.h -- Header file for parity logging dags. * * * ****************************************************************************/ -#ifndef _RF__RF_PARITYLOGGINGDAGS_H_ -#define _RF__RF_PARITYLOGGINGDAGS_H_ +#ifndef _RF__RF_PARITYLOGGINGDAGS_H_ +#define _RF__RF_PARITYLOGGINGDAGS_H_ + +/* Routines that create DAGs. */ -/* routines that create DAGs */ -void -rf_CommonCreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - int nfaults, int (*redFunc) (RF_DagNode_t *)); - void rf_CommonCreateParityLoggingSmallWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); +void rf_CommonCreateParityLoggingLargeWriteDAG(RF_Raid_t *, + RF_AccessStripeMap_t *, RF_DagHeader_t *, void *, + RF_RaidAccessFlags_t, RF_AllocListElem_t *, int, + int (*) (RF_DagNode_t *)); +void rf_CommonCreateParityLoggingSmallWriteDAG(RF_Raid_t *, + RF_AccessStripeMap_t *, RF_DagHeader_t *, void *, + RF_RaidAccessFlags_t, RF_AllocListElem_t *, + RF_RedFuncs_t *, RF_RedFuncs_t *); - void rf_CreateParityLoggingLargeWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - int nfaults, int (*redFunc) (RF_DagNode_t *)); - void rf_CreateParityLoggingSmallWriteDAG(RF_Raid_t * raidPtr, - RF_AccessStripeMap_t * asmap, RF_DagHeader_t * dag_h, - void *bp, RF_RaidAccessFlags_t flags, RF_AllocListElem_t * allocList, - RF_RedFuncs_t * pfuncs, RF_RedFuncs_t * qfuncs); +void rf_CreateParityLoggingLargeWriteDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + int, int (*) (RF_DagNode_t *)); +void rf_CreateParityLoggingSmallWriteDAG(RF_Raid_t *, RF_AccessStripeMap_t *, + RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *, + RF_RedFuncs_t *, RF_RedFuncs_t *); -#endif /* !_RF__RF_PARITYLOGGINGDAGS_H_ */ +#endif /* !_RF__RF_PARITYLOGGINGDAGS_H_ */ diff --git a/sys/dev/raidframe/rf_parityscan.c b/sys/dev/raidframe/rf_parityscan.c index 3a46b610d53..860388c33d8 100644 --- a/sys/dev/raidframe/rf_parityscan.c +++ b/sys/dev/raidframe/rf_parityscan.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_parityscan.c,v 1.6 2000/08/08 16:07:44 peter Exp $ */ +/* $OpenBSD: rf_parityscan.c,v 1.7 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_parityscan.c,v 1.9 2000/05/28 03:00:31 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,7 +30,7 @@ /***************************************************************************** * - * rf_parityscan.c -- misc utilities related to parity verification + * rf_parityscan.c -- Misc utilities related to parity verification. 
* *****************************************************************************/ @@ -44,22 +45,21 @@ #include "rf_parityscan.h" #include "rf_map.h" -/***************************************************************************************** + +/***************************************************************************** * - * walk through the entire arry and write new parity. + * Walk through the entire arry and write new parity. * This works by creating two DAGs, one to read a stripe of data and one to - * write new parity. The first is executed, the data is xored together, and - * then the second is executed. To avoid constantly building and tearing down + * write new parity. The first is executed, the data is xored together, and + * then the second is executed. To avoid constantly building and tearing down * the DAGs, we create them a priori and fill them in with the mapping * information as we go along. * - * there should never be more than one thread running this. + * There should never be more than one thread running this. * - ****************************************************************************************/ - -int -rf_RewriteParity(raidPtr) - RF_Raid_t *raidPtr; + *****************************************************************************/ +int +rf_RewriteParity(RF_Raid_t *raidPtr) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_AccessStripeMapHeader_t *asm_h; @@ -74,9 +74,9 @@ rf_RewriteParity(raidPtr) } if (raidPtr->status[0] != rf_rs_optimal) { /* - * We're in degraded mode. Don't try to verify parity now! - * XXX: this should be a "we don't want to", not a - * "we can't" error. + * We're in degraded mode. Don't try to verify parity now ! + * XXX: This should be a "we don't want to", not a + * "we can't" error. */ return (RF_PARITY_COULD_NOT_VERIFY); } @@ -87,18 +87,18 @@ rf_RewriteParity(raidPtr) pda.numSector = raidPtr->Layout.sectorsPerStripeUnit; rc = RF_PARITY_OKAY; - for (i = 0; i < raidPtr->totalSectors && - rc <= RF_PARITY_CORRECTED; + for (i = 0; i < raidPtr->totalSectors && rc <= RF_PARITY_CORRECTED; i += layoutPtr->dataSectorsPerStripe) { if (raidPtr->waitShutdown) { - /* Someone is pulling the plug on this set... - abort the re-write */ + /* + * Someone is pulling the plug on this set... + * Abort the re-write. 
+ */ return (1); } - asm_h = rf_MapAccess(raidPtr, i, - layoutPtr->dataSectorsPerStripe, - NULL, RF_DONT_REMAP); - raidPtr->parity_rewrite_stripes_done = + asm_h = rf_MapAccess(raidPtr, i, + layoutPtr->dataSectorsPerStripe, NULL, RF_DONT_REMAP); + raidPtr->parity_rewrite_stripes_done = i / layoutPtr->dataSectorsPerStripe ; rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0); switch (rc) { @@ -106,65 +106,66 @@ rf_RewriteParity(raidPtr) case RF_PARITY_CORRECTED: break; case RF_PARITY_BAD: - printf("Parity bad during correction\n"); + printf("Parity bad during correction.\n"); ret_val = 1; break; case RF_PARITY_COULD_NOT_CORRECT: - printf("Could not correct bad parity\n"); + printf("Could not correct bad parity.\n"); ret_val = 1; break; case RF_PARITY_COULD_NOT_VERIFY: - printf("Could not verify parity\n"); + printf("Could not verify parity.\n"); ret_val = 1; break; default: - printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc); + printf("Bad rc=%d from VerifyParity in" + " RewriteParity.\n", rc); ret_val = 1; } rf_FreeAccessStripeMap(asm_h); } return (ret_val); } -/***************************************************************************************** + + +/***************************************************************************** * - * verify that the parity in a particular stripe is correct. - * we validate only the range of parity defined by parityPDA, since - * this is all we have locked. The way we do this is to create an asm + * Verify that the parity in a particular stripe is correct. + * We validate only the range of parity defined by parityPDA, since + * this is all we have locked. The way we do this is to create an asm * that maps the whole stripe and then range-restrict it to the parity * region defined by the parityPDA. * - ****************************************************************************************/ -int -rf_VerifyParity(raidPtr, aasm, correct_it, flags) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *aasm; - int correct_it; - RF_RaidAccessFlags_t flags; + *****************************************************************************/ +int +rf_VerifyParity(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *aasm, int correct_it, + RF_RaidAccessFlags_t flags) { RF_PhysDiskAddr_t *parityPDA; RF_AccessStripeMap_t *doasm; RF_LayoutSW_t *lp; - int lrc, rc; + int lrc, rc; lp = raidPtr->Layout.map; if (lp->faultsTolerated == 0) { /* - * There isn't any parity. Call it "okay." - */ + * There isn't any parity. Call it "okay." + */ return (RF_PARITY_OKAY); } rc = RF_PARITY_OKAY; if (lp->VerifyParity) { for (doasm = aasm; doasm; doasm = doasm->next) { - for (parityPDA = doasm->parityInfo; parityPDA; + for (parityPDA = doasm->parityInfo; parityPDA; parityPDA = parityPDA->next) { - lrc = lp->VerifyParity(raidPtr, - doasm->raidAddress, - parityPDA, - correct_it, flags); + lrc = lp->VerifyParity(raidPtr, + doasm->raidAddress, parityPDA, correct_it, + flags); if (lrc > rc) { - /* see rf_parityscan.h for why this - * works */ + /* + * see rf_parityscan.h for why this + * works. 
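+					 * (The RF_PARITY_* codes defined there
+					 * are ordered by increasing severity --
+					 * OKAY, CORRECTED, BAD,
+					 * COULD_NOT_CORRECT, COULD_NOT_VERIFY
+					 * -- so keeping the maximum lrc reports
+					 * the worst status found over all
+					 * parity ranges.)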
+ */ rc = lrc; } } @@ -175,33 +176,28 @@ rf_VerifyParity(raidPtr, aasm, correct_it, flags) return (rc); } -int -rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) - RF_Raid_t *raidPtr; - RF_RaidAddr_t raidAddr; - RF_PhysDiskAddr_t *parityPDA; - int correct_it; - RF_RaidAccessFlags_t flags; +int +rf_VerifyParityBasic(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, + RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, - raidAddr); + raidAddr); RF_SectorCount_t numsector = parityPDA->numSector; - int numbytes = rf_RaidAddressToByte(raidPtr, numsector); - int bytesPerStripe = numbytes * layoutPtr->numDataCol; - RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ + int numbytes = rf_RaidAddressToByte(raidPtr, numsector); + int bytesPerStripe = numbytes * layoutPtr->numDataCol; + RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* Read, write dag. */ RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; RF_AccessStripeMapHeader_t *asm_h; RF_AccessStripeMap_t *asmap; RF_AllocListElem_t *alloclist; RF_PhysDiskAddr_t *pda; - char *pbuf, *buf, *end_p, *p; - int i, retcode; + char *pbuf, *buf, *end_p, *p; + int i, retcode; RF_ReconUnitNum_t which_ru; RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, - raidAddr, - &which_ru); - int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; + raidAddr, &which_ru); + int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; RF_AccTraceEntry_t tracerec; RF_MCPair_t *mcpair; @@ -209,30 +205,37 @@ rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) mcpair = rf_AllocMCPair(); rf_MakeAllocList(alloclist); - RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); - RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make - * sure buffer is zeroed */ + RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + + layoutPtr->numParityCol), (char *), alloclist); + /* Use calloc to make sure buffer is zeroed. */ + RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); end_p = buf + bytesPerStripe; - rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, + rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, + rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); blockNode = rd_dag_h->succedents[0]; unblockNode = blockNode->succedents[0]->succedents[0]; - /* map the stripe and fill in the PDAs in the dag */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); + /* Map the stripe and fill in the PDAs in the dag. */ + asm_h = rf_MapAccess(raidPtr, startAddr, + layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); asmap = asm_h->stripeMap; - for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { + for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; + i++, pda = pda->next) { RF_ASSERT(pda); rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); RF_ASSERT(pda->numSector != 0); if (rf_TryToRedirectPDA(raidPtr, pda, 0)) - goto out; /* no way to verify parity if disk is - * dead. return w/ good status */ + goto out; /* + * No way to verify parity if disk is + * dead. Return w/ good status. 
+ */ blockNode->succedents[i]->params[0].p = pda; blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + blockNode->succedents[i]->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); } RF_ASSERT(!asmap->parityInfo->next); @@ -240,9 +243,10 @@ rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) RF_ASSERT(asmap->parityInfo->numSector != 0); if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) goto out; - blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; + blockNode->succedents[layoutPtr->numDataCol]->params[0].p = + asmap->parityInfo; - /* fire off the DAG */ + /* Fire off the DAG. */ bzero((char *) &tracerec, sizeof(tracerec)); rd_dag_h->tracerec = &tracerec; @@ -258,7 +262,8 @@ rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) RF_WAIT_COND(mcpair->cond, mcpair->mutex); RF_UNLOCK_MUTEX(mcpair->mutex); if (rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); + RF_ERRORMSG("Unable to verify parity: can't read the" + " stripe.\n"); retcode = RF_PARITY_COULD_NOT_VERIFY; goto out; } @@ -268,26 +273,31 @@ rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) for (i = 0; i < numbytes; i++) { #if 0 if (pbuf[i] != 0 || buf[bytesPerStripe + i] != 0) { - printf("Bytes: %d %d %d\n", i, pbuf[i], buf[bytesPerStripe + i]); + printf("Bytes: %d %d %d\n", i, pbuf[i], + buf[bytesPerStripe + i]); } #endif if (pbuf[i] != buf[bytesPerStripe + i]) { if (!correct_it) - RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", - i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]); + RF_ERRORMSG3("Parity verify error: byte %d of" + " parity is 0x%x should be 0x%x.\n", i, + (u_char) buf[bytesPerStripe + i], + (u_char) pbuf[i]); retcode = RF_PARITY_BAD; break; } } if (retcode && correct_it) { - wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); wrBlock = wr_dag_h->succedents[0]; wrUnblock = wrBlock->succedents[0]->succedents[0]; wrBlock->succedents[0]->params[0].p = asmap->parityInfo; wrBlock->succedents[0]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wrBlock->succedents[0]->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); bzero((char *) &tracerec, sizeof(tracerec)); wr_dag_h->tracerec = &tracerec; if (rf_verifyParityDebug) { @@ -296,13 +306,14 @@ rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) } RF_LOCK_MUTEX(mcpair->mutex); mcpair->flag = 0; - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); + rf_DispatchDAG(wr_dag_h, (void (*) (void *)) + rf_MCPairWakeupFunc, (void *) mcpair); while (!mcpair->flag) RF_WAIT_COND(mcpair->cond, mcpair->mutex); RF_UNLOCK_MUTEX(mcpair->mutex); if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); + RF_ERRORMSG("Unable to correct parity in VerifyParity:" + " can't write the stripe.\n"); retcode = RF_PARITY_COULD_NOT_CORRECT; } rf_FreeDAG(wr_dag_h); @@ -317,31 +328,45 @@ out: return (retcode); } -int -rf_TryToRedirectPDA(raidPtr, pda, parity) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - int parity; +int 
+rf_TryToRedirectPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, int parity) { if (raidPtr->Disks[pda->row][pda->col].status == rf_ds_reconstructing) { - if (rf_CheckRUReconstructed(raidPtr->reconControl[pda->row]->reconMap, pda->startSector)) { + if (rf_CheckRUReconstructed(raidPtr->reconControl[pda->row] + ->reconMap, pda->startSector)) { if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { RF_RowCol_t or = pda->row, oc = pda->col; RF_SectorNum_t os = pda->startSector; if (parity) { - (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); + (raidPtr->Layout.map->MapParity) + (raidPtr, pda->raidAddress, + &pda->row, &pda->col, + &pda->startSector, RF_REMAP); if (rf_verifyParityDebug) - printf("VerifyParity: Redir P r %d c %d sect %ld -> r %d c %d sect %ld\n", - or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); + printf("VerifyParity: Redir P" + " r %d c %d sect %ld ->" + " r %d c %d sect %ld.\n", + or, oc, (long) os, + pda->row, pda->col, + (long) pda->startSector); } else { - (raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); + (raidPtr->Layout.map->MapSector) + (raidPtr, pda->raidAddress, + &pda->row, &pda->col, + &pda->startSector, RF_REMAP); if (rf_verifyParityDebug) - printf("VerifyParity: Redir D r %d c %d sect %ld -> r %d c %d sect %ld\n", - or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); + printf("VerifyParity: Redir D" + " r %d c %d sect %ld ->" + " r %d c %d sect %ld.\n", + or, oc, (long) os, + pda->row, pda->col, + (long) pda->startSector); } } else { - RF_RowCol_t spRow = raidPtr->Disks[pda->row][pda->col].spareRow; - RF_RowCol_t spCol = raidPtr->Disks[pda->row][pda->col].spareCol; + RF_RowCol_t spRow = + raidPtr->Disks[pda->row][pda->col].spareRow; + RF_RowCol_t spCol = + raidPtr->Disks[pda->row][pda->col].spareCol; pda->row = spRow; pda->col = spCol; } @@ -351,90 +376,99 @@ rf_TryToRedirectPDA(raidPtr, pda, parity) return (1); return (0); } -/***************************************************************************************** + + +/***************************************************************************** * - * currently a stub. + * Currently a stub. * - * takes as input an ASM describing a write operation and containing one failure, and - * verifies that the parity was correctly updated to reflect the write. + * Takes as input an ASM describing a write operation and containing one + * failure, and verifies that the parity was correctly updated to reflect the + * write. * - * if it's a data unit that's failed, we read the other data units in the stripe and - * the parity unit, XOR them together, and verify that we get the data intended for - * the failed disk. Since it's easy, we also validate that the right data got written - * to the surviving data disks. + * If it's a data unit that has failed, we read the other data units in the + * stripe and the parity unit, XOR them together, and verify that we get the + * data intended for the failed disk. Since it's easy, we also validate that + * the right data got written to the surviving data disks. * - * If it's the parity that failed, there's really no validation we can do except the - * above verification that the right data got written to all disks. This is because - * the new data intended for the failed disk is supplied in the ASM, but this is of - * course not the case for the new parity. 
+ * If it's the parity that failed, there's really no validation we can do + * except the above verification that the right data got written to all disks. + * This is because the new data intended for the failed disk is supplied in + * the ASM, but this is of course not the case for the new parity. * - ****************************************************************************************/ -int -rf_VerifyDegrModeWrite(raidPtr, asmh) - RF_Raid_t *raidPtr; - RF_AccessStripeMapHeader_t *asmh; + *****************************************************************************/ +int +rf_VerifyDegrModeWrite(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asmh) { return (0); } -/* creates a simple DAG with a header, a block-recon node at level 1, + + +/* + * Creates a simple DAG with a header, a block-recon node at level 1, * nNodes nodes at level 2, an unblock-recon node at level 3, and - * a terminator node at level 4. The stripe address field in + * a terminator node at level 4. The stripe address field in * the block and unblock nodes are not touched, nor are the pda * fields in the second-level nodes, so they must be filled in later. * - * commit point is established at unblock node - this means that any - * failure during dag execution causes the dag to fail + * Commit point is established at unblock node - this means that any + * failure during dag execution causes the dag to fail. */ RF_DagHeader_t * -rf_MakeSimpleDAG(raidPtr, nNodes, bytesPerSU, databuf, doFunc, undoFunc, name, alloclist, flags, priority) - RF_Raid_t *raidPtr; - int nNodes; - int bytesPerSU; - char *databuf; - int (*doFunc) (RF_DagNode_t * node); - int (*undoFunc) (RF_DagNode_t * node); - char *name; /* node names at the second level */ - RF_AllocListElem_t *alloclist; - RF_RaidAccessFlags_t flags; - int priority; +rf_MakeSimpleDAG(RF_Raid_t *raidPtr, int nNodes, int bytesPerSU, char *databuf, + int (*doFunc) (RF_DagNode_t * node), int (*undoFunc) (RF_DagNode_t * node), + char *name /* Node names at the second level. */, + RF_AllocListElem_t *alloclist, RF_RaidAccessFlags_t flags, int priority) { RF_DagHeader_t *dag_h; RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode; - int i; + int i; - /* create the nodes, the block & unblock nodes, and the terminator - * node */ - RF_CallocAndAdd(nodes, nNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), alloclist); + /* + * Create the nodes, the block & unblock nodes, and the terminator + * node. + */ + RF_CallocAndAdd(nodes, nNodes + 3, sizeof(RF_DagNode_t), + (RF_DagNode_t *), alloclist); blockNode = &nodes[nNodes]; unblockNode = blockNode + 1; termNode = unblockNode + 1; dag_h = rf_AllocDAGHeader(); dag_h->raidPtr = (void *) raidPtr; - dag_h->allocList = NULL;/* we won't use this alloc list */ + dag_h->allocList = NULL; /* We won't use this alloc list. */ dag_h->status = rf_enable; dag_h->numSuccedents = 1; dag_h->creator = "SimpleDAG"; - /* this dag can not commit until the unblock node is reached errors - * prior to the commit point imply the dag has failed */ + /* + * This dag can not commit until the unblock node is reached. + * Errors prior to the commit point imply the dag has failed. 
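+	 * (The shape of the graph is therefore:
+	 *   Hdr -> Nil (block) -> nNodes work nodes -> Nil (unblock, commit)
+	 *   -> Trm.)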
+ */ dag_h->numCommitNodes = 1; dag_h->numCommits = 0; dag_h->succedents[0] = blockNode; - rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", alloclist); - rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist); + rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, + "Nil", alloclist); + rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, + rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, + "Nil", alloclist); unblockNode->succedents[0] = termNode; for (i = 0; i < nNodes; i++) { - blockNode->succedents[i] = unblockNode->antecedents[i] = &nodes[i]; + blockNode->succedents[i] = unblockNode->antecedents[i] + = &nodes[i]; unblockNode->antType[i] = rf_control; - rf_InitNode(&nodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); + rf_InitNode(&nodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, + rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); nodes[i].succedents[0] = unblockNode; nodes[i].antecedents[0] = blockNode; nodes[i].antType[0] = rf_control; nodes[i].params[1].p = (databuf + (i * bytesPerSU)); } - rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist); + rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, + rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist); termNode->antecedents[0] = unblockNode; termNode->antType[0] = rf_control; return (dag_h); diff --git a/sys/dev/raidframe/rf_parityscan.h b/sys/dev/raidframe/rf_parityscan.h index 66324207abc..e43b001db15 100644 --- a/sys/dev/raidframe/rf_parityscan.h +++ b/sys/dev/raidframe/rf_parityscan.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_parityscan.h,v 1.2 1999/02/16 00:03:09 niklas Exp $ */ +/* $OpenBSD: rf_parityscan.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_parityscan.h,v 1.3 1999/02/05 00:06:14 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,41 +28,35 @@ * rights to redistribute these changes. 
*/ -#ifndef _RF__RF_PARITYSCAN_H_ -#define _RF__RF_PARITYSCAN_H_ +#ifndef _RF__RF_PARITYSCAN_H_ +#define _RF__RF_PARITYSCAN_H_ #include "rf_types.h" #include "rf_alloclist.h" -int rf_RewriteParity(RF_Raid_t * raidPtr); -int -rf_VerifyParityBasic(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); -int -rf_VerifyParity(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * stripeMap, - int correct_it, RF_RaidAccessFlags_t flags); -int rf_TryToRedirectPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, int parity); -int rf_VerifyDegrModeWrite(RF_Raid_t * raidPtr, RF_AccessStripeMapHeader_t * asmh); -RF_DagHeader_t * -rf_MakeSimpleDAG(RF_Raid_t * raidPtr, int nNodes, - int bytesPerSU, char *databuf, - int (*doFunc) (RF_DagNode_t *), - int (*undoFunc) (RF_DagNode_t *), - char *name, RF_AllocListElem_t * alloclist, - RF_RaidAccessFlags_t flags, int priority); +int rf_RewriteParity(RF_Raid_t *); +int rf_VerifyParityBasic(RF_Raid_t *, RF_RaidAddr_t, RF_PhysDiskAddr_t *, int, + RF_RaidAccessFlags_t); +int rf_VerifyParity(RF_Raid_t *, RF_AccessStripeMap_t *, int, + RF_RaidAccessFlags_t); +int rf_TryToRedirectPDA(RF_Raid_t *, RF_PhysDiskAddr_t *, int); +int rf_VerifyDegrModeWrite(RF_Raid_t *, RF_AccessStripeMapHeader_t *); +RF_DagHeader_t *rf_MakeSimpleDAG(RF_Raid_t *, int, int, char *, + int (*) (RF_DagNode_t *), int (*) (RF_DagNode_t *), char *, + RF_AllocListElem_t *, RF_RaidAccessFlags_t, int); -#define RF_DO_CORRECT_PARITY 1 -#define RF_DONT_CORRECT_PARITY 0 +#define RF_DO_CORRECT_PARITY 1 +#define RF_DONT_CORRECT_PARITY 0 /* - * Return vals for VerifyParity operation + * Return vals for VerifyParity operation. * * Ordering is important here. */ -#define RF_PARITY_OKAY 0 /* or no parity information */ -#define RF_PARITY_CORRECTED 1 -#define RF_PARITY_BAD 2 -#define RF_PARITY_COULD_NOT_CORRECT 3 -#define RF_PARITY_COULD_NOT_VERIFY 4 +#define RF_PARITY_OKAY 0 /* Or no parity information. */ +#define RF_PARITY_CORRECTED 1 +#define RF_PARITY_BAD 2 +#define RF_PARITY_COULD_NOT_CORRECT 3 +#define RF_PARITY_COULD_NOT_VERIFY 4 -#endif /* !_RF__RF_PARITYSCAN_H_ */ +#endif /* !_RF__RF_PARITYSCAN_H_ */ diff --git a/sys/dev/raidframe/rf_pq.c b/sys/dev/raidframe/rf_pq.c index 06a04287e16..20a56eb52c5 100644 --- a/sys/dev/raidframe/rf_pq.c +++ b/sys/dev/raidframe/rf_pq.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_pq.c,v 1.5 2000/01/11 18:02:22 peter Exp $ */ +/* $OpenBSD: rf_pq.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_pq.c,v 1.7 2000/01/07 03:41:02 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -47,58 +48,68 @@ #include "rf_map.h" #include "rf_pq.h" -RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"}; -RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"}; - -int -rf_RegularONPFunc(node) - RF_DagNode_t *node; +RF_RedFuncs_t rf_pFuncs = { + rf_RegularONPFunc, "Regular Old-New P", + rf_SimpleONPFunc, "Simple Old-New P" +}; +RF_RedFuncs_t rf_pRecoveryFuncs = { + rf_RecoveryPFunc, "Recovery P Func", + rf_RecoveryPFunc, "Recovery P Func" +}; + +int +rf_RegularONPFunc(RF_DagNode_t *node) { return (rf_RegularXorFunc(node)); } + + /* - same as simpleONQ func, but the coefficient is always 1 -*/ + * Same as simpleONQ func, but the coefficient is always 1. 
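+ * (P is maintained with a plain XOR, so its multiplier is effectively
+ * always 1, whereas the Q routines below derive a per-column coefficient
+ * from the stripe unit ID before calling rf_QDelta()/rf_IncQ().)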
+ */ -int -rf_SimpleONPFunc(node) - RF_DagNode_t *node; +int +rf_SimpleONPFunc(RF_DagNode_t *node) { return (rf_SimpleXorFunc(node)); } -int -rf_RecoveryPFunc(node) - RF_DagNode_t *node; +int +rf_RecoveryPFunc(RF_DagNode_t *node) { return (rf_RecoveryXorFunc(node)); } -int -rf_RegularPFunc(node) - RF_DagNode_t *node; +int +rf_RegularPFunc(RF_DagNode_t *node) { return (rf_RegularXorFunc(node)); } -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) - -static void -QDelta(char *dest, char *obuf, char *nbuf, unsigned length, - unsigned char coeff); -static void -rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, - unsigned length, unsigned coeff); - -RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"}; -RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"}; -RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"}; - -void -rf_PQDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) + + +#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) + +void rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length, + unsigned char coeff); +void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length, + unsigned coeff); + +RF_RedFuncs_t rf_qFuncs = { + rf_RegularONQFunc, "Regular Old-New Q", + rf_SimpleONQFunc, "Simple Old-New Q" +}; +RF_RedFuncs_t rf_qRecoveryFuncs = { + rf_RecoveryQFunc, "Recovery Q Func", + rf_RecoveryQFunc, "Recovery Q Func" +}; +RF_RedFuncs_t rf_pqRecoveryFuncs = { + rf_RecoveryPQFunc, "Recovery PQ Func", + rf_RecoveryPQFunc, "Recovery PQ Func" +}; + +void +rf_PQDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); unsigned ndfail = asmap->numDataFailed; @@ -107,50 +118,67 @@ rf_PQDagSelect( RF_ASSERT(RF_IO_IS_R_OR_W(type)); if (ntfail > 2) { - RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); + RF_ERRORMSG("more than two disks failed in a single group !" + " Aborting I/O operation.\n"); /* *infoFunc = */ *createFunc = NULL; return; } - /* ok, we can do this I/O */ + /* Ok, we can do this I/O. */ if (type == RF_IO_TYPE_READ) { switch (ndfail) { case 0: - /* fault free read */ - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */ + /* Fault free read. */ + *createFunc = (RF_VoidFuncPtr) + rf_CreateFaultFreeReadDAG; /* Same as raid 5. */ break; case 1: - /* lost a single data unit */ - /* two cases: (1) parity is not lost. do a normal raid - * 5 reconstruct read. (2) parity is lost. do a - * reconstruct read using "q". */ - if (ntfail == 2) { /* also lost redundancy */ - if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) - *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG; + /* Lost a single data unit. */ + /* + * Two cases: + * (1) Parity is not lost. Do a normal raid 5 + * reconstruct read. + * (2) Parity is lost. Do a reconstruct read using "q". + */ + if (ntfail == 2) { /* Also lost redundancy. */ + if (asmap->failedPDAs[1]->type == + RF_PDA_TYPE_PARITY) + *createFunc = (RF_VoidFuncPtr) + rf_PQ_110_CreateReadDAG; else - *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG; + *createFunc = (RF_VoidFuncPtr) + rf_PQ_101_CreateReadDAG; } else { - /* P and Q are ok. But is there a failure in - * some unaccessed data unit? 
*/ - if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) - *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG; + /* + * P and Q are ok. But is there a failure in + * some unaccessed data unit ? + */ + if (rf_NumFailedDataUnitsInStripe(raidPtr, + asmap) == 2) + *createFunc = (RF_VoidFuncPtr) + rf_PQ_200_CreateReadDAG; else - *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG; + *createFunc = (RF_VoidFuncPtr) + rf_PQ_100_CreateReadDAG; } break; case 2: - /* lost two data units */ - /* *infoFunc = PQOneTwo; */ + /* Lost two data units. */ + /* *infoFunc = rf_PQOneTwo; */ *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG; break; } return; } - /* a write */ + /* A write. */ switch (ntfail) { - case 0: /* fault free */ + case 0: /* Fault free. */ if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { + (((asmap->numStripeUnitsAccessed <= + (layoutPtr->numDataCol / 2)) && + (layoutPtr->numDataCol != 1)) || + (asmap->parityInfo->next != NULL) || + (asmap->qInfo->next != NULL) || + rf_CheckStripeForFailures(raidPtr, asmap))) { *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG; } else { @@ -158,79 +186,98 @@ rf_PQDagSelect( } break; - case 1: /* single disk fault */ + case 1: /* Single disk fault. */ if (npfail == 1) { - RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); - if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like - * normal mode raid5 - * write. */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG; + RF_ASSERT((asmap->failedPDAs[0]->type == + RF_PDA_TYPE_PARITY) || + (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); + if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { + /* + * Q died, treat like normal mode raid5 write. + */ + if (((asmap->numStripeUnitsAccessed <= + (layoutPtr->numDataCol / 2)) || + (asmap->numStripeUnitsAccessed == 1)) || + rf_NumFailedDataUnitsInStripe(raidPtr, + asmap)) + *createFunc = (RF_VoidFuncPtr) + rf_PQ_001_CreateSmallWriteDAG; else - *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG; - } else {/* parity died, small write only updating Q */ - if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) - || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) - *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_PQ_001_CreateLargeWriteDAG; + } else {/* Parity died, small write only updating Q. */ + if (((asmap->numStripeUnitsAccessed <= + (layoutPtr->numDataCol / 2)) || + (asmap->numStripeUnitsAccessed == 1)) || + rf_NumFailedDataUnitsInStripe(raidPtr, + asmap)) + *createFunc = (RF_VoidFuncPtr) + rf_PQ_010_CreateSmallWriteDAG; else - *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_PQ_010_CreateLargeWriteDAG; } - } else { /* data missing. Do a P reconstruct write if + } else { /* + * Data missing. Do a P reconstruct write if * only a single data unit is lost in the - * stripe, otherwise a PQ reconstruct write. */ + * stripe, otherwise a PQ reconstruct write. 
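+			 * (The digits in the rf_PQ_xyz DAG names appear to
+			 * encode how many data, parity and Q units have
+			 * failed, e.g. rf_PQ_100 for one dead data unit,
+			 * rf_PQ_011 for dead P and Q.)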
+ */ if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) - *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_PQ_200_CreateWriteDAG; else - *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_PQ_100_CreateWriteDAG; } break; - case 2: /* two disk faults */ + case 2: /* Two disk faults. */ switch (npfail) { - case 2: /* both p and q dead */ + case 2: /* Both p and q dead. */ *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG; break; - case 1: /* either p or q and dead data */ - RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); - RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); + case 1: /* Either p or q and dead data. */ + RF_ASSERT(asmap->failedPDAs[0]->type == + RF_PDA_TYPE_DATA); + RF_ASSERT((asmap->failedPDAs[1]->type == + RF_PDA_TYPE_PARITY) || + (asmap->failedPDAs[1]->type == + RF_PDA_TYPE_Q)); if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) - *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_PQ_101_CreateWriteDAG; else - *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_PQ_110_CreateWriteDAG; break; - case 0: /* double data loss */ + case 0: /* Double data loss. */ *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG; break; } break; - default: /* more than 2 disk faults */ + default: /* More than 2 disk faults. */ *createFunc = NULL; RF_PANIC(); } return; } + + /* - Used as a stop gap info function -*/ + * Used as a stop gap info function. + */ #if 0 -static void -PQOne(raidPtr, nSucc, nAnte, asmap) - RF_Raid_t *raidPtr; - int *nSucc; - int *nAnte; - RF_AccessStripeMap_t *asmap; +void +rf_PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, + RF_AccessStripeMap_t *asmap) { *nSucc = *nAnte = 1; } -static void -PQOneTwo(raidPtr, nSucc, nAnte, asmap) - RF_Raid_t *raidPtr; - int *nSucc; - int *nAnte; - RF_AccessStripeMap_t *asmap; +void +rf_PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, + RF_AccessStripeMap_t *asmap) { *nSucc = 1; *nAnte = 2; @@ -239,22 +286,21 @@ PQOneTwo(raidPtr, nSucc, nAnte, asmap) RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG) { - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, - rf_RegularPQFunc, RF_FALSE); + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, 2, rf_RegularPQFunc, RF_FALSE); } -int -rf_RegularONQFunc(node) - RF_DagNode_t *node; +int +rf_RegularONQFunc(RF_DagNode_t *node) { - int np = node->numParams; - int d; + int np = node->numParams; + int d; RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - int i; + int i; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; - char *qbuf, *qpbuf; - char *obuf, *nbuf; + char *qbuf, *qpbuf; + char *obuf, *nbuf; RF_PhysDiskAddr_t *old, *new; unsigned long coeff; unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; @@ -263,7 +309,7 @@ rf_RegularONQFunc(node) d = (np - 3) / 4; RF_ASSERT(4 * d + 3 == np); - qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ + qbuf = (char *) node->params[2 * d + 1].p; /* Q buffer. 
*/ for (i = 0; i < d; i++) { old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; obuf = (char *) node->params[2 * i + 1].p; @@ -271,55 +317,63 @@ rf_RegularONQFunc(node) nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; RF_ASSERT(new->numSector == old->numSector); RF_ASSERT(new->raidAddress == old->raidAddress); - /* the stripe unit within the stripe tells us the coefficient - * to use for the multiply. */ - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); - /* compute the data unit offset within the column, then add - * one */ + /* + * The stripe unit within the stripe tells us the coefficient + * to use for the multiply. + */ + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + new->raidAddress); + /* + * Compute the data unit offset within the column, then add + * one. + */ coeff = (coeff % raidPtr->Layout.numDataCol); - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); - QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); + qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, + old->startSector % secPerSU); + rf_QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, + old->numSector), coeff); } RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no - * I/O in this node */ + rf_GenericWakeupFunc(node, 0); /* + * Call wake func explicitly since no + * I/O in this node. + */ return (0); } -/* - See the SimpleXORFunc for the difference between a simple and regular func. - These Q functions should be used for - new q = Q(data,old data,old q) - style updates and not for - - q = ( new data, new data, .... ) - - computations. - - The simple q takes 2(2d+1)+1 params, where d is the number - of stripes written. The order of params is - old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d - [2d] old q pda_0, old q buffer - [2d_2] new data pda_0, new data buffer_0, ... new data pda_d, new data buffer_d - raidPtr -*/ +/* + * See the SimpleXORFunc for the difference between a simple and regular func. + * These Q functions should be used for + * new q = Q(data, old data, old q) + * style updates and not for + * q = (new data, new data, ...) + * computations. + * + * The simple q takes 2(2d+1)+1 params, where d is the number + * of stripes written. The order of params is + * old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... + * old data pda_d, old data buffer_d + * [2d] old q pda_0, old q buffer + * [2d_2] new data pda_0, new data buffer_0, ... + * new data pda_d, new data buffer_d + * raidPtr + */ -int -rf_SimpleONQFunc(node) - RF_DagNode_t *node; +int +rf_SimpleONQFunc(RF_DagNode_t *node) { - int np = node->numParams; - int d; + int np = node->numParams; + int d; RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; - int i; + int i; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; - char *qbuf; - char *obuf, *nbuf; + char *qbuf; + char *obuf, *nbuf; RF_PhysDiskAddr_t *old, *new; unsigned long coeff; @@ -327,7 +381,7 @@ rf_SimpleONQFunc(node) d = (np - 3) / 4; RF_ASSERT(4 * d + 3 == np); - qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ + qbuf = (char *) node->params[2 * d + 1].p; /* Q buffer. 
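+	 * (Unpacking the layout described above: with d = (np - 3) / 4
+	 * stripe units, params[2i]/params[2i+1] are the old data pda/buffer
+	 * pairs, params[2d]/params[2d+1] the old Q pda/buffer,
+	 * params[2(d+1+i)]/params[2(d+1+i)+1] the new data pairs, and
+	 * params[np-1] the raidPtr.)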
*/ for (i = 0; i < d; i++) { old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; obuf = (char *) node->params[2 * i + 1].p; @@ -335,42 +389,51 @@ rf_SimpleONQFunc(node) nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; RF_ASSERT(new->numSector == old->numSector); RF_ASSERT(new->raidAddress == old->raidAddress); - /* the stripe unit within the stripe tells us the coefficient - * to use for the multiply. */ - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); - /* compute the data unit offset within the column, then add - * one */ + /* + * The stripe unit within the stripe tells us the coefficient + * to use for the multiply. + */ + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + new->raidAddress); + /* + * Compute the data unit offset within the column, then add + * one. + */ coeff = (coeff % raidPtr->Layout.numDataCol); - QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); + rf_QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, + old->numSector), coeff); } RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no - * I/O in this node */ + rf_GenericWakeupFunc(node, 0); /* + * Call wake func explicitly since no + * I/O in this node. + */ return (0); } + RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG) { - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs); + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, &rf_pFuncs, &rf_qFuncs); } -static void RegularQSubr(RF_DagNode_t *node, char *qbuf); -static void -RegularQSubr(node, qbuf) - RF_DagNode_t *node; - char *qbuf; +void rf_RegularQSubr(RF_DagNode_t *, char *); + +void +rf_RegularQSubr(RF_DagNode_t *node, char *qbuf) { - int np = node->numParams; - int d; + int np = node->numParams; + int d; RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; + int i; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; - char *obuf, *qpbuf; + char *obuf, *qpbuf; RF_PhysDiskAddr_t *old; unsigned long coeff; @@ -381,44 +444,51 @@ RegularQSubr(node, qbuf) for (i = 0; i < d; i++) { old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; obuf = (char *) node->params[2 * i + 1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); - /* compute the data unit offset within the column, then add - * one */ + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + old->raidAddress); + /* + * Compute the data unit offset within the column, then add + * one. + */ coeff = (coeff % raidPtr->Layout.numDataCol); - /* the input buffers may not all be aligned with the start of - * the stripe. so shift by their sector offset within the - * stripe unit */ - qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); - rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); + /* + * The input buffers may not all be aligned with the start of + * the stripe. So shift by their sector offset within the + * stripe unit. + */ + qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, + old->startSector % secPerSU); + rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, + rf_RaidAddressToByte(raidPtr, old->numSector), coeff); } RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer); } + + /* - used in degraded writes. 
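
/*
 * A worked example of the alignment shift above, with illustrative numbers:
 * if sectorsPerStripeUnit == 32 and old->startSector == 100, the access
 * begins 100 % 32 == 4 sectors into its stripe unit, so its contribution is
 * accumulated rf_RaidAddressToByte(raidPtr, 4) bytes into the Q buffer
 * instead of at offset 0.
 */
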
-*/ + * Used in degraded writes. + */ -static void DegrQSubr(RF_DagNode_t *node); +void rf_DegrQSubr(RF_DagNode_t *); -static void -DegrQSubr(node) - RF_DagNode_t *node; +void +rf_DegrQSubr(RF_DagNode_t *node) { - int np = node->numParams; - int d; + int np = node->numParams; + int d; RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; - char *qbuf = node->results[1]; - char *obuf, *qpbuf; + char *qbuf = node->results[1]; + char *obuf, *qpbuf; RF_PhysDiskAddr_t *old; unsigned long coeff; unsigned fail_start; - int j; + int i, j; old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; fail_start = old->startSector % secPerSU; @@ -430,137 +500,151 @@ DegrQSubr(node) for (i = 0; i < d; i++) { old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; obuf = (char *) node->params[2 * i + 1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); - /* compute the data unit offset within the column, then add - * one */ + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + old->raidAddress); + /* + * Compute the data unit offset within the column, then add + * one. + */ coeff = (coeff % raidPtr->Layout.numDataCol); - /* the input buffers may not all be aligned with the start of - * the stripe. so shift by their sector offset within the - * stripe unit */ + /* + * The input buffers may not all be aligned with the start of + * the stripe. So shift by their sector offset within the + * stripe unit. + */ j = old->startSector % secPerSU; RF_ASSERT(j >= fail_start); qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); - rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); + rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, + rf_RaidAddressToByte(raidPtr, old->numSector), coeff); } RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer); } -/* - Called by large write code to compute the new parity and the new q. - structure of the params: - pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol - raidPtr - - for a total of 2d+1 arguments. - The result buffers results[0], results[1] are the buffers for the p and q, - respectively. - - We compute Q first, then compute P. The P calculation may try to reuse - one of the input buffers for its output, so if we computed P first, we would - corrupt the input for the q calculation. -*/ +/* + * Called by large write code to compute the new parity and the new q. + * + * Structure of the params: + * + * pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d (d = numDataCol) + * raidPtr + * + * For a total of 2d+1 arguments. + * The result buffers results[0], results[1] are the buffers for the p and q, + * respectively. + * + * We compute Q first, then compute P. The P calculation may try to reuse + * one of the input buffers for its output, so if we computed P first, we would + * corrupt the input for the q calculation. + */ -int -rf_RegularPQFunc(node) - RF_DagNode_t *node; +int +rf_RegularPQFunc(RF_DagNode_t *node) { - RegularQSubr(node, node->results[1]); - return (rf_RegularXorFunc(node)); /* does the wakeup */ + rf_RegularQSubr(node, node->results[1]); + return (rf_RegularXorFunc(node)); /* Does the wakeup. */ } -int -rf_RegularQFunc(node) - RF_DagNode_t *node; +int +rf_RegularQFunc(RF_DagNode_t *node) { - /* Almost ... 
adjust Qsubr args */ - RegularQSubr(node, node->results[0]); - rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no - * I/O in this node */ + /* Almost ... adjust Qsubr args. */ + rf_RegularQSubr(node, node->results[0]); + rf_GenericWakeupFunc(node, 0); /* + * Call wake func explicitly since no + * I/O in this node. + */ return (0); } -/* - Called by singly degraded write code to compute the new parity and the new q. - structure of the params: - pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d - failedPDA raidPtr - - for a total of 2d+2 arguments. - The result buffers results[0], results[1] are the buffers for the parity and q, - respectively. - - We compute Q first, then compute parity. The parity calculation may try to reuse - one of the input buffers for its output, so if we computed parity first, we would - corrupt the input for the q calculation. - - We treat this identically to the regularPQ case, ignoring the failedPDA extra argument. -*/ +/* + * Called by singly degraded write code to compute the new parity and + * the new q. + * + * Structure of the params: + * + * pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d + * failedPDA raidPtr + * + * for a total of 2d+2 arguments. + * The result buffers results[0], results[1] are the buffers for the parity + * and q, respectively. + * + * We compute Q first, then compute parity. The parity calculation may try + * to reuse one of the input buffers for its output, so if we computed parity + * first, we would corrupt the input for the q calculation. + * + * We treat this identically to the regularPQ case, ignoring the failedPDA + * extra argument. + */ -void -rf_Degraded_100_PQFunc(node) - RF_DagNode_t *node; +void +rf_Degraded_100_PQFunc(RF_DagNode_t *node) { int np = node->numParams; RF_ASSERT(np >= 2); - DegrQSubr(node); + rf_DegrQSubr(node); rf_RecoveryXorFunc(node); } /* - The two below are used when reading a stripe with a single lost data unit. - The parameters are - - pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr - - and results[0] contains the data buffer. Which is originally zero-filled. - -*/ + * The two below are used when reading a stripe with a single lost data unit. + * The parameters are + * + * pda_0, buffer_0, ..., pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr + * + * and results[0] contains the data buffer, which is originally zero-filled. + */ -/* this Q func is used by the degraded-mode dag functions to recover lost data. - * the second-to-last parameter is the PDA for the failed portion of the access. - * the code here looks at this PDA and assumes that the xor target buffer is - * equal in size to the number of sectors in the failed PDA. It then uses - * the other PDAs in the parameter list to determine where within the target - * buffer the corresponding data should be xored. +/* + * This Q func is used by the degraded-mode dag functions to recover lost data. + * The second-to-last parameter is the PDA for the failed portion of the + * access. The code here looks at this PDA and assumes that the xor target + * buffer is equal in size to the number of sectors in the failed PDA. It then + * uses the other PDAs in the parameter list to determine where within the + * target buffer the corresponding data should be xored. * * Recall the basic equation is * - * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256 + * Q = (data_1 + 2 * data_2 ... 
+ k * data_k) mod 256 * * so to recover data_j we need * - * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256 + * J data_j = (Q - data_1 - 2 data_2 ... - k * data_k) mod 256 * - * So the coefficient for each buffer is (255 - data_col), and j should be initialized by - * copying Q into it. Then we need to do a table lookup to convert to solve + * So the coefficient for each buffer is (255 - data_col), and j should be + * initialized by copying Q into it. Then we need to do a table lookup to + * convert to solve * data_j /= J * - * */ -int -rf_RecoveryQFunc(node) - RF_DagNode_t *node; + +int +rf_RecoveryQFunc(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; - RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; - int i; + RF_PhysDiskAddr_t *failedPDA = + (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; + int i; RF_PhysDiskAddr_t *pda; - RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); - char *srcbuf, *destbuf; + RF_RaidAddr_t suoffset; + RF_RaidAddr_t failedSUOffset = + rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); + char *srcbuf, *destbuf; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; unsigned long coeff; RF_ETIMER_START(timer); - /* start by copying Q into the buffer */ + /* Start by copying Q into the buffer. */ bcopy(node->params[node->numParams - 3].p, node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); for (i = 0; i < node->numParams - 4; i += 2) { @@ -568,15 +652,20 @@ rf_RecoveryQFunc(node) pda = (RF_PhysDiskAddr_t *) node->params[i].p; srcbuf = (char *) node->params[i + 1].p; suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); - destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress); - /* compute the data unit offset within the column */ + destbuf = ((char *) node->results[0]) + + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + pda->raidAddress); + /* Compute the data unit offset within the column. */ coeff = (coeff % raidPtr->Layout.numDataCol); - rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); + rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, + rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); } - /* Do the nasty inversion now */ - coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol); - rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); + /* Do the nasty inversion now. 
*/ + coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + failedPDA->startSector) % raidPtr->Layout.numDataCol); + rf_InvertQ(node->results[0], node->results[0], + rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer); @@ -584,43 +673,42 @@ rf_RecoveryQFunc(node) return (0); } -int -rf_RecoveryPQFunc(node) - RF_DagNode_t *node; +int +rf_RecoveryPQFunc(RF_DagNode_t *node) { RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; - printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid); + printf("raid%d: Recovery from PQ not implemented.\n", raidPtr->raidid); return (1); } -/* - Degraded write Q subroutine. - Used when P is dead. - Large-write style Q computation. - Parameters - - (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr. - We ignore failedPDA. - This is a "simple style" recovery func. -*/ +/* + * Degraded write Q subroutine. + * Used when P is dead. + * Large-write style Q computation. + * Parameters: + * + * (pda, buf), (pda, buf), ..., (failedPDA, bufPtr), failedPDA, raidPtr. + * + * We ignore failedPDA. + * + * This is a "simple style" recovery func. + */ -void -rf_PQ_DegradedWriteQFunc(node) - RF_DagNode_t *node; +void +rf_PQ_DegradedWriteQFunc(RF_DagNode_t *node) { - int np = node->numParams; - int d; + int np = node->numParams; + int d; RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; - int i; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_Etimer_t timer; - char *qbuf = node->results[0]; - char *obuf, *qpbuf; + char *qbuf = node->results[0]; + char *obuf, *qpbuf; RF_PhysDiskAddr_t *old; unsigned long coeff; - int fail_start, j; + int fail_start, i, j; old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; fail_start = old->startSector % secPerSU; @@ -633,14 +721,18 @@ rf_PQ_DegradedWriteQFunc(node) for (i = 0; i < d; i++) { old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; obuf = (char *) node->params[2 * i + 1].p; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); - /* compute the data unit offset within the column, then add - * one */ + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + old->raidAddress); + /* + * Compute the data unit offset within the column, then add + * one. + */ coeff = (coeff % raidPtr->Layout.numDataCol); j = old->startSector % secPerSU; RF_ASSERT(j >= fail_start); qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); - rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); + rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, + rf_RaidAddressToByte(raidPtr, old->numSector), coeff); } RF_ETIMER_STOP(timer); @@ -650,36 +742,31 @@ rf_PQ_DegradedWriteQFunc(node) } - - -/* Q computations */ +/* Q computations. 
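
/*
 * The recovery algebra above, restated with explicit coefficients.  All of
 * this is the symbol arithmetic implemented by the rf_qfor, rf_rn and
 * rf_qinv tables (the code operates on 5-bit symbols, despite the "mod 256"
 * wording in the comment):
 *
 *	Q = c_1 d_1 + c_2 d_2 + ... + c_k d_k
 *
 * so when column j is lost,
 *
 *	c_j d_j = Q - (c_1 d_1 + ... + c_(j-1) d_(j-1)
 *	               + c_(j+1) d_(j+1) + ... + c_k d_k)
 *
 * rf_RecoveryQFunc() builds the right-hand side in results[0], starting from
 * a copy of Q and folding in each surviving column with rf_IncQ(); the final
 * rf_InvertQ() call is the division by c_j (the table lookup the comment
 * refers to).
 */
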
*/ /* - coeff - colummn; - - compute dest ^= qfor[28-coeff][rn[coeff+1] a] - - on 5-bit basis; - length in bytes; -*/ + * Coeff - colummn; + * + * Compute dest ^= qfor[28-coeff][rn[coeff+1] a] + * + * On 5-bit basis; + * Length in bytes; + */ -void -rf_IncQ(dest, buf, length, coeff) - unsigned long *dest; - unsigned long *buf; - unsigned length; - unsigned coeff; +void +rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length, + unsigned coeff) { unsigned long a, d, new; unsigned long a1, a2; unsigned int *q = &(rf_qfor[28 - coeff][0]); unsigned r = rf_rn[coeff + 1]; -#define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f) -#define INSERT(a,i) (a << (5L*i)) +#define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f) +#define INSERT(a,i) (a << (5L*i)) length /= 8; - /* 13 5 bit quants in a 64 bit word */ + /* 13 5 bit quants in a 64 bit word. */ while (length) { a = *buf++; d = *dest; @@ -701,7 +788,7 @@ rf_IncQ(dest, buf, length, coeff) a1 = q[a1]; a2 = q[a2]; new = new | INSERT(a1, 5) | INSERT(a2, 6); -#if RF_LONGSHIFT > 2 +#if RF_LONGSHIFT > 2 a1 = EXTRACT(a, 7) ^ r; a2 = EXTRACT(a, 8) ^ r; a1 = q[a1]; @@ -717,29 +804,27 @@ rf_IncQ(dest, buf, length, coeff) a1 = q[a1]; a2 = q[a2]; new = new | INSERT(a1, 11) | INSERT(a2, 12); -#endif /* RF_LONGSHIFT > 2 */ +#endif /* RF_LONGSHIFT > 2 */ d ^= new; *dest++ = d; length--; } } -/* - compute - dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ] - on a five bit basis. - optimization: compute old ^ new on 64 bit basis. - - length in bytes. -*/ +/* + * Compute. + * + * dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new)] + * + * On a five bit basis. + * Optimization: compute old ^ new on 64 bit basis. + * + * Length in bytes. + */ -static void -QDelta( - char *dest, - char *obuf, - char *nbuf, - unsigned length, +void +rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length, unsigned char coeff) { unsigned long a, d, new; @@ -747,19 +832,23 @@ QDelta( unsigned int *q = &(rf_qfor[28 - coeff][0]); unsigned int r = rf_rn[coeff + 1]; - r = a1 = a2 = new = d = a = 0; /* XXX for now... */ - q = NULL; /* XXX for now */ + r = a1 = a2 = new = d = a = 0; /* XXX For now... */ + q = NULL; /* XXX For now */ -#ifdef _KERNEL - /* PQ in kernel currently not supported because the encoding/decoding - * table is not present */ +#ifdef _KERNEL + /* + * PQ in kernel currently not supported because the encoding/decoding + * table is not present. + */ bzero(dest, length); -#else /* _KERNEL */ - /* this code probably doesn't work and should be rewritten -wvcii */ - /* 13 5 bit quants in a 64 bit word */ +#else /* _KERNEL */ + /* This code probably doesn't work and should be rewritten. -wvcii */ + /* 13 5 bit quants in a 64 bit word. */ length /= 8; while (length) { - a = *obuf++; /* XXX need to reorg to avoid cache conflicts */ + a = *obuf++; /* + * XXX Need to reorg to avoid cache conflicts. + */ a ^= *nbuf++; d = *dest; a1 = EXTRACT(a, 0) ^ r; @@ -782,7 +871,7 @@ QDelta( a1 = q[a1]; a2 = q[a2]; new = new | INSERT(a1, 5) | INSERT(a2, 6); -#if RF_LONGSHIFT > 2 +#if RF_LONGSHIFT > 2 a1 = EXTRACT(a, 7) ^ r; a2 = EXTRACT(a, 8) ^ r; a1 = q[a1]; @@ -798,37 +887,33 @@ QDelta( a1 = q[a1]; a2 = q[a2]; new = new | INSERT(a1, 11) | INSERT(a2, 12); -#endif /* RF_LONGSHIFT > 2 */ +#endif /* RF_LONGSHIFT > 2 */ d ^= new; *dest++ = d; length--; } -#endif /* _KERNEL */ +#endif /* _KERNEL */ } -/* - recover columns a and b from the given p and q into - bufs abuf and bbuf. All bufs are word aligned. - Length is in bytes. 
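
/*
 * A minimal scalar sketch of the per-word update that rf_IncQ() unrolls
 * above.  Illustrative only: the helper name is made up, it assumes the
 * rf_qfor/rf_rn tables declared in rf_pqdeg.h and a 64-bit unsigned long
 * (13 packed 5-bit symbols per word), and it ignores the RF_LONGSHIFT
 * conditionals.
 */
static unsigned long
incq_one_word(unsigned long dest, unsigned long a, unsigned coeff)
{
	unsigned int *q = &(rf_qfor[28 - coeff][0]);
	unsigned long r = rf_rn[coeff + 1];
	unsigned long new = 0, sym;
	int i;

	for (i = 0; i < 13; i++) {
		sym = (a >> (5L * i)) & 0x1f;			 /* EXTRACT */
		new |= ((unsigned long) q[sym ^ r]) << (5L * i); /* INSERT */
	}
	return (dest ^ new);
}
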
-*/ /* + * Recover columns a and b from the given p and q into + * bufs abuf and bbuf. All bufs are word aligned. + * Length is in bytes. + */ + +/* * XXX * * Everything about this seems wrong. */ -void -rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b) - unsigned long *pbuf; - unsigned long *qbuf; - unsigned long *abuf; - unsigned long *bbuf; - unsigned length; - unsigned coeff_a; - unsigned coeff_b; + +void +rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf, + unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b) { unsigned long p, q, a, a0, a1; - int col = (29 * coeff_a) + coeff_b; + int col = (29 * coeff_a) + coeff_b; unsigned char *q0 = &(rf_qinv[col][0]); length /= 8; @@ -838,10 +923,13 @@ rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b) a0 = EXTRACT(p, 0); a1 = EXTRACT(q, 0); a = q0[a0 << 5 | a1]; -#define MF(i) \ - a0 = EXTRACT(p,i); \ - a1 = EXTRACT(q,i); \ - a = a | INSERT(q0[a0<<5 | a1],i) + +#define MF(i) \ +do { \ + a0 = EXTRACT(p, i); \ + a1 = EXTRACT(q, i); \ + a = a | INSERT(q0[a0<<5 | a1], i); \ +} while (0) MF(1); MF(2); @@ -856,37 +944,34 @@ rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b) MF(10); MF(11); MF(12); -#endif /* 0 */ +#endif /* 0 */ *abuf++ = a; *bbuf++ = a ^ p; length--; } } -/* - Lost parity and a data column. Recover that data column. - Assume col coeff is lost. Let q the contents of Q after - all surviving data columns have been q-xored out of it. - Then we have the equation - - q[28-coeff][a_i ^ r_i+1] = q - - but q is cyclic with period 31. - So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] = - q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} . - - so a_i = r_{coeff+1} ^ q[3+coeff][q] - - The routine is passed q buffer and the buffer - the data is to be recoverd into. They can be the same. -*/ +/* + * Lost parity and a data column. Recover that data column. + * Assume col coeff is lost. Let q the contents of Q after + * all surviving data columns have been q-xored out of it. + * Then we have the equation + * + * q[28-coeff][a_i ^ r_i+1] = q + * + * but q is cyclic with period 31. + * So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] = + * q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} . + * + * so a_i = r_{coeff+1} ^ q[3+coeff][q] + * + * The routine is passed q buffer and the buffer + * the data is to be recoverd into. They can be the same. + */ -static void -rf_InvertQ( - unsigned long *qbuf, - unsigned long *abuf, - unsigned length, +void +rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length, unsigned coeff) { unsigned long a, new; @@ -894,7 +979,7 @@ rf_InvertQ( unsigned int *q = &(rf_qfor[3 + coeff][0]); unsigned r = rf_rn[coeff + 1]; - /* 13 5 bit quants in a 64 bit word */ + /* 13 5 bit quants in a 64 bit word. 
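
/*
 * A single-symbol sketch of the lookup rf_PQ_recover() performs above.  The
 * helper name is made up; it assumes the full userland rf_qinv table from
 * rf_pqdeg.h (the kernel build stubs that table out), whose rows are picked
 * by the pair of lost column coefficients and indexed by the packed pair of
 * 5-bit P and Q symbols.
 */
static void
pq_recover_one_symbol(unsigned p_sym, unsigned q_sym, unsigned coeff_a,
    unsigned coeff_b, unsigned *a_sym, unsigned *b_sym)
{
	unsigned char *q0 = &(rf_qinv[(29 * coeff_a) + coeff_b][0]);

	*a_sym = q0[(p_sym & 0x1f) << 5 | (q_sym & 0x1f)];
	/* P is a plain XOR parity, so the second column follows directly. */
	*b_sym = *a_sym ^ (p_sym & 0x1f);
}
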
*/ length /= 8; while (length) { a = *qbuf++; @@ -903,24 +988,26 @@ rf_InvertQ( a1 = r ^ q[a1]; a2 = r ^ q[a2]; new = INSERT(a2, 1) | a1; -#define M(i,j) \ - a1 = EXTRACT(a,i); \ - a2 = EXTRACT(a,j); \ - a1 = r ^ q[a1]; \ - a2 = r ^ q[a2]; \ - new = new | INSERT(a1,i) | INSERT(a2,j) + +#define M(i,j) \ +do { \ + a1 = EXTRACT(a, i); \ + a2 = EXTRACT(a, j); \ + a1 = r ^ q[a1]; \ + a2 = r ^ q[a2]; \ + new = new | INSERT(a1, i) | INSERT(a2, j); \ +} while (0) M(2, 3); M(4, 5); M(5, 6); -#if RF_LONGSHIFT > 2 +#if RF_LONGSHIFT > 2 M(7, 8); M(9, 10); M(11, 12); -#endif /* RF_LONGSHIFT > 2 */ +#endif /* RF_LONGSHIFT > 2 */ *abuf++ = new; length--; } } -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ +#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pq.h b/sys/dev/raidframe/rf_pq.h index 70472786c85..36ce43cead4 100644 --- a/sys/dev/raidframe/rf_pq.h +++ b/sys/dev/raidframe/rf_pq.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_pq.h,v 1.2 1999/02/16 00:03:10 niklas Exp $ */ +/* $OpenBSD: rf_pq.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_pq.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ + /* * rf_pq.h */ @@ -30,46 +31,41 @@ * rights to redistribute these changes. */ -#ifndef _RF__RF_PQ_H_ -#define _RF__RF_PQ_H_ +#ifndef _RF__RF_PQ_H_ +#define _RF__RF_PQ_H_ #include "rf_archs.h" extern RF_RedFuncs_t rf_pFuncs; extern RF_RedFuncs_t rf_pRecoveryFuncs; -int rf_RegularONPFunc(RF_DagNode_t * node); -int rf_SimpleONPFunc(RF_DagNode_t * node); -int rf_RecoveryPFunc(RF_DagNode_t * node); -int rf_RegularPFunc(RF_DagNode_t * node); +int rf_RegularONPFunc(RF_DagNode_t *); +int rf_SimpleONPFunc(RF_DagNode_t *); +int rf_RecoveryPFunc(RF_DagNode_t *); +int rf_RegularPFunc(RF_DagNode_t *); -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) +#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) extern RF_RedFuncs_t rf_qFuncs; extern RF_RedFuncs_t rf_qRecoveryFuncs; extern RF_RedFuncs_t rf_pqRecoveryFuncs; -void -rf_PQDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); +void rf_PQDagSelect(RF_Raid_t *, RF_IoType_t, RF_AccessStripeMap_t *, + RF_VoidFuncPtr *); RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG); -int rf_RegularONQFunc(RF_DagNode_t * node); -int rf_SimpleONQFunc(RF_DagNode_t * node); +int rf_RegularONQFunc(RF_DagNode_t *); +int rf_SimpleONQFunc(RF_DagNode_t *); RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG); -int rf_RegularPQFunc(RF_DagNode_t * node); -int rf_RegularQFunc(RF_DagNode_t * node); -void rf_Degraded_100_PQFunc(RF_DagNode_t * node); -int rf_RecoveryQFunc(RF_DagNode_t * node); -int rf_RecoveryPQFunc(RF_DagNode_t * node); -void rf_PQ_DegradedWriteQFunc(RF_DagNode_t * node); -void -rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length, - unsigned coeff); -void -rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf, - unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b); +int rf_RegularPQFunc(RF_DagNode_t *); +int rf_RegularQFunc(RF_DagNode_t *); +void rf_Degraded_100_PQFunc(RF_DagNode_t *); +int rf_RecoveryQFunc(RF_DagNode_t *); +int rf_RecoveryPQFunc(RF_DagNode_t *); +void rf_PQ_DegradedWriteQFunc(RF_DagNode_t *); +void rf_IncQ(unsigned long *, unsigned long *, unsigned, unsigned); +void rf_PQ_recover(unsigned long *, unsigned long *, unsigned long *, + unsigned long *, unsigned, unsigned, unsigned); -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ +#endif /* (RF_INCLUDE_DECL_PQ > 0) || 
(RF_INCLUDE_RAID6 > 0) */ -#endif /* !_RF__RF_PQ_H_ */ +#endif /* !_RF__RF_PQ_H_ */ diff --git a/sys/dev/raidframe/rf_pqdeg.c b/sys/dev/raidframe/rf_pqdeg.c index 6cfbebc4430..f90c72f82ff 100644 --- a/sys/dev/raidframe/rf_pqdeg.c +++ b/sys/dev/raidframe/rf_pqdeg.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_pqdeg.c,v 1.4 2000/01/11 18:02:22 peter Exp $ */ +/* $OpenBSD: rf_pqdeg.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_pqdeg.c,v 1.5 2000/01/07 03:41:04 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,7 +30,7 @@ #include "rf_archs.h" -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) +#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) #include "rf_types.h" #include "rf_raid.h" @@ -47,171 +48,177 @@ #include "rf_pq.h" /* - Degraded mode dag functions for P+Q calculations. - - The following nomenclature is used. - - PQ_<D><P><Q>_Create{Large,Small}<Write|Read>DAG - - where <D><P><Q> are single digits representing the number of failed - data units <D> (0,1,2), parity units <P> (0,1), and Q units <Q>, effecting - the I/O. The reads have only PQ_<D><P><Q>_CreateReadDAG variants, while - the single fault writes have both large and small write versions. (Single fault - PQ is equivalent to normal mode raid 5 in many aspects. - - Some versions degenerate into the same case, and are grouped together below. -*/ + * Degraded mode dag functions for P+Q calculations. + * + * The following nomenclature is used. + * + * PQ_<D><P><Q>_Create{Large,Small}<Write|Read>DAG + * + * where <D><P><Q> are single digits representing the number of failed + * data units <D> (0,1,2), parity units <P> (0,1), and Q units <Q>, effecting + * the I/O. The reads have only PQ_<D><P><Q>_CreateReadDAG variants, while + * the single fault writes have both large and small write versions. + * Single fault PQ is equivalent to normal mode raid 5 in many aspects. + * + * Some versions degenerate into the same case, and are grouped together below. + */ -/* Reads, single failure - we have parity, so we can do a raid 5 - reconstruct read. -*/ +/* Reads, single failure. */ +/* We have parity, so we can do a raid 5 reconstruct read. */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateReadDAG) { - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs); + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + &rf_pRecoveryFuncs); } -/* Reads double failure */ -/* - Q is lost, but not parity - so we can a raid 5 reconstruct read. -*/ +/* Reads double failure. */ + +/* + * Q is lost, but not parity. + * So we can a raid 5 reconstruct read. + */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateReadDAG) { - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pRecoveryFuncs); + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + &rf_pRecoveryFuncs); } -/* - parity is lost, so we need to - do a reconstruct read and recompute - the data with Q. -*/ +/* + * Parity is lost, so we need to + * do a reconstruct read and recompute + * the data with Q. + */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateReadDAG) { RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the DegradedReadDAG code */ + /* Swap P and Q pointers to fake out the DegradedReadDAG code. 
*/ temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_qRecoveryFuncs); + rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, + &rf_qRecoveryFuncs); } + /* - Two data units are dead in this stripe, so we will need read - both P and Q to reconstruct the data. Note that only - one data unit we are reading may actually be missing. -*/ + * Two data units are dead in this stripe, so we will need read + * both P and Q to reconstruct the data. Note that only + * one data unit we are reading may actually be missing. + */ RF_CREATE_DAG_FUNC_DECL(rf_CreateDoubleDegradedReadDAG); RF_CREATE_DAG_FUNC_DECL(rf_CreateDoubleDegradedReadDAG) { rf_PQ_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList); } + RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG) { - rf_CreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList); + rf_CreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, + allocList); } -/* Writes, single failure */ + + +/* Writes, single failure. */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG) { if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != + asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) RF_PANIC(); - rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, - flags, allocList, 2, - (int (*) (RF_DagNode_t *)) rf_Degraded_100_PQFunc, - RF_FALSE); + rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, + flags, allocList, 2, (int (*) (RF_DagNode_t *)) + rf_Degraded_100_PQFunc, RF_FALSE); } -/* Dead P - act like a RAID 5 small write with parity = Q */ + +/* Dead P - act like a RAID 5 small write with parity = Q. */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateSmallWriteDAG) { RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the DegradedReadDAG code */ + /* Swap P and Q pointers to fake out the DegradedReadDAG code. */ temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, &rf_qFuncs, NULL); + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, &rf_qFuncs, NULL); } -/* Dead Q - act like a RAID 5 small write */ + +/* Dead Q - act like a RAID 5 small write. */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateSmallWriteDAG) { - rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, &rf_pFuncs, NULL); + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, &rf_pFuncs, NULL); } -/* Dead P - act like a RAID 5 large write but for Q */ + +/* Dead P - act like a RAID 5 large write but for Q. */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateLargeWriteDAG) { RF_PhysDiskAddr_t *temp; - /* swap P and Q pointers to fake out the code */ + /* Swap P and Q pointers to fake out the code. */ temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 1, rf_RegularQFunc, RF_FALSE); + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, 1, rf_RegularQFunc, RF_FALSE); } -/* Dead Q - act like a RAID 5 large write */ + +/* Dead Q - act like a RAID 5 large write. 
*/ RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateLargeWriteDAG) { - rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 1, rf_RegularPFunc, RF_FALSE); + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, 1, rf_RegularPFunc, RF_FALSE); } -/* - * writes, double failure - */ +/* Writes, double failure. */ -/* - * Lost P & Q - do a nonredundant write - */ +/* Lost P & Q - do a nonredundant write. */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_011_CreateWriteDAG) { - rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, - RF_IO_TYPE_WRITE); + rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, + allocList, RF_IO_TYPE_WRITE); } -/* - In the two cases below, - A nasty case arises when the write a (strict) portion of a failed stripe unit - and parts of another su. For now, we do not support this. -*/ + /* - Lost Data and P - do a Q write. -*/ + * In the two cases below, a nasty case arises when it's a write for a + * (strict) portion of a failed stripe unit and parts of another su. + * For now, we do not support this. + */ + +/* Lost Data and P - do a Q write. */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateWriteDAG) { RF_PhysDiskAddr_t *temp; if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) { + asmap->failedPDAs[0]->numSector != + raidPtr->Layout.sectorsPerStripeUnit) { RF_PANIC(); } - /* swap P and Q to fake out parity code */ + /* Swap P and Q to fake out parity code. */ temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp; rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, - allocList, 1, - (int (*) (RF_DagNode_t *)) rf_PQ_DegradedWriteQFunc, - RF_FALSE); - /* is the regular Q func the right one to call? */ + allocList, 1, (int (*) (RF_DagNode_t *)) + rf_PQ_DegradedWriteQFunc, RF_FALSE); + /* Is the regular Q func the right one to call ? */ } -/* - Lost Data and Q - do degraded mode P write -*/ + +/* Lost Data and Q - do degraded mode P write. */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateWriteDAG) { if (asmap->numStripeUnitsAccessed != 1 && - asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit) + asmap->failedPDAs[0]->numSector != + raidPtr->Layout.sectorsPerStripeUnit) RF_PANIC(); rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RecoveryXorFunc, RF_FALSE); } -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ + +#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pqdeg.h b/sys/dev/raidframe/rf_pqdeg.h index 5841231c014..2479ad1e10f 100644 --- a/sys/dev/raidframe/rf_pqdeg.h +++ b/sys/dev/raidframe/rf_pqdeg.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_pqdeg.h,v 1.2 1999/02/16 00:03:11 niklas Exp $ */ +/* $OpenBSD: rf_pqdeg.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_pqdeg.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,49 +28,52 @@ * rights to redistribute these changes. */ -#ifndef _RF__RF_PQDEG_H_ -#define _RF__RF_PQDEG_H_ +#ifndef _RF__RF_PQDEG_H_ +#define _RF__RF_PQDEG_H_ #include "rf_types.h" -#if RF_UTILITY == 0 +#if RF_UTILITY == 0 #include "rf_dag.h" -/* extern decl's of the failure mode PQ functions. +/* + * Extern decl's of the failure mode PQ functions. * See pddeg.c for nomenclature discussion. */ -/* reads, single failure */ +/* Reads, single failure. 
*/ RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateReadDAG); -/* reads, two failure */ +/* Reads, two failures. */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateReadDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateReadDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateReadDAG); -/* writes, single failure */ +/* Writes, single failure. */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_100_CreateWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateSmallWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_010_CreateLargeWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateSmallWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_001_CreateLargeWriteDAG); -/* writes, double failure */ +/* Writes, double failures. */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_011_CreateWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_110_CreateWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_101_CreateWriteDAG); RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG); -#endif /* RF_UTILITY == 0 */ +#endif /* RF_UTILITY == 0 */ -typedef RF_uint32 RF_ua32_t[32]; -typedef RF_uint8 RF_ua1024_t[1024]; +typedef RF_uint32 RF_ua32_t[32]; +typedef RF_uint8 RF_ua1024_t[1024]; extern RF_ua32_t rf_rn; extern RF_ua32_t rf_qfor[32]; -#ifndef _KERNEL /* we don't support PQ in the kernel yet, so - * don't link in this monster table */ +#ifndef _KERNEL /* + * We don't support PQ in the kernel yet, so + * don't link in this monster table. + */ extern RF_ua1024_t rf_qinv[29 * 29]; -#else /* !_KERNEL */ +#else /* !_KERNEL */ extern RF_ua1024_t rf_qinv[1]; -#endif /* !_KERNEL */ +#endif /* !_KERNEL */ -#endif /* !_RF__RF_PQDEG_H_ */ +#endif /* !_RF__RF_PQDEG_H_ */ diff --git a/sys/dev/raidframe/rf_pqdegdags.c b/sys/dev/raidframe/rf_pqdegdags.c index b66c1ccfe2b..8fc3429b302 100644 --- a/sys/dev/raidframe/rf_pqdegdags.c +++ b/sys/dev/raidframe/rf_pqdegdags.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_pqdegdags.c,v 1.4 2000/01/07 14:50:22 peter Exp $ */ +/* $OpenBSD: rf_pqdegdags.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_pqdegdags.c,v 1.5 1999/08/15 02:36:40 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -30,12 +31,12 @@ /* * rf_pqdegdags.c * Degraded mode dags for double fault cases. -*/ + */ #include "rf_archs.h" -#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) +#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) #include "rf_types.h" #include "rf_raid.h" @@ -50,66 +51,75 @@ #include "rf_pqdegdags.h" #include "rf_pq.h" -static void -applyPDA(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, RF_PhysDiskAddr_t * ppda, - RF_PhysDiskAddr_t * qpda, void *bp); +void rf_applyPDA(RF_Raid_t *, RF_PhysDiskAddr_t *, RF_PhysDiskAddr_t *, + RF_PhysDiskAddr_t *, void *); /* - Two data drives have failed, and we are doing a read that covers one of them. - We may also be reading some of the surviving drives. + * Two data drives have failed, and we are doing a read that covers one of them. + * We may also be reading some of the surviving drives. + */ - ***************************************************************************************** +/***************************************************************************** * - * creates a DAG to perform a degraded-mode read of data within one stripe. + * Creates a DAG to perform a degraded-mode read of data within one stripe. * This DAG is as follows: * - * Hdr - * | - * Block - * / / \ \ \ \ - * Rud ... Rud Rrd ... Rrd Rp Rq - * | \ | \ | \ | \ | \ | \ + * Hdr + * | + * Block + * / / \ \ \ \ + * Rud ... Rud Rrd ... 
Rrd Rp Rq + * | \ | \ | \ | \ | \ | \ * - * | | - * Unblock X - * \ / - * ------ T ------ + * | | + * Unblock X + * \ / + * ------ T ------ * - * Each R node is a successor of the L node - * One successor arc from each R node goes to U, and the other to X - * There is one Rud for each chunk of surviving user data requested by the user, - * and one Rrd for each chunk of surviving user data _not_ being read by the user - * R = read, ud = user data, rd = recovery (surviving) data, p = P data, q = Qdata - * X = pq recovery node, T = terminate + * Each R node is a successor of the L node. + * One successor arc from each R node goes to U, and the other to X. + * There is one Rud for each chunk of surviving user data requested by the + * user, and one Rrd for each chunk of surviving user data _not_ being read + * by the user. + * R = read, ud = user data, rd = recovery (surviving) data, p = P data, + * q = Qdata, X = pq recovery node, T = terminate * - * The block & unblock nodes are leftovers from a previous version. They + * The block & unblock nodes are leftovers from a previous version. They * do nothing, but I haven't deleted them because it would be a tremendous * effort to put them back in. * - * Note: The target buffer for the XOR node is set to the actual user buffer where the - * failed data is supposed to end up. This buffer is zero'd by the code here. Thus, - * if you create a degraded read dag, use it, and then re-use, you have to be sure to - * zero the target buffer prior to the re-use. + * Note: The target buffer for the XOR node is set to the actual user buffer + * where the failed data is supposed to end up. This buffer is zero'd by the + * code here. Thus, if you create a degraded read dag, use it, and then + * re-use. You have to be sure to zero the target buffer prior to the re-use. * - * Every buffer read is passed to the pq recovery node, whose job it is to sort out whats - * needs and what's not. - ****************************************************************************************/ -/* init a disk node with 2 successors and one predecessor */ -#define INIT_DISK_NODE(node,name) \ -rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \ -(node)->succedents[0] = unblockNode; \ -(node)->succedents[1] = recoveryNode; \ -(node)->antecedents[0] = blockNode; \ -(node)->antType[0] = rf_control - -#define DISK_NODE_PARAMS(_node_,_p_) \ - (_node_).params[0].p = _p_ ; \ - (_node_).params[1].p = (_p_)->bufPtr; \ - (_node_).params[2].v = parityStripeID; \ - (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru) - -#define DISK_NODE_PDA(node) ((node)->params[0].p) + * Every buffer read is passed to the pq recovery node, whose job it is to + * sort out what's needed and what's not. + *****************************************************************************/ + +/* Init a disk node with 2 successors and one predecessor. 
*/ +#define INIT_DISK_NODE(node,name) \ +do { \ + rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, \ + rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2, 1, 4, 0, \ + dag_h, name, allocList); \ + (node)->succedents[0] = unblockNode; \ + (node)->succedents[1] = recoveryNode; \ + (node)->antecedents[0] = blockNode; \ + (node)->antType[0] = rf_control; \ +} while (0) + +#define DISK_NODE_PARAMS(_node_,_p_) \ +do { \ + (_node_).params[0].p = _p_ ; \ + (_node_).params[1].p = (_p_)->bufPtr; \ + (_node_).params[2].v = parityStripeID; \ + (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, \ + 0, 0, which_ru); \ +} while (0) + +#define DISK_NODE_PDA(node) ((node)->params[0].p) RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead) { @@ -117,96 +127,99 @@ RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead) "Rq", "PQ Recovery", rf_PQDoubleRecoveryFunc); } -static void -applyPDA(raidPtr, pda, ppda, qpda, bp) - RF_Raid_t *raidPtr; - RF_PhysDiskAddr_t *pda; - RF_PhysDiskAddr_t *ppda; - RF_PhysDiskAddr_t *qpda; - void *bp; +void +rf_applyPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, + RF_PhysDiskAddr_t *ppda, RF_PhysDiskAddr_t *qpda, void *bp) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_RaidAddr_t s0off = rf_StripeUnitOffset(layoutPtr, ppda->startSector); RF_SectorCount_t s0len = ppda->numSector, len; RF_SectorNum_t suoffset; unsigned coeff; - char *pbuf = ppda->bufPtr; - char *qbuf = qpda->bufPtr; - char *buf; - int delta; + char *pbuf = ppda->bufPtr; + char *qbuf = qpda->bufPtr; + char *buf; + int delta; suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); len = pda->numSector; - /* see if pda intersects a recovery pda */ + /* See if pda intersects a recovery pda. */ if ((suoffset < s0off + s0len) && (suoffset + len > s0off)) { buf = pda->bufPtr; - coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress); + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + pda->raidAddress); coeff = (coeff % raidPtr->Layout.numDataCol); if (suoffset < s0off) { delta = s0off - suoffset; - buf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); + buf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + delta); suoffset = s0off; len -= delta; } if (suoffset > s0off) { delta = suoffset - s0off; - pbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); - qbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), delta); + pbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + delta); + qbuf += rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), + delta); } if ((suoffset + len) > (s0len + s0off)) len = s0len + s0off - suoffset; - /* src, dest, len */ + /* Src, dest, len. */ rf_bxor(buf, pbuf, rf_RaidAddressToByte(raidPtr, len), bp); - /* dest, src, len, coeff */ - rf_IncQ((unsigned long *) qbuf, (unsigned long *) buf, rf_RaidAddressToByte(raidPtr, len), coeff); + /* Dest, src, len, coeff. */ + rf_IncQ((unsigned long *) qbuf, (unsigned long *) buf, + rf_RaidAddressToByte(raidPtr, len), coeff); } } -/* - Recover data in the case of a double failure. There can be two - result buffers, one for each chunk of data trying to be recovered. - The params are pda's that have not been range restricted or otherwise - politely massaged - this should be done here. The last params are the - pdas of P and Q, followed by the raidPtr. The list can look like - - pda, pda, ... , p pda, q pda, raidptr, asm - or - - pda, pda, ... , p_1 pda, p_2 pda, q_1 pda, q_2 pda, raidptr, asm - - depending on wether two chunks of recovery data were required. 
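
/*
 * Why the do { ... } while (0) wrapping added to INIT_DISK_NODE() and
 * DISK_NODE_PARAMS() above matters: it turns a multi-statement macro into a
 * single statement, so a brace-less if/else around a macro call still parses
 * and still guards every statement.  A minimal, made-up illustration:
 */
#define BAD_INIT(n)	(n)->a = 0; (n)->b = 0
#define GOOD_INIT(n)	do { (n)->a = 0; (n)->b = 0; } while (0)

struct init_example_node { int a, b; };

void
init_example(struct init_example_node *n, int cond)
{
	if (cond)
		GOOD_INIT(n);	/* Expands to one statement. */
	else
		n->a = -1;	/*
				 * With BAD_INIT this else would not compile,
				 * and only (n)->a = 0 would be guarded.
				 */
}
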
- - The second condition only arises if there are two failed buffers - whose lengths do not add up a stripe unit. -*/ +/* + * Recover data in the case of a double failure. There can be two + * result buffers, one for each chunk of data trying to be recovered. + * The params are pda's that have not been range restricted or otherwise + * politely massaged - this should be done here. The last params are the + * pdas of P and Q, followed by the raidPtr. The list can look like + * + * pda, pda, ..., p pda, q pda, raidptr, asm + * + * or + * + * pda, pda, ..., p_1 pda, p_2 pda, q_1 pda, q_2 pda, raidptr, asm + * + * depending on whether two chunks of recovery data were required. + * + * The second condition only arises if there are two failed buffers + * whose lengths do not add up a stripe unit. + */ -int -rf_PQDoubleRecoveryFunc(node) - RF_DagNode_t *node; +int +rf_PQDoubleRecoveryFunc(RF_DagNode_t *node) { - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; + int np = node->numParams; + RF_AccessStripeMap_t *asmap = + (RF_AccessStripeMap_t *) node->params[np - 1].p; RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int d, i; + int d, i; unsigned coeff; RF_RaidAddr_t sosAddr, suoffset; RF_SectorCount_t len, secPerSU = layoutPtr->sectorsPerStripeUnit; - int two = 0; + int two = 0; RF_PhysDiskAddr_t *ppda, *ppda2, *qpda, *qpda2, *pda, npda; - char *buf; - int numDataCol = layoutPtr->numDataCol; + char *buf; + int numDataCol = layoutPtr->numDataCol; RF_Etimer_t timer; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; RF_ETIMER_START(timer); if (asmap->failedPDAs[1] && - (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) { + (asmap->failedPDAs[1]->numSector + + asmap->failedPDAs[0]->numSector < secPerSU)) { RF_ASSERT(0); ppda = node->params[np - 6].p; ppda2 = node->params[np - 5].p; @@ -225,43 +238,65 @@ rf_PQDoubleRecoveryFunc(node) buf = pda->bufPtr; suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); len = pda->numSector; - coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); - /* compute the data unit offset within the column */ + coeff = rf_RaidAddressToStripeUnitID(layoutPtr, + pda->raidAddress); + /* Compute the data unit offset within the column. */ coeff = (coeff % raidPtr->Layout.numDataCol); - /* see if pda intersects a recovery pda */ - applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp); + /* See if pda intersects a recovery pda. */ + rf_applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp); if (two) - applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp); + rf_applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp); } - /* ok, we got the parity back to the point where we can recover. We + /* + * Ok, we got the parity back to the point where we can recover. We * now need to determine the coeff of the columns that need to be - * recovered. We can also only need to recover a single stripe unit. */ + * recovered. We can also only need to recover a single stripe unit. + */ - if (asmap->failedPDAs[1] == NULL) { /* only a single stripe unit - * to recover. */ + if (asmap->failedPDAs[1] == NULL) { /* + * Only a single stripe unit + * to recover. 
+ */ pda = asmap->failedPDAs[0]; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - /* need to determine the column of the other failed disk */ - coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); - /* compute the data unit offset within the column */ + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, + asmap->raidAddress); + /* Need to determine the column of the other failed disk. */ + coeff = rf_RaidAddressToStripeUnitID(layoutPtr, + pda->raidAddress); + /* Compute the data unit offset within the column. */ coeff = (coeff % raidPtr->Layout.numDataCol); for (i = 0; i < numDataCol; i++) { npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ - if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) + (raidPtr->Layout.map->MapSector) (raidPtr, + npda.raidAddress, &(npda.row), &(npda.col), + &(npda.startSector), 0); + /* Skip over dead disks. */ + if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col] + .status)) if (i != coeff) break; } RF_ASSERT(i < numDataCol); RF_ASSERT(two == 0); - /* recover the data. Since we need only want to recover one - * column, we overwrite the parity with the other one. */ - if (coeff < i) /* recovering 'a' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) pda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i); - else /* recovering 'b' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, (unsigned long *) pda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff); + /* + * Recover the data. Since we need only to recover one + * column, we overwrite the parity with the other one. + */ + if (coeff < i) /* Recovering 'a'. */ + rf_PQ_recover((unsigned long *) ppda->bufPtr, + (unsigned long *) qpda->bufPtr, + (unsigned long *) pda->bufPtr, + (unsigned long *) ppda->bufPtr, + rf_RaidAddressToByte(raidPtr, pda->numSector), + coeff, i); + else /* Recovering 'b'. */ + rf_PQ_recover((unsigned long *) ppda->bufPtr, + (unsigned long *) qpda->bufPtr, + (unsigned long *) ppda->bufPtr, + (unsigned long *) pda->bufPtr, + rf_RaidAddressToByte(raidPtr, pda->numSector), + i, coeff); } else RF_PANIC(); @@ -273,43 +308,46 @@ rf_PQDoubleRecoveryFunc(node) return (0); } -int -rf_PQWriteDoubleRecoveryFunc(node) - RF_DagNode_t *node; +int +rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t *node) { - /* The situation: - * + /* + * The situation: + * * We are doing a write that hits only one failed data unit. The other * failed data unit is not being overwritten, so we need to generate * it. - * + * * For the moment, we assume all the nonfailed data being written is in - * the shadow of the failed data unit. (i.e,, either a single data - * unit write or the entire failed stripe unit is being overwritten. ) - * - * Recovery strategy: apply the recovery data to the parity and q. Use P - * & Q to recover the second failed data unit in P. Zero fill Q, then - * apply the recovered data to p. Then apply the data being written to - * the failed drive. Then walk through the surviving drives, applying - * new data when it exists, othewise the recovery data. Quite a mess. - * - * - * The params - * - * read pda0, read pda1, ... read pda (numDataCol-3), write pda0, ... 
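
/*
 * A note on the argument order above, inferred from the two call sites: both
 * branches hand rf_PQ_recover() the smaller column coefficient as coeff_a
 * and the larger as coeff_b, and the buffer passed as abuf receives the data
 * of the coeff_a column.  Only the destination buffers differ between the
 * "Recovering 'a'" and "Recovering 'b'" branches.
 */
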
, - * write pda (numStripeUnitAccess - numDataFailed), failed pda, - * raidPtr, asmap */ - - int np = node->numParams; - RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; + * the shadow of the failed data unit. (i.e., either a single data + * unit write or the entire failed stripe unit is being overwritten.) + * + * Recovery strategy: apply the recovery data to the parity and Q. + * Use P & Q to recover the second failed data unit in P. Zero fill + * Q, then apply the recovered data to P. Then apply the data being + * written to the failed drive. Then walk through the surviving drives, + * applying new data when it exists, othewise the recovery data. + * Quite a mess. + * + * + * The params: + * + * read pda0, read pda1, ..., read pda (numDataCol-3), + * write pda0, ..., write pda (numStripeUnitAccess - numDataFailed), + * failed pda, raidPtr, asmap + */ + + int np = node->numParams; + RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) + node->params[np - 1].p; RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); - int i; + int i; RF_RaidAddr_t sosAddr; unsigned coeff; RF_StripeCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda; - int numDataCol = layoutPtr->numDataCol; + int numDataCol = layoutPtr->numDataCol; RF_Etimer_t timer; RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; @@ -320,46 +358,66 @@ rf_PQWriteDoubleRecoveryFunc(node) qpda = node->results[1]; /* apply the recovery data */ for (i = 0; i < numDataCol - 2; i++) - applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp); + rf_applyPDA(raidPtr, node->params[i].p, ppda, qpda, + node->dagHdr->bp); - /* determine the other failed data unit */ + /* Determine the other failed data unit. */ pda = asmap->failedPDAs[0]; - sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); - /* need to determine the column of the other failed disk */ + sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, + asmap->raidAddress); + /* Need to determine the column of the other failed disk. */ coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress); - /* compute the data unit offset within the column */ + /* Compute the data unit offset within the column. */ coeff = (coeff % raidPtr->Layout.numDataCol); for (i = 0; i < numDataCol; i++) { npda.raidAddress = sosAddr + (i * secPerSU); - (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0); - /* skip over dead disks */ + (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, + &(npda.row), &(npda.col), &(npda.startSector), 0); + /* Skip over dead disks. */ if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status)) if (i != coeff) break; } RF_ASSERT(i < numDataCol); - /* recover the data. The column we want to recover we write over the - * parity. The column we don't care about we dump in q. */ - if (coeff < i) /* recovering 'a' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i); - else /* recovering 'b' */ - rf_PQ_recover((unsigned long *) ppda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff); + /* + * Recover the data. The column we want to recover, we write over the + * parity. 
The column we don't care about, we dump in q. + */ + if (coeff < i) /* Recovering 'a'. */ + rf_PQ_recover((unsigned long *) ppda->bufPtr, + (unsigned long *) qpda->bufPtr, + (unsigned long *) ppda->bufPtr, + (unsigned long *) qpda->bufPtr, + rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i); + else /* Recovering 'b'. */ + rf_PQ_recover((unsigned long *) ppda->bufPtr, + (unsigned long *) qpda->bufPtr, + (unsigned long *) qpda->bufPtr, + (unsigned long *) ppda->bufPtr, + rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff); /* OK. The valid data is in P. Zero fill Q, then inc it into it. */ bzero(qpda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector)); - rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), i); - - /* now apply all the write data to the buffer */ - /* single stripe unit write case: the failed data is only thing we are - * writing. */ + rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr, + rf_RaidAddressToByte(raidPtr, qpda->numSector), i); + + /* Now apply all the write data to the buffer. */ + /* + * Single stripe unit write case: The failed data is the only thing + * we are writing. + */ RF_ASSERT(asmap->numStripeUnitsAccessed == 1); - /* dest, src, len, coeff */ - rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) asmap->failedPDAs[0]->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector), coeff); - rf_bxor(asmap->failedPDAs[0]->bufPtr, ppda->bufPtr, rf_RaidAddressToByte(raidPtr, ppda->numSector), node->dagHdr->bp); - - /* now apply all the recovery data */ + /* Dest, src, len, coeff. */ + rf_IncQ((unsigned long *) qpda->bufPtr, + (unsigned long *) asmap->failedPDAs[0]->bufPtr, + rf_RaidAddressToByte(raidPtr, qpda->numSector), coeff); + rf_bxor(asmap->failedPDAs[0]->bufPtr, ppda->bufPtr, + rf_RaidAddressToByte(raidPtr, ppda->numSector), node->dagHdr->bp); + + /* Now apply all the recovery data. */ for (i = 0; i < numDataCol - 2; i++) - applyPDA(raidPtr, node->params[i].p, ppda, qpda, node->dagHdr->bp); + rf_applyPDA(raidPtr, node->params[i].p, ppda, qpda, + node->dagHdr->bp); RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); @@ -369,62 +427,69 @@ rf_PQWriteDoubleRecoveryFunc(node) rf_GenericWakeupFunc(node, 0); return (0); } + RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite) { RF_PANIC(); } -/* - Two lost data unit write case. - - There are really two cases here: - - (1) The write completely covers the two lost data units. - In that case, a reconstruct write that doesn't write the - failed data units will do the correct thing. So in this case, - the dag looks like - - full stripe read of surviving data units (not being overwritten) - write new data (ignoring failed units) compute P&Q - write P&Q - - (2) The write does not completely cover both failed data units - (but touches at least one of them). Then we need to do the - equivalent of a reconstruct read to recover the missing data - unit from the other stripe. - For any data we are writing that is not in the "shadow" - of the failed units, we need to do a four cycle update. - PANIC on this case. for now - -*/ +/* + * Two lost data unit write case. + * + * There are really two cases here: + * + * (1) The write completely covers the two lost data units. + * In that case, a reconstruct write that doesn't write the + * failed data units will do the correct thing. 
So in this case, + * the dag looks like + * + * Full stripe read of surviving data units (not being overwritten) + * Write new data (ignoring failed units) + * Compute P&Q + * Write P&Q + * + * + * (2) The write does not completely cover both failed data units + * (but touches at least one of them). Then we need to do the + * equivalent of a reconstruct read to recover the missing data + * unit from the other stripe. + * + * For any data we are writing that is not in the "shadow" + * of the failed units, we need to do a four cycle update. + * PANIC on this case. For now. + * + */ RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_SectorCount_t sectorsPerSU = layoutPtr->sectorsPerStripeUnit; - int sum; - int nf = asmap->numDataFailed; + int sum; + int nf = asmap->numDataFailed; sum = asmap->failedPDAs[0]->numSector; if (nf == 2) sum += asmap->failedPDAs[1]->numSector; if ((nf == 2) && (sum == (2 * sectorsPerSU))) { - /* large write case */ + /* Large write case. */ rf_PQ_DDLargeWrite(raidPtr, asmap, dag_h, bp, flags, allocList); return; } if ((nf == asmap->numStripeUnitsAccessed) || (sum >= sectorsPerSU)) { - /* small write case, no user data not in shadow */ - rf_PQ_DDSimpleSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList); + /* Small write case, no user data not in shadow. */ + rf_PQ_DDSimpleSmallWrite(raidPtr, asmap, dag_h, bp, flags, + allocList); return; } RF_PANIC(); } + RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite) { - rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Rq", "Wq", "PQ Recovery", rf_PQWriteDoubleRecoveryFunc); + rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, + "Rq", "Wq", "PQ Recovery", rf_PQWriteDoubleRecoveryFunc); } -#endif /* (RF_INCLUDE_DECL_PQ > 0) || - * (RF_INCLUDE_RAID6 > 0) */ + +#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */ diff --git a/sys/dev/raidframe/rf_pqdegdags.h b/sys/dev/raidframe/rf_pqdegdags.h index 8fc1f3dbf97..00ee5f04cf4 100644 --- a/sys/dev/raidframe/rf_pqdegdags.h +++ b/sys/dev/raidframe/rf_pqdegdags.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_pqdegdags.h,v 1.2 1999/02/16 00:03:12 niklas Exp $ */ +/* $OpenBSD: rf_pqdegdags.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_pqdegdags.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ + /* * rf_pqdegdags.h */ @@ -34,16 +35,16 @@ * Degraded mode dags for double fault cases. */ -#ifndef _RF__RF_PQDEGDAGS_H_ -#define _RF__RF_PQDEGDAGS_H_ +#ifndef _RF__RF_PQDEGDAGS_H_ +#define _RF__RF_PQDEGDAGS_H_ #include "rf_dag.h" RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead); -int rf_PQDoubleRecoveryFunc(RF_DagNode_t * node); -int rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t * node); +int rf_PQDoubleRecoveryFunc(RF_DagNode_t *); +int rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t *); RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite); RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite); RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG); -#endif /* !_RF__RF_PQDEGDAGS_H_ */ +#endif /* !_RF__RF_PQDEGDAGS_H_ */ diff --git a/sys/dev/raidframe/rf_psstatus.c b/sys/dev/raidframe/rf_psstatus.c index 6581f97d7c7..c6abe9f0afb 100644 --- a/sys/dev/raidframe/rf_psstatus.c +++ b/sys/dev/raidframe/rf_psstatus.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_psstatus.c,v 1.5 2002/12/03 11:31:35 tdeval Exp $ */ +/* $OpenBSD: rf_psstatus.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_psstatus.c,v 1.5 2000/01/08 22:57:31 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -32,7 +33,7 @@ * psstatus.c * * The reconstruction code maintains a bunch of status related to the parity - * stripes that are currently under reconstruction. This header file defines + * stripes that are currently under reconstruction. This header file defines * the status structures. * *****************************************************************************/ @@ -45,26 +46,43 @@ #include "rf_psstatus.h" #include "rf_shutdown.h" -#define Dprintf1(s,a) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) if (rf_pssDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) - -static void -RealPrintPSStatusTable(RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable); - -#define RF_MAX_FREE_PSS 32 -#define RF_PSS_INC 8 -#define RF_PSS_INITIAL 4 - -static int init_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *); -static void clean_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *); -static void rf_ShutdownPSStatus(void *); - -static int -init_pss(p, raidPtr) - RF_ReconParityStripeStatus_t *p; - RF_Raid_t *raidPtr; +#define Dprintf1(s,a) \ +do { \ + if (rf_pssDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + NULL, NULL, NULL, NULL, NULL, NULL, NULL); \ +} while(0) +#define Dprintf2(s,a,b) \ +do { \ + if (rf_pssDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + (void *)((unsigned long)b), \ + NULL, NULL, NULL, NULL, NULL, NULL); \ +} while(0) +#define Dprintf3(s,a,b,c) \ +do { \ + if (rf_pssDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + (void *)((unsigned long)b), \ + (void *)((unsigned long)c), \ + NULL, NULL, NULL, NULL, NULL); \ +} while(0) + +void rf_RealPrintPSStatusTable(RF_Raid_t *, RF_PSStatusHeader_t *); + +#define RF_MAX_FREE_PSS 32 +#define RF_PSS_INC 8 +#define RF_PSS_INITIAL 4 + +int rf_init_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *); +void rf_clean_pss(RF_ReconParityStripeStatus_t *, RF_Raid_t *); +void rf_ShutdownPSStatus(void *); + +int +rf_init_pss(RF_ReconParityStripeStatus_t *p, RF_Raid_t *raidPtr) { RF_Calloc(p->issued, raidPtr->numCol, sizeof(char), (char *)); if (p->issued == NULL) @@ -72,88 +90,87 @@ init_pss(p, raidPtr) return (0); } -static void -clean_pss(p, raidPtr) - RF_ReconParityStripeStatus_t *p; - RF_Raid_t *raidPtr; +void +rf_clean_pss(RF_ReconParityStripeStatus_t *p, RF_Raid_t *raidPtr) { RF_Free(p->issued, raidPtr->numCol * sizeof(char)); } -static void -rf_ShutdownPSStatus(arg) - void *arg; +void +rf_ShutdownPSStatus(void *arg) { RF_Raid_t *raidPtr = (RF_Raid_t *) arg; - RF_FREELIST_DESTROY_CLEAN_ARG(raidPtr->pss_freelist, next, (RF_ReconParityStripeStatus_t *), clean_pss, raidPtr); + RF_FREELIST_DESTROY_CLEAN_ARG(raidPtr->pss_freelist, next, + (RF_ReconParityStripeStatus_t *), rf_clean_pss, raidPtr); } -int -rf_ConfigurePSStatus( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) +int +rf_ConfigurePSStatus(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { - int rc; + int rc; raidPtr->pssTableSize = RF_PSS_DEFAULT_TABLESIZE; - RF_FREELIST_CREATE(raidPtr->pss_freelist, RF_MAX_FREE_PSS, - RF_PSS_INC, sizeof(RF_ReconParityStripeStatus_t)); + RF_FREELIST_CREATE(raidPtr->pss_freelist, RF_MAX_FREE_PSS, RF_PSS_INC, + sizeof(RF_ReconParityStripeStatus_t)); if 
(raidPtr->pss_freelist == NULL) return (ENOMEM); rc = rf_ShutdownCreate(listp, rf_ShutdownPSStatus, raidPtr); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d.\n", __FILE__, __LINE__, rc); rf_ShutdownPSStatus(raidPtr); return (rc); } RF_FREELIST_PRIME_INIT_ARG(raidPtr->pss_freelist, RF_PSS_INITIAL, next, - (RF_ReconParityStripeStatus_t *), init_pss, raidPtr); + (RF_ReconParityStripeStatus_t *), rf_init_pss, raidPtr); return (0); } -/***************************************************************************************** - * sets up the pss table + + +/***************************************************************************** + * Sets up the pss table. * We pre-allocate a bunch of entries to avoid as much as possible having to * malloc up hash chain entries. - ****************************************************************************************/ + *****************************************************************************/ RF_PSStatusHeader_t * -rf_MakeParityStripeStatusTable(raidPtr) - RF_Raid_t *raidPtr; +rf_MakeParityStripeStatusTable(RF_Raid_t *raidPtr) { RF_PSStatusHeader_t *pssTable; - int i, j, rc; + int i, j, rc; - RF_Calloc(pssTable, raidPtr->pssTableSize, sizeof(RF_PSStatusHeader_t), (RF_PSStatusHeader_t *)); + RF_Calloc(pssTable, raidPtr->pssTableSize, sizeof(RF_PSStatusHeader_t), + (RF_PSStatusHeader_t *)); for (i = 0; i < raidPtr->pssTableSize; i++) { rc = rf_mutex_init(&pssTable[i].mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - /* fail and deallocate */ + RF_ERRORMSG3("Unable to init mutex file %s line %d" + " rc=%d.\n", __FILE__, __LINE__, rc); + /* Fail and deallocate. */ for (j = 0; j < i; j++) { rf_mutex_destroy(&pssTable[i].mutex); } - RF_Free(pssTable, raidPtr->pssTableSize * sizeof(RF_PSStatusHeader_t)); + RF_Free(pssTable, raidPtr->pssTableSize * + sizeof(RF_PSStatusHeader_t)); return (NULL); } } return (pssTable); } -void -rf_FreeParityStripeStatusTable(raidPtr, pssTable) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; +void +rf_FreeParityStripeStatusTable(RF_Raid_t *raidPtr, + RF_PSStatusHeader_t *pssTable) { - int i; + int i; if (rf_pssDebug) - RealPrintPSStatusTable(raidPtr, pssTable); + rf_RealPrintPSStatusTable(raidPtr, pssTable); for (i = 0; i < raidPtr->pssTableSize; i++) { if (pssTable[i].chain) { - printf("ERROR: pss hash chain not null at recon shutdown\n"); + printf("ERROR: pss hash chain not null at recon" + " shutdown.\n"); } rf_mutex_destroy(&pssTable[i].mutex); } @@ -161,20 +178,20 @@ rf_FreeParityStripeStatusTable(raidPtr, pssTable) } -/* looks up the status structure for a parity stripe. - * if the create_flag is on, creates and returns the status structure it it doesn't exist - * otherwise returns NULL if the status structure does not exist +/* + * Looks up the status structure for a parity stripe. + * If the create_flag is on, returns the status structure, creating it if + * it doesn't exist. Otherwise returns NULL if the status structure does + * not exist already. + * + * The flags tell whether or not to create it if it doesn't exist + what + * flags to set initially. * - * ASSUMES THE PSS DESCRIPTOR IS LOCKED UPON ENTRY + * ASSUMES THE PSS DESCRIPTOR IS LOCKED UPON ENTRY. 
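An aside on the Dprintf1/2/3 rewrite earlier in this file: wrapping a multi-statement debug macro in do { ... } while (0) makes the expansion behave as a single statement, so it can sit safely under an unbraced if. A minimal standalone illustration -- the LOG macros below are invented for the example and are not RAIDframe code:

#include <stdio.h>

/*
 * Expanding LOG_BAD under the unbraced 'if' in main() would guard only the
 * first printf and leave the 'else' without a matching 'if'; it is shown
 * only for contrast and never used.
 */
#define LOG_BAD(msg)    printf("log: "); printf("%s\n", msg)

/*
 * With do { ... } while (0) the macro expands to one statement and still
 * takes the usual trailing semicolon.
 */
#define LOG_GOOD(msg)                                   \
do {                                                    \
        printf("log: ");                                \
        printf("%s\n", msg);                            \
} while (0)

int
main(void)
{
        int failed = 1;

        if (failed)
                LOG_GOOD("component failed");   /* whole body is guarded */
        else
                printf("all components ok\n");
        return (0);
}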
*/ RF_ReconParityStripeStatus_t * -rf_LookupRUStatus( - RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable, - RF_StripeNum_t psID, - RF_ReconUnitNum_t which_ru, - RF_PSSFlags_t flags, /* whether or not to create it if it doesn't - * exist + what flags to set initially */ +rf_LookupRUStatus(RF_Raid_t *raidPtr, RF_PSStatusHeader_t *pssTable, + RF_StripeNum_t psID, RF_ReconUnitNum_t which_ru, RF_PSSFlags_t flags, int *created) { RF_PSStatusHeader_t *hdr = &pssTable[RF_HASH_PSID(raidPtr, psID)]; @@ -187,7 +204,8 @@ rf_LookupRUStatus( } if (!p && (flags & RF_PSS_CREATE)) { - Dprintf2("PSS: creating pss for psid %ld ru %d\n", psID, which_ru); + Dprintf2("PSS: creating pss for psid %ld ru %d.\n", + psID, which_ru); p = rf_AllocPSStatus(raidPtr); p->next = hdr->chain; hdr->chain = p; @@ -204,31 +222,38 @@ rf_LookupRUStatus( p->bufWaitList = NULL; *created = 1; } else - if (p) { /* we didn't create, but we want to specify - * some new status */ - p->flags |= flags; /* add in whatever flags we're - * specifying */ + if (p) { + /* + * We didn't create, but we want to specify + * some new status. + */ + p->flags |= flags; /* + * Add in whatever flags we're + * specifying. + */ } if (p && (flags & RF_PSS_RECON_BLOCKED)) { - p->blockCount++;/* if we're asking to block recon, bump the - * count */ - Dprintf3("raid%d: Blocked recon on psid %ld. count now %d\n", + /* If we're asking to block recon, bump the count. */ + p->blockCount++; + Dprintf3("raid%d: Blocked recon on psid %ld. count now %d.\n", raidPtr->raidid, psID, p->blockCount); } return (p); } -/* deletes an entry from the parity stripe status table. typically used + + +/* + * Deletes an entry from the parity stripe status table. Typically used * when an entry has been allocated solely to block reconstruction, and - * no recon was requested while recon was blocked. Assumes the hash + * no recon was requested while recon was blocked. Assumes the hash * chain is ALREADY LOCKED. */ -void -rf_PSStatusDelete(raidPtr, pssTable, pssPtr) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; - RF_ReconParityStripeStatus_t *pssPtr; +void +rf_PSStatusDelete(RF_Raid_t *raidPtr, RF_PSStatusHeader_t *pssTable, + RF_ReconParityStripeStatus_t *pssPtr) { - RF_PSStatusHeader_t *hdr = &(pssTable[RF_HASH_PSID(raidPtr, pssPtr->parityStripeID)]); + RF_PSStatusHeader_t *hdr = + &(pssTable[RF_HASH_PSID(raidPtr, pssPtr->parityStripeID)]); RF_ReconParityStripeStatus_t *p = hdr->chain, *pt = NULL; while (p) { @@ -244,17 +269,21 @@ rf_PSStatusDelete(raidPtr, pssTable, pssPtr) pt = p; p = p->next; } - RF_ASSERT(0); /* we must find it here */ + RF_ASSERT(0); /* We must find it here. */ } -/* deletes an entry from the ps status table after reconstruction has completed */ -void -rf_RemoveFromActiveReconTable(raidPtr, row, psid, which_ru) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_ReconUnitNum_t which_ru; - RF_StripeNum_t psid; + + +/* + * Deletes an entry from the ps status table after reconstruction has + * completed. 
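The lookup / create-on-miss / unlink logic above is a plain chained hash table, keyed by parity stripe ID and locked one bucket at a time by the caller (hence the ASSUMES ... LOCKED comments). A stripped-down user-level sketch of the same shape -- all names below are invented; the real table hangs off raidPtr->reconControl and is guarded by RF_LOCK_PSS_MUTEX():

#include <stdlib.h>

#define TABLESIZE       200                     /* cf. RF_PSS_DEFAULT_TABLESIZE */
#define HASH(psid)      ((psid) % TABLESIZE)    /* same idea as RF_HASH_PSID */

struct pss {
        long            psid;           /* parity stripe ID, the hash key */
        unsigned int    flags;
        struct pss      *next;          /* hash chain link */
};

static struct pss *table[TABLESIZE];    /* one chain (and one lock) per bucket */

/* Look an entry up, optionally creating it. Caller holds the bucket lock. */
static struct pss *
lookup(long psid, int create)
{
        struct pss **head = &table[HASH(psid)], *p;

        for (p = *head; p != NULL; p = p->next)
                if (p->psid == psid)
                        return (p);
        if (!create || (p = calloc(1, sizeof(*p))) == NULL)
                return (NULL);
        p->psid = psid;
        p->next = *head;                /* push onto the front of the chain */
        *head = p;
        return (p);
}

/* Unlink an entry, keeping a trailing pointer as rf_PSStatusDelete() does. */
static void
remove_entry(long psid)
{
        struct pss **head = &table[HASH(psid)], *p, *pt = NULL;

        for (p = *head; p != NULL; pt = p, p = p->next) {
                if (p->psid != psid)
                        continue;
                if (pt != NULL)
                        pt->next = p->next;     /* middle of the chain */
                else
                        *head = p->next;        /* it was the head */
                free(p);
                return;
        }
}

int
main(void)
{
        struct pss *p;

        p = lookup(1234, 1);    /* psid 1234 lands in bucket 1234 % 200 = 34 */
        if (p != NULL)
                p->flags |= 0x1;
        remove_entry(1234);
        return (0);
}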
+ */ +void +rf_RemoveFromActiveReconTable(RF_Raid_t *raidPtr, RF_RowCol_t row, + RF_StripeNum_t psid, RF_ReconUnitNum_t which_ru) { - RF_PSStatusHeader_t *hdr = &(raidPtr->reconControl[row]->pssTable[RF_HASH_PSID(raidPtr, psid)]); + RF_PSStatusHeader_t *hdr = + &(raidPtr->reconControl[row] + ->pssTable[RF_HASH_PSID(raidPtr, psid)]); RF_ReconParityStripeStatus_t *p, *pt; RF_CallbackDesc_t *cb, *cb1; @@ -266,11 +295,11 @@ rf_RemoveFromActiveReconTable(raidPtr, row, psid, which_ru) if (p == NULL) { rf_PrintPSStatusTable(raidPtr, row); } - RF_ASSERT(p); /* it must be there */ + RF_ASSERT(p); /* It must be there. */ - Dprintf2("PSS: deleting pss for psid %ld ru %d\n", psid, which_ru); + Dprintf2("PSS: deleting pss for psid %ld ru %d.\n", psid, which_ru); - /* delete this entry from the hash chain */ + /* Delete this entry from the hash chain. */ if (pt) pt->next = p->next; else @@ -279,16 +308,19 @@ rf_RemoveFromActiveReconTable(raidPtr, row, psid, which_ru) RF_UNLOCK_MUTEX(hdr->mutex); - /* wakup anyone waiting on the parity stripe ID */ + /* Wake-up anyone waiting on the parity stripe ID. */ cb = p->procWaitList; p->procWaitList = NULL; while (cb) { - Dprintf1("Waking up access waiting on parity stripe ID %ld\n", p->parityStripeID); + Dprintf1("Waking up access waiting on parity stripe ID %ld.\n", + p->parityStripeID); cb1 = cb->next; (cb->callbackFunc) (cb->callbackArg); - /* THIS IS WHAT THE ORIGINAL CODE HAD... the extra 0 is bogus, - * IMHO */ + /* + * THIS IS WHAT THE ORIGINAL CODE HAD... the extra 0 is bogus, + * IMHO. + */ /* (cb->callbackFunc)(cb->callbackArg, 0); */ rf_FreeCallbackDesc(cb); cb = cb1; @@ -298,39 +330,38 @@ rf_RemoveFromActiveReconTable(raidPtr, row, psid, which_ru) } RF_ReconParityStripeStatus_t * -rf_AllocPSStatus(raidPtr) - RF_Raid_t *raidPtr; +rf_AllocPSStatus(RF_Raid_t *raidPtr) { RF_ReconParityStripeStatus_t *p; - RF_FREELIST_GET_INIT_ARG(raidPtr->pss_freelist, p, next, (RF_ReconParityStripeStatus_t *), init_pss, raidPtr); + RF_FREELIST_GET_INIT_ARG(raidPtr->pss_freelist, p, next, + (RF_ReconParityStripeStatus_t *), rf_init_pss, raidPtr); if (p) { bzero(p->issued, raidPtr->numCol); } p->next = NULL; - /* no need to initialize here b/c the only place we're called from is - * the above Lookup */ + /* + * No need to initialize here b/c the only place we're called from is + * the above Lookup. 
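The wake-up loop above follows a detach-then-drain pattern: the procWaitList head is cut off in one step, and each descriptor's next pointer is saved before its callback runs, because the callback (and the rf_FreeCallbackDesc() right after it) may make the node unusable. A self-contained sketch with invented names:

#include <stdio.h>
#include <stdlib.h>

struct cbdesc {
        void            (*func)(void *);        /* callbackFunc */
        void            *arg;                   /* callbackArg */
        struct cbdesc   *next;
};

/* Detach the whole list, then invoke and free each waiter in turn. */
static void
drain_waiters(struct cbdesc **listp)
{
        struct cbdesc *cb, *cb1;

        cb = *listp;
        *listp = NULL;                  /* new waiters can queue up again */
        while (cb != NULL) {
                cb1 = cb->next;         /* save next before the node goes away */
                (*cb->func)(cb->arg);
                free(cb);
                cb = cb1;
        }
}

static void
wakeup_one(void *arg)
{
        printf("waking access waiting on stripe %ld\n", (long)arg);
}

int
main(void)
{
        struct cbdesc *list = NULL, *cb;
        long i;

        for (i = 0; i < 3; i++) {
                if ((cb = malloc(sizeof(*cb))) == NULL)
                        break;
                cb->func = wakeup_one;
                cb->arg = (void *)i;
                cb->next = list;
                list = cb;
        }
        drain_waiters(&list);
        return (0);
}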
+ */ return (p); } -void -rf_FreePSStatus(raidPtr, p) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *p; +void +rf_FreePSStatus(RF_Raid_t *raidPtr, RF_ReconParityStripeStatus_t *p) { RF_ASSERT(p->procWaitList == NULL); RF_ASSERT(p->blockWaitList == NULL); RF_ASSERT(p->bufWaitList == NULL); - RF_FREELIST_FREE_CLEAN_ARG(raidPtr->pss_freelist, p, next, clean_pss, raidPtr); + RF_FREELIST_FREE_CLEAN_ARG(raidPtr->pss_freelist, p, next, + rf_clean_pss, raidPtr); } -static void -RealPrintPSStatusTable(raidPtr, pssTable) - RF_Raid_t *raidPtr; - RF_PSStatusHeader_t *pssTable; +void +rf_RealPrintPSStatusTable(RF_Raid_t *raidPtr, RF_PSStatusHeader_t *pssTable) { - int i, j, procsWaiting, blocksWaiting, bufsWaiting; + int i, j, procsWaiting, blocksWaiting, bufsWaiting; RF_ReconParityStripeStatus_t *p; RF_CallbackDesc_t *cb; @@ -344,8 +375,11 @@ RealPrintPSStatusTable(raidPtr, pssTable) blocksWaiting++; for (cb = p->bufWaitList; cb; cb = cb->next) bufsWaiting++; - printf("PSID %ld RU %d : blockCount %d %d/%d/%d proc/block/buf waiting, issued ", - (long) p->parityStripeID, p->which_ru, p->blockCount, procsWaiting, blocksWaiting, bufsWaiting); + printf("PSID %ld RU %d : blockCount %d %d/%d/%d" + " proc/block/buf waiting, issued ", + (long) p->parityStripeID, p->which_ru, + p->blockCount, procsWaiting, blocksWaiting, + bufsWaiting); for (j = 0; j < raidPtr->numCol; j++) printf("%c", (p->issued[j]) ? '1' : '0'); if (!p->flags) @@ -367,11 +401,9 @@ RealPrintPSStatusTable(raidPtr, pssTable) } } -void -rf_PrintPSStatusTable(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; +void +rf_PrintPSStatusTable(RF_Raid_t *raidPtr, RF_RowCol_t row) { RF_PSStatusHeader_t *pssTable = raidPtr->reconControl[row]->pssTable; - RealPrintPSStatusTable(raidPtr, pssTable); + rf_RealPrintPSStatusTable(raidPtr, pssTable); } diff --git a/sys/dev/raidframe/rf_psstatus.h b/sys/dev/raidframe/rf_psstatus.h index 76fbb6999a0..2fa397981e9 100644 --- a/sys/dev/raidframe/rf_psstatus.h +++ b/sys/dev/raidframe/rf_psstatus.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_psstatus.h,v 1.2 1999/02/16 00:03:13 niklas Exp $ */ +/* $OpenBSD: rf_psstatus.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_psstatus.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -32,101 +33,145 @@ * psstatus.h * * The reconstruction code maintains a bunch of status related to the parity - * stripes that are currently under reconstruction. This header file defines + * stripes that are currently under reconstruction. This header file defines * the status structures. * *****************************************************************************/ -#ifndef _RF__RF_PSSTATUS_H_ -#define _RF__RF_PSSTATUS_H_ +#ifndef _RF__RF_PSSTATUS_H_ +#define _RF__RF_PSSTATUS_H_ #include "rf_types.h" #include "rf_threadstuff.h" #include "rf_callback.h" -#define RF_PS_MAX_BUFS 10 /* max number of bufs we'll accumulate before - * we do an XOR */ +#define RF_PS_MAX_BUFS 10 /* + * Max number of bufs we'll + * accumulate before we do + * an XOR. + */ -#define RF_PSS_DEFAULT_TABLESIZE 200 +#define RF_PSS_DEFAULT_TABLESIZE 200 /* * Macros to acquire/release the mutex lock on a parity stripe status * descriptor. Note that we use just one lock for the whole hash chain. 
*/ -#define RF_HASH_PSID(_raid_,_psid_) ( (_psid_) % ((_raid_)->pssTableSize) ) /* simple hash function */ -#define RF_LOCK_PSS_MUTEX(_raidPtr, _row, _psid) \ - RF_LOCK_MUTEX((_raidPtr)->reconControl[_row]->pssTable[ RF_HASH_PSID(_raidPtr,_psid) ].mutex) -#define RF_UNLOCK_PSS_MUTEX(_raidPtr, _row, _psid) \ - RF_UNLOCK_MUTEX((_raidPtr)->reconControl[_row]->pssTable[ RF_HASH_PSID(_raidPtr,_psid) ].mutex) +/* Simple hash function. */ +#define RF_HASH_PSID(_raid_,_psid_) ((_psid_) % ((_raid_)->pssTableSize)) + +#define RF_LOCK_PSS_MUTEX(_raidPtr,_row,_psid) \ + RF_LOCK_MUTEX((_raidPtr)->reconControl[_row] \ + ->pssTable[RF_HASH_PSID(_raidPtr, _psid)].mutex) +#define RF_UNLOCK_PSS_MUTEX(_raidPtr, _row, _psid) \ + RF_UNLOCK_MUTEX((_raidPtr)->reconControl[_row] \ + ->pssTable[RF_HASH_PSID(_raidPtr, _psid)].mutex) struct RF_ReconParityStripeStatus_s { - RF_StripeNum_t parityStripeID; /* the parity stripe ID */ - RF_ReconUnitNum_t which_ru; /* which reconstruction unit with the - * indicated parity stripe */ - RF_PSSFlags_t flags; /* flags indicating various conditions */ - void *rbuf; /* this is the accumulating xor sum */ - void *writeRbuf; /* DEBUG ONLY: a pointer to the rbuf after it - * has filled & been sent to disk */ - void *rbufsForXor[RF_PS_MAX_BUFS]; /* these are buffers still to + RF_StripeNum_t parityStripeID;/* The parity stripe ID. */ + RF_ReconUnitNum_t which_ru; /* + * Which reconstruction unit + * with the indicated parity + * stripe. + */ + RF_PSSFlags_t flags; /* + * Flags indicating various + * conditions. + */ + void *rbuf; /* + * This is the accumulating + * xor sum. + */ + void *writeRbuf; /* + * DEBUG ONLY: A pointer to + * the rbuf after it has filled + * & been sent to disk. + */ + void *rbufsForXor[RF_PS_MAX_BUFS]; + /* + * These are buffers still to * be xored into the - * accumulating sum */ - int xorBufCount; /* num buffers waiting to be xored */ - int blockCount; /* count of # proc that have blocked recon on - * this parity stripe */ - char *issued; /* issued[i]==1 <=> column i has already - * issued a read request for the indicated RU */ - RF_CallbackDesc_t *procWaitList; /* list of user procs waiting - * for recon to be done */ - RF_CallbackDesc_t *blockWaitList; /* list of disks blocked + * accumulating sum. + */ + int xorBufCount; /* + * Num buffers waiting to be + * xored. + */ + int blockCount; /* + * Count of # proc that have + * blocked recon on this parity + * stripe. + */ + char *issued; /* + * issued[i]==1 <=> column i + * has already issued a read + * request for the indicated + * RU. + */ + RF_CallbackDesc_t *procWaitList; /* + * List of user procs waiting + * for recon to be done. + */ + RF_CallbackDesc_t *blockWaitList; /* + * List of disks blocked * waiting for user write to - * complete */ - RF_CallbackDesc_t *bufWaitList; /* list of disks blocked waiting to - * acquire a buffer for this RU */ + * complete. + */ + RF_CallbackDesc_t *bufWaitList; /* + * List of disks blocked + * waiting to acquire a buffer + * for this RU. + */ RF_ReconParityStripeStatus_t *next; }; struct RF_PSStatusHeader_s { - RF_DECLARE_MUTEX(mutex) /* mutex for this hash chain */ - RF_ReconParityStripeStatus_t *chain; /* the hash chain */ + RF_DECLARE_MUTEX(mutex); /* Mutex for this hash chain. */ + RF_ReconParityStripeStatus_t *chain; /* The hash chain. */ }; -/* masks for the "flags" field above */ -#define RF_PSS_NONE 0x00000000 /* no flags */ -#define RF_PSS_UNDER_RECON 0x00000001 /* this parity stripe is + +/* Masks for the "flags" field above. 
*/ +#define RF_PSS_NONE 0x00000000 /* No flags. */ +#define RF_PSS_UNDER_RECON 0x00000001 /* + * This parity stripe is * currently under - * reconstruction */ -#define RF_PSS_FORCED_ON_WRITE 0x00000002 /* indicates a recon was + * reconstruction. + */ +#define RF_PSS_FORCED_ON_WRITE 0x00000002 /* + * Indicates a recon was * forced due to a user-write - * operation */ -#define RF_PSS_FORCED_ON_READ 0x00000004 /* ditto for read, but not - * currently implemented */ -#define RF_PSS_RECON_BLOCKED 0x00000008 /* reconstruction is currently + * operation. + */ +#define RF_PSS_FORCED_ON_READ 0x00000004 /* + * Ditto for read, but not + * currently implemented. + */ +#define RF_PSS_RECON_BLOCKED 0x00000008 /* + * Reconstruction is currently * blocked due to a pending - * user I/O */ -#define RF_PSS_CREATE 0x00000010 /* tells LookupRUStatus to - * create the entry */ -#define RF_PSS_BUFFERWAIT 0x00000020 /* someone is waiting for a - * buffer for this RU */ - -int -rf_ConfigurePSStatus(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); + * user I/O. + */ +#define RF_PSS_CREATE 0x00000010 /* + * Tells LookupRUStatus to + * create the entry. + */ +#define RF_PSS_BUFFERWAIT 0x00000020 /* + * Someone is waiting for a + * buffer for this RU. + */ -RF_PSStatusHeader_t *rf_MakeParityStripeStatusTable(RF_Raid_t * raidPtr); -void -rf_FreeParityStripeStatusTable(RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable); -RF_ReconParityStripeStatus_t * -rf_LookupRUStatus(RF_Raid_t * raidPtr, - RF_PSStatusHeader_t * pssTable, RF_StripeNum_t psID, - RF_ReconUnitNum_t which_ru, RF_PSSFlags_t flags, int *created); -void -rf_PSStatusDelete(RF_Raid_t * raidPtr, RF_PSStatusHeader_t * pssTable, - RF_ReconParityStripeStatus_t * pssPtr); -void -rf_RemoveFromActiveReconTable(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_StripeNum_t psid, RF_ReconUnitNum_t which_ru); -RF_ReconParityStripeStatus_t *rf_AllocPSStatus(RF_Raid_t * raidPtr); -void rf_FreePSStatus(RF_Raid_t * raidPtr, RF_ReconParityStripeStatus_t * p); -void rf_PrintPSStatusTable(RF_Raid_t * raidPtr, RF_RowCol_t row); +int rf_ConfigurePSStatus(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +RF_PSStatusHeader_t *rf_MakeParityStripeStatusTable(RF_Raid_t *); +void rf_FreeParityStripeStatusTable(RF_Raid_t *, RF_PSStatusHeader_t *); +RF_ReconParityStripeStatus_t *rf_LookupRUStatus(RF_Raid_t *, + RF_PSStatusHeader_t *, RF_StripeNum_t, RF_ReconUnitNum_t, + RF_PSSFlags_t, int *); +void rf_PSStatusDelete(RF_Raid_t *, RF_PSStatusHeader_t *, + RF_ReconParityStripeStatus_t *); +void rf_RemoveFromActiveReconTable(RF_Raid_t *, RF_RowCol_t, + RF_StripeNum_t, RF_ReconUnitNum_t); +RF_ReconParityStripeStatus_t *rf_AllocPSStatus(RF_Raid_t *); +void rf_FreePSStatus(RF_Raid_t *, RF_ReconParityStripeStatus_t *); +void rf_PrintPSStatusTable(RF_Raid_t *, RF_RowCol_t); -#endif /* !_RF__RF_PSSTATUS_H_ */ +#endif /* !_RF__RF_PSSTATUS_H_ */ diff --git a/sys/dev/raidframe/rf_raid.h b/sys/dev/raidframe/rf_raid.h index a78b4a41841..d1277a723ca 100644 --- a/sys/dev/raidframe/rf_raid.h +++ b/sys/dev/raidframe/rf_raid.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raid.h,v 1.6 2000/08/08 16:07:44 peter Exp $ */ +/* $OpenBSD: rf_raid.h,v 1.7 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_raid.h,v 1.12 2000/02/24 17:12:10 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,21 +28,21 @@ * rights to redistribute these changes. 
*/ -/********************************************** - * rf_raid.h -- main header file for RAID driver - **********************************************/ +/************************************************* + * rf_raid.h -- Main header file for RAID driver. + *************************************************/ -#ifndef _RF__RF_RAID_H_ -#define _RF__RF_RAID_H_ +#ifndef _RF__RF_RAID_H_ +#define _RF__RF_RAID_H_ #include "rf_archs.h" #include "rf_types.h" #include "rf_threadstuff.h" -#if defined(__NetBSD__) +#if defined(__NetBSD__) #include "rf_netbsd.h" -#elif defined(__OpenBSD__) +#elif defined(__OpenBSD__) #include "rf_openbsd.h" #endif @@ -57,17 +58,17 @@ #include "rf_reconstruct.h" #include "rf_acctrace.h" -#if RF_INCLUDE_PARITYLOGGING > 0 +#if RF_INCLUDE_PARITYLOGGING > 0 #include "rf_paritylog.h" -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ -#define RF_MAX_DISKS 128 /* max disks per array */ -#define RF_DEV2RAIDID(_dev) (DISKUNIT(_dev)) +#define RF_MAX_DISKS 128 /* Max disks per array. */ +#define RF_DEV2RAIDID(_dev) (DISKUNIT(_dev)) -#define RF_COMPONENT_LABEL_VERSION_1 1 -#define RF_COMPONENT_LABEL_VERSION 2 -#define RF_RAID_DIRTY 0 -#define RF_RAID_CLEAN 1 +#define RF_COMPONENT_LABEL_VERSION_1 1 +#define RF_COMPONENT_LABEL_VERSION 2 +#define RF_RAID_DIRTY 0 +#define RF_RAID_CLEAN 1 /* * Each row in the array is a distinct parity group, so @@ -78,218 +79,345 @@ typedef enum RF_RowStatus_e { rf_rs_degraded, rf_rs_reconstructing, rf_rs_reconfigured -} RF_RowStatus_t; +} RF_RowStatus_t; struct RF_CumulativeStats_s { - struct timeval start; /* the time when the stats were last started */ - struct timeval stop; /* the time when the stats were last stopped */ - long sum_io_us; /* sum of all user response times (us) */ - long num_ios; /* total number of I/Os serviced */ - long num_sect_moved; /* total number of sectors read or written */ + struct timeval start; /* + * The time when the stats were + * last started. + */ + struct timeval stop; /* + * The time when the stats were + * last stopped. + */ + long sum_io_us; /* + * Sum of all user response + * times (us). + */ + long num_ios; /* + * Total number of I/Os + * serviced. + */ + long num_sect_moved; /* + * Total number of sectors read + * or written. + */ }; struct RF_ThroughputStats_s { - RF_DECLARE_MUTEX(mutex) /* a mutex used to lock the configuration - * stuff */ - struct timeval start; /* timer started when numOutstandingRequests - * moves from 0 to 1 */ - struct timeval stop; /* timer stopped when numOutstandingRequests - * moves from 1 to 0 */ - RF_uint64 sum_io_us; /* total time timer is enabled */ - RF_uint64 num_ios; /* total number of ios processed by RAIDframe */ - long num_out_ios; /* number of outstanding ios */ + RF_DECLARE_MUTEX (mutex); /* + * A mutex used to lock the + * configuration stuff. + */ + struct timeval start; /* + * Timer started when + * numOutstandingRequests + * moves from 0 to 1. + */ + struct timeval stop; /* + * Timer stopped when + * numOutstandingRequests + * moves from 1 to 0. + */ + RF_uint64 sum_io_us; /* + * Total time timer is enabled. + */ + RF_uint64 num_ios; /* + * Total number of I/Os + * processed by RAIDframe. + */ + long num_out_ios; /* + * Number of outstanding I/Os. + */ }; struct RF_Raid_s { /* This portion never changes, and can be accessed without locking */ - /* an exception is Disks[][].status, which requires locking when it is - * changed. XXX this is no longer true. numSpare and friends can - * change now. 
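The RF_ThroughputStats_s comments above describe a busy-time clock rather than a wall clock: the timer starts when the outstanding-I/O count goes from 0 to 1 and stops when it falls back to 0, so sum_io_us only accumulates time the array actually had work in flight. A small user-level sketch of that bookkeeping (names invented):

#include <sys/time.h>
#include <stdio.h>

static struct timeval   busy_start;     /* cf. 'start' above */
static long long        busy_us;        /* cf. 'sum_io_us' */
static int              num_out;        /* cf. 'num_out_ios' */

static void
io_start(void)
{
        if (num_out++ == 0)             /* 0 -> 1: start the clock */
                gettimeofday(&busy_start, NULL);
}

static void
io_done(void)
{
        struct timeval now;

        if (--num_out == 0) {           /* 1 -> 0: stop and accumulate */
                gettimeofday(&now, NULL);
                busy_us += (now.tv_sec - busy_start.tv_sec) * 1000000LL +
                    (now.tv_usec - busy_start.tv_usec);
        }
}

int
main(void)
{
        io_start();
        io_start();     /* overlapping request: the clock keeps running */
        io_done();
        io_done();
        printf("array busy for %lld us\n", busy_us);
        return (0);
}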
- */ - u_int numRow; /* number of rows of disks, typically == # of - * ranks */ - u_int numCol; /* number of columns of disks, typically == # - * of disks/rank */ - u_int numSpare; /* number of spare disks */ - int maxQueueDepth; /* max disk queue depth */ - RF_SectorCount_t totalSectors; /* total number of sectors in the - * array */ - RF_SectorCount_t sectorsPerDisk; /* number of sectors on each - * disk */ - u_int logBytesPerSector; /* base-2 log of the number of bytes - * in a sector */ - u_int bytesPerSector; /* bytes in a sector */ - RF_int32 sectorMask; /* mask of bytes-per-sector */ - - RF_RaidLayout_t Layout; /* all information related to layout */ - RF_RaidDisk_t **Disks; /* all information related to physical disks */ - RF_DiskQueue_t **Queues;/* all information related to disk queues */ - RF_DiskQueueSW_t *qType;/* pointer to the DiskQueueSW used for the - component queues. */ - /* NOTE: This is an anchor point via which the queues can be + /* + * An exception is Disks[][].status, which requires locking when it is + * changed. XXX This is no longer true. numSpare and friends can + * change now. + */ + u_int numRow; /* + * Number of rows of disks, + * typically == # of ranks. + */ + u_int numCol; /* + * Number of columns of disks, + * typically == # of disks/rank. + */ + u_int numSpare; /* Number of spare disks. */ + int maxQueueDepth;/* Max disk queue depth. */ + RF_SectorCount_t totalSectors; /* + * Total number of sectors + * in the array. + */ + RF_SectorCount_t sectorsPerDisk; + /* + * Number of sectors on each + * disk. + */ + u_int logBytesPerSector; + /* + * Base-2 log of the number + * of bytes in a sector. + */ + u_int bytesPerSector; + /* Bytes in a sector. */ + RF_int32 sectorMask; /* Mask of bytes-per-sector. */ + + RF_RaidLayout_t Layout; /* + * All information related to + * layout. + */ + RF_RaidDisk_t **Disks; /* + * All information related to + * physical disks. + */ + RF_DiskQueue_t **Queues; /* + * All information related to + * disk queues. + */ + RF_DiskQueueSW_t *qType; /* + * Pointer to the DiskQueueSW + * used for the component + * queues. + */ + /* + * NOTE: This is an anchor point via which the queues can be * accessed, but the enqueue/dequeue routines in diskqueue.c use a - * local copy of this pointer for the actual accesses. */ - /* The remainder of the structure can change, and therefore requires - * locking on reads and updates */ - RF_DECLARE_MUTEX(mutex) /* mutex used to serialize access to - * the fields below */ - RF_RowStatus_t *status; /* the status of each row in the array */ - int valid; /* indicates successful configuration */ - RF_LockTableEntry_t *lockTable; /* stripe-lock table */ - RF_LockTableEntry_t *quiesceLock; /* quiesnce table */ - int numFailures; /* total number of failures in the array */ - int numNewFailures; /* number of *new* failures (that havn't - caused a mod_counter update */ - - int parity_good; /* !0 if parity is known to be correct */ - int serial_number; /* a "serial number" for this set */ - int mod_counter; /* modification counter for component labels */ - int clean; /* the clean bit for this array. */ - - int openings; /* Number of IO's which can be scheduled - simultaneously (high-level - not a - per-component limit)*/ - - int maxOutstanding; /* maxOutstanding requests (per-component) */ - int autoconfigure; /* automatically configure this RAID set. - 0 == no, 1 == yes */ - int root_partition; /* Use this set as / - 0 == no, 1 == yes*/ - int last_unit; /* last unit number (e.g. 
0 for /dev/raid0) - of this component. Used for autoconfigure - only. */ - int config_order; /* 0 .. n. The order in which the component - should be auto-configured. E.g. 0 is will - done first, (and would become raid0). - This may be in conflict with last_unit!!?! */ - /* Not currently used. */ + * local copy of this pointer for the actual accesses. + */ + /* + * The remainder of the structure can change, and therefore requires + * locking on reads and updates. + */ + RF_DECLARE_MUTEX (mutex); /* + * Mutex used to serialize + * access to the fields below. + */ + RF_RowStatus_t *status; /* + * The status of each row in + * the array. + */ + int valid; /* + * Indicates successful + * configuration. + */ + RF_LockTableEntry_t *lockTable; /* Stripe-lock table. */ + RF_LockTableEntry_t *quiesceLock; /* Quiescence table. */ + int numFailures; /* + * Total number of failures + * in the array. + */ + int numNewFailures; + /* + * Number of *new* failures + * (that haven't caused a + * mod_counter update). + */ + + int parity_good; /* + * !0 if parity is known to be + * correct. + */ + int serial_number;/* + * A "serial number" for this + * set. + */ + int mod_counter; /* + * Modification counter for + * component labels. + */ + int clean; /* + * The clean bit for this array. + */ + + int openings; /* + * Number of I/Os that can be + * scheduled simultaneously + * (high-level - not a + * per-component limit). + */ + + int maxOutstanding; + /* + * maxOutstanding requests + * (per-component). + */ + int autoconfigure; + /* + * Automatically configure + * this RAID set. + * 0 == no, 1 == yes + */ + int root_partition; + /* + * Use this set as + * 0 == no, 1 == yes. + */ + int last_unit; /* + * Last unit number (e.g. 0 + * for /dev/raid0) of this + * component. Used for + * autoconfigure only. + */ + int config_order; /* + * 0 .. n. The order in which + * the component should be + * auto-configured. + * E.g. 0 is will done first, + * (and would become raid0). + * This may be in conflict + * with last_unit !!?! + */ + /* Not currently used. */ /* - * Cleanup stuff - */ - RF_ShutdownList_t *shutdownList; /* shutdown activities */ - RF_AllocListElem_t *cleanupList; /* memory to be freed at - * shutdown time */ + * Cleanup stuff. + */ + RF_ShutdownList_t *shutdownList; /* Shutdown activities. */ + RF_AllocListElem_t *cleanupList; /* + * Memory to be freed at + * shutdown time. + */ /* - * Recon stuff - */ - RF_HeadSepLimit_t headSepLimit; - int numFloatingReconBufs; - int reconInProgress; - RF_DECLARE_COND(waitForReconCond) - RF_RaidReconDesc_t *reconDesc; /* reconstruction descriptor */ - RF_ReconCtrl_t **reconControl; /* reconstruction control structure - * pointers for each row in the array */ + * Recon stuff. + */ + RF_HeadSepLimit_t headSepLimit; + int numFloatingReconBufs; + int reconInProgress; + RF_DECLARE_COND (waitForReconCond); + RF_RaidReconDesc_t *reconDesc; /* Reconstruction descriptor. */ + RF_ReconCtrl_t **reconControl; /* + * Reconstruction control + * structure pointers for each + * row in the array. + */ /* - * Array-quiescence stuff - */ - RF_DECLARE_MUTEX(access_suspend_mutex) - RF_DECLARE_COND(quiescent_cond) - RF_IoCount_t accesses_suspended; - RF_IoCount_t accs_in_flight; - int access_suspend_release; - int waiting_for_quiescence; - RF_CallbackDesc_t *quiesce_wait_list; + * Array-quiescence stuff. 
+ */ + RF_DECLARE_MUTEX (access_suspend_mutex); + RF_DECLARE_COND (quiescent_cond); + RF_IoCount_t accesses_suspended; + RF_IoCount_t accs_in_flight; + int access_suspend_release; + int waiting_for_quiescence; + RF_CallbackDesc_t *quiesce_wait_list; /* - * Statistics - */ -#if !defined(_KERNEL) && !defined(SIMULATE) - RF_ThroughputStats_t throughputstats; -#endif /* !_KERNEL && !SIMULATE */ - RF_CumulativeStats_t userstats; - int parity_rewrite_stripes_done; - int recon_stripes_done; - int copyback_stripes_done; - - int recon_in_progress; - int parity_rewrite_in_progress; - int copyback_in_progress; + * Statistics. + */ +#if !defined(_KERNEL) && !defined(SIMULATE) + RF_ThroughputStats_t throughputstats; +#endif /* !_KERNEL && !SIMULATE */ + RF_CumulativeStats_t userstats; + int parity_rewrite_stripes_done; + int recon_stripes_done; + int copyback_stripes_done; + + int recon_in_progress; + int parity_rewrite_in_progress; + int copyback_in_progress; /* - * Engine thread control - */ - RF_DECLARE_MUTEX(node_queue_mutex) - RF_DECLARE_COND(node_queue_cond) - RF_DagNode_t *node_queue; - RF_Thread_t parity_rewrite_thread; - RF_Thread_t copyback_thread; - RF_Thread_t engine_thread; - RF_Thread_t recon_thread; - RF_ThreadGroup_t engine_tg; - int shutdown_engine; - int dags_in_flight; /* debug */ + * Engine thread control. + */ + RF_DECLARE_MUTEX (node_queue_mutex); + RF_DECLARE_COND (node_queue_cond); + RF_DagNode_t *node_queue; + RF_Thread_t parity_rewrite_thread; + RF_Thread_t copyback_thread; + RF_Thread_t engine_thread; + RF_Thread_t recon_thread; + RF_ThreadGroup_t engine_tg; + int shutdown_engine; + int dags_in_flight; /* Debug. */ /* - * PSS (Parity Stripe Status) stuff - */ - RF_FreeList_t *pss_freelist; - long pssTableSize; + * PSS (Parity Stripe Status) stuff. + */ + RF_FreeList_t *pss_freelist; + long pssTableSize; /* - * Reconstruction stuff - */ - int procsInBufWait; - int numFullReconBuffers; - RF_AccTraceEntry_t *recon_tracerecs; - unsigned long accumXorTimeUs; - RF_ReconDoneProc_t *recon_done_procs; - RF_DECLARE_MUTEX(recon_done_proc_mutex) + * Reconstruction stuff. + */ + int procsInBufWait; + int numFullReconBuffers; + RF_AccTraceEntry_t *recon_tracerecs; + unsigned long accumXorTimeUs; + RF_ReconDoneProc_t *recon_done_procs; + RF_DECLARE_MUTEX (recon_done_proc_mutex); /* - * nAccOutstanding, waitShutdown protected by desc freelist lock - * (This may seem strange, since that's a central serialization point - * for a per-array piece of data, but otherwise, it'd be an extra - * per-array lock, and that'd only be less efficient...) - */ - RF_DECLARE_COND(outstandingCond) - int waitShutdown; - int nAccOutstanding; + * nAccOutstanding, waitShutdown protected by desc freelist lock + * (This may seem strange, since that's a central serialization point + * for a per-array piece of data, but otherwise, it'd be an extra + * per-array lock, and that'd only be less efficient...) + */ + RF_DECLARE_COND (outstandingCond); + int waitShutdown; + int nAccOutstanding; - RF_DiskId_t **diskids; - RF_DiskId_t *sparediskids; + RF_DiskId_t **diskids; + RF_DiskId_t *sparediskids; - int raidid; - RF_AccTotals_t acc_totals; - int keep_acc_totals; + int raidid; + RF_AccTotals_t acc_totals; + int keep_acc_totals; - struct raidcinfo **raid_cinfo; /* array of component info */ + struct raidcinfo **raid_cinfo; /* Array of component info. 
*/ - int terminate_disk_queues; + int terminate_disk_queues; /* - * XXX - * - * config-specific information should be moved - * somewhere else, or at least hung off this - * in some generic way - */ + * XXX + * + * Config-specific information should be moved + * somewhere else, or at least hung off this + * in some generic way. + */ - /* used by rf_compute_workload_shift */ - RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL]; + /* Used by rf_compute_workload_shift. */ + RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL]; - /* used by declustering */ - int noRotate; + /* Used by declustering. */ + int noRotate; -#if RF_INCLUDE_PARITYLOGGING > 0 +#if RF_INCLUDE_PARITYLOGGING > 0 /* used by parity logging */ - RF_SectorCount_t regionLogCapacity; - RF_ParityLogQueue_t parityLogPool; /* pool of unused parity logs */ - RF_RegionInfo_t *regionInfo; /* array of region state */ - int numParityLogs; - int numSectorsPerLog; - int regionParityRange; - int logsInUse; /* debugging */ - RF_ParityLogDiskQueue_t parityLogDiskQueue; /* state of parity - * logging disk work */ - RF_RegionBufferQueue_t regionBufferPool; /* buffers for holding - * region log */ - RF_RegionBufferQueue_t parityBufferPool; /* buffers for holding - * parity */ - caddr_t parityLogBufferHeap; /* pool of unused parity logs */ - RF_Thread_t pLogDiskThreadHandle; - -#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ + RF_SectorCount_t regionLogCapacity; + RF_ParityLogQueue_t parityLogPool;/* + * Pool of unused parity logs. + */ + RF_RegionInfo_t *regionInfo; /* Array of region state. */ + int numParityLogs; + int numSectorsPerLog; + int regionParityRange; + int logsInUse; /* Debugging. */ + RF_ParityLogDiskQueue_t parityLogDiskQueue; + /* + * State of parity logging + * disk work. + */ + RF_RegionBufferQueue_t regionBufferPool; + /* + * buffers for holding region + * log. + */ + RF_RegionBufferQueue_t parityBufferPool; + /* + * Buffers for holding parity. + */ + caddr_t parityLogBufferHeap; + /* + * Pool of unused parity logs. + */ + RF_Thread_t pLogDiskThreadHandle; + +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ }; -#endif /* !_RF__RF_RAID_H_ */ + +#endif /* !_RF__RF_RAID_H_ */ diff --git a/sys/dev/raidframe/rf_raid0.c b/sys/dev/raidframe/rf_raid0.c index be28e945deb..0cf113db220 100644 --- a/sys/dev/raidframe/rf_raid0.c +++ b/sys/dev/raidframe/rf_raid0.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raid0.c,v 1.3 2000/01/11 18:02:23 peter Exp $ */ +/* $OpenBSD: rf_raid0.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_raid0.c,v 1.4 2000/01/07 03:41:02 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,11 +28,11 @@ * rights to redistribute these changes. */ -/*************************************** +/***************************************** * - * rf_raid0.c -- implements RAID Level 0 + * rf_raid0.c -- Implements RAID Level 0. 
* - ***************************************/ + *****************************************/ #include "rf_types.h" #include "rf_raid.h" @@ -47,75 +48,67 @@ typedef struct RF_Raid0ConfigInfo_s { RF_RowCol_t *stripeIdentifier; -} RF_Raid0ConfigInfo_t; +} RF_Raid0ConfigInfo_t; -int -rf_ConfigureRAID0( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) +int +rf_ConfigureRAID0(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_Raid0ConfigInfo_t *info; RF_RowCol_t i; - /* create a RAID level 0 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid0ConfigInfo_t), (RF_Raid0ConfigInfo_t *), raidPtr->cleanupList); + /* Create a RAID level 0 configuration structure. */ + RF_MallocAndAdd(info, sizeof(RF_Raid0ConfigInfo_t), + (RF_Raid0ConfigInfo_t *), raidPtr->cleanupList); if (info == NULL) return (ENOMEM); layoutPtr->layoutSpecificInfo = (void *) info; - RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); + RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * + sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); if (info->stripeIdentifier == NULL) return (ENOMEM); for (i = 0; i < raidPtr->numCol; i++) info->stripeIdentifier[i] = i; RF_ASSERT(raidPtr->numRow == 1); - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * raidPtr->numCol * layoutPtr->sectorsPerStripeUnit; + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * + raidPtr->numCol * layoutPtr->sectorsPerStripeUnit; layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->dataSectorsPerStripe = raidPtr->numCol * layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->dataSectorsPerStripe = raidPtr->numCol * + layoutPtr->sectorsPerStripeUnit; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << + raidPtr->logBytesPerSector; layoutPtr->numDataCol = raidPtr->numCol; layoutPtr->numParityCol = 0; return (0); } -void -rf_MapSectorRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapSectorRAID0(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { - RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; + RF_StripeNum_t SUID = + raidSector / raidPtr->Layout.sectorsPerStripeUnit; *row = 0; *col = SUID % raidPtr->numCol; - *diskSector = (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit + + *diskSector = (SUID / raidPtr->numCol) * + raidPtr->Layout.sectorsPerStripeUnit + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void -rf_MapParityRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapParityRAID0(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { *row = *col = 0; *diskSector = 0; } -void -rf_IdentifyStripeRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) +void +rf_IdentifyStripeRAID0( RF_Raid_t *raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t **diskids, RF_RowCol_t *outRow) { RF_Raid0ConfigInfo_t *info; @@ -124,38 +117,29 @@ rf_IdentifyStripeRAID0( *outRow = 0; } -void -rf_MapSIDToPSIDRAID0( - RF_RaidLayout_t * 
layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) +void +rf_MapSIDToPSIDRAID0(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID, + RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru) { *which_ru = 0; *psID = stripeID; } -void -rf_RAID0DagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) +void +rf_RAID0DagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc) { *createFunc = ((type == RF_IO_TYPE_READ) ? - (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRAID0WriteDAG); + (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : + (RF_VoidFuncPtr) rf_CreateRAID0WriteDAG); } -int -rf_VerifyParityRAID0( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, - int correct_it, - RF_RaidAccessFlags_t flags) +int +rf_VerifyParityRAID0(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, + RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags) { /* - * No parity is always okay. - */ + * No parity is always okay. + */ return (RF_PARITY_OKAY); } diff --git a/sys/dev/raidframe/rf_raid0.h b/sys/dev/raidframe/rf_raid0.h index fbc74006cc7..11de277a470 100644 --- a/sys/dev/raidframe/rf_raid0.h +++ b/sys/dev/raidframe/rf_raid0.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raid0.h,v 1.2 1999/02/16 00:03:14 niklas Exp $ */ +/* $OpenBSD: rf_raid0.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_raid0.h,v 1.3 1999/02/05 00:06:15 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,32 +28,23 @@ * rights to redistribute these changes. */ -/* rf_raid0.h - header file for RAID Level 0 */ +/* rf_raid0.h - Header file for RAID Level 0. 
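rf_MapSectorRAID0() above is pure address arithmetic: stripe units are dealt round-robin across the columns. A standalone restatement with made-up geometry makes the formula concrete (the real driver takes sectorsPerStripeUnit and numCol from the layout and raid structures):

#include <stdio.h>

int
main(void)
{
        unsigned long sectorsPerSU = 32;        /* sectors per stripe unit */
        unsigned long numCol = 4;               /* disks in the RAID 0 set */
        unsigned long raidSector = 1000;        /* logical array sector */
        unsigned long SUID, col, diskSector;

        SUID = raidSector / sectorsPerSU;       /* which stripe unit: 31 */
        col = SUID % numCol;                    /* round-robin column: 3 */
        diskSector = (SUID / numCol) * sectorsPerSU +
            (raidSector % sectorsPerSU);        /* 7 * 32 + 8 = 232 */

        printf("logical sector %lu -> column %lu, disk sector %lu\n",
            raidSector, col, diskSector);
        return (0);
}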
*/ -#ifndef _RF__RF_RAID0_H_ -#define _RF__RF_RAID0_H_ +#ifndef _RF__RF_RAID0_H_ +#define _RF__RF_RAID0_H_ -int -rf_ConfigureRAID0(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -void -rf_MapSectorRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID0(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAID0DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -int -rf_VerifyParityRAID0(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); +int rf_ConfigureRAID0(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +void rf_MapSectorRAID0(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapParityRAID0(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_IdentifyStripeRAID0(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t **, RF_RowCol_t *); +void rf_MapSIDToPSIDRAID0(RF_RaidLayout_t *, RF_StripeNum_t, + RF_StripeNum_t *, RF_ReconUnitNum_t *); +void rf_RAID0DagSelect(RF_Raid_t *, RF_IoType_t, RF_AccessStripeMap_t *, + RF_VoidFuncPtr *); +int rf_VerifyParityRAID0(RF_Raid_t *, RF_RaidAddr_t, RF_PhysDiskAddr_t *, + int, RF_RaidAccessFlags_t); -#endif /* !_RF__RF_RAID0_H_ */ +#endif /* !_RF__RF_RAID0_H_ */ diff --git a/sys/dev/raidframe/rf_raid1.c b/sys/dev/raidframe/rf_raid1.c index aad180d4e33..e9a3dd31fe1 100644 --- a/sys/dev/raidframe/rf_raid1.c +++ b/sys/dev/raidframe/rf_raid1.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raid1.c,v 1.4 2000/01/11 18:02:23 peter Exp $ */ +/* $OpenBSD: rf_raid1.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_raid1.c,v 1.5 2000/01/08 22:57:30 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,7 +30,7 @@ /***************************************************************************** * - * rf_raid1.c -- implements RAID Level 1 + * rf_raid1.c -- Implements RAID Level 1. * *****************************************************************************/ @@ -53,26 +54,28 @@ typedef struct RF_Raid1ConfigInfo_s { RF_RowCol_t **stripeIdentifier; -} RF_Raid1ConfigInfo_t; -/* start of day code specific to RAID level 1 */ -int -rf_ConfigureRAID1( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) +} RF_Raid1ConfigInfo_t; + + +/* Start of day code specific to RAID level 1. */ +int +rf_ConfigureRAID1(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_Raid1ConfigInfo_t *info; RF_RowCol_t i; - /* create a RAID level 1 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t), (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList); + /* Create a RAID level 1 configuration structure. */ + RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t), + (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList); if (info == NULL) return (ENOMEM); layoutPtr->layoutSpecificInfo = (void *) info; /* ... and fill it in. 
*/ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList); + info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, + raidPtr->cleanupList); if (info->stripeIdentifier == NULL) return (ENOMEM); for (i = 0; i < (raidPtr->numCol / 2); i++) { @@ -82,52 +85,52 @@ rf_ConfigureRAID1( RF_ASSERT(raidPtr->numRow == 1); - /* this implementation of RAID level 1 uses one row of numCol disks - * and allows multiple (numCol / 2) stripes per row. A stripe + /* + * This implementation of RAID level 1 uses one row of numCol disks + * and allows multiple (numCol / 2) stripes per row. A stripe * consists of a single data unit and a single parity (mirror) unit. - * stripe id = raidAddr / stripeUnitSize */ - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit; - layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2); + * Stripe id = raidAddr / stripeUnitSize. + */ + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * + (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit; + layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * + (raidPtr->numCol / 2); layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << + raidPtr->logBytesPerSector; layoutPtr->numDataCol = 1; layoutPtr->numParityCol = 1; return (0); } -/* returns the physical disk location of the primary copy in the mirror pair */ -void -rf_MapSectorRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +/* + * Returns the physical disk location of the primary copy in the mirror pair. + */ +void +rf_MapSectorRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); *row = 0; *col = 2 * mirrorPair; - *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *diskSector = ((SUID / (raidPtr->numCol / 2)) * + raidPtr->Layout.sectorsPerStripeUnit) + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -/* Map Parity +/* + * Map Parity. * - * returns the physical disk location of the secondary copy in the mirror - * pair + * Returns the physical disk location of the secondary copy in the mirror + * pair. 
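The RAID 1 mapping above, together with the mirror-side mapping that follows, treats the array as numCol/2 independent mirror pairs: the primary copy of a stripe unit lives on the even column of its pair and the mirror on the odd one, at the same disk offset. A worked example with invented geometry:

#include <stdio.h>

int
main(void)
{
        unsigned long sectorsPerSU = 64;        /* sectors per stripe unit */
        unsigned long numCol = 6;               /* i.e. 3 mirror pairs */
        unsigned long raidSector = 500;         /* logical array sector */
        unsigned long SUID, pair, offset, diskSector;

        SUID = raidSector / sectorsPerSU;       /* stripe unit 7 */
        pair = SUID % (numCol / 2);             /* mirror pair 1 */
        offset = raidSector % sectorsPerSU;     /* 52 sectors into the unit */
        diskSector = (SUID / (numCol / 2)) * sectorsPerSU + offset;

        printf("primary copy: column %lu, disk sector %lu\n",
            2 * pair, diskSector);              /* column 2, sector 180 */
        printf("mirror copy:  column %lu, disk sector %lu\n",
            2 * pair + 1, diskSector);          /* column 3, sector 180 */
        return (0);
}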
*/ -void -rf_MapParityRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2); @@ -135,22 +138,23 @@ rf_MapParityRAID1( *row = 0; *col = (2 * mirrorPair) + 1; - *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); + *diskSector = ((SUID / (raidPtr->numCol / 2)) * + raidPtr->Layout.sectorsPerStripeUnit) + + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -/* IdentifyStripeRAID1 +/* + * IdentifyStripeRAID1 * - * returns a list of disks for a given redundancy group + * Returns a list of disks for a given redundancy group. */ -void -rf_IdentifyStripeRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) +void +rf_IdentifyStripeRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t **diskids, RF_RowCol_t *outRow) { - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); + RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, + addr); RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo; RF_ASSERT(stripeID >= 0); RF_ASSERT(addr >= 0); @@ -160,16 +164,14 @@ rf_IdentifyStripeRAID1( } -/* MapSIDToPSIDRAID1 +/* + * MapSIDToPSIDRAID1 * - * maps a logical stripe to a stripe in the redundant array + * Maps a logical stripe to a stripe in the redundant array. */ -void -rf_MapSIDToPSIDRAID1( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) +void +rf_MapSIDToPSIDRAID1(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID, + RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru) { *which_ru = 0; *psID = stripeID; @@ -177,25 +179,22 @@ rf_MapSIDToPSIDRAID1( -/****************************************************************************** - * select a graph to perform a single-stripe access +/***************************************************************************** + * Select a graph to perform a single-stripe access. * - * Parameters: raidPtr - description of the physical array - * type - type of operation (read or write) requested - * asmap - logical & physical addresses for this access - * createFunc - name of function to use to create the graph + * Parameters: raidPtr - Description of the physical array. + * type - Type of operation (read or write) requested. + * asmap - Logical & physical addresses for this access. + * createFunc - Name of function to use to create the graph. *****************************************************************************/ -void -rf_RAID1DagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) +void +rf_RAID1DagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc) { RF_RowCol_t frow, fcol, or, oc; RF_PhysDiskAddr_t *failedPDA; - int prior_recon; + int prior_recon; RF_RowStatus_t rstat; RF_SectorNum_t oo; @@ -203,58 +202,63 @@ rf_RAID1DagSelect( RF_ASSERT(RF_IO_IS_R_OR_W(type)); if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! 
Aborting I/O operation.\n"); + RF_ERRORMSG("Multiple disks failed in a single group !" + " Aborting I/O operation.\n"); *createFunc = NULL; return; } if (asmap->numDataFailed + asmap->numParityFailed) { /* - * We've got a fault. Re-map to spare space, iff applicable. - * Shouldn't the arch-independent code do this for us? - * Anyway, it turns out if we don't do this here, then when - * we're reconstructing, writes go only to the surviving - * original disk, and aren't reflected on the reconstructed - * spare. Oops. --jimz - */ + * We've got a fault. Re-map to spare space, iff applicable. + * Shouldn't the arch-independent code do this for us ? + * Anyway, it turns out if we don't do this here, then when + * we're reconstructing, writes go only to the surviving + * original disk, and aren't reflected on the reconstructed + * spare. Oops. --jimz + */ failedPDA = asmap->failedPDAs[0]; frow = failedPDA->row; fcol = failedPDA->col; rstat = raidPtr->status[frow]; prior_recon = (rstat == rf_rs_reconfigured) || ( (rstat == rf_rs_reconstructing) ? - rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); + rf_CheckRUReconstructed(raidPtr->reconControl[frow] + ->reconMap, failedPDA->startSector) : 0); if (prior_recon) { or = frow; oc = fcol; oo = failedPDA->startSector; /* - * If we did distributed sparing, we'd monkey with that here. - * But we don't, so we'll - */ + * If we did distributed sparing, we'd monkey with + * that here. + * But we don't, so we'll. + */ failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; /* - * Redirect other components, iff necessary. This looks - * pretty suspicious to me, but it's what the raid5 - * DAG select does. - */ + * Redirect other components, iff necessary. This looks + * pretty suspicious to me, but it's what the raid5 + * DAG select does. 
+ */ if (asmap->parityInfo->next) { if (failedPDA == asmap->parityInfo) { failedPDA->next->row = failedPDA->row; failedPDA->next->col = failedPDA->col; } else { - if (failedPDA == asmap->parityInfo->next) { - asmap->parityInfo->row = failedPDA->row; - asmap->parityInfo->col = failedPDA->col; + if (failedPDA == + asmap->parityInfo->next) { + asmap->parityInfo->row = + failedPDA->row; + asmap->parityInfo->col = + failedPDA->col; } } } if (rf_dagDebug || rf_mapDebug) { - printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - raidPtr->raidid, type, or, oc, - (long) oo, failedPDA->row, - failedPDA->col, + printf("raid%d: Redirected type '%c' r %d c %d" + " o %ld -> r %d c %d o %ld.\n", + raidPtr->raidid, type, or, oc, (long) oo, + failedPDA->row, failedPDA->col, (long) failedPDA->startSector); } asmap->numDataFailed = asmap->numParityFailed = 0; @@ -262,23 +266,21 @@ rf_RAID1DagSelect( } if (type == RF_IO_TYPE_READ) { if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorIdleReadDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateMirrorIdleReadDAG; else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateRaidOneDegradedReadDAG; } else { *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; } } -int -rf_VerifyParityRAID1( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, - int correct_it, - RF_RaidAccessFlags_t flags) +int +rf_VerifyParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, + RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags) { - int nbytes, bcount, stripeWidth, ret, i, j, nbad, *bbufs; + int nbytes, bcount, stripeWidth, ret, i, j, nbad, *bbufs; RF_DagNode_t *blockNode, *unblockNode, *wrBlock; RF_DagHeader_t *rd_dag_h, *wr_dag_h; RF_AccessStripeMapHeader_t *asm_h; @@ -289,7 +291,7 @@ rf_VerifyParityRAID1( RF_AccessStripeMap_t *aasm; RF_SectorCount_t nsector; RF_RaidAddr_t startAddr; - char *buf, *buf1, *buf2; + char *buf, *buf1, *buf2; RF_PhysDiskAddr_t *pda; RF_StripeNum_t psID; RF_MCPair_t *mcpair; @@ -319,14 +321,14 @@ rf_VerifyParityRAID1( if (buf == NULL) goto done; if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n", - raidPtr->raidid, (long) buf, bcount, (long) buf, - (long) buf + bcount); + printf("raid%d: RAID1 parity verify: buf=%lx bcount=%d" + " (%lx - %lx).\n", raidPtr->raidid, (long) buf, bcount, + (long) buf, (long) buf + bcount); } /* - * Generate a DAG which will read the entire stripe- then we can - * just compare data chunks versus "parity" chunks. - */ + * Generate a DAG that will read the entire stripe- then we can + * just compare data chunks versus "parity" chunks. + */ rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags, @@ -337,19 +339,20 @@ rf_VerifyParityRAID1( unblockNode = blockNode->succedents[0]->succedents[0]; /* - * Map the access to physical disk addresses (PDAs)- this will - * get us both a list of data addresses, and "parity" addresses - * (which are really mirror copies). - */ - asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, - buf, RF_DONT_REMAP); + * Map the access to physical disk addresses (PDAs)- this will + * get us both a list of data addresses, and "parity" addresses + * (which are really mirror copies). 
+ */ + asm_h = rf_MapAccess(raidPtr, startAddr, + layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); aasm = asm_h->stripeMap; buf1 = buf; /* - * Loop through the data blocks, setting up read nodes for each. - */ - for (pda = aasm->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { + * Loop through the data blocks, setting up read nodes for each. + */ + for (pda = aasm->physInfo, i = 0; i < layoutPtr->numDataCol; + i++, pda = pda->next) { RF_ASSERT(pda); rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); @@ -363,28 +366,31 @@ rf_VerifyParityRAID1( blockNode->succedents[i]->params[0].p = pda; blockNode->succedents[i]->params[1].p = buf1; blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + blockNode->succedents[i]->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); buf1 += nbytes; } RF_ASSERT(pda == NULL); /* - * keep i, buf1 running - * - * Loop through parity blocks, setting up read nodes for each. - */ - for (pda = aasm->parityInfo; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++, pda = pda->next) { + * Keep i, buf1 running. + * + * Loop through parity blocks, setting up read nodes for each. + */ + for (pda = aasm->parityInfo; i < layoutPtr->numDataCol + + layoutPtr->numParityCol; i++, pda = pda->next) { RF_ASSERT(pda); rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); RF_ASSERT(pda->numSector != 0); if (rf_TryToRedirectPDA(raidPtr, pda, 0)) { - /* cannot verify parity with dead disk */ + /* Cannot verify parity with dead disk. */ goto done; } pda->bufPtr = buf1; blockNode->succedents[i]->params[0].p = pda; blockNode->succedents[i]->params[1].p = buf1; blockNode->succedents[i]->params[2].v = psID; - blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + blockNode->succedents[i]->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); buf1 += nbytes; } RF_ASSERT(pda == NULL); @@ -393,8 +399,8 @@ rf_VerifyParityRAID1( rd_dag_h->tracerec = &tracerec; if (rf_verifyParityDebug > 1) { - printf("raid%d: RAID1 parity verify read dag:\n", - raidPtr->raidid); + printf("raid%d: RAID1 parity verify read dag:\n", + raidPtr->raidid); rf_PrintDAGList(rd_dag_h); } RF_LOCK_MUTEX(mcpair->mutex); @@ -407,34 +413,35 @@ rf_VerifyParityRAID1( RF_UNLOCK_MUTEX(mcpair->mutex); if (rd_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n"); + RF_ERRORMSG("Unable to verify raid1 parity:" + " can't read stripe.\n"); ret = RF_PARITY_COULD_NOT_VERIFY; goto done; } /* - * buf1 is the beginning of the data blocks chunk - * buf2 is the beginning of the parity blocks chunk - */ + * buf1 is the beginning of the data blocks chunk. + * buf2 is the beginning of the parity blocks chunk. + */ buf1 = buf; buf2 = buf + (nbytes * layoutPtr->numDataCol); ret = RF_PARITY_OKAY; /* - * bbufs is "bad bufs"- an array whose entries are the data - * column numbers where we had miscompares. (That is, column 0 - * and column 1 of the array are mirror copies, and are considered - * "data column 0" for this purpose). - */ + * bbufs is "bad bufs"- an array whose entries are the data + * column numbers where we had miscompares. (That is, column 0 + * and column 1 of the array are mirror copies, and are considered + * "data column 0" for this purpose). + */ RF_MallocAndAdd(bbufs, layoutPtr->numParityCol * sizeof(int), (int *), allocList); nbad = 0; /* - * Check data vs "parity" (mirror copy). 
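 *
 * With numDataCol == 1, buf effectively holds the data copy immediately
 * followed by its mirror copy, each nbytes long, so the loop below
 * simply bcmp()s the two chunks byte for byte.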
- */ + * Check data vs "parity" (mirror copy). + */ for (i = 0; i < layoutPtr->numDataCol; i++) { if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n", - raidPtr->raidid, nbytes, i, (long) buf1, - (long) buf2, (long) buf); + printf("raid%d: RAID1 parity verify %d bytes: i=%d" + " buf1=%lx buf2=%lx buf=%lx.\n", raidPtr->raidid, + nbytes, i, (long) buf1, (long) buf2, (long) buf); } ret = bcmp(buf1, buf2, nbytes); if (ret) { @@ -444,17 +451,22 @@ rf_VerifyParityRAID1( break; } printf("psid=%ld j=%d\n", (long) psID, j); - printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0] & 0xff, - buf1[1] & 0xff, buf1[2] & 0xff, buf1[3] & 0xff, buf1[4] & 0xff); - printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0] & 0xff, - buf2[1] & 0xff, buf2[2] & 0xff, buf2[3] & 0xff, buf2[4] & 0xff); + printf("buf1 %02x %02x %02x %02x %02x\n", + buf1[0] & 0xff, buf1[1] & 0xff, + buf1[2] & 0xff, buf1[3] & 0xff, + buf1[4] & 0xff); + printf("buf2 %02x %02x %02x %02x %02x\n", + buf2[0] & 0xff, buf2[1] & 0xff, + buf2[2] & 0xff, buf2[3] & 0xff, + buf2[4] & 0xff); } if (rf_verifyParityDebug) { - printf("raid%d: RAID1: found bad parity, i=%d\n", raidPtr->raidid, i); + printf("raid%d: RAID1: found bad parity," + " i=%d.\n", raidPtr->raidid, i); } /* - * Parity is bad. Keep track of which columns were bad. - */ + * Parity is bad. Keep track of which columns were bad. + */ if (bbufs) bbufs[nbad] = i; nbad++; @@ -467,26 +479,27 @@ rf_VerifyParityRAID1( if ((ret != RF_PARITY_OKAY) && correct_it) { ret = RF_PARITY_COULD_NOT_CORRECT; if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify: parity not correct\n", raidPtr->raidid); + printf("raid%d: RAID1 parity verify:" + " parity not correct.\n", raidPtr->raidid); } if (bbufs == NULL) goto done; /* - * Make a DAG with one write node for each bad unit. We'll simply - * write the contents of the data unit onto the parity unit for - * correction. (It's possible that the mirror copy was the correct - * copy, and that we're spooging good data by writing bad over it, - * but there's no way we can know that. - */ + * Make a DAG with one write node for each bad unit. We'll + * simply write the contents of the data unit onto the parity + * unit for correction. (It's possible that the mirror copy + * was the correct copy, and that we're spooging good data by + * writing bad over it, but there's no way we can know that. + */ wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf, - rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags, - RF_IO_NORMAL_PRIORITY); + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, + flags, RF_IO_NORMAL_PRIORITY); if (wr_dag_h == NULL) goto done; wrBlock = wr_dag_h->succedents[0]; /* - * Fill in a write node for each bad compare. - */ + * Fill in a write node for each bad compare. 
+ */ for (i = 0; i < nbad; i++) { j = i + layoutPtr->numDataCol; pda = blockNode->succedents[j]->params[0].p; @@ -494,7 +507,9 @@ rf_VerifyParityRAID1( wrBlock->succedents[i]->params[0].p = pda; wrBlock->succedents[i]->params[1].p = pda->bufPtr; wrBlock->succedents[i]->params[2].v = psID; - wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); + wrBlock->succedents[0]->params[3].v = + RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, + which_ru); } bzero((char *) &tracerec, sizeof(tracerec)); wr_dag_h->tracerec = &tracerec; @@ -504,24 +519,25 @@ rf_VerifyParityRAID1( } RF_LOCK_MUTEX(mcpair->mutex); mcpair->flag = 0; - /* fire off the write DAG */ - rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, - (void *) mcpair); + /* Fire off the write DAG. */ + rf_DispatchDAG(wr_dag_h, (void (*) (void *)) + rf_MCPairWakeupFunc, (void *) mcpair); while (!mcpair->flag) { RF_WAIT_COND(mcpair->cond, mcpair->mutex); } RF_UNLOCK_MUTEX(mcpair->mutex); if (wr_dag_h->status != rf_enable) { - RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n"); + RF_ERRORMSG("Unable to correct RAID1 parity in" + " VerifyParity.\n"); goto done; } ret = RF_PARITY_CORRECTED; } done: /* - * All done. We might've gotten here without doing part of the function, - * so cleanup what we have to and return our running status. - */ + * All done. We might've gotten here without doing part of the function, + * so cleanup what we have to and return our running status. + */ if (asm_h) rf_FreeAccessStripeMap(asm_h); if (rd_dag_h) @@ -532,24 +548,29 @@ done: rf_FreeMCPair(mcpair); rf_FreeAllocList(allocList); if (rf_verifyParityDebug) { - printf("raid%d: RAID1 parity verify, returning %d\n", - raidPtr->raidid, ret); + printf("raid%d: RAID1 parity verify, returning %d.\n", + raidPtr->raidid, ret); } return (ret); } -int -rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have - * to return it */ - int use_committed; /* whether to use a committed or an available - * recon buffer */ +int +rf_SubmitReconBufferRAID1( + RF_ReconBuffer_t *rbuf, /* The recon buffer to submit. */ + int keep_it, /* + * Whether we can keep this buffer + * or we have to return it ? + */ + int use_committed /* + * Whether to use a committed or + * an available recon buffer ? 
+ */ +) { RF_ReconParityStripeStatus_t *pssPtr; RF_ReconCtrl_t *reconCtrlPtr; RF_RaidLayout_t *layoutPtr; - int retcode, created; + int retcode, created; RF_CallbackDesc_t *cb, *p; RF_ReconBuffer_t *t; RF_Raid_t *raidPtr; @@ -566,9 +587,9 @@ rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); if (rf_reconbufferDebug) { - printf("raid%d: RAID1 reconbuffer submission r%d c%d psid %ld ru%d (failed offset %ld)\n", - raidPtr->raidid, rbuf->row, rbuf->col, - (long) rbuf->parityStripeID, rbuf->which_ru, + printf("raid%d: RAID1 reconbuffer submission r%d c%d psid %ld" + " ru%d (failed offset %ld).\n", raidPtr->raidid, rbuf->row, + rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset); } if (rf_reconDebug) { @@ -576,8 +597,8 @@ rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) (long) rbuf->parityStripeID, (long) rbuf->buffer); printf("RAID1 psid %ld %02x %02x %02x %02x %02x\n", (long) rbuf->parityStripeID, - rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2], rbuf->buffer[3], - rbuf->buffer[4]); + rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2], + rbuf->buffer[3], rbuf->buffer[4]); } RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); @@ -585,25 +606,29 @@ rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); - RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten - * an rbuf for it */ + RF_ASSERT(pssPtr); /* + * If it didn't exist, we wouldn't have gotten + * an rbuf for it. + */ /* - * Since this is simple mirroring, the first submission for a stripe is also - * treated as the last. - */ + * Since this is simple mirroring, the first submission for a stripe + * is also treated as the last. 
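 *
 * (Presumably this is because a mirror pair has exactly one surviving
 * copy of each reconstruction unit, so a single full submission already
 * carries everything needed to rebuild it.)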
+ */ t = NULL; if (keep_it) { if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: keeping rbuf\n", - raidPtr->raidid); + printf("raid%d: RAID1 rbuf submission: keeping rbuf.\n", + raidPtr->raidid); } t = rbuf; } else { if (use_committed) { if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: using committed rbuf\n", raidPtr->raidid); + printf("raid%d: RAID1 rbuf submission:" + " using committed rbuf.\n", + raidPtr->raidid); } t = reconCtrlPtr->committedRbufs; RF_ASSERT(t); @@ -612,7 +637,9 @@ rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) } else if (reconCtrlPtr->floatingRbufs) { if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: using floating rbuf\n", raidPtr->raidid); + printf("raid%d: RAID1 rbuf submission:" + " using floating rbuf.\n", + raidPtr->raidid); } t = reconCtrlPtr->floatingRbufs; reconCtrlPtr->floatingRbufs = t->next; @@ -621,14 +648,15 @@ rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) } if (t == NULL) { if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: waiting for rbuf\n", raidPtr->raidid); + printf("raid%d: RAID1 rbuf submission:" + " waiting for rbuf.\n", raidPtr->raidid); } RF_ASSERT((keep_it == 0) && (use_committed == 0)); raidPtr->procsInBufWait++; if ((raidPtr->procsInBufWait == (raidPtr->numCol - 1)) && (raidPtr->numFullReconBuffers == 0)) { /* ruh-ro */ - RF_ERRORMSG("Buffer wait deadlock\n"); + RF_ERRORMSG("Buffer wait deadlock.\n"); rf_PrintPSStatusTable(raidPtr, rbuf->row); RF_PANIC(); } @@ -640,11 +668,12 @@ rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) cb->callbackArg2.v = rbuf->which_ru; cb->next = NULL; if (reconCtrlPtr->bufferWaitList == NULL) { - /* we are the wait list- lucky us */ + /* We are the wait list- lucky us. */ reconCtrlPtr->bufferWaitList = cb; } else { - /* append to wait list */ - for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); + /* Append to wait list. */ + for (p = reconCtrlPtr->bufferWaitList; p->next; + p = p->next); p->next = cb; } retcode = 1; @@ -659,30 +688,30 @@ rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed) t->spRow = rbuf->spRow; t->spCol = rbuf->spCol; t->spOffset = rbuf->spOffset; - /* Swap buffers. DANCE! */ + /* Swap buffers. DANCE ! */ ta = t->buffer; t->buffer = rbuf->buffer; rbuf->buffer = ta; } /* - * Use the rbuf we've been given as the target. - */ + * Use the rbuf we've been given as the target. + */ RF_ASSERT(pssPtr->rbuf == NULL); pssPtr->rbuf = t; t->count = 1; /* - * Below, we use 1 for numDataCol (which is equal to the count in the - * previous line), so we'll always be done. - */ + * Below, we use 1 for numDataCol (which is equal to the count in the + * previous line), so we'll always be done. 
+ */ rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1); out: RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); if (rf_reconbufferDebug) { - printf("raid%d: RAID1 rbuf submission: returning %d\n", - raidPtr->raidid, retcode); + printf("raid%d: RAID1 rbuf submission: returning %d.\n", + raidPtr->raidid, retcode); } return (retcode); } diff --git a/sys/dev/raidframe/rf_raid1.h b/sys/dev/raidframe/rf_raid1.h index ef2201cacfa..fefcff4f35c 100644 --- a/sys/dev/raidframe/rf_raid1.h +++ b/sys/dev/raidframe/rf_raid1.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raid1.h,v 1.2 1999/02/16 00:03:16 niklas Exp $ */ +/* $OpenBSD: rf_raid1.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_raid1.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,37 +28,26 @@ * rights to redistribute these changes. */ -/* header file for RAID Level 1 */ +/* Header file for RAID Level 1. */ -#ifndef _RF__RF_RAID1_H_ -#define _RF__RF_RAID1_H_ +#ifndef _RF__RF_RAID1_H_ +#define _RF__RF_RAID1_H_ #include "rf_types.h" -int -rf_ConfigureRAID1(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -void -rf_MapSectorRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID1(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAID1DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); -int -rf_VerifyParityRAID1(RF_Raid_t * raidPtr, RF_RaidAddr_t raidAddr, - RF_PhysDiskAddr_t * parityPDA, int correct_it, RF_RaidAccessFlags_t flags); -int -rf_SubmitReconBufferRAID1(RF_ReconBuffer_t * rbuf, int keep_int, - int use_committed); +int rf_ConfigureRAID1(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +void rf_MapSectorRAID1(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapParityRAID1(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_IdentifyStripeRAID1(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t **, RF_RowCol_t *); +void rf_MapSIDToPSIDRAID1(RF_RaidLayout_t *, + RF_StripeNum_t, RF_StripeNum_t *, RF_ReconUnitNum_t *); +void rf_RAID1DagSelect(RF_Raid_t *, RF_IoType_t, + RF_AccessStripeMap_t *, RF_VoidFuncPtr *); +int rf_VerifyParityRAID1(RF_Raid_t *, RF_RaidAddr_t, + RF_PhysDiskAddr_t *, int, RF_RaidAccessFlags_t); +int rf_SubmitReconBufferRAID1(RF_ReconBuffer_t *, int, int); -#endif /* !_RF__RF_RAID1_H_ */ +#endif /* !_RF__RF_RAID1_H_ */ diff --git a/sys/dev/raidframe/rf_raid4.c b/sys/dev/raidframe/rf_raid4.c index c0c4af22e4c..3d0ba575c7e 100644 --- a/sys/dev/raidframe/rf_raid4.c +++ b/sys/dev/raidframe/rf_raid4.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raid4.c,v 1.3 2000/01/11 18:02:23 peter Exp $ */ +/* $OpenBSD: rf_raid4.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_raid4.c,v 1.4 2000/01/07 03:41:02 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,11 +28,11 @@ * rights to redistribute these changes. 
*/ -/*************************************** +/***************************************** * - * rf_raid4.c -- implements RAID Level 4 + * rf_raid4.c -- Implements RAID Level 4. * - ***************************************/ + *****************************************/ #include "rf_raid.h" #include "rf_dag.h" @@ -45,30 +46,31 @@ #include "rf_general.h" typedef struct RF_Raid4ConfigInfo_s { - RF_RowCol_t *stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} RF_Raid4ConfigInfo_t; - + RF_RowCol_t *stripeIdentifier; /* + * Filled in at config time & used by + * IdentifyStripe. + */ +} RF_Raid4ConfigInfo_t; -int -rf_ConfigureRAID4( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) +int +rf_ConfigureRAID4(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_Raid4ConfigInfo_t *info; - int i; + int i; - /* create a RAID level 4 configuration structure ... */ - RF_MallocAndAdd(info, sizeof(RF_Raid4ConfigInfo_t), (RF_Raid4ConfigInfo_t *), raidPtr->cleanupList); + /* Create a RAID level 4 configuration structure... */ + RF_MallocAndAdd(info, sizeof(RF_Raid4ConfigInfo_t), + (RF_Raid4ConfigInfo_t *), raidPtr->cleanupList); if (info == NULL) return (ENOMEM); layoutPtr->layoutSpecificInfo = (void *) info; /* ... and fill it in. */ - RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); + RF_MallocAndAdd(info->stripeIdentifier, raidPtr->numCol * + sizeof(RF_RowCol_t), (RF_RowCol_t *), raidPtr->cleanupList); if (info->stripeIdentifier == NULL) return (ENOMEM); for (i = 0; i < raidPtr->numCol; i++) @@ -76,68 +78,60 @@ rf_ConfigureRAID4( RF_ASSERT(raidPtr->numRow == 1); - /* fill in the remaining layout parameters */ + /* Fill in the remaining layout parameters. 
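 *
 * As a hypothetical illustration of the layout set up below: with a
 * 5-column array, numDataCol becomes 4, data stripe units map onto
 * columns 0-3 (rf_MapSectorRAID4() below uses SUID % numDataCol), and
 * every stripe keeps its parity on the dedicated column 4
 * (rf_MapParityRAID4() always returns numDataCol).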
*/ layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << + raidPtr->logBytesPerSector; layoutPtr->numDataCol = raidPtr->numCol - 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * + layoutPtr->sectorsPerStripeUnit; layoutPtr->numParityCol = 1; - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * + layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; return (0); } -int -rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t * raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t *raidPtr) { return (20); } -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t * raidPtr) +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t *raidPtr) { return (20); } -void -rf_MapSectorRAID4( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapSectorRAID4(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; *row = 0; *col = SUID % raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * + raidPtr->Layout.sectorsPerStripeUnit + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void -rf_MapParityRAID4( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapParityRAID4(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; *row = 0; *col = raidPtr->Layout.numDataCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * + raidPtr->Layout.sectorsPerStripeUnit + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void -rf_IdentifyStripeRAID4( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) +void +rf_IdentifyStripeRAID4(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t **diskids, RF_RowCol_t *outRow) { RF_Raid4ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo; @@ -145,12 +139,9 @@ rf_IdentifyStripeRAID4( *diskids = info->stripeIdentifier; } -void -rf_MapSIDToPSIDRAID4( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) +void +rf_MapSIDToPSIDRAID4(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID, + RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru) { *which_ru = 0; *psID = stripeID; diff --git a/sys/dev/raidframe/rf_raid4.h b/sys/dev/raidframe/rf_raid4.h index 9d84a594961..36149060270 100644 --- a/sys/dev/raidframe/rf_raid4.h +++ b/sys/dev/raidframe/rf_raid4.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raid4.h,v 1.2 1999/02/16 00:03:17 niklas Exp $ */ +/* $OpenBSD: rf_raid4.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_raid4.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ + /* * Copyright 
(c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,31 +28,23 @@ * rights to redistribute these changes. */ -/* rf_raid4.h header file for RAID Level 4 */ +/* rf_raid4.h -- Header file for RAID Level 4. */ -#ifndef _RF__RF_RAID4_H_ -#define _RF__RF_RAID4_H_ +#ifndef _RF__RF_RAID4_H_ +#define _RF__RF_RAID4_H_ -int -rf_ConfigureRAID4(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t * raidPtr); -void -rf_MapSectorRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID4(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID4(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RAID4DagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); +int rf_ConfigureRAID4(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +int rf_GetDefaultNumFloatingReconBuffersRAID4(RF_Raid_t *); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID4(RF_Raid_t *); +void rf_MapSectorRAID4(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapParityRAID4(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_IdentifyStripeRAID4(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t **, RF_RowCol_t *); +void rf_MapSIDToPSIDRAID4(RF_RaidLayout_t *, + RF_StripeNum_t, RF_StripeNum_t *, RF_ReconUnitNum_t *); +void rf_RAID4DagSelect(RF_Raid_t *, RF_IoType_t, RF_AccessStripeMap_t *, + RF_VoidFuncPtr *); -#endif /* !_RF__RF_RAID4_H_ */ +#endif /* !_RF__RF_RAID4_H_ */ diff --git a/sys/dev/raidframe/rf_raid5.c b/sys/dev/raidframe/rf_raid5.c index 48f7fda379d..c2276333b1e 100644 --- a/sys/dev/raidframe/rf_raid5.c +++ b/sys/dev/raidframe/rf_raid5.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raid5.c,v 1.3 2000/01/11 18:02:23 peter Exp $ */ +/* $OpenBSD: rf_raid5.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_raid5.c,v 1.4 2000/01/08 22:57:30 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,9 +28,9 @@ * rights to redistribute these changes. */ -/****************************************************************************** +/***************************************************************************** * - * rf_raid5.c -- implements RAID Level 5 + * rf_raid5.c -- Implements RAID Level 5. * *****************************************************************************/ @@ -47,274 +48,339 @@ #include "rf_utils.h" typedef struct RF_Raid5ConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time and used - * by IdentifyStripe */ -} RF_Raid5ConfigInfo_t; - -int -rf_ConfigureRAID5( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) + RF_RowCol_t **stripeIdentifier; /* + * Filled in at config time and used + * by IdentifyStripe. 
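 *
 * For instance (hypothetical 4-column array), the table built by
 * rf_ConfigureRAID5() below would be
 *	{ 0, 1, 2, 3 }
 *	{ 3, 0, 1, 2 }
 *	{ 2, 3, 0, 1 }
 *	{ 1, 2, 3, 0 }
 * i.e. each successive stripe starts one disk earlier, and
 * rf_IdentifyStripeRAID5() just indexes this table with
 * stripeID % numCol.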
+ */ +} RF_Raid5ConfigInfo_t; + + +int +rf_ConfigureRAID5(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_Raid5ConfigInfo_t *info; RF_RowCol_t i, j, startdisk; - /* create a RAID level 5 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t), (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList); + /* Create a RAID level 5 configuration structure. */ + RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t), + (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList); if (info == NULL) return (ENOMEM); layoutPtr->layoutSpecificInfo = (void *) info; RF_ASSERT(raidPtr->numRow == 1); - /* the stripe identifier must identify the disks in each stripe, IN - * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); + /* + * The stripe identifier must identify the disks in each stripe, IN + * THE ORDER THAT THEY APPEAR IN THE STRIPE. + */ + info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, + raidPtr->numCol, raidPtr->cleanupList); if (info->stripeIdentifier == NULL) return (ENOMEM); startdisk = 0; for (i = 0; i < raidPtr->numCol; i++) { for (j = 0; j < raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; + info->stripeIdentifier[i][j] = (startdisk + j) % + raidPtr->numCol; } if ((--startdisk) < 0) startdisk = raidPtr->numCol - 1; } - /* fill in the remaining layout parameters */ + /* Fill in the remaining layout parameters. */ layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << + raidPtr->logBytesPerSector; layoutPtr->numDataCol = raidPtr->numCol - 1; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * + layoutPtr->sectorsPerStripeUnit; layoutPtr->numParityCol = 1; layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * + layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; return (0); } -int -rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t * raidPtr) +int +rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t *raidPtr) { return (20); } -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t * raidPtr) +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t *raidPtr) { return (10); } + #if !defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(_KERNEL) -/* not currently used */ -int -rf_ShutdownRAID5(RF_Raid_t * raidPtr) +/* Not currently used. 
*/ +int +rf_ShutdownRAID5(RF_Raid_t *raidPtr) { return (0); } #endif -void -rf_MapSectorRAID5( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapSectorRAID5(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; *row = 0; *col = (SUID % raidPtr->numCol); - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * + raidPtr->Layout.sectorsPerStripeUnit + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void -rf_MapParityRAID5( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapParityRAID5(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; *row = 0; - *col = raidPtr->Layout.numDataCol - (SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + *col = raidPtr->Layout.numDataCol - + (SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * + raidPtr->Layout.sectorsPerStripeUnit + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void -rf_IdentifyStripeRAID5( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) +void +rf_IdentifyStripeRAID5(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t **diskids, RF_RowCol_t *outRow) { - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_Raid5ConfigInfo_t *info = (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t stripeID = + rf_RaidAddressToStripeID(&raidPtr->Layout, addr); + RF_Raid5ConfigInfo_t *info = + (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; *outRow = 0; *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; } -void -rf_MapSIDToPSIDRAID5( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) +void +rf_MapSIDToPSIDRAID5(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID, + RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru) { *which_ru = 0; *psID = stripeID; } -/* select an algorithm for performing an access. Returns two pointers, + + +/* + * Select an algorithm for performing an access. Returns two pointers, * one to a function that will return information about the DAG, and * another to a function that will create the dag. */ -void -rf_RaidFiveDagSelect( - RF_Raid_t * raidPtr, - RF_IoType_t type, - RF_AccessStripeMap_t * asmap, - RF_VoidFuncPtr * createFunc) +void +rf_RaidFiveDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, + RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc) { RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); RF_PhysDiskAddr_t *failedPDA = NULL; RF_RowCol_t frow, fcol; RF_RowStatus_t rstat; - int prior_recon; + int prior_recon; RF_ASSERT(RF_IO_IS_R_OR_W(type)); if (asmap->numDataFailed + asmap->numParityFailed > 1) { - RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); + RF_ERRORMSG("Multiple disks failed in a single group !" 
+ " Aborting I/O operation.\n"); /* *infoFunc = */ *createFunc = NULL; return; } else if (asmap->numDataFailed + asmap->numParityFailed == 1) { - /* if under recon & already reconstructed, redirect + /* + * If under recon & already reconstructed, redirect * the access to the spare drive and eliminate the - * failure indication */ + * failure indication. + */ failedPDA = asmap->failedPDAs[0]; frow = failedPDA->row; fcol = failedPDA->col; rstat = raidPtr->status[failedPDA->row]; prior_recon = (rstat == rf_rs_reconfigured) || ( (rstat == rf_rs_reconstructing) ? - rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0 - ); + rf_CheckRUReconstructed(raidPtr + ->reconControl[frow]->reconMap, + failedPDA->startSector) : 0); if (prior_recon) { - RF_RowCol_t or = failedPDA->row, oc = failedPDA->col; + RF_RowCol_t or = failedPDA->row; + RF_RowCol_t oc = failedPDA->col; RF_SectorNum_t oo = failedPDA->startSector; - if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { /* redirect to dist - * spare space */ + if (layoutPtr->map->flags & + RF_DISTRIBUTE_SPARE) { + /* Redirect to dist spare space. */ if (failedPDA == asmap->parityInfo) { - /* parity has failed */ - (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - - if (asmap->parityInfo->next) { /* redir 2nd component, - * if any */ - RF_PhysDiskAddr_t *p = asmap->parityInfo->next; - RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; + /* Parity has failed. */ + (layoutPtr->map->MapParity) + (raidPtr, + failedPDA->raidAddress, + &failedPDA->row, + &failedPDA->col, + &failedPDA->startSector, + RF_REMAP); + + if (asmap->parityInfo->next) { + /* + * Redir 2nd component, + * if any. + */ + RF_PhysDiskAddr_t *p = + asmap + ->parityInfo->next; + RF_SectorNum_t SUoffs = + p->startSector % + layoutPtr->sectorsPerStripeUnit; p->row = failedPDA->row; p->col = failedPDA->col; - p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + - SUoffs; /* cheating: - * startSector is not - * really a RAID address */ + /* + * Cheating: + * startSector is not + * really a RAID + * address. + */ + p->startSector = + rf_RaidAddressOfPrevStripeUnitBoundary( + layoutPtr, failedPDA->startSector) + + SUoffs; } } else - if (asmap->parityInfo->next && failedPDA == asmap->parityInfo->next) { - RF_ASSERT(0); /* should not ever - * happen */ + if (asmap->parityInfo->next && + failedPDA == + asmap->parityInfo->next) { + /* + * Should never happen. + */ + RF_ASSERT(0); } else { - - /* data has failed */ - (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, &failedPDA->row, - &failedPDA->col, &failedPDA->startSector, RF_REMAP); - + /* Data has failed. */ + (layoutPtr->map + ->MapSector) (raidPtr, + failedPDA->raidAddress, + &failedPDA->row, + &failedPDA->col, + &failedPDA->startSector, + RF_REMAP); } - } else { /* redirect to dedicated spare - * space */ + } else { + /* Redirect to dedicated spare space. */ - failedPDA->row = raidPtr->Disks[frow][fcol].spareRow; - failedPDA->col = raidPtr->Disks[frow][fcol].spareCol; + failedPDA->row = + raidPtr->Disks[frow][fcol].spareRow; + failedPDA->col = + raidPtr->Disks[frow][fcol].spareCol; - /* the parity may have two distinct + /* + * The parity may have two distinct * components, both of which may need - * to be redirected */ + * to be redirected. 
+ */ if (asmap->parityInfo->next) { - if (failedPDA == asmap->parityInfo) { - failedPDA->next->row = failedPDA->row; - failedPDA->next->col = failedPDA->col; - } else - if (failedPDA == asmap->parityInfo->next) { /* paranoid: should - * never occur */ - asmap->parityInfo->row = failedPDA->row; - asmap->parityInfo->col = failedPDA->col; + if (failedPDA == + asmap->parityInfo) { + failedPDA->next->row = + failedPDA->row; + failedPDA->next->col = + failedPDA->col; + } else { + if (failedPDA == + asmap->parityInfo + ->next) { + /* + * Paranoid: + * Should never + * occur. + */ + asmap + ->parityInfo + ->row = + failedPDA->row; + asmap + ->parityInfo + ->col = + failedPDA->col; } + } } } RF_ASSERT(failedPDA->col != -1); if (rf_dagDebug || rf_mapDebug) { - printf("raid%d: Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n", - raidPtr->raidid, type, or, oc, - (long) oo, failedPDA->row, - failedPDA->col, - (long) failedPDA->startSector); + printf("raid%d: Redirected type '%c'" + " r %d c %d o %ld -> r %d c %d" + " o %ld\n", raidPtr->raidid, + type, or, oc, (long) oo, + failedPDA->row, failedPDA->col, + (long) failedPDA->startSector); } - asmap->numDataFailed = asmap->numParityFailed = 0; + asmap->numDataFailed = asmap->numParityFailed + = 0; } } - /* all dags begin/end with block/unblock node therefore, hdrSucc & - * termAnt counts should always be 1 also, these counts should not be - * visible outside dag creation routines - manipulating the counts - * here should be removed */ + /* + * All DAGs begin/end with block/unblock node. Therefore, hdrSucc & + * termAnt counts should always be 1. Also, these counts should not be + * visible outside DAG creation routines - manipulating the counts + * here should be removed. + */ if (type == RF_IO_TYPE_READ) { if (asmap->numDataFailed == 0) - *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateFaultFreeReadDAG; else - *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateRaidFiveDegradedReadDAG; } else { - - - /* if mirroring, always use large writes. If the access + /* + * If mirroring, always use large writes. If the access * requires two distinct parity updates, always do a small - * write. If the stripe contains a failure but the access + * write. If the stripe contains a failure but the access * does not, do a small write. The first conditional * (numStripeUnitsAccessed <= numDataCol/2) uses a * less-than-or-equal rather than just a less-than because * when G is 3 or 4, numDataCol/2 is 1, and I want - * single-stripe-unit updates to use just one disk. */ + * single-stripe-unit updates to use just one disk. 
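 *
 * As a purely illustrative example: with numDataCol = 4 the test below
 * becomes numStripeUnitsAccessed <= 2, so a fault-free access touching
 * one or two stripe units takes the small-write DAG, while a wider one
 * takes the large-write DAG, unless a failure elsewhere in the stripe,
 * a split parity range, or rf_suppressLocksAndLargeWrites forces the
 * small-write path anyway.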
+ */ if ((asmap->numDataFailed + asmap->numParityFailed) == 0) { if (rf_suppressLocksAndLargeWrites || - (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || - (asmap->parityInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { - *createFunc = (RF_VoidFuncPtr) rf_CreateSmallWriteDAG; + (((asmap->numStripeUnitsAccessed <= + (layoutPtr->numDataCol / 2)) && + (layoutPtr->numDataCol != 1)) || + (asmap->parityInfo->next != NULL) || + rf_CheckStripeForFailures(raidPtr, asmap))) { + *createFunc = (RF_VoidFuncPtr) + rf_CreateSmallWriteDAG; } else - *createFunc = (RF_VoidFuncPtr) rf_CreateLargeWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateLargeWriteDAG; } else { if (asmap->numParityFailed == 1) - *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateNonRedundantWriteDAG; else - if (asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) + if (asmap->numStripeUnitsAccessed != 1 && + failedPDA->numSector != + layoutPtr->sectorsPerStripeUnit) *createFunc = NULL; else - *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; + *createFunc = (RF_VoidFuncPtr) + rf_CreateDegradedWriteDAG; } } } diff --git a/sys/dev/raidframe/rf_raid5.h b/sys/dev/raidframe/rf_raid5.h index 06eecda486e..b961cb05522 100644 --- a/sys/dev/raidframe/rf_raid5.h +++ b/sys/dev/raidframe/rf_raid5.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raid5.h,v 1.2 1999/02/16 00:03:17 niklas Exp $ */ +/* $OpenBSD: rf_raid5.h,v 1.3 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_raid5.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,31 +28,23 @@ * rights to redistribute these changes. */ -/* rf_raid5.h - header file for RAID Level 5 */ +/* rf_raid5.h - Header file for RAID Level 5. 
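 *
 * A hypothetical illustration of the mapping declared below (made-up
 * geometry): with numCol = 5, numDataCol is 4; stripe 0 keeps its data
 * on columns 0-3 and its parity on column 4, stripe 1 wraps onto
 * columns 4, 0, 1, 2 with parity on column 3, and so on, following
 * rf_MapSectorRAID5()'s SUID % numCol and rf_MapParityRAID5()'s
 * numDataCol - (SUID / numDataCol) % numCol.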
*/ #ifndef _RF__RF_RAID5_H_ #define _RF__RF_RAID5_H_ -int -rf_ConfigureRAID5(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t * raidPtr); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t * raidPtr); -void -rf_MapSectorRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID5(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID5(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); -void -rf_RaidFiveDagSelect(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_AccessStripeMap_t * asmap, RF_VoidFuncPtr * createFunc); +int rf_ConfigureRAID5(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +int rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t *); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t *); +void rf_MapSectorRAID5(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapParityRAID5(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_IdentifyStripeRAID5(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t **, RF_RowCol_t *); +void rf_MapSIDToPSIDRAID5(RF_RaidLayout_t *, + RF_StripeNum_t, RF_StripeNum_t *, RF_ReconUnitNum_t *); +void rf_RaidFiveDagSelect(RF_Raid_t *, RF_IoType_t, RF_AccessStripeMap_t *, + RF_VoidFuncPtr *); -#endif /* !_RF__RF_RAID5_H_ */ +#endif /* !_RF__RF_RAID5_H_ */ diff --git a/sys/dev/raidframe/rf_raid5_rotatedspare.c b/sys/dev/raidframe/rf_raid5_rotatedspare.c index ad7defb05ca..23725b2c4a9 100644 --- a/sys/dev/raidframe/rf_raid5_rotatedspare.c +++ b/sys/dev/raidframe/rf_raid5_rotatedspare.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raid5_rotatedspare.c,v 1.3 2000/01/11 18:02:23 peter Exp $ */ +/* $OpenBSD: rf_raid5_rotatedspare.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $ */ /* $NetBSD: rf_raid5_rotatedspare.c,v 1.4 2000/01/07 03:41:03 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,11 +28,11 @@ * rights to redistribute these changes. */ -/************************************************************************** +/***************************************************************************** * - * rf_raid5_rotated_spare.c -- implements RAID Level 5 with rotated sparing + * rf_raid5_rotated_spare.c -- Implements RAID Level 5 with rotated sparing. * - **************************************************************************/ + *****************************************************************************/ #include "rf_raid.h" #include "rf_raid5.h" @@ -43,22 +44,23 @@ #include "rf_raid5_rotatedspare.h" typedef struct RF_Raid5RSConfigInfo_s { - RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by - * IdentifyStripe */ -} RF_Raid5RSConfigInfo_t; - -int -rf_ConfigureRAID5_RS( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) + RF_RowCol_t **stripeIdentifier; /* + * Filled in at config time & used by + * IdentifyStripe. 
+ */ +} RF_Raid5RSConfigInfo_t; + +int +rf_ConfigureRAID5_RS(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_Raid5RSConfigInfo_t *info; RF_RowCol_t i, j, startdisk; - /* create a RAID level 5 configuration structure */ - RF_MallocAndAdd(info, sizeof(RF_Raid5RSConfigInfo_t), (RF_Raid5RSConfigInfo_t *), raidPtr->cleanupList); + /* Create a RAID level 5 configuration structure. */ + RF_MallocAndAdd(info, sizeof(RF_Raid5RSConfigInfo_t), + (RF_Raid5RSConfigInfo_t *), raidPtr->cleanupList); if (info == NULL) return (ENOMEM); layoutPtr->layoutSpecificInfo = (void *) info; @@ -66,104 +68,104 @@ rf_ConfigureRAID5_RS( RF_ASSERT(raidPtr->numRow == 1); RF_ASSERT(raidPtr->numCol >= 3); - /* the stripe identifier must identify the disks in each stripe, IN - * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ - info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList); + /* + * The stripe identifier must identify the disks in each stripe, IN + * THE ORDER THAT THEY APPEAR IN THE STRIPE. + */ + info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, + raidPtr->numCol, raidPtr->cleanupList); if (info->stripeIdentifier == NULL) return (ENOMEM); startdisk = 0; for (i = 0; i < raidPtr->numCol; i++) { for (j = 0; j < raidPtr->numCol; j++) { - info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol; + info->stripeIdentifier[i][j] = (startdisk + j) % + raidPtr->numCol; } if ((--startdisk) < 0) startdisk = raidPtr->numCol - 1; } - /* fill in the remaining layout parameters */ + /* Fill in the remaining layout parameters. */ layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; - layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; + layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << + raidPtr->logBytesPerSector; layoutPtr->numDataCol = raidPtr->numCol - 2; - layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * + layoutPtr->sectorsPerStripeUnit; layoutPtr->numParityCol = 1; layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; - raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; + raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * + layoutPtr->sectorsPerStripeUnit; - raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; + raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * + layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; return (0); } -RF_ReconUnitCount_t -rf_GetNumSpareRUsRAID5_RS(raidPtr) - RF_Raid_t *raidPtr; +RF_ReconUnitCount_t +rf_GetNumSpareRUsRAID5_RS(RF_Raid_t *raidPtr) { return (raidPtr->Layout.stripeUnitsPerDisk / raidPtr->numCol); } -void -rf_MapSectorRAID5_RS( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapSectorRAID5_RS(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; *row = 0; if (remap) { - *col = raidPtr->numCol - 1 - (1 + SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; - *col = (*col + 1) % raidPtr->numCol; /* spare unit is rotated + *col = raidPtr->numCol - 1 - + (1 + SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; + *col = (*col + 1) 
% raidPtr->numCol; /* + * Spare unit is rotated * with parity; line - * above maps to parity */ + * above maps to parity. + */ } else { - *col = (SUID + (SUID / raidPtr->Layout.numDataCol)) % raidPtr->numCol; + *col = (SUID + (SUID / raidPtr->Layout.numDataCol)) % + raidPtr->numCol; } - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * + raidPtr->Layout.sectorsPerStripeUnit + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); } -void -rf_MapParityRAID5_RS( - RF_Raid_t * raidPtr, - RF_RaidAddr_t raidSector, - RF_RowCol_t * row, - RF_RowCol_t * col, - RF_SectorNum_t * diskSector, - int remap) +void +rf_MapParityRAID5_RS(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, + RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap) { RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; *row = 0; - *col = raidPtr->numCol - 1 - (1 + SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol; - *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit + + *col = raidPtr->numCol - 1 - (1 + SUID / raidPtr->Layout.numDataCol) % + raidPtr->numCol; + *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * + raidPtr->Layout.sectorsPerStripeUnit + (raidSector % raidPtr->Layout.sectorsPerStripeUnit); if (remap) *col = (*col + 1) % raidPtr->numCol; } -void -rf_IdentifyStripeRAID5_RS( - RF_Raid_t * raidPtr, - RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, - RF_RowCol_t * outRow) +void +rf_IdentifyStripeRAID5_RS(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, + RF_RowCol_t **diskids, RF_RowCol_t *outRow) { - RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr); - RF_Raid5RSConfigInfo_t *info = (RF_Raid5RSConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; + RF_StripeNum_t stripeID = + rf_RaidAddressToStripeID(&raidPtr->Layout, addr); + RF_Raid5RSConfigInfo_t *info = + (RF_Raid5RSConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; *outRow = 0; *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; } -void -rf_MapSIDToPSIDRAID5_RS( - RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, - RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru) +void +rf_MapSIDToPSIDRAID5_RS(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID, + RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru) { *which_ru = 0; *psID = stripeID; diff --git a/sys/dev/raidframe/rf_raid5_rotatedspare.h b/sys/dev/raidframe/rf_raid5_rotatedspare.h index d3d13cb57c4..8b98c222d14 100644 --- a/sys/dev/raidframe/rf_raid5_rotatedspare.h +++ b/sys/dev/raidframe/rf_raid5_rotatedspare.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raid5_rotatedspare.h,v 1.2 1999/02/16 00:03:18 niklas Exp $ */ +/* $OpenBSD: rf_raid5_rotatedspare.h,v 1.3 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_raid5_rotatedspare.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,27 +28,24 @@ * rights to redistribute these changes. */ -/* rf_raid5_rotatedspare.h - header file for RAID Level 5 with rotated sparing */ +/* + * rf_raid5_rotatedspare.h + * + * Header file for RAID Level 5 with rotated sparing. 
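 *
 * A hypothetical illustration (made-up geometry): with numCol = 5,
 * numDataCol is 5 - 2 = 3; stripe 0 keeps its parity on column 3 with
 * the rotated spare unit on column 4, stripe 1 moves parity to column 2
 * with the spare on column 3, and so on, so parity and spare walk around
 * the array together, one column per stripe, and
 * rf_GetNumSpareRUsRAID5_RS() accordingly reports
 * stripeUnitsPerDisk / numCol spare reconstruction units.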
+ */ -#ifndef _RF__RF_RAID5_ROTATEDSPARE_H_ -#define _RF__RF_RAID5_ROTATEDSPARE_H_ +#ifndef _RF__RF_RAID5_ROTATEDSPARE_H_ +#define _RF__RF_RAID5_ROTATEDSPARE_H_ -int -rf_ConfigureRAID5_RS(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -RF_ReconUnitCount_t rf_GetNumSpareRUsRAID5_RS(RF_Raid_t * raidPtr); -void -rf_MapSectorRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_MapParityRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t raidSector, - RF_RowCol_t * row, RF_RowCol_t * col, RF_SectorNum_t * diskSector, int remap); -void -rf_IdentifyStripeRAID5_RS(RF_Raid_t * raidPtr, RF_RaidAddr_t addr, - RF_RowCol_t ** diskids, RF_RowCol_t * outRow); -void -rf_MapSIDToPSIDRAID5_RS(RF_RaidLayout_t * layoutPtr, - RF_StripeNum_t stripeID, RF_StripeNum_t * psID, - RF_ReconUnitNum_t * which_ru); +int rf_ConfigureRAID5_RS(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +RF_ReconUnitCount_t rf_GetNumSpareRUsRAID5_RS(RF_Raid_t *); +void rf_MapSectorRAID5_RS(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_MapParityRAID5_RS(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *, int); +void rf_IdentifyStripeRAID5_RS(RF_Raid_t *, RF_RaidAddr_t, + RF_RowCol_t **, RF_RowCol_t *); +void rf_MapSIDToPSIDRAID5_RS(RF_RaidLayout_t *, + RF_StripeNum_t, RF_StripeNum_t *, RF_ReconUnitNum_t *); -#endif /* !_RF__RF_RAID5_ROTATEDSPARE_H_ */ +#endif /* !_RF__RF_RAID5_ROTATEDSPARE_H_ */ diff --git a/sys/dev/raidframe/rf_raidframe.h b/sys/dev/raidframe/rf_raidframe.h index f92a2d6540e..c92f4aa4e1a 100644 --- a/sys/dev/raidframe/rf_raidframe.h +++ b/sys/dev/raidframe/rf_raidframe.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_raidframe.h,v 1.5 2000/08/08 16:07:44 peter Exp $ */ +/* $OpenBSD: rf_raidframe.h,v 1.6 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_raidframe.h,v 1.11 2000/05/28 00:48:31 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -31,13 +32,13 @@ * * rf_raidframe.h * - * main header file for using raidframe in the kernel. + * Main header file for using RAIDframe in the kernel. * *****************************************************/ -#ifndef _RF__RF_RAIDFRAME_H_ -#define _RF__RF_RAIDFRAME_H_ +#ifndef _RF__RF_RAIDFRAME_H_ +#define _RF__RF_RAIDFRAME_H_ #include "rf_types.h" #include "rf_configure.h" @@ -46,108 +47,126 @@ typedef RF_uint32 RF_ReconReqFlags_t; -struct rf_recon_req { /* used to tell the kernel to fail a disk */ - RF_RowCol_t row, col; - RF_ReconReqFlags_t flags; - void *raidPtr; /* used internally; need not be set at ioctl - * time */ - struct rf_recon_req *next; /* used internally; need not be set at - * ioctl time */ +struct rf_recon_req { /* Used to tell the kernel to fail a disk. */ + RF_RowCol_t row, col; + RF_ReconReqFlags_t flags; + void *raidPtr; /* + * Used internally; need not be + * set at ioctl time. + */ + struct rf_recon_req *next; /* + * Used internally; need not be + * set at ioctl time. 
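 *
 * A hypothetical user-space sketch of how this structure is meant to be
 * used; the device path is an assumption and error handling is omitted:
 *
 *	struct rf_recon_req req;
 *	int fd = open("/dev/rraid0c", O_RDWR);
 *
 *	req.row = 0;
 *	req.col = 2;				(component to fail)
 *	req.flags = RF_FDFLAGS_RECON;		(fail it and start recon)
 *	ioctl(fd, RAIDFRAME_FAIL_DISK, &req);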
+ */ }; struct RF_SparetWait_s { - int C, G, fcol; /* C = # disks in row, G = # units in stripe, - * fcol = which disk has failed */ - - RF_StripeCount_t SUsPerPU; /* this stuff is the info required to - * create a spare table */ - int TablesPerSpareRegion; - int BlocksPerTable; - RF_StripeCount_t TableDepthInPUs; - RF_StripeCount_t SpareSpaceDepthPerRegionInSUs; - - RF_SparetWait_t *next; /* used internally; need not be set at ioctl - * time */ + int C, G, fcol; /* + * C = # disks in row, + * G = # units in stripe, + * fcol = which disk has failed + */ + + RF_StripeCount_t SUsPerPU; /* + * This stuff is the info + * required to create a spare + * table. + */ + int TablesPerSpareRegion; + int BlocksPerTable; + RF_StripeCount_t TableDepthInPUs; + RF_StripeCount_t SpareSpaceDepthPerRegionInSUs; + + RF_SparetWait_t *next; /* + * Used internally; need not be + * set at ioctl time. + */ }; typedef struct RF_DeviceConfig_s { - u_int rows; - u_int cols; - u_int maxqdepth; - int ndevs; - RF_RaidDisk_t devs[RF_MAX_DISKS]; - int nspares; - RF_RaidDisk_t spares[RF_MAX_DISKS]; -} RF_DeviceConfig_t; + u_int rows; + u_int cols; + u_int maxqdepth; + int ndevs; + RF_RaidDisk_t devs[RF_MAX_DISKS]; + int nspares; + RF_RaidDisk_t spares[RF_MAX_DISKS]; +} RF_DeviceConfig_t; typedef struct RF_ProgressInfo_s { - RF_uint64 remaining; - RF_uint64 completed; - RF_uint64 total; + RF_uint64 remaining; + RF_uint64 completed; + RF_uint64 total; } RF_ProgressInfo_t; -/* flags that can be put in the rf_recon_req structure */ -#define RF_FDFLAGS_NONE 0x0 /* just fail the disk */ -#define RF_FDFLAGS_RECON 0x1 /* fail and initiate recon */ - -#define RF_SCSI_DISK_MAJOR 8 /* the device major number for disks in the - * system */ - -#define RAIDFRAME_CONFIGURE _IOW ('r', 1, void *) /* configure the driver */ -#define RAIDFRAME_SHUTDOWN _IO ('r', 2) /* shutdown the driver */ -#define RAIDFRAME_TUR _IOW ('r', 3, dev_t) /* debug only: test unit - * ready */ -#define RAIDFRAME_TEST_ACC _IOWR('r', 4, struct rf_test_acc) /* run a test access */ -#define RAIDFRAME_FAIL_DISK _IOW ('r', 5, struct rf_recon_req) /* fail a disk & - * optionally start - * recon */ -#define RAIDFRAME_CHECK_RECON_STATUS _IOWR('r', 6, int) /* get reconstruction % - * complete on indicated - * row */ -#define RAIDFRAME_REWRITEPARITY _IO ('r', 7) /* rewrite (initialize) - * all parity */ -#define RAIDFRAME_COPYBACK _IO ('r', 8) /* copy reconstructed - * data back to replaced - * disk */ -#define RAIDFRAME_SPARET_WAIT _IOR ('r', 9, RF_SparetWait_t) /* does not return until - * kernel needs a spare - * table */ -#define RAIDFRAME_SEND_SPARET _IOW ('r', 10, void *) /* used to send a spare - * table down into the - * kernel */ -#define RAIDFRAME_ABORT_SPARET_WAIT _IO ('r', 11) /* used to wake up the - * sparemap daemon & - * tell it to exit */ -#define RAIDFRAME_START_ATRACE _IO ('r', 12) /* start tracing - * accesses */ -#define RAIDFRAME_STOP_ATRACE _IO ('r', 13) /* stop tracing accesses */ -#define RAIDFRAME_GET_SIZE _IOR ('r', 14, int) /* get size (# sectors) - * in raid device */ -#define RAIDFRAME_GET_INFO _IOWR('r', 15, RF_DeviceConfig_t *) /* get configuration */ -#define RAIDFRAME_RESET_ACCTOTALS _IO ('r', 16) /* reset AccTotals for - * device */ -#define RAIDFRAME_GET_ACCTOTALS _IOR ('r', 17, RF_AccTotals_t) /* retrieve AccTotals - * for device */ -#define RAIDFRAME_KEEP_ACCTOTALS _IOW ('r', 18, int) /* turn AccTotals on or - * off for device */ -#define RAIDFRAME_GET_COMPONENT_LABEL _IOWR ('r', 19, RF_ComponentLabel_t *) -#define 
RAIDFRAME_SET_COMPONENT_LABEL _IOW ('r', 20, RF_ComponentLabel_t) - -#define RAIDFRAME_INIT_LABELS _IOW ('r', 21, RF_ComponentLabel_t) -#define RAIDFRAME_ADD_HOT_SPARE _IOW ('r', 22, RF_SingleComponent_t) -#define RAIDFRAME_REMOVE_HOT_SPARE _IOW ('r', 23, RF_SingleComponent_t) -#define RAIDFRAME_REBUILD_IN_PLACE _IOW ('r', 24, RF_SingleComponent_t) -#define RAIDFRAME_CHECK_PARITY _IOWR ('r', 25, int) -#define RAIDFRAME_CHECK_PARITYREWRITE_STATUS _IOWR ('r', 26, int) -#define RAIDFRAME_CHECK_COPYBACK_STATUS _IOWR ('r', 27, int) -#define RAIDFRAME_SET_AUTOCONFIG _IOWR ('r', 28, int) -#define RAIDFRAME_SET_ROOT _IOWR ('r', 29, int) -#define RAIDFRAME_DELETE_COMPONENT _IOW ('r', 30, RF_SingleComponent_t) -#define RAIDFRAME_INCORPORATE_HOT_SPARE _IOW ('r', 31, RF_SingleComponent_t) -/* 'Extended' status versions */ -#define RAIDFRAME_CHECK_RECON_STATUS_EXT _IOWR('r', 32, RF_ProgressInfo_t *) -#define RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT _IOWR ('r', 33, RF_ProgressInfo_t *) -#define RAIDFRAME_CHECK_COPYBACK_STATUS_EXT _IOWR ('r', 34, RF_ProgressInfo_t *) - -#endif /* !_RF__RF_RAIDFRAME_H_ */ +/* Flags that can be put in the rf_recon_req structure. */ +#define RF_FDFLAGS_NONE 0x0 /* Just fail the disk. */ +#define RF_FDFLAGS_RECON 0x1 /* Fail and initiate recon. */ + +#define RF_SCSI_DISK_MAJOR 8 /* + * The device major number for disks + * in the system. + */ + + /* Configure the driver. */ +#define RAIDFRAME_CONFIGURE _IOW ('r', 1, void *) + /* Shutdown the driver. */ +#define RAIDFRAME_SHUTDOWN _IO ('r', 2) + /* Debug only: test unit ready. */ +#define RAIDFRAME_TUR _IOW ('r', 3, dev_t) + /* Run a test access. */ +#define RAIDFRAME_TEST_ACC _IOWR('r', 4, struct rf_test_acc) + /* Fail a disk & optionally start recon. */ +#define RAIDFRAME_FAIL_DISK _IOW ('r', 5, struct rf_recon_req) + /* Get reconstruction % complete on indicated row. */ +#define RAIDFRAME_CHECK_RECON_STATUS _IOWR('r', 6, int) + /* Rewrite (initialize) all parity. */ +#define RAIDFRAME_REWRITEPARITY _IO ('r', 7) + /* Copy reconstructed data back to replaced disk. */ +#define RAIDFRAME_COPYBACK _IO ('r', 8) + /* Does not return until kernel needs a spare table. */ +#define RAIDFRAME_SPARET_WAIT _IOR ('r', 9, RF_SparetWait_t) + /* Used to send a spare table down into the kernel. */ +#define RAIDFRAME_SEND_SPARET _IOW ('r', 10, void *) + /* Used to wake up the sparemap daemon & tell it to exit. */ +#define RAIDFRAME_ABORT_SPARET_WAIT _IO ('r', 11) + /* Start tracing accesses. */ +#define RAIDFRAME_START_ATRACE _IO ('r', 12) + /* Stop tracing accesses. */ +#define RAIDFRAME_STOP_ATRACE _IO ('r', 13) + /* Get size (# sectors) in raid device. */ +#define RAIDFRAME_GET_SIZE _IOR ('r', 14, int) + /* Get configuration. */ +#define RAIDFRAME_GET_INFO _IOWR('r', 15, RF_DeviceConfig_t *) + /* Reset AccTotals for device. */ +#define RAIDFRAME_RESET_ACCTOTALS _IO ('r', 16) + /* Retrieve AccTotals for device. */ +#define RAIDFRAME_GET_ACCTOTALS _IOR ('r', 17, RF_AccTotals_t) + /* Turn AccTotals on or off for device.
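
A rough userland sketch of how the fail-a-disk interface above is meant to be driven, in the spirit of what raidctl does; this is an illustration, not raidctl source, and the header path, device node and component position are assumptions:

    /*
     * Illustration only: mark component (row 0, col 2) of a RAID set as
     * failed and ask the kernel to reconstruct onto a spare.  The include
     * path and the device node passed in are assumed for the example.
     */
    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <dev/raidframe/rf_raidframe.h>
    #include <err.h>
    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    void
    fail_and_recon(const char *rdev)
    {
            struct rf_recon_req req;
            int fd;

            if ((fd = open(rdev, O_RDWR)) == -1)
                    err(1, "%s", rdev);

            memset(&req, 0, sizeof(req));   /* raidPtr and next stay unset. */
            req.row = 0;
            req.col = 2;
            req.flags = RF_FDFLAGS_RECON;   /* Fail and initiate recon. */

            if (ioctl(fd, RAIDFRAME_FAIL_DISK, &req) == -1)
                    err(1, "RAIDFRAME_FAIL_DISK");
            close(fd);
    }

The caller would hand this a raw raid device node, e.g. fail_and_recon("/dev/rraid0c"), where the path is again only an assumption for the example.
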
*/ +#define RAIDFRAME_KEEP_ACCTOTALS _IOW ('r', 18, int) + +#define RAIDFRAME_GET_COMPONENT_LABEL _IOWR ('r', 19, RF_ComponentLabel_t *) +#define RAIDFRAME_SET_COMPONENT_LABEL _IOW ('r', 20, RF_ComponentLabel_t) + +#define RAIDFRAME_INIT_LABELS _IOW ('r', 21, RF_ComponentLabel_t) +#define RAIDFRAME_ADD_HOT_SPARE _IOW ('r', 22, RF_SingleComponent_t) +#define RAIDFRAME_REMOVE_HOT_SPARE _IOW ('r', 23, RF_SingleComponent_t) +#define RAIDFRAME_REBUILD_IN_PLACE _IOW ('r', 24, RF_SingleComponent_t) +#define RAIDFRAME_CHECK_PARITY _IOWR ('r', 25, int) +#define RAIDFRAME_CHECK_PARITYREWRITE_STATUS _IOWR ('r', 26, int) +#define RAIDFRAME_CHECK_COPYBACK_STATUS _IOWR ('r', 27, int) +#define RAIDFRAME_SET_AUTOCONFIG _IOWR ('r', 28, int) +#define RAIDFRAME_SET_ROOT _IOWR ('r', 29, int) +#define RAIDFRAME_DELETE_COMPONENT _IOW ('r', 30, RF_SingleComponent_t) +#define RAIDFRAME_INCORPORATE_HOT_SPARE _IOW ('r', 31, RF_SingleComponent_t) + +/* 'Extended' status versions. */ +#define RAIDFRAME_CHECK_RECON_STATUS_EXT \ + _IOWR('r', 32, RF_ProgressInfo_t *) +#define RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT \ + _IOWR ('r', 33, RF_ProgressInfo_t *) +#define RAIDFRAME_CHECK_COPYBACK_STATUS_EXT \ + _IOWR ('r', 34, RF_ProgressInfo_t *) + +#endif /* !_RF__RF_RAIDFRAME_H_ */ diff --git a/sys/dev/raidframe/rf_reconbuffer.c b/sys/dev/raidframe/rf_reconbuffer.c index d1ec64672d5..c99f18b40ad 100644 --- a/sys/dev/raidframe/rf_reconbuffer.c +++ b/sys/dev/raidframe/rf_reconbuffer.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_reconbuffer.c,v 1.3 2000/08/08 16:07:44 peter Exp $ */ +/* $OpenBSD: rf_reconbuffer.c,v 1.4 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_reconbuffer.c,v 1.4 2000/03/13 23:52:36 soren Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,11 +28,11 @@ * rights to redistribute these changes. */ -/*************************************************** +/***************************************************** * - * rf_reconbuffer.c -- reconstruction buffer manager + * rf_reconbuffer.c -- Reconstruction buffer manager. * - ***************************************************/ + *****************************************************/ #include "rf_raid.h" #include "rf_reconbuffer.h" @@ -43,36 +44,42 @@ #include "rf_reconutil.h" #include "rf_nwayxor.h" -#define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a) -#define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b) -#define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c) -#define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d) -#define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e) +#define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s, a) +#define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s, a, b) +#define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s, a, b, c) +#define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s, a, b, c, d) +#define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s, a, b, c, d, e) -/***************************************************************************************** +/***************************************************************************** * * Submit a reconstruction buffer to the manager for XOR. - * We can only submit a buffer if (1) we can xor into an existing buffer, which means - * we don't have to acquire a new one, (2) we can acquire a floating - * recon buffer, or (3) the caller has indicated that we are allowed to keep the - * submitted buffer. 
+ * We can only submit a buffer if (1) we can xor into an existing buffer, + * which means we don't have to acquire a new one, (2) we can acquire a + * floating recon buffer, or (3) the caller has indicated that we are allowed + * to keep the submitted buffer. * * Returns non-zero if and only if we were not able to submit. - * In this case, we append the current disk ID to the wait list on the indicated - * RU, so that it will be re-enabled when we acquire a buffer for this RU. + * In this case, we append the current disk ID to the wait list on the + * indicated RU, so that it will be re-enabled when we acquire a buffer for + * this RU. * - ****************************************************************************************/ - -/* just to make the code below more readable */ -#define BUFWAIT_APPEND(_cb_, _pssPtr_, _row_, _col_) \ - _cb_ = rf_AllocCallbackDesc(); \ - (_cb_)->row = (_row_); (_cb_)->col = (_col_); (_cb_)->next = (_pssPtr_)->bufWaitList; (_pssPtr_)->bufWaitList = (_cb_); + *****************************************************************************/ + +/* Just to make the code below more readable. */ +#define BUFWAIT_APPEND(_cb_,_pssPtr_,_row_,_col_) \ +do { \ + _cb_ = rf_AllocCallbackDesc(); \ + (_cb_)->row = (_row_); \ + (_cb_)->col = (_col_); \ + (_cb_)->next = (_pssPtr_)->bufWaitList; \ + (_pssPtr_)->bufWaitList = (_cb_); \ +} while (0) /* - * nWayXorFuncs[i] is a pointer to a function that will xor "i" + * rf_nWayXorFuncs[i] is a pointer to a function that will xor "i" * bufs into the accumulating sum. */ -static RF_VoidFuncPtr nWayXorFuncs[] = { +static RF_VoidFuncPtr rf_nWayXorFuncs[] = { NULL, (RF_VoidFuncPtr) rf_nWayXor1, (RF_VoidFuncPtr) rf_nWayXor2, @@ -85,89 +92,122 @@ static RF_VoidFuncPtr nWayXorFuncs[] = { (RF_VoidFuncPtr) rf_nWayXor9 }; -int -rf_SubmitReconBuffer(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have - * to return it */ - int use_committed; /* whether to use a committed or an available - * recon buffer */ + +int +rf_SubmitReconBuffer( + RF_ReconBuffer_t *rbuf, /* The recon buffer to submit. */ + int keep_it, /* + * Whether we can keep this buffer or + * we have to return it. + */ + int use_committed /* + * Whether to use a committed or an + * available recon buffer. + */ +) { RF_LayoutSW_t *lp; - int rc; + int rc; lp = rbuf->raidPtr->Layout.map; rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed); return (rc); } -int -rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed) - RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ - int keep_it; /* whether we can keep this buffer or we have - * to return it */ - int use_committed; /* whether to use a committed or an available - * recon buffer */ +int +rf_SubmitReconBufferBasic( + RF_ReconBuffer_t *rbuf, /* The recon buffer to submit. */ + int keep_it, /* + * Whether we can keep this buffer + * or we have to return it. + */ + int use_committed /* + * Whether to use a committed or + * an available recon buffer. + */ +) { RF_Raid_t *raidPtr = rbuf->raidPtr; RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row]; RF_ReconParityStripeStatus_t *pssPtr; - RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf - * pointers */ - caddr_t ta; /* temporary data buffer pointer */ + /* Temporary rbuf pointers. */ + RF_ReconBuffer_t *targetRbuf, *t = NULL; + /* Temporary data buffer pointer. 
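
The rewritten BUFWAIT_APPEND above, like the Dprintf wrappers later in this change, folds a multi-statement macro body into do { ... } while (0). A generic illustration of the idiom, not driver code: the wrapper turns the expansion into a single statement that still requires a trailing semicolon, so the macro can be used as the body of an if/else without silently hoisting its second statement out of the branch.

    /* Illustration of the do { ... } while (0) idiom used for BUFWAIT_APPEND. */
    #define APPEND_TWO(a, b)                                            \
    do {                                                                \
            (a)++;                                                      \
            (b)++;                                                      \
    } while (0)

    void
    append_example(int cond, int a, int b)
    {
            if (cond)
                    APPEND_TWO(a, b);       /* Expands to one statement. */
            else
                    a = b;                  /* 'else' still pairs correctly. */
    }
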
*/ + caddr_t ta; RF_CallbackDesc_t *cb, *p; - int retcode = 0, created = 0; + int retcode = 0, created = 0; RF_Etimer_t timer; - /* makes no sense to have a submission from the failed disk */ + /* Makes no sense to have a submission from the failed disk. */ RF_ASSERT(rbuf); RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); - Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n", - rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset); + Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d" + " (failed offset %ld).\n", rbuf->row, rbuf->col, + (long) rbuf->parityStripeID, rbuf->which_ru, + (long) rbuf->failedDiskSectorOffset); RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); - pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); - RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten - * an rbuf for it */ + pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, + rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); + RF_ASSERT(pssPtr); /* + * If it didn't exist, we wouldn't have gotten + * an rbuf for it. + */ - /* check to see if enough buffers have accumulated to do an XOR. If - * so, there's no need to acquire a floating rbuf. Before we can do - * any XORing, we must have acquired a destination buffer. If we + /* + * Check to see if enough buffers have accumulated to do an XOR. If + * so, there's no need to acquire a floating rbuf. Before we can do + * any XORing, we must have acquired a destination buffer. If we * have, then we can go ahead and do the XOR if (1) including this * buffer, enough bufs have accumulated, or (2) this is the last * submission for this stripe. Otherwise, we have to go acquire a - * floating rbuf. */ + * floating rbuf. + */ targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; if ((targetRbuf != NULL) && - ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) { - pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf; /* install this buffer */ - Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount); + ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || + (targetRbuf->count + pssPtr->xorBufCount + 1 == + layoutPtr->numDataCol))) { + /* Install this buffer. 
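
Condensed into a predicate, the accumulation test described above reads as follows; this is a sketch only, with parameter names that mirror, but are not, the locals of rf_SubmitReconBufferBasic():

    /*
     * "Ready to XOR" when either enough buffers have piled up (counting
     * this submission) or every data column of the stripe has now been
     * submitted.
     */
    static int
    recon_xor_ready(int xorBufCount, int numBufsToAccumulate,
        int targetCount, int numDataCol)
    {
            return (xorBufCount == numBufsToAccumulate - 1 ||
                targetCount + xorBufCount + 1 == numDataCol);
    }
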
*/ + pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf; + Dprintf3("RECON: row %d col %d invoking a %d-way XOR.\n", + rbuf->row, rbuf->col, pssPtr->xorBufCount); RF_ETIMER_START(timer); rf_MultiWayReconXor(raidPtr, pssPtr); RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer); if (!keep_it) { - raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer); - RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - - rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); + raidPtr->recon_tracerecs[rbuf->col].xor_us = + RF_ETIMER_VAL_US(timer); + RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col] + .recon_timer); + RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col] + .recon_timer); + raidPtr->recon_tracerecs[rbuf->col] + .specific.recon.recon_return_to_submit_us += + RF_ETIMER_VAL_US(raidPtr + ->recon_tracerecs[rbuf->col].recon_timer); + RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col] + .recon_timer); + + rf_LogTraceRec(raidPtr, + &raidPtr->recon_tracerecs[rbuf->col]); } - rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); + rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, + layoutPtr->numDataCol); - /* if use_committed is on, we _must_ consume a buffer off the - * committed list. */ + /* + * If use_committed is on, we _must_ consume a buffer off the + * committed list. + */ if (use_committed) { t = reconCtrlPtr->committedRbufs; RF_ASSERT(t); @@ -175,19 +215,21 @@ rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed) rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t); } if (keep_it) { - RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); + RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, + rbuf->parityStripeID); RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); rf_FreeReconBuffer(rbuf); return (retcode); } goto out; } - /* set the value of "t", which we'll use as the rbuf from here on */ + /* Set the value of "t", which we'll use as the rbuf from here on. */ if (keep_it) { t = rbuf; } else { - if (use_committed) { /* if a buffer has been committed to - * us, use it */ + if (use_committed) { + /* If a buffer has been committed to us, use it. */ + t = reconCtrlPtr->committedRbufs; RF_ASSERT(t); reconCtrlPtr->committedRbufs = t->next; @@ -200,21 +242,25 @@ rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed) } } - /* If we weren't able to acquire a buffer, append to the end of the - * buf list in the recon ctrl struct. */ + /* + * If we weren't able to acquire a buffer, append to the end of the + * buf list in the recon ctrl struct. + */ if (!t) { RF_ASSERT(!keep_it && !use_committed); - Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col); + Dprintf2("RECON: row %d col %d failed to acquire floating" + " rbuf.\n", rbuf->row, rbuf->col); raidPtr->procsInBufWait++; - if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) { - printf("Buffer wait deadlock detected. Exiting.\n"); + if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && + (raidPtr->numFullReconBuffers == 0)) { + printf("Buffer wait deadlock detected. 
Exiting.\n"); rf_PrintPSStatusTable(raidPtr, rbuf->row); RF_PANIC(); } pssPtr->flags |= RF_PSS_BUFFERWAIT; - cb = rf_AllocCallbackDesc(); /* append to buf wait list in - * recon ctrl structure */ + /* Append to buf wait list in recon ctrl structure. */ + cb = rf_AllocCallbackDesc(); cb->row = rbuf->row; cb->col = rbuf->col; cb->callbackArg.v = rbuf->parityStripeID; @@ -222,24 +268,29 @@ rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed) cb->next = NULL; if (!reconCtrlPtr->bufferWaitList) reconCtrlPtr->bufferWaitList = cb; - else { /* might want to maintain head/tail pointers - * here rather than search for end of list */ - for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); + else { + /* + * Might want to maintain head/tail pointers + * here rather than search for end of list. + */ + for (p = reconCtrlPtr->bufferWaitList; p->next; + p = p->next); p->next = cb; } retcode = 1; goto out; } - Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col); + Dprintf2("RECON: row %d col %d acquired rbuf.\n", rbuf->row, rbuf->col); RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); - raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); + raidPtr->recon_tracerecs[rbuf->col] + .specific.recon.recon_return_to_submit_us += + RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); - /* initialize the buffer */ + /* Initialize the buffer. */ if (t != rbuf) { t->row = rbuf->row; t->col = reconCtrlPtr->fcol; @@ -252,19 +303,23 @@ rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed) ta = t->buffer; t->buffer = rbuf->buffer; - rbuf->buffer = ta; /* swap buffers */ + rbuf->buffer = ta; /* Swap buffers. */ } - /* the first installation always gets installed as the destination - * buffer. subsequent installations get stacked up to allow for - * multi-way XOR */ + /* + * The first installation always gets installed as the destination + * buffer. Subsequent installations get stacked up to allow for + * multi-way XOR. + */ if (!pssPtr->rbuf) { pssPtr->rbuf = t; t->count = 1; } else - pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t; /* install this buffer */ + /* Install this buffer. */ + pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t; - rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if - * G=2 */ + /* The buffer is full if G=2. */ + rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, + layoutPtr->numDataCol); out: RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); @@ -272,36 +327,45 @@ out: return (retcode); } -int -rf_MultiWayReconXor(raidPtr, pssPtr) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *pssPtr; /* the pss descriptor for this - * parity stripe */ +int +rf_MultiWayReconXor( + RF_Raid_t *raidPtr, + RF_ReconParityStripeStatus_t *pssPtr /* + * The pss descriptor for this + * parity stripe. 
+ */ +) { - int i, numBufs = pssPtr->xorBufCount; - int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU); + int i, numBufs = pssPtr->xorBufCount; + int numBytes = rf_RaidAddressToByte(raidPtr, + raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU); RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor; RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; RF_ASSERT(pssPtr->rbuf != NULL); RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS); -#ifdef _KERNEL -#if !defined(__NetBSD__) && !defined(__OpenBSD__) - thread_block(); /* yield the processor before doing a big XOR */ +#ifdef _KERNEL +#if !defined(__NetBSD__) && !defined(__OpenBSD__) + /* Yield the processor before doing a big XOR. */ + thread_block(); #endif -#endif /* _KERNEL */ +#endif /* _KERNEL */ + /* + * XXX + * + * What if more than 9 bufs ? + */ + rf_nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, + numBytes / sizeof(long)); + /* - * XXX - * - * What if more than 9 bufs? - */ - nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long)); - - /* release all the reconstruction buffers except the last one, which - * belongs to the disk whose submission caused this XOR to take place */ + * Release all the reconstruction buffers except the last one, which + * belongs to the disk whose submission caused this XOR to take place. + */ for (i = 0; i < numBufs - 1; i++) { if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) - rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]); + rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, + rbufs[i]); else if (rbufs[i]->type == RF_RBUF_TYPE_FORCED) rf_FreeReconBuffer(rbufs[i]); @@ -312,13 +376,15 @@ rf_MultiWayReconXor(raidPtr, pssPtr) pssPtr->xorBufCount = 0; return (0); } -/* removes one full buffer from one of the full-buffer lists and returns it. + + +/* + * Removes one full buffer from one of the full-buffer lists and returns it. * * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY. */ RF_ReconBuffer_t * -rf_GetFullReconBuffer(reconCtrlPtr) - RF_ReconCtrl_t *reconCtrlPtr; +rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr) { RF_ReconBuffer_t *p; @@ -340,75 +406,84 @@ out: } -/* if the reconstruction buffer is full, move it to the full list, which is maintained - * sorted by failed disk sector offset +/* + * If the reconstruction buffer is full, move it to the full list, which + * is maintained sorted by failed disk sector offset. * * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. 
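
rf_MultiWayReconXor() above dispatches through rf_nWayXorFuncs[numBufs], whose entries are hand-unrolled for one through nine source buffers and operate on longs (hence numBytes / sizeof(long)). The net effect can be pictured as the generic loop below; this is an illustration with a simplified buffer type, not the rf_nwayxor.c routines:

    /* Generic picture of an n-way XOR into the accumulating destination. */
    #include <stddef.h>

    struct xbuf {
            unsigned long *buffer;
    };

    static void
    n_way_xor(struct xbuf **src, struct xbuf *dest, size_t nsrc, size_t nwords)
    {
            size_t i, w;

            for (i = 0; i < nsrc; i++)
                    for (w = 0; w < nwords; w++)
                            dest->buffer[w] ^= src[i]->buffer[w];
    }
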
*/ -int -rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol) - RF_Raid_t *raidPtr; - RF_ReconCtrl_t *reconCtrl; - RF_ReconParityStripeStatus_t *pssPtr; - int numDataCol; +int +rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl, + RF_ReconParityStripeStatus_t *pssPtr, int numDataCol) { RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; if (rbuf->count == numDataCol) { raidPtr->numFullReconBuffers++; - Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n", + Dprintf2("RECON: rbuf for psid %ld ru %d has filled.\n", (long) rbuf->parityStripeID, rbuf->which_ru); - if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) { - Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n", - (long) rbuf->parityStripeID, rbuf->which_ru); + if (!reconCtrl->fullBufferList || + (rbuf->failedDiskSectorOffset < + reconCtrl->fullBufferList->failedDiskSectorOffset)) { + Dprintf2("RECON: rbuf for psid %ld ru %d is head of" + " list.\n", (long) rbuf->parityStripeID, + rbuf->which_ru); rbuf->next = reconCtrl->fullBufferList; reconCtrl->fullBufferList = rbuf; } else { - for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next); + for (pt = reconCtrl->fullBufferList, p = pt->next; + p && p->failedDiskSectorOffset < + rbuf->failedDiskSectorOffset; + pt = p, p = p->next); rbuf->next = p; pt->next = rbuf; - Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n", + Dprintf2("RECON: rbuf for psid %ld ru %d is in list.\n", (long) rbuf->parityStripeID, rbuf->which_ru); } #if 0 - pssPtr->writeRbuf = pssPtr->rbuf; /* DEBUG ONLY: we like + pssPtr->writeRbuf = pssPtr->rbuf; /* + * DEBUG ONLY: We like * to be able to find * this rbuf while it's - * awaiting write */ + * awaiting write. + */ #else rbuf->pssPtr = pssPtr; #endif pssPtr->rbuf = NULL; - rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY); + rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, + RF_REVENT_BUFREADY); } return (0); } -/* release a floating recon buffer for someone else to use. - * assumes the rb_mutex is LOCKED at entry +/* + * Release a floating recon buffer for someone else to use. + * Assumes the rb_mutex is LOCKED at entry. */ -void -rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_ReconBuffer_t *rbuf; +void +rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_RowCol_t row, + RF_ReconBuffer_t *rbuf) { RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row]; RF_CallbackDesc_t *cb; - Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n", + Dprintf2("RECON: releasing rbuf for psid %ld ru %d.\n", (long) rbuf->parityStripeID, rbuf->which_ru); - /* if anyone is waiting on buffers, wake one of them up. They will - * subsequently wake up anyone else waiting on their RU */ + /* + * If anyone is waiting on buffers, wake one of them up. They will + * subsequently wake up anyone else waiting on their RU. + */ if (rcPtr->bufferWaitList) { rbuf->next = rcPtr->committedRbufs; rcPtr->committedRbufs = rbuf; cb = rcPtr->bufferWaitList; rcPtr->bufferWaitList = cb->next; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've - * committed a buffer */ + /* arg==1 => We've committed a buffer. 
*/ + rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, + RF_REVENT_BUFCLEAR); rf_FreeCallbackDesc(cb); raidPtr->procsInBufWait--; } else { @@ -416,52 +491,63 @@ rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf) rcPtr->floatingRbufs = rbuf; } } -/* release any disk that is waiting on a buffer for the indicated RU. - * assumes the rb_mutex is LOCKED at entry + + +/* + * Release any disk that is waiting on a buffer for the indicated RU. + * Assumes the rb_mutex is LOCKED at entry. */ -void -rf_ReleaseBufferWaiters(raidPtr, pssPtr) - RF_Raid_t *raidPtr; - RF_ReconParityStripeStatus_t *pssPtr; +void +rf_ReleaseBufferWaiters( + RF_Raid_t *raidPtr, + RF_ReconParityStripeStatus_t *pssPtr +) { RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList; - Dprintf2("RECON: releasing buf waiters for psid %ld ru %d\n", + Dprintf2("RECON: releasing buf waiters for psid %ld ru %d.\n", (long) pssPtr->parityStripeID, pssPtr->which_ru); pssPtr->flags &= ~RF_PSS_BUFFERWAIT; while (cb) { cb1 = cb->next; cb->next = NULL; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFCLEAR); /* arg==0 => we haven't - * committed a buffer */ + /* arg==0 => We haven't committed a buffer. */ + rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0, + RF_REVENT_BUFCLEAR); rf_FreeCallbackDesc(cb); cb = cb1; } pssPtr->bufWaitList = NULL; } -/* when reconstruction is forced on an RU, there may be some disks waiting to - * acquire a buffer for that RU. Since we allocate a new buffer as part of + + +/* + * When reconstruction is forced on an RU, there may be some disks waiting to + * acquire a buffer for that RU. Since we allocate a new buffer as part of * the forced-reconstruction process, we no longer have to wait for any - * buffers, so we wakeup any waiter that we find in the bufferWaitList + * buffers, so we wakeup any waiter that we find in the bufferWaitList. * - * assumes the rb_mutex is LOCKED at entry + * Assumes the rb_mutex is LOCKED at entry. */ -void -rf_ReleaseBufferWaiter(rcPtr, rbuf) - RF_ReconCtrl_t *rcPtr; - RF_ReconBuffer_t *rbuf; +void +rf_ReleaseBufferWaiter(RF_ReconCtrl_t *rcPtr, RF_ReconBuffer_t *rbuf) { RF_CallbackDesc_t *cb, *cbt; - for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; cbt = cb, cb = cb->next) { - if ((cb->callbackArg.v == rbuf->parityStripeID) && (cb->callbackArg2.v == rbuf->which_ru)) { - Dprintf2("RECON: Dropping row %d col %d from buffer wait list\n", cb->row, cb->col); + for (cbt = NULL, cb = rcPtr->bufferWaitList; cb; + cbt = cb, cb = cb->next) { + if ((cb->callbackArg.v == rbuf->parityStripeID) && + (cb->callbackArg2.v == rbuf->which_ru)) { + Dprintf2("RECON: Dropping row %d col %d from buffer" + " wait list.\n", cb->row, cb->col); if (cbt) cbt->next = cb->next; else rcPtr->bufferWaitList = cb->next; - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); /* arg==0 => no - * committed buffer */ + + /* arg==0 => No committed buffer. 
*/ + rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, + cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY); rf_FreeCallbackDesc(cb); return; } diff --git a/sys/dev/raidframe/rf_reconbuffer.h b/sys/dev/raidframe/rf_reconbuffer.h index 84921fc5393..5a51b01ae6b 100644 --- a/sys/dev/raidframe/rf_reconbuffer.h +++ b/sys/dev/raidframe/rf_reconbuffer.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_reconbuffer.h,v 1.2 1999/02/16 00:03:21 niklas Exp $ */ +/* $OpenBSD: rf_reconbuffer.h,v 1.3 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_reconbuffer.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,37 +28,27 @@ * rights to redistribute these changes. */ -/******************************************************************* +/********************************************************************* * - * rf_reconbuffer.h -- header file for reconstruction buffer manager + * rf_reconbuffer.h -- Header file for reconstruction buffer manager. * - *******************************************************************/ + *********************************************************************/ -#ifndef _RF__RF_RECONBUFFER_H_ -#define _RF__RF_RECONBUFFER_H_ +#ifndef _RF__RF_RECONBUFFER_H_ +#define _RF__RF_RECONBUFFER_H_ #include "rf_types.h" #include "rf_reconstruct.h" -int -rf_SubmitReconBuffer(RF_ReconBuffer_t * rbuf, int keep_int, - int use_committed); -int -rf_SubmitReconBufferBasic(RF_ReconBuffer_t * rbuf, int keep_int, - int use_committed); -int -rf_MultiWayReconXor(RF_Raid_t * raidPtr, - RF_ReconParityStripeStatus_t * pssPtr); -RF_ReconBuffer_t *rf_GetFullReconBuffer(RF_ReconCtrl_t * reconCtrlPtr); -int -rf_CheckForFullRbuf(RF_Raid_t * raidPtr, RF_ReconCtrl_t * reconCtrl, - RF_ReconParityStripeStatus_t * pssPtr, int numDataCol); -void -rf_ReleaseFloatingReconBuffer(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_ReconBuffer_t * rbuf); -void -rf_ReleaseBufferWaiters(RF_Raid_t * raidPtr, - RF_ReconParityStripeStatus_t * pssPtr); -void rf_ReleaseBufferWaiter(RF_ReconCtrl_t * rcPtr, RF_ReconBuffer_t * rbuf); +int rf_SubmitReconBuffer(RF_ReconBuffer_t *, int, int); +int rf_SubmitReconBufferBasic(RF_ReconBuffer_t *, int, int); +int rf_MultiWayReconXor(RF_Raid_t *, RF_ReconParityStripeStatus_t *); +RF_ReconBuffer_t *rf_GetFullReconBuffer(RF_ReconCtrl_t *); +int rf_CheckForFullRbuf(RF_Raid_t *, RF_ReconCtrl_t *, + RF_ReconParityStripeStatus_t *, int); +void rf_ReleaseFloatingReconBuffer(RF_Raid_t *, RF_RowCol_t, + RF_ReconBuffer_t *); +void rf_ReleaseBufferWaiters(RF_Raid_t *, RF_ReconParityStripeStatus_t *); +void rf_ReleaseBufferWaiter(RF_ReconCtrl_t *, RF_ReconBuffer_t *); -#endif /* !_RF__RF_RECONBUFFER_H_ */ +#endif /* !_RF__RF_RECONBUFFER_H_ */ diff --git a/sys/dev/raidframe/rf_reconmap.c b/sys/dev/raidframe/rf_reconmap.c index 1d614c02f0a..3288c501ff7 100644 --- a/sys/dev/raidframe/rf_reconmap.c +++ b/sys/dev/raidframe/rf_reconmap.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_reconmap.c,v 1.3 2000/01/07 14:50:22 peter Exp $ */ +/* $OpenBSD: rf_reconmap.c,v 1.4 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_reconmap.c,v 1.6 1999/08/14 21:44:24 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,64 +28,67 @@ * rights to redistribute these changes. 
*/ -/************************************************************************* +/***************************************************************************** * rf_reconmap.c * - * code to maintain a map of what sectors have/have not been reconstructed + * Code to maintain a map of what sectors have/have not been reconstructed. * - *************************************************************************/ + *****************************************************************************/ #include "rf_raid.h" #include <sys/time.h> #include "rf_general.h" #include "rf_utils.h" -/* special pointer values indicating that a reconstruction unit - * has been either totally reconstructed or not at all. Both +/* + * Special pointer values indicating that a reconstruction unit + * has been either totally reconstructed or not at all. Both * are illegal pointer values, so you have to be careful not to - * dereference through them. RU_NOTHING must be zero, since - * MakeReconMap uses bzero to initialize the structure. These are used + * dereference through them. RU_NOTHING must be zero, since + * MakeReconMap uses bzero to initialize the structure. These are used * only at the head of the list. */ -#define RU_ALL ((RF_ReconMapListElem_t *) -1) -#define RU_NOTHING ((RF_ReconMapListElem_t *) 0) - -/* used to mark the end of the list */ -#define RU_NIL ((RF_ReconMapListElem_t *) 0) - - -static void -compact_stat_entry(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, - int i); -static void crunch_list(RF_ReconMap_t * mapPtr, RF_ReconMapListElem_t * listPtr); -static RF_ReconMapListElem_t * -MakeReconMapListElem(RF_SectorNum_t startSector, - RF_SectorNum_t stopSector, RF_ReconMapListElem_t * next); -static void -FreeReconMapListElem(RF_ReconMap_t * mapPtr, - RF_ReconMapListElem_t * p); -static void update_size(RF_ReconMap_t * mapPtr, int size); -static void PrintList(RF_ReconMapListElem_t * listPtr); - -/*----------------------------------------------------------------------------- +#define RU_ALL ((RF_ReconMapListElem_t *) -1) +#define RU_NOTHING ((RF_ReconMapListElem_t *) 0) + +/* Used to mark the end of the list. */ +#define RU_NIL ((RF_ReconMapListElem_t *) 0) + + +void rf_compact_stat_entry(RF_Raid_t *, RF_ReconMap_t *, int); +void rf_crunch_list(RF_ReconMap_t *, RF_ReconMapListElem_t *); +RF_ReconMapListElem_t * rf_MakeReconMapListElem(RF_SectorNum_t, RF_SectorNum_t, + RF_ReconMapListElem_t *); +void rf_FreeReconMapListElem(RF_ReconMap_t *, RF_ReconMapListElem_t *); +void rf_update_size(RF_ReconMap_t *, int); +void rf_PrintList(RF_ReconMapListElem_t *); + + +/***************************************************************************** * - * Creates and initializes new Reconstruction map + * Creates and initializes new Reconstruction map. * - *-----------------------------------------------------------------------------*/ + *****************************************************************************/ RF_ReconMap_t * -rf_MakeReconMap(raidPtr, ru_sectors, disk_sectors, spareUnitsPerDisk) - RF_Raid_t *raidPtr; - RF_SectorCount_t ru_sectors; /* size of reconstruction unit in - * sectors */ - RF_SectorCount_t disk_sectors; /* size of disk in sectors */ - RF_ReconUnitCount_t spareUnitsPerDisk; /* zero unless distributed - * sparing */ +rf_MakeReconMap( + RF_Raid_t *raidPtr, + RF_SectorCount_t ru_sectors, /* + * Size of reconstruction unit + * in sectors. + */ + RF_SectorCount_t disk_sectors, /* Size of disk in sectors. */ + RF_ReconUnitCount_t spareUnitsPerDisk /* + * Zero unless distributed + * sparing. 
+ */ +) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconUnitCount_t num_rus = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerRU; + RF_ReconUnitCount_t num_rus = layoutPtr->stripeUnitsPerDisk / + layoutPtr->SUsPerRU; RF_ReconMap_t *p; - int rc; + int rc; RF_Malloc(p, sizeof(RF_ReconMap_t), (RF_ReconMap_t *)); p->sectorsPerReconUnit = ru_sectors; @@ -94,18 +98,21 @@ rf_MakeReconMap(raidPtr, ru_sectors, disk_sectors, spareUnitsPerDisk) p->spareRUs = spareUnitsPerDisk; p->unitsLeft = num_rus - spareUnitsPerDisk; - RF_Malloc(p->status, num_rus * sizeof(RF_ReconMapListElem_t *), (RF_ReconMapListElem_t **)); + RF_Malloc(p->status, num_rus * sizeof(RF_ReconMapListElem_t *), + (RF_ReconMapListElem_t **)); RF_ASSERT(p->status != (RF_ReconMapListElem_t **) NULL); - (void) bzero((char *) p->status, num_rus * sizeof(RF_ReconMapListElem_t *)); + (void) bzero((char *) p->status, num_rus * + sizeof(RF_ReconMapListElem_t *)); - p->size = sizeof(RF_ReconMap_t) + num_rus * sizeof(RF_ReconMapListElem_t *); + p->size = sizeof(RF_ReconMap_t) + num_rus * + sizeof(RF_ReconMapListElem_t *); p->maxSize = p->size; rc = rf_mutex_init(&p->mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n", + __FILE__, __LINE__, rc); RF_Free(p->status, num_rus * sizeof(RF_ReconMapListElem_t *)); RF_Free(p, sizeof(RF_ReconMap_t)); return (NULL); @@ -114,33 +121,32 @@ rf_MakeReconMap(raidPtr, ru_sectors, disk_sectors, spareUnitsPerDisk) } -/*----------------------------------------------------------------------------- +/***************************************************************************** * - * marks a new set of sectors as reconstructed. All the possible mergings get - * complicated. To simplify matters, the approach I take is to just dump + * Marks a new set of sectors as reconstructed. All the possible mergings get + * complicated. To simplify matters, the approach I take is to just dump * something into the list, and then clean it up (i.e. merge elements and - * eliminate redundant ones) in a second pass over the list (compact_stat_entry()). + * eliminate redundant ones) in a second pass over the list + * (rf_compact_stat_entry()). * Not 100% efficient, since a structure can be allocated and then immediately * freed, but it keeps this code from becoming (more of) a nightmare of - * special cases. The only thing that compact_stat_entry() assumes is that the - * list is sorted by startSector, and so this is the only condition I maintain - * here. (MCH) + * special cases. The only thing that rf_compact_stat_entry() assumes is that + * the list is sorted by startSector, and so this is the only condition I + * maintain here. 
(MCH) * - *-----------------------------------------------------------------------------*/ - -void -rf_ReconMapUpdate(raidPtr, mapPtr, startSector, stopSector) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - RF_SectorNum_t startSector; - RF_SectorNum_t stopSector; + *****************************************************************************/ + +void +rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr, + RF_SectorNum_t startSector, RF_SectorNum_t stopSector) { RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit; RF_SectorNum_t i, first_in_RU, last_in_RU; RF_ReconMapListElem_t *p, *pt; RF_LOCK_MUTEX(mapPtr->mutex); - RF_ASSERT(startSector >= 0 && stopSector < mapPtr->sectorsInDisk && stopSector >= startSector); + RF_ASSERT(startSector >= 0 && stopSector < mapPtr->sectorsInDisk && + stopSector >= startSector); while (startSector <= stopSector) { i = startSector / mapPtr->sectorsPerReconUnit; @@ -148,21 +154,27 @@ rf_ReconMapUpdate(raidPtr, mapPtr, startSector, stopSector) last_in_RU = first_in_RU + sectorsPerReconUnit - 1; p = mapPtr->status[i]; if (p != RU_ALL) { - if (p == RU_NOTHING || p->startSector > startSector) { /* insert at front of - * list */ - - mapPtr->status[i] = MakeReconMapListElem(startSector, RF_MIN(stopSector, last_in_RU), (p == RU_NOTHING) ? NULL : p); - update_size(mapPtr, sizeof(RF_ReconMapListElem_t)); - - } else {/* general case */ - do { /* search for place to insert */ + if (p == RU_NOTHING || p->startSector > startSector) { + /* Insert at front of list. */ + + mapPtr->status[i] = + rf_MakeReconMapListElem(startSector, + RF_MIN(stopSector, last_in_RU), + (p == RU_NOTHING) ? NULL : p); + rf_update_size(mapPtr, + sizeof(RF_ReconMapListElem_t)); + + } else {/* General case. */ + do { /* Search for place to insert. */ pt = p; p = p->next; } while (p && (p->startSector < startSector)); - pt->next = MakeReconMapListElem(startSector, RF_MIN(stopSector, last_in_RU), p); - update_size(mapPtr, sizeof(RF_ReconMapListElem_t)); + pt->next = rf_MakeReconMapListElem(startSector, + RF_MIN(stopSector, last_in_RU), p); + rf_update_size(mapPtr, + sizeof(RF_ReconMapListElem_t)); } - compact_stat_entry(raidPtr, mapPtr, i); + rf_compact_stat_entry(raidPtr, mapPtr, i); } startSector = RF_MIN(stopSector, last_in_RU) + 1; } @@ -170,46 +182,41 @@ rf_ReconMapUpdate(raidPtr, mapPtr, startSector, stopSector) } - -/*----------------------------------------------------------------------------- +/***************************************************************************** * - * performs whatever list compactions can be done, and frees any space - * that is no longer necessary. Assumes only that the list is sorted - * by startSector. crunch_list() compacts a single list as much as possible, + * Performs whatever list compactions can be done, and frees any space + * that is no longer necessary. Assumes only that the list is sorted + * by startSector. rf_crunch_list() compacts a single list as much as possible, * and the second block of code deletes the entire list if possible. - * crunch_list() is also called from MakeReconMapAccessList(). + * rf_crunch_list() is also called from MakeReconMapAccessList(). * * When a recon unit is detected to be fully reconstructed, we set the * corresponding bit in the parity stripe map so that the head follow - * code will not select this parity stripe again. This is redundant (but - * harmless) when compact_stat_entry is called from the reconstruction code, - * but necessary when called from the user-write code. 
+ * code will not select this parity stripe again. This is redundant (but + * harmless) when rf_compact_stat_entry is called from the reconstruction + * code, but necessary when called from the user-write code. * - *-----------------------------------------------------------------------------*/ + *****************************************************************************/ -static void -compact_stat_entry(raidPtr, mapPtr, i) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - int i; +void +rf_compact_stat_entry(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr, int i) { RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit; RF_ReconMapListElem_t *p = mapPtr->status[i]; - crunch_list(mapPtr, p); + rf_crunch_list(mapPtr, p); if ((p->startSector == i * sectorsPerReconUnit) && - (p->stopSector == i * sectorsPerReconUnit + sectorsPerReconUnit - 1)) { + (p->stopSector == i * sectorsPerReconUnit + + sectorsPerReconUnit - 1)) { mapPtr->status[i] = RU_ALL; mapPtr->unitsLeft--; - FreeReconMapListElem(mapPtr, p); + rf_FreeReconMapListElem(mapPtr, p); } } -static void -crunch_list(mapPtr, listPtr) - RF_ReconMap_t *mapPtr; - RF_ReconMapListElem_t *listPtr; +void +rf_crunch_list(RF_ReconMap_t *mapPtr, RF_ReconMapListElem_t *listPtr) { RF_ReconMapListElem_t *pt, *p = listPtr; @@ -221,7 +228,7 @@ crunch_list(mapPtr, listPtr) if (pt->stopSector >= p->startSector - 1) { pt->stopSector = RF_MAX(pt->stopSector, p->stopSector); pt->next = p->next; - FreeReconMapListElem(mapPtr, p); + rf_FreeReconMapListElem(mapPtr, p); p = pt->next; } else { pt = p; @@ -229,17 +236,17 @@ crunch_list(mapPtr, listPtr) } } } -/*----------------------------------------------------------------------------- + + +/***************************************************************************** * - * Allocate and fill a new list element + * Allocate and fill a new list element. * - *-----------------------------------------------------------------------------*/ + *****************************************************************************/ -static RF_ReconMapListElem_t * -MakeReconMapListElem( - RF_SectorNum_t startSector, - RF_SectorNum_t stopSector, - RF_ReconMapListElem_t * next) +RF_ReconMapListElem_t * +rf_MakeReconMapListElem(RF_SectorNum_t startSector, RF_SectorNum_t stopSector, + RF_ReconMapListElem_t *next) { RF_ReconMapListElem_t *p; @@ -251,33 +258,36 @@ MakeReconMapListElem( p->next = next; return (p); } -/*----------------------------------------------------------------------------- + + +/***************************************************************************** * - * Free a list element + * Free a list element. * - *-----------------------------------------------------------------------------*/ + *****************************************************************************/ -static void -FreeReconMapListElem(mapPtr, p) - RF_ReconMap_t *mapPtr; - RF_ReconMapListElem_t *p; +void +rf_FreeReconMapListElem(RF_ReconMap_t *mapPtr, RF_ReconMapListElem_t *p) { - int delta; + int delta; if (mapPtr) { delta = 0 - (int) sizeof(RF_ReconMapListElem_t); - update_size(mapPtr, delta); + rf_update_size(mapPtr, delta); } RF_Free(p, sizeof(*p)); } -/*----------------------------------------------------------------------------- + + +/***************************************************************************** * - * Free an entire status structure. Inefficient, but can be called at any time. + * Free an entire status structure. Inefficient, but can be called at any + * time. 
* - *-----------------------------------------------------------------------------*/ -void -rf_FreeReconMap(mapPtr) - RF_ReconMap_t *mapPtr; + *****************************************************************************/ + +void +rf_FreeReconMap(RF_ReconMap_t *mapPtr) { RF_ReconMapListElem_t *p, *q; RF_ReconUnitCount_t numRUs; @@ -296,21 +306,22 @@ rf_FreeReconMap(mapPtr) } } rf_mutex_destroy(&mapPtr->mutex); - RF_Free(mapPtr->status, mapPtr->totalRUs * sizeof(RF_ReconMapListElem_t *)); + RF_Free(mapPtr->status, mapPtr->totalRUs * + sizeof(RF_ReconMapListElem_t *)); RF_Free(mapPtr, sizeof(RF_ReconMap_t)); } -/*----------------------------------------------------------------------------- + + +/***************************************************************************** * - * returns nonzero if the indicated RU has been reconstructed already + * Returns nonzero if the indicated RU has been reconstructed already. * - *---------------------------------------------------------------------------*/ + *****************************************************************************/ -int -rf_CheckRUReconstructed(mapPtr, startSector) - RF_ReconMap_t *mapPtr; - RF_SectorNum_t startSector; +int +rf_CheckRUReconstructed(RF_ReconMap_t *mapPtr, RF_SectorNum_t startSector) { - RF_ReconMapListElem_t *l; /* used for searching */ + RF_ReconMapListElem_t *l; /* Used for searching. */ RF_ReconUnitNum_t i; i = startSector / mapPtr->sectorsPerReconUnit; @@ -318,40 +329,35 @@ rf_CheckRUReconstructed(mapPtr, startSector) return ((l == RU_ALL) ? 1 : 0); } -RF_ReconUnitCount_t -rf_UnitsLeftToReconstruct(mapPtr) - RF_ReconMap_t *mapPtr; +RF_ReconUnitCount_t +rf_UnitsLeftToReconstruct(RF_ReconMap_t *mapPtr) { RF_ASSERT(mapPtr != NULL); return (mapPtr->unitsLeft); } -/* updates the size fields of a status descriptor */ -static void -update_size(mapPtr, size) - RF_ReconMap_t *mapPtr; - int size; + +/* Updates the size fields of a status descriptor. 
*/ +void +rf_update_size(RF_ReconMap_t *mapPtr, int size) { mapPtr->size += size; mapPtr->maxSize = RF_MAX(mapPtr->size, mapPtr->maxSize); } -static void -PrintList(listPtr) - RF_ReconMapListElem_t *listPtr; +void +rf_PrintList(RF_ReconMapListElem_t *listPtr) { while (listPtr) { - printf("%d,%d -> ", (int) listPtr->startSector, (int) listPtr->stopSector); + printf("%d,%d -> ", (int) listPtr->startSector, + (int) listPtr->stopSector); listPtr = listPtr->next; } printf("\n"); } -void -rf_PrintReconMap(raidPtr, mapPtr, frow, fcol) - RF_Raid_t *raidPtr; - RF_ReconMap_t *mapPtr; - RF_RowCol_t frow; - RF_RowCol_t fcol; +void +rf_PrintReconMap(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr, RF_RowCol_t frow, + RF_RowCol_t fcol) { RF_ReconUnitCount_t numRUs; RF_ReconMapListElem_t *p; @@ -363,32 +369,32 @@ rf_PrintReconMap(raidPtr, mapPtr, frow, fcol) for (i = 0; i < numRUs; i++) { p = mapPtr->status[i]; - if (p == RU_ALL)/* printf("[%d] ALL\n",i) */ - ; + if (p == RU_ALL) + /* printf("[%d] ALL.\n", i) */; else if (p == RU_NOTHING) { - printf("%d: Unreconstructed\n", i); + printf("%d: Unreconstructed.\n", i); } else { printf("%d: ", i); - PrintList(p); + rf_PrintList(p); } } } -void -rf_PrintReconSchedule(mapPtr, starttime) - RF_ReconMap_t *mapPtr; - struct timeval *starttime; +void +rf_PrintReconSchedule(RF_ReconMap_t *mapPtr, struct timeval *starttime) { static int old_pctg = -1; struct timeval tv, diff; - int new_pctg; + int new_pctg; - new_pctg = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); + new_pctg = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / + mapPtr->totalRUs); if (new_pctg != old_pctg) { RF_GETTIME(tv); RF_TIMEVAL_DIFF(starttime, &tv, &diff); - printf("%d %d.%06d\n", (int) new_pctg, (int) diff.tv_sec, (int) diff.tv_usec); + printf("%d %d.%06d\n", (int) new_pctg, (int) diff.tv_sec, + (int) diff.tv_usec); old_pctg = new_pctg; } } diff --git a/sys/dev/raidframe/rf_reconmap.h b/sys/dev/raidframe/rf_reconmap.h index 476acab9cb2..9910e4cc94e 100644 --- a/sys/dev/raidframe/rf_reconmap.h +++ b/sys/dev/raidframe/rf_reconmap.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_reconmap.h,v 1.2 1999/02/16 00:03:21 niklas Exp $ */ +/* $OpenBSD: rf_reconmap.h,v 1.3 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_reconmap.h,v 1.3 1999/02/05 00:06:16 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,59 +29,73 @@ */ /****************************************************************************** - * rf_reconMap.h -- Header file describing reconstruction status data structure + * rf_reconMap.h + * + * -- Header file describing reconstruction status data structure. ******************************************************************************/ -#ifndef _RF__RF_RECONMAP_H_ -#define _RF__RF_RECONMAP_H_ +#ifndef _RF__RF_RECONMAP_H_ +#define _RF__RF_RECONMAP_H_ #include "rf_types.h" #include "rf_threadstuff.h" /* - * Main reconstruction status descriptor. size and maxsize are used for - * monitoring only: they have no function for reconstruction. + * Main reconstruction status descriptor; size and maxsize are used for + * monitoring only: they have no function for reconstruction. 
*/ struct RF_ReconMap_s { - RF_SectorCount_t sectorsPerReconUnit; /* sectors per reconstruct - * unit */ - RF_SectorCount_t sectorsInDisk; /* total sectors in disk */ - RF_SectorCount_t unitsLeft; /* recon units left to recon */ - RF_ReconUnitCount_t totalRUs; /* total recon units on disk */ - RF_ReconUnitCount_t spareRUs; /* total number of spare RUs on failed - * disk */ - RF_StripeCount_t totalParityStripes; /* total number of parity - * stripes in array */ - u_int size; /* overall size of this structure */ - u_int maxSize; /* maximum size so far */ - RF_ReconMapListElem_t **status; /* array of ptrs to list elements */ - RF_DECLARE_MUTEX(mutex) + RF_SectorCount_t sectorsPerReconUnit; + /* + * Sectors per reconstruct + * unit. + */ + RF_SectorCount_t sectorsInDisk;/* Total sectors in disk. */ + RF_SectorCount_t unitsLeft; /* Recon units left to recon. */ + RF_ReconUnitCount_t totalRUs; /* Total recon units on disk. */ + RF_ReconUnitCount_t spareRUs; /* + * Total number of spare RUs on + * failed disk. + */ + RF_StripeCount_t totalParityStripes; + /* + * Total number of parity + * stripes in array. + */ + u_int size; /* + * Overall size of this + * structure. + */ + u_int maxSize; /* Maximum size so far. */ + RF_ReconMapListElem_t **status; /* + * Array of ptrs to list + * elements. + */ + RF_DECLARE_MUTEX (mutex); }; -/* a list element */ + +/* A list element. */ struct RF_ReconMapListElem_s { - RF_SectorNum_t startSector; /* bounding sect nums on this block */ - RF_SectorNum_t stopSector; - RF_ReconMapListElem_t *next; /* next element in list */ + /* Bounding sect nums on this block. */ + RF_SectorNum_t startSector; + RF_SectorNum_t stopSector; + RF_ReconMapListElem_t *next; /* Next element in list. */ }; -RF_ReconMap_t * -rf_MakeReconMap(RF_Raid_t * raidPtr, RF_SectorCount_t ru_sectors, - RF_SectorCount_t disk_sectors, RF_ReconUnitCount_t spareUnitsPerDisk); +RF_ReconMap_t *rf_MakeReconMap(RF_Raid_t *, + RF_SectorCount_t, RF_SectorCount_t, RF_ReconUnitCount_t); -void -rf_ReconMapUpdate(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, - RF_SectorNum_t startSector, RF_SectorNum_t stopSector); +void rf_ReconMapUpdate(RF_Raid_t *, RF_ReconMap_t *, + RF_SectorNum_t, RF_SectorNum_t); -void rf_FreeReconMap(RF_ReconMap_t * mapPtr); +void rf_FreeReconMap(RF_ReconMap_t *); -int rf_CheckRUReconstructed(RF_ReconMap_t * mapPtr, RF_SectorNum_t startSector); +int rf_CheckRUReconstructed(RF_ReconMap_t *, RF_SectorNum_t); -RF_ReconUnitCount_t rf_UnitsLeftToReconstruct(RF_ReconMap_t * mapPtr); +RF_ReconUnitCount_t rf_UnitsLeftToReconstruct(RF_ReconMap_t *); -void -rf_PrintReconMap(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr, - RF_RowCol_t frow, RF_RowCol_t fcol); +void rf_PrintReconMap(RF_Raid_t *, RF_ReconMap_t *, RF_RowCol_t, RF_RowCol_t); -void rf_PrintReconSchedule(RF_ReconMap_t * mapPtr, struct timeval * starttime); +void rf_PrintReconSchedule(RF_ReconMap_t *, struct timeval *); -#endif /* !_RF__RF_RECONMAP_H_ */ +#endif /* !_RF__RF_RECONMAP_H_ */ diff --git a/sys/dev/raidframe/rf_reconstruct.c b/sys/dev/raidframe/rf_reconstruct.c index 90a6435e1df..6d9ac0a05ca 100644 --- a/sys/dev/raidframe/rf_reconstruct.c +++ b/sys/dev/raidframe/rf_reconstruct.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_reconstruct.c,v 1.12 2002/08/09 15:10:20 tdeval Exp $ */ +/* $OpenBSD: rf_reconstruct.c,v 1.13 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_reconstruct.c,v 1.26 2000/06/04 02:05:13 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -27,11 +28,11 @@ * rights to redistribute these changes. */ -/************************************************************ +/************************************************************** * - * rf_reconstruct.c -- code to perform on-line reconstruction + * rf_reconstruct.c -- Code to perform on-line reconstruction. * - ************************************************************/ + **************************************************************/ #include "rf_types.h" #include <sys/time.h> @@ -44,7 +45,7 @@ #include <sys/proc.h> #include <sys/ioctl.h> #include <sys/fcntl.h> -#if __NETBSD__ +#if __NETBSD__ #include <sys/vnode.h> #endif @@ -65,57 +66,148 @@ #include "rf_kintf.h" -/* setting these to -1 causes them to be set to their default values if not set by debug options */ - -#define Dprintf(s) if (rf_reconDebug) rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf1(s,a) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) -#define Dprintf4(s,a,b,c,d) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL) -#define Dprintf5(s,a,b,c,d,e) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL) -#define Dprintf6(s,a,b,c,d,e,f) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),NULL,NULL) -#define Dprintf7(s,a,b,c,d,e,f,g) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),NULL) +/* + * Setting these to -1 causes them to be set to their default values if not set + * by debug options. 
+ */ -#define DDprintf1(s,a) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define DDprintf2(s,a,b) if (rf_reconDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) +#define Dprintf(s) \ +do { \ + if (rf_reconDebug) \ + rf_debug_printf(s, \ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); \ +} while (0) +#define Dprintf1(s,a) \ +do { \ + if (rf_reconDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + NULL, NULL, NULL, NULL, NULL, NULL, NULL); \ +} while (0) +#define Dprintf2(s,a,b) \ +do { \ + if (rf_reconDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + (void *)((unsigned long)b), \ + NULL, NULL, NULL, NULL, NULL, NULL); \ +} while (0) +#define Dprintf3(s,a,b,c) \ +do { \ + if (rf_reconDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + (void *)((unsigned long)b), \ + (void *)((unsigned long)c), \ + NULL, NULL, NULL, NULL, NULL); \ +} while (0) +#define Dprintf4(s,a,b,c,d) \ +do { \ + if (rf_reconDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + (void *)((unsigned long)b), \ + (void *)((unsigned long)c), \ + (void *)((unsigned long)d), \ + NULL, NULL, NULL, NULL); \ +} while (0) +#define Dprintf5(s,a,b,c,d,e) \ +do { \ + if (rf_reconDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + (void *)((unsigned long)b), \ + (void *)((unsigned long)c), \ + (void *)((unsigned long)d), \ + (void *)((unsigned long)e), \ + NULL, NULL, NULL); \ +} while (0) +#define Dprintf6(s,a,b,c,d,e,f) \ +do { \ + if (rf_reconDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + (void *)((unsigned long)b), \ + (void *)((unsigned long)c), \ + (void *)((unsigned long)d), \ + (void *)((unsigned long)e), \ + (void *)((unsigned long)f), \ + NULL, NULL); \ +} while (0) +#define Dprintf7(s,a,b,c,d,e,f,g) \ +do { \ + if (rf_reconDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + (void *)((unsigned long)b), \ + (void *)((unsigned long)c), \ + (void *)((unsigned long)d), \ + (void *)((unsigned long)e), \ + (void *)((unsigned long)f), \ + (void *)((unsigned long)g), \ + NULL); \ +} while (0) + +#define DDprintf1(s,a) \ +do { \ + if (rf_reconDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + NULL, NULL, NULL, NULL, NULL, NULL, NULL); \ +} while (0) +#define DDprintf2(s,a,b) \ +do { \ + if (rf_reconDebug) \ + rf_debug_printf(s, \ + (void *)((unsigned long)a), \ + (void *)((unsigned long)b), \ + NULL, NULL, NULL, NULL, NULL, NULL); \ +} while (0) static RF_FreeList_t *rf_recond_freelist; -#define RF_MAX_FREE_RECOND 4 -#define RF_RECOND_INC 1 - -RF_RaidReconDesc_t *rf_AllocRaidReconDesc(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t, RF_RaidDisk_t *, int, RF_RowCol_t, RF_RowCol_t); -int rf_ProcessReconEvent(RF_Raid_t *, RF_RowCol_t, RF_ReconEvent_t *); -int rf_IssueNextReadRequest(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); -int rf_TryToRead(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); -int rf_ComputePSDiskOffsets(RF_Raid_t *, RF_StripeNum_t, RF_RowCol_t, RF_RowCol_t, RF_SectorNum_t *, RF_SectorNum_t *, RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *); -int rf_ReconReadDoneProc(void *, int); -int rf_ReconWriteDoneProc(void *, int); +#define RF_MAX_FREE_RECOND 4 +#define RF_RECOND_INC 1 + +RF_RaidReconDesc_t *rf_AllocRaidReconDesc(RF_Raid_t *, + RF_RowCol_t, RF_RowCol_t, RF_RaidDisk_t *, int, + RF_RowCol_t, RF_RowCol_t); +int rf_ProcessReconEvent(RF_Raid_t *, RF_RowCol_t, RF_ReconEvent_t *); +int 
rf_IssueNextReadRequest(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); +int rf_TryToRead(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); +int rf_ComputePSDiskOffsets(RF_Raid_t *, RF_StripeNum_t, + RF_RowCol_t, RF_RowCol_t, RF_SectorNum_t *, RF_SectorNum_t *, + RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *); +int rf_ReconReadDoneProc(void *, int); +int rf_ReconWriteDoneProc(void *, int); void rf_CheckForNewMinHeadSep(RF_Raid_t *, RF_RowCol_t, RF_HeadSepLimit_t); -int rf_CheckHeadSeparation(RF_Raid_t *, RF_PerDiskReconCtrl_t *, RF_RowCol_t, RF_RowCol_t, RF_HeadSepLimit_t, RF_ReconUnitNum_t); +int rf_CheckHeadSeparation(RF_Raid_t *, RF_PerDiskReconCtrl_t *, + RF_RowCol_t, RF_RowCol_t, RF_HeadSepLimit_t, RF_ReconUnitNum_t); void rf_ForceReconReadDoneProc(void *, int); void rf_ShutdownReconstruction(void *); /* - * these functions are inlined on gcc. If they are used more than - * once, it is strongly advised to un-line them + * These functions are inlined on gcc. If they are used more than + * once, it is strongly advised to un-line them. */ void rf_FreeReconDesc(RF_RaidReconDesc_t *); -int rf_IssueNextWriteRequest(RF_Raid_t *, RF_RowCol_t); -int rf_CheckForcedOrBlockedReconstruction(RF_Raid_t *, RF_ReconParityStripeStatus_t *, RF_PerDiskReconCtrl_t *, RF_RowCol_t, RF_RowCol_t, RF_StripeNum_t, RF_ReconUnitNum_t); +int rf_IssueNextWriteRequest(RF_Raid_t *, RF_RowCol_t); +int rf_CheckForcedOrBlockedReconstruction(RF_Raid_t *, + RF_ReconParityStripeStatus_t *, RF_PerDiskReconCtrl_t *, + RF_RowCol_t, RF_RowCol_t, RF_StripeNum_t, RF_ReconUnitNum_t); void rf_SignalReconDone(RF_Raid_t *); struct RF_ReconDoneProc_s { - void (*proc) (RF_Raid_t *, void *); - void *arg; - RF_ReconDoneProc_t *next; + void (*proc) (RF_Raid_t *, void *); + void *arg; + RF_ReconDoneProc_t *next; }; static RF_FreeList_t *rf_rdp_freelist; -#define RF_MAX_FREE_RDP 4 -#define RF_RDP_INC 1 +#define RF_MAX_FREE_RDP 4 +#define RF_RDP_INC 1 -void -rf_SignalReconDone(RF_Raid_t * raidPtr) +void +rf_SignalReconDone(RF_Raid_t *raidPtr) { RF_ReconDoneProc_t *p; @@ -126,12 +218,9 @@ rf_SignalReconDone(RF_Raid_t * raidPtr) RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex); } -int -rf_RegisterReconDoneProc( - RF_Raid_t * raidPtr, - void (*proc) (RF_Raid_t *, void *), - void *arg, - RF_ReconDoneProc_t ** handlep) +int +rf_RegisterReconDoneProc(RF_Raid_t *raidPtr, void (*proc) (RF_Raid_t *, void *), + void *arg, RF_ReconDoneProc_t **handlep) { RF_ReconDoneProc_t *p; @@ -148,28 +237,27 @@ rf_RegisterReconDoneProc( *handlep = p; return (0); } -/************************************************************************** + +/***************************************************************************** * - * sets up the parameters that will be used by the reconstruction process - * currently there are none, except for those that the layout-specific + * Sets up the parameters that will be used by the reconstruction process. + * Currently there are none, except for those that the layout-specific * configuration (e.g. rf_ConfigureDeclustered) routine sets up. * - * in the kernel, we fire off the recon thread. + * In the kernel, we fire off the recon thread. 
* - **************************************************************************/ -void -rf_ShutdownReconstruction(ignored) - void *ignored; + *****************************************************************************/ +void +rf_ShutdownReconstruction(void *ignored) { RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *)); RF_FREELIST_DESTROY(rf_rdp_freelist, next, (RF_ReconDoneProc_t *)); } -int -rf_ConfigureReconstruction(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureReconstruction(RF_ShutdownList_t **listp) { - int rc; + int rc; RF_FREELIST_CREATE(rf_recond_freelist, RF_MAX_FREE_RECOND, RF_RECOND_INC, sizeof(RF_RaidReconDesc_t)); @@ -178,13 +266,14 @@ rf_ConfigureReconstruction(listp) RF_FREELIST_CREATE(rf_rdp_freelist, RF_MAX_FREE_RDP, RF_RDP_INC, sizeof(RF_ReconDoneProc_t)); if (rf_rdp_freelist == NULL) { - RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *)); + RF_FREELIST_DESTROY(rf_recond_freelist, next, + (RF_RaidReconDesc_t *)); return (ENOMEM); } rc = rf_ShutdownCreate(listp, rf_ShutdownReconstruction, NULL); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d.\n", __FILE__, __LINE__, rc); rf_ShutdownReconstruction(NULL); return (rc); } @@ -192,19 +281,15 @@ rf_ConfigureReconstruction(listp) } RF_RaidReconDesc_t * -rf_AllocRaidReconDesc(raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; - RF_RaidDisk_t *spareDiskPtr; - int numDisksDone; - RF_RowCol_t srow; - RF_RowCol_t scol; +rf_AllocRaidReconDesc(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col, + RF_RaidDisk_t *spareDiskPtr, int numDisksDone, RF_RowCol_t srow, + RF_RowCol_t scol) { RF_RaidReconDesc_t *reconDesc; - RF_FREELIST_GET(rf_recond_freelist, reconDesc, next, (RF_RaidReconDesc_t *)); + RF_FREELIST_GET(rf_recond_freelist, reconDesc, next, + (RF_RaidReconDesc_t *)); reconDesc->raidPtr = raidPtr; reconDesc->row = row; @@ -219,46 +304,43 @@ rf_AllocRaidReconDesc(raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol) return (reconDesc); } -void -rf_FreeReconDesc(reconDesc) - RF_RaidReconDesc_t *reconDesc; +void +rf_FreeReconDesc(RF_RaidReconDesc_t *reconDesc) { -#if RF_RECON_STATS > 0 - printf("RAIDframe: %qu recon event waits, %qu recon delays\n", +#if RF_RECON_STATS > 0 + printf("RAIDframe: %qu recon event waits, %qu recon delays.\n", reconDesc->numReconEventWaits, reconDesc->numReconExecDelays); -#endif /* RF_RECON_STATS > 0 */ +#endif /* RF_RECON_STATS > 0 */ - printf("RAIDframe: %qu max exec ticks\n", reconDesc->maxReconExecTicks); + printf("RAIDframe: %qu max exec ticks.\n", + reconDesc->maxReconExecTicks); -#if (RF_RECON_STATS > 0) || defined(_KERNEL) +#if (RF_RECON_STATS > 0) || defined(_KERNEL) printf("\n"); -#endif /* (RF_RECON_STATS > 0) || _KERNEL */ +#endif /* (RF_RECON_STATS > 0) || _KERNEL */ RF_FREELIST_FREE(rf_recond_freelist, reconDesc, next); } -/*********************************************************************** +/***************************************************************************** * - * primary routine to reconstruct a failed disk. This should be called from - * within its own thread. It won't return until reconstruction completes, + * Primary routine to reconstruct a failed disk. This should be called from + * within its own thread. It won't return until reconstruction completes, * fails, or is aborted. 
* - ***********************************************************************/ -int -rf_ReconstructFailedDisk(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; + *****************************************************************************/ +int +rf_ReconstructFailedDisk(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col) { RF_LayoutSW_t *lp; - int rc; + int rc; lp = raidPtr->Layout.map; if (lp->SubmitReconBuffer) { /* - * The current infrastructure only supports reconstructing one - * disk at a time for each array. - */ + * The current infrastructure only supports reconstructing one + * disk at a time for each array. + */ RF_LOCK_MUTEX(raidPtr->mutex); while (raidPtr->reconInProgress) { RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex); @@ -270,38 +352,41 @@ rf_ReconstructFailedDisk(raidPtr, row, col) raidPtr->reconInProgress--; RF_UNLOCK_MUTEX(raidPtr->mutex); } else { - RF_ERRORMSG1("RECON: no way to reconstruct failed disk for arch %c\n", - lp->parityConfig); + RF_ERRORMSG1("RECON: no way to reconstruct failed disk for" + " arch %c.\n", lp->parityConfig); rc = EIO; } RF_SIGNAL_COND(raidPtr->waitForReconCond); - wakeup(&raidPtr->waitForReconCond); /* XXX Methinks this will be - * needed at some point... GO */ + wakeup(&raidPtr->waitForReconCond); /* + * XXX Methinks this will be + * needed at some point... GO + */ return (rc); } -int -rf_ReconstructFailedDiskBasic(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; +int +rf_ReconstructFailedDiskBasic(RF_Raid_t *raidPtr, RF_RowCol_t row, + RF_RowCol_t col) { RF_ComponentLabel_t c_label; RF_RaidDisk_t *spareDiskPtr = NULL; RF_RaidReconDesc_t *reconDesc; RF_RowCol_t srow, scol; - int numDisksDone = 0, rc; + int numDisksDone = 0, rc; - /* first look for a spare drive onto which to reconstruct the data */ - /* spare disk descriptors are stored in row 0. This may have to - * change eventually */ + /* First look for a spare drive onto which to reconstruct the data. */ + /* + * Spare disk descriptors are stored in row 0. This may have to + * change eventually. 
+ */ RF_LOCK_MUTEX(raidPtr->mutex); RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed); if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { if (raidPtr->status[row] != rf_rs_degraded) { - RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because status not degraded\n", row, col); + RF_ERRORMSG2("Unable to reconstruct disk at row %d" + " col %d because status not degraded.\n", row, col); RF_UNLOCK_MUTEX(raidPtr->mutex); return (EINVAL); } @@ -309,7 +394,8 @@ rf_ReconstructFailedDiskBasic(raidPtr, row, col) scol = (-1); } else { srow = 0; - for (scol = raidPtr->numCol; scol < raidPtr->numCol + raidPtr->numSpare; scol++) { + for (scol = raidPtr->numCol; + scol < raidPtr->numCol + raidPtr->numSpare; scol++) { if (raidPtr->Disks[srow][scol].status == rf_ds_spare) { spareDiskPtr = &raidPtr->Disks[srow][scol]; spareDiskPtr->status = rf_ds_used_spare; @@ -317,73 +403,74 @@ rf_ReconstructFailedDiskBasic(raidPtr, row, col) } } if (!spareDiskPtr) { - RF_ERRORMSG2("Unable to reconstruct disk at row %d col %d because no spares are available\n", row, col); + RF_ERRORMSG2("Unable to reconstruct disk at row %d" + " col %d because no spares are available.\n", + row, col); RF_UNLOCK_MUTEX(raidPtr->mutex); return (ENOSPC); } - printf("RECON: initiating reconstruction on row %d col %d -> spare at row %d col %d\n", row, col, srow, scol); + printf("RECON: initiating reconstruction on row %d col %d" + " -> spare at row %d col %d.\n", row, col, srow, scol); } RF_UNLOCK_MUTEX(raidPtr->mutex); - reconDesc = rf_AllocRaidReconDesc((void *) raidPtr, row, col, spareDiskPtr, numDisksDone, srow, scol); + reconDesc = rf_AllocRaidReconDesc((void *) raidPtr, row, col, + spareDiskPtr, numDisksDone, srow, scol); raidPtr->reconDesc = (void *) reconDesc; -#if RF_RECON_STATS > 0 +#if RF_RECON_STATS > 0 reconDesc->hsStallCount = 0; reconDesc->numReconExecDelays = 0; reconDesc->numReconEventWaits = 0; -#endif /* RF_RECON_STATS > 0 */ +#endif /* RF_RECON_STATS > 0 */ reconDesc->reconExecTimerRunning = 0; reconDesc->reconExecTicks = 0; reconDesc->maxReconExecTicks = 0; rc = rf_ContinueReconstructFailedDisk(reconDesc); if (!rc) { - /* fix up the component label */ - /* Don't actually need the read here.. */ + /* Fix up the component label. */ + /* Don't actually need the read here... */ raidread_component_label( - raidPtr->raid_cinfo[srow][scol].ci_dev, - raidPtr->raid_cinfo[srow][scol].ci_vp, - &c_label); - - raid_init_component_label( raidPtr, &c_label); + raidPtr->raid_cinfo[srow][scol].ci_dev, + raidPtr->raid_cinfo[srow][scol].ci_vp, + &c_label); + + raid_init_component_label(raidPtr, &c_label); c_label.row = row; c_label.column = col; c_label.clean = RF_RAID_DIRTY; c_label.status = rf_ds_optimal; - /* XXXX MORE NEEDED HERE */ - + /* XXXX MORE NEEDED HERE. */ + raidwrite_component_label( - raidPtr->raid_cinfo[srow][scol].ci_dev, - raidPtr->raid_cinfo[srow][scol].ci_vp, - &c_label); - + raidPtr->raid_cinfo[srow][scol].ci_dev, + raidPtr->raid_cinfo[srow][scol].ci_vp, + &c_label); + } return (rc); } -/* - - Allow reconstructing a disk in-place -- i.e. component /dev/sd2e goes AWOL, - and you don't get a spare until the next Monday. With this function - (and hot-swappable drives) you can now put your new disk containing - /dev/sd2e on the bus, scsictl it alive, and then use raidctl(8) to - rebuild the data "on the spot". - -*/ +/* + * + * Allow reconstructing a disk in-place -- i.e. component /dev/sd2e goes AWOL, + * and you don't get a spare until the next Monday. 
With this function + * (and hot-swappable drives) you can now put your new disk containing + * /dev/sd2e on the bus, scsictl it alive, and then use raidctl(8) to + * rebuild the data "on the spot". + * + */ int -rf_ReconstructInPlace(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; +rf_ReconstructInPlace(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col) { RF_RaidDisk_t *spareDiskPtr = NULL; RF_RaidReconDesc_t *reconDesc; RF_LayoutSW_t *lp; RF_RaidDisk_t *badDisk; RF_ComponentLabel_t c_label; - int numDisksDone = 0, rc; + int numDisksDone = 0, rc; struct partinfo dpart; struct vnode *vp; struct vattr va; @@ -394,28 +481,31 @@ rf_ReconstructInPlace(raidPtr, row, col) lp = raidPtr->Layout.map; if (lp->SubmitReconBuffer) { /* - * The current infrastructure only supports reconstructing one - * disk at a time for each array. - */ + * The current infrastructure only supports reconstructing one + * disk at a time for each array. + */ RF_LOCK_MUTEX(raidPtr->mutex); if ((raidPtr->Disks[row][col].status == rf_ds_optimal) && - (raidPtr->numFailures > 0)) { - /* XXX 0 above shouldn't be constant!!! */ - /* some component other than this has failed. - Let's not make things worse than they already - are... */ + (raidPtr->numFailures > 0)) { + /* XXX 0 above shouldn't be constant !!! */ + /* + * Some component other than this has failed. + * Let's not make things worse than they already + * are... + */ #ifdef RAIDDEBUG - printf("RAIDFRAME: Unable to reconstruct to disk at:\n"); - printf(" Row: %d Col: %d Too many failures.\n", - row, col); + printf("RAIDFRAME: Unable to reconstruct to disk at:\n" + " Row: %d Col: %d Too many failures.\n", + row, col); #endif /* RAIDDEBUG */ RF_UNLOCK_MUTEX(raidPtr->mutex); return (EINVAL); } if (raidPtr->Disks[row][col].status == rf_ds_reconstructing) { #ifdef RAIDDEBUG - printf("RAIDFRAME: Unable to reconstruct to disk at:\n"); - printf(" Row: %d Col: %d Reconstruction already occuring!\n", row, col); + printf("RAIDFRAME: Unable to reconstruct to disk at:\n" + " Row: %d Col: %d Reconstruction already" + " occuring !\n", row, col); #endif /* RAIDDEBUG */ RF_UNLOCK_MUTEX(raidPtr->mutex); @@ -428,8 +518,8 @@ rf_ReconstructInPlace(raidPtr, row, col) raidPtr->numFailures++; raidPtr->Disks[row][col].status = rf_ds_failed; raidPtr->status[row] = rf_rs_degraded; - rf_update_component_labels(raidPtr, - RF_NORMAL_COMPONENT_UPDATE); + rf_update_component_labels(raidPtr, + RF_NORMAL_COMPONENT_UPDATE); } while (raidPtr->reconInProgress) { @@ -438,123 +528,135 @@ rf_ReconstructInPlace(raidPtr, row, col) raidPtr->reconInProgress++; + /* + * First look for a spare drive onto which to reconstruct + * the data. Spare disk descriptors are stored in row 0. + * This may have to change eventually. + */ - /* first look for a spare drive onto which to reconstruct - the data. spare disk descriptors are stored in row 0. - This may have to change eventually */ - - /* Actually, we don't care if it's failed or not... - On a RAID set with correct parity, this function - should be callable on any component without ill affects. */ - /* RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed); + /* + * Actually, we don't care if it's failed or not... + * On a RAID set with correct parity, this function + * should be callable on any component without ill effects. 
+ */ + /* + * RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed); */ if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { - RF_ERRORMSG2("Unable to reconstruct to disk at row %d col %d: operation not supported for RF_DISTRIBUTE_SPARE\n", row, col); + RF_ERRORMSG2("Unable to reconstruct to disk at row %d" + " col %d: operation not supported for" + " RF_DISTRIBUTE_SPARE.\n", row, col); raidPtr->reconInProgress--; RF_UNLOCK_MUTEX(raidPtr->mutex); return (EINVAL); - } - - /* XXX need goop here to see if the disk is alive, - and, if not, make it so... */ - + } + /* + * XXX Need goop here to see if the disk is alive, + * and, if not, make it so... + */ badDisk = &raidPtr->Disks[row][col]; proc = raidPtr->recon_thread; - /* This device may have been opened successfully the - first time. Close it before trying to open it again.. */ + /* + * This device may have been opened successfully the + * first time. Close it before trying to open it again... + */ if (raidPtr->raid_cinfo[row][col].ci_vp != NULL) { printf("Closing the open device: %s\n", - raidPtr->Disks[row][col].devname); + raidPtr->Disks[row][col].devname); vp = raidPtr->raid_cinfo[row][col].ci_vp; ac = raidPtr->Disks[row][col].auto_configured; rf_close_component(raidPtr, vp, ac); raidPtr->raid_cinfo[row][col].ci_vp = NULL; } - /* note that this disk was *not* auto_configured (any longer)*/ + /* + * Note that this disk was *not* auto_configured (any longer). + */ raidPtr->Disks[row][col].auto_configured = 0; printf("About to (re-)open the device for rebuilding: %s\n", - raidPtr->Disks[row][col].devname); - - retcode = raidlookup(raidPtr->Disks[row][col].devname, - proc, &vp); - - if (retcode) { - printf("raid%d: rebuilding: raidlookup on device: %s failed: %d!\n", - raidPtr->raidid, - raidPtr->Disks[row][col].devname, retcode); + raidPtr->Disks[row][col].devname); - /* XXX the component isn't responding properly... - must be still dead :-( */ + retcode = raidlookup(raidPtr->Disks[row][col].devname, + proc, &vp); + + if (retcode) { + printf("raid%d: rebuilding: raidlookup on device: %s" + " failed: %d !\n", raidPtr->raidid, + raidPtr->Disks[row][col].devname, retcode); + + /* + * XXX the component isn't responding properly... + * Must still be dead :-( + */ raidPtr->reconInProgress--; RF_UNLOCK_MUTEX(raidPtr->mutex); return(retcode); } else { - /* Ok, so we can at least do a lookup... - How about actually getting a vp for it? */ + /* + * Ok, so we can at least do a lookup... + * How about actually getting a vp for it ? 
+ */ - if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, - proc)) != 0) { + if ((retcode = + VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { raidPtr->reconInProgress--; RF_UNLOCK_MUTEX(raidPtr->mutex); return(retcode); } retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, - FREAD, proc->p_ucred, proc); + FREAD, proc->p_ucred, proc); if (retcode) { raidPtr->reconInProgress--; RF_UNLOCK_MUTEX(raidPtr->mutex); return(retcode); } raidPtr->Disks[row][col].blockSize = - dpart.disklab->d_secsize; + dpart.disklab->d_secsize; + + raidPtr->Disks[row][col].numBlocks = + dpart.part->p_size - rf_protectedSectors; - raidPtr->Disks[row][col].numBlocks = - dpart.part->p_size - rf_protectedSectors; - raidPtr->raid_cinfo[row][col].ci_vp = vp; raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev; - + raidPtr->Disks[row][col].dev = va.va_rdev; - - /* we allow the user to specify that only a - fraction of the disks should be used this is - just for debug: it speeds up - * the parity scan */ + + /* + * We allow the user to specify that only a + * fraction of the disks should be used this is + * just for debug: it speeds up the parity scan. + */ raidPtr->Disks[row][col].numBlocks = - raidPtr->Disks[row][col].numBlocks * - rf_sizePercentage / 100; + raidPtr->Disks[row][col].numBlocks * + rf_sizePercentage / 100; } - - spareDiskPtr = &raidPtr->Disks[row][col]; spareDiskPtr->status = rf_ds_used_spare; printf("RECON: initiating in-place reconstruction on\n"); - printf(" row %d col %d -> spare at row %d col %d\n", - row, col, row, col); + printf(" row %d col %d -> spare at row %d col %d.\n", + row, col, row, col); RF_UNLOCK_MUTEX(raidPtr->mutex); - - reconDesc = rf_AllocRaidReconDesc((void *) raidPtr, row, col, - spareDiskPtr, numDisksDone, - row, col); + + reconDesc = rf_AllocRaidReconDesc((void *) raidPtr, row, col, + spareDiskPtr, numDisksDone, row, col); raidPtr->reconDesc = (void *) reconDesc; -#if RF_RECON_STATS > 0 +#if RF_RECON_STATS > 0 reconDesc->hsStallCount = 0; reconDesc->numReconExecDelays = 0; reconDesc->numReconEventWaits = 0; -#endif /* RF_RECON_STATS > 0 */ +#endif /* RF_RECON_STATS > 0 */ reconDesc->reconExecTimerRunning = 0; reconDesc->reconExecTicks = 0; reconDesc->maxReconExecTicks = 0; @@ -565,45 +667,46 @@ rf_ReconstructInPlace(raidPtr, row, col) RF_UNLOCK_MUTEX(raidPtr->mutex); } else { - RF_ERRORMSG1("RECON: no way to reconstruct failed disk for arch %c\n", - lp->parityConfig); + RF_ERRORMSG1("RECON: no way to reconstruct failed disk for" + " arch %c.\n", lp->parityConfig); rc = EIO; } RF_LOCK_MUTEX(raidPtr->mutex); - + if (!rc) { - /* Need to set these here, as at this point it'll be claiming - that the disk is in rf_ds_spared! But we know better :-) */ - + /* + * Need to set these here, as at this point it'll be claiming + * that the disk is in rf_ds_spared ! But we know better :-) + */ + raidPtr->Disks[row][col].status = rf_ds_optimal; raidPtr->status[row] = rf_rs_optimal; - - /* fix up the component label */ - /* Don't actually need the read here.. */ - raidread_component_label(raidPtr->raid_cinfo[row][col].ci_dev, - raidPtr->raid_cinfo[row][col].ci_vp, - &c_label); + + /* Fix up the component label. */ + /* Don't actually need the read here... 
*/ + raidread_component_label( + raidPtr->raid_cinfo[row][col].ci_dev, + raidPtr->raid_cinfo[row][col].ci_vp, + &c_label); raid_init_component_label(raidPtr, &c_label); c_label.row = row; c_label.column = col; - + raidwrite_component_label(raidPtr->raid_cinfo[row][col].ci_dev, - raidPtr->raid_cinfo[row][col].ci_vp, - &c_label); + raidPtr->raid_cinfo[row][col].ci_vp, &c_label); } RF_UNLOCK_MUTEX(raidPtr->mutex); RF_SIGNAL_COND(raidPtr->waitForReconCond); - wakeup(&raidPtr->waitForReconCond); + wakeup(&raidPtr->waitForReconCond); return (rc); } -int -rf_ContinueReconstructFailedDisk(reconDesc) - RF_RaidReconDesc_t *reconDesc; +int +rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc) { RF_Raid_t *raidPtr = reconDesc->raidPtr; RF_RowCol_t row = reconDesc->row; @@ -615,32 +718,40 @@ rf_ContinueReconstructFailedDisk(reconDesc) RF_ReconEvent_t *event; struct timeval etime, elpsd; unsigned long xor_s, xor_resid_us; - int retcode, i, ds; + int retcode, i, ds; switch (reconDesc->state) { case 0: raidPtr->accumXorTimeUs = 0; - /* create one trace record per physical disk */ - RF_Malloc(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + /* Create one trace record per physical disk. */ + RF_Malloc(raidPtr->recon_tracerecs, raidPtr->numCol * + sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); - /* quiesce the array prior to starting recon. this is needed + /* + * Quiesce the array prior to starting recon. This is needed * to assure no nasty interactions with pending user writes. - * We need to do this before we change the disk or row status. */ + * We need to do this before we change the disk or row status. + */ reconDesc->state = 1; - Dprintf("RECON: begin request suspend\n"); + Dprintf("RECON: begin request suspend.\n"); retcode = rf_SuspendNewRequestsAndWait(raidPtr); - Dprintf("RECON: end request suspend\n"); - rf_StartUserStats(raidPtr); /* zero out the stats kept on - * user accs */ - /* fall through to state 1 */ + Dprintf("RECON: end request suspend.\n"); + rf_StartUserStats(raidPtr); /* + * Zero out the stats kept on + * user accs. + */ + /* Fall through to state 1. */ case 1: RF_LOCK_MUTEX(raidPtr->mutex); - /* create the reconstruction control pointer and install it in - * the right slot */ - raidPtr->reconControl[row] = rf_MakeReconControl(reconDesc, row, col, srow, scol); + /* + * Create the reconstruction control pointer and install it in + * the right slot. + */ + raidPtr->reconControl[row] = + rf_MakeReconControl(reconDesc, row, col, srow, scol); mapPtr = raidPtr->reconControl[row]->reconMap; raidPtr->status[row] = rf_rs_reconstructing; raidPtr->Disks[row][col].status = rf_ds_reconstructing; @@ -651,16 +762,21 @@ rf_ContinueReconstructFailedDisk(reconDesc) RF_GETTIME(raidPtr->reconControl[row]->starttime); - /* now start up the actual reconstruction: issue a read for - * each surviving disk */ + /* + * Now start up the actual reconstruction: issue a read for + * each surviving disk. + */ reconDesc->numDisksDone = 0; for (i = 0; i < raidPtr->numCol; i++) { if (i != col) { - /* find and issue the next I/O on the - * indicated disk */ + /* + * Find and issue the next I/O on the + * indicated disk. 
+ */ if (rf_IssueNextReadRequest(raidPtr, row, i)) { - Dprintf2("RECON: done issuing for r%d c%d\n", row, i); + Dprintf2("RECON: done issuing for r%d" + " c%d.\n", row, i); reconDesc->numDisksDone++; } } @@ -669,35 +785,41 @@ rf_ContinueReconstructFailedDisk(reconDesc) reconDesc->state = 2; case 2: - Dprintf("RECON: resume requests\n"); + Dprintf("RECON: resume requests.\n"); rf_ResumeNewRequests(raidPtr); - reconDesc->state = 3; case 3: - /* process reconstruction events until all disks report that - * they've completed all work */ + /* + * Process reconstruction events until all disks report that + * they've completed all work. + */ mapPtr = raidPtr->reconControl[row]->reconMap; while (reconDesc->numDisksDone < raidPtr->numCol - 1) { - event = rf_GetNextReconEvent(reconDesc, row, (void (*) (void *)) rf_ContinueReconstructFailedDisk, reconDesc); + event = rf_GetNextReconEvent(reconDesc, row, + (void (*) (void *)) rf_ContinueReconstructFailedDisk, + reconDesc); RF_ASSERT(event); if (rf_ProcessReconEvent(raidPtr, row, event)) reconDesc->numDisksDone++; - raidPtr->reconControl[row]->numRUsTotal = + raidPtr->reconControl[row]->numRUsTotal = mapPtr->totalRUs; - raidPtr->reconControl[row]->numRUsComplete = - mapPtr->totalRUs - + raidPtr->reconControl[row]->numRUsComplete = + mapPtr->totalRUs - rf_UnitsLeftToReconstruct(mapPtr); - raidPtr->reconControl[row]->percentComplete = - (raidPtr->reconControl[row]->numRUsComplete * 100 / raidPtr->reconControl[row]->numRUsTotal); + raidPtr->reconControl[row]->percentComplete = + (raidPtr->reconControl[row]->numRUsComplete * 100 / + raidPtr->reconControl[row]->numRUsTotal); if (rf_prReconSched) { - rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime)); + rf_PrintReconSchedule( + raidPtr->reconControl[row]->reconMap, + &(raidPtr->reconControl[row]->starttime)); } } @@ -706,75 +828,98 @@ rf_ContinueReconstructFailedDisk(reconDesc) case 4: mapPtr = raidPtr->reconControl[row]->reconMap; if (rf_reconDebug) { - printf("RECON: all reads completed\n"); + printf("RECON: all reads completed.\n"); } - /* at this point all the reads have completed. We now wait - * for any pending writes to complete, and then we're done */ + /* + * At this point all the reads have completed. We now wait + * for any pending writes to complete, and then we're done. + */ - while (rf_UnitsLeftToReconstruct(raidPtr->reconControl[row]->reconMap) > 0) { + while (rf_UnitsLeftToReconstruct( + raidPtr->reconControl[row]->reconMap) > 0) { - event = rf_GetNextReconEvent(reconDesc, row, (void (*) (void *)) rf_ContinueReconstructFailedDisk, reconDesc); + event = rf_GetNextReconEvent(reconDesc, row, + (void (*) (void *)) rf_ContinueReconstructFailedDisk, + reconDesc); RF_ASSERT(event); - (void) rf_ProcessReconEvent(raidPtr, row, event); /* ignore return code */ - raidPtr->reconControl[row]->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); + /* Ignore return code. */ + (void) rf_ProcessReconEvent(raidPtr, row, event); + raidPtr->reconControl[row]->percentComplete = + 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / + mapPtr->totalRUs); if (rf_prReconSched) { - rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime)); + rf_PrintReconSchedule( + raidPtr->reconControl[row]->reconMap, + &(raidPtr->reconControl[row]->starttime)); } } reconDesc->state = 5; case 5: - /* Success: mark the dead disk as reconstructed. We quiesce + /* + * Success: mark the dead disk as reconstructed. 
We quiesce * the array here to assure no nasty interactions with pending - * user accesses when we free up the psstatus structure as - * part of FreeReconControl() */ + * user accesses, when we free up the psstatus structure as + * part of FreeReconControl(). + */ reconDesc->state = 6; retcode = rf_SuspendNewRequestsAndWait(raidPtr); rf_StopUserStats(raidPtr); - rf_PrintUserStats(raidPtr); /* print out the stats on user + rf_PrintUserStats(raidPtr); /* + * Print out the stats on user * accs accumulated during - * recon */ + * recon. + */ - /* fall through to state 6 */ + /* Fall through to state 6. */ case 6: RF_LOCK_MUTEX(raidPtr->mutex); raidPtr->numFailures--; ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE); - raidPtr->Disks[row][col].status = (ds) ? rf_ds_dist_spared : rf_ds_spared; - raidPtr->status[row] = (ds) ? rf_rs_reconfigured : rf_rs_optimal; + raidPtr->Disks[row][col].status = (ds) ? rf_ds_dist_spared : + rf_ds_spared; + raidPtr->status[row] = (ds) ? rf_rs_reconfigured : + rf_rs_optimal; RF_UNLOCK_MUTEX(raidPtr->mutex); RF_GETTIME(etime); - RF_TIMEVAL_DIFF(&(raidPtr->reconControl[row]->starttime), &etime, &elpsd); + RF_TIMEVAL_DIFF(&(raidPtr->reconControl[row]->starttime), + &etime, &elpsd); - /* XXX -- why is state 7 different from state 6 if there is no - * return() here? -- XXX Note that I set elpsd above & use it + /* + * XXX -- Why is state 7 different from state 6 if there is no + * return() here ? -- XXX Note that I set elpsd above & use it * below, so if you put a return here you'll have to fix this. - * (also, FreeReconControl is called below) */ + * (also, FreeReconControl is called below). + */ case 7: rf_ResumeNewRequests(raidPtr); - printf("Reconstruction of disk at row %d col %d completed\n", - row, col); + printf("Reconstruction of disk at row %d col %d completed.\n", + row, col); xor_s = raidPtr->accumXorTimeUs / 1000000; xor_resid_us = raidPtr->accumXorTimeUs % 1000000; - printf("Recon time was %d.%06d seconds, accumulated XOR time was %ld us (%ld.%06ld)\n", - (int) elpsd.tv_sec, (int) elpsd.tv_usec, raidPtr->accumXorTimeUs, xor_s, xor_resid_us); - printf(" (start time %d sec %d usec, end time %d sec %d usec)\n", + printf("Recon time was %d.%06d seconds, accumulated XOR time" + " was %ld us (%ld.%06ld).\n", (int) elpsd.tv_sec, + (int) elpsd.tv_usec, raidPtr->accumXorTimeUs, xor_s, + xor_resid_us); + printf(" (start time %d sec %d usec, end time %d sec %d" + " usec)\n", (int) raidPtr->reconControl[row]->starttime.tv_sec, (int) raidPtr->reconControl[row]->starttime.tv_usec, (int) etime.tv_sec, (int) etime.tv_usec); -#if RF_RECON_STATS > 0 - printf("Total head-sep stall count was %d\n", +#if RF_RECON_STATS > 0 + printf("Total head-sep stall count was %d.\n", (int) reconDesc->hsStallCount); -#endif /* RF_RECON_STATS > 0 */ +#endif /* RF_RECON_STATS > 0 */ rf_FreeReconControl(raidPtr, row); - RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t)); + RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol * + sizeof(RF_AccTraceEntry_t)); rf_FreeReconDesc(reconDesc); } @@ -782,52 +927,62 @@ rf_ContinueReconstructFailedDisk(reconDesc) rf_SignalReconDone(raidPtr); return (0); } + + /***************************************************************************** - * do the right thing upon each reconstruction event. - * returns nonzero if and only if there is nothing left unread on the - * indicated disk + * Do the right thing upon each reconstruction event. 
+ * Returns nonzero if and only if there is nothing left unread on the + * indicated disk. *****************************************************************************/ -int -rf_ProcessReconEvent(raidPtr, frow, event) - RF_Raid_t *raidPtr; - RF_RowCol_t frow; - RF_ReconEvent_t *event; +int +rf_ProcessReconEvent(RF_Raid_t *raidPtr, RF_RowCol_t frow, + RF_ReconEvent_t *event) { - int retcode = 0, submitblocked; + int retcode = 0, submitblocked; RF_ReconBuffer_t *rbuf; RF_SectorCount_t sectorsPerRU; - Dprintf1("RECON: rf_ProcessReconEvent type %d\n", event->type); + Dprintf1("RECON: rf_ProcessReconEvent type %d.\n", event->type); + switch (event->type) { - /* a read I/O has completed */ + /* A read I/O has completed. */ case RF_REVENT_READDONE: - rbuf = raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf; - Dprintf3("RECON: READDONE EVENT: row %d col %d psid %ld\n", + rbuf = raidPtr->reconControl[frow] + ->perDiskInfo[event->col].rbuf; + Dprintf3("RECON: READDONE EVENT: row %d col %d psid %ld.\n", frow, event->col, rbuf->parityStripeID); - Dprintf7("RECON: done read psid %ld buf %lx %02x %02x %02x %02x %02x\n", - rbuf->parityStripeID, rbuf->buffer, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff, - rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff); + Dprintf7("RECON: done read psid %ld buf %lx %02x %02x %02x" + " %02x %02x.\n", rbuf->parityStripeID, rbuf->buffer, + rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff, + rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, + rbuf->buffer[4] & 0xff); rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0); - Dprintf1("RECON: submitblocked=%d\n", submitblocked); + Dprintf1("RECON: submitblocked=%d.\n", submitblocked); if (!submitblocked) - retcode = rf_IssueNextReadRequest(raidPtr, frow, event->col); + retcode = rf_IssueNextReadRequest(raidPtr, frow, + event->col); break; - /* a write I/O has completed */ + /* A write I/O has completed. 
*/ case RF_REVENT_WRITEDONE: if (rf_floatingRbufDebug) { rf_CheckFloatingRbufCount(raidPtr, 1); } - sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; + sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * + raidPtr->Layout.SUsPerRU; rbuf = (RF_ReconBuffer_t *) event->arg; rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); - Dprintf3("RECON: WRITEDONE EVENT: psid %d ru %d (%d %% complete)\n", - rbuf->parityStripeID, rbuf->which_ru, raidPtr->reconControl[frow]->percentComplete); - rf_ReconMapUpdate(raidPtr, raidPtr->reconControl[frow]->reconMap, - rbuf->failedDiskSectorOffset, rbuf->failedDiskSectorOffset + sectorsPerRU - 1); - rf_RemoveFromActiveReconTable(raidPtr, frow, rbuf->parityStripeID, rbuf->which_ru); + Dprintf3("RECON: WRITEDONE EVENT: psid %d ru %d" + " (%d %% complete).\n", + rbuf->parityStripeID, rbuf->which_ru, + raidPtr->reconControl[frow]->percentComplete); + rf_ReconMapUpdate(raidPtr, raidPtr->reconControl[frow] + ->reconMap, rbuf->failedDiskSectorOffset, + rbuf->failedDiskSectorOffset + sectorsPerRU - 1); + rf_RemoveFromActiveReconTable(raidPtr, frow, + rbuf->parityStripeID, rbuf->which_ru); if (rbuf->type == RF_RBUF_TYPE_FLOATING) { RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); @@ -841,51 +996,67 @@ rf_ProcessReconEvent(raidPtr, frow, event) RF_ASSERT(0); break; - case RF_REVENT_BUFCLEAR: /* A buffer-stall condition has been - * cleared */ - Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d\n", frow, event->col); - submitblocked = rf_SubmitReconBuffer(raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf, 0, (int) (long) event->arg); - RF_ASSERT(!submitblocked); /* we wouldn't have gotten the + /* A buffer-stall condition has been cleared. */ + case RF_REVENT_BUFCLEAR: + Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d.\n", frow, + event->col); + submitblocked = rf_SubmitReconBuffer(raidPtr + ->reconControl[frow]->perDiskInfo[event->col].rbuf, 0, + (int) (long) event->arg); + RF_ASSERT(!submitblocked); /* + * We wouldn't have gotten the * BUFCLEAR event if we - * couldn't submit */ + * couldn't submit. + */ retcode = rf_IssueNextReadRequest(raidPtr, frow, event->col); break; - case RF_REVENT_BLOCKCLEAR: /* A user-write reconstruction - * blockage has been cleared */ - DDprintf2("RECON: BLOCKCLEAR EVENT: row %d col %d\n", frow, event->col); + /* A user-write reconstruction blockage has been cleared. */ + case RF_REVENT_BLOCKCLEAR: + DDprintf2("RECON: BLOCKCLEAR EVENT: row %d col %d.\n", + frow, event->col); retcode = rf_TryToRead(raidPtr, frow, event->col); break; - case RF_REVENT_HEADSEPCLEAR: /* A max-head-separation - * reconstruction blockage has been - * cleared */ - Dprintf2("RECON: HEADSEPCLEAR EVENT: row %d col %d\n", frow, event->col); + /* + * A max-head-separation reconstruction blockage has been + * cleared. + */ + case RF_REVENT_HEADSEPCLEAR: + Dprintf2("RECON: HEADSEPCLEAR EVENT: row %d col %d.\n", + frow, event->col); retcode = rf_TryToRead(raidPtr, frow, event->col); break; - /* a buffer has become ready to write */ + /* A buffer has become ready to write. 
*/ case RF_REVENT_BUFREADY: - Dprintf2("RECON: BUFREADY EVENT: row %d col %d\n", frow, event->col); + Dprintf2("RECON: BUFREADY EVENT: row %d col %d.\n", + frow, event->col); retcode = rf_IssueNextWriteRequest(raidPtr, frow); if (rf_floatingRbufDebug) { rf_CheckFloatingRbufCount(raidPtr, 1); } break; - /* we need to skip the current RU entirely because it got - * recon'd while we were waiting for something else to happen */ + /* + * We need to skip the current RU entirely because it got + * recon'd while we were waiting for something else to happen. + */ case RF_REVENT_SKIP: - DDprintf2("RECON: SKIP EVENT: row %d col %d\n", frow, event->col); + DDprintf2("RECON: SKIP EVENT: row %d col %d.\n", + frow, event->col); retcode = rf_IssueNextReadRequest(raidPtr, frow, event->col); break; - /* a forced-reconstruction read access has completed. Just - * submit the buffer */ + /* + * A forced-reconstruction read access has completed. Just + * submit the buffer. + */ case RF_REVENT_FORCEDREADDONE: rbuf = (RF_ReconBuffer_t *) event->arg; rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); - DDprintf2("RECON: FORCEDREADDONE EVENT: row %d col %d\n", frow, event->col); + DDprintf2("RECON: FORCEDREADDONE EVENT: row %d col %d.\n", + frow, event->col); submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0); RF_ASSERT(!submitblocked); break; @@ -899,9 +1070,9 @@ rf_ProcessReconEvent(raidPtr, frow, event) /***************************************************************************** * - * find the next thing that's needed on the indicated disk, and issue - * a read request for it. We assume that the reconstruction buffer - * associated with this process is free to receive the data. If + * Find the next thing that's needed on the indicated disk, and issue + * a read request for it. We assume that the reconstruction buffer + * associated with this process is free to receive the data. If * reconstruction is blocked on the indicated RU, we issue a * blockage-release request instead of a physical disk read request. * If the current disk gets too far ahead of the others, we issue a @@ -909,31 +1080,34 @@ rf_ProcessReconEvent(raidPtr, frow, event) * * ctrl->{ru_count, curPSID, diskOffset} and * rbuf->failedDiskSectorOffset are maintained to point to the unit - * we're currently accessing. Note that this deviates from the + * we're currently accessing. Note that this deviates from the * standard C idiom of having counters point to the next thing to be - * accessed. This allows us to easily retry when we're blocked by + * accessed. This allows us to easily retry when we're blocked by * head separation or reconstruction-blockage events. * - * returns nonzero if and only if there is nothing left unread on the - * indicated disk + * Returns nonzero if and only if there is nothing left unread on the + * indicated disk. 
* *****************************************************************************/ -int -rf_IssueNextReadRequest(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; +int +rf_IssueNextReadRequest(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col) { - RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col]; + RF_PerDiskReconCtrl_t *ctrl = + &raidPtr->reconControl[row]->perDiskInfo[col]; RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_ReconBuffer_t *rbuf = ctrl->rbuf; - RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; - RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; - int do_new_check = 0, retcode = 0, status; - - /* if we are currently the slowest disk, mark that we have to do a new - * check */ - if (ctrl->headSepCounter <= raidPtr->reconControl[row]->minHeadSepCounter) + RF_ReconUnitCount_t RUsPerPU = + layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; + RF_SectorCount_t sectorsPerRU = + layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; + int do_new_check = 0, retcode = 0, status; + + /* + * If we are currently the slowest disk, mark that we have to do a new + * check. + */ + if (ctrl->headSepCounter <= + raidPtr->reconControl[row]->minHeadSepCounter) do_new_check = 1; while (1) { @@ -947,16 +1121,22 @@ rf_IssueNextReadRequest(raidPtr, row, col) ctrl->ru_count = 0; /* code left over from when head-sep was based on * parity stripe id */ - if (ctrl->curPSID >= raidPtr->reconControl[row]->lastPSID) { - rf_CheckForNewMinHeadSep(raidPtr, row, ++(ctrl->headSepCounter)); - return (1); /* finito! */ + if (ctrl->curPSID >= + raidPtr->reconControl[row]->lastPSID) { + rf_CheckForNewMinHeadSep(raidPtr, row, + ++(ctrl->headSepCounter)); + return (1); /* Finito ! */ } - /* find the disk offsets of the start of the parity + /* + * Find the disk offsets of the start of the parity * stripe on both the current disk and the failed - * disk. skip this entire parity stripe if either disk - * does not appear in the indicated PS */ - status = rf_ComputePSDiskOffsets(raidPtr, ctrl->curPSID, row, col, &ctrl->diskOffset, &rbuf->failedDiskSectorOffset, - &rbuf->spRow, &rbuf->spCol, &rbuf->spOffset); + * disk. Skip this entire parity stripe if either disk + * does not appear in the indicated PS. + */ + status = rf_ComputePSDiskOffsets(raidPtr, + ctrl->curPSID, row, col, &ctrl->diskOffset, + &rbuf->failedDiskSectorOffset, &rbuf->spRow, + &rbuf->spCol, &rbuf->spOffset); if (status) { ctrl->ru_count = RUsPerPU - 1; continue; @@ -964,23 +1144,28 @@ rf_IssueNextReadRequest(raidPtr, row, col) } rbuf->which_ru = ctrl->ru_count; - /* skip this RU if it's already been reconstructed */ - if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, rbuf->failedDiskSectorOffset)) { - Dprintf2("Skipping psid %ld ru %d: already reconstructed\n", ctrl->curPSID, ctrl->ru_count); + /* Skip this RU if it's already been reconstructed. */ + if (rf_CheckRUReconstructed(raidPtr->reconControl[row] + ->reconMap, rbuf->failedDiskSectorOffset)) { + Dprintf2("Skipping psid %ld ru %d: already" + " reconstructed.\n", ctrl->curPSID, ctrl->ru_count); continue; } break; } ctrl->headSepCounter++; - if (do_new_check) - rf_CheckForNewMinHeadSep(raidPtr, row, ctrl->headSepCounter); /* update min if needed */ + if (do_new_check) /* Update min if needed. 
*/ + rf_CheckForNewMinHeadSep(raidPtr, row, ctrl->headSepCounter); - /* at this point, we have definitely decided what to do, and we have - * only to see if we can actually do it now */ + /* + * At this point, we have definitely decided what to do, and we have + * only to see if we can actually do it now. + */ rbuf->parityStripeID = ctrl->curPSID; rbuf->which_ru = ctrl->ru_count; - bzero((char *) &raidPtr->recon_tracerecs[col], sizeof(raidPtr->recon_tracerecs[col])); + bzero((char *) &raidPtr->recon_tracerecs[col], + sizeof(raidPtr->recon_tracerecs[col])); raidPtr->recon_tracerecs[col].reconacc = 1; RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer); retcode = rf_TryToRead(raidPtr, row, col); @@ -988,76 +1173,94 @@ rf_IssueNextReadRequest(raidPtr, row, col) } /* - * tries to issue the next read on the indicated disk. We may be + * Tries to issue the next read on the indicated disk. We may be * blocked by (a) the heads being too far apart, or (b) recon on the - * indicated RU being blocked due to a write by a user thread. In + * indicated RU being blocked due to a write by a user thread. In * this case, we issue a head-sep or blockage wait request, which will * cause this same routine to be invoked again later when the blockage - * has cleared. + * has cleared. */ -int -rf_TryToRead(raidPtr, row, col) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; +int +rf_TryToRead(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col) { - RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col]; - RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; + RF_PerDiskReconCtrl_t *ctrl = + &raidPtr->reconControl[row]->perDiskInfo[col]; + RF_SectorCount_t sectorsPerRU = + raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; RF_StripeNum_t psid = ctrl->curPSID; RF_ReconUnitNum_t which_ru = ctrl->ru_count; RF_DiskQueueData_t *req; - int status, created = 0; + int status, created = 0; RF_ReconParityStripeStatus_t *pssPtr; - /* if the current disk is too far ahead of the others, issue a - * head-separation wait and return */ - if (rf_CheckHeadSeparation(raidPtr, ctrl, row, col, ctrl->headSepCounter, which_ru)) + /* + * If the current disk is too far ahead of the others, issue a + * head-separation wait and return. + */ + if (rf_CheckHeadSeparation(raidPtr, ctrl, row, col, + ctrl->headSepCounter, which_ru)) return (0); RF_LOCK_PSS_MUTEX(raidPtr, row, psid); - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE, &created); + pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row] + ->pssTable, psid, which_ru, RF_PSS_CREATE, &created); - /* if recon is blocked on the indicated parity stripe, issue a - * block-wait request and return. this also must mark the indicated RU - * in the stripe as under reconstruction if not blocked. */ - status = rf_CheckForcedOrBlockedReconstruction(raidPtr, pssPtr, ctrl, row, col, psid, which_ru); + /* + * If recon is blocked on the indicated parity stripe, issue a + * block-wait request and return. This also must mark the indicated RU + * in the stripe as under reconstruction if not blocked. 
+ */ + status = rf_CheckForcedOrBlockedReconstruction(raidPtr, pssPtr, ctrl, + row, col, psid, which_ru); if (status == RF_PSS_RECON_BLOCKED) { - Dprintf2("RECON: Stalling psid %ld ru %d: recon blocked\n", psid, which_ru); + Dprintf2("RECON: Stalling psid %ld ru %d: recon blocked.\n", + psid, which_ru); goto out; } else if (status == RF_PSS_FORCED_ON_WRITE) { - rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP); + rf_CauseReconEvent(raidPtr, row, col, NULL, + RF_REVENT_SKIP); goto out; } - /* make one last check to be sure that the indicated RU didn't get + /* + * Make one last check to be sure that the indicated RU didn't get * reconstructed while we were waiting for something else to happen. * This is unfortunate in that it causes us to make this check twice - * in the normal case. Might want to make some attempt to re-work + * in the normal case. Might want to make some attempt to re-work * this so that we only do this check if we've definitely blocked on - * one of the above checks. When this condition is detected, we may - * have just created a bogus status entry, which we need to delete. */ - if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, ctrl->rbuf->failedDiskSectorOffset)) { - Dprintf2("RECON: Skipping psid %ld ru %d: prior recon after stall\n", psid, which_ru); + * one of the above checks. When this condition is detected, we may + * have just created a bogus status entry, which we need to delete. + */ + if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, + ctrl->rbuf->failedDiskSectorOffset)) { + Dprintf2("RECON: Skipping psid %ld ru %d: prior recon after" + " stall.\n", psid, which_ru); if (created) - rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr); + rf_PSStatusDelete(raidPtr, + raidPtr->reconControl[row]->pssTable, pssPtr); rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP); goto out; } - /* found something to read. issue the I/O */ - Dprintf5("RECON: Read for psid %ld on row %d col %d offset %ld buf %lx\n", - psid, row, col, ctrl->diskOffset, ctrl->rbuf->buffer); + /* Found something to read. Issue the I/O. */ + Dprintf5("RECON: Read for psid %ld on row %d col %d offset %ld" + " buf %lx.\n", psid, row, col, ctrl->diskOffset, + ctrl->rbuf->buffer); RF_ETIMER_STOP(raidPtr->recon_tracerecs[col].recon_timer); RF_ETIMER_EVAL(raidPtr->recon_tracerecs[col].recon_timer); raidPtr->recon_tracerecs[col].specific.recon.recon_start_to_fetch_us = RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[col].recon_timer); RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer); - /* should be ok to use a NULL proc pointer here, all the bufs we use - * should be in kernel space */ - req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, ctrl->diskOffset, sectorsPerRU, ctrl->rbuf->buffer, psid, which_ru, - rf_ReconReadDoneProc, (void *) ctrl, NULL, &raidPtr->recon_tracerecs[col], (void *) raidPtr, 0, NULL); + /* + * Should be ok to use a NULL proc pointer here, all the bufs we use + * should be in kernel space. + */ + req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, ctrl->diskOffset, + sectorsPerRU, ctrl->rbuf->buffer, psid, which_ru, + rf_ReconReadDoneProc, (void *) ctrl, NULL, + &raidPtr->recon_tracerecs[col], (void *) raidPtr, 0, NULL); - RF_ASSERT(req); /* XXX -- fix this -- XXX */ + RF_ASSERT(req); /* XXX -- Fix this. 
-- XXX */ ctrl->rbuf->arg = (void *) req; rf_DiskIOEnqueue(&raidPtr->Queues[row][col], req, RF_IO_RECON_PRIORITY); @@ -1070,61 +1273,75 @@ out: /* - * given a parity stripe ID, we want to find out whether both the - * current disk and the failed disk exist in that parity stripe. If - * not, we want to skip this whole PS. If so, we want to find the + * Given a parity stripe ID, we want to find out whether both the + * current disk and the failed disk exist in that parity stripe. If + * not, we want to skip this whole PS. If so, we want to find the * disk offset of the start of the PS on both the current disk and the * failed disk. * - * this works by getting a list of disks comprising the indicated + * This works by getting a list of disks comprising the indicated * parity stripe, and searching the list for the current and failed - * disks. Once we've decided they both exist in the parity stripe, we + * disks. Once we've decided they both exist in the parity stripe, we * need to decide whether each is data or parity, so that we'll know * which mapping function to call to get the corresponding disk * offsets. * - * this is kind of unpleasant, but doing it this way allows the + * This is kind of unpleasant, but doing it this way allows the * reconstruction code to use parity stripe IDs rather than physical * disks address to march through the failed disk, which greatly * simplifies a lot of code, as well as eliminating the need for a - * reverse-mapping function. I also think it will execute faster, + * reverse-mapping function. I also think it will execute faster, * since the calls to the mapping module are kept to a minimum. * * ASSUMES THAT THE STRIPE IDENTIFIER IDENTIFIES THE DISKS COMPRISING - * THE STRIPE IN THE CORRECT ORDER */ + * THE STRIPE IN THE CORRECT ORDER. + */ -int +int rf_ComputePSDiskOffsets( - RF_Raid_t * raidPtr, /* raid descriptor */ - RF_StripeNum_t psid, /* parity stripe identifier */ - RF_RowCol_t row, /* row and column of disk to find the offsets - * for */ - RF_RowCol_t col, - RF_SectorNum_t * outDiskOffset, - RF_SectorNum_t * outFailedDiskSectorOffset, - RF_RowCol_t * spRow, /* OUT: row,col of spare unit for failed unit */ - RF_RowCol_t * spCol, - RF_SectorNum_t * spOffset) -{ /* OUT: offset into disk containing spare unit */ + RF_Raid_t *raidPtr, /* RAID descriptor. */ + RF_StripeNum_t psid, /* Parity stripe identifier. */ + RF_RowCol_t row, /* + * Row and column of disk to find + * the offsets for. + */ + RF_RowCol_t col, + RF_SectorNum_t *outDiskOffset, + RF_SectorNum_t *outFailedDiskSectorOffset, + RF_RowCol_t *spRow, /* + * OUT: Row,col of spare unit for + * failed unit. + */ + RF_RowCol_t *spCol, + RF_SectorNum_t *spOffset /* + * OUT: Offset into disk containing + * spare unit. + */ +) +{ RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol; RF_RaidAddr_t sosRaidAddress; /* start-of-stripe */ RF_RowCol_t *diskids; - u_int i, j, k, i_offset, j_offset; + u_int i, j, k, i_offset, j_offset; RF_RowCol_t prow, pcol; - int testcol, testrow; + int testcol, testrow; RF_RowCol_t stripe; RF_SectorNum_t poffset; - char i_is_parity = 0, j_is_parity = 0; - RF_RowCol_t stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; + char i_is_parity = 0, j_is_parity = 0; + RF_RowCol_t stripeWidth = + layoutPtr->numDataCol + layoutPtr->numParityCol; - /* get a listing of the disks comprising that stripe */ + /* Get a listing of the disks comprising that stripe. 
*/ sosRaidAddress = rf_ParityStripeIDToRaidAddress(layoutPtr, psid); - (layoutPtr->map->IdentifyStripe) (raidPtr, sosRaidAddress, &diskids, &stripe); + (layoutPtr->map->IdentifyStripe) (raidPtr, sosRaidAddress, &diskids, + &stripe); RF_ASSERT(diskids); - /* reject this entire parity stripe if it does not contain the - * indicated disk or it does not contain the failed disk */ + /* + * Reject this entire parity stripe if it does not contain the + * indicated disk or it does not contain the failed disk. + */ if (row != stripe) goto skipit; for (i = 0; i < stripeWidth; i++) { @@ -1140,12 +1357,15 @@ rf_ComputePSDiskOffsets( if (j == stripeWidth) { goto skipit; } - /* find out which disk the parity is on */ - (layoutPtr->map->MapParity) (raidPtr, sosRaidAddress, &prow, &pcol, &poffset, RF_DONT_REMAP); - - /* find out if either the current RU or the failed RU is parity */ - /* also, if the parity occurs in this stripe prior to the data and/or - * failed col, we need to decrement i and/or j */ + /* Find out which disk the parity is on. */ + (layoutPtr->map->MapParity) (raidPtr, sosRaidAddress, &prow, &pcol, + &poffset, RF_DONT_REMAP); + + /* Find out if either the current RU or the failed RU is parity. */ + /* + * Also, if the parity occurs in this stripe prior to the data and/or + * failed col, we need to decrement i and/or j. + */ for (k = 0; k < stripeWidth; k++) if (diskids[k] == pcol) break; @@ -1158,8 +1378,10 @@ rf_ComputePSDiskOffsets( if (k == i) { i_is_parity = 1; i_offset = 0; - } /* set offsets to zero to disable multiply - * below */ + } /* + * Set offsets to zero to disable multiply + * below. + */ if (k < j) j_offset--; else @@ -1167,31 +1389,47 @@ rf_ComputePSDiskOffsets( j_is_parity = 1; j_offset = 0; } - /* at this point, [ij]_is_parity tells us whether the [current,failed] + /* + * At this point, [ij]_is_parity tells us whether the [current,failed] * disk is parity at the start of this RU, and, if data, "[ij]_offset" - * tells us how far into the stripe the [current,failed] disk is. */ + * tells us how far into the stripe the [current,failed] disk is. + */ - /* call the mapping routine to get the offset into the current disk, - * repeat for failed disk. */ + /* + * Call the mapping routine to get the offset into the current disk, + * repeat for failed disk. 
+ */ if (i_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP); + layoutPtr->map->MapParity(raidPtr, sosRaidAddress + i_offset * + layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, + outDiskOffset, RF_DONT_REMAP); else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP); + layoutPtr->map->MapSector(raidPtr, sosRaidAddress + i_offset * + layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, + outDiskOffset, RF_DONT_REMAP); RF_ASSERT(row == testrow && col == testcol); if (j_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP); + layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * + layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, + outFailedDiskSectorOffset, RF_DONT_REMAP); else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP); + layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * + layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, + outFailedDiskSectorOffset, RF_DONT_REMAP); RF_ASSERT(row == testrow && fcol == testcol); - /* now locate the spare unit for the failed unit */ + /* Now locate the spare unit for the failed unit. */ if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) { if (j_is_parity) - layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP); + layoutPtr->map->MapParity(raidPtr, sosRaidAddress + + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, + spCol, spOffset, RF_REMAP); else - layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP); + layoutPtr->map->MapSector(raidPtr, sosRaidAddress + + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, + spCol, spOffset, RF_REMAP); } else { *spRow = raidPtr->reconControl[row]->spareRow; *spCol = raidPtr->reconControl[row]->spareCol; @@ -1201,125 +1439,135 @@ rf_ComputePSDiskOffsets( return (0); skipit: - Dprintf3("RECON: Skipping psid %ld: nothing needed from r%d c%d\n", + Dprintf3("RECON: Skipping psid %ld: nothing needed from r%d c%d.\n", psid, row, col); return (1); } -/* this is called when a buffer has become ready to write to the replacement disk */ -int -rf_IssueNextWriteRequest(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; + + +/* + * This is called when a buffer has become ready to write to the replacement + * disk. + */ +int +rf_IssueNextWriteRequest(RF_Raid_t *raidPtr, RF_RowCol_t row) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; + RF_SectorCount_t sectorsPerRU = + layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU; RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol; RF_ReconBuffer_t *rbuf; RF_DiskQueueData_t *req; rbuf = rf_GetFullReconBuffer(raidPtr->reconControl[row]); - RF_ASSERT(rbuf); /* there must be one available, or we wouldn't - * have gotten the event that sent us here */ + RF_ASSERT(rbuf); /* + * There must be one available, or we wouldn't + * have gotten the event that sent us here. 
+ */ RF_ASSERT(rbuf->pssPtr); rbuf->pssPtr->writeRbuf = rbuf; rbuf->pssPtr = NULL; - Dprintf7("RECON: New write (r %d c %d offs %d) for psid %ld ru %d (failed disk offset %ld) buf %lx\n", + Dprintf7("RECON: New write (r %d c %d offs %d) for psid %ld ru %d" + " (failed disk offset %ld) buf %lx.\n", rbuf->spRow, rbuf->spCol, rbuf->spOffset, rbuf->parityStripeID, rbuf->which_ru, rbuf->failedDiskSectorOffset, rbuf->buffer); - Dprintf6("RECON: new write psid %ld %02x %02x %02x %02x %02x\n", - rbuf->parityStripeID, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff, - rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff); - - /* should be ok to use a NULL b_proc here b/c all addrs should be in - * kernel space */ + Dprintf6("RECON: new write psid %ld %02x %02x %02x %02x %02x.\n", + rbuf->parityStripeID, rbuf->buffer[0] & 0xff, + rbuf->buffer[1] & 0xff, rbuf->buffer[2] & 0xff, + rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff); + + /* + * Should be ok to use a NULL b_proc here b/c all addrs should be in + * kernel space. + */ req = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, rbuf->spOffset, - sectorsPerRU, rbuf->buffer, - rbuf->parityStripeID, rbuf->which_ru, + sectorsPerRU, rbuf->buffer, rbuf->parityStripeID, rbuf->which_ru, rf_ReconWriteDoneProc, (void *) rbuf, NULL, - &raidPtr->recon_tracerecs[fcol], - (void *) raidPtr, 0, NULL); + &raidPtr->recon_tracerecs[fcol], (void *) raidPtr, 0, NULL); - RF_ASSERT(req); /* XXX -- fix this -- XXX */ + RF_ASSERT(req); /* XXX -- Fix this. -- XXX */ rbuf->arg = (void *) req; - rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spRow][rbuf->spCol], req, RF_IO_RECON_PRIORITY); + rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spRow][rbuf->spCol], req, + RF_IO_RECON_PRIORITY); return (0); } /* - * this gets called upon the completion of a reconstruction read - * operation the arg is a pointer to the per-disk reconstruction + * This gets called upon the completion of a reconstruction read + * operation. The arg is a pointer to the per-disk reconstruction * control structure for the process that just finished a read. * - * called at interrupt context in the kernel, so don't do anything - * illegal here. + * Called at interrupt context in the kernel, so don't do anything + * illegal here. */ -int -rf_ReconReadDoneProc(arg, status) - void *arg; - int status; +int +rf_ReconReadDoneProc(void *arg, int status) { RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg; RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr; if (status) { /* - * XXX - */ - printf("Recon read failed!\n"); + * XXX + */ + printf("Recon read failed !\n"); RF_PANIC(); } RF_ETIMER_STOP(raidPtr->recon_tracerecs[ctrl->col].recon_timer); RF_ETIMER_EVAL(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - raidPtr->recon_tracerecs[ctrl->col].specific.recon.recon_fetch_to_return_us = - RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[ctrl->col].recon_timer); + raidPtr->recon_tracerecs[ctrl->col].specific.recon. + recon_fetch_to_return_us = + RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[ctrl->col].recon_timer); RF_ETIMER_START(raidPtr->recon_tracerecs[ctrl->col].recon_timer); - rf_CauseReconEvent(raidPtr, ctrl->row, ctrl->col, NULL, RF_REVENT_READDONE); + rf_CauseReconEvent(raidPtr, ctrl->row, ctrl->col, NULL, + RF_REVENT_READDONE); return (0); } -/* this gets called upon the completion of a reconstruction write operation. - * the arg is a pointer to the rbuf that was just written + + +/* + * This gets called upon the completion of a reconstruction write operation. 
+ * The arg is a pointer to the rbuf that was just written. * - * called at interrupt context in the kernel, so don't do anything illegal here. + * Called at interrupt context in the kernel, so don't do anything illegal here. */ -int -rf_ReconWriteDoneProc(arg, status) - void *arg; - int status; +int +rf_ReconWriteDoneProc(void *arg, int status) { RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg; - Dprintf2("Reconstruction completed on psid %ld ru %d\n", rbuf->parityStripeID, rbuf->which_ru); + Dprintf2("Reconstruction completed on psid %ld ru %d.\n", + rbuf->parityStripeID, rbuf->which_ru); if (status) { - printf("Recon write failed!\n"); /* fprintf(stderr,"Recon - * write failed!\n"); */ + /* fprintf(stderr, "Recon write failed !\n"); */ + printf("Recon write failed !\n"); RF_PANIC(); } - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, arg, RF_REVENT_WRITEDONE); + rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, + arg, RF_REVENT_WRITEDONE); return (0); } -/* - * computes a new minimum head sep, and wakes up anyone who needs to - * be woken as a result +/* + * Computes a new minimum head sep, and wakes up anyone who needs to + * be woken as a result. */ -void -rf_CheckForNewMinHeadSep(raidPtr, row, hsCtr) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_HeadSepLimit_t hsCtr; +void +rf_CheckForNewMinHeadSep(RF_Raid_t *raidPtr, RF_RowCol_t row, + RF_HeadSepLimit_t hsCtr) { RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; RF_HeadSepLimit_t new_min; RF_RowCol_t i; RF_CallbackDesc_t *p; - RF_ASSERT(hsCtr >= reconCtrlPtr->minHeadSepCounter); /* from the definition - * of a minimum */ + /* From the definition of a minimum. */ + RF_ASSERT(hsCtr >= reconCtrlPtr->minHeadSepCounter); RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); @@ -1327,20 +1575,25 @@ rf_CheckForNewMinHeadSep(raidPtr, row, hsCtr) new_min = ~(1L << (8 * sizeof(long) - 1)); /* 0x7FFF....FFF */ for (i = 0; i < raidPtr->numCol; i++) if (i != reconCtrlPtr->fcol) { - if (reconCtrlPtr->perDiskInfo[i].headSepCounter < new_min) - new_min = reconCtrlPtr->perDiskInfo[i].headSepCounter; + if (reconCtrlPtr->perDiskInfo[i].headSepCounter < + new_min) + new_min = + reconCtrlPtr->perDiskInfo[i].headSepCounter; } - /* set the new minimum and wake up anyone who can now run again */ + /* Set the new minimum and wake up anyone who can now run again. */ if (new_min != reconCtrlPtr->minHeadSepCounter) { reconCtrlPtr->minHeadSepCounter = new_min; - Dprintf1("RECON: new min head pos counter val is %ld\n", new_min); + Dprintf1("RECON: new min head pos counter val is %ld.\n", + new_min); while (reconCtrlPtr->headSepCBList) { - if (reconCtrlPtr->headSepCBList->callbackArg.v > new_min) + if (reconCtrlPtr->headSepCBList->callbackArg.v > + new_min) break; p = reconCtrlPtr->headSepCBList; reconCtrlPtr->headSepCBList = p->next; p->next = NULL; - rf_CauseReconEvent(raidPtr, p->row, p->col, NULL, RF_REVENT_HEADSEPCLEAR); + rf_CauseReconEvent(raidPtr, p->row, p->col, NULL, + RF_REVENT_HEADSEPCLEAR); rf_FreeCallbackDesc(p); } @@ -1349,7 +1602,7 @@ rf_CheckForNewMinHeadSep(raidPtr, row, hsCtr) } /* - * checks to see that the maximum head separation will not be violated + * Checks to see that the maximum head separation will not be violated * if we initiate a reconstruction I/O on the indicated disk. 
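(The minimum-head-separation scan above is a plain minimum over the surviving columns. The sketch below restates that shape with ordinary longs instead of the RF_* types; the function name and array are made up for illustration.)

#include <limits.h>

/*
 * Return the smallest head-separation counter among all columns except
 * the failed one.  Seed with the largest positive value, then take the
 * minimum over surviving disks, as in the loop above.
 */
static long
min_head_sep(const long *head_sep_counter, int ncol, int fcol)
{
	long new_min = LONG_MAX;	/* ~(1L << (8 * sizeof(long) - 1)) */
	int i;

	for (i = 0; i < ncol; i++) {
		if (i == fcol)
			continue;
		if (head_sep_counter[i] < new_min)
			new_min = head_sep_counter[i];
	}
	return (new_min);
}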
* Limiting the maximum head separation between two disks eliminates * the nasty buffer-stall conditions that occur when one disk races @@ -1357,47 +1610,54 @@ rf_CheckForNewMinHeadSep(raidPtr, row, hsCtr) * This code is complex and unpleasant but it's necessary to avoid * some very nasty, albeit fairly rare, reconstruction behavior. * - * returns non-zero if and only if we have to stop working on the - * indicated disk due to a head-separation delay. + * Returns non-zero if and only if we have to stop working on the + * indicated disk due to a head-separation delay. */ -int +int rf_CheckHeadSeparation( - RF_Raid_t * raidPtr, - RF_PerDiskReconCtrl_t * ctrl, - RF_RowCol_t row, - RF_RowCol_t col, - RF_HeadSepLimit_t hsCtr, - RF_ReconUnitNum_t which_ru) + RF_Raid_t *raidPtr, + RF_PerDiskReconCtrl_t *ctrl, + RF_RowCol_t row, + RF_RowCol_t col, + RF_HeadSepLimit_t hsCtr, + RF_ReconUnitNum_t which_ru +) { RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; RF_CallbackDesc_t *cb, *p, *pt; - int retval = 0; + int retval = 0; - /* if we're too far ahead of the slowest disk, stop working on this - * disk until the slower ones catch up. We do this by scheduling a + /* + * If we're too far ahead of the slowest disk, stop working on this + * disk until the slower ones catch up. We do this by scheduling a * wakeup callback for the time when the slowest disk has caught up. * We define "caught up" with 20% hysteresis, i.e. the head separation * must have fallen to at most 80% of the max allowable head * separation before we'll wake up. - * */ RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); if ((raidPtr->headSepLimit >= 0) && - ((ctrl->headSepCounter - reconCtrlPtr->minHeadSepCounter) > raidPtr->headSepLimit)) { - Dprintf6("raid%d: RECON: head sep stall: row %d col %d hsCtr %ld minHSCtr %ld limit %ld\n", - raidPtr->raidid, row, col, ctrl->headSepCounter, - reconCtrlPtr->minHeadSepCounter, - raidPtr->headSepLimit); + ((ctrl->headSepCounter - reconCtrlPtr->minHeadSepCounter) > + raidPtr->headSepLimit)) { + Dprintf6("raid%d: RECON: head sep stall: row %d col %d hsCtr" + " %ld minHSCtr %ld limit %ld.\n", + raidPtr->raidid, row, col, ctrl->headSepCounter, + reconCtrlPtr->minHeadSepCounter, raidPtr->headSepLimit); cb = rf_AllocCallbackDesc(); - /* the minHeadSepCounter value we have to get to before we'll - * wake up. build in 20% hysteresis. */ - cb->callbackArg.v = (ctrl->headSepCounter - raidPtr->headSepLimit + raidPtr->headSepLimit / 5); + /* + * The minHeadSepCounter value we have to get to before we'll + * wake up. Build in 20% hysteresis. + */ + cb->callbackArg.v = (ctrl->headSepCounter - + raidPtr->headSepLimit + raidPtr->headSepLimit / 5); cb->row = row; cb->col = col; cb->next = NULL; - /* insert this callback descriptor into the sorted list of - * pending head-sep callbacks */ + /* + * Insert this callback descriptor into the sorted list of + * pending head-sep callbacks. 
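(Reduced to plain integers, the stall test and the 20% hysteresis described above look like the sketch below; the helper names are hypothetical. A disk stalls once it is more than headSepLimit ahead of the slowest disk, and is not woken until the slowest disk has caught up to within 80% of that limit.)

/*
 * Head-separation throttle with plain longs.  'ctr' is this disk's
 * counter, 'min_ctr' the slowest disk's counter, 'limit' the maximum
 * allowed separation (negative means unlimited).
 */
static int
head_sep_stalled(long ctr, long min_ctr, long limit)
{
	return (limit >= 0 && (ctr - min_ctr) > limit);
}

/*
 * The counter the slowest disk must reach before this disk is woken
 * again.  Build in 20% hysteresis: the separation must fall to at most
 * 80% of the limit.
 */
static long
head_sep_wakeup_threshold(long ctr, long limit)
{
	return (ctr - limit + limit / 5);
}

For example, with limit 1000 and ctr 5000 the wakeup threshold is 4200, i.e. the separation must drop to 800 sectors' worth of parity stripes, 80% of the limit, before work resumes.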
+ */ p = reconCtrlPtr->headSepCBList; if (!p) reconCtrlPtr->headSepCBList = cb; @@ -1406,47 +1666,57 @@ rf_CheckHeadSeparation( cb->next = reconCtrlPtr->headSepCBList; reconCtrlPtr->headSepCBList = cb; } else { - for (pt = p, p = p->next; p && (p->callbackArg.v < cb->callbackArg.v); pt = p, p = p->next); + for (pt = p, p = p->next; + p && (p->callbackArg.v < cb->callbackArg.v); + pt = p, p = p->next); cb->next = p; pt->next = cb; } retval = 1; -#if RF_RECON_STATS > 0 +#if RF_RECON_STATS > 0 ctrl->reconCtrl->reconDesc->hsStallCount++; -#endif /* RF_RECON_STATS > 0 */ +#endif /* RF_RECON_STATS > 0 */ } RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); return (retval); } -/* - * checks to see if reconstruction has been either forced or blocked - * by a user operation. if forced, we skip this RU entirely. else if - * blocked, put ourselves on the wait list. else return 0. + + + +/* + * Checks to see if reconstruction has been either forced or blocked + * by a user operation. If forced, we skip this RU entirely. Else if + * blocked, put ourselves on the wait list. Else return 0. * - * ASSUMES THE PSS MUTEX IS LOCKED UPON ENTRY + * ASSUMES THE PSS MUTEX IS LOCKED UPON ENTRY. */ -int +int rf_CheckForcedOrBlockedReconstruction( - RF_Raid_t * raidPtr, - RF_ReconParityStripeStatus_t * pssPtr, - RF_PerDiskReconCtrl_t * ctrl, - RF_RowCol_t row, - RF_RowCol_t col, - RF_StripeNum_t psid, - RF_ReconUnitNum_t which_ru) + RF_Raid_t *raidPtr, + RF_ReconParityStripeStatus_t *pssPtr, + RF_PerDiskReconCtrl_t *ctrl, + RF_RowCol_t row, + RF_RowCol_t col, + RF_StripeNum_t psid, + RF_ReconUnitNum_t which_ru +) { RF_CallbackDesc_t *cb; - int retcode = 0; + int retcode = 0; - if ((pssPtr->flags & RF_PSS_FORCED_ON_READ) || (pssPtr->flags & RF_PSS_FORCED_ON_WRITE)) + if ((pssPtr->flags & RF_PSS_FORCED_ON_READ) || + (pssPtr->flags & RF_PSS_FORCED_ON_WRITE)) retcode = RF_PSS_FORCED_ON_WRITE; else if (pssPtr->flags & RF_PSS_RECON_BLOCKED) { - Dprintf4("RECON: row %d col %d blocked at psid %ld ru %d\n", row, col, psid, which_ru); - cb = rf_AllocCallbackDesc(); /* append ourselves to + Dprintf4("RECON: row %d col %d blocked at psid %ld" + " ru %d.\n", row, col, psid, which_ru); + cb = rf_AllocCallbackDesc(); /* + * Append ourselves to * the blockage-wait - * list */ + * list. + */ cb->row = row; cb->col = col; cb->next = pssPtr->blockWaitList; @@ -1454,201 +1724,274 @@ rf_CheckForcedOrBlockedReconstruction( retcode = RF_PSS_RECON_BLOCKED; } if (!retcode) - pssPtr->flags |= RF_PSS_UNDER_RECON; /* mark this RU as under - * reconstruction */ + pssPtr->flags |= RF_PSS_UNDER_RECON; /* + * Mark this RU as under + * reconstruction. + */ return (retcode); } + + /* - * if reconstruction is currently ongoing for the indicated stripeID, + * If reconstruction is currently ongoing for the indicated stripeID, * reconstruction is forced to completion and we return non-zero to - * indicate that the caller must wait. If not, then reconstruction is - * blocked on the indicated stripe and the routine returns zero. If + * indicate that the caller must wait. If not, then reconstruction is + * blocked on the indicated stripe and the routine returns zero. If * and only if we return non-zero, we'll cause the cbFunc to get - * invoked with the cbArg when the reconstruction has completed. + * invoked with the cbArg when the reconstruction has completed. 
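(The insertion into the pending head-sep callback list above is an ordinary sorted insert into a singly linked list, with the three cases of empty list, new head, and insertion after the head. A self-contained sketch with a hypothetical node type follows.)

struct cb_node {
	long		 v;	/* Wakeup threshold, kept ascending. */
	struct cb_node	*next;
};

/*
 * Insert 'cb' into the list headed by '*headp', keeping the list sorted
 * by ascending 'v'.
 */
static void
head_sep_cb_insert(struct cb_node **headp, struct cb_node *cb)
{
	struct cb_node *p = *headp, *pt;

	if (p == NULL || cb->v < p->v) {
		cb->next = p;
		*headp = cb;
		return;
	}
	for (pt = p, p = p->next; p && p->v < cb->v; pt = p, p = p->next)
		;
	cb->next = p;
	pt->next = cb;
}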
*/ -int -rf_ForceOrBlockRecon(raidPtr, asmap, cbFunc, cbArg) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; - void (*cbFunc) (RF_Raid_t *, void *); - void *cbArg; +int +rf_ForceOrBlockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, + void (*cbFunc) (RF_Raid_t *, void *), void *cbArg) { - RF_RowCol_t row = asmap->physInfo->row; /* which row of the array - * we're working on */ - RF_StripeNum_t stripeID = asmap->stripeID; /* the stripe ID we're - * forcing recon on */ - RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU; /* num sects in one RU */ - RF_ReconParityStripeStatus_t *pssPtr; /* a pointer to the parity - * stripe status structure */ - RF_StripeNum_t psid; /* parity stripe id */ - RF_SectorNum_t offset, fd_offset; /* disk offset, failed-disk - * offset */ + RF_RowCol_t row = asmap->physInfo->row; /* + * Which row of the array + * we're working on. + */ + RF_StripeNum_t stripeID = asmap->stripeID; /* + * The stripe ID we're + * forcing recon on. + */ + RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * + raidPtr->Layout.SUsPerRU; /* Num sects in one RU. */ + RF_ReconParityStripeStatus_t *pssPtr; /* + * A pointer to the parity + * stripe status structure. + */ + RF_StripeNum_t psid; /* Parity stripe id. */ + RF_SectorNum_t offset, fd_offset; /* + * Disk offset, failed-disk + * offset. + */ RF_RowCol_t *diskids; RF_RowCol_t stripe; - RF_ReconUnitNum_t which_ru; /* RU within parity stripe */ + RF_ReconUnitNum_t which_ru; /* RU within parity stripe. */ RF_RowCol_t fcol, diskno, i; - RF_ReconBuffer_t *new_rbuf; /* ptr to newly allocated rbufs */ - RF_DiskQueueData_t *req;/* disk I/O req to be enqueued */ + RF_ReconBuffer_t *new_rbuf; /* Ptr to newly allocated rbufs. */ + RF_DiskQueueData_t *req; /* Disk I/O req to be enqueued. */ RF_CallbackDesc_t *cb; - int created = 0, nPromoted; + int created = 0, nPromoted; - psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru); + psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, + &which_ru); RF_LOCK_PSS_MUTEX(raidPtr, row, psid); - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_CREATE | RF_PSS_RECON_BLOCKED, &created); + pssPtr = rf_LookupRUStatus(raidPtr, + raidPtr->reconControl[row]->pssTable, psid, which_ru, + RF_PSS_CREATE | RF_PSS_RECON_BLOCKED, &created); - /* if recon is not ongoing on this PS, just return */ + /* If recon is not ongoing on this PS, just return. */ if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) { RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); return (0); } - /* otherwise, we have to wait for reconstruction to complete on this - * RU. */ - /* In order to avoid waiting for a potentially large number of + /* + * Otherwise, we have to wait for reconstruction to complete on this + * RU. + */ + /* + * In order to avoid waiting for a potentially large number of * low-priority accesses to complete, we force a normal-priority (i.e. - * not low-priority) reconstruction on this RU. */ - if (!(pssPtr->flags & RF_PSS_FORCED_ON_WRITE) && !(pssPtr->flags & RF_PSS_FORCED_ON_READ)) { - DDprintf1("Forcing recon on psid %ld\n", psid); - pssPtr->flags |= RF_PSS_FORCED_ON_WRITE; /* mark this RU as under - * forced recon */ - pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; /* clear the blockage - * that we just set */ + * not low-priority) reconstruction on this RU. 
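(The per-RU decision around the RF_PSS_* flags reduces to a small state test: forced means skip, blocked means queue and wait, otherwise mark the RU as under reconstruction and proceed. The sketch below uses invented flag values purely for illustration; they are not the driver's constants.)

/* Hypothetical flag bits standing in for the RF_PSS_* values. */
#define PSS_FORCED_ON_READ	0x01
#define PSS_FORCED_ON_WRITE	0x02
#define PSS_RECON_BLOCKED	0x04
#define PSS_UNDER_RECON		0x08

/*
 * Decide what to do with a reconstruction unit: if recon was forced on
 * it, skip it; if it is blocked, the caller must put itself on the wait
 * list; otherwise mark it under reconstruction and return 0 so the
 * caller proceeds.
 */
static int
check_forced_or_blocked(unsigned int *flagsp)
{
	if (*flagsp & (PSS_FORCED_ON_READ | PSS_FORCED_ON_WRITE))
		return (PSS_FORCED_ON_WRITE);	/* Skip this RU. */
	if (*flagsp & PSS_RECON_BLOCKED)
		return (PSS_RECON_BLOCKED);	/* Wait for the block to clear. */
	*flagsp |= PSS_UNDER_RECON;
	return (0);
}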
+ */ + if (!(pssPtr->flags & RF_PSS_FORCED_ON_WRITE) && + !(pssPtr->flags & RF_PSS_FORCED_ON_READ)) { + DDprintf1("Forcing recon on psid %ld.\n", psid); + /* Mark this RU as under forced recon. */ + pssPtr->flags |= RF_PSS_FORCED_ON_WRITE; + /* Clear the blockage that we just set. */ + pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; fcol = raidPtr->reconControl[row]->fcol; - /* get a listing of the disks comprising the indicated stripe */ - (raidPtr->Layout.map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids, &stripe); + /* + * Get a listing of the disks comprising the indicated stripe. + */ + (raidPtr->Layout.map->IdentifyStripe) (raidPtr, + asmap->raidAddress, &diskids, &stripe); RF_ASSERT(row == stripe); - /* For previously issued reads, elevate them to normal - * priority. If the I/O has already completed, it won't be + /* + * For previously issued reads, elevate them to normal + * priority. If the I/O has already completed, it won't be * found in the queue, and hence this will be a no-op. For - * unissued reads, allocate buffers and issue new reads. The + * unissued reads, allocate buffers and issue new reads. The * fact that we've set the FORCED bit means that the regular - * recon procs will not re-issue these reqs */ - for (i = 0; i < raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol; i++) + * recon procs will not re-issue these reqs. + */ + for (i = 0; i < raidPtr->Layout.numDataCol + + raidPtr->Layout.numParityCol; i++) if ((diskno = diskids[i]) != fcol) { if (pssPtr->issued[diskno]) { - nPromoted = rf_DiskIOPromote(&raidPtr->Queues[row][diskno], psid, which_ru); + nPromoted = rf_DiskIOPromote(&raidPtr + ->Queues[row][diskno], psid, + which_ru); if (rf_reconDebug && nPromoted) - printf("raid%d: promoted read from row %d col %d\n", raidPtr->raidid, row, diskno); + printf("raid%d: promoted read" + " from row %d col %d.\n", + raidPtr->raidid, row, + diskno); } else { - new_rbuf = rf_MakeReconBuffer(raidPtr, row, diskno, RF_RBUF_TYPE_FORCED); /* create new buf */ - rf_ComputePSDiskOffsets(raidPtr, psid, row, diskno, &offset, &fd_offset, - &new_rbuf->spRow, &new_rbuf->spCol, &new_rbuf->spOffset); /* find offsets & spare - * location */ - new_rbuf->parityStripeID = psid; /* fill in the buffer */ + /* Create new buf. */ + new_rbuf = rf_MakeReconBuffer(raidPtr, + row, diskno, RF_RBUF_TYPE_FORCED); + /* Find offsets & spare locationp */ + rf_ComputePSDiskOffsets(raidPtr, psid, + row, diskno, &offset, &fd_offset, + &new_rbuf->spRow, &new_rbuf->spCol, + &new_rbuf->spOffset); + new_rbuf->parityStripeID = psid; + /* Fill in the buffer. */ new_rbuf->which_ru = which_ru; - new_rbuf->failedDiskSectorOffset = fd_offset; - new_rbuf->priority = RF_IO_NORMAL_PRIORITY; - - /* use NULL b_proc b/c all addrs - * should be in kernel space */ - req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, offset + which_ru * sectorsPerRU, sectorsPerRU, new_rbuf->buffer, - psid, which_ru, (int (*) (void *, int)) rf_ForceReconReadDoneProc, (void *) new_rbuf, NULL, + new_rbuf->failedDiskSectorOffset = + fd_offset; + new_rbuf->priority = + RF_IO_NORMAL_PRIORITY; + + /* + * Use NULL b_proc b/c all addrs + * should be in kernel space. + */ + req = rf_CreateDiskQueueData( + RF_IO_TYPE_READ, offset + + which_ru * sectorsPerRU, + sectorsPerRU, new_rbuf->buffer, + psid, which_ru, (int (*) + (void *, int)) + rf_ForceReconReadDoneProc, + (void *) new_rbuf, NULL, NULL, (void *) raidPtr, 0, NULL); - RF_ASSERT(req); /* XXX -- fix this -- - * XXX */ + RF_ASSERT(req); /* + * XXX -- Fix this. 
-- + * XXX + */ new_rbuf->arg = req; - rf_DiskIOEnqueue(&raidPtr->Queues[row][diskno], req, RF_IO_NORMAL_PRIORITY); /* enqueue the I/O */ - Dprintf3("raid%d: Issued new read req on row %d col %d\n", raidPtr->raidid, row, diskno); + /* Enqueue the I/O. */ + rf_DiskIOEnqueue(&raidPtr + ->Queues[row][diskno], req, + RF_IO_NORMAL_PRIORITY); + Dprintf3("raid%d: Issued new read req" + " on row %d col %d.\n", + raidPtr->raidid, row, diskno); } } - /* if the write is sitting in the disk queue, elevate its - * priority */ - if (rf_DiskIOPromote(&raidPtr->Queues[row][fcol], psid, which_ru)) - printf("raid%d: promoted write to row %d col %d\n", - raidPtr->raidid, row, fcol); + /* + * If the write is sitting in the disk queue, elevate its + * priority. + */ + if (rf_DiskIOPromote(&raidPtr->Queues[row][fcol], + psid, which_ru)) + printf("raid%d: promoted write to row %d col %d.\n", + raidPtr->raidid, row, fcol); } - /* install a callback descriptor to be invoked when recon completes on - * this parity stripe. */ + /* + * Install a callback descriptor to be invoked when recon completes on + * this parity stripe. + */ cb = rf_AllocCallbackDesc(); - /* XXX the following is bogus.. These functions don't really match!! - * GO */ + /* + * XXX The following is bogus... These functions don't really match !!! + * GO + */ cb->callbackFunc = (void (*) (RF_CBParam_t)) cbFunc; cb->callbackArg.p = (void *) cbArg; cb->next = pssPtr->procWaitList; pssPtr->procWaitList = cb; - DDprintf2("raid%d: Waiting for forced recon on psid %ld\n", - raidPtr->raidid, psid); + DDprintf2("raid%d: Waiting for forced recon on psid %ld.\n", + raidPtr->raidid, psid); RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid); return (1); } -/* called upon the completion of a forced reconstruction read. - * all we do is schedule the FORCEDREADONE event. - * called at interrupt context in the kernel, so don't do anything illegal here. + + +/* + * Called upon the completion of a forced reconstruction read. + * All we do is schedule the FORCEDREADONE event. + * Called at interrupt context in the kernel, so don't do anything illegal here. */ -void -rf_ForceReconReadDoneProc(arg, status) - void *arg; - int status; +void +rf_ForceReconReadDoneProc(void *arg, int status) { RF_ReconBuffer_t *rbuf = arg; if (status) { - printf("Forced recon read failed!\n"); /* fprintf(stderr,"Forced - * recon read - * failed!\n"); */ + /* fprintf(stderr, "Forced recon read failed !\n"); */ + printf("Forced recon read failed !\n"); RF_PANIC(); } - rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, (void *) rbuf, RF_REVENT_FORCEDREADDONE); + rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, + (void *) rbuf, RF_REVENT_FORCEDREADDONE); } -/* releases a block on the reconstruction of the indicated stripe */ -int -rf_UnblockRecon(raidPtr, asmap) - RF_Raid_t *raidPtr; - RF_AccessStripeMap_t *asmap; + + +/* Releases a block on the reconstruction of the indicated stripe. 
*/ +int +rf_UnblockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap) { RF_RowCol_t row = asmap->origRow; RF_StripeNum_t stripeID = asmap->stripeID; RF_ReconParityStripeStatus_t *pssPtr; RF_ReconUnitNum_t which_ru; RF_StripeNum_t psid; - int created = 0; + int created = 0; RF_CallbackDesc_t *cb; - psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, &which_ru); + psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID, + &which_ru); RF_LOCK_PSS_MUTEX(raidPtr, row, psid); - pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, psid, which_ru, RF_PSS_NONE, &created); + pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row] + ->pssTable, psid, which_ru, RF_PSS_NONE, &created); - /* When recon is forced, the pss desc can get deleted before we get + /* + * When recon is forced, the pss desc can get deleted before we get * back to unblock recon. But, this can _only_ happen when recon is * forced. It would be good to put some kind of sanity check here, but - * how to decide if recon was just forced or not? */ + * how to decide if recon was just forced or not ? + */ if (!pssPtr) { - /* printf("Warning: no pss descriptor upon unblock on psid %ld - * RU %d\n",psid,which_ru); */ + /* + * printf("Warning: no pss descriptor upon unblock on psid %ld" + * " RU %d.\n", psid, which_ru); + */ if (rf_reconDebug || rf_pssDebug) - printf("Warning: no pss descriptor upon unblock on psid %ld RU %d\n", (long) psid, which_ru); + printf("Warning: no pss descriptor upon unblock on" + " psid %ld RU %d.\n", (long) psid, which_ru); goto out; } pssPtr->blockCount--; - Dprintf3("raid%d: unblocking recon on psid %ld: blockcount is %d\n", - raidPtr->raidid, psid, pssPtr->blockCount); - if (pssPtr->blockCount == 0) { /* if recon blockage has been released */ + Dprintf3("raid%d: unblocking recon on psid %ld: blockcount is %d.\n", + raidPtr->raidid, psid, pssPtr->blockCount); + if (pssPtr->blockCount == 0) { + /* If recon blockage has been released. */ - /* unblock recon before calling CauseReconEvent in case + /* + * Unblock recon before calling CauseReconEvent in case * CauseReconEvent causes us to try to issue a new read before - * returning here. */ + * returning here. + */ pssPtr->flags &= ~RF_PSS_RECON_BLOCKED; - while (pssPtr->blockWaitList) { - /* spin through the block-wait list and - release all the waiters */ + while (pssPtr->blockWaitList) { + /* + * Spin through the block-wait list and + * release all the waiters. + */ cb = pssPtr->blockWaitList; pssPtr->blockWaitList = cb->next; cb->next = NULL; - rf_CauseReconEvent(raidPtr, cb->row, cb->col, NULL, RF_REVENT_BLOCKCLEAR); + rf_CauseReconEvent(raidPtr, cb->row, cb->col, NULL, + RF_REVENT_BLOCKCLEAR); rf_FreeCallbackDesc(cb); } if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) { - /* if no recon was requested while recon was blocked */ - rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, pssPtr); + /* If no recon was requested while recon was blocked. 
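(Stripped of the PSS bookkeeping, the unblock path above is a reference-counted release: only the last unblock drains the wait list and signals each waiter. A minimal sketch with made-up types follows; the real routine additionally deletes the status descriptor when no reconstruction was requested while blocked.)

struct waiter {
	struct waiter	*next;
	/* Identity of the blocked process (row/col in the driver). */
};

/*
 * Release one blockage.  When the count reaches zero, spin through the
 * wait list and wake every waiter, as in the loop above.
 */
static void
unblock(int *block_countp, struct waiter **wait_listp,
    void (*wakeup_fn)(struct waiter *))
{
	struct waiter *w;

	if (--(*block_countp) != 0)
		return;
	while ((w = *wait_listp) != NULL) {
		*wait_listp = w->next;
		w->next = NULL;
		wakeup_fn(w);
	}
}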
*/ + rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row] + ->pssTable, pssPtr); } } out: diff --git a/sys/dev/raidframe/rf_reconstruct.h b/sys/dev/raidframe/rf_reconstruct.h index 8a0842ce867..42296b5db1d 100644 --- a/sys/dev/raidframe/rf_reconstruct.h +++ b/sys/dev/raidframe/rf_reconstruct.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_reconstruct.h,v 1.4 2000/08/08 16:07:45 peter Exp $ */ +/* $OpenBSD: rf_reconstruct.h,v 1.5 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_reconstruct.h,v 1.5 2000/05/28 00:48:30 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,9 +28,9 @@ * rights to redistribute these changes. */ -/********************************************************* - * rf_reconstruct.h -- header file for reconstruction code - *********************************************************/ +/*********************************************************** + * rf_reconstruct.h -- Header file for reconstruction code. + ***********************************************************/ #ifndef _RF__RF_RECONSTRUCT_H_ #define _RF__RF_RECONSTRUCT_H_ @@ -39,47 +40,93 @@ #include "rf_reconmap.h" #include "rf_psstatus.h" -/* reconstruction configuration information */ +/* Reconstruction configuration information. */ struct RF_ReconConfig_s { - unsigned numFloatingReconBufs; /* number of floating recon bufs to - * use */ - RF_HeadSepLimit_t headSepLimit; /* how far apart the heads are allow - * to become, in parity stripes */ + unsigned numFloatingReconBufs; + /* + * Number of floating recon + * bufs to use. + */ + RF_HeadSepLimit_t headSepLimit; /* + * How far apart the heads are + * allowed to become, in parity + * stripes. + */ }; -/* a reconstruction buffer */ + +/* A reconstruction buffer. */ struct RF_ReconBuffer_s { - RF_Raid_t *raidPtr; /* void * to avoid recursive includes */ - caddr_t buffer; /* points to the data */ - RF_StripeNum_t parityStripeID; /* the parity stripe that this data - * relates to */ - int which_ru; /* which reconstruction unit within the PSS */ - RF_SectorNum_t failedDiskSectorOffset; /* the offset into the failed - * disk */ - RF_RowCol_t row, col; /* which disk this buffer belongs to or is - * targeted at */ - RF_StripeCount_t count; /* counts the # of SUs installed so far */ - int priority; /* used to force hi priority recon */ - RF_RbufType_t type; /* FORCED or FLOATING */ - char *arrived; /* [x] = 1/0 if SU from disk x has/hasn't - * arrived */ - RF_ReconBuffer_t *next; /* used for buffer management */ - void *arg; /* generic field for general use */ - RF_RowCol_t spRow, spCol; /* spare disk to which this buf should - * be written */ - /* if dist sparing off, always identifies the replacement disk */ - RF_SectorNum_t spOffset;/* offset into the spare disk */ - /* if dist sparing off, identical to failedDiskSectorOffset */ - RF_ReconParityStripeStatus_t *pssPtr; /* debug- pss associated with - * issue-pending write */ + RF_Raid_t *raidPtr; /* + * (void *) to avoid recursive + * includes. + */ + caddr_t buffer; /* Points to the data. */ + RF_StripeNum_t parityStripeID;/* + * The parity stripe that this + * data relates to. + */ + int which_ru; /* + * Which reconstruction unit + * within the PSS. + */ + RF_SectorNum_t failedDiskSectorOffset; + /* + * The offset into the failed + * disk. + */ + RF_RowCol_t row, col; /* + * Which disk this buffer + * belongs to or is targeted at. + */ + RF_StripeCount_t count; /* + * Counts the # of SUs + * installed so far. + */ + int priority; /* + * Used to force high priority + * recon. 
+ */ + RF_RbufType_t type; /* FORCED or FLOATING. */ + char *arrived; /* + * [x] = 1/0 if SU from disk x + * has/hasn't arrived. + */ + RF_ReconBuffer_t *next; /* + * Used for buffer management. + */ + void *arg; /* + * Generic field for general + * use. + */ + RF_RowCol_t spRow, spCol; /* + * Spare disk to which this buf + * should be written. + */ + /* If dist sparing off, always identifies the replacement disk */ + RF_SectorNum_t spOffset; /* + * Offset into the spare disk. + */ + /* If dist sparing off, identical to failedDiskSectorOffset */ + RF_ReconParityStripeStatus_t *pssPtr; /* + * Debug pss associated with + * issue-pending write. + */ }; -/* a reconstruction event descriptor. The event types currently are: - * RF_REVENT_READDONE -- a read operation has completed - * RF_REVENT_WRITEDONE -- a write operation has completed - * RF_REVENT_BUFREADY -- the buffer manager has produced a full buffer - * RF_REVENT_BLOCKCLEAR -- a reconstruction blockage has been cleared - * RF_REVENT_BUFCLEAR -- the buffer manager has released a process blocked on submission - * RF_REVENT_SKIP -- we need to skip the current RU and go on to the next one, typ. b/c we found recon forced - * RF_REVENT_FORCEDREADONE- a forced-reconstructoin read operation has completed + +/* + * A reconstruction event descriptor. The event types currently are: + * RF_REVENT_READDONE -- A read operation has completed. + * RF_REVENT_WRITEDONE -- A write operation has completed. + * RF_REVENT_BUFREADY -- The buffer manager has produced a + * full buffer. + * RF_REVENT_BLOCKCLEAR -- A reconstruction blockage has been cleared. + * RF_REVENT_BUFCLEAR -- The buffer manager has released a process + * blocked on submission. + * RF_REVENT_SKIP -- We need to skip the current RU and go on + * to the next one, typ. b/c we found recon + * forced. + * RF_REVENT_FORCEDREADONE -- A forced-reconstructoin read operation has + * completed. */ typedef enum RF_Revent_e { RF_REVENT_READDONE, @@ -90,113 +137,171 @@ typedef enum RF_Revent_e { RF_REVENT_HEADSEPCLEAR, RF_REVENT_SKIP, RF_REVENT_FORCEDREADDONE -} RF_Revent_t; +} RF_Revent_t; struct RF_ReconEvent_s { - RF_Revent_t type; /* what kind of event has occurred */ - RF_RowCol_t col; /* row ID is implicit in the queue in which - * the event is placed */ - void *arg; /* a generic argument */ - RF_ReconEvent_t *next; + RF_Revent_t type; /* What kind of event has occurred. */ + RF_RowCol_t col; /* + * Row ID is implicit in the queue in + * which the event is placed. + */ + void *arg; /* A generic argument. */ + RF_ReconEvent_t *next; }; + /* - * Reconstruction control information maintained per-disk + * Reconstruction control information maintained per-disk. * (for surviving disks) */ struct RF_PerDiskReconCtrl_s { - RF_ReconCtrl_t *reconCtrl; - RF_RowCol_t row, col; /* to make this structure self-identifying */ - RF_StripeNum_t curPSID; /* the next parity stripe ID to check on this - * disk */ - RF_HeadSepLimit_t headSepCounter; /* counter used to control - * maximum head separation */ - RF_SectorNum_t diskOffset; /* the offset into the indicated disk - * of the current PU */ - RF_ReconUnitNum_t ru_count; /* this counts off the recon units - * within each parity unit */ - RF_ReconBuffer_t *rbuf; /* the recon buffer assigned to this disk */ + RF_ReconCtrl_t *reconCtrl; + RF_RowCol_t row, col; /* + * To make this structure + * self-identifying. + */ + RF_StripeNum_t curPSID; /* + * The next parity stripe ID + * to check on this disk. 
+ */ + RF_HeadSepLimit_t headSepCounter;/* + * Counter used to control + * maximum head separation. + */ + RF_SectorNum_t diskOffset; /* + * The offset into the + * indicated disk + * of the current PU. + */ + RF_ReconUnitNum_t ru_count; /* + * This counts off the recon + * units within each parity + * unit. + */ + RF_ReconBuffer_t *rbuf; /* + * The recon buffer assigned + * to this disk. + */ }; -/* main reconstruction control structure */ + +/* Main reconstruction control structure. */ struct RF_ReconCtrl_s { - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t fcol; /* which column has failed */ - RF_PerDiskReconCtrl_t *perDiskInfo; /* information maintained - * per-disk */ - RF_ReconMap_t *reconMap;/* map of what has/has not been reconstructed */ - RF_RowCol_t spareRow; /* which of the spare disks we're using */ - RF_RowCol_t spareCol; - RF_StripeNum_t lastPSID;/* the ID of the last parity stripe we want - * reconstructed */ - int percentComplete;/* percentage completion of reconstruction */ - int numRUsComplete; /* number of Reconstruction Units done */ - int numRUsTotal; /* total number of Reconstruction Units */ - - /* reconstruction event queue */ - RF_ReconEvent_t *eventQueue; /* queue of pending reconstruction - * events */ - RF_DECLARE_MUTEX(eq_mutex) /* mutex for locking event - * queue */ - RF_DECLARE_COND(eq_cond) /* condition variable for - * signalling recon events */ - int eq_count; /* debug only */ - - /* reconstruction buffer management */ - RF_DECLARE_MUTEX(rb_mutex) /* mutex for messing around - * with recon buffers */ - RF_ReconBuffer_t *floatingRbufs; /* available floating - * reconstruction buffers */ - RF_ReconBuffer_t *committedRbufs; /* recon buffers that have - * been committed to some - * waiting disk */ - RF_ReconBuffer_t *fullBufferList; /* full buffers waiting to be - * written out */ - RF_ReconBuffer_t *priorityList; /* full buffers that have been - * elevated to higher priority */ - RF_CallbackDesc_t *bufferWaitList; /* disks that are currently - * blocked waiting for buffers */ - - /* parity stripe status table */ - RF_PSStatusHeader_t *pssTable; /* stores the reconstruction status of - * active parity stripes */ - - /* maximum-head separation control */ - RF_HeadSepLimit_t minHeadSepCounter; /* the minimum hs counter over - * all disks */ - RF_CallbackDesc_t *headSepCBList; /* list of callbacks to be - * done as minPSID advances */ - - /* performance monitoring */ - struct timeval starttime; /* recon start time */ - - void (*continueFunc) (void *); /* function to call when io - * returns */ - void *continueArg; /* argument for Func */ -}; -/* the default priority for reconstruction accesses */ -#define RF_IO_RECON_PRIORITY RF_IO_LOW_PRIORITY + RF_RaidReconDesc_t *reconDesc; + RF_RowCol_t fcol; /* Which column has failed. */ + RF_PerDiskReconCtrl_t *perDiskInfo; /* + * Information maintained + * per-disk. + */ + RF_ReconMap_t *reconMap; /* + * Map of what has/has not + * been reconstructed. + */ + RF_RowCol_t spareRow; /* + * Which of the spare disks + * we're using. + */ + RF_RowCol_t spareCol; + RF_StripeNum_t lastPSID; /* + * The ID of the last + * parity stripe we want + * reconstructed. + */ + int percentComplete; + /* + * Percentage completion of + * reconstruction. + */ + int numRUsComplete;/* + * Number of Reconstruction + * Units done. + */ + int numRUsTotal; /* + * Total number of + * Reconstruction Units. + */ -int rf_ConfigureReconstruction(RF_ShutdownList_t ** listp); + /* Reconstruction event queue. 
*/ + RF_ReconEvent_t *eventQueue; /* + * Queue of pending + * reconstruction events. + */ + RF_DECLARE_MUTEX (eq_mutex); /* + * Mutex for locking event + * queue. + */ + RF_DECLARE_COND (eq_cond); /* + * Condition variable for + * signalling recon events. + */ + int eq_count; /* Debug only. */ -int -rf_ReconstructFailedDisk(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); + /* Reconstruction buffer management. */ + RF_DECLARE_MUTEX (rb_mutex); /* + * Mutex for messing around + * with recon buffers. + */ + RF_ReconBuffer_t *floatingRbufs; /* + * Available floating + * reconstruction buffers. + */ + RF_ReconBuffer_t *committedRbufs;/* + * Recon buffers that have + * been committed to some + * waiting disk. + */ + RF_ReconBuffer_t *fullBufferList;/* + * Full buffers waiting to be + * written out. + */ + RF_ReconBuffer_t *priorityList; /* + * Full buffers that have been + * elevated to higher priority. + */ + RF_CallbackDesc_t *bufferWaitList;/* + * Disks that are currently + * blocked waiting for buffers. + */ -int -rf_ReconstructFailedDiskBasic(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); + /* Parity stripe status table. */ + RF_PSStatusHeader_t *pssTable; /* + * Stores the reconstruction + * status of active parity + * stripes. + */ -int -rf_ReconstructInPlace(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col); + /* Maximum-head separation control. */ + RF_HeadSepLimit_t minHeadSepCounter; + /* + * The minimum hs counter over + * all disks. + */ + RF_CallbackDesc_t *headSepCBList; /* + * List of callbacks to be + * done as minPSID advances. + */ -int rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t * reconDesc); + /* Performance monitoring. */ + struct timeval starttime; /* Recon start time. */ -int -rf_ForceOrBlockRecon(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap, - void (*cbFunc) (RF_Raid_t *, void *), void *cbArg); + void (*continueFunc) (void *); + /* + * Function to call when io + * returns. + */ + void *continueArg; /* Argument for Func. */ +}; - int rf_UnblockRecon(RF_Raid_t * raidPtr, RF_AccessStripeMap_t * asmap); +/* The default priority for reconstruction accesses. */ +#define RF_IO_RECON_PRIORITY RF_IO_LOW_PRIORITY - int rf_RegisterReconDoneProc(RF_Raid_t * raidPtr, void (*proc) (RF_Raid_t *, void *), void *arg, - RF_ReconDoneProc_t ** handlep); +int rf_ConfigureReconstruction(RF_ShutdownList_t **); +int rf_ReconstructFailedDisk(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); +int rf_ReconstructFailedDiskBasic(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); +int rf_ReconstructInPlace(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t); +int rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *); +int rf_ForceOrBlockRecon(RF_Raid_t *, RF_AccessStripeMap_t *, + void (*) (RF_Raid_t *, void *), void *); +int rf_UnblockRecon(RF_Raid_t *, RF_AccessStripeMap_t *); +int rf_RegisterReconDoneProc(RF_Raid_t *, void (*) (RF_Raid_t *, void *), + void *, RF_ReconDoneProc_t **); -#endif /* !_RF__RF_RECONSTRUCT_H_ */ +#endif /* !_RF__RF_RECONSTRUCT_H_ */ diff --git a/sys/dev/raidframe/rf_reconutil.c b/sys/dev/raidframe/rf_reconutil.c index f4c83910850..b75c6425a76 100644 --- a/sys/dev/raidframe/rf_reconutil.c +++ b/sys/dev/raidframe/rf_reconutil.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_reconutil.c,v 1.2 1999/02/16 00:03:23 niklas Exp $ */ +/* $OpenBSD: rf_reconutil.c,v 1.3 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_reconutil.c,v 1.3 1999/02/05 00:06:17 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -27,9 +28,9 @@ * rights to redistribute these changes. */ -/******************************************** - * rf_reconutil.c -- reconstruction utilities - ********************************************/ +/********************************************** + * rf_reconutil.c -- Reconstruction utilities. + **********************************************/ #include "rf_types.h" #include "rf_raid.h" @@ -42,33 +43,40 @@ #include "rf_interdecluster.h" #include "rf_chaindecluster.h" -/******************************************************************* - * allocates/frees the reconstruction control information structures - *******************************************************************/ + +/********************************************************************* + * Allocates/frees the reconstruction control information structures. + *********************************************************************/ RF_ReconCtrl_t * -rf_MakeReconControl(reconDesc, frow, fcol, srow, scol) - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t frow; /* failed row and column */ - RF_RowCol_t fcol; - RF_RowCol_t srow; /* identifies which spare we're using */ - RF_RowCol_t scol; +rf_MakeReconControl( + RF_RaidReconDesc_t *reconDesc, + RF_RowCol_t frow, /* Failed row and column. */ + RF_RowCol_t fcol, + RF_RowCol_t srow, /* Identifies which spare we're using. */ + RF_RowCol_t scol +) { RF_Raid_t *raidPtr = reconDesc->raidPtr; RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; - RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; + RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / + layoutPtr->SUsPerRU; RF_ReconUnitCount_t numSpareRUs; RF_ReconCtrl_t *reconCtrlPtr; RF_ReconBuffer_t *rbuf; RF_LayoutSW_t *lp; - int retcode, rc; + int retcode, rc; RF_RowCol_t i; lp = raidPtr->Layout.map; - /* make and zero the global reconstruction structure and the per-disk - * structure */ + /* + * Make and zero the global reconstruction structure and the per-disk + * structure. + */ RF_Calloc(reconCtrlPtr, 1, sizeof(RF_ReconCtrl_t), (RF_ReconCtrl_t *)); - RF_Calloc(reconCtrlPtr->perDiskInfo, raidPtr->numCol, sizeof(RF_PerDiskReconCtrl_t), (RF_PerDiskReconCtrl_t *)); /* this zeros it */ + /* This zeros it. */ + RF_Calloc(reconCtrlPtr->perDiskInfo, raidPtr->numCol, + sizeof(RF_PerDiskReconCtrl_t), (RF_PerDiskReconCtrl_t *)); reconCtrlPtr->reconDesc = reconDesc; reconCtrlPtr->fcol = fcol; reconCtrlPtr->spareRow = srow; @@ -76,19 +84,20 @@ rf_MakeReconControl(reconDesc, frow, fcol, srow, scol) reconCtrlPtr->lastPSID = layoutPtr->numStripe / layoutPtr->SUsPerPU; reconCtrlPtr->percentComplete = 0; - /* initialize each per-disk recon information structure */ + /* Initialize each per-disk recon information structure. */ for (i = 0; i < raidPtr->numCol; i++) { reconCtrlPtr->perDiskInfo[i].reconCtrl = reconCtrlPtr; reconCtrlPtr->perDiskInfo[i].row = frow; reconCtrlPtr->perDiskInfo[i].col = i; - reconCtrlPtr->perDiskInfo[i].curPSID = -1; /* make it appear as if - * we just finished an - * RU */ + /* Make it appear as if we just finished an RU. */ + reconCtrlPtr->perDiskInfo[i].curPSID = -1; reconCtrlPtr->perDiskInfo[i].ru_count = RUsPerPU - 1; } - /* Get the number of spare units per disk and the sparemap in case - * spare is distributed */ + /* + * Get the number of spare units per disk and the sparemap in case + * spare is distributed. 
+ */ if (lp->GetNumSpareRUs) { numSpareRUs = lp->GetNumSpareRUs(raidPtr); @@ -97,47 +106,50 @@ rf_MakeReconControl(reconDesc, frow, fcol, srow, scol) } /* - * Not all distributed sparing archs need dynamic mappings - */ + * Not all distributed sparing archs need dynamic mappings. + */ if (lp->InstallSpareTable) { retcode = rf_InstallSpareTable(raidPtr, frow, fcol); if (retcode) { - RF_PANIC(); /* XXX fix this */ + RF_PANIC(); /* XXX Fix this. */ } } - /* make the reconstruction map */ - reconCtrlPtr->reconMap = rf_MakeReconMap(raidPtr, (int) (layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit), + /* Make the reconstruction map. */ + reconCtrlPtr->reconMap = rf_MakeReconMap(raidPtr, + (int) (layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit), raidPtr->sectorsPerDisk, numSpareRUs); - /* make the per-disk reconstruction buffers */ + /* Make the per-disk reconstruction buffers. */ for (i = 0; i < raidPtr->numCol; i++) { - reconCtrlPtr->perDiskInfo[i].rbuf = (i == fcol) ? NULL : rf_MakeReconBuffer(raidPtr, frow, i, RF_RBUF_TYPE_EXCLUSIVE); + reconCtrlPtr->perDiskInfo[i].rbuf = (i == fcol) ? NULL : + rf_MakeReconBuffer(raidPtr, frow, i, + RF_RBUF_TYPE_EXCLUSIVE); } - /* initialize the event queue */ + /* Initialize the event queue. */ rc = rf_mutex_init(&reconCtrlPtr->eq_mutex); if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + /* XXX Deallocate, cleanup. */ + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n", + __FILE__, __LINE__, rc); return (NULL); } rc = rf_cond_init(&reconCtrlPtr->eq_cond); if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + /* XXX Deallocate, cleanup. */ + RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d.\n", + __FILE__, __LINE__, rc); return (NULL); } reconCtrlPtr->eventQueue = NULL; reconCtrlPtr->eq_count = 0; - /* make the floating recon buffers and append them to the free list */ + /* Make the floating recon buffers and append them to the free list. */ rc = rf_mutex_init(&reconCtrlPtr->rb_mutex); if (rc) { - /* XXX deallocate, cleanup */ - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + /* XXX Deallocate, cleanup. */ + RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n", + __FILE__, __LINE__, rc); return (NULL); } reconCtrlPtr->fullBufferList = NULL; @@ -145,24 +157,23 @@ rf_MakeReconControl(reconDesc, frow, fcol, srow, scol) reconCtrlPtr->floatingRbufs = NULL; reconCtrlPtr->committedRbufs = NULL; for (i = 0; i < raidPtr->numFloatingReconBufs; i++) { - rbuf = rf_MakeReconBuffer(raidPtr, frow, fcol, RF_RBUF_TYPE_FLOATING); + rbuf = rf_MakeReconBuffer(raidPtr, frow, fcol, + RF_RBUF_TYPE_FLOATING); rbuf->next = reconCtrlPtr->floatingRbufs; reconCtrlPtr->floatingRbufs = rbuf; } - /* create the parity stripe status table */ + /* Create the parity stripe status table. */ reconCtrlPtr->pssTable = rf_MakeParityStripeStatusTable(raidPtr); - /* set the initial min head sep counter val */ + /* Set the initial min head sep counter val. 
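(The floating-buffer setup further down is a simple prepend loop that builds a free list. Sketch below, with a hypothetical node type and allocator standing in for RF_ReconBuffer_t and rf_MakeReconBuffer().)

struct rbuf {
	struct rbuf	*next;
	/* Data buffer and identity would live here. */
};

/*
 * Build a free list of up to 'n' floating buffers by prepending each
 * newly allocated buffer, the same pattern used for floatingRbufs.
 */
static struct rbuf *
make_floating_list(int n, struct rbuf *(*alloc_rbuf)(void))
{
	struct rbuf *head = NULL, *rb;
	int i;

	for (i = 0; i < n; i++) {
		rb = alloc_rbuf();
		if (rb == NULL)
			break;
		rb->next = head;
		head = rb;
	}
	return (head);
}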
*/ reconCtrlPtr->minHeadSepCounter = 0; return (reconCtrlPtr); } -void -rf_FreeReconControl(raidPtr, row) - RF_Raid_t *raidPtr; - RF_RowCol_t row; +void +rf_FreeReconControl(RF_Raid_t *raidPtr, RF_RowCol_t row) { RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; RF_ReconBuffer_t *t; @@ -183,17 +194,17 @@ rf_FreeReconControl(raidPtr, row) rf_cond_destroy(&reconCtrlPtr->eq_cond); rf_FreeReconMap(reconCtrlPtr->reconMap); rf_FreeParityStripeStatusTable(raidPtr, reconCtrlPtr->pssTable); - RF_Free(reconCtrlPtr->perDiskInfo, raidPtr->numCol * sizeof(RF_PerDiskReconCtrl_t)); + RF_Free(reconCtrlPtr->perDiskInfo, raidPtr->numCol * + sizeof(RF_PerDiskReconCtrl_t)); RF_Free(reconCtrlPtr, sizeof(*reconCtrlPtr)); } -/****************************************************************************** - * computes the default head separation limit +/***************************************************************************** + * Computes the default head separation limit. *****************************************************************************/ -RF_HeadSepLimit_t -rf_GetDefaultHeadSepLimit(raidPtr) - RF_Raid_t *raidPtr; +RF_HeadSepLimit_t +rf_GetDefaultHeadSepLimit(RF_Raid_t *raidPtr) { RF_HeadSepLimit_t hsl; RF_LayoutSW_t *lp; @@ -206,15 +217,14 @@ rf_GetDefaultHeadSepLimit(raidPtr) } -/****************************************************************************** - * computes the default number of floating recon buffers +/***************************************************************************** + * Computes the default number of floating recon buffers. *****************************************************************************/ -int -rf_GetDefaultNumFloatingReconBuffers(raidPtr) - RF_Raid_t *raidPtr; +int +rf_GetDefaultNumFloatingReconBuffers(RF_Raid_t *raidPtr) { RF_LayoutSW_t *lp; - int nrb; + int nrb; lp = raidPtr->Layout.map; if (lp->GetDefaultNumFloatingReconBuffers == NULL) @@ -224,19 +234,17 @@ rf_GetDefaultNumFloatingReconBuffers(raidPtr) } -/****************************************************************************** - * creates and initializes a reconstruction buffer +/***************************************************************************** + * Creates and initializes a reconstruction buffer. *****************************************************************************/ RF_ReconBuffer_t * -rf_MakeReconBuffer( - RF_Raid_t * raidPtr, - RF_RowCol_t row, - RF_RowCol_t col, +rf_MakeReconBuffer(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col, RF_RbufType_t type) { RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_ReconBuffer_t *t; - u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit); + u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, + layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit); RF_Malloc(t, sizeof(RF_ReconBuffer_t), (RF_ReconBuffer_t *)); RF_Malloc(t->buffer, recon_buffer_size, (caddr_t)); @@ -250,15 +258,17 @@ rf_MakeReconBuffer( t->next = NULL; return (t); } -/****************************************************************************** - * frees a reconstruction buffer + + +/***************************************************************************** + * Frees a reconstruction buffer. 
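(The buffer size computed in rf_MakeReconBuffer() below is one reconstruction unit expressed in bytes. A small worked computation with made-up layout numbers, not driver defaults: with 512-byte sectors, 32 sectors per stripe unit and 1 SU per RU, each buffer is 1 * 32 * 512 = 16384 bytes.)

/* Bytes in one reconstruction unit, assuming a fixed sector size. */
static unsigned int
recon_buffer_bytes(unsigned int SUsPerRU, unsigned int sectorsPerSU,
    unsigned int bytesPerSector)
{
	return (SUsPerRU * sectorsPerSU * bytesPerSector);
}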
*****************************************************************************/ -void -rf_FreeReconBuffer(rbuf) - RF_ReconBuffer_t *rbuf; +void +rf_FreeReconBuffer(RF_ReconBuffer_t *rbuf) { RF_Raid_t *raidPtr = rbuf->raidPtr; - u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.SUsPerRU * raidPtr->Layout.sectorsPerStripeUnit); + u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, + raidPtr->Layout.SUsPerRU * raidPtr->Layout.sectorsPerStripeUnit); RF_Free(rbuf->arrived, raidPtr->numCol * sizeof(char)); RF_Free(rbuf->buffer, recon_buffer_size); @@ -266,18 +276,16 @@ rf_FreeReconBuffer(rbuf) } -/****************************************************************************** - * debug only: sanity check the number of floating recon bufs in use +/***************************************************************************** + * Debug only: Sanity check the number of floating recon bufs in use. *****************************************************************************/ -void -rf_CheckFloatingRbufCount(raidPtr, dolock) - RF_Raid_t *raidPtr; - int dolock; +void +rf_CheckFloatingRbufCount(RF_Raid_t *raidPtr, int dolock) { RF_ReconParityStripeStatus_t *p; RF_PSStatusHeader_t *pssTable; RF_ReconBuffer_t *rbuf; - int i, j, sum = 0; + int i, j, sum = 0; RF_RowCol_t frow = 0; for (i = 0; i < raidPtr->numRow; i++) @@ -312,19 +320,23 @@ rf_CheckFloatingRbufCount(raidPtr, dolock) RF_UNLOCK_MUTEX(pssTable[i].mutex); } - for (rbuf = raidPtr->reconControl[frow]->floatingRbufs; rbuf; rbuf = rbuf->next) { + for (rbuf = raidPtr->reconControl[frow]->floatingRbufs; rbuf; + rbuf = rbuf->next) { if (rbuf->type == RF_RBUF_TYPE_FLOATING) sum++; } - for (rbuf = raidPtr->reconControl[frow]->committedRbufs; rbuf; rbuf = rbuf->next) { + for (rbuf = raidPtr->reconControl[frow]->committedRbufs; rbuf; + rbuf = rbuf->next) { if (rbuf->type == RF_RBUF_TYPE_FLOATING) sum++; } - for (rbuf = raidPtr->reconControl[frow]->fullBufferList; rbuf; rbuf = rbuf->next) { + for (rbuf = raidPtr->reconControl[frow]->fullBufferList; rbuf; + rbuf = rbuf->next) { if (rbuf->type == RF_RBUF_TYPE_FLOATING) sum++; } - for (rbuf = raidPtr->reconControl[frow]->priorityList; rbuf; rbuf = rbuf->next) { + for (rbuf = raidPtr->reconControl[frow]->priorityList; rbuf; + rbuf = rbuf->next) { if (rbuf->type == RF_RBUF_TYPE_FLOATING) sum++; } diff --git a/sys/dev/raidframe/rf_reconutil.h b/sys/dev/raidframe/rf_reconutil.h index 4c8d1b9924f..24051b05d63 100644 --- a/sys/dev/raidframe/rf_reconutil.h +++ b/sys/dev/raidframe/rf_reconutil.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_reconutil.h,v 1.2 1999/02/16 00:03:24 niklas Exp $ */ +/* $OpenBSD: rf_reconutil.h,v 1.3 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_reconutil.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,26 +28,24 @@ * rights to redistribute these changes. */ -/************************************************************ - * rf_reconutil.h -- header file for reconstruction utilities - ************************************************************/ +/************************************************************** + * rf_reconutil.h -- Header file for reconstruction utilities. 
+ **************************************************************/ -#ifndef _RF__RF_RECONUTIL_H_ -#define _RF__RF_RECONUTIL_H_ +#ifndef _RF__RF_RECONUTIL_H_ +#define _RF__RF_RECONUTIL_H_ #include "rf_types.h" #include "rf_reconstruct.h" -RF_ReconCtrl_t * -rf_MakeReconControl(RF_RaidReconDesc_t * reconDesc, - RF_RowCol_t frow, RF_RowCol_t fcol, RF_RowCol_t srow, RF_RowCol_t scol); -void rf_FreeReconControl(RF_Raid_t * raidPtr, RF_RowCol_t row); -RF_HeadSepLimit_t rf_GetDefaultHeadSepLimit(RF_Raid_t * raidPtr); -int rf_GetDefaultNumFloatingReconBuffers(RF_Raid_t * raidPtr); -RF_ReconBuffer_t * -rf_MakeReconBuffer(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col, RF_RbufType_t type); -void rf_FreeReconBuffer(RF_ReconBuffer_t * rbuf); -void rf_CheckFloatingRbufCount(RF_Raid_t * raidPtr, int dolock); +RF_ReconCtrl_t *rf_MakeReconControl(RF_RaidReconDesc_t *, + RF_RowCol_t, RF_RowCol_t, RF_RowCol_t, RF_RowCol_t); +void rf_FreeReconControl(RF_Raid_t *, RF_RowCol_t); +RF_HeadSepLimit_t rf_GetDefaultHeadSepLimit(RF_Raid_t *); +int rf_GetDefaultNumFloatingReconBuffers(RF_Raid_t *); +RF_ReconBuffer_t *rf_MakeReconBuffer(RF_Raid_t *, + RF_RowCol_t, RF_RowCol_t, RF_RbufType_t); +void rf_FreeReconBuffer(RF_ReconBuffer_t *); +void rf_CheckFloatingRbufCount(RF_Raid_t *, int); -#endif /* !_RF__RF_RECONUTIL_H_ */ +#endif /* !_RF__RF_RECONUTIL_H_ */ diff --git a/sys/dev/raidframe/rf_revent.c b/sys/dev/raidframe/rf_revent.c index e664e361110..24864943c3f 100644 --- a/sys/dev/raidframe/rf_revent.c +++ b/sys/dev/raidframe/rf_revent.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_revent.c,v 1.9 2000/08/08 16:07:45 peter Exp $ */ +/* $OpenBSD: rf_revent.c,v 1.10 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_revent.c,v 1.7 2000/05/30 02:04:29 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -26,8 +27,9 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ + /* - * revent.c -- reconstruction event handling code + * revent.c -- Reconstruction event handling code. 
*/ #include <sys/errno.h> @@ -41,39 +43,36 @@ #include "rf_shutdown.h" static RF_FreeList_t *rf_revent_freelist; -#define RF_MAX_FREE_REVENT 128 -#define RF_REVENT_INC 8 -#define RF_REVENT_INITIAL 8 +#define RF_MAX_FREE_REVENT 128 +#define RF_REVENT_INC 8 +#define RF_REVENT_INITIAL 8 #include <sys/proc.h> #include <sys/kernel.h> -#define DO_WAIT(_rc) \ - tsleep(&(_rc)->eventQueue, PRIBIO, "raidframe eventq", 0) +#define DO_WAIT(_rc) \ + tsleep(&(_rc)->eventQueue, PRIBIO, "RAIDframe eventq", 0) -#define DO_SIGNAL(_rc) wakeup(&(_rc)->eventQueue) +#define DO_SIGNAL(_rc) wakeup(&(_rc)->eventQueue) -static void rf_ShutdownReconEvent(void *); +void rf_ShutdownReconEvent(void *); -static RF_ReconEvent_t * -GetReconEventDesc(RF_RowCol_t row, RF_RowCol_t col, - void *arg, RF_Revent_t type); +RF_ReconEvent_t *GetReconEventDesc(RF_RowCol_t, RF_RowCol_t, void *, + RF_Revent_t); -static void -rf_ShutdownReconEvent(ignored) - void *ignored; +void +rf_ShutdownReconEvent(void *ignored) { RF_FREELIST_DESTROY(rf_revent_freelist, next, (RF_ReconEvent_t *)); } -int -rf_ConfigureReconEvent(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureReconEvent(RF_ShutdownList_t **listp) { - int rc; + int rc; RF_FREELIST_CREATE(rf_revent_freelist, RF_MAX_FREE_REVENT, RF_REVENT_INC, sizeof(RF_ReconEvent_t)); @@ -81,8 +80,8 @@ rf_ConfigureReconEvent(listp) return (ENOMEM); rc = rf_ShutdownCreate(listp, rf_ShutdownReconEvent, NULL); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d\n", __FILE__, __LINE__, rc); rf_ShutdownReconEvent(NULL); return (rc); } @@ -91,16 +90,15 @@ rf_ConfigureReconEvent(listp) return (0); } -/* returns the next reconstruction event, blocking the calling thread - * until one becomes available. will now return null if it is blocked - * or will return an event if it is not */ +/* + * Returns the next reconstruction event, blocking the calling thread + * until one becomes available. Will now return null if it is blocked + * or will return an event if it is not. + */ RF_ReconEvent_t * -rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) - RF_RaidReconDesc_t *reconDesc; - RF_RowCol_t row; - void (*continueFunc) (void *); - void *continueArg; +rf_GetNextReconEvent(RF_RaidReconDesc_t *reconDesc, RF_RowCol_t row, + void (*continueFunc) (void *), void *continueArg) { RF_Raid_t *raidPtr = reconDesc->raidPtr; RF_ReconCtrl_t *rctrl = raidPtr->reconControl[row]; @@ -108,41 +106,45 @@ rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) RF_ASSERT(row >= 0 && row <= raidPtr->numRow); RF_LOCK_MUTEX(rctrl->eq_mutex); - /* q null and count==0 must be equivalent conditions */ + /* q NULL and count==0 must be equivalent conditions. */ RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); rctrl->continueFunc = continueFunc; rctrl->continueArg = continueArg; - /* mpsleep timeout value: secs = timo_val/hz. 'ticks' here is - defined as cycle-counter ticks, not softclock ticks */ + /* + * mpsleep timeout value: secs = timo_val/hz. 'ticks' here is + * defined as cycle-counter ticks, not softclock ticks. 
+ */ -#define MAX_RECON_EXEC_USECS (100 * 1000) /* 100 ms */ -#define RECON_DELAY_MS 25 -#define RECON_TIMO ((RECON_DELAY_MS * hz) / 1000) +#define MAX_RECON_EXEC_USECS (100 * 1000) /* 100 ms */ +#define RECON_DELAY_MS 25 +#define RECON_TIMO ((RECON_DELAY_MS * hz) / 1000) - /* we are not pre-emptible in the kernel, but we don't want to run + /* + * We are not pre-emptible in the kernel, but we don't want to run * forever. If we run w/o blocking for more than MAX_RECON_EXEC_USECS - * delay for RECON_DELAY_MS before continuing. this may murder us with + * delay for RECON_DELAY_MS before continuing. This may murder us with * context switches, so we may need to increase both the - * MAX...TICKS and the RECON_DELAY_MS. */ + * MAX...TICKS and the RECON_DELAY_MS. + */ if (reconDesc->reconExecTimerRunning) { - int status; + int status; RF_ETIMER_STOP(reconDesc->recon_exec_timer); RF_ETIMER_EVAL(reconDesc->recon_exec_timer); - reconDesc->reconExecTicks += - RF_ETIMER_VAL_US(reconDesc->recon_exec_timer); + reconDesc->reconExecTicks += + RF_ETIMER_VAL_US(reconDesc->recon_exec_timer); if (reconDesc->reconExecTicks > reconDesc->maxReconExecTicks) - reconDesc->maxReconExecTicks = - reconDesc->reconExecTicks; + reconDesc->maxReconExecTicks = + reconDesc->reconExecTicks; if (reconDesc->reconExecTicks >= MAX_RECON_EXEC_USECS) { - /* we've been running too long - sleep */ + /* We've been running too long - sleep. */ #if RF_RECON_STATS > 0 reconDesc->numReconExecDelays++; #endif /* RF_RECON_STATS > 0 */ - status = tsleep(&reconDesc->reconExecTicks, - PRIBIO, "recon delay", RECON_TIMO); + status = tsleep(&reconDesc->reconExecTicks, + PRIBIO, "recon delay", RECON_TIMO); RF_ASSERT(status == EWOULDBLOCK); reconDesc->reconExecTicks = 0; } @@ -152,7 +154,7 @@ rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) reconDesc->numReconEventWaits++; #endif /* RF_RECON_STATS > 0 */ DO_WAIT(rctrl); - reconDesc->reconExecTicks = 0; /* we've just waited */ + reconDesc->reconExecTicks = 0; /* We've just waited. */ } RF_ETIMER_START(reconDesc->recon_exec_timer); @@ -163,19 +165,16 @@ rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) event->next = NULL; rctrl->eq_count--; - /* q null and count==0 must be equivalent conditions */ + /* q NULL and count==0 must be equivalent conditions. */ RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); RF_UNLOCK_MUTEX(rctrl->eq_mutex); return (event); } -/* enqueues a reconstruction event on the indicated queue */ -void -rf_CauseReconEvent(raidPtr, row, col, arg, type) - RF_Raid_t *raidPtr; - RF_RowCol_t row; - RF_RowCol_t col; - void *arg; - RF_Revent_t type; + +/* Enqueues a reconstruction event on the indicated queue. */ +void +rf_CauseReconEvent(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col, + void *arg, RF_Revent_t type) { RF_ReconCtrl_t *rctrl = raidPtr->reconControl[row]; RF_ReconEvent_t *event = GetReconEventDesc(row, col, arg, type); @@ -183,9 +182,10 @@ rf_CauseReconEvent(raidPtr, row, col, arg, type) if (type == RF_REVENT_BUFCLEAR) { RF_ASSERT(col != rctrl->fcol); } - RF_ASSERT(row >= 0 && row <= raidPtr->numRow && col >= 0 && col <= raidPtr->numCol); + RF_ASSERT(row >= 0 && row <= raidPtr->numRow && col >= 0 && + col <= raidPtr->numCol); RF_LOCK_MUTEX(rctrl->eq_mutex); - /* q null and count==0 must be equivalent conditions */ + /* q NULL and count==0 must be equivalent conditions. 
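/*
 * A minimal, self-contained sketch of the RECON_TIMO arithmetic above: the
 * 25 ms delay is converted to clock ticks using the kernel's hz, so the
 * effective sleep granularity depends on hz.  The hz values below (100 and
 * 1000) are illustrative assumptions only, not values taken from this code.
 */
#include <stdio.h>

static int
recon_timo(int delay_ms, int hz)
{
	/* Mirrors RECON_TIMO: ((RECON_DELAY_MS * hz) / 1000). */
	return ((delay_ms * hz) / 1000);
}

int
main(void)
{
	/* hz = 100 -> 2 ticks (20 ms); hz = 1000 -> 25 ticks (25 ms). */
	printf("%d %d\n", recon_timo(25, 100), recon_timo(25, 1000));
	return (0);
}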
*/ RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); event->next = rctrl->eventQueue; rctrl->eventQueue = event; @@ -194,13 +194,10 @@ rf_CauseReconEvent(raidPtr, row, col, arg, type) DO_SIGNAL(rctrl); } -/* allocates and initializes a recon event descriptor */ -static RF_ReconEvent_t * -GetReconEventDesc(row, col, arg, type) - RF_RowCol_t row; - RF_RowCol_t col; - void *arg; - RF_Revent_t type; + +/* Allocates and initializes a recon event descriptor. */ +RF_ReconEvent_t * +GetReconEventDesc(RF_RowCol_t row, RF_RowCol_t col, void *arg, RF_Revent_t type) { RF_ReconEvent_t *t; @@ -213,9 +210,8 @@ GetReconEventDesc(row, col, arg, type) return (t); } -void -rf_FreeReconEventDesc(event) - RF_ReconEvent_t *event; +void +rf_FreeReconEventDesc(RF_ReconEvent_t *event) { RF_FREELIST_FREE(rf_revent_freelist, event, next); } diff --git a/sys/dev/raidframe/rf_revent.h b/sys/dev/raidframe/rf_revent.h index a4be2d4d03a..f22f57c67e9 100644 --- a/sys/dev/raidframe/rf_revent.h +++ b/sys/dev/raidframe/rf_revent.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_revent.h,v 1.2 1999/02/16 00:03:25 niklas Exp $ */ +/* $OpenBSD: rf_revent.h,v 1.3 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_revent.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,26 +28,22 @@ * rights to redistribute these changes. */ -/******************************************************************* +/********************************************************************* * - * rf_revent.h -- header file for reconstruction event handling code + * rf_revent.h -- Header file for reconstruction event handling code. * - *******************************************************************/ + *********************************************************************/ -#ifndef _RF__RF_REVENT_H_ -#define _RF__RF_REVENT_H_ +#ifndef _RF__RF_REVENT_H_ +#define _RF__RF_REVENT_H_ #include "rf_types.h" -int rf_ConfigureReconEvent(RF_ShutdownList_t ** listp); - -RF_ReconEvent_t * -rf_GetNextReconEvent(RF_RaidReconDesc_t * reconDesc, - RF_RowCol_t row, void (*continueFunc) (void *), void *continueArg); - - void rf_CauseReconEvent(RF_Raid_t * raidPtr, RF_RowCol_t row, RF_RowCol_t col, - void *arg, RF_Revent_t type); - - void rf_FreeReconEventDesc(RF_ReconEvent_t * event); +int rf_ConfigureReconEvent(RF_ShutdownList_t **); +RF_ReconEvent_t *rf_GetNextReconEvent(RF_RaidReconDesc_t *, + RF_RowCol_t, void (*) (void *), void *); +void rf_CauseReconEvent(RF_Raid_t *, + RF_RowCol_t, RF_RowCol_t, void *, RF_Revent_t); +void rf_FreeReconEventDesc(RF_ReconEvent_t *); -#endif /* !_RF__RF_REVENT_H_ */ +#endif /* !_RF__RF_REVENT_H_ */ diff --git a/sys/dev/raidframe/rf_shutdown.c b/sys/dev/raidframe/rf_shutdown.c index 293ead367e6..be13f747f50 100644 --- a/sys/dev/raidframe/rf_shutdown.c +++ b/sys/dev/raidframe/rf_shutdown.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_shutdown.c,v 1.4 2000/08/08 16:07:45 peter Exp $ */ +/* $OpenBSD: rf_shutdown.c,v 1.5 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_shutdown.c,v 1.6 2000/01/13 23:41:18 oster Exp $ */ + /* * rf_shutdown.c */ @@ -40,19 +41,16 @@ #include "rf_debugMem.h" #include "rf_freelist.h" -static void -rf_FreeShutdownEnt(RF_ShutdownList_t * ent) +void rf_FreeShutdownEnt(RF_ShutdownList_t *); +void +rf_FreeShutdownEnt(RF_ShutdownList_t *ent) { FREE(ent, M_RAIDFRAME); } -int -_rf_ShutdownCreate( - RF_ShutdownList_t ** listp, - void (*cleanup) (void *arg), - void *arg, - char *file, - int line) +int +_rf_ShutdownCreate(RF_ShutdownList_t **listp, void 
(*cleanup) (void *arg), + void *arg, char *file, int line) { RF_ShutdownList_t *ent; @@ -60,10 +58,10 @@ _rf_ShutdownCreate( * Have to directly allocate memory here, since we start up before * and shutdown after RAIDframe internal allocation system. */ - /* ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), - M_RAIDFRAME, M_WAITOK); */ - ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), - M_RAIDFRAME, M_NOWAIT); + /* ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), + * M_RAIDFRAME, M_WAITOK); */ + ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), + M_RAIDFRAME, M_NOWAIT); if (ent == NULL) return (ENOMEM); ent->cleanup = cleanup; @@ -75,12 +73,12 @@ _rf_ShutdownCreate( return (0); } -int -rf_ShutdownList(RF_ShutdownList_t ** list) +int +rf_ShutdownList(RF_ShutdownList_t **list) { RF_ShutdownList_t *r, *next; - char *file; - int line; + char *file; + int line; for (r = *list; r; r = next) { next = r->next; @@ -93,7 +91,8 @@ rf_ShutdownList(RF_ShutdownList_t ** list) r->cleanup(r->arg); if (rf_shutdownDebug) { - printf("completed shutdown, created %s:%d\n", file, line); + printf("completed shutdown, created %s:%d\n", file, + line); } rf_FreeShutdownEnt(r); } diff --git a/sys/dev/raidframe/rf_shutdown.h b/sys/dev/raidframe/rf_shutdown.h index 77ff2a14fe2..7bd2f72889f 100644 --- a/sys/dev/raidframe/rf_shutdown.h +++ b/sys/dev/raidframe/rf_shutdown.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_shutdown.h,v 1.3 2002/03/06 11:28:27 tdeval Exp $ */ +/* $OpenBSD: rf_shutdown.h,v 1.4 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_shutdown.h,v 1.2 1999/02/05 00:06:17 oster Exp $ */ + /* * rf_shutdown.h */ @@ -34,14 +35,14 @@ * thread startup and shutdown. */ -#ifndef _RF__RF_SHUTDOWN_H_ -#define _RF__RF_SHUTDOWN_H_ +#ifndef _RF__RF_SHUTDOWN_H_ +#define _RF__RF_SHUTDOWN_H_ #include "rf_types.h" #include "rf_threadstuff.h" /* - * Important note: the shutdown list is run like a stack, new + * Important note: The shutdown list is run like a stack, new * entries pushed on top. Therefore, the most recently added * entry (last started) is the first removed (stopped). This * should handle system-dependencies pretty nicely- if a system @@ -51,19 +52,18 @@ */ struct RF_ShutdownList_s { - void (*cleanup) (void *arg); - void *arg; - char *file; - int line; + void (*cleanup) (void *arg); + void *arg; + char *file; + int line; RF_ShutdownList_t *next; }; -#define rf_ShutdownCreate(_listp_,_func_,_arg_) \ - _rf_ShutdownCreate(_listp_,_func_,_arg_,__FILE__,__LINE__) - -int _rf_ShutdownCreate(RF_ShutdownList_t ** listp, void (*cleanup) (void *arg), - void *arg, char *file, int line); -int rf_ShutdownList(RF_ShutdownList_t ** listp); +#define rf_ShutdownCreate(_listp_,_func_,_arg_) \ + _rf_ShutdownCreate(_listp_, _func_, _arg_, __FILE__, __LINE__) -void rf_shutdown_hook(RF_ThreadArg_t); +int _rf_ShutdownCreate(RF_ShutdownList_t **, void (*) (void *arg), void *, + char *, int); +int rf_ShutdownList(RF_ShutdownList_t **); +void rf_shutdown_hook(RF_ThreadArg_t); -#endif /* !_RF__RF_SHUTDOWN_H_ */ +#endif /* !_RF__RF_SHUTDOWN_H_ */ diff --git a/sys/dev/raidframe/rf_sstf.c b/sys/dev/raidframe/rf_sstf.c index e603b98b859..748e3aee5db 100644 --- a/sys/dev/raidframe/rf_sstf.c +++ b/sys/dev/raidframe/rf_sstf.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_sstf.c,v 1.3 2000/01/11 18:02:23 peter Exp $ */ +/* $OpenBSD: rf_sstf.c,v 1.4 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_sstf.c,v 1.4 2000/01/08 23:45:05 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. 
* All rights reserved. @@ -27,11 +28,11 @@ * rights to redistribute these changes. */ -/******************************************************************************* +/***************************************************************************** * - * sstf.c -- prioritized shortest seek time first disk queueing code + * sstf.c -- Prioritized shortest seek time first disk queueing code. * - ******************************************************************************/ + *****************************************************************************/ #include "rf_alloclist.h" #include "rf_stripelocks.h" @@ -44,33 +45,28 @@ #include "rf_raid.h" #include "rf_types.h" -#define DIR_LEFT 1 -#define DIR_RIGHT 2 -#define DIR_EITHER 3 - -#define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_))) +#define DIR_LEFT 1 +#define DIR_RIGHT 2 +#define DIR_EITHER 3 -#define QSUM(_sstfq_) (((_sstfq_)->lopri.qlen)+((_sstfq_)->left.qlen)+((_sstfq_)->right.qlen)) +#define SNUM_DIFF(_a_,_b_) \ + (((_a_) > (_b_)) ? ((_a_) - (_b_)) : ((_b_) - (_a_))) +#define QSUM(_sstfq_) \ + (((_sstfq_)->lopri.qlen) + ((_sstfq_)->left.qlen) + \ + ((_sstfq_)->right.qlen)) -static void -do_sstf_ord_q(RF_DiskQueueData_t **, - RF_DiskQueueData_t **, - RF_DiskQueueData_t *); -static RF_DiskQueueData_t * -closest_to_arm(RF_SstfQ_t *, - RF_SectorNum_t, - int *, - int); -static void do_dequeue(RF_SstfQ_t *, RF_DiskQueueData_t *); +void rf_do_sstf_ord_q(RF_DiskQueueData_t **, RF_DiskQueueData_t **, + RF_DiskQueueData_t *); +void rf_do_dequeue(RF_SstfQ_t *, RF_DiskQueueData_t *); +RF_DiskQueueData_t *rf_closest_to_arm(RF_SstfQ_t *, RF_SectorNum_t, + int *, int); -static void -do_sstf_ord_q(queuep, tailp, req) - RF_DiskQueueData_t **queuep; - RF_DiskQueueData_t **tailp; - RF_DiskQueueData_t *req; +void +rf_do_sstf_ord_q(RF_DiskQueueData_t **queuep, RF_DiskQueueData_t **tailp, + RF_DiskQueueData_t *req) { RF_DiskQueueData_t *r, *s; @@ -89,14 +85,14 @@ do_sstf_ord_q(queuep, tailp, req) return; } if (req->sectorOffset > (*tailp)->sectorOffset) { - /* optimization */ + /* Optimization. */ r = NULL; s = *tailp; goto q_at_end; } for (s = NULL, r = *queuep; r; s = r, r = r->next) { if (r->sectorOffset >= req->sectorOffset) { - /* insert after s, before r */ + /* Insert after s, before r. */ RF_ASSERT(s); req->next = r; r->prev = req; @@ -106,7 +102,7 @@ do_sstf_ord_q(queuep, tailp, req) } } q_at_end: - /* insert after s, at end of queue */ + /* Insert after s, at end of queue. 
*/ RF_ASSERT(r == NULL); RF_ASSERT(s); RF_ASSERT(s == (*tailp)); @@ -115,57 +111,55 @@ q_at_end: s->next = req; *tailp = req; } -/* for removing from head-of-queue */ -#define DO_HEAD_DEQ(_r_,_q_) { \ - _r_ = (_q_)->queue; \ - RF_ASSERT((_r_) != NULL); \ - (_q_)->queue = (_r_)->next; \ - (_q_)->qlen--; \ - if ((_q_)->qlen == 0) { \ - RF_ASSERT((_r_) == (_q_)->qtail); \ - RF_ASSERT((_q_)->queue == NULL); \ - (_q_)->qtail = NULL; \ - } \ - else { \ - RF_ASSERT((_q_)->queue->prev == (_r_)); \ - (_q_)->queue->prev = NULL; \ - } \ -} - -/* for removing from end-of-queue */ -#define DO_TAIL_DEQ(_r_,_q_) { \ - _r_ = (_q_)->qtail; \ - RF_ASSERT((_r_) != NULL); \ - (_q_)->qtail = (_r_)->prev; \ - (_q_)->qlen--; \ - if ((_q_)->qlen == 0) { \ - RF_ASSERT((_r_) == (_q_)->queue); \ - RF_ASSERT((_q_)->qtail == NULL); \ - (_q_)->queue = NULL; \ - } \ - else { \ - RF_ASSERT((_q_)->qtail->next == (_r_)); \ - (_q_)->qtail->next = NULL; \ - } \ -} -#define DO_BEST_DEQ(_l_,_r_,_q_) { \ - if (SNUM_DIFF((_q_)->queue->sectorOffset,_l_) \ - < SNUM_DIFF((_q_)->qtail->sectorOffset,_l_)) \ - { \ - DO_HEAD_DEQ(_r_,_q_); \ - } \ - else { \ - DO_TAIL_DEQ(_r_,_q_); \ - } \ -} +/* For removing from head-of-queue. */ +#define DO_HEAD_DEQ(_r_,_q_) \ +do { \ + _r_ = (_q_)->queue; \ + RF_ASSERT((_r_) != NULL); \ + (_q_)->queue = (_r_)->next; \ + (_q_)->qlen--; \ + if ((_q_)->qlen == 0) { \ + RF_ASSERT((_r_) == (_q_)->qtail); \ + RF_ASSERT((_q_)->queue == NULL); \ + (_q_)->qtail = NULL; \ + } else { \ + RF_ASSERT((_q_)->queue->prev == (_r_)); \ + (_q_)->queue->prev = NULL; \ + } \ +} while (0) + +/* For removing from end-of-queue. */ +#define DO_TAIL_DEQ(_r_,_q_) \ +do { \ + _r_ = (_q_)->qtail; \ + RF_ASSERT((_r_) != NULL); \ + (_q_)->qtail = (_r_)->prev; \ + (_q_)->qlen--; \ + if ((_q_)->qlen == 0) { \ + RF_ASSERT((_r_) == (_q_)->queue); \ + RF_ASSERT((_q_)->qtail == NULL); \ + (_q_)->queue = NULL; \ + } else { \ + RF_ASSERT((_q_)->qtail->next == (_r_)); \ + (_q_)->qtail->next = NULL; \ + } \ +} while (0) + +#define DO_BEST_DEQ(_l_,_r_,_q_) \ +do { \ + if (SNUM_DIFF((_q_)->queue->sectorOffset,_l_) \ + < SNUM_DIFF((_q_)->qtail->sectorOffset,_l_)) \ + { \ + DO_HEAD_DEQ(_r_,_q_); \ + } else { \ + DO_TAIL_DEQ(_r_,_q_); \ + } \ +} while (0) -static RF_DiskQueueData_t * -closest_to_arm(queue, arm_pos, dir, allow_reverse) - RF_SstfQ_t *queue; - RF_SectorNum_t arm_pos; - int *dir; - int allow_reverse; +RF_DiskQueueData_t * +rf_closest_to_arm(RF_SstfQ_t *queue, RF_SectorNum_t arm_pos, int *dir, + int allow_reverse) { RF_SectorNum_t best_pos_l = 0, this_pos_l = 0, last_pos = 0; RF_SectorNum_t best_pos_r = 0, this_pos_r = 0; @@ -199,7 +193,7 @@ closest_to_arm(queue, arm_pos, dir, allow_reverse) last_pos = this_pos_r; } if (this_pos_r > last_pos) { - /* getting farther away */ + /* Getting farther away. 
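/*
 * A minimal, self-contained sketch of the choice DO_BEST_DEQ makes above:
 * rf_do_sstf_ord_q keeps each queue sorted by sectorOffset, so the request
 * closest to the arm is always at one of the two ends, and SNUM_DIFF picks
 * the end with the smaller seek distance.  The array and sector numbers
 * below are made-up illustrations, not RAIDframe structures.
 */
#include <stdio.h>

#define SNUM_DIFF(_a_,_b_) \
	(((_a_) > (_b_)) ? ((_a_) - (_b_)) : ((_b_) - (_a_)))

static long
best_end(const long *sorted, int n, long arm_pos)
{
	/* Head is sorted[0], tail is sorted[n - 1]; take the closer one. */
	if (SNUM_DIFF(sorted[0], arm_pos) < SNUM_DIFF(sorted[n - 1], arm_pos))
		return (sorted[0]);
	return (sorted[n - 1]);
}

int
main(void)
{
	long q[] = { 120, 300, 900 };

	/* Arm at 200: |120-200| = 80 beats |900-200| = 700, so 120 wins. */
	printf("%ld\n", best_end(q, 3, 200));
	return (0);
}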
*/ break; } } @@ -242,11 +236,9 @@ closest_to_arm(queue, arm_pos, dir, allow_reverse) return (queue->queue); } -void * -rf_SstfCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; +void * +rf_SstfCreate(RF_SectorCount_t sect_per_disk, RF_AllocListElem_t *cl_list, + RF_ShutdownList_t **listp) { RF_Sstf_t *sstfq; @@ -256,11 +248,9 @@ rf_SstfCreate(sect_per_disk, cl_list, listp) return ((void *) sstfq); } -void * -rf_ScanCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; +void * +rf_ScanCreate(RF_SectorCount_t sect_per_disk, RF_AllocListElem_t *cl_list, + RF_ShutdownList_t **listp) { RF_Sstf_t *scanq; @@ -270,11 +260,9 @@ rf_ScanCreate(sect_per_disk, cl_list, listp) return ((void *) scanq); } -void * -rf_CscanCreate(sect_per_disk, cl_list, listp) - RF_SectorCount_t sect_per_disk; - RF_AllocListElem_t *cl_list; - RF_ShutdownList_t **listp; +void * +rf_CscanCreate(RF_SectorCount_t sect_per_disk, RF_AllocListElem_t *cl_list, + RF_ShutdownList_t **listp) { RF_Sstf_t *cscanq; @@ -283,11 +271,8 @@ rf_CscanCreate(sect_per_disk, cl_list, listp) return ((void *) cscanq); } -void -rf_SstfEnqueue(qptr, req, priority) - void *qptr; - RF_DiskQueueData_t *req; - int priority; +void +rf_SstfEnqueue(void *qptr, RF_DiskQueueData_t *req, int priority) { RF_Sstf_t *sstfq; @@ -297,34 +282,33 @@ rf_SstfEnqueue(qptr, req, priority) if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { RF_DiskQueue_t *dq; dq = (RF_DiskQueue_t *) req->queue; - printf("raid%d: ENQ lopri %d,%d queues are %d,%d,%d\n", - req->raidPtr->raidid, - dq->row, dq->col, + printf("raid%d: ENQ lopri %d,%d queues are %d,%d,%d.\n", + req->raidPtr->raidid, dq->row, dq->col, sstfq->left.qlen, sstfq->right.qlen, - sstfq->lopri.qlen); + sstfq->lopri.qlen); } - do_sstf_ord_q(&sstfq->lopri.queue, &sstfq->lopri.qtail, req); + rf_do_sstf_ord_q(&sstfq->lopri.queue, &sstfq->lopri.qtail, req); sstfq->lopri.qlen++; } else { if (req->sectorOffset < sstfq->last_sector) { - do_sstf_ord_q(&sstfq->left.queue, &sstfq->left.qtail, req); + rf_do_sstf_ord_q(&sstfq->left.queue, + &sstfq->left.qtail, req); sstfq->left.qlen++; } else { - do_sstf_ord_q(&sstfq->right.queue, &sstfq->right.qtail, req); + rf_do_sstf_ord_q(&sstfq->right.queue, + &sstfq->right.qtail, req); sstfq->right.qlen++; } } } -static void -do_dequeue(queue, req) - RF_SstfQ_t *queue; - RF_DiskQueueData_t *req; +void +rf_do_dequeue(RF_SstfQ_t *queue, RF_DiskQueueData_t *req) { RF_DiskQueueData_t *req2; if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - printf("raid%d: do_dequeue\n", req->raidPtr->raidid); + printf("raid%d: rf_do_dequeue.\n", req->raidPtr->raidid); } if (req == queue->queue) { DO_HEAD_DEQ(req2, queue); @@ -334,7 +318,7 @@ do_dequeue(queue, req) DO_TAIL_DEQ(req2, queue); RF_ASSERT(req2 == req); } else { - /* dequeue from middle of list */ + /* Dequeue from middle of list. 
*/ RF_ASSERT(req->next); RF_ASSERT(req->prev); queue->qlen--; @@ -345,8 +329,7 @@ do_dequeue(queue, req) } RF_DiskQueueData_t * -rf_SstfDequeue(qptr) - void *qptr; +rf_SstfDequeue(void *qptr) { RF_DiskQueueData_t *req = NULL; RF_Sstf_t *sstfq; @@ -357,8 +340,8 @@ rf_SstfDequeue(qptr) RF_DiskQueue_t *dq; dq = (RF_DiskQueue_t *) req->queue; RF_ASSERT(QSUM(sstfq) == dq->queueLength); - printf("raid%d: sstf: Dequeue %d,%d queues are %d,%d,%d\n", - req->raidPtr->raidid, dq->row, dq->col, + printf("raid%d: sstf: Dequeue %d,%d queues are %d,%d,%d.\n", + req->raidPtr->raidid, dq->row, dq->col, sstfq->left.qlen, sstfq->right.qlen, sstfq->lopri.qlen); } if (sstfq->left.queue == NULL) { @@ -370,18 +353,20 @@ rf_SstfDequeue(qptr) return (NULL); } if (rf_sstfDebug) { - printf("raid%d: sstf: check for close lopri", + printf("raid%d: sstf: check for close lopri.\n", req->raidPtr->raidid); } - req = closest_to_arm(&sstfq->lopri, sstfq->last_sector, - &sstfq->dir, sstfq->allow_reverse); + req = rf_closest_to_arm(&sstfq->lopri, + sstfq->last_sector, &sstfq->dir, + sstfq->allow_reverse); if (rf_sstfDebug) { - printf("raid%d: sstf: closest_to_arm said %lx", - req->raidPtr->raidid, (long) req); + printf("raid%d: sstf: rf_closest_to_arm said" + " %lx.\n", req->raidPtr->raidid, + (long) req); } if (req == NULL) return (NULL); - do_dequeue(&sstfq->lopri, req); + rf_do_dequeue(&sstfq->lopri, req); } else { DO_BEST_DEQ(sstfq->last_sector, req, &sstfq->right); } @@ -390,8 +375,10 @@ rf_SstfDequeue(qptr) RF_ASSERT(sstfq->right.qlen == 0); DO_BEST_DEQ(sstfq->last_sector, req, &sstfq->left); } else { - if (SNUM_DIFF(sstfq->last_sector, sstfq->right.queue->sectorOffset) - < SNUM_DIFF(sstfq->last_sector, sstfq->left.qtail->sectorOffset)) { + if (SNUM_DIFF(sstfq->last_sector, + sstfq->right.queue->sectorOffset) < + SNUM_DIFF(sstfq->last_sector, + sstfq->left.qtail->sectorOffset)) { DO_HEAD_DEQ(req, &sstfq->right); } else { DO_TAIL_DEQ(req, &sstfq->left); @@ -404,8 +391,7 @@ rf_SstfDequeue(qptr) } RF_DiskQueueData_t * -rf_ScanDequeue(qptr) - void *qptr; +rf_ScanDequeue(void *qptr) { RF_DiskQueueData_t *req = NULL; RF_Sstf_t *scanq; @@ -416,8 +402,8 @@ rf_ScanDequeue(qptr) RF_DiskQueue_t *dq; dq = (RF_DiskQueue_t *) req->queue; RF_ASSERT(QSUM(scanq) == dq->queueLength); - printf("raid%d: scan: Dequeue %d,%d queues are %d,%d,%d\n", - req->raidPtr->raidid, dq->row, dq->col, + printf("raid%d: scan: Dequeue %d,%d queues are %d,%d,%d.\n", + req->raidPtr->raidid, dq->row, dq->col, scanq->left.qlen, scanq->right.qlen, scanq->lopri.qlen); } if (scanq->left.queue == NULL) { @@ -428,11 +414,12 @@ rf_ScanDequeue(qptr) RF_ASSERT(scanq->lopri.qlen == 0); return (NULL); } - req = closest_to_arm(&scanq->lopri, scanq->last_sector, - &scanq->dir, scanq->allow_reverse); + req = rf_closest_to_arm(&scanq->lopri, + scanq->last_sector, &scanq->dir, + scanq->allow_reverse); if (req == NULL) return (NULL); - do_dequeue(&scanq->lopri, req); + rf_do_dequeue(&scanq->lopri, req); } else { scanq->dir = DIR_RIGHT; DO_HEAD_DEQ(req, &scanq->right); @@ -458,8 +445,7 @@ rf_ScanDequeue(qptr) } RF_DiskQueueData_t * -rf_CscanDequeue(qptr) - void *qptr; +rf_CscanDequeue(void *qptr) { RF_DiskQueueData_t *req = NULL; RF_Sstf_t *cscanq; @@ -471,10 +457,10 @@ rf_CscanDequeue(qptr) RF_DiskQueue_t *dq; dq = (RF_DiskQueue_t *) req->queue; RF_ASSERT(QSUM(cscanq) == dq->queueLength); - printf("raid%d: scan: Dequeue %d,%d queues are %d,%d,%d\n", + printf("raid%d: scan: Dequeue %d,%d queues are %d,%d,%d.\n", req->raidPtr->raidid, dq->row, dq->col, cscanq->left.qlen, 
cscanq->right.qlen, - cscanq->lopri.qlen); + cscanq->lopri.qlen); } if (cscanq->right.queue) { DO_HEAD_DEQ(req, &cscanq->right); @@ -486,11 +472,12 @@ rf_CscanDequeue(qptr) RF_ASSERT(cscanq->lopri.qlen == 0); return (NULL); } - req = closest_to_arm(&cscanq->lopri, cscanq->last_sector, - &cscanq->dir, cscanq->allow_reverse); + req = rf_closest_to_arm(&cscanq->lopri, + cscanq->last_sector, &cscanq->dir, + cscanq->allow_reverse); if (req == NULL) return (NULL); - do_dequeue(&cscanq->lopri, req); + rf_do_dequeue(&cscanq->lopri, req); } else { /* * There's I/Os to the left of the arm. Swing @@ -508,8 +495,7 @@ rf_CscanDequeue(qptr) } RF_DiskQueueData_t * -rf_SstfPeek(qptr) - void *qptr; +rf_SstfPeek(void *qptr) { RF_DiskQueueData_t *req; RF_Sstf_t *sstfq; @@ -517,8 +503,8 @@ rf_SstfPeek(qptr) sstfq = (RF_Sstf_t *) qptr; if ((sstfq->left.queue == NULL) && (sstfq->right.queue == NULL)) { - req = closest_to_arm(&sstfq->lopri, sstfq->last_sector, &sstfq->dir, - sstfq->allow_reverse); + req = rf_closest_to_arm(&sstfq->lopri, sstfq->last_sector, + &sstfq->dir, sstfq->allow_reverse); } else { if (sstfq->left.queue == NULL) req = sstfq->right.queue; @@ -526,8 +512,10 @@ rf_SstfPeek(qptr) if (sstfq->right.queue == NULL) req = sstfq->left.queue; else { - if (SNUM_DIFF(sstfq->last_sector, sstfq->right.queue->sectorOffset) - < SNUM_DIFF(sstfq->last_sector, sstfq->left.qtail->sectorOffset)) { + if (SNUM_DIFF(sstfq->last_sector, + sstfq->right.queue->sectorOffset) < + SNUM_DIFF(sstfq->last_sector, + sstfq->left.qtail->sectorOffset)) { req = sstfq->right.queue; } else { req = sstfq->left.qtail; @@ -542,12 +530,11 @@ rf_SstfPeek(qptr) } RF_DiskQueueData_t * -rf_ScanPeek(qptr) - void *qptr; +rf_ScanPeek(void *qptr) { RF_DiskQueueData_t *req; RF_Sstf_t *scanq; - int dir; + int dir; scanq = (RF_Sstf_t *) qptr; dir = scanq->dir; @@ -560,8 +547,8 @@ rf_ScanPeek(qptr) RF_ASSERT(scanq->lopri.qlen == 0); return (NULL); } - req = closest_to_arm(&scanq->lopri, scanq->last_sector, - &dir, scanq->allow_reverse); + req = rf_closest_to_arm(&scanq->lopri, + scanq->last_sector, &dir, scanq->allow_reverse); } else { req = scanq->right.queue; } @@ -586,8 +573,7 @@ rf_ScanPeek(qptr) } RF_DiskQueueData_t * -rf_CscanPeek(qptr) - void *qptr; +rf_CscanPeek(void *qptr) { RF_DiskQueueData_t *req; RF_Sstf_t *cscanq; @@ -605,8 +591,9 @@ rf_CscanPeek(qptr) RF_ASSERT(cscanq->lopri.qlen == 0); return (NULL); } - req = closest_to_arm(&cscanq->lopri, cscanq->last_sector, - &cscanq->dir, cscanq->allow_reverse); + req = rf_closest_to_arm(&cscanq->lopri, + cscanq->last_sector, &cscanq->dir, + cscanq->allow_reverse); } else { /* * There's I/Os to the left of the arm. 
We'll end @@ -621,43 +608,39 @@ rf_CscanPeek(qptr) return (req); } -int -rf_SstfPromote(qptr, parityStripeID, which_ru) - void *qptr; - RF_StripeNum_t parityStripeID; - RF_ReconUnitNum_t which_ru; +int +rf_SstfPromote(void *qptr, RF_StripeNum_t parityStripeID, + RF_ReconUnitNum_t which_ru) { RF_DiskQueueData_t *r, *next; RF_Sstf_t *sstfq; - int n; + int n; sstfq = (RF_Sstf_t *) qptr; n = 0; if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - printf("raid%d: promote %ld %d queues are %d,%d,%d\n", - r->raidPtr->raidid, (long) parityStripeID, - (int) which_ru, - sstfq->left.qlen, - sstfq->right.qlen, - sstfq->lopri.qlen); + printf("raid%d: promote %ld %d queues are %d,%d,%d.\n", + r->raidPtr->raidid, (long) parityStripeID, + (int) which_ru, sstfq->left.qlen, sstfq->right.qlen, + sstfq->lopri.qlen); } for (r = sstfq->lopri.queue; r; r = next) { next = r->next; if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - printf("raid%d: check promote %lx\n", + printf("raid%d: check promote %lx.\n", r->raidPtr->raidid, (long) r); } if ((r->parityStripeID == parityStripeID) && (r->which_ru == which_ru)) { - do_dequeue(&sstfq->lopri, r); + rf_do_dequeue(&sstfq->lopri, r); rf_SstfEnqueue(qptr, r, RF_IO_NORMAL_PRIORITY); n++; } } if (rf_sstfDebug || rf_scanDebug || rf_cscanDebug) { - printf("raid%d: promoted %d matching I/Os queues are %d,%d,%d\n", - r->raidPtr->raidid, n, sstfq->left.qlen, + printf("raid%d: promoted %d matching I/Os queues are" + " %d,%d,%d.\n", r->raidPtr->raidid, n, sstfq->left.qlen, sstfq->right.qlen, sstfq->lopri.qlen); } return (n); diff --git a/sys/dev/raidframe/rf_sstf.h b/sys/dev/raidframe/rf_sstf.h index d704e62d206..985cad729b8 100644 --- a/sys/dev/raidframe/rf_sstf.h +++ b/sys/dev/raidframe/rf_sstf.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_sstf.h,v 1.2 1999/02/16 00:03:27 niklas Exp $ */ +/* $OpenBSD: rf_sstf.h,v 1.3 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_sstf.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,44 +28,39 @@ * rights to redistribute these changes. 
*/ -#ifndef _RF__RF_SSTF_H_ -#define _RF__RF_SSTF_H_ +#ifndef _RF__RF_SSTF_H_ +#define _RF__RF_SSTF_H_ #include "rf_diskqueue.h" typedef struct RF_SstfQ_s { - RF_DiskQueueData_t *queue; - RF_DiskQueueData_t *qtail; - int qlen; -} RF_SstfQ_t; + RF_DiskQueueData_t *queue; + RF_DiskQueueData_t *qtail; + int qlen; +} RF_SstfQ_t; typedef struct RF_Sstf_s { - RF_SstfQ_t left; - RF_SstfQ_t right; - RF_SstfQ_t lopri; - RF_SectorNum_t last_sector; - int dir; - int allow_reverse; -} RF_Sstf_t; + RF_SstfQ_t left; + RF_SstfQ_t right; + RF_SstfQ_t lopri; + RF_SectorNum_t last_sector; + int dir; + int allow_reverse; +} RF_Sstf_t; -void * -rf_SstfCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void * -rf_ScanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void * -rf_CscanCreate(RF_SectorCount_t sect_per_disk, - RF_AllocListElem_t * cl_list, RF_ShutdownList_t ** listp); -void rf_SstfEnqueue(void *qptr, RF_DiskQueueData_t * req, int priority); -RF_DiskQueueData_t *rf_SstfDequeue(void *qptr); -RF_DiskQueueData_t *rf_SstfPeek(void *qptr); -int -rf_SstfPromote(void *qptr, RF_StripeNum_t parityStripeID, - RF_ReconUnitNum_t which_ru); -RF_DiskQueueData_t *rf_ScanDequeue(void *qptr); -RF_DiskQueueData_t *rf_ScanPeek(void *qptr); -RF_DiskQueueData_t *rf_CscanDequeue(void *qptr); -RF_DiskQueueData_t *rf_CscanPeek(void *qptr); +void *rf_SstfCreate(RF_SectorCount_t, RF_AllocListElem_t *, + RF_ShutdownList_t **); +void *rf_ScanCreate(RF_SectorCount_t, RF_AllocListElem_t *, + RF_ShutdownList_t **); +void *rf_CscanCreate(RF_SectorCount_t, RF_AllocListElem_t *, + RF_ShutdownList_t **); +void rf_SstfEnqueue(void *, RF_DiskQueueData_t *, int); +RF_DiskQueueData_t *rf_SstfDequeue(void *); +RF_DiskQueueData_t *rf_SstfPeek(void *); +int rf_SstfPromote(void *, RF_StripeNum_t, RF_ReconUnitNum_t); +RF_DiskQueueData_t *rf_ScanDequeue(void *); +RF_DiskQueueData_t *rf_ScanPeek(void *); +RF_DiskQueueData_t *rf_CscanDequeue(void *); +RF_DiskQueueData_t *rf_CscanPeek(void *); -#endif /* !_RF__RF_SSTF_H_ */ +#endif /* !_RF__RF_SSTF_H_ */ diff --git a/sys/dev/raidframe/rf_states.c b/sys/dev/raidframe/rf_states.c index 0b78cc14233..3d0aa8ef813 100644 --- a/sys/dev/raidframe/rf_states.c +++ b/sys/dev/raidframe/rf_states.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_states.c,v 1.8 2002/05/28 23:38:10 tdeval Exp $ */ +/* $OpenBSD: rf_states.c,v 1.9 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_states.c,v 1.15 2000/10/20 02:24:45 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -44,19 +45,22 @@ #include "rf_etimer.h" #include "rf_kintf.h" -/* prototypes for some of the available states. - - States must: - - - not block. - - - either schedule rf_ContinueRaidAccess as a callback and return - RF_TRUE, or complete all of their work and return RF_FALSE. +/* + * Prototypes for some of the available states. + * + * States must: + * + * - not block. + * + * - either schedule rf_ContinueRaidAccess as a callback and return + * RF_TRUE, or complete all of their work and return RF_FALSE. + * + * - increment desc->state when they have finished their work. + */ - - increment desc->state when they have finished their work. 
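/*
 * A minimal, self-contained model of the state-function contract spelled out
 * in the comment above: a state either finishes its work, bumps the state
 * index, and reports "not suspended", or arranges to be re-entered later and
 * reports "suspended".  The struct and names below are simplified stand-ins,
 * not the RAIDframe types.
 */
#include <stdio.h>

struct toy_desc {
	int	state;		/* Index into the list of states. */
	int	must_wait;	/* Stand-in for "resource not yet available". */
};

static int
toy_state(struct toy_desc *d)
{
	if (d->must_wait) {
		/* Real code would schedule rf_ContinueRaidAccess() here. */
		return (1);	/* RF_TRUE: suspended, callback re-enters. */
	}
	d->state++;		/* Work done; the driver loops to next state. */
	return (0);		/* RF_FALSE: keep looping without blocking. */
}

int
main(void)
{
	struct toy_desc d = { 0, 0 };

	printf("%d %d\n", toy_state(&d), d.state);	/* Prints "0 1". */
	return (0);
}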
-*/ +char *StateName(RF_AccessState_t); -static char * +char * StateName(RF_AccessState_t state) { switch (state) { @@ -84,16 +88,15 @@ StateName(RF_AccessState_t state) } } -void -rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc) +void +rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc) { - int suspended = RF_FALSE; - int current_state_index = desc->state; + int suspended = RF_FALSE; + int current_state_index = desc->state; RF_AccessState_t current_state = desc->states[current_state_index]; - int unit = desc->raidPtr->raidid; + int unit = desc->raidPtr->raidid; do { - current_state_index = desc->state; current_state = desc->states[current_state_index]; @@ -131,15 +134,17 @@ rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc) break; } - /* after this point, we cannot dereference desc since desc may + /* + * After this point, we cannot dereference desc since desc may * have been freed. desc is only freed in LastState, so if we - * renter this function or loop back up, desc should be valid. */ + * reenter this function or loop back up, desc should be valid. + */ if (rf_printStatesDebug) { - printf("raid%d: State: %-24s StateIndex: %3i desc: 0x%ld %s\n", - unit, StateName(current_state), - current_state_index, (long) desc, - suspended ? "callback scheduled" : "looping"); + printf("raid%d: State: %-24s StateIndex: %3i desc:" + " 0x%ld %s.\n", unit, StateName(current_state), + current_state_index, (long) desc, suspended ? + "callback scheduled" : "looping"); } } while (!suspended && current_state != rf_LastState); @@ -147,14 +152,14 @@ rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc) } -void -rf_ContinueDagAccess(RF_DagList_t * dagList) +void +rf_ContinueDagAccess(RF_DagList_t *dagList) { RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec); RF_RaidAccessDesc_t *desc; RF_DagHeader_t *dag_h; RF_Etimer_t timer; - int i; + int i; desc = dagList->desc; @@ -164,24 +169,27 @@ rf_ContinueDagAccess(RF_DagList_t * dagList) tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer); RF_ETIMER_START(tracerec->timer); - /* skip to dag which just finished */ + /* Skip to dag which just finished. */ dag_h = dagList->dags; for (i = 0; i < dagList->numDagsDone; i++) { dag_h = dag_h->next; } - /* check to see if retry is required */ + /* Check to see if retry is required. */ if (dag_h->status == rf_rollBackward) { - /* when a dag fails, mark desc status as bad and allow all - * other dags in the desc to execute to completion. then, - * free all dags and start over */ - desc->status = 1; /* bad status */ + /* + * When a dag fails, mark desc status as bad and allow all + * other dags in the desc to execute to completion. Then, + * free all dags and start over. + */ + desc->status = 1; /* Bad status. 
*/ { - printf("raid%d: DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n", - desc->raidPtr->raidid, desc->type, + printf("raid%d: DAG failure: %c addr 0x%lx (%ld)" + " nblk 0x%x (%d) buf 0x%lx.\n", + desc->raidPtr->raidid, desc->type, + (long) desc->raidAddress, (long) desc->raidAddress, - (long) desc->raidAddress, (int) desc->numBlocks, - (int) desc->numBlocks, + (int) desc->numBlocks, (int) desc->numBlocks, (unsigned long) (desc->bufPtr)); } } @@ -189,27 +197,27 @@ rf_ContinueDagAccess(RF_DagList_t * dagList) rf_ContinueRaidAccess(desc); } -int -rf_State_LastState(RF_RaidAccessDesc_t * desc) +int +rf_State_LastState(RF_RaidAccessDesc_t *desc) { - void (*callbackFunc) (RF_CBParam_t) = desc->callbackFunc; + void (*callbackFunc) (RF_CBParam_t) = desc->callbackFunc; RF_CBParam_t callbackArg; callbackArg.p = desc->callbackArg; /* - * If this is not an async request, wake up the caller - */ + * If this is not an async request, wake up the caller. + */ if (desc->async_flag == 0) wakeup(desc->bp); - /* - * That's all the IO for this one... unbusy the 'disk'. + /* + * That's all the IO for this one... Unbusy the 'disk'. */ rf_disk_unbusy(desc); - /* + /* * Wakeup any requests waiting to go. */ @@ -217,12 +225,12 @@ rf_State_LastState(RF_RaidAccessDesc_t * desc) ((RF_Raid_t *) desc->raidPtr)->openings++; RF_UNLOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex); - /* wake up any pending IO */ + /* Wake up any pending I/O. */ raidstart(((RF_Raid_t *) desc->raidPtr)); - /* printf("%s: Calling biodone on 0x%x\n", __func__, desc->bp); */ + /* printf("%s: Calling biodone on 0x%x.\n", __func__, desc->bp); */ splassert(IPL_BIO); - biodone(desc->bp); /* access came through ioctl */ + biodone(desc->bp); /* Access came through ioctl. */ if (callbackFunc) callbackFunc(callbackArg); @@ -231,24 +239,26 @@ rf_State_LastState(RF_RaidAccessDesc_t * desc) return RF_FALSE; } -int -rf_State_IncrAccessCount(RF_RaidAccessDesc_t * desc) +int +rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc) { RF_Raid_t *raidPtr; raidPtr = desc->raidPtr; - /* Bummer. We have to do this to be 100% safe w.r.t. the increment - * below */ + /* + * Bummer. We have to do this to be 100% safe w.r.t. the increment + * below. + */ RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); - raidPtr->accs_in_flight++; /* used to detect quiescence */ + raidPtr->accs_in_flight++; /* Used to detect quiescence. */ RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); desc->state++; return RF_FALSE; } -int -rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc) +int +rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc) { RF_Raid_t *raidPtr; @@ -259,19 +269,20 @@ rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc) if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) { rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc); } - rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks); + rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), + desc->numBlocks); RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); desc->state++; return RF_FALSE; } -int -rf_State_Quiesce(RF_RaidAccessDesc_t * desc) +int +rf_State_Quiesce(RF_RaidAccessDesc_t *desc) { RF_AccTraceEntry_t *tracerec = &desc->tracerec; RF_Etimer_t timer; - int suspended = RF_FALSE; + int suspended = RF_FALSE; RF_Raid_t *raidPtr; raidPtr = desc->raidPtr; @@ -283,10 +294,13 @@ rf_State_Quiesce(RF_RaidAccessDesc_t * desc) if (raidPtr->accesses_suspended) { RF_CallbackDesc_t *cb; cb = rf_AllocCallbackDesc(); - /* XXX the following cast is quite bogus... 
- * rf_ContinueRaidAccess takes a (RF_RaidAccessDesc_t *) as an - * argument.. GO */ - cb->callbackFunc = (void (*) (RF_CBParam_t)) rf_ContinueRaidAccess; + /* + * XXX The following cast is quite bogus... + * rf_ContinueRaidAccess takes a (RF_RaidAccessDesc_t *) + * as an argument... GO + */ + cb->callbackFunc = (void (*) (RF_CBParam_t)) + rf_ContinueRaidAccess; cb->callbackArg.p = (void *) desc; cb->next = raidPtr->quiesce_wait_list; raidPtr->quiesce_wait_list = cb; @@ -299,14 +313,14 @@ rf_State_Quiesce(RF_RaidAccessDesc_t * desc) tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer); if (suspended && rf_quiesceDebug) - printf("Stalling access due to quiescence lock\n"); + printf("Stalling access due to quiescence lock.\n"); desc->state++; return suspended; } -int -rf_State_Map(RF_RaidAccessDesc_t * desc) +int +rf_State_Map(RF_RaidAccessDesc_t *desc) { RF_Raid_t *raidPtr = desc->raidPtr; RF_AccTraceEntry_t *tracerec = &desc->tracerec; @@ -314,8 +328,8 @@ rf_State_Map(RF_RaidAccessDesc_t * desc) RF_ETIMER_START(timer); - if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks, - desc->bufPtr, RF_DONT_REMAP))) + if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, + desc->numBlocks, desc->bufPtr, RF_DONT_REMAP))) RF_PANIC(); RF_ETIMER_STOP(timer); @@ -326,21 +340,21 @@ rf_State_Map(RF_RaidAccessDesc_t * desc) return RF_FALSE; } -int -rf_State_Lock(RF_RaidAccessDesc_t * desc) +int +rf_State_Lock(RF_RaidAccessDesc_t *desc) { RF_AccTraceEntry_t *tracerec = &desc->tracerec; RF_Raid_t *raidPtr = desc->raidPtr; RF_AccessStripeMapHeader_t *asmh = desc->asmap; RF_AccessStripeMap_t *asm_p; RF_Etimer_t timer; - int suspended = RF_FALSE; + int suspended = RF_FALSE; RF_ETIMER_START(timer); if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) { RF_StripeNum_t lastStripeID = -1; - /* acquire each lock that we don't already hold */ + /* Acquire each lock that we don't already hold. */ for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) { RF_ASSERT(RF_IO_IS_R_OR_W(desc->type)); if (!rf_suppressLocksAndLargeWrites && @@ -348,48 +362,63 @@ rf_State_Lock(RF_RaidAccessDesc_t * desc) !(desc->flags & RF_DAG_SUPPRESS_LOCKS) && !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED)) { asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED; - RF_ASSERT(asm_p->stripeID > lastStripeID); /* locks must be - * acquired - * hierarchically */ + /* Locks must be acquired hierarchically. */ + RF_ASSERT(asm_p->stripeID > lastStripeID); lastStripeID = asm_p->stripeID; - /* XXX the cast to (void (*)(RF_CBParam_t)) - * below is bogus! GO */ - RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type, - (void (*) (struct buf *)) rf_ContinueRaidAccess, desc, asm_p, + /* + * XXX The cast to (void (*)(RF_CBParam_t)) + * below is bogus ! GO + */ + RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, + desc->type, (void (*) (struct buf *)) + rf_ContinueRaidAccess, desc, asm_p, raidPtr->Layout.dataSectorsPerStripe); - if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID, - &asm_p->lockReqDesc)) { + if (rf_AcquireStripeLock(raidPtr->lockTable, + asm_p->stripeID, &asm_p->lockReqDesc)) { suspended = RF_TRUE; break; } } if (desc->type == RF_IO_TYPE_WRITE && - raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing) { - if (!(asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED)) { - int val; - - asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED; - /* XXX the cast below is quite - * bogus!!! 
XXX GO */ - val = rf_ForceOrBlockRecon(raidPtr, asm_p, - (void (*) (RF_Raid_t *, void *)) rf_ContinueRaidAccess, desc); + raidPtr->status[asm_p->physInfo->row] == + rf_rs_reconstructing) { + if (!(asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED)) + { + int val; + + asm_p->flags |= + RF_ASM_FLAGS_FORCE_TRIED; + /* + * XXX The cast below is quite + * bogus !!! XXX GO + */ + val = rf_ForceOrBlockRecon(raidPtr, + asm_p, + (void (*) (RF_Raid_t *, void *)) + rf_ContinueRaidAccess, desc); if (val == 0) { - asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED; + asm_p->flags |= + RF_ASM_FLAGS_RECON_BLOCKED; } else { suspended = RF_TRUE; break; } } else { if (rf_pssDebug) { - printf("raid%d: skipping force/block because already done, psid %ld\n", - desc->raidPtr->raidid, + printf("raid%d: skipping" + " force/block because" + " already done, psid" + " %ld.\n", + desc->raidPtr->raidid, (long) asm_p->stripeID); } } } else { if (rf_pssDebug) { - printf("raid%d: skipping force/block because not write or not under recon, psid %ld\n", - desc->raidPtr->raidid, + printf("raid%d: skipping force/block" + " because not write or not" + " under recon, psid %ld.\n", + desc->raidPtr->raidid, (long) asm_p->stripeID); } } @@ -405,15 +434,16 @@ rf_State_Lock(RF_RaidAccessDesc_t * desc) desc->state++; return (RF_FALSE); } + /* - * the following three states create, execute, and post-process dags - * the error recovery unit is a single dag. - * by default, SelectAlgorithm creates an array of dags, one per parity stripe - * in some tricky cases, multiple dags per stripe are created - * - dags within a parity stripe are executed sequentially (arbitrary order) - * - dags for distinct parity stripes are executed concurrently + * The following three states create, execute, and post-process DAGs. + * The error recovery unit is a single DAG. + * By default, SelectAlgorithm creates an array of DAGs, one per parity stripe. + * In some tricky cases, multiple dags per stripe are created. + * - DAGs within a parity stripe are executed sequentially (arbitrary order). + * - DAGs for distinct parity stripes are executed concurrently. * - * repeat until all dags complete successfully -or- dag selection fails + * Repeat until all DAGs complete successfully -or- DAG selection fails. * * while !done * create dag(s) (SelectAlgorithm) @@ -426,37 +456,42 @@ rf_State_Lock(RF_RaidAccessDesc_t * desc) * else * done (FAIL) */ -int -rf_State_CreateDAG(RF_RaidAccessDesc_t * desc) +int +rf_State_CreateDAG(RF_RaidAccessDesc_t *desc) { RF_AccTraceEntry_t *tracerec = &desc->tracerec; RF_Etimer_t timer; RF_DagHeader_t *dag_h; - int i, selectStatus; + int i, selectStatus; - /* generate a dag for the access, and fire it off. When the dag - * completes, we'll get re-invoked in the next state. */ + /* + * Generate a dag for the access, and fire it off. When the dag + * completes, we'll get re-invoked in the next state. + */ RF_ETIMER_START(timer); - /* SelectAlgorithm returns one or more dags */ - selectStatus = rf_SelectAlgorithm(desc, desc->flags | RF_DAG_SUPPRESS_LOCKS); + /* SelectAlgorithm returns one or more dags. */ + selectStatus = rf_SelectAlgorithm(desc, + desc->flags | RF_DAG_SUPPRESS_LOCKS); if (rf_printDAGsDebug) for (i = 0; i < desc->numStripes; i++) rf_PrintDAGList(desc->dagArray[i].dags); RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); - /* update time to create all dags */ + /* Update time to create all dags. */ tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer); - desc->status = 0; /* good status */ + desc->status = 0; /* Good status. 
*/ if (selectStatus) { - /* failed to create a dag */ - /* this happens when there are too many faults or incomplete - * dag libraries */ + /* Failed to create a dag. */ + /* + * This happens when there are too many faults or incomplete + * dag libraries. + */ printf("[Failed to create a DAG]\n"); RF_PANIC(); } else { - /* bind dags to desc */ + /* Bind dags to desc. */ for (i = 0; i < desc->numStripes; i++) { dag_h = desc->dagArray[i].dags; while (dag_h) { @@ -466,118 +501,135 @@ rf_State_CreateDAG(RF_RaidAccessDesc_t * desc) } } desc->flags |= RF_DAG_DISPATCH_RETURNED; - desc->state++; /* next state should be rf_State_ExecuteDAG */ + desc->state++; /* Next state should be rf_State_ExecuteDAG. */ } return RF_FALSE; } - -/* the access has an array of dagLists, one dagList per parity stripe. - * fire the first dag in each parity stripe (dagList). - * dags within a stripe (dagList) must be executed sequentially - * - this preserves atomic parity update - * dags for independents parity groups (stripes) are fired concurrently */ - -int -rf_State_ExecuteDAG(RF_RaidAccessDesc_t * desc) +/* + * The access has an array of dagLists, one dagList per parity stripe. + * Fire the first DAG in each parity stripe (dagList). + * DAGs within a stripe (dagList) must be executed sequentially. + * - This preserves atomic parity update. + * DAGs for independents parity groups (stripes) are fired concurrently. + */ +int +rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc) { - int i; + int i; RF_DagHeader_t *dag_h; RF_DagList_t *dagArray = desc->dagArray; - /* next state is always rf_State_ProcessDAG important to do this + /* + * Next state is always rf_State_ProcessDAG. Important to do this * before firing the first dag (it may finish before we leave this - * routine) */ + * routine). + */ desc->state++; - /* sweep dag array, a stripe at a time, firing the first dag in each - * stripe */ + /* + * Sweep dag array, a stripe at a time, firing the first dag in each + * stripe. + */ for (i = 0; i < desc->numStripes; i++) { RF_ASSERT(dagArray[i].numDags > 0); RF_ASSERT(dagArray[i].numDagsDone == 0); RF_ASSERT(dagArray[i].numDagsFired == 0); RF_ETIMER_START(dagArray[i].tracerec.timer); - /* fire first dag in this stripe */ + /* Fire first dag in this stripe. */ dag_h = dagArray[i].dags; RF_ASSERT(dag_h); dagArray[i].numDagsFired++; - /* XXX Yet another case where we pass in a conflicting - * function pointer :-( XXX GO */ - rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, &dagArray[i]); + /* + * XXX Yet another case where we pass in a conflicting + * function pointer :-( XXX GO + */ + rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, + &dagArray[i]); } - /* the DAG will always call the callback, even if there was no - * blocking, so we are always suspended in this state */ + /* + * The DAG will always call the callback, even if there was no + * blocking, so we are always suspended in this state. + */ return RF_TRUE; } - -/* rf_State_ProcessDAG is entered when a dag completes. - * first, check to all dags in the access have completed - * if not, fire as many dags as possible */ - -int -rf_State_ProcessDAG(RF_RaidAccessDesc_t * desc) +/* + * rf_State_ProcessDAG is entered when a dag completes. + * First, check that all DAGs in the access have completed. + * If not, fire as many DAGs as possible. 
+ */ +int +rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc) { RF_AccessStripeMapHeader_t *asmh = desc->asmap; RF_Raid_t *raidPtr = desc->raidPtr; RF_DagHeader_t *dag_h; - int i, j, done = RF_TRUE; + int i, j, done = RF_TRUE; RF_DagList_t *dagArray = desc->dagArray; RF_Etimer_t timer; - /* check to see if this is the last dag */ + /* Check to see if this is the last dag. */ for (i = 0; i < desc->numStripes; i++) if (dagArray[i].numDags != dagArray[i].numDagsDone) done = RF_FALSE; if (done) { if (desc->status) { - /* a dag failed, retry */ + /* A dag failed, retry. */ RF_ETIMER_START(timer); - /* free all dags */ + /* Free all dags. */ for (i = 0; i < desc->numStripes; i++) { rf_FreeDAG(desc->dagArray[i].dags); } rf_MarkFailuresInASMList(raidPtr, asmh); - /* back up to rf_State_CreateDAG */ + /* Back up to rf_State_CreateDAG. */ desc->state = desc->state - 2; return RF_FALSE; } else { - /* move on to rf_State_Cleanup */ + /* Move on to rf_State_Cleanup. */ desc->state++; } return RF_FALSE; } else { - /* more dags to execute */ - /* see if any are ready to be fired. if so, fire them */ - /* don't fire the initial dag in a list, it's fired in - * rf_State_ExecuteDAG */ + /* More dags to execute. */ + /* See if any are ready to be fired. If so, fire them. */ + /* + * Don't fire the initial dag in a list, it's fired in + * rf_State_ExecuteDAG. + */ for (i = 0; i < desc->numStripes; i++) { - if ((dagArray[i].numDagsDone < dagArray[i].numDags) - && (dagArray[i].numDagsDone == dagArray[i].numDagsFired) - && (dagArray[i].numDagsFired > 0)) { + if ((dagArray[i].numDagsDone < dagArray[i].numDags) && + (dagArray[i].numDagsDone == + dagArray[i].numDagsFired) && + (dagArray[i].numDagsFired > 0)) { RF_ETIMER_START(dagArray[i].tracerec.timer); - /* fire next dag in this stripe */ - /* first, skip to next dag awaiting execution */ + /* Fire next dag in this stripe. */ + /* + * First, skip to next dag awaiting execution. + */ dag_h = dagArray[i].dags; for (j = 0; j < dagArray[i].numDagsDone; j++) dag_h = dag_h->next; dagArray[i].numDagsFired++; - /* XXX and again we pass a different function - * pointer.. GO */ - rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, - &dagArray[i]); + /* + * XXX And again we pass a different function + * pointer... GO + */ + rf_DispatchDAG(dag_h, (void (*) (void *)) + rf_ContinueDagAccess, &dagArray[i]); } } return RF_TRUE; } } -/* only make it this far if all dags complete successfully */ -int -rf_State_Cleanup(RF_RaidAccessDesc_t * desc) + +/* Only make it this far if all dags complete successfully. */ +int +rf_State_Cleanup(RF_RaidAccessDesc_t *desc) { RF_AccTraceEntry_t *tracerec = &desc->tracerec; RF_AccessStripeMapHeader_t *asmh = desc->asmap; @@ -594,23 +646,23 @@ rf_State_Cleanup(RF_RaidAccessDesc_t * desc) RF_ETIMER_EVAL(timer); tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer); - /* the RAID I/O is complete. Clean up. */ + /* The RAID I/O is complete. Clean up. */ tracerec->specific.user.dag_retry_us = 0; RF_ETIMER_START(timer); if (desc->flags & RF_DAG_RETURN_DAG) { - /* copy dags into paramDAG */ + /* Copy dags into paramDAG. */ *(desc->paramDAG) = desc->dagArray[0].dags; dag_h = *(desc->paramDAG); for (i = 1; i < desc->numStripes; i++) { - /* concatenate dags from remaining stripes */ + /* Concatenate dags from remaining stripes. */ RF_ASSERT(dag_h); while (dag_h->next) dag_h = dag_h->next; dag_h->next = desc->dagArray[i].dags; } } else { - /* free all dags */ + /* Free all dags. 
*/ for (i = 0; i < desc->numStripes; i++) { rf_FreeDAG(desc->dagArray[i].dags); } @@ -627,9 +679,8 @@ rf_State_Cleanup(RF_RaidAccessDesc_t * desc) asm_p->parityInfo && !(desc->flags & RF_DAG_SUPPRESS_LOCKS)) { RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc); - rf_ReleaseStripeLock(raidPtr->lockTable, - asm_p->stripeID, - &asm_p->lockReqDesc); + rf_ReleaseStripeLock(raidPtr->lockTable, + asm_p->stripeID, &asm_p->lockReqDesc); } if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) { rf_UnblockRecon(raidPtr, asm_p); diff --git a/sys/dev/raidframe/rf_states.h b/sys/dev/raidframe/rf_states.h index 25beba5905a..bb1f395e7f1 100644 --- a/sys/dev/raidframe/rf_states.h +++ b/sys/dev/raidframe/rf_states.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_states.h,v 1.2 1999/02/16 00:03:28 niklas Exp $ */ +/* $OpenBSD: rf_states.h,v 1.3 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_states.h,v 1.3 1999/02/05 00:06:17 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,22 +28,22 @@ * rights to redistribute these changes. */ -#ifndef _RF__RF_STATES_H_ -#define _RF__RF_STATES_H_ +#ifndef _RF__RF_STATES_H_ +#define _RF__RF_STATES_H_ #include "rf_types.h" -void rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc); -void rf_ContinueDagAccess(RF_DagList_t * dagList); -int rf_State_LastState(RF_RaidAccessDesc_t * desc); -int rf_State_IncrAccessCount(RF_RaidAccessDesc_t * desc); -int rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc); -int rf_State_Quiesce(RF_RaidAccessDesc_t * desc); -int rf_State_Map(RF_RaidAccessDesc_t * desc); -int rf_State_Lock(RF_RaidAccessDesc_t * desc); -int rf_State_CreateDAG(RF_RaidAccessDesc_t * desc); -int rf_State_ExecuteDAG(RF_RaidAccessDesc_t * desc); -int rf_State_ProcessDAG(RF_RaidAccessDesc_t * desc); -int rf_State_Cleanup(RF_RaidAccessDesc_t * desc); +void rf_ContinueRaidAccess(RF_RaidAccessDesc_t *); +void rf_ContinueDagAccess(RF_DagList_t *); +int rf_State_LastState(RF_RaidAccessDesc_t *); +int rf_State_IncrAccessCount(RF_RaidAccessDesc_t *); +int rf_State_DecrAccessCount(RF_RaidAccessDesc_t *); +int rf_State_Quiesce(RF_RaidAccessDesc_t *); +int rf_State_Map(RF_RaidAccessDesc_t *); +int rf_State_Lock(RF_RaidAccessDesc_t *); +int rf_State_CreateDAG(RF_RaidAccessDesc_t *); +int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *); +int rf_State_ProcessDAG(RF_RaidAccessDesc_t *); +int rf_State_Cleanup(RF_RaidAccessDesc_t *); -#endif /* !_RF__RF_STATES_H_ */ +#endif /* !_RF__RF_STATES_H_ */ diff --git a/sys/dev/raidframe/rf_stripelocks.c b/sys/dev/raidframe/rf_stripelocks.c index 34eff4c4b47..72b658a2e5a 100644 --- a/sys/dev/raidframe/rf_stripelocks.c +++ b/sys/dev/raidframe/rf_stripelocks.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_stripelocks.c,v 1.4 2000/01/11 18:02:23 peter Exp $ */ +/* $OpenBSD: rf_stripelocks.c,v 1.5 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_stripelocks.c,v 1.5 2000/01/08 23:45:05 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,14 +29,14 @@ */ /* - * stripelocks.c -- code to lock stripes for read and write access + * stripelocks.c -- Code to lock stripes for read and write access. * * The code distinguishes between read locks and write locks. There can be * as many readers to given stripe as desired. When a write request comes * in, no further readers are allowed to enter, and all subsequent requests - * are queued in FIFO order. When a the number of readers goes to zero, the + * are queued in FIFO order. When the number of readers goes to zero, the * writer is given the lock. 
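/*
 * A minimal, self-contained model of the reader/writer queueing described in
 * this comment: any number of readers share a stripe lock, a writer waits
 * for them to drain, and on release the FIFO is scanned so that either one
 * writer or all leading readers (up to the next writer) are granted.  The
 * character FIFO below is only an illustration, not the RF_StripeLockDesc_t
 * machinery.
 */
#include <stdio.h>

/* How many queued requests get the lock when it becomes free. */
static int
grants_on_release(const char *fifo, int n)
{
	int i;

	if (n == 0)
		return (0);
	if (fifo[0] == 'w')
		return (1);		/* A writer holds the lock alone. */
	for (i = 0; i < n && fifo[i] == 'r'; i++)
		;			/* All readers up to the next writer. */
	return (i);
}

int
main(void)
{
	/* Two readers are granted together; the queued writer keeps waiting. */
	printf("%d\n", grants_on_release("rrwr", 4));
	return (0);
}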
When a writer releases the lock, the list of - * queued requests is scanned, and all readersq up to the next writer are + * queued requests is scanned, and all readers up to the next writer are * given the lock. * * The lock table size must be one less than a power of two, but HASH_STRIPEID @@ -46,12 +47,12 @@ * you acquire the lock, you've locked only this range of addresses, and * other threads can concurrently read/write any non-overlapping portions * of the stripe. The "addresses" that you lock are abstract in that you - * can pass in anything you like. The expectation is that you'll pass in + * can pass in anything you like. The expectation is that you'll pass in * the range of physical disk offsets of the parity bits you're planning * to update. The idea behind this, of course, is to allow sub-stripe * locking. The implementation is perhaps not the best imaginable; in the * worst case a lock release is O(n^2) in the total number of outstanding - * requests to a given stripe. Note that if you're striping with a + * requests to a given stripe. Note that if you're striping with a * stripe unit size equal to an entire disk (i.e. not striping), there will * be only one stripe and you may spend some significant number of cycles * searching through stripe lock descriptors. @@ -67,78 +68,118 @@ #include "rf_driver.h" #include "rf_shutdown.h" -#define Dprintf1(s,a) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf2(s,a,b) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) -#define Dprintf3(s,a,b,c) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) -#define Dprintf4(s,a,b,c,d) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL) -#define Dprintf5(s,a,b,c,d,e) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL) -#define Dprintf6(s,a,b,c,d,e,f) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),NULL,NULL) -#define Dprintf7(s,a,b,c,d,e,f,g) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),NULL) -#define Dprintf8(s,a,b,c,d,e,f,g,h) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),(void *)((unsigned long)f),(void *)((unsigned long)g),(void *)((unsigned long)h)) - -#define FLUSH - -#define HASH_STRIPEID(_sid_) ( (_sid_) & (rf_lockTableSize-1) ) - -static void AddToWaitersQueue(RF_LockTableEntry_t * lockTable, RF_StripeLockDesc_t * lockDesc, RF_LockReqDesc_t * lockReqDesc); -static RF_StripeLockDesc_t *AllocStripeLockDesc(RF_StripeNum_t stripeID); -static void FreeStripeLockDesc(RF_StripeLockDesc_t * p); -static void PrintLockedStripes(RF_LockTableEntry_t * lockTable); - -/* determines if two ranges overlap. 
always yields false if either start value is negative */ -#define SINGLE_RANGE_OVERLAP(_strt1, _stop1, _strt2, _stop2) \ - ( (_strt1 >= 0) && (_strt2 >= 0) && (RF_MAX(_strt1, _strt2) <= RF_MIN(_stop1, _stop2)) ) - -/* determines if any of the ranges specified in the two lock descriptors overlap each other */ -#define RANGE_OVERLAP(_cand, _pred) \ - ( SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, (_pred)->start, (_pred)->stop ) || \ - SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, (_pred)->start, (_pred)->stop ) || \ - SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, (_pred)->start2, (_pred)->stop2) || \ - SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, (_pred)->start2, (_pred)->stop2) ) - -/* Determines if a candidate lock request conflicts with a predecessor lock req. - * Note that the arguments are not interchangeable. +#define Dprintf1(s,a) \ + rf_debug_printf(s, (void *)((unsigned long)a), \ + NULL, NULL, NULL, NULL, NULL, NULL, NULL) +#define Dprintf2(s,a,b) \ + rf_debug_printf(s, (void *)((unsigned long)a), \ + (void *)((unsigned long)b), NULL, NULL, NULL, NULL, NULL, NULL) +#define Dprintf3(s,a,b,c) \ + rf_debug_printf(s, (void *)((unsigned long)a), \ + (void *)((unsigned long)b), (void *)((unsigned long)c), \ + NULL, NULL, NULL, NULL, NULL) +#define Dprintf4(s,a,b,c,d) \ + rf_debug_printf(s, (void *)((unsigned long)a), \ + (void *)((unsigned long)b), (void *)((unsigned long)c), \ + (void *)((unsigned long)d), NULL, NULL, NULL, NULL) +#define Dprintf5(s,a,b,c,d,e) \ + rf_debug_printf(s, (void *)((unsigned long)a), \ + (void *)((unsigned long)b), (void *)((unsigned long)c), \ + (void *)((unsigned long)d), (void *)((unsigned long)e), \ + NULL, NULL, NULL) +#define Dprintf6(s,a,b,c,d,e,f) \ + rf_debug_printf(s, (void *)((unsigned long)a), \ + (void *)((unsigned long)b), (void *)((unsigned long)c), \ + (void *)((unsigned long)d), (void *)((unsigned long)e), \ + (void *)((unsigned long)f), NULL, NULL) +#define Dprintf7(s,a,b,c,d,e,f,g) \ + rf_debug_printf(s, (void *)((unsigned long)a), \ + (void *)((unsigned long)b), (void *)((unsigned long)c), \ + (void *)((unsigned long)d), (void *)((unsigned long)e), \ + (void *)((unsigned long)f), (void *)((unsigned long)g), NULL) +#define Dprintf8(s,a,b,c,d,e,f,g,h) \ + rf_debug_printf(s, (void *)((unsigned long)a), \ + (void *)((unsigned long)b), (void *)((unsigned long)c), \ + (void *)((unsigned long)d), (void *)((unsigned long)e), \ + (void *)((unsigned long)f), (void *)((unsigned long)g), \ + (void *)((unsigned long)h)) + +#define FLUSH + +#define HASH_STRIPEID(_sid_) ((_sid_) & (rf_lockTableSize-1)) + +void rf_AddToWaitersQueue(RF_LockTableEntry_t *, RF_StripeLockDesc_t *, + RF_LockReqDesc_t *); +RF_StripeLockDesc_t *rf_AllocStripeLockDesc(RF_StripeNum_t); +void rf_FreeStripeLockDesc(RF_StripeLockDesc_t *); +void rf_PrintLockedStripes(RF_LockTableEntry_t *); + +/* + * Determines if two ranges overlap. Always yields false if either start + * value is negative. + */ +#define SINGLE_RANGE_OVERLAP(_strt1,_stop1,_strt2,_stop2) \ + ((_strt1 >= 0) && (_strt2 >= 0) && \ + (RF_MAX(_strt1, _strt2) <= RF_MIN(_stop1, _stop2))) + +/* + * Determines if any of the ranges specified in the two lock descriptors + * overlap each other. 
+ */ +#define RANGE_OVERLAP(_cand,_pred) \ + (SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, \ + (_pred)->start, (_pred)->stop) || \ + SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, \ + (_pred)->start, (_pred)->stop) || \ + SINGLE_RANGE_OVERLAP((_cand)->start, (_cand)->stop, \ + (_pred)->start2, (_pred)->stop2) || \ + SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, \ + (_pred)->start2, (_pred)->stop2)) + +/* + * Determines if a candidate lock request conflicts with a predecessor + * lock req. Note that the arguments are not interchangeable. * The rules are: - * a candidate read conflicts with a predecessor write if any ranges overlap - * a candidate write conflicts with a predecessor read if any ranges overlap - * a candidate write conflicts with a predecessor write if any ranges overlap + * A candidate read conflicts with a predecessor write if any ranges overlap. + * A candidate write conflicts with a predecessor read if any ranges overlap. + * A candidate write conflicts with a predecessor write if any ranges overlap. */ -#define STRIPELOCK_CONFLICT(_cand, _pred) \ - RANGE_OVERLAP((_cand), (_pred)) && \ - ( ( (((_cand)->type == RF_IO_TYPE_READ) && ((_pred)->type == RF_IO_TYPE_WRITE)) || \ - (((_cand)->type == RF_IO_TYPE_WRITE) && ((_pred)->type == RF_IO_TYPE_READ)) || \ - (((_cand)->type == RF_IO_TYPE_WRITE) && ((_pred)->type == RF_IO_TYPE_WRITE)) \ - ) \ - ) +#define STRIPELOCK_CONFLICT(_cand,_pred) \ + (RANGE_OVERLAP((_cand), (_pred)) && \ + (((((_cand)->type == RF_IO_TYPE_READ) && \ + ((_pred)->type == RF_IO_TYPE_WRITE)) || \ + (((_cand)->type == RF_IO_TYPE_WRITE) && \ + ((_pred)->type == RF_IO_TYPE_READ)) || \ + (((_cand)->type == RF_IO_TYPE_WRITE) && \ + ((_pred)->type == RF_IO_TYPE_WRITE))))) static RF_FreeList_t *rf_stripelock_freelist; -#define RF_MAX_FREE_STRIPELOCK 128 -#define RF_STRIPELOCK_INC 8 -#define RF_STRIPELOCK_INITIAL 32 +#define RF_MAX_FREE_STRIPELOCK 128 +#define RF_STRIPELOCK_INC 8 +#define RF_STRIPELOCK_INITIAL 32 -static void rf_ShutdownStripeLockFreeList(void *); -static void rf_RaidShutdownStripeLocks(void *); +void rf_ShutdownStripeLockFreeList(void *); +void rf_RaidShutdownStripeLocks(void *); -static void -rf_ShutdownStripeLockFreeList(ignored) - void *ignored; +void +rf_ShutdownStripeLockFreeList(void *ignored) { - RF_FREELIST_DESTROY(rf_stripelock_freelist, next, (RF_StripeLockDesc_t *)); + RF_FREELIST_DESTROY(rf_stripelock_freelist, next, + (RF_StripeLockDesc_t *)); } -int -rf_ConfigureStripeLockFreeList(listp) - RF_ShutdownList_t **listp; +int +rf_ConfigureStripeLockFreeList(RF_ShutdownList_t **listp) { unsigned mask; - int rc; + int rc; RF_FREELIST_CREATE(rf_stripelock_freelist, RF_MAX_FREE_STRIPELOCK, RF_STRIPELOCK_INITIAL, sizeof(RF_StripeLockDesc_t)); rc = rf_ShutdownCreate(listp, rf_ShutdownStripeLockFreeList, NULL); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s" + " line %d rc=%d.\n", __FILE__, __LINE__, rc); rf_ShutdownStripeLockFreeList(NULL); return (rc); } @@ -148,40 +189,42 @@ rf_ConfigureStripeLockFreeList(listp) if (rf_lockTableSize == mask) break; if (!mask) { - printf("[WARNING: lock table size must be a power of two. Setting to %d.]\n", RF_DEFAULT_LOCK_TABLE_SIZE); + printf("[WARNING: lock table size must be a power of two." 
+ " Setting to %d.]\n", RF_DEFAULT_LOCK_TABLE_SIZE); rf_lockTableSize = RF_DEFAULT_LOCK_TABLE_SIZE; } return (0); } RF_LockTableEntry_t * -rf_MakeLockTable() +rf_MakeLockTable(void) { RF_LockTableEntry_t *lockTable; - int i, rc; + int i, rc; - RF_Calloc(lockTable, ((int) rf_lockTableSize), sizeof(RF_LockTableEntry_t), (RF_LockTableEntry_t *)); + RF_Calloc(lockTable, ((int) rf_lockTableSize), + sizeof(RF_LockTableEntry_t), (RF_LockTableEntry_t *)); if (lockTable == NULL) return (NULL); for (i = 0; i < rf_lockTableSize; i++) { rc = rf_mutex_init(&lockTable[i].mutex); if (rc) { - RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, - __LINE__, rc); - /* XXX clean up other mutexes */ + RF_ERRORMSG3("Unable to init mutex file %s line %d" + " rc=%d.\n", __FILE__, __LINE__, rc); + /* XXX Clean up other mutexes. */ return (NULL); } } return (lockTable); } -void -rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable) +void +rf_ShutdownStripeLocks(RF_LockTableEntry_t *lockTable) { - int i; + int i; if (rf_stripeLockDebug) { - PrintLockedStripes(lockTable); + rf_PrintLockedStripes(lockTable); } for (i = 0; i < rf_lockTableSize; i++) { rf_mutex_destroy(&lockTable[i].mutex); @@ -189,84 +232,91 @@ rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable) RF_Free(lockTable, rf_lockTableSize * sizeof(RF_LockTableEntry_t)); } -static void -rf_RaidShutdownStripeLocks(arg) - void *arg; +void +rf_RaidShutdownStripeLocks(void *arg) { RF_Raid_t *raidPtr = (RF_Raid_t *) arg; rf_ShutdownStripeLocks(raidPtr->lockTable); } -int -rf_ConfigureStripeLocks( - RF_ShutdownList_t ** listp, - RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr) +int +rf_ConfigureStripeLocks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, + RF_Config_t *cfgPtr) { - int rc; + int rc; raidPtr->lockTable = rf_MakeLockTable(); if (raidPtr->lockTable == NULL) return (ENOMEM); rc = rf_ShutdownCreate(listp, rf_RaidShutdownStripeLocks, raidPtr); if (rc) { - RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", - __FILE__, __LINE__, rc); + RF_ERRORMSG3("Unable to add to shutdown list file %s line %d" + " rc=%d.\n", __FILE__, __LINE__, rc); rf_ShutdownStripeLocks(raidPtr->lockTable); return (rc); } return (0); } -/* returns 0 if you've got the lock, and non-zero if you have to wait. - * if and only if you have to wait, we'll cause cbFunc to get invoked - * with cbArg when you are granted the lock. We store a tag in *releaseTag + +/* + * Returns 0 if you've got the lock, and non-zero if you have to wait. + * If and only if you have to wait, we'll cause cbFunc to get invoked + * with cbArg when you are granted the lock. We store a tag in *releaseTag * that you need to give back to us when you release the lock. 
*/ -int -rf_AcquireStripeLock( - RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, - RF_LockReqDesc_t * lockReqDesc) +int +rf_AcquireStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID, + RF_LockReqDesc_t *lockReqDesc) { RF_StripeLockDesc_t *lockDesc; RF_LockReqDesc_t *p; - int tid = 0, hashval = HASH_STRIPEID(stripeID); - int retcode = 0; + int tid = 0, hashval = HASH_STRIPEID(stripeID); + int retcode = 0; RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type)); if (rf_stripeLockDebug) { if (stripeID == -1) - Dprintf1("[%d] Lock acquisition supressed (stripeID == -1)\n", tid); + Dprintf1("[%d] Lock acquisition supressed" + " (stripeID == -1).\n", tid); else { - Dprintf8("[%d] Trying to acquire stripe lock table 0x%lx SID %ld type %c range %ld-%ld, range2 %ld-%ld hashval %d\n", - tid, (unsigned long) lockTable, stripeID, lockReqDesc->type, lockReqDesc->start, - lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); - Dprintf3("[%d] lock %ld hashval %d\n", tid, stripeID, hashval); + Dprintf8("[%d] Trying to acquire stripe lock table" + " 0x%lx SID %ld type %c range %ld-%ld, range2" + " %ld-%ld hashval %d.\n", tid, + (unsigned long) lockTable, stripeID, + lockReqDesc->type, lockReqDesc->start, + lockReqDesc->stop, lockReqDesc->start2, + lockReqDesc->stop2); + Dprintf3("[%d] lock %ld hashval %d.\n", tid, stripeID, + hashval); FLUSH; } } if (stripeID == -1) return (0); - lockReqDesc->next = NULL; /* just to be sure */ + lockReqDesc->next = NULL; /* Just to be sure. */ RF_LOCK_MUTEX(lockTable[hashval].mutex); - for (lockDesc = lockTable[hashval].descList; lockDesc; lockDesc = lockDesc->next) { + for (lockDesc = lockTable[hashval].descList; lockDesc; + lockDesc = lockDesc->next) { if (lockDesc->stripeID == stripeID) break; } - if (!lockDesc) { /* no entry in table => no one reading or - * writing */ - lockDesc = AllocStripeLockDesc(stripeID); + if (!lockDesc) { + /* No entry in table => no one reading or writing. */ + lockDesc = rf_AllocStripeLockDesc(stripeID); lockDesc->next = lockTable[hashval].descList; lockTable[hashval].descList = lockDesc; if (lockReqDesc->type == RF_IO_TYPE_WRITE) lockDesc->nWriters++; lockDesc->granted = lockReqDesc; if (rf_stripeLockDebug) { - Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); + Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld" + " %ld-%ld granted.\n", tid, stripeID, + lockReqDesc->type, + lockReqDesc->start, lockReqDesc->stop, + lockReqDesc->start2, lockReqDesc->stop2); FLUSH; } } else { @@ -274,20 +324,26 @@ rf_AcquireStripeLock( if (lockReqDesc->type == RF_IO_TYPE_WRITE) lockDesc->nWriters++; - if (lockDesc->nWriters == 0) { /* no need to search any lists - * if there are no writers - * anywhere */ + if (lockDesc->nWriters == 0) { + /* + * No need to search any lists if there are no writers + * anywhere. + */ lockReqDesc->next = lockDesc->granted; lockDesc->granted = lockReqDesc; if (rf_stripeLockDebug) { - Dprintf7("[%d] no writers: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); + Dprintf7("[%d] no writers: lock %ld %c %ld-%ld" + " %ld-%ld granted.\n", tid, + stripeID, lockReqDesc->type, + lockReqDesc->start, lockReqDesc->stop, + lockReqDesc->start2, lockReqDesc->stop2); FLUSH; } } else { - - /* search the granted & waiting lists for a conflict. 
- * stop searching as soon as we find one */ + /* + * Search the granted & waiting lists for a conflict. + * Stop searching as soon as we find one. + */ retcode = 0; for (p = lockDesc->granted; p; p = p->next) if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { @@ -296,31 +352,41 @@ rf_AcquireStripeLock( } if (!retcode) for (p = lockDesc->waitersH; p; p = p->next) - if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { + if (STRIPELOCK_CONFLICT(lockReqDesc, p)) + { retcode = 2; break; } if (!retcode) { - lockReqDesc->next = lockDesc->granted; /* no conflicts found => - * grant lock */ + /* No conflicts found => grant lock */ + lockReqDesc->next = lockDesc->granted; lockDesc->granted = lockReqDesc; if (rf_stripeLockDebug) { - Dprintf7("[%d] no conflicts: lock %ld %c %ld-%ld %ld-%ld granted\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, - lockReqDesc->start2, lockReqDesc->stop2); + Dprintf7("[%d] no conflicts: lock %ld" + " %c %ld-%ld %ld-%ld granted.\n", + tid, stripeID, lockReqDesc->type, + lockReqDesc->start, + lockReqDesc->stop, + lockReqDesc->start2, + lockReqDesc->stop2); FLUSH; } } else { if (rf_stripeLockDebug) { - Dprintf6("[%d] conflict: lock %ld %c %ld-%ld hashval=%d not granted\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, + Dprintf6("[%d] conflict: lock %ld %c" + " %ld-%ld hashval=%d not" + " granted.\n", tid, stripeID, + lockReqDesc->type, + lockReqDesc->start, + lockReqDesc->stop, hashval); - Dprintf3("[%d] lock %ld retcode=%d\n", tid, stripeID, retcode); + Dprintf3("[%d] lock %ld retcode=%d.\n", + tid, stripeID, retcode); FLUSH; } - AddToWaitersQueue(lockTable, lockDesc, lockReqDesc); /* conflict => the - * current access must - * wait */ + /* Conflict => the current access must wait. */ + rf_AddToWaitersQueue(lockTable, lockDesc, + lockReqDesc); } } } @@ -329,27 +395,30 @@ rf_AcquireStripeLock( return (retcode); } -void -rf_ReleaseStripeLock( - RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, - RF_LockReqDesc_t * lockReqDesc) +void +rf_ReleaseStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID, + RF_LockReqDesc_t *lockReqDesc) { RF_StripeLockDesc_t *lockDesc, *ld_t; RF_LockReqDesc_t *lr, *lr_t, *callbacklist, *t; RF_IoType_t type = lockReqDesc->type; - int tid = 0, hashval = HASH_STRIPEID(stripeID); - int release_it, consider_it; + int tid = 0, hashval = HASH_STRIPEID(stripeID); + int release_it, consider_it; RF_LockReqDesc_t *candidate, *candidate_t, *predecessor; RF_ASSERT(RF_IO_IS_R_OR_W(type)); if (rf_stripeLockDebug) { if (stripeID == -1) - Dprintf1("[%d] Lock release supressed (stripeID == -1)\n", tid); + Dprintf1("[%d] Lock release supressed" + " (stripeID == -1).\n", tid); else { - Dprintf8("[%d] Releasing stripe lock on stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2, lockTable); + Dprintf8("[%d] Releasing stripe lock on stripe ID %ld," + " type %c range %ld-%ld %ld-%ld table 0x%lx.\n", + tid, stripeID, lockReqDesc->type, + lockReqDesc->start, lockReqDesc->stop, + lockReqDesc->start2, lockReqDesc->stop2, + lockTable); FLUSH; } } @@ -358,22 +427,27 @@ rf_ReleaseStripeLock( RF_LOCK_MUTEX(lockTable[hashval].mutex); - /* find the stripe lock descriptor */ - for (ld_t = NULL, lockDesc = lockTable[hashval].descList; lockDesc; ld_t = lockDesc, lockDesc = lockDesc->next) { + /* Find the stripe lock descriptor. 
*/ + for (ld_t = NULL, lockDesc = lockTable[hashval].descList; + lockDesc; ld_t = lockDesc, lockDesc = lockDesc->next) { if (lockDesc->stripeID == stripeID) break; } - RF_ASSERT(lockDesc); /* major error to release a lock that doesn't - * exist */ + RF_ASSERT(lockDesc); /* + * Major error to release a lock that doesn't + * exist. + */ - /* find the stripe lock request descriptor & delete it from the list */ + /* Find the stripe lock request descriptor & delete it from the list. */ for (lr_t = NULL, lr = lockDesc->granted; lr; lr_t = lr, lr = lr->next) if (lr == lockReqDesc) break; - RF_ASSERT(lr && (lr == lockReqDesc)); /* major error to release a + RF_ASSERT(lr && (lr == lockReqDesc)); /* + * Major error to release a * lock that hasn't been - * granted */ + * granted. + */ if (lr_t) lr_t->next = lr->next; else { @@ -385,56 +459,73 @@ rf_ReleaseStripeLock( if (lockReqDesc->type == RF_IO_TYPE_WRITE) lockDesc->nWriters--; - /* search through the waiters list to see if anyone needs to be woken - * up. for each such descriptor in the wait list, we check it against + /* + * Search through the waiters list to see if anyone needs to be woken + * up. For each such descriptor in the wait list, we check it against * everything granted and against everything _in front_ of it in the - * waiters queue. If it conflicts with none of these, we release it. - * - * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED LIST HERE. + * waiters queue. If it conflicts with none of these, we release it. + * + * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED LIST + * HERE. * This will roach the case where the callback tries to acquire a new - * lock in the same stripe. There are some asserts to try and detect + * lock in the same stripe. There are some asserts to try and detect * this. - * - * We apply 2 performance optimizations: (1) if releasing this lock - * results in no more writers to this stripe, we just release - * everybody waiting, since we place no restrictions on the number of - * concurrent reads. (2) we consider as candidates for wakeup only - * those waiters that have a range overlap with either the descriptor - * being woken up or with something in the callbacklist (i.e. - * something we've just now woken up). This allows us to avoid the - * long evaluation for some descriptors. */ + * + * We apply 2 performance optimizations: + * (1) If releasing this lock results in no more writers to this + * stripe, we just release everybody waiting, since we place no + * restrictions on the number of concurrent reads. + * (2) We consider as candidates for wakeup only those waiters that + * have a range overlap with either the descriptor being woken up + * or with something in the callbacklist (i.e. something we've + * just now woken up). + * This allows us to avoid the long evaluation for some descriptors. + */ callbacklist = NULL; - if (lockDesc->nWriters == 0) { /* performance tweak (1) */ + if (lockDesc->nWriters == 0) { /* Performance tweak (1). */ while (lockDesc->waitersH) { - lr = lockDesc->waitersH; /* delete from waiters - * list */ + lr = lockDesc->waitersH; /* + * Delete from waiters + * list. + */ lockDesc->waitersH = lr->next; RF_ASSERT(lr->type == RF_IO_TYPE_READ); - lr->next = lockDesc->granted; /* add to granted list */ + lr->next = lockDesc->granted; /* + * Add to granted list. 
+ */ lockDesc->granted = lr; RF_ASSERT(!lr->templink); - lr->templink = callbacklist; /* put on callback list + lr->templink = callbacklist; /* + * Put on callback list * so that we'll invoke - * callback below */ + * callback below. + */ callbacklist = lr; if (rf_stripeLockDebug) { - Dprintf8("[%d] No writers: granting lock stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, lr->type, lr->start, lr->stop, lr->start2, lr->stop2, (unsigned long) lockTable); + Dprintf8("[%d] No writers: granting lock" + " stripe ID %ld, type %c range %ld-%l" + "d %ld-%ld table 0x%lx.\n", tid, stripeID, + lr->type, lr->start, lr->stop, + lr->start2, lr->stop2, + (unsigned long) lockTable); FLUSH; } } - lockDesc->waitersT = NULL; /* we've purged the whole - * waiters list */ + lockDesc->waitersT = NULL; /* + * We've purged the whole + * waiters list. + */ } else - for (candidate_t = NULL, candidate = lockDesc->waitersH; candidate;) { + for (candidate_t = NULL, candidate = lockDesc->waitersH; + candidate;) { - /* performance tweak (2) */ + /* Performance tweak (2). */ consider_it = 0; if (RANGE_OVERLAP(lockReqDesc, candidate)) consider_it = 1; @@ -446,8 +537,13 @@ rf_ReleaseStripeLock( } if (!consider_it) { if (rf_stripeLockDebug) { - Dprintf8("[%d] No overlap: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, + Dprintf8("[%d] No overlap: rejecting" + " candidate stripeID %ld, type %c" + " range %ld-%ld %ld-%ld table" + " 0x%lx.\n", tid, stripeID, + candidate->type, + candidate->start, candidate->stop, + candidate->start2, candidate->stop2, (unsigned long) lockTable); FLUSH; } @@ -455,14 +551,27 @@ rf_ReleaseStripeLock( candidate = candidate->next; continue; } - /* we have a candidate for release. check to make - * sure it is not blocked by any granted locks */ + /* + * We have a candidate for release. Check to make + * sure it is not blocked by any granted locks. + */ release_it = 1; - for (predecessor = lockDesc->granted; predecessor; predecessor = predecessor->next) { - if (STRIPELOCK_CONFLICT(candidate, predecessor)) { + for (predecessor = lockDesc->granted; predecessor; + predecessor = predecessor->next) { + if (STRIPELOCK_CONFLICT(candidate, predecessor)) + { if (rf_stripeLockDebug) { - Dprintf8("[%d] Conflicts with granted lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, + Dprintf8("[%d] Conflicts with" + " granted lock: rejecting" + " candidate stripeID %ld," + " type %c range %ld-%ld" + " %ld-%ld table 0x%lx.\n", + tid, stripeID, + candidate->type, + candidate->start, + candidate->stop, + candidate->start2, + candidate->stop2, (unsigned long) lockTable); FLUSH; } @@ -471,15 +580,35 @@ rf_ReleaseStripeLock( } } - /* now check to see if the candidate is blocked by any - * waiters that occur before it it the wait queue */ + /* + * Now check to see if the candidate is blocked by any + * waiters that occur before it in the wait queue. 
+ */ if (release_it) - for (predecessor = lockDesc->waitersH; predecessor != candidate; predecessor = predecessor->next) { - if (STRIPELOCK_CONFLICT(candidate, predecessor)) { + for (predecessor = lockDesc->waitersH; + predecessor != candidate; + predecessor = predecessor->next) { + if (STRIPELOCK_CONFLICT(candidate, + predecessor)) { if (rf_stripeLockDebug) { - Dprintf8("[%d] Conflicts with waiting lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, - (unsigned long) lockTable); + Dprintf8("[%d]" + " Conflicts with" + " waiting lock:" + " rejecting" + " candidate" + " stripeID %ld," + " type %c" + " range %ld-%ld" + " %ld-%ld" + " table 0x%lx.\n", + tid, stripeID, + candidate->type, + candidate->start, + candidate->stop, + candidate->start2, + candidate->stop2, + (unsigned long) + lockTable); FLUSH; } release_it = 0; @@ -487,54 +616,70 @@ rf_ReleaseStripeLock( } } - /* release it if indicated */ + /* Release it if indicated. */ if (release_it) { if (rf_stripeLockDebug) { - Dprintf8("[%d] Granting lock to candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n", - tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, + Dprintf8("[%d] Granting lock to" + " candidate stripeID %ld, type %c" + " range %ld-%ld %ld-%ld table" + " 0x%lx.\n", tid, stripeID, + candidate->type, + candidate->start, candidate->stop, + candidate->start2, candidate->stop2, (unsigned long) lockTable); FLUSH; } if (candidate_t) { candidate_t->next = candidate->next; if (lockDesc->waitersT == candidate) - lockDesc->waitersT = candidate_t; /* cannot be waitersH - * since candidate_t is - * not NULL */ + /* + * Cannot be waitersH + * since candidate_t is + * not NULL. + */ + lockDesc->waitersT = + candidate_t; } else { - RF_ASSERT(candidate == lockDesc->waitersH); - lockDesc->waitersH = lockDesc->waitersH->next; + RF_ASSERT(candidate == + lockDesc->waitersH); + lockDesc->waitersH = + lockDesc->waitersH->next; if (!lockDesc->waitersH) lockDesc->waitersT = NULL; } - candidate->next = lockDesc->granted; /* move it to the - * granted list */ + /* Move it to the granted list. */ + candidate->next = lockDesc->granted; lockDesc->granted = candidate; RF_ASSERT(!candidate->templink); - candidate->templink = callbacklist; /* put it on the list of - * things to be called - * after we release the - * mutex */ + /* + * Put it on the list of things to be called + * after we release the mutex. + */ + candidate->templink = callbacklist; callbacklist = candidate; if (!candidate_t) candidate = lockDesc->waitersH; else - candidate = candidate_t->next; /* continue with the - * rest of the list */ + /* + * Continue with the rest of the list. + */ + candidate = candidate_t->next; } else { candidate_t = candidate; - candidate = candidate->next; /* continue with the - * rest of the list */ + /* Continue with the rest of the list. */ + candidate = candidate->next; } } - /* delete the descriptor if no one is waiting or active */ + /* Delete the descriptor if no one is waiting or active. 
*/ if (!lockDesc->granted && !lockDesc->waitersH) { RF_ASSERT(lockDesc->nWriters == 0); if (rf_stripeLockDebug) { - Dprintf3("[%d] Last lock released (table 0x%lx): deleting desc for stripeID %ld\n", tid, (unsigned long) lockTable, stripeID); + Dprintf3("[%d] Last lock released (table 0x%lx):" + " deleting desc for stripeID %ld.\n", tid, + (unsigned long) lockTable, stripeID); FLUSH; } if (ld_t) @@ -543,17 +688,21 @@ rf_ReleaseStripeLock( RF_ASSERT(lockDesc == lockTable[hashval].descList); lockTable[hashval].descList = lockDesc->next; } - FreeStripeLockDesc(lockDesc); - lockDesc = NULL;/* only for the ASSERT below */ + rf_FreeStripeLockDesc(lockDesc); + lockDesc = NULL; /* Only for the ASSERT below. */ } RF_UNLOCK_MUTEX(lockTable[hashval].mutex); - /* now that we've unlocked the mutex, invoke the callback on all the - * descriptors in the list */ - RF_ASSERT(!((callbacklist) && (!lockDesc))); /* if we deleted the + /* + * Now that we've unlocked the mutex, invoke the callback on all the + * descriptors in the list. + */ + RF_ASSERT(!((callbacklist) && (!lockDesc))); /* + * If we deleted the * descriptor, we should * have no callbacks to - * do */ + * do. + */ for (candidate = callbacklist; candidate;) { t = candidate; candidate = candidate->templink; @@ -561,17 +710,17 @@ rf_ReleaseStripeLock( (t->cbFunc) (t->cbArg); } } -/* must have the indicated lock table mutex upon entry */ -static void -AddToWaitersQueue( - RF_LockTableEntry_t * lockTable, - RF_StripeLockDesc_t * lockDesc, - RF_LockReqDesc_t * lockReqDesc) + +/* Must have the indicated lock table mutex upon entry. */ +void +rf_AddToWaitersQueue(RF_LockTableEntry_t *lockTable, + RF_StripeLockDesc_t *lockDesc, RF_LockReqDesc_t *lockReqDesc) { - int tid; + int tid; if (rf_stripeLockDebug) { - Dprintf3("[%d] Waiting on lock for stripe %ld table 0x%lx\n", tid, lockDesc->stripeID, (unsigned long) lockTable); + Dprintf3("[%d] Waiting on lock for stripe %ld table 0x%lx.\n", + tid, lockDesc->stripeID, (unsigned long) lockTable); FLUSH; } if (!lockDesc->waitersH) { @@ -582,29 +731,29 @@ AddToWaitersQueue( } } -static RF_StripeLockDesc_t * -AllocStripeLockDesc(RF_StripeNum_t stripeID) +RF_StripeLockDesc_t * +rf_AllocStripeLockDesc(RF_StripeNum_t stripeID) { RF_StripeLockDesc_t *p; - RF_FREELIST_GET(rf_stripelock_freelist, p, next, (RF_StripeLockDesc_t *)); + RF_FREELIST_GET(rf_stripelock_freelist, p, next, + (RF_StripeLockDesc_t *)); if (p) { p->stripeID = stripeID; } return (p); } -static void -FreeStripeLockDesc(RF_StripeLockDesc_t * p) +void +rf_FreeStripeLockDesc(RF_StripeLockDesc_t *p) { RF_FREELIST_FREE(rf_stripelock_freelist, p, next); } -static void -PrintLockedStripes(lockTable) - RF_LockTableEntry_t *lockTable; +void +rf_PrintLockedStripes(RF_LockTableEntry_t *lockTable) { - int i, j, foundone = 0, did; + int i, j, foundone = 0, did; RF_StripeLockDesc_t *p; RF_LockReqDesc_t *q; @@ -615,16 +764,20 @@ PrintLockedStripes(lockTable) foundone = 1; for (p = lockTable[i].descList; p; p = p->next) { printf("Stripe ID 0x%lx (%d) nWriters %d\n", - (long) p->stripeID, (int) p->stripeID, p->nWriters); + (long) p->stripeID, (int) p->stripeID, + p->nWriters); if (!(p->granted)) printf("Granted: (none)\n"); else printf("Granted:\n"); - for (did = 1, j = 0, q = p->granted; q; j++, q = q->next) { - printf(" %c(%ld-%ld", q->type, (long) q->start, (long) q->stop); + for (did = 1, j = 0, q = p->granted; q; + j++, q = q->next) { + printf(" %c(%ld-%ld", q->type, + (long) q->start, (long) q->stop); if (q->start2 != -1) - printf(",%ld-%ld) ", (long) 
q->start2, + printf(",%ld-%ld) ", + (long) q->start2, (long) q->stop2); else printf(") "); @@ -641,10 +794,14 @@ PrintLockedStripes(lockTable) printf("Waiting: (none)\n"); else printf("Waiting:\n"); - for (did = 1, j = 0, q = p->waitersH; q; j++, q = q->next) { - printf("%c(%ld-%ld", q->type, (long) q->start, (long) q->stop); + for (did = 1, j = 0, q = p->waitersH; q; + j++, q = q->next) { + printf("%c(%ld-%ld", q->type, + (long) q->start, (long) q->stop); if (q->start2 != -1) - printf(",%ld-%ld) ", (long) q->start2, (long) q->stop2); + printf(",%ld-%ld) ", + (long) q->start2, + (long) q->stop2); else printf(") "); if (j && !(j % 4)) { diff --git a/sys/dev/raidframe/rf_stripelocks.h b/sys/dev/raidframe/rf_stripelocks.h index d339e2ae520..2c7a596a213 100644 --- a/sys/dev/raidframe/rf_stripelocks.h +++ b/sys/dev/raidframe/rf_stripelocks.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_stripelocks.h,v 1.2 1999/02/16 00:03:29 niklas Exp $ */ +/* $OpenBSD: rf_stripelocks.h,v 1.3 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_stripelocks.h,v 1.3 1999/02/05 00:06:18 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -29,21 +30,21 @@ /***************************************************************************** * - * stripelocks.h -- header file for locking stripes + * stripelocks.h -- Header file for locking stripes. * * Note that these functions are called from the execution routines of certain * DAG Nodes, and so they must be NON-BLOCKING to assure maximum parallelism - * in the DAG. Accordingly, when a node wants to acquire a lock, it calls - * AcquireStripeLock, supplying a pointer to a callback function. If the lock + * in the DAG. Accordingly, when a node wants to acquire a lock, it calls + * AcquireStripeLock, supplying a pointer to a callback function. If the lock * is free at the time of the call, 0 is returned, indicating that the lock - * has been acquired. If the lock is not free, 1 is returned, and a copy of - * the function pointer and argument are held in the lock table. When the + * has been acquired. If the lock is not free, 1 is returned, and a copy of + * the function pointer and argument are held in the lock table. When the * lock becomes free, the callback function is invoked. * *****************************************************************************/ -#ifndef _RF__RF_STRIPELOCKS_H_ -#define _RF__RF_STRIPELOCKS_H_ +#ifndef _RF__RF_STRIPELOCKS_H_ +#define _RF__RF_STRIPELOCKS_H_ #include <sys/buf.h> @@ -52,70 +53,98 @@ #include "rf_general.h" struct RF_LockReqDesc_s { - RF_IoType_t type; /* read or write */ - RF_int64 start, stop; /* start and end of range to be locked */ - RF_int64 start2, stop2; /* start and end of 2nd range to be locked */ - void (*cbFunc) (struct buf *); /* callback function */ - void *cbArg; /* argument to callback function */ - RF_LockReqDesc_t *next; /* next element in chain */ - RF_LockReqDesc_t *templink; /* for making short-lived lists of - * request descriptors */ + RF_IoType_t type; /* Read or write. */ + RF_int64 start, stop; /* + * Start and end of range to + * be locked. + */ + RF_int64 start2, stop2;/* + * Start and end of 2nd range + * to be locked. + */ + void (*cbFunc) (struct buf *); + /* Callback function. */ + void *cbArg; /* + * Argument to callback + * function. + */ + RF_LockReqDesc_t *next; /* Next element in chain. */ + RF_LockReqDesc_t *templink; /* + * For making short-lived lists + * of request descriptors. 
+ */ }; -#define RF_ASSERT_VALID_LOCKREQ(_lr_) { \ - RF_ASSERT(RF_IO_IS_R_OR_W((_lr_)->type)); \ -} + +#define RF_ASSERT_VALID_LOCKREQ(_lr_) do { \ + RF_ASSERT(RF_IO_IS_R_OR_W((_lr_)->type)); \ +} while (0) struct RF_StripeLockDesc_s { - RF_StripeNum_t stripeID;/* the stripe ID */ - RF_LockReqDesc_t *granted; /* unordered list of granted requests */ - RF_LockReqDesc_t *waitersH; /* FIFO queue of all waiting reqs, - * both read and write (Head and Tail) */ - RF_LockReqDesc_t *waitersT; - int nWriters; /* number of writers either granted or waiting */ - RF_StripeLockDesc_t *next; /* for hash table collision resolution */ + RF_StripeNum_t stripeID; /* The stripe ID. */ + RF_LockReqDesc_t *granted; /* + * Unordered list of granted + * requests. + */ + RF_LockReqDesc_t *waitersH; /* FIFO queue of all waiting + * reqs, both read and write + * (Head and Tail). + */ + RF_LockReqDesc_t *waitersT; + int nWriters; /* + * Number of writers either + * granted or waiting. + */ + RF_StripeLockDesc_t *next; /* + * For hash table collision + * resolution. + */ }; struct RF_LockTableEntry_s { - RF_DECLARE_MUTEX(mutex) /* mutex on this hash chain */ - RF_StripeLockDesc_t *descList; /* hash chain of lock descriptors */ + RF_DECLARE_MUTEX (mutex); /* Mutex on this hash chain. */ + RF_StripeLockDesc_t *descList; /* + * Hash chain of lock + * descriptors. + */ }; + /* - * Initializes a stripe lock descriptor. _defSize is the number of sectors + * Initializes a stripe lock descriptor. _defSize is the number of sectors * that we lock when there is no parity information in the ASM (e.g. RAID0). */ -#define RF_INIT_LOCK_REQ_DESC(_lrd, _typ, _cbf, _cba, _asm, _defSize) \ - { \ - (_lrd).type = _typ; \ - (_lrd).start2 = -1; \ - (_lrd).stop2 = -1; \ - if ((_asm)->parityInfo) { \ - (_lrd).start = (_asm)->parityInfo->startSector; \ - (_lrd).stop = (_asm)->parityInfo->startSector + (_asm)->parityInfo->numSector-1; \ - if ((_asm)->parityInfo->next) { \ - (_lrd).start2 = (_asm)->parityInfo->next->startSector; \ - (_lrd).stop2 = (_asm)->parityInfo->next->startSector + (_asm)->parityInfo->next->numSector-1; \ - } \ - } else { \ - (_lrd).start = 0; \ - (_lrd).stop = (_defSize); \ - } \ - (_lrd).templink= NULL; \ - (_lrd).cbFunc = (_cbf); \ - (_lrd).cbArg = (void *) (_cba); \ - } +#define RF_INIT_LOCK_REQ_DESC(_lrd, _typ, _cbf, _cba, _asm, _defSize) \ +do { \ + (_lrd).type = _typ; \ + (_lrd).start2 = -1; \ + (_lrd).stop2 = -1; \ + if ((_asm)->parityInfo) { \ + (_lrd).start = (_asm)->parityInfo->startSector; \ + (_lrd).stop = (_asm)->parityInfo->startSector + \ + (_asm)->parityInfo->numSector-1; \ + if ((_asm)->parityInfo->next) { \ + (_lrd).start2 = \ + (_asm)->parityInfo->next->startSector; \ + (_lrd).stop2 = \ + (_asm)->parityInfo->next->startSector + \ + (_asm)->parityInfo->next->numSector-1; \ + } \ + } else { \ + (_lrd).start = 0; \ + (_lrd).stop = (_defSize); \ + } \ + (_lrd).templink= NULL; \ + (_lrd).cbFunc = (_cbf); \ + (_lrd).cbArg = (void *) (_cba); \ +} while (0) -int rf_ConfigureStripeLockFreeList(RF_ShutdownList_t ** listp); +int rf_ConfigureStripeLockFreeList(RF_ShutdownList_t **); RF_LockTableEntry_t *rf_MakeLockTable(void); -void rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable); -int -rf_ConfigureStripeLocks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int -rf_AcquireStripeLock(RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, RF_LockReqDesc_t * lockReqDesc); -void -rf_ReleaseStripeLock(RF_LockTableEntry_t * lockTable, - RF_StripeNum_t stripeID, RF_LockReqDesc_t * 
lockReqDesc); +void rf_ShutdownStripeLocks(RF_LockTableEntry_t *); +int rf_ConfigureStripeLocks(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *); +int rf_AcquireStripeLock(RF_LockTableEntry_t *, RF_StripeNum_t, + RF_LockReqDesc_t *); +void rf_ReleaseStripeLock(RF_LockTableEntry_t *, RF_StripeNum_t, + RF_LockReqDesc_t *); -#endif /* !_RF__RF_STRIPELOCKS_H_ */ +#endif /* !_RF__RF_STRIPELOCKS_H_ */ diff --git a/sys/dev/raidframe/rf_strutils.c b/sys/dev/raidframe/rf_strutils.c index c55e98a77bb..b551dac2f5d 100644 --- a/sys/dev/raidframe/rf_strutils.c +++ b/sys/dev/raidframe/rf_strutils.c @@ -1,9 +1,10 @@ -/* $OpenBSD: rf_strutils.c,v 1.2 1999/02/16 00:03:29 niklas Exp $ */ +/* $OpenBSD: rf_strutils.c,v 1.3 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_strutils.c,v 1.3 1999/02/05 00:06:18 oster Exp $ */ + /* * rf_strutils.c * - * String-parsing funcs + * String-parsing funcs. */ /* * Copyright (c) 1995 Carnegie-Mellon University. @@ -40,15 +41,16 @@ #include "rf_utils.h" -/* finds a non-white character in the line */ -char * +/* Finds a non-white character in the line. */ +char * rf_find_non_white(char *p) { for (; *p != '\0' && (*p == ' ' || *p == '\t'); p++); return (p); } -/* finds a white character in the line */ -char * + +/* Finds a white character in the line. */ +char * rf_find_white(char *p) { for (; *p != '\0' && (*p != ' ' && *p != '\t'); p++); diff --git a/sys/dev/raidframe/rf_threadstuff.c b/sys/dev/raidframe/rf_threadstuff.c index cfd1d05ca13..9ea9e20448e 100644 --- a/sys/dev/raidframe/rf_threadstuff.c +++ b/sys/dev/raidframe/rf_threadstuff.c @@ -1,8 +1,10 @@ -/* $OpenBSD: rf_threadstuff.c,v 1.3 2000/01/07 14:50:23 peter Exp $ */ +/* $OpenBSD: rf_threadstuff.c,v 1.4 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_threadstuff.c,v 1.5 1999/12/07 02:13:28 oster Exp $ */ + /* * rf_threadstuff.c */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -35,18 +37,17 @@ #include "rf_general.h" #include "rf_shutdown.h" -static void mutex_destroyer(void *); -static void cond_destroyer(void *); +void mutex_destroyer(void *); +void cond_destroyer(void *); /* - * Shared stuff + * Shared stuff. 
*/ -static void -mutex_destroyer(arg) - void *arg; +void +mutex_destroyer(void *arg) { - int rc; + int rc; rc = rf_mutex_destroy(arg); if (rc) { @@ -54,148 +55,141 @@ mutex_destroyer(arg) } } -static void -cond_destroyer(arg) - void *arg; +void +cond_destroyer(void *arg) { - int rc; + int rc; rc = rf_cond_destroy(arg); if (rc) { - RF_ERRORMSG1("RAIDFRAME: Error %d auto-destroying condition\n", rc); + RF_ERRORMSG1("RAIDFRAME: Error %d auto-destroying condition\n", + rc); } } -int -_rf_create_managed_mutex(listp, m, file, line) - RF_ShutdownList_t **listp; -RF_DECLARE_MUTEX(*m) - char *file; - int line; +int +_rf_create_managed_mutex(RF_ShutdownList_t **listp, RF_DECLARE_MUTEX(*m), + char *file, int line) { - int rc, rc1; + int rc, rc1; rc = rf_mutex_init(m); if (rc) return (rc); + rc = _rf_ShutdownCreate(listp, mutex_destroyer, (void *) m, file, line); if (rc) { RF_ERRORMSG1("RAIDFRAME: Error %d adding shutdown entry\n", rc); rc1 = rf_mutex_destroy(m); if (rc1) { - RF_ERRORMSG1("RAIDFRAME: Error %d destroying mutex\n", rc1); + RF_ERRORMSG1("RAIDFRAME: Error %d destroying mutex\n", + rc1); } } + return (rc); } -int -_rf_create_managed_cond(listp, c, file, line) - RF_ShutdownList_t **listp; -RF_DECLARE_COND(*c) - char *file; - int line; +int +_rf_create_managed_cond(RF_ShutdownList_t **listp, RF_DECLARE_COND(*c), + char *file, int line) { - int rc, rc1; + int rc, rc1; rc = rf_cond_init(c); if (rc) return (rc); + rc = _rf_ShutdownCreate(listp, cond_destroyer, (void *) c, file, line); if (rc) { RF_ERRORMSG1("RAIDFRAME: Error %d adding shutdown entry\n", rc); rc1 = rf_cond_destroy(c); if (rc1) { - RF_ERRORMSG1("RAIDFRAME: Error %d destroying cond\n", rc1); + RF_ERRORMSG1("RAIDFRAME: Error %d destroying cond\n", + rc1); } } return (rc); } -int -_rf_init_managed_threadgroup(listp, g, file, line) - RF_ShutdownList_t **listp; - RF_ThreadGroup_t *g; - char *file; - int line; +int +_rf_init_managed_threadgroup(RF_ShutdownList_t **listp, RF_ThreadGroup_t *g, + char *file, int line) { - int rc; + int rc; rc = _rf_create_managed_mutex(listp, &g->mutex, file, line); if (rc) return (rc); + rc = _rf_create_managed_cond(listp, &g->cond, file, line); if (rc) return (rc); + g->created = g->running = g->shutdown = 0; return (0); } -int -_rf_destroy_threadgroup(g, file, line) - RF_ThreadGroup_t *g; - char *file; - int line; +int +_rf_destroy_threadgroup(RF_ThreadGroup_t *g, char *file, int line) { - int rc1, rc2; + int rc1, rc2; rc1 = rf_mutex_destroy(&g->mutex); rc2 = rf_cond_destroy(&g->cond); + if (rc1) return (rc1); + return (rc2); } -int -_rf_init_threadgroup(g, file, line) - RF_ThreadGroup_t *g; - char *file; - int line; +int +_rf_init_threadgroup(RF_ThreadGroup_t *g, char *file, int line) { - int rc; + int rc; rc = rf_mutex_init(&g->mutex); if (rc) return (rc); + rc = rf_cond_init(&g->cond); if (rc) { rf_mutex_destroy(&g->mutex); return (rc); } + g->created = g->running = g->shutdown = 0; return (0); } /* - * Kernel + * Kernel. */ -int -rf_mutex_init(m) -decl_simple_lock_data(, *m) + +int +rf_mutex_init(decl_simple_lock_data(, *m)) { simple_lock_init(m); return (0); } -int -rf_mutex_destroy(m) -decl_simple_lock_data(, *m) +int +rf_mutex_destroy(decl_simple_lock_data(, *m)) { return (0); } -int -rf_cond_init(c) -RF_DECLARE_COND(*c) +int +rf_cond_init(RF_DECLARE_COND(*c)) { - *c = 0; /* no reason */ + *c = 0; /* No reason. 
*/ return (0); } -int -rf_cond_destroy(c) -RF_DECLARE_COND(*c) +int +rf_cond_destroy(RF_DECLARE_COND(*c)) { return (0); } diff --git a/sys/dev/raidframe/rf_threadstuff.h b/sys/dev/raidframe/rf_threadstuff.h index 046d6428bd5..93f9706b3a7 100644 --- a/sys/dev/raidframe/rf_threadstuff.h +++ b/sys/dev/raidframe/rf_threadstuff.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_threadstuff.h,v 1.7 2002/03/14 01:27:02 millert Exp $ */ +/* $OpenBSD: rf_threadstuff.h,v 1.8 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_threadstuff.h,v 1.8 2000/06/11 03:35:38 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -28,157 +29,162 @@ */ /* - * threadstuff.h -- definitions for threads, locks, and synchronization + * threadstuff.h -- Definitions for threads, locks, and synchronization. * * The purpose of this file is provide some illusion of portability. * If the functions below can be implemented with the same semantics on * some new system, then at least the synchronization and thread control * part of the code should not require modification to port to a new machine. - * the only other place where the pthread package is explicitly used is + * The only other place where the pthread package is explicitly used is * threadid.h * - * this file should be included above stdio.h to get some necessary defines. + * This file should be included above stdio.h to get some necessary defines. * */ -#ifndef _RF__RF_THREADSTUFF_H_ -#define _RF__RF_THREADSTUFF_H_ +#ifndef _RF__RF_THREADSTUFF_H_ +#define _RF__RF_THREADSTUFF_H_ #include "rf_types.h" #include <sys/types.h> #include <sys/param.h> -#ifdef _KERNEL +#ifdef _KERNEL #include <sys/systm.h> #include <sys/proc.h> #include <sys/kthread.h> #endif -#define rf_create_managed_mutex(a,b) _rf_create_managed_mutex(a,b,__FILE__,__LINE__) -#define rf_create_managed_cond(a,b) _rf_create_managed_cond(a,b,__FILE__,__LINE__) -#define rf_init_managed_threadgroup(a,b) _rf_init_managed_threadgroup(a,b,__FILE__,__LINE__) -#define rf_init_threadgroup(a) _rf_init_threadgroup(a,__FILE__,__LINE__) -#define rf_destroy_threadgroup(a) _rf_destroy_threadgroup(a,__FILE__,__LINE__) - -int _rf_init_threadgroup(RF_ThreadGroup_t * g, char *file, int line); -int _rf_destroy_threadgroup(RF_ThreadGroup_t * g, char *file, int line); -int -_rf_init_managed_threadgroup(RF_ShutdownList_t ** listp, - RF_ThreadGroup_t * g, char *file, int line); +#define rf_create_managed_mutex(a,b) \ + _rf_create_managed_mutex(a,b,__FILE__,__LINE__) +#define rf_create_managed_cond(a,b) \ + _rf_create_managed_cond(a,b,__FILE__,__LINE__) +#define rf_init_managed_threadgroup(a,b) \ + _rf_init_managed_threadgroup(a,b,__FILE__,__LINE__) +#define rf_init_threadgroup(a) \ + _rf_init_threadgroup(a,__FILE__,__LINE__) +#define rf_destroy_threadgroup(a) \ + _rf_destroy_threadgroup(a,__FILE__,__LINE__) + +int _rf_init_threadgroup(RF_ThreadGroup_t *, char *, int); +int _rf_destroy_threadgroup(RF_ThreadGroup_t *, char *, int); +int _rf_init_managed_threadgroup(RF_ShutdownList_t **, RF_ThreadGroup_t *, + char *, int); #include <sys/lock.h> -#define decl_simple_lock_data(a,b) a struct simplelock b; -#define simple_lock_addr(a) ((struct simplelock *)&(a)) +#define decl_simple_lock_data(a,b) a struct simplelock b +#define simple_lock_addr(a) ((struct simplelock *)&(a)) -typedef struct proc *RF_Thread_t; -typedef void *RF_ThreadArg_t; +typedef struct proc *RF_Thread_t; +typedef void *RF_ThreadArg_t; -#define RF_DECLARE_MUTEX(_m_) decl_simple_lock_data(,(_m_)) -#define RF_DECLARE_STATIC_MUTEX(_m_) 
decl_simple_lock_data(static,(_m_)) -#define RF_DECLARE_EXTERN_MUTEX(_m_) decl_simple_lock_data(extern,(_m_)) +#define RF_DECLARE_MUTEX(_m_) decl_simple_lock_data(,(_m_)) +#define RF_DECLARE_STATIC_MUTEX(_m_) decl_simple_lock_data(static,(_m_)) +#define RF_DECLARE_EXTERN_MUTEX(_m_) decl_simple_lock_data(extern,(_m_)) -#define RF_DECLARE_COND(_c_) int _c_; -#define RF_DECLARE_STATIC_COND(_c_) static int _c_; -#define RF_DECLARE_EXTERN_COND(_c_) extern int _c_; +#define RF_DECLARE_COND(_c_) int _c_ +#define RF_DECLARE_STATIC_COND(_c_) static int _c_ +#define RF_DECLARE_EXTERN_COND(_c_) extern int _c_ -#define RF_LOCK_MUTEX(_m_) simple_lock(&(_m_)) -#define RF_UNLOCK_MUTEX(_m_) simple_unlock(&(_m_)) +#define RF_LOCK_MUTEX(_m_) simple_lock(&(_m_)) +#define RF_UNLOCK_MUTEX(_m_) simple_unlock(&(_m_)) /* - * In Net- and OpenBSD, kernel threads are simply processes which share several + * In Net- and OpenBSD, kernel threads are simply processes that share several * substructures and never run in userspace. */ -#define RF_WAIT_COND(_c_,_m_) { \ - RF_UNLOCK_MUTEX(_m_); \ - tsleep(&_c_, PRIBIO, "rfwcond", 0); \ - RF_LOCK_MUTEX(_m_); \ -} -#define RF_SIGNAL_COND(_c_) wakeup(&(_c_)) -#define RF_BROADCAST_COND(_c_) wakeup(&(_c_)) -#define RF_CREATE_THREAD(_handle_, _func_, _arg_, _name_) \ - kthread_create((void (*)(void *))(_func_), (void *)(_arg_), \ +#define RF_WAIT_COND(_c_,_m_) do { \ + RF_UNLOCK_MUTEX(_m_); \ + tsleep(&_c_, PRIBIO, "rfwcond", 0); \ + RF_LOCK_MUTEX(_m_); \ +} while (0) +#define RF_SIGNAL_COND(_c_) wakeup(&(_c_)) +#define RF_BROADCAST_COND(_c_) wakeup(&(_c_)) +#define RF_CREATE_THREAD(_handle_, _func_, _arg_, _name_) \ + kthread_create((void (*)(void *))(_func_), (void *)(_arg_), \ (struct proc **)&(_handle_), _name_) struct RF_ThreadGroup_s { - int created; - int running; - int shutdown; - RF_DECLARE_MUTEX(mutex) - RF_DECLARE_COND(cond) + int created; + int running; + int shutdown; + RF_DECLARE_MUTEX (mutex); + RF_DECLARE_COND (cond); }; + /* - * Someone has started a thread in the group + * Someone has started a thread in the group. */ -#define RF_THREADGROUP_STARTED(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - (_g_)->created++; \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} +#define RF_THREADGROUP_STARTED(_g_) do { \ + RF_LOCK_MUTEX((_g_)->mutex); \ + (_g_)->created++; \ + RF_UNLOCK_MUTEX((_g_)->mutex); \ +} while (0) /* - * Thread announcing that it is now running + * Thread announcing that it is now running. */ -#define RF_THREADGROUP_RUNNING(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - (_g_)->running++; \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ - RF_SIGNAL_COND((_g_)->cond); \ -} +#define RF_THREADGROUP_RUNNING(_g_) do { \ + RF_LOCK_MUTEX((_g_)->mutex); \ + (_g_)->running++; \ + RF_UNLOCK_MUTEX((_g_)->mutex); \ + RF_SIGNAL_COND((_g_)->cond); \ +} while (0) /* - * Thread announcing that it is now done + * Thread announcing that it is now done. */ -#define RF_THREADGROUP_DONE(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - (_g_)->shutdown++; \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ - RF_SIGNAL_COND((_g_)->cond); \ -} +#define RF_THREADGROUP_DONE(_g_) do { \ + RF_LOCK_MUTEX((_g_)->mutex); \ + (_g_)->shutdown++; \ + RF_UNLOCK_MUTEX((_g_)->mutex); \ + RF_SIGNAL_COND((_g_)->cond); \ +} while (0) /* - * Wait for all threads to start running + * Wait for all threads to start running. 
*/ -#define RF_THREADGROUP_WAIT_START(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - while((_g_)->running < (_g_)->created) { \ - RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ - } \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} +#define RF_THREADGROUP_WAIT_START(_g_) do { \ + RF_LOCK_MUTEX((_g_)->mutex); \ + while((_g_)->running < (_g_)->created) { \ + RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ + } \ + RF_UNLOCK_MUTEX((_g_)->mutex); \ +} while (0) /* - * Wait for all threads to stop running + * Wait for all threads to stop running. */ -#if !defined(__NetBSD__) && !defined(__OpenBSD__) -#define RF_THREADGROUP_WAIT_STOP(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - RF_ASSERT((_g_)->running == (_g_)->created); \ - while((_g_)->shutdown < (_g_)->running) { \ - RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ - } \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} +#if !defined(__NetBSD__) && !defined(__OpenBSD__) +#define RF_THREADGROUP_WAIT_STOP(_g_) do { \ + RF_LOCK_MUTEX((_g_)->mutex); \ + RF_ASSERT((_g_)->running == (_g_)->created); \ + while((_g_)->shutdown < (_g_)->running) { \ + RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ + } \ + RF_UNLOCK_MUTEX((_g_)->mutex); \ +} while (0) #else - /* XXX Note that we've removed the assert. That should get put back in once - * we actually get something like a kernel thread running */ -#define RF_THREADGROUP_WAIT_STOP(_g_) { \ - RF_LOCK_MUTEX((_g_)->mutex); \ - while((_g_)->shutdown < (_g_)->running) { \ - RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ - } \ - RF_UNLOCK_MUTEX((_g_)->mutex); \ -} + /* + * XXX Note that we've removed the assert. That should be put back in once + * we actually get something like a kernel thread running. + */ +#define RF_THREADGROUP_WAIT_STOP(_g_) do { \ + RF_LOCK_MUTEX((_g_)->mutex); \ + while((_g_)->shutdown < (_g_)->running) { \ + RF_WAIT_COND((_g_)->cond, (_g_)->mutex); \ + } \ + RF_UNLOCK_MUTEX((_g_)->mutex); \ +} while (0) #endif -int rf_mutex_init(struct simplelock *); -int rf_mutex_destroy(struct simplelock *); -int -_rf_create_managed_mutex(RF_ShutdownList_t **, struct simplelock *, - char *, int); -int -_rf_create_managed_cond(RF_ShutdownList_t ** listp, int *, - char *file, int line); +int rf_mutex_init(struct simplelock *); +int rf_mutex_destroy(struct simplelock *); +int _rf_create_managed_mutex(RF_ShutdownList_t **, struct simplelock *, + char *, int); +int _rf_create_managed_cond(RF_ShutdownList_t ** listp, int *, char *, int); + +int rf_cond_init(int *); +int rf_cond_destroy(int *); -int rf_cond_init(int *c); -int rf_cond_destroy(int *c); -#endif /* !_RF__RF_THREADSTUFF_H_ */ +#endif /* !_RF__RF_THREADSTUFF_H_ */ diff --git a/sys/dev/raidframe/rf_types.h b/sys/dev/raidframe/rf_types.h index 73e252d17fd..a5fb966c214 100644 --- a/sys/dev/raidframe/rf_types.h +++ b/sys/dev/raidframe/rf_types.h @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_types.h,v 1.5 2002/05/22 21:22:32 tdeval Exp $ */ +/* $OpenBSD: rf_types.h,v 1.6 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_types.h,v 1.6 1999/09/05 03:05:55 oster Exp $ */ + /* * rf_types.h */ @@ -31,12 +32,12 @@ */ /*********************************************************** * - * rf_types.h -- standard types for RAIDframe + * rf_types.h -- Standard types for RAIDframe. * ***********************************************************/ -#ifndef _RF__RF_TYPES_H_ -#define _RF__RF_TYPES_H_ +#ifndef _RF__RF_TYPES_H_ +#define _RF__RF_TYPES_H_ #include "rf_archs.h" @@ -57,7 +58,7 @@ * The various integer types should be self-explanatory; we * use these elsewhere to avoid size confusion. 
* - * LONGSHIFT is lg(sizeof(long)) (that is, log base two of sizeof(long) + * LONGSHIFT is lg(sizeof(long)) (that is, log base two of sizeof(long)) * */ @@ -65,25 +66,25 @@ #include <sys/limits.h> #include <machine/endian.h> -#if BYTE_ORDER == BIG_ENDIAN -#define RF_IS_BIG_ENDIAN 1 -#elif BYTE_ORDER == LITTLE_ENDIAN -#define RF_IS_BIG_ENDIAN 0 +#if BYTE_ORDER == BIG_ENDIAN +#define RF_IS_BIG_ENDIAN 1 +#elif BYTE_ORDER == LITTLE_ENDIAN +#define RF_IS_BIG_ENDIAN 0 #else #error byte order not defined #endif -typedef int8_t RF_int8; -typedef u_int8_t RF_uint8; -typedef int16_t RF_int16; -typedef u_int16_t RF_uint16; -typedef int32_t RF_int32; -typedef u_int32_t RF_uint32; -typedef int64_t RF_int64; -typedef u_int64_t RF_uint64; -#if LONG_BIT == 32 -#define RF_LONGSHIFT 2 -#elif LONG_BIT == 64 -#define RF_LONGSHIFT 3 +typedef int8_t RF_int8; +typedef u_int8_t RF_uint8; +typedef int16_t RF_int16; +typedef u_int16_t RF_uint16; +typedef int32_t RF_int32; +typedef u_int32_t RF_uint32; +typedef int64_t RF_int64; +typedef u_int64_t RF_uint64; +#if LONG_BIT == 32 +#define RF_LONGSHIFT 2 +#elif LONG_BIT == 64 +#define RF_LONGSHIFT 3 #else #error word size not defined #endif @@ -95,143 +96,153 @@ typedef u_int64_t RF_uint64; * the different places they may be defined in system header * files. */ -#define RF_TRUE 1 -#define RF_FALSE 0 +#define RF_TRUE 1 +#define RF_FALSE 0 /* - * Now, some generic types + * Now, some generic types. */ -typedef RF_uint64 RF_IoCount_t; -typedef RF_uint64 RF_Offset_t; -typedef RF_uint32 RF_PSSFlags_t; -typedef RF_uint64 RF_SectorCount_t; -typedef RF_uint64 RF_StripeCount_t; -typedef RF_int64 RF_SectorNum_t;/* these are unsigned so we can set them to - * (-1) for "uninitialized" */ -typedef RF_int64 RF_StripeNum_t; -typedef RF_int64 RF_RaidAddr_t; -typedef int RF_RowCol_t; /* unsigned so it can be (-1) */ -typedef RF_int64 RF_HeadSepLimit_t; -typedef RF_int64 RF_ReconUnitCount_t; -typedef int RF_ReconUnitNum_t; - -typedef char RF_ParityConfig_t; - -typedef char RF_DiskQueueType_t[1024]; -#define RF_DISK_QUEUE_TYPE_NONE "" - -/* values for the 'type' field in a reconstruction buffer */ -typedef int RF_RbufType_t; -#define RF_RBUF_TYPE_EXCLUSIVE 0 /* this buf assigned exclusively to - * one disk */ -#define RF_RBUF_TYPE_FLOATING 1 /* this is a floating recon buf */ -#define RF_RBUF_TYPE_FORCED 2 /* this rbuf was allocated to complete - * a forced recon */ - -typedef char RF_IoType_t; -#define RF_IO_TYPE_READ 'r' -#define RF_IO_TYPE_WRITE 'w' -#define RF_IO_TYPE_NOP 'n' -#define RF_IO_IS_R_OR_W(_type_) (((_type_) == RF_IO_TYPE_READ) \ - || ((_type_) == RF_IO_TYPE_WRITE)) - -typedef void (*RF_VoidFuncPtr) (void *,...); - -typedef RF_uint32 RF_AccessStripeMapFlags_t; -typedef RF_uint32 RF_DiskQueueDataFlags_t; -typedef RF_uint32 RF_DiskQueueFlags_t; -typedef RF_uint32 RF_RaidAccessFlags_t; - -#define RF_DISKQUEUE_DATA_FLAGS_NONE ((RF_DiskQueueDataFlags_t)0) - -typedef struct RF_AccessStripeMap_s RF_AccessStripeMap_t; -typedef struct RF_AccessStripeMapHeader_s RF_AccessStripeMapHeader_t; -typedef struct RF_AllocListElem_s RF_AllocListElem_t; -typedef struct RF_CallbackDesc_s RF_CallbackDesc_t; -typedef struct RF_ChunkDesc_s RF_ChunkDesc_t; -typedef struct RF_CommonLogData_s RF_CommonLogData_t; -typedef struct RF_Config_s RF_Config_t; -typedef struct RF_CumulativeStats_s RF_CumulativeStats_t; -typedef struct RF_DagHeader_s RF_DagHeader_t; -typedef struct RF_DagList_s RF_DagList_t; -typedef struct RF_DagNode_s RF_DagNode_t; -typedef struct RF_DeclusteredConfigInfo_s 
RF_DeclusteredConfigInfo_t; -typedef struct RF_DiskId_s RF_DiskId_t; -typedef struct RF_DiskMap_s RF_DiskMap_t; -typedef struct RF_DiskQueue_s RF_DiskQueue_t; -typedef struct RF_DiskQueueData_s RF_DiskQueueData_t; -typedef struct RF_DiskQueueSW_s RF_DiskQueueSW_t; -typedef struct RF_Etimer_s RF_Etimer_t; -typedef struct RF_EventCreate_s RF_EventCreate_t; -typedef struct RF_FreeList_s RF_FreeList_t; -typedef struct RF_LockReqDesc_s RF_LockReqDesc_t; -typedef struct RF_LockTableEntry_s RF_LockTableEntry_t; -typedef struct RF_MCPair_s RF_MCPair_t; -typedef struct RF_OwnerInfo_s RF_OwnerInfo_t; -typedef struct RF_ParityLog_s RF_ParityLog_t; -typedef struct RF_ParityLogAppendQueue_s RF_ParityLogAppendQueue_t; -typedef struct RF_ParityLogData_s RF_ParityLogData_t; -typedef struct RF_ParityLogDiskQueue_s RF_ParityLogDiskQueue_t; -typedef struct RF_ParityLogQueue_s RF_ParityLogQueue_t; -typedef struct RF_ParityLogRecord_s RF_ParityLogRecord_t; -typedef struct RF_PerDiskReconCtrl_s RF_PerDiskReconCtrl_t; -typedef struct RF_PSStatusHeader_s RF_PSStatusHeader_t; -typedef struct RF_PhysDiskAddr_s RF_PhysDiskAddr_t; -typedef struct RF_PropHeader_s RF_PropHeader_t; -typedef struct RF_Raid_s RF_Raid_t; -typedef struct RF_RaidAccessDesc_s RF_RaidAccessDesc_t; -typedef struct RF_RaidDisk_s RF_RaidDisk_t; -typedef struct RF_RaidLayout_s RF_RaidLayout_t; -typedef struct RF_RaidReconDesc_s RF_RaidReconDesc_t; -typedef struct RF_ReconBuffer_s RF_ReconBuffer_t; -typedef struct RF_ReconConfig_s RF_ReconConfig_t; -typedef struct RF_ReconCtrl_s RF_ReconCtrl_t; -typedef struct RF_ReconDoneProc_s RF_ReconDoneProc_t; -typedef struct RF_ReconEvent_s RF_ReconEvent_t; -typedef struct RF_ReconMap_s RF_ReconMap_t; -typedef struct RF_ReconMapListElem_s RF_ReconMapListElem_t; -typedef struct RF_ReconParityStripeStatus_s RF_ReconParityStripeStatus_t; -typedef struct RF_RedFuncs_s RF_RedFuncs_t; -typedef struct RF_RegionBufferQueue_s RF_RegionBufferQueue_t; -typedef struct RF_RegionInfo_s RF_RegionInfo_t; -typedef struct RF_ShutdownList_s RF_ShutdownList_t; -typedef struct RF_SpareTableEntry_s RF_SpareTableEntry_t; -typedef struct RF_SparetWait_s RF_SparetWait_t; -typedef struct RF_StripeLockDesc_s RF_StripeLockDesc_t; -typedef struct RF_ThreadGroup_s RF_ThreadGroup_t; -typedef struct RF_ThroughputStats_s RF_ThroughputStats_t; +typedef RF_uint64 RF_IoCount_t; +typedef RF_uint64 RF_Offset_t; +typedef RF_uint32 RF_PSSFlags_t; +typedef RF_uint64 RF_SectorCount_t; +typedef RF_uint64 RF_StripeCount_t; +typedef RF_int64 RF_SectorNum_t; /* + * These are unsigned so we can set + * them to (-1) for "uninitialized". + */ +typedef RF_int64 RF_StripeNum_t; +typedef RF_int64 RF_RaidAddr_t; +typedef int RF_RowCol_t; /* Unsigned so it can be (-1). */ +typedef RF_int64 RF_HeadSepLimit_t; +typedef RF_int64 RF_ReconUnitCount_t; +typedef int RF_ReconUnitNum_t; + +typedef char RF_ParityConfig_t; + +typedef char RF_DiskQueueType_t[1024]; +#define RF_DISK_QUEUE_TYPE_NONE "" + +/* Values for the 'type' field in a reconstruction buffer. */ +typedef int RF_RbufType_t; +#define RF_RBUF_TYPE_EXCLUSIVE 0 /* + * This buf assigned exclusively to + * one disk. + */ +#define RF_RBUF_TYPE_FLOATING 1 /* This is a floating recon buf. */ +#define RF_RBUF_TYPE_FORCED 2 /* + * This rbuf was allocated to complete + * a forced recon. 
+ */ + +typedef char RF_IoType_t; +#define RF_IO_TYPE_READ 'r' +#define RF_IO_TYPE_WRITE 'w' +#define RF_IO_TYPE_NOP 'n' +#define RF_IO_IS_R_OR_W(_type_) \ + (((_type_) == RF_IO_TYPE_READ) || ((_type_) == RF_IO_TYPE_WRITE)) + +typedef void (*RF_VoidFuncPtr) (void *,...); + +typedef RF_uint32 RF_AccessStripeMapFlags_t; +typedef RF_uint32 RF_DiskQueueDataFlags_t; +typedef RF_uint32 RF_DiskQueueFlags_t; +typedef RF_uint32 RF_RaidAccessFlags_t; + +#define RF_DISKQUEUE_DATA_FLAGS_NONE ((RF_DiskQueueDataFlags_t)0) + +typedef struct RF_AccessStripeMap_s RF_AccessStripeMap_t; +typedef struct RF_AccessStripeMapHeader_s RF_AccessStripeMapHeader_t; +typedef struct RF_AllocListElem_s RF_AllocListElem_t; +typedef struct RF_CallbackDesc_s RF_CallbackDesc_t; +typedef struct RF_ChunkDesc_s RF_ChunkDesc_t; +typedef struct RF_CommonLogData_s RF_CommonLogData_t; +typedef struct RF_Config_s RF_Config_t; +typedef struct RF_CumulativeStats_s RF_CumulativeStats_t; +typedef struct RF_DagHeader_s RF_DagHeader_t; +typedef struct RF_DagList_s RF_DagList_t; +typedef struct RF_DagNode_s RF_DagNode_t; +typedef struct RF_DeclusteredConfigInfo_s RF_DeclusteredConfigInfo_t; +typedef struct RF_DiskId_s RF_DiskId_t; +typedef struct RF_DiskMap_s RF_DiskMap_t; +typedef struct RF_DiskQueue_s RF_DiskQueue_t; +typedef struct RF_DiskQueueData_s RF_DiskQueueData_t; +typedef struct RF_DiskQueueSW_s RF_DiskQueueSW_t; +typedef struct RF_Etimer_s RF_Etimer_t; +typedef struct RF_EventCreate_s RF_EventCreate_t; +typedef struct RF_FreeList_s RF_FreeList_t; +typedef struct RF_LockReqDesc_s RF_LockReqDesc_t; +typedef struct RF_LockTableEntry_s RF_LockTableEntry_t; +typedef struct RF_MCPair_s RF_MCPair_t; +typedef struct RF_OwnerInfo_s RF_OwnerInfo_t; +typedef struct RF_ParityLog_s RF_ParityLog_t; +typedef struct RF_ParityLogAppendQueue_s RF_ParityLogAppendQueue_t; +typedef struct RF_ParityLogData_s RF_ParityLogData_t; +typedef struct RF_ParityLogDiskQueue_s RF_ParityLogDiskQueue_t; +typedef struct RF_ParityLogQueue_s RF_ParityLogQueue_t; +typedef struct RF_ParityLogRecord_s RF_ParityLogRecord_t; +typedef struct RF_PerDiskReconCtrl_s RF_PerDiskReconCtrl_t; +typedef struct RF_PSStatusHeader_s RF_PSStatusHeader_t; +typedef struct RF_PhysDiskAddr_s RF_PhysDiskAddr_t; +typedef struct RF_PropHeader_s RF_PropHeader_t; +typedef struct RF_Raid_s RF_Raid_t; +typedef struct RF_RaidAccessDesc_s RF_RaidAccessDesc_t; +typedef struct RF_RaidDisk_s RF_RaidDisk_t; +typedef struct RF_RaidLayout_s RF_RaidLayout_t; +typedef struct RF_RaidReconDesc_s RF_RaidReconDesc_t; +typedef struct RF_ReconBuffer_s RF_ReconBuffer_t; +typedef struct RF_ReconConfig_s RF_ReconConfig_t; +typedef struct RF_ReconCtrl_s RF_ReconCtrl_t; +typedef struct RF_ReconDoneProc_s RF_ReconDoneProc_t; +typedef struct RF_ReconEvent_s RF_ReconEvent_t; +typedef struct RF_ReconMap_s RF_ReconMap_t; +typedef struct RF_ReconMapListElem_s RF_ReconMapListElem_t; +typedef struct RF_ReconParityStripeStatus_s RF_ReconParityStripeStatus_t; +typedef struct RF_RedFuncs_s RF_RedFuncs_t; +typedef struct RF_RegionBufferQueue_s RF_RegionBufferQueue_t; +typedef struct RF_RegionInfo_s RF_RegionInfo_t; +typedef struct RF_ShutdownList_s RF_ShutdownList_t; +typedef struct RF_SpareTableEntry_s RF_SpareTableEntry_t; +typedef struct RF_SparetWait_s RF_SparetWait_t; +typedef struct RF_StripeLockDesc_s RF_StripeLockDesc_t; +typedef struct RF_ThreadGroup_s RF_ThreadGroup_t; +typedef struct RF_ThroughputStats_s RF_ThroughputStats_t; /* * Important assumptions regarding ordering of the states in this list - * have been 
made!!! + * have been made !!! * Before disturbing this ordering, look at code in rf_states.c */ typedef enum RF_AccessState_e { - /* original states */ - rf_QuiesceState, /* handles queisence for reconstruction */ - rf_IncrAccessesCountState, /* count accesses in flight */ + /* Original states. */ + rf_QuiesceState, /* Handles quiescence for reconstruction. */ + rf_IncrAccessesCountState, /* Count accesses in flight. */ rf_DecrAccessesCountState, - rf_MapState, /* map access to disk addresses */ - rf_LockState, /* take stripe locks */ - rf_CreateDAGState, /* create DAGs */ - rf_ExecuteDAGState, /* execute DAGs */ - rf_ProcessDAGState, /* DAGs are completing- check if correct, or - * if we need to retry */ - rf_CleanupState, /* release stripe locks, clean up */ - rf_LastState /* must be the last state */ + rf_MapState, /* Map access to disk addresses. */ + rf_LockState, /* Take stripe locks. */ + rf_CreateDAGState, /* Create DAGs. */ + rf_ExecuteDAGState, /* Execute DAGs. */ + rf_ProcessDAGState, /* + * DAGs are completing - check if correct, or + * if we need to retry. + */ + rf_CleanupState, /* Release stripe locks, clean up. */ + rf_LastState /* Must be the last state. */ } RF_AccessState_t; -#define RF_MAXROW 10 /* these are arbitrary and can be modified at - * will */ -#define RF_MAXCOL 40 -#define RF_MAXSPARE 10 -#define RF_MAXDBGV 75 /* max number of debug variables */ -#define RF_MAXDBGVLEN 50 /* max length of debug variables */ +#define RF_MAXROW 10 /* + * These are arbitrary and can be modified at + * will. + */ +#define RF_MAXCOL 40 +#define RF_MAXSPARE 10 +#define RF_MAXDBGV 75 /* Max number of debug variables. */ +#define RF_MAXDBGVLEN 50 /* Max length of debug variables. */ union RF_GenericParam_u { - void *p; - RF_uint64 v; + void *p; + RF_uint64 v; }; -typedef union RF_GenericParam_u RF_DagParam_t; -typedef union RF_GenericParam_u RF_CBParam_t; +typedef union RF_GenericParam_u RF_DagParam_t; +typedef union RF_GenericParam_u RF_CBParam_t; -#endif /* _RF__RF_TYPES_H_ */ +#endif /* _RF__RF_TYPES_H_ */ diff --git a/sys/dev/raidframe/rf_utils.c b/sys/dev/raidframe/rf_utils.c index 2b5a99cbebf..c136b0fe7fa 100644 --- a/sys/dev/raidframe/rf_utils.c +++ b/sys/dev/raidframe/rf_utils.c @@ -1,5 +1,6 @@ -/* $OpenBSD: rf_utils.c,v 1.4 2000/01/11 18:02:23 peter Exp $ */ +/* $OpenBSD: rf_utils.c,v 1.5 2002/12/16 07:01:05 tdeval Exp $ */ /* $NetBSD: rf_utils.c,v 1.5 2000/01/07 03:41:03 oster Exp $ */ + /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,11 +28,11 @@ * rights to redistribute these changes. */ -/**************************************** +/****************************************** * - * rf_utils.c -- various support routines + * rf_utils.c -- Various support routines. * - ****************************************/ + ******************************************/ #include "rf_threadstuff.h" @@ -43,28 +44,24 @@ #include "rf_alloclist.h" #include "rf_general.h" -/* creates & zeros 2-d array with b rows and k columns (MCH) */ +/* Creates & zeros 2-d array with b rows and k columns. 
(MCH) */ RF_RowCol_t ** -rf_make_2d_array(b, k, allocList) - int b; - int k; - RF_AllocListElem_t *allocList; +rf_make_2d_array(int b, int k, RF_AllocListElem_t *allocList) { RF_RowCol_t **retval, i; - RF_MallocAndAdd(retval, b * sizeof(RF_RowCol_t *), (RF_RowCol_t **), allocList); + RF_MallocAndAdd(retval, b * sizeof(RF_RowCol_t *), (RF_RowCol_t **), + allocList); for (i = 0; i < b; i++) { - RF_MallocAndAdd(retval[i], k * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList); - (void) bzero((char *) retval[i], k * sizeof(RF_RowCol_t)); + RF_MallocAndAdd(retval[i], k * sizeof(RF_RowCol_t), + (RF_RowCol_t *), allocList); + bzero((char *) retval[i], k * sizeof(RF_RowCol_t)); } return (retval); } -void -rf_free_2d_array(a, b, k) - RF_RowCol_t **a; - int b; - int k; +void +rf_free_2d_array(RF_RowCol_t **a, int b, int k) { RF_RowCol_t i; @@ -73,36 +70,32 @@ rf_free_2d_array(a, b, k) RF_Free(a, b * sizeof(RF_RowCol_t)); } - -/* creates & zeros a 1-d array with c columns */ +/* Creates & zeroes a 1-d array with c columns. */ RF_RowCol_t * -rf_make_1d_array(c, allocList) - int c; - RF_AllocListElem_t *allocList; +rf_make_1d_array(int c, RF_AllocListElem_t *allocList) { RF_RowCol_t *retval; - RF_MallocAndAdd(retval, c * sizeof(RF_RowCol_t), (RF_RowCol_t *), allocList); - (void) bzero((char *) retval, c * sizeof(RF_RowCol_t)); + RF_MallocAndAdd(retval, c * sizeof(RF_RowCol_t), (RF_RowCol_t *), + allocList); + bzero((char *) retval, c * sizeof(RF_RowCol_t)); return (retval); } -void -rf_free_1d_array(a, n) - RF_RowCol_t *a; - int n; +void +rf_free_1d_array(RF_RowCol_t *a, int n) { RF_Free(a, n * sizeof(RF_RowCol_t)); } -/* Euclid's algorithm: finds and returns the greatest common divisor - * between a and b. (MCH) + +/* + * Euclid's algorithm: Finds and returns the greatest common divisor + * between a and b. (MCH) */ -int -rf_gcd(m, n) - int m; - int n; +int +rf_gcd(int m, int n) { - int t; + int t; while (m > 0) { t = n % m; @@ -111,21 +104,25 @@ rf_gcd(m, n) } return (n); } -/* these convert between text and integer. Apparently the regular C macros - * for doing this are not available in the kernel - */ -#define ISDIGIT(x) ( (x) >= '0' && (x) <= '9' ) -#define ISHEXCHAR(x) ( ((x) >= 'a' && (x) <= 'f') || ((x) >= 'A' && (x) <= 'F') ) -#define ISHEX(x) ( ISDIGIT(x) || ISHEXCHAR(x) ) -#define HC2INT(x) ( ((x) >= 'a' && (x) <= 'f') ? (x) - 'a' + 10 : \ - ( ((x) >= 'A' && (x) <= 'F') ? (x) - 'A' + 10 : (x - '0') ) ) +/* + * These convert between text and integer. Apparently the regular C macros + * for doing this are not available in the kernel. + */ -int -rf_atoi(p) - char *p; +#define ISDIGIT(x) ((x) >= '0' && (x) <= '9') +#define ISHEXCHAR(x) (((x) >= 'a' && (x) <= 'f') || \ + ((x) >= 'A' && (x) <= 'F')) +#define ISHEX(x) (ISDIGIT(x) || ISHEXCHAR(x)) +#define HC2INT(x) (((x) >= 'a' && (x) <= 'f') ? \ + (x) - 'a' + 10 : \ + (((x) >= 'A' && (x) <= 'F') ? \ + (x) - 'A' + 10 : (x - '0'))) + +int +rf_atoi(char *p) { - int val = 0, negate = 0; + int val = 0, negate = 0; if (*p == '-') { negate = 1; @@ -136,11 +133,10 @@ rf_atoi(p) return ((negate) ? 
-val : val); }
-int
-rf_htoi(p)
- char *p;
+int
+rf_htoi(char *p)
 {
- int val = 0;
+ int val = 0;
 for (; ISHEXCHAR(*p); p++)
 val = 16 * val + HC2INT(*p);
 return (val);
diff --git a/sys/dev/raidframe/rf_utils.h b/sys/dev/raidframe/rf_utils.h
index 622e2061b45..731899521ee 100644
--- a/sys/dev/raidframe/rf_utils.h
+++ b/sys/dev/raidframe/rf_utils.h
@@ -1,5 +1,6 @@
-/* $OpenBSD: rf_utils.h,v 1.4 2000/01/07 14:50:23 peter Exp $ */
+/* $OpenBSD: rf_utils.h,v 1.5 2002/12/16 07:01:05 tdeval Exp $ */
 /* $NetBSD: rf_utils.h,v 1.4 1999/08/13 03:26:55 oster Exp $ */
+
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -27,44 +28,48 @@
  * rights to redistribute these changes.
  */
-/***************************************
+/****************************************
  *
- * rf_utils.c -- header file for utils.c
+ * rf_utils.c -- Header file for utils.c
  *
- ***************************************/
+ ****************************************/
-#ifndef _RF__RF_UTILS_H_
-#define _RF__RF_UTILS_H_
+#ifndef _RF__RF_UTILS_H_
+#define _RF__RF_UTILS_H_
 #include "rf_types.h"
 #include "rf_alloclist.h"
 #include "rf_threadstuff.h"
-char *rf_find_non_white(char *p);
-char *rf_find_white(char *p);
-RF_RowCol_t **rf_make_2d_array(int b, int k, RF_AllocListElem_t * allocList);
-RF_RowCol_t *rf_make_1d_array(int c, RF_AllocListElem_t * allocList);
-void rf_free_2d_array(RF_RowCol_t ** a, int b, int k);
-void rf_free_1d_array(RF_RowCol_t * a, int n);
-int rf_gcd(int m, int n);
-int rf_atoi(char *p);
-int rf_htoi(char *p);
+char *rf_find_non_white(char *);
+char *rf_find_white(char *);
+RF_RowCol_t **rf_make_2d_array(int, int, RF_AllocListElem_t *);
+RF_RowCol_t *rf_make_1d_array(int, RF_AllocListElem_t *);
+void rf_free_2d_array(RF_RowCol_t **, int, int);
+void rf_free_1d_array(RF_RowCol_t *, int);
+int rf_gcd(int, int);
+int rf_atoi(char *);
+int rf_htoi(char *);
-#define RF_USEC_PER_SEC 1000000
-#define RF_TIMEVAL_TO_US(_t_) (((_t_).tv_sec) \
- * RF_USEC_PER_SEC + (_t_).tv_usec)
+#define RF_USEC_PER_SEC 1000000
+#define RF_TIMEVAL_TO_US(_t_) \
+ (((_t_).tv_sec) * RF_USEC_PER_SEC + (_t_).tv_usec)
-#define RF_TIMEVAL_DIFF(_start_,_end_,_diff_) { \
- if ((_end_)->tv_usec < (_start_)->tv_usec) { \
- (_diff_)->tv_usec = ((_end_)->tv_usec + RF_USEC_PER_SEC) \
- - (_start_)->tv_usec; \
- (_diff_)->tv_sec = ((_end_)->tv_sec-1) - (_start_)->tv_sec; \
- } \
- else { \
- (_diff_)->tv_usec = (_end_)->tv_usec - (_start_)->tv_usec; \
- (_diff_)->tv_sec = (_end_)->tv_sec - (_start_)->tv_sec; \
- } \
-}
+#define RF_TIMEVAL_DIFF(_start_,_end_,_diff_) \
+do { \
+ if ((_end_)->tv_usec < (_start_)->tv_usec) { \
+ (_diff_)->tv_usec = ((_end_)->tv_usec + \
+ RF_USEC_PER_SEC) - (_start_)->tv_usec; \
+ (_diff_)->tv_sec = ((_end_)->tv_sec-1) - \
+ (_start_)->tv_sec; \
+ } \
+ else { \
+ (_diff_)->tv_usec = (_end_)->tv_usec - \
+ (_start_)->tv_usec; \
+ (_diff_)->tv_sec = (_end_)->tv_sec - \
+ (_start_)->tv_sec; \
+ } \
+} while (0)
-#endif /* !_RF__RF_UTILS_H_ */
+#endif /* !_RF__RF_UTILS_H_ */
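
The recurring change in the hunks above -- RF_THREADGROUP_WAIT_START and
RF_THREADGROUP_WAIT_STOP in rf_threadstuff.h, RF_TIMEVAL_DIFF in rf_utils.h --
is wrapping multi-statement macro bodies in do { ... } while (0).  A minimal
standalone sketch (illustration only; BUMP_BOTH_BAD and BUMP_BOTH_OK are
invented names, not RAIDframe code) shows the failure mode the wrapper avoids:

#include <stdio.h>

#define BUMP_BOTH_BAD(a, b)	{ (a)++; (b)++; }
#define BUMP_BOTH_OK(a, b)	do { (a)++; (b)++; } while (0)

int
main(void)
{
	int x = 0, y = 0;

	/*
	 * With the brace-only form,
	 *	if (x == 0) BUMP_BOTH_BAD(x, y); else BUMP_BOTH_BAD(y, x);
	 * does not compile: the ';' after the expanded block terminates
	 * the if, leaving the else dangling.  The do/while (0) form is a
	 * single statement that absorbs the ';'.
	 */
	if (x == 0)
		BUMP_BOTH_OK(x, y);
	else
		BUMP_BOTH_OK(y, x);

	printf("x=%d y=%d\n", x, y);	/* prints x=1 y=1 */
	return (0);
}

Because the do/while (0) expansion behaves like any other single statement,
callers can keep writing the macros with a trailing semicolon inside if/else
bodies, which is why the patch converts them wholesale.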
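
For RF_LONGSHIFT in rf_types.h, the comment gives the intent: lg(sizeof(long)),
so shifting by it converts between a byte count and a count of longs.  A small
userland sketch of the same idea (illustration only; it keys off ULONG_MAX
instead of the kernel's LONG_BIT so it builds outside sys/, and LONGSHIFT is a
local stand-in):

#include <limits.h>
#include <stdio.h>

#if ULONG_MAX == 0xffffffffUL
#define LONGSHIFT	2	/* sizeof(long) == 4 == 1 << 2 */
#elif ULONG_MAX == 0xffffffffffffffffUL
#define LONGSHIFT	3	/* sizeof(long) == 8 == 1 << 3 */
#else
#error word size not handled in this sketch
#endif

int
main(void)
{
	size_t nbytes = 10 * sizeof(long);

	/* Shifting right by LONGSHIFT turns a byte count into a long count. */
	printf("%zu bytes = %zu longs\n", nbytes, nbytes >> LONGSHIFT);
	return (0);
}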
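
rf_utils.c keeps two small helpers whose behaviour may not be obvious from the
reformatted diff: rf_gcd() is plain Euclid, and rf_atoi()/rf_htoi() accumulate
digits through the ISDIGIT/ISHEXCHAR/HC2INT macros because the usual ctype
macros are not available in the kernel.  A standalone approximation
(illustration only; gcd, hexval and htoi are local names, and unlike rf_htoi(),
whose loop as shown tests ISHEXCHAR only, this sketch also accepts the decimal
digits '0'-'9'):

#include <stdio.h>

static int
gcd(int m, int n)
{
	int t;

	/* Same loop as rf_gcd(): replace (m, n) with (n % m, m) until m is 0. */
	while (m > 0) {
		t = n % m;
		n = m;
		m = t;
	}
	return (n);
}

static int
hexval(int c)
{
	/* What HC2INT() computes, with an explicit "not a hex digit" result. */
	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'a' && c <= 'f')
		return (c - 'a' + 10);
	if (c >= 'A' && c <= 'F')
		return (c - 'A' + 10);
	return (-1);
}

static int
htoi(const char *p)
{
	int val = 0;

	/* Accumulate base-16 digits, as rf_htoi() does with HC2INT(). */
	while (hexval(*p) >= 0)
		val = 16 * val + hexval(*p++);
	return (val);
}

int
main(void)
{
	printf("gcd(12, 18) = %d\n", gcd(12, 18));	/* 6 */
	printf("htoi(\"1ff\") = %d\n", htoi("1ff"));	/* 511 */
	return (0);
}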
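
Finally, RF_TIMEVAL_DIFF in rf_utils.h computes end minus start with a manual
borrow on the microsecond field.  A plain-function rendering of the same
arithmetic (illustration only; timeval_diff and USEC_PER_SEC are local
stand-ins for the macro and RF_USEC_PER_SEC):

#include <sys/time.h>
#include <stdio.h>

#define USEC_PER_SEC	1000000		/* stands in for RF_USEC_PER_SEC */

static void
timeval_diff(const struct timeval *start, const struct timeval *end,
    struct timeval *diff)
{
	/* Same arithmetic as RF_TIMEVAL_DIFF(): borrow a second if needed. */
	if (end->tv_usec < start->tv_usec) {
		diff->tv_usec = (end->tv_usec + USEC_PER_SEC) -
		    start->tv_usec;
		diff->tv_sec = (end->tv_sec - 1) - start->tv_sec;
	} else {
		diff->tv_usec = end->tv_usec - start->tv_usec;
		diff->tv_sec = end->tv_sec - start->tv_sec;
	}
}

int
main(void)
{
	struct timeval start = { 10, 900000 }, end = { 12, 100000 }, diff;

	timeval_diff(&start, &end, &diff);
	/* 12.100000s - 10.900000s = 1.200000s */
	printf("%lld.%06ld\n", (long long)diff.tv_sec, (long)diff.tv_usec);
	return (0);
}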