diff options
author | Peter Galbavy <peter@cvs.openbsd.org> | 2000-08-08 16:07:46 +0000 |
---|---|---|
committer | Peter Galbavy <peter@cvs.openbsd.org> | 2000-08-08 16:07:46 +0000 |
commit | 32fe17a663470bcff500ec1d022251eb5521ca1d (patch) | |
tree | a9d50274c1f5d004d13fa30644899af9b22328ee /sys | |
parent | ba14061bb6dd00dd21fcd589dca3d5be75d8f752 (diff) |
sync RAIDframe with Greg Oster's work for NetBSD.
This update incorporates changes since January 2000.
RAID1 and RAID5 tested for functionality matching the 2.7 code. A
number of bug fixes (including stopping a parity rebuild when
unconfiguring) have been included. See Greg's RAIDframe info page:
http://www.cs.usask.ca/staff/oster/raid.html
The RAID_AUTOCONFIG feature set does *NOT* yet work. These features
require more work throughout the boot system and as such are a big
task.
IMPORTANT: As with anything that is this near live data on your
systems, please test carefully with existing configurations before
deploying in a live system. Feedback via sendbug or mail direct
to peter@wonderland.org is appreciated.
Diffstat (limited to 'sys')
34 files changed, 3399 insertions, 1049 deletions
diff --git a/sys/dev/raidframe/rf_archs.h b/sys/dev/raidframe/rf_archs.h index 765b577477f..700746d15f0 100644 --- a/sys/dev/raidframe/rf_archs.h +++ b/sys/dev/raidframe/rf_archs.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_archs.h,v 1.4 2000/01/11 18:02:20 peter Exp $ */ -/* $NetBSD: rf_archs.h,v 1.7 2000/01/08 03:49:37 oster Exp $ */ +/* $OpenBSD: rf_archs.h,v 1.5 2000/08/08 16:07:38 peter Exp $ */ +/* $NetBSD: rf_archs.h,v 1.9 2000/03/04 03:27:13 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -67,10 +67,6 @@ #define RF_MEMORY_REDZONES 0 #define RF_RECON_STATS 1 -#define RF_INCLUDE_QUEUE_RANDOM 0 - -#define RF_KEEP_DISKSTATS 1 - #include "rf_options.h" #endif /* !_RF__RF_ARCHS_H_ */ diff --git a/sys/dev/raidframe/rf_copyback.c b/sys/dev/raidframe/rf_copyback.c index f0da14587c3..025aaaca293 100644 --- a/sys/dev/raidframe/rf_copyback.c +++ b/sys/dev/raidframe/rf_copyback.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_copyback.c,v 1.5 2000/01/11 18:02:20 peter Exp $ */ -/* $NetBSD: rf_copyback.c,v 1.12 2000/01/09 01:29:28 oster Exp $ */ +/* $OpenBSD: rf_copyback.c,v 1.6 2000/08/08 16:07:39 peter Exp $ */ +/* $NetBSD: rf_copyback.c,v 1.14 2000/03/07 02:59:50 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -101,6 +101,8 @@ rf_CopybackReconstructedData(raidPtr) struct vattr va; struct proc *proc; + int ac; + done = 0; fcol = 0; for (frow = 0; frow < raidPtr->numRow; frow++) { @@ -129,11 +131,15 @@ rf_CopybackReconstructedData(raidPtr) if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) { printf("Closed the open device: %s\n", raidPtr->Disks[frow][fcol].devname); - VOP_UNLOCK(raidPtr->raid_cinfo[frow][fcol].ci_vp, 0, proc); - (void) vn_close(raidPtr->raid_cinfo[frow][fcol].ci_vp, - FREAD | FWRITE, proc->p_ucred, proc); + vp = raidPtr->raid_cinfo[frow][fcol].ci_vp; + ac = raidPtr->Disks[frow][fcol].auto_configured; + rf_close_component(raidPtr, vp, ac); raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL; + } + /* note that this disk was *not* auto_configured (any longer) */ + raidPtr->Disks[frow][fcol].auto_configured = 0; + printf("About to (re-)open the device: %s\n", raidPtr->Disks[frow][fcol].devname); @@ -234,17 +240,12 @@ rf_CopybackReconstructedData(raidPtr) raidread_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, raidPtr->raid_cinfo[frow][fcol].ci_vp, &c_label); - - c_label.version = RF_COMPONENT_LABEL_VERSION; - c_label.mod_counter = raidPtr->mod_counter; - c_label.serial_number = raidPtr->serial_number; + + raid_init_component_label( raidPtr, &c_label ); + c_label.row = frow; c_label.column = fcol; - c_label.num_rows = raidPtr->numRow; - c_label.num_columns = raidPtr->numCol; - c_label.clean = RF_RAID_DIRTY; - c_label.status = rf_ds_optimal; - + raidwrite_component_label( raidPtr->raid_cinfo[frow][fcol].ci_dev, raidPtr->raid_cinfo[frow][fcol].ci_vp, &c_label); diff --git a/sys/dev/raidframe/rf_dagfuncs.c b/sys/dev/raidframe/rf_dagfuncs.c index 741687a04d5..d3e35553d0a 100644 --- a/sys/dev/raidframe/rf_dagfuncs.c +++ b/sys/dev/raidframe/rf_dagfuncs.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_dagfuncs.c,v 1.4 2000/01/07 14:50:20 peter Exp $ */ -/* $NetBSD: rf_dagfuncs.c,v 1.5 1999/08/26 02:40:28 oster Exp $ */ +/* $OpenBSD: rf_dagfuncs.c,v 1.5 2000/08/08 16:07:39 
peter Exp $ */ +/* $NetBSD: rf_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -637,14 +637,14 @@ rf_bxor(src, dest, len, bp) */ int rf_longword_bxor(src, dest, len, bp) - register unsigned long *src; - register unsigned long *dest; + unsigned long *src; + unsigned long *dest; int len; /* longwords */ void *bp; { - register unsigned long *end = src + len; - register unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ - register unsigned long *pg_src, *pg_dest; /* per-page source/dest + unsigned long *end = src + len; + unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ + unsigned long *pg_src, *pg_dest; /* per-page source/dest * pointers */ int longs_this_time;/* # longwords to xor in the current iteration */ @@ -714,15 +714,15 @@ rf_longword_bxor(src, dest, len, bp) */ int rf_longword_bxor3(dst, a, b, c, len, bp) - register unsigned long *dst; - register unsigned long *a; - register unsigned long *b; - register unsigned long *c; + unsigned long *dst; + unsigned long *a; + unsigned long *b; + unsigned long *c; int len; /* length in longwords */ void *bp; { unsigned long a0, a1, a2, a3, b0, b1, b2, b3; - register unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest + unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest * pointers */ int longs_this_time;/* # longs to xor in the current iteration */ char dst_is_a = 0; @@ -879,10 +879,10 @@ rf_longword_bxor3(dst, a, b, c, len, bp) int rf_bxor3(dst, a, b, c, len, bp) - register unsigned char *dst; - register unsigned char *a; - register unsigned char *b; - register unsigned char *c; + unsigned char *dst; + unsigned char *a; + unsigned char *b; + unsigned char *c; unsigned long len; void *bp; { diff --git a/sys/dev/raidframe/rf_dagfuncs.h b/sys/dev/raidframe/rf_dagfuncs.h index 0382fab2455..7114fd5f164 100644 --- a/sys/dev/raidframe/rf_dagfuncs.h +++ b/sys/dev/raidframe/rf_dagfuncs.h @@ -1,5 
+1,5 @@ -/* $OpenBSD: rf_dagfuncs.h,v 1.2 1999/02/16 00:02:32 niklas Exp $ */ -/* $NetBSD: rf_dagfuncs.h,v 1.3 1999/02/05 00:06:08 oster Exp $ */ +/* $OpenBSD: rf_dagfuncs.h,v 1.3 2000/08/08 16:07:39 peter Exp $ */ +/* $NetBSD: rf_dagfuncs.h,v 1.4 2000/03/30 13:39:07 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -61,11 +61,10 @@ rf_XorIntoBuffer(RF_Raid_t * raidPtr, RF_PhysDiskAddr_t * pda, char *srcbuf, char *targbuf, void *bp); int rf_bxor(char *src, char *dest, int len, void *bp); int -rf_longword_bxor(register unsigned long *src, register unsigned long *dest, - int len, void *bp); +rf_longword_bxor(unsigned long *src, unsigned long *dest, int len, void *bp); int -rf_longword_bxor3(register unsigned long *dest, register unsigned long *a, - register unsigned long *b, register unsigned long *c, int len, void *bp); +rf_longword_bxor3(unsigned long *dest, unsigned long *a, unsigned long *b, + unsigned long *c, int len, void *bp); int rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b, unsigned char *c, unsigned long len, void *bp); diff --git a/sys/dev/raidframe/rf_decluster.c b/sys/dev/raidframe/rf_decluster.c index 43a5f3a5c34..3cae0d27f59 100644 --- a/sys/dev/raidframe/rf_decluster.c +++ b/sys/dev/raidframe/rf_decluster.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_decluster.c,v 1.3 2000/01/07 14:50:21 peter Exp $ */ -/* $NetBSD: rf_decluster.c,v 1.4 1999/08/13 03:41:56 oster Exp $ */ +/* $OpenBSD: rf_decluster.c,v 1.4 2000/08/08 16:07:40 peter Exp $ */ +/* $NetBSD: rf_decluster.c,v 1.5 2000/03/07 01:54:29 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -556,11 +556,6 @@ rf_decluster_adjust_params( RF_StripeNum_t * base_suid) { RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo; -#if (defined(__NetBSD__) || defined (__OpenBSD__)) && defined(_KERNEL) - /* Nothing! 
*/ -#else - char pc = layoutPtr->map->parityConfig; -#endif if (*SUID >= info->FullTableLimitSUID) { /* new full table size is size of last full table on disk */ diff --git a/sys/dev/raidframe/rf_diskqueue.c b/sys/dev/raidframe/rf_diskqueue.c index a4811343f7e..d324f43fadc 100644 --- a/sys/dev/raidframe/rf_diskqueue.c +++ b/sys/dev/raidframe/rf_diskqueue.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_diskqueue.c,v 1.5 2000/01/11 18:02:21 peter Exp $ */ -/* $NetBSD: rf_diskqueue.c,v 1.8 2000/01/07 03:43:39 oster Exp $ */ +/* $OpenBSD: rf_diskqueue.c,v 1.6 2000/08/08 16:07:40 peter Exp $ */ +/* $NetBSD: rf_diskqueue.c,v 1.13 2000/03/04 04:22:34 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -27,7 +27,7 @@ * rights to redistribute these changes. */ -/**************************************************************************************** +/**************************************************************************** * * rf_diskqueue.c -- higher-level disk queue code * @@ -35,32 +35,36 @@ * routines. The code here implements thread scheduling, synchronization, * and locking ops (see below) on top of the lower-level queueing code. * - * to support atomic RMW, we implement "locking operations". When a locking op - * is dispatched to the lower levels of the driver, the queue is locked, and no further - * I/Os are dispatched until the queue receives & completes a corresponding "unlocking - * operation". This code relies on the higher layers to guarantee that a locking - * op will always be eventually followed by an unlocking op. The model is that - * the higher layers are structured so locking and unlocking ops occur in pairs, i.e. - * an unlocking op cannot be generated until after a locking op reports completion. - * There is no good way to check to see that an unlocking op "corresponds" to the - * op that currently has the queue locked, so we make no such attempt. 
Since by - * definition there can be only one locking op outstanding on a disk, this should - * not be a problem. + * to support atomic RMW, we implement "locking operations". When a + * locking op is dispatched to the lower levels of the driver, the + * queue is locked, and no further I/Os are dispatched until the queue + * receives & completes a corresponding "unlocking operation". This + * code relies on the higher layers to guarantee that a locking op + * will always be eventually followed by an unlocking op. The model + * is that the higher layers are structured so locking and unlocking + * ops occur in pairs, i.e. an unlocking op cannot be generated until + * after a locking op reports completion. There is no good way to + * check to see that an unlocking op "corresponds" to the op that + * currently has the queue locked, so we make no such attempt. Since + * by definition there can be only one locking op outstanding on a + * disk, this should not be a problem. * - * In the kernel, we allow multiple I/Os to be concurrently dispatched to the disk - * driver. In order to support locking ops in this environment, when we decide to - * do a locking op, we stop dispatching new I/Os and wait until all dispatched I/Os - * have completed before dispatching the locking op. + * In the kernel, we allow multiple I/Os to be concurrently dispatched + * to the disk driver. In order to support locking ops in this + * environment, when we decide to do a locking op, we stop dispatching + * new I/Os and wait until all dispatched I/Os have completed before + * dispatching the locking op. * - * Unfortunately, the code is different in the 3 different operating states - * (user level, kernel, simulator). In the kernel, I/O is non-blocking, and - * we have no disk threads to dispatch for us. Therefore, we have to dispatch - * new I/Os to the scsi driver at the time of enqueue, and also at the time - * of completion. 
At user level, I/O is blocking, and so only the disk threads - * may dispatch I/Os. Thus at user level, all we can do at enqueue time is - * enqueue and wake up the disk thread to do the dispatch. + * Unfortunately, the code is different in the 3 different operating + * states (user level, kernel, simulator). In the kernel, I/O is + * non-blocking, and we have no disk threads to dispatch for us. + * Therefore, we have to dispatch new I/Os to the scsi driver at the + * time of enqueue, and also at the time of completion. At user + * level, I/O is blocking, and so only the disk threads may dispatch + * I/Os. Thus at user level, all we can do at enqueue time is enqueue + * and wake up the disk thread to do the dispatch. * - ***************************************************************************************/ + ****************************************************************************/ #include "rf_types.h" #include "rf_threadstuff.h" @@ -77,31 +81,23 @@ #include "rf_cvscan.h" #include "rf_sstf.h" #include "rf_fifo.h" +#include "rf_kintf.h" static int init_dqd(RF_DiskQueueData_t *); static void clean_dqd(RF_DiskQueueData_t *); static void rf_ShutdownDiskQueueSystem(void *); -/* From rf_kintf.c */ -int rf_DispatchKernelIO(RF_DiskQueue_t *, RF_DiskQueueData_t *); - #define Dprintf1(s,a) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) #define Dprintf2(s,a,b) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) #define Dprintf3(s,a,b,c) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) -#define Dprintf4(s,a,b,c,d) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),NULL,NULL,NULL,NULL) -#define Dprintf5(s,a,b,c,d,e) if (rf_queueDebug) rf_debug_printf(s,(void 
*)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),(void *)((unsigned long)d),(void *)((unsigned long)e),NULL,NULL,NULL) - - -#define SIGNAL_DISK_QUEUE(_q_,_wh_) -#define WAIT_DISK_QUEUE(_q_,_wh_) -/***************************************************************************************** +/***************************************************************************** * - * the disk queue switch defines all the functions used in the different queueing - * disciplines - * queue ID, init routine, enqueue routine, dequeue routine + * the disk queue switch defines all the functions used in the + * different queueing disciplines queue ID, init routine, enqueue + * routine, dequeue routine * - ****************************************************************************************/ + ****************************************************************************/ static RF_DiskQueueSW_t diskqueuesw[] = { {"fifo", /* FIFO */ @@ -139,15 +135,6 @@ static RF_DiskQueueSW_t diskqueuesw[] = { rf_CscanPeek, rf_SstfPromote}, -#if !defined(_KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0 - /* to make a point to Chris :-> */ - {"random", /* random */ - rf_FifoCreate, - rf_FifoEnqueue, - rf_RandomDequeue, - rf_RandomPeek, - rf_FifoPromote}, -#endif /* !_KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */ }; #define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t)) @@ -163,9 +150,9 @@ static int init_dqd(dqd) RF_DiskQueueData_t *dqd; { - /* XXX not sure if the following malloc is appropriate... probably not - * quite... 
*/ - dqd->bp = (struct buf *) malloc(sizeof(struct buf), M_RAIDFRAME, M_NOWAIT); + + dqd->bp = (struct buf *) malloc(sizeof(struct buf), + M_RAIDFRAME, M_NOWAIT); if (dqd->bp == NULL) { return (ENOMEM); } @@ -181,24 +168,20 @@ clean_dqd(dqd) free(dqd->bp, M_RAIDFRAME); } /* configures a single disk queue */ -int config_disk_queue(RF_Raid_t *, RF_DiskQueue_t *, RF_RowCol_t, - RF_RowCol_t, RF_DiskQueueSW_t *, - RF_SectorCount_t, dev_t, int, - RF_ShutdownList_t **, - RF_AllocListElem_t *); + int -config_disk_queue( - RF_Raid_t * raidPtr, - RF_DiskQueue_t * diskqueue, - RF_RowCol_t r, /* row & col -- debug only. BZZT not any +rf_ConfigureDiskQueue( + RF_Raid_t * raidPtr, + RF_DiskQueue_t * diskqueue, + RF_RowCol_t r, /* row & col -- debug only. BZZT not any * more... */ - RF_RowCol_t c, - RF_DiskQueueSW_t * p, - RF_SectorCount_t sectPerDisk, - dev_t dev, - int maxOutstanding, - RF_ShutdownList_t ** listp, - RF_AllocListElem_t * clList) + RF_RowCol_t c, + RF_DiskQueueSW_t * p, + RF_SectorCount_t sectPerDisk, + dev_t dev, + int maxOutstanding, + RF_ShutdownList_t ** listp, + RF_AllocListElem_t * clList) { int rc; @@ -284,6 +267,7 @@ rf_ConfigureDiskQueues( RF_ERRORMSG2("Unknown queue type \"%s\". 
Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType); p = &diskqueuesw[0]; } + raidPtr->qType = p; RF_CallocAndAdd(diskQueues, raidPtr->numRow, sizeof(RF_DiskQueue_t *), (RF_DiskQueue_t **), raidPtr->cleanupList); if (diskQueues == NULL) { return (ENOMEM); @@ -297,9 +281,12 @@ rf_ConfigureDiskQueues( if (diskQueues[r] == NULL) return (ENOMEM); for (c = 0; c < raidPtr->numCol; c++) { - rc = config_disk_queue(raidPtr, &diskQueues[r][c], r, c, p, - raidPtr->sectorsPerDisk, raidPtr->Disks[r][c].dev, - cfgPtr->maxOutstandingDiskReqs, listp, raidPtr->cleanupList); + rc = rf_ConfigureDiskQueue(raidPtr, &diskQueues[r][c], + r, c, p, + raidPtr->sectorsPerDisk, + raidPtr->Disks[r][c].dev, + cfgPtr->maxOutstandingDiskReqs, + listp, raidPtr->cleanupList); if (rc) return (rc); } @@ -307,7 +294,7 @@ rf_ConfigureDiskQueues( spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; for (r = 0; r < raidPtr->numSpare; r++) { - rc = config_disk_queue(raidPtr, &spareQueues[r], + rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r], 0, raidPtr->numCol + r, p, raidPtr->sectorsPerDisk, raidPtr->Disks[0][raidPtr->numCol + r].dev, diff --git a/sys/dev/raidframe/rf_diskqueue.h b/sys/dev/raidframe/rf_diskqueue.h index 09d90dcefac..fb8e09927bd 100644 --- a/sys/dev/raidframe/rf_diskqueue.h +++ b/sys/dev/raidframe/rf_diskqueue.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_diskqueue.h,v 1.3 2000/01/11 18:02:21 peter Exp $ */ -/* $NetBSD: rf_diskqueue.h,v 1.4 2000/01/08 23:02:16 oster Exp $ */ +/* $OpenBSD: rf_diskqueue.h,v 1.4 2000/08/08 16:07:40 peter Exp $ */ +/* $NetBSD: rf_diskqueue.h,v 1.5 2000/02/13 04:53:57 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -178,21 +178,36 @@ rf_DiskIOPromote(RF_DiskQueue_t * queue, RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru); RF_DiskQueueData_t * -rf_CreateDiskQueueData(RF_IoType_t typ, - RF_SectorNum_t ssect, RF_SectorCount_t nsect, caddr_t buf, - RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, RF_DiskQueueData_t * next, RF_AccTraceEntry_t * tracerec, - void *raidPtr, RF_DiskQueueDataFlags_t flags, void *kb_proc); - - RF_DiskQueueData_t *rf_CreateDiskQueueDataFull(RF_IoType_t typ, - RF_SectorNum_t ssect, RF_SectorCount_t nsect, caddr_t buf, - RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru, - int (*wakeF) (void *, int), - void *arg, RF_DiskQueueData_t * next, RF_AccTraceEntry_t * tracerec, - int priority, int (*AuxFunc) (void *,...), caddr_t buf2, - void *raidPtr, RF_DiskQueueDataFlags_t flags, void *kb_proc); - - void rf_FreeDiskQueueData(RF_DiskQueueData_t * p); +rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect, + RF_SectorCount_t nsect, caddr_t buf, + RF_StripeNum_t parityStripeID, + RF_ReconUnitNum_t which_ru, + int (*wakeF) (void *, int), + void *arg, RF_DiskQueueData_t * next, + RF_AccTraceEntry_t * tracerec, + void *raidPtr, RF_DiskQueueDataFlags_t flags, + void *kb_proc); + +RF_DiskQueueData_t * +rf_CreateDiskQueueDataFull(RF_IoType_t typ, RF_SectorNum_t ssect, + RF_SectorCount_t nsect, caddr_t buf, + RF_StripeNum_t parityStripeID, + RF_ReconUnitNum_t which_ru, + int (*wakeF) (void *, int), + void *arg, RF_DiskQueueData_t * next, + RF_AccTraceEntry_t * tracerec, + int priority, int (*AuxFunc) (void *,...), + caddr_t buf2, void *raidPtr, + RF_DiskQueueDataFlags_t flags, void *kb_proc); + +void +rf_FreeDiskQueueData(RF_DiskQueueData_t * p); + +int +rf_ConfigureDiskQueue(RF_Raid_t *, RF_DiskQueue_t *, RF_RowCol_t, + RF_RowCol_t, RF_DiskQueueSW_t *, + RF_SectorCount_t, dev_t, int, + RF_ShutdownList_t **, + RF_AllocListElem_t *); #endif /* !_RF__RF_DISKQUEUE_H_ */ diff --git 
a/sys/dev/raidframe/rf_disks.c b/sys/dev/raidframe/rf_disks.c index a598a218919..80aaffd87d8 100644 --- a/sys/dev/raidframe/rf_disks.c +++ b/sys/dev/raidframe/rf_disks.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_disks.c,v 1.5 2000/01/11 18:02:21 peter Exp $ */ -/* $NetBSD: rf_disks.c,v 1.14 2000/01/09 01:29:28 oster Exp $ */ +/* $OpenBSD: rf_disks.c,v 1.6 2000/08/08 16:07:40 peter Exp $ */ +/* $NetBSD: rf_disks.c,v 1.31 2000/06/02 01:17:14 oster Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. @@ -75,6 +75,12 @@ #include "rf_options.h" #include "rf_kintf.h" +#if defined(__NetBSD__) +#include "rf_netbsd.h" +#elif defined(__OpenBSD__) +#include "rf_openbsd.h" +#endif + #include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> @@ -85,9 +91,11 @@ #include <sys/vnode.h> #endif -/* XXX these should be in a header file somewhere */ -void rf_UnconfigureVnodes( RF_Raid_t * ); -int rf_CheckLabels( RF_Raid_t *, RF_Config_t *); +static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); +static void rf_print_label_status( RF_Raid_t *, int, int, char *, + RF_ComponentLabel_t *); +static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, + RF_ComponentLabel_t *, int, int ); #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) @@ -116,54 +124,23 @@ rf_ConfigureDisks( listp, raidPtr, cfgPtr ) RF_RowCol_t r, c; int bs, ret; unsigned i, count, foundone = 0, numFailuresThisRow; - int num_rows_done, num_cols_done; int force; - num_rows_done = 0; - num_cols_done = 0; force = cfgPtr->force; - RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), - (RF_RaidDisk_t **), raidPtr->cleanupList); - if (disks == NULL) { - ret = ENOMEM; + ret = rf_AllocDiskStructures(raidPtr, cfgPtr); + if (ret) goto fail; - } - raidPtr->Disks = disks; - /* get space for the device-specific stuff... 
*/ - RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, - sizeof(struct raidcinfo *), (struct raidcinfo **), - raidPtr->cleanupList); - if (raidPtr->raid_cinfo == NULL) { - ret = ENOMEM; - goto fail; - } + disks = raidPtr->Disks; + for (r = 0; r < raidPtr->numRow; r++) { numFailuresThisRow = 0; - /* We allocate RF_MAXSPARE on the first row so that we - have room to do hot-swapping of spares */ - RF_CallocAndAdd(disks[r], raidPtr->numCol - + ((r == 0) ? RF_MAXSPARE : 0), - sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), - raidPtr->cleanupList); - if (disks[r] == NULL) { - ret = ENOMEM; - goto fail; - } - /* get more space for device specific stuff.. */ - RF_CallocAndAdd(raidPtr->raid_cinfo[r], - raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), - sizeof(struct raidcinfo), (struct raidcinfo *), - raidPtr->cleanupList); - if (raidPtr->raid_cinfo[r] == NULL) { - ret = ENOMEM; - goto fail; - } for (c = 0; c < raidPtr->numCol; c++) { ret = rf_ConfigureDisk(raidPtr, &cfgPtr->devnames[r][c][0], &disks[r][c], r, c); + if (ret) goto fail; @@ -186,14 +163,12 @@ rf_ConfigureDisks( listp, raidPtr, cfgPtr ) (long int) disks[r][c].numBlocks * disks[r][c].blockSize / 1024 / 1024); } - num_cols_done++; } /* XXX fix for n-fault tolerant */ /* XXX this should probably check to see how many failures we can handle for this configuration! */ if (numFailuresThisRow > 0) raidPtr->status[r] = rf_rs_degraded; - num_rows_done++; } /* all disks must be the same size & have the same block size, bs must * be a power of 2 */ @@ -340,11 +315,289 @@ fail: */ rf_UnconfigureVnodes( raidPtr ); - + return (ret); } +static int +rf_AllocDiskStructures(raidPtr, cfgPtr) + RF_Raid_t *raidPtr; + RF_Config_t *cfgPtr; +{ + RF_RaidDisk_t **disks; + int ret; + int r; + RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), + (RF_RaidDisk_t **), raidPtr->cleanupList); + if (disks == NULL) { + ret = ENOMEM; + goto fail; + } + raidPtr->Disks = disks; + /* get space for the device-specific stuff... 
*/ + RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, + sizeof(struct raidcinfo *), (struct raidcinfo **), + raidPtr->cleanupList); + if (raidPtr->raid_cinfo == NULL) { + ret = ENOMEM; + goto fail; + } + + for (r = 0; r < raidPtr->numRow; r++) { + /* We allocate RF_MAXSPARE on the first row so that we + have room to do hot-swapping of spares */ + RF_CallocAndAdd(disks[r], raidPtr->numCol + + ((r == 0) ? RF_MAXSPARE : 0), + sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), + raidPtr->cleanupList); + if (disks[r] == NULL) { + ret = ENOMEM; + goto fail; + } + /* get more space for device specific stuff.. */ + RF_CallocAndAdd(raidPtr->raid_cinfo[r], + raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), + sizeof(struct raidcinfo), (struct raidcinfo *), + raidPtr->cleanupList); + if (raidPtr->raid_cinfo[r] == NULL) { + ret = ENOMEM; + goto fail; + } + } + return(0); +fail: + rf_UnconfigureVnodes( raidPtr ); + + return(ret); +} + + +/* configure a single disk during auto-configuration at boot */ +int +rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) + RF_Raid_t *raidPtr; + RF_Config_t *cfgPtr; + RF_AutoConfig_t *auto_config; +{ + RF_RaidDisk_t **disks; + RF_RaidDisk_t *diskPtr; + RF_RowCol_t r, c; + RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; + int bs, ret; + int numFailuresThisRow; + int force; + RF_AutoConfig_t *ac; + int parity_good; + int mod_counter; + int mod_counter_found; + +#if DEBUG + printf("Starting autoconfiguration of RAID set...\n"); +#endif + force = cfgPtr->force; + + ret = rf_AllocDiskStructures(raidPtr, cfgPtr); + if (ret) + goto fail; + + disks = raidPtr->Disks; + + /* assume the parity will be fine.. 
*/ + parity_good = RF_RAID_CLEAN; + + /* Check for mod_counters that are too low */ + mod_counter_found = 0; + ac = auto_config; + while(ac!=NULL) { + if (mod_counter_found==0) { + mod_counter = ac->clabel->mod_counter; + mod_counter_found = 1; + } else { + if (ac->clabel->mod_counter > mod_counter) { + mod_counter = ac->clabel->mod_counter; + } + } + ac->flag = 0; /* clear the general purpose flag */ + ac = ac->next; + } + + for (r = 0; r < raidPtr->numRow; r++) { + numFailuresThisRow = 0; + for (c = 0; c < raidPtr->numCol; c++) { + diskPtr = &disks[r][c]; + + /* find this row/col in the autoconfig */ +#if DEBUG + printf("Looking for %d,%d in autoconfig\n",r,c); +#endif + ac = auto_config; + while(ac!=NULL) { + if (ac->clabel==NULL) { + /* big-time bad news. */ + goto fail; + } + if ((ac->clabel->row == r) && + (ac->clabel->column == c) && + (ac->clabel->mod_counter == mod_counter)) { + /* it's this one... */ + /* flag it as 'used', so we don't + free it later. */ + ac->flag = 1; +#if DEBUG + printf("Found: %s at %d,%d\n", + ac->devname,r,c); +#endif + + break; + } + ac=ac->next; + } + + if (ac==NULL) { + /* we didn't find an exact match with a + correct mod_counter above... can we + find one with an incorrect mod_counter + to use instead? (this one, if we find + it, will be marked as failed once the + set configures) + */ + + ac = auto_config; + while(ac!=NULL) { + if (ac->clabel==NULL) { + /* big-time bad news. */ + goto fail; + } + if ((ac->clabel->row == r) && + (ac->clabel->column == c)) { + /* it's this one... + flag it as 'used', so we + don't free it later. */ + ac->flag = 1; +#if DEBUG + printf("Found(low mod_counter): %s at %d,%d\n", + ac->devname,r,c); +#endif + + break; + } + ac=ac->next; + } + } + + + + if (ac!=NULL) { + /* Found it. Configure it.. 
*/ + diskPtr->blockSize = ac->clabel->blockSize; + diskPtr->numBlocks = ac->clabel->numBlocks; + /* Note: rf_protectedSectors is already + factored into numBlocks here */ + raidPtr->raid_cinfo[r][c].ci_vp = ac->vp; + raidPtr->raid_cinfo[r][c].ci_dev = ac->dev; + + memcpy(&raidPtr->raid_cinfo[r][c].ci_label, + ac->clabel, sizeof(*ac->clabel)); + sprintf(diskPtr->devname, "/dev/%s", + ac->devname); + + /* note the fact that this component was + autoconfigured. You'll need this info + later. Trust me :) */ + diskPtr->auto_configured = 1; + diskPtr->dev = ac->dev; + + /* + * we allow the user to specify that + * only a fraction of the disks should + * be used this is just for debug: it + * speeds up the parity scan + */ + + diskPtr->numBlocks = diskPtr->numBlocks * + rf_sizePercentage / 100; + + /* XXX these will get set multiple times, + but since we're autoconfiguring, they'd + better be always the same each time! + If not, this is the least of your worries */ + + bs = diskPtr->blockSize; + min_numblks = diskPtr->numBlocks; + + /* this gets done multiple times, but that's + fine -- the serial number will be the same + for all components, guaranteed */ + raidPtr->serial_number = + ac->clabel->serial_number; + /* check the last time the label + was modified */ + if (ac->clabel->mod_counter != + mod_counter) { + /* Even though we've filled in all + of the above, we don't trust + this component since it's + modification counter is not + in sync with the rest, and we really + consider it to be failed. */ + disks[r][c].status = rf_ds_failed; + numFailuresThisRow++; + } else { + if (ac->clabel->clean != + RF_RAID_CLEAN) { + parity_good = RF_RAID_DIRTY; + } + } + } else { + /* Didn't find it at all!! 
+ Component must really be dead */ + disks[r][c].status = rf_ds_failed; + sprintf(disks[r][c].devname,"component%d", + r * raidPtr->numCol + c); + numFailuresThisRow++; + } + } + /* XXX fix for n-fault tolerant */ + /* XXX this should probably check to see how many failures + we can handle for this configuration! */ + if (numFailuresThisRow > 0) + raidPtr->status[r] = rf_rs_degraded; + } + + /* close the device for the ones that didn't get used */ + + ac = auto_config; + while(ac!=NULL) { + if (ac->flag == 0) { + VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); + vput(ac->vp); + ac->vp = NULL; +#if DEBUG + printf("Released %s from auto-config set.\n", + ac->devname); +#endif + } + ac = ac->next; + } + + raidPtr->mod_counter = mod_counter; + + /* note the state of the parity, if any */ + raidPtr->parity_good = parity_good; + raidPtr->sectorsPerDisk = min_numblks; + raidPtr->logBytesPerSector = ffs(bs) - 1; + raidPtr->bytesPerSector = bs; + raidPtr->sectorMask = bs - 1; + return (0); + +fail: + + rf_UnconfigureVnodes( raidPtr ); + + return (ret); + +} /* configure a single disk in the array */ int @@ -407,10 +660,13 @@ rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) diskPtr->blockSize = dpart.disklab->d_secsize; diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; - + diskPtr->partitionSize = dpart.part->p_size; + raidPtr->raid_cinfo[row][col].ci_vp = vp; raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev; + /* This component was not automatically configured */ + diskPtr->auto_configured = 0; diskPtr->dev = va.va_rdev; /* we allow the user to specify that only a fraction of the @@ -422,9 +678,6 @@ rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) return (0); } -static void rf_print_label_status( RF_Raid_t *, int, int, char *, - RF_ComponentLabel_t *); - static void rf_print_label_status( raidPtr, row, column, dev_name, ci_label ) RF_Raid_t *raidPtr; @@ -446,8 +699,6 @@ rf_print_label_status( raidPtr, row, column, dev_name, ci_label ) ci_label->clean ? 
"Yes" : "No", ci_label->status ); } -static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, - RF_ComponentLabel_t *, int, int ); static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label, serial_number, mod_counter ) RF_Raid_t *raidPtr; @@ -764,13 +1015,6 @@ rf_CheckLabels( raidPtr, cfgPtr ) return(fatal_error); } -int config_disk_queue(RF_Raid_t *, RF_DiskQueue_t *, RF_RowCol_t, - RF_RowCol_t, RF_DiskQueueSW_t *, - RF_SectorCount_t, dev_t, int, - RF_ShutdownList_t **, - RF_AllocListElem_t *); - -int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *); int rf_add_hot_spare(raidPtr, sparePtr) RF_Raid_t *raidPtr; @@ -782,8 +1026,10 @@ rf_add_hot_spare(raidPtr, sparePtr) unsigned int bs; int spare_number; +#if 0 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare); printf("Num col: %d\n",raidPtr->numCol); +#endif if (raidPtr->numSpare >= RF_MAXSPARE) { RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); return(EINVAL); @@ -843,12 +1089,13 @@ rf_add_hot_spare(raidPtr, sparePtr) } spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; - ret = config_disk_queue( raidPtr, &spareQueues[spare_number], + ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], 0, raidPtr->numCol + spare_number, - raidPtr->Queues[0][0].qPtr, /* XXX */ + raidPtr->qType, raidPtr->sectorsPerDisk, - raidPtr->Disks[0][raidPtr->numCol + spare_number].dev, - raidPtr->Queues[0][0].maxOutstanding, /* XXX */ + raidPtr->Disks[0][raidPtr->numCol + + spare_number].dev, + raidPtr->maxOutstanding, &raidPtr->shutdownList, raidPtr->cleanupList); @@ -892,3 +1139,36 @@ rf_remove_hot_spare(raidPtr,sparePtr) return (0); #endif } + +int +rf_delete_component(raidPtr,component) + RF_Raid_t *raidPtr; + RF_SingleComponent_t *component; +{ + RF_RaidDisk_t *disks; + + if ((component->row < 0) || + (component->row >= raidPtr->numRow) || + (component->column < 0) || + (component->column >= raidPtr->numCol)) { + return(EINVAL); + } + + disks = 
&raidPtr->Disks[component->row][component->column]; + + /* 1. This component must be marked as 'failed' */ + + return(EINVAL); /* Not implemented yet. */ +} + +int +rf_incorporate_hot_spare(raidPtr,component) + RF_Raid_t *raidPtr; + RF_SingleComponent_t *component; +{ + + /* Issues here include how to 'move' this in if there is IO + taking place (e.g. component queues and such) */ + + return(EINVAL); /* Not implemented yet. */ +} diff --git a/sys/dev/raidframe/rf_disks.h b/sys/dev/raidframe/rf_disks.h index bb3e551396d..bb15d365b5e 100644 --- a/sys/dev/raidframe/rf_disks.h +++ b/sys/dev/raidframe/rf_disks.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_disks.h,v 1.3 1999/07/30 14:45:32 peter Exp $ */ -/* $NetBSD: rf_disks.h,v 1.4 1999/02/24 00:00:03 oster Exp $ */ +/* $OpenBSD: rf_disks.h,v 1.4 2000/08/08 16:07:40 peter Exp $ */ +/* $NetBSD: rf_disks.h,v 1.8 2000/03/27 03:25:17 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -39,6 +39,12 @@ #include "rf_archs.h" #include "rf_types.h" +#if defined(__NetBSD__) +#include "rf_netbsd.h" +#elif defined(__OpenBSD__) +#include "rf_openbsd.h" +#endif + /* * A physical disk can be in one of several states: * IF YOU ADD A STATE, CHECK TO SEE IF YOU NEED TO MODIFY RF_DEAD_DISK() BELOW. @@ -68,16 +74,10 @@ struct RF_RaidDisk_s { RF_SectorCount_t numBlocks; /* number of blocks, obtained via READ * CAPACITY */ int blockSize; - /* XXX the following is needed since we seem to need SIMULATE defined - * in order to get user-land stuff to compile, but we *don't* want this - * in the structure for the user-land utilities, as the kernel doesn't - * know about it!! 
(and it messes up the size of the structure, so - * there is a communication problem between the kernel and the - * userland utils :-( GO */ -#if RF_KEEP_DISKSTATS > 0 - RF_uint64 nreads; - RF_uint64 nwrites; -#endif /* RF_KEEP_DISKSTATS > 0 */ + RF_SectorCount_t partitionSize; /* The *actual* and *full* size of + the partition, from the disklabel */ + int auto_configured;/* 1 if this component was autoconfigured. + 0 otherwise. */ dev_t dev; }; /* @@ -92,14 +92,27 @@ typedef void RF_DiskOp_t; ((_dstat_) == rf_ds_reconstructing) || ((_dstat_) == rf_ds_failed) || \ ((_dstat_) == rf_ds_dist_spared)) -int -rf_ConfigureDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int -rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, - RF_Config_t * cfgPtr); -int -rf_ConfigureDisk(RF_Raid_t * raidPtr, char *buf, RF_RaidDisk_t * diskPtr, - RF_RowCol_t row, RF_RowCol_t col); +#ifdef _KERNEL +#if defined(__NetBSD__) +#include "rf_netbsd.h" +#elif defined(__OpenBSD__) +#include "rf_openbsd.h" +#endif + +int rf_ConfigureDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr, + RF_Config_t * cfgPtr); +int rf_ConfigureDisk(RF_Raid_t * raidPtr, char *buf, RF_RaidDisk_t * diskPtr, + RF_RowCol_t row, RF_RowCol_t col); +int rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, + RF_AutoConfig_t *auto_config); +int rf_CheckLabels( RF_Raid_t *, RF_Config_t *); +int rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); +int rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); +int rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component); +int rf_incorporate_hot_spare(RF_Raid_t *raidPtr, + RF_SingleComponent_t *component); +#endif /* _KERNEL */ #endif /* !_RF__RF_DISKS_H_ */ diff --git a/sys/dev/raidframe/rf_driver.c b/sys/dev/raidframe/rf_driver.c index 95ef423ecf1..40d6191a4c4 
100644 --- a/sys/dev/raidframe/rf_driver.c +++ b/sys/dev/raidframe/rf_driver.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_driver.c,v 1.8 2000/01/11 18:02:21 peter Exp $ */ -/* $NetBSD: rf_driver.c,v 1.27 2000/01/09 03:44:33 oster Exp $ */ +/* $OpenBSD: rf_driver.c,v 1.9 2000/08/08 16:07:40 peter Exp $ */ +/* $NetBSD: rf_driver.c,v 1.37 2000/06/04 02:05:13 oster Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. @@ -140,8 +140,6 @@ static void clean_rad(RF_RaidAccessDesc_t *); static void rf_ShutdownRDFreeList(void *); static int rf_ConfigureRDFreeList(RF_ShutdownList_t **); -void rf_UnconfigureVnodes( RF_Raid_t * ); - RF_DECLARE_MUTEX(rf_printf_mutex) /* debug only: avoids interleaved * printfs by different stripes */ @@ -268,9 +266,16 @@ rf_Shutdown(raidPtr) } RF_FREELIST_DO_UNLOCK(rf_rad_freelist); + /* Wait for any parity re-writes to stop... */ + while (raidPtr->parity_rewrite_in_progress) { + printf("Waiting for parity re-write to exit...\n"); + tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO, + "rfprwshutdown", 0); + } + raidPtr->valid = 0; - rf_update_component_labels(raidPtr); + rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE); rf_UnconfigureVnodes(raidPtr); @@ -281,44 +286,6 @@ rf_Shutdown(raidPtr) return (0); } -void -rf_UnconfigureVnodes( raidPtr ) - RF_Raid_t *raidPtr; -{ - int r,c; - struct proc *p; - - /* We take this opportunity to close the vnodes like we should.. 
*/ - - p = raidPtr->engine_thread; - - for (r = 0; r < raidPtr->numRow; r++) { - for (c = 0; c < raidPtr->numCol; c++) { - printf("Closing vnode for row: %d col: %d\n", r, c); - if (raidPtr->raid_cinfo[r][c].ci_vp) { - VOP_UNLOCK(raidPtr->raid_cinfo[r][c].ci_vp, 0, p); - (void) vn_close(raidPtr->raid_cinfo[r][c].ci_vp, - FREAD | FWRITE, p->p_ucred, p); - raidPtr->raid_cinfo[r][c].ci_vp = NULL; - } else { - printf("vnode was NULL\n"); - } - - } - } - for (r = 0; r < raidPtr->numSpare; r++) { - printf("Closing vnode for spare: %d\n", r); - if (raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp) { - VOP_UNLOCK(raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp, 0, p); - (void) vn_close(raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp, - FREAD | FWRITE, p->p_ucred, p); - raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL; - } else { - printf("vnode was NULL\n"); - } - } -} - #define DO_INIT_CONFIGURE(f) { \ rc = f (&globalShutdown); \ if (rc) { \ @@ -366,14 +333,18 @@ rf_UnconfigureVnodes( raidPtr ) } int -rf_Configure(raidPtr, cfgPtr) +rf_Configure(raidPtr, cfgPtr, ac) RF_Raid_t *raidPtr; RF_Config_t *cfgPtr; + RF_AutoConfig_t *ac; { RF_RowCol_t row, col; int i, rc; - int unit; + /* XXX This check can probably be removed now, since + RAIDFRAME_CONFIGURRE now checks to make sure that the + RAID set is not already valid + */ if (raidPtr->valid) { RF_ERRORMSG("RAIDframe configuration not shut down. Aborting configure.\n"); return (EINVAL); @@ -389,15 +360,17 @@ rf_Configure(raidPtr, cfgPtr) return (rc); } /* initialize globals */ - printf("RAIDFRAME: protectedSectors is %ld\n", rf_protectedSectors); + printf("RAIDFRAME: protectedSectors is %ld\n", + rf_protectedSectors); rf_clear_debug_print_buffer(); DO_INIT_CONFIGURE(rf_ConfigureAllocList); + /* - * Yes, this does make debugging general to the whole system instead - * of being array specific. Bummer, drag. - */ + * Yes, this does make debugging general to the whole + * system instead of being array specific. 
Bummer, drag. + */ rf_ConfigureDebug(cfgPtr); DO_INIT_CONFIGURE(rf_ConfigureDebugMem); DO_INIT_CONFIGURE(rf_ConfigureAccessTrace); @@ -419,15 +392,6 @@ rf_Configure(raidPtr, cfgPtr) } RF_UNLOCK_MUTEX(configureMutex); - /* - * Null out the entire raid descriptor to avoid problems when we reconfig. - * This also clears the valid bit. - */ - /* XXX this clearing should be moved UP to outside of here.... that, - * or rf_Configure() needs to take more arguments... XXX */ - unit = raidPtr->raidid; - bzero((char *) raidPtr, sizeof(RF_Raid_t)); - raidPtr->raidid = unit; DO_RAID_MUTEX(&raidPtr->mutex); /* set up the cleanup list. Do this after ConfigureDebug so that * value of memDebug will be set */ @@ -488,8 +452,16 @@ rf_Configure(raidPtr, cfgPtr) DO_RAID_COND(&raidPtr->waitForReconCond); DO_RAID_MUTEX(&raidPtr->recon_done_proc_mutex); - DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks); - DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks); + + if (ac!=NULL) { + /* We have an AutoConfig structure.. Don't do the + normal disk configuration... call the auto config + stuff */ + rf_AutoConfigureDisks(raidPtr, cfgPtr, ac); + } else { + DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks); + DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks); + } /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev * no. 
is set */ DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues); @@ -507,6 +479,19 @@ rf_Configure(raidPtr, cfgPtr) } } + raidPtr->numNewFailures = 0; + raidPtr->copyback_in_progress = 0; + raidPtr->parity_rewrite_in_progress = 0; + raidPtr->recon_in_progress = 0; + raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs; + + /* autoconfigure and root_partition will actually get filled in + after the config is done */ + raidPtr->autoconfigure = 0; + raidPtr->root_partition = 0; + raidPtr->last_unit = raidPtr->raidid; + raidPtr->config_order = 0; + if (rf_keepAccTotals) { raidPtr->keep_acc_totals = 1; } @@ -735,6 +720,7 @@ rf_SetReconfiguredMode(raidPtr, row, col) raidPtr->numFailures++; raidPtr->Disks[row][col].status = rf_ds_dist_spared; raidPtr->status[row] = rf_rs_reconfigured; + rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); /* install spare table only if declustering + distributed sparing * architecture. */ if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED) @@ -758,6 +744,7 @@ rf_FailDisk( raidPtr->numFailures++; raidPtr->Disks[frow][fcol].status = rf_ds_failed; raidPtr->status[frow] = rf_rs_degraded; + rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); RF_UNLOCK_MUTEX(raidPtr->mutex); if (initRecon) rf_ReconstructFailedDisk(raidPtr, frow, fcol); @@ -796,12 +783,12 @@ rf_SuspendNewRequestsAndWait(raidPtr) if (raidPtr->waiting_for_quiescence) { raidPtr->access_suspend_release = 0; while (!raidPtr->access_suspend_release) { - printf("Suspending: Waiting for Quiesence\n"); + printf("Suspending: Waiting for Quiescence\n"); WAIT_FOR_QUIESCENCE(raidPtr); raidPtr->waiting_for_quiescence = 0; } } - printf("Quiesence reached..\n"); + printf("Quiescence reached..\n"); RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); return (raidPtr->waiting_for_quiescence); diff --git a/sys/dev/raidframe/rf_driver.h b/sys/dev/raidframe/rf_driver.h index 50eccc9491e..6e29c3c6d6c 100644 --- a/sys/dev/raidframe/rf_driver.h +++ b/sys/dev/raidframe/rf_driver.h @@ 
-1,5 +1,5 @@ -/* $OpenBSD: rf_driver.h,v 1.2 1999/02/16 00:02:41 niklas Exp $ */ -/* $NetBSD: rf_driver.h,v 1.3 1999/02/05 00:06:11 oster Exp $ */ +/* $OpenBSD: rf_driver.h,v 1.3 2000/08/08 16:07:41 peter Exp $ */ +/* $NetBSD: rf_driver.h,v 1.4 2000/02/13 04:53:57 oster Exp $ */ /* * rf_driver.h */ @@ -36,33 +36,49 @@ #include "rf_threadstuff.h" #include "rf_types.h" -RF_DECLARE_EXTERN_MUTEX(rf_printf_mutex) - int rf_BootRaidframe(void); - int rf_UnbootRaidframe(void); - int rf_Shutdown(RF_Raid_t * raidPtr); - int rf_Configure(RF_Raid_t * raidPtr, RF_Config_t * cfgPtr); - RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(RF_Raid_t * raidPtr, RF_IoType_t type, - RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, caddr_t bufPtr, - void *bp, RF_DagHeader_t ** paramDAG, RF_AccessStripeMapHeader_t ** paramASM, - RF_RaidAccessFlags_t flags, void (*cbF) (struct buf *), void *cbA, - RF_AccessState_t * states); - void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc); - int rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag, - RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, caddr_t bufPtr, - void *bp_in, RF_DagHeader_t ** paramDAG, - RF_AccessStripeMapHeader_t ** paramASM, RF_RaidAccessFlags_t flags, - RF_RaidAccessDesc_t ** paramDesc, void (*cbF) (struct buf *), void *cbA); - int rf_SetReconfiguredMode(RF_Raid_t * raidPtr, RF_RowCol_t row, - RF_RowCol_t col); - int rf_FailDisk(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol, - int initRecon); - void rf_SignalQuiescenceLock(RF_Raid_t * raidPtr, RF_RaidReconDesc_t * reconDesc); - int rf_SuspendNewRequestsAndWait(RF_Raid_t * raidPtr); - void rf_ResumeNewRequests(RF_Raid_t * raidPtr); - void rf_StartThroughputStats(RF_Raid_t * raidPtr); - void rf_StartUserStats(RF_Raid_t * raidPtr); - void rf_StopUserStats(RF_Raid_t * raidPtr); - void rf_UpdateUserStats(RF_Raid_t * raidPtr, int rt, int numsect); - void rf_PrintUserStats(RF_Raid_t * raidPtr); +#if defined(__NetBSD__) +#include "rf_netbsd.h" +#elif 
defined(__OpenBSD__) +#include "rf_openbsd.h" +#endif +#if _KERNEL +RF_DECLARE_EXTERN_MUTEX(rf_printf_mutex) +int rf_BootRaidframe(void); +int rf_UnbootRaidframe(void); +int rf_Shutdown(RF_Raid_t * raidPtr); +int rf_Configure(RF_Raid_t * raidPtr, RF_Config_t * cfgPtr, + RF_AutoConfig_t *ac); +RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(RF_Raid_t * raidPtr, RF_IoType_t type, + RF_RaidAddr_t raidAddress, + RF_SectorCount_t numBlocks, + caddr_t bufPtr, + void *bp, RF_DagHeader_t ** paramDAG, + RF_AccessStripeMapHeader_t ** paramASM, + RF_RaidAccessFlags_t flags, + void (*cbF) (struct buf *), + void *cbA, + RF_AccessState_t * states); +void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc); +int rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag, + RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, + caddr_t bufPtr, void *bp_in, RF_DagHeader_t ** paramDAG, + RF_AccessStripeMapHeader_t ** paramASM, + RF_RaidAccessFlags_t flags, + RF_RaidAccessDesc_t ** paramDesc, + void (*cbF) (struct buf *), void *cbA); +int rf_SetReconfiguredMode(RF_Raid_t * raidPtr, RF_RowCol_t row, + RF_RowCol_t col); +int rf_FailDisk(RF_Raid_t * raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol, + int initRecon); +void rf_SignalQuiescenceLock(RF_Raid_t * raidPtr, + RF_RaidReconDesc_t * reconDesc); +int rf_SuspendNewRequestsAndWait(RF_Raid_t * raidPtr); +void rf_ResumeNewRequests(RF_Raid_t * raidPtr); +void rf_StartThroughputStats(RF_Raid_t * raidPtr); +void rf_StartUserStats(RF_Raid_t * raidPtr); +void rf_StopUserStats(RF_Raid_t * raidPtr); +void rf_UpdateUserStats(RF_Raid_t * raidPtr, int rt, int numsect); +void rf_PrintUserStats(RF_Raid_t * raidPtr); +#endif /* _KERNEL */ #endif /* !_RF__RF_DRIVER_H_ */ diff --git a/sys/dev/raidframe/rf_evenodd_dagfuncs.c b/sys/dev/raidframe/rf_evenodd_dagfuncs.c index 0ea277c2c9c..7617b8241a3 100644 --- a/sys/dev/raidframe/rf_evenodd_dagfuncs.c +++ b/sys/dev/raidframe/rf_evenodd_dagfuncs.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_evenodd_dagfuncs.c,v 1.5 
2000/01/11 18:02:22 peter Exp $ */ -/* $NetBSD: rf_evenodd_dagfuncs.c,v 1.4 2000/01/07 03:41:00 oster Exp $ */ +/* $OpenBSD: rf_evenodd_dagfuncs.c,v 1.6 2000/08/08 16:07:41 peter Exp $ */ +/* $NetBSD: rf_evenodd_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -314,11 +314,11 @@ rf_e_EncOneSect( #if RF_EO_MATRIX_DIM > 17 int shortsPerEU = bytesPerEU / sizeof(short); short *destShortBuf, *srcShortBuf1, *srcShortBuf2; - register short temp1; + short temp1; #elif RF_EO_MATRIX_DIM == 17 int longsPerEU = bytesPerEU / sizeof(long); long *destLongBuf, *srcLongBuf1, *srcLongBuf2; - register long temp1; + long temp1; #endif #if RF_EO_MATRIX_DIM > 17 @@ -473,7 +473,7 @@ rf_doubleEOdecode( int shortsPerEU = bytesPerEU / sizeof(short); short *rrdbuf_current, *pbuf_current, *ebuf_current; short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; - register short *temp; + short *temp; short *P; RF_ASSERT(bytesPerEU % sizeof(short) == 0); @@ -483,7 +483,7 @@ rf_doubleEOdecode( int longsPerEU = bytesPerEU / sizeof(long); long *rrdbuf_current, *pbuf_current, *ebuf_current; long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; - register long *temp; + long *temp; long *P; RF_ASSERT(bytesPerEU % sizeof(long) == 0); @@ -713,7 +713,7 @@ rf_EvenOddDoubleRecoveryFunc(node) startSector = fsuoff[0]; endSector = fsuend[0]; - /* find out the the column of failed disk being accessed */ + /* find out the column of failed disk being accessed */ fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress); /* find out the other failed colume not accessed */ diff --git a/sys/dev/raidframe/rf_fifo.c b/sys/dev/raidframe/rf_fifo.c index 411f1b65cc8..4e37d7e36a0 100644 --- a/sys/dev/raidframe/rf_fifo.c +++ b/sys/dev/raidframe/rf_fifo.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_fifo.c,v 1.4 2000/01/11 18:02:22 peter Exp $ */ -/* $NetBSD: rf_fifo.c,v 1.4 2000/01/08 23:45:05 oster Exp $ */ +/* 
$OpenBSD: rf_fifo.c,v 1.5 2000/08/08 16:07:41 peter Exp $ */ +/* $NetBSD: rf_fifo.c,v 1.5 2000/03/04 03:27:13 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -147,92 +147,6 @@ rf_FifoDequeue(q_in) } return (nd); } -/* This never gets used!! No loss (I hope) if we don't include it... GO */ -#if !defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(_KERNEL) - -static RF_DiskQueueData_t * -n_in_q(headp, tailp, countp, n, deq) - RF_DiskQueueData_t **headp; - RF_DiskQueueData_t **tailp; - int *countp; - int n; - int deq; -{ - RF_DiskQueueData_t *r, *s; - int i; - - for (s = NULL, i = n, r = *headp; r; s = r, r = r->next) { - if (i == 0) - break; - i--; - } - RF_ASSERT(r != NULL); - if (deq == 0) - return (r); - if (s) { - s->next = r->next; - } else { - *headp = r->next; - } - if (*tailp == r) - *tailp = s; - (*countp)--; - return (r); -} -#endif - -#if !defined(_KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0 -RF_DiskQueueData_t * -rf_RandomPeek(q_in) - void *q_in; -{ - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_DiskQueueData_t *req; - int n; - - if (q->hq_head) { - n = q->rval % q->hq_count; - req = n_in_q(&q->hq_head, &q->hq_tail, &q->hq_count, n, 0); - } else { - RF_ASSERT(q->hq_count == 0); - if (q->lq_head == NULL) { - RF_ASSERT(q->lq_count == 0); - return (NULL); - } - n = q->rval % q->lq_count; - req = n_in_q(&q->lq_head, &q->lq_tail, &q->lq_count, n, 0); - } - RF_ASSERT((q->hq_count + q->lq_count) == req->queue->queueLength); - RF_ASSERT(req != NULL); - return (req); -} - -RF_DiskQueueData_t * -rf_RandomDequeue(q_in) - void *q_in; -{ - RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in; - RF_DiskQueueData_t *req; - int n; - - if (q->hq_head) { - n = q->rval % q->hq_count; - q->rval = (long) RF_STATIC_RANDOM(); - req = n_in_q(&q->hq_head, &q->hq_tail, &q->hq_count, n, 1); - } else { - RF_ASSERT(q->hq_count == 0); - if (q->lq_head == NULL) { - RF_ASSERT(q->lq_count == 0); - return (NULL); - } - n = q->rval % 
q->lq_count; - q->rval = (long) RF_STATIC_RANDOM(); - req = n_in_q(&q->lq_head, &q->lq_tail, &q->lq_count, n, 1); - } - RF_ASSERT((q->hq_count + q->lq_count) == (req->queue->queueLength - 1)); - return (req); -} -#endif /* !_KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */ /* Return ptr to item at head of queue. Used to examine request * info without actually dequeueing the request. diff --git a/sys/dev/raidframe/rf_general.h b/sys/dev/raidframe/rf_general.h index 7e4b9a1ade6..c69d97088a8 100644 --- a/sys/dev/raidframe/rf_general.h +++ b/sys/dev/raidframe/rf_general.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_general.h,v 1.3 2000/01/07 14:50:21 peter Exp $ */ -/* $NetBSD: rf_general.h,v 1.4 1999/12/07 02:40:28 oster Exp $ */ +/* $OpenBSD: rf_general.h,v 1.4 2000/08/08 16:07:41 peter Exp $ */ +/* $NetBSD: rf_general.h,v 1.5 2000/03/03 02:04:48 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -46,9 +46,7 @@ #define RF_ERRORMSG1(s,a) printf((s),(a)) #define RF_ERRORMSG2(s,a,b) printf((s),(a),(b)) #define RF_ERRORMSG3(s,a,b,c) printf((s),(a),(b),(c)) -#define RF_ERRORMSG4(s,a,b,c,d) printf((s),(a),(b),(c),(d)) -#define RF_ERRORMSG5(s,a,b,c,d,e) printf((s),(a),(b),(c),(d),(e)) -#define perror(x) + extern char rf_panicbuf[]; #define RF_PANIC() {sprintf(rf_panicbuf,"raidframe error at line %d file %s",__LINE__,__FILE__); panic(rf_panicbuf);} diff --git a/sys/dev/raidframe/rf_kintf.h b/sys/dev/raidframe/rf_kintf.h index 00f3fd8bbd4..65e5c88c07a 100644 --- a/sys/dev/raidframe/rf_kintf.h +++ b/sys/dev/raidframe/rf_kintf.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_kintf.h,v 1.5 2000/01/11 18:02:22 peter Exp $ */ -/* $NetBSD: rf_kintf.h,v 1.7 2000/01/09 01:29:27 oster Exp $ */ +/* $OpenBSD: rf_kintf.h,v 1.6 2000/08/08 16:07:41 peter Exp $ */ +/* $NetBSD: rf_kintf.h,v 1.14 2000/06/04 02:05:13 oster Exp $ */ /* * rf_kintf.h * @@ -43,9 +43,15 @@ int rf_DispatchKernelIO(RF_DiskQueue_t * queue, RF_DiskQueueData_t * req); int raidwrite_component_label(dev_t, struct 
vnode *, RF_ComponentLabel_t *); int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *); -void rf_update_component_labels( RF_Raid_t *); -int raidlookup __P((char *, struct proc *, struct vnode **)); + +#define RF_NORMAL_COMPONENT_UPDATE 0 +#define RF_FINAL_COMPONENT_UPDATE 1 +void rf_update_component_labels(RF_Raid_t *, int); +int raidlookup(char *, struct proc *, struct vnode **); int raidmarkclean(dev_t dev, struct vnode *b_vp, int); int raidmarkdirty(dev_t dev, struct vnode *b_vp, int); - +void raid_init_component_label(RF_Raid_t *, RF_ComponentLabel_t *); +void rf_print_component_label(RF_ComponentLabel_t *); +void rf_UnconfigureVnodes( RF_Raid_t * ); +void rf_close_component( RF_Raid_t *, struct vnode *, int); #endif /* _RF__RF_KINTF_H_ */ diff --git a/sys/dev/raidframe/rf_layout.c b/sys/dev/raidframe/rf_layout.c index 3b337687a72..de80c0096b6 100644 --- a/sys/dev/raidframe/rf_layout.c +++ b/sys/dev/raidframe/rf_layout.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_layout.c,v 1.3 1999/07/30 14:45:32 peter Exp $ */ -/* $NetBSD: rf_layout.c,v 1.4 1999/07/19 01:35:19 oster Exp $ */ +/* $OpenBSD: rf_layout.c,v 1.4 2000/08/08 16:07:42 peter Exp $ */ +/* $NetBSD: rf_layout.c,v 1.6 2000/04/17 19:35:12 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. diff --git a/sys/dev/raidframe/rf_layout.h b/sys/dev/raidframe/rf_layout.h index d2b7cbc7c2a..a368fc8663a 100644 --- a/sys/dev/raidframe/rf_layout.h +++ b/sys/dev/raidframe/rf_layout.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_layout.h,v 1.3 1999/08/04 13:10:54 peter Exp $ */ -/* $NetBSD: rf_layout.h,v 1.3 1999/02/05 00:06:12 oster Exp $ */ +/* $OpenBSD: rf_layout.h,v 1.4 2000/08/08 16:07:42 peter Exp $ */ +/* $NetBSD: rf_layout.h,v 1.4 2000/05/23 00:44:38 thorpej Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -49,7 +49,7 @@ typedef struct RF_LayoutSW_s { RF_ParityConfig_t parityConfig; - char *configName; + const char *configName; #ifndef _KERNEL /* layout-specific parsing */ diff --git a/sys/dev/raidframe/rf_map.c b/sys/dev/raidframe/rf_map.c index afe37457e99..db5d6c7fd1c 100644 --- a/sys/dev/raidframe/rf_map.c +++ b/sys/dev/raidframe/rf_map.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_map.c,v 1.3 2000/01/07 14:50:21 peter Exp $ */ -/* $NetBSD: rf_map.c,v 1.4 1999/08/13 03:41:56 oster Exp $ */ +/* $OpenBSD: rf_map.c,v 1.4 2000/08/08 16:07:42 peter Exp $ */ +/* $NetBSD: rf_map.c,v 1.5 2000/06/29 00:22:27 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -269,7 +269,6 @@ rf_MarkFailuresInASMList(raidPtr, asm_h) RF_MAX_FAILED_PDA * sizeof(RF_PhysDiskAddr_t *)); for (pda = asmap->physInfo; pda; pda = pda->next) { if (RF_DEAD_DISK(disks[pda->row][pda->col].status)) { - printf("DEAD DISK BOGUSLY DETECTED!!\n"); asmap->numDataFailed++; asmap->failedPDAs[asmap->numFailedPDAs] = pda; asmap->numFailedPDAs++; diff --git a/sys/dev/raidframe/rf_netbsd.h b/sys/dev/raidframe/rf_netbsd.h index b69f2f78c1e..0f3a18d3811 100644 --- a/sys/dev/raidframe/rf_netbsd.h +++ b/sys/dev/raidframe/rf_netbsd.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_netbsd.h,v 1.4 1999/08/02 12:29:31 peter Exp $ */ -/* $NetBSD: rf_netbsd.h,v 1.6 1999/05/13 21:46:17 ad Exp $ */ +/* $OpenBSD: rf_netbsd.h,v 1.5 2000/08/08 16:07:42 peter Exp $ */ +/* $NetBSD: rf_netbsd.h,v 1.12 2000/05/28 22:53:49 oster Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. @@ -69,6 +69,33 @@ typedef struct RF_ComponentLabel_s { int num_columns; /* number of columns in this RAID set */ int clean; /* 1 when clean, 0 when dirty */ int status; /* rf_ds_optimal, rf_ds_dist_spared, whatever. 
*/ + /* stuff that will be in version 2 of the label */ + int sectPerSU; /* Sectors per Stripe Unit */ + int SUsPerPU; /* Stripe Units per Parity Units */ + int SUsPerRU; /* Stripe Units per Reconstruction Units */ + int parityConfig; /* '0' == RAID0, '1' == RAID1, etc. */ + int maxOutstanding; /* maxOutstanding disk requests */ + int blockSize; /* size of component block. + (disklabel->d_secsize) */ + int numBlocks; /* number of blocks on this component. May + be smaller than the partition size. */ + int partitionSize; /* number of blocks on this *partition*. + Must exactly match the partition size + from the disklabel. */ + int future_use[33]; /* Future expansion */ + int autoconfigure; /* automatically configure this RAID set. + 0 == no, 1 == yes */ + int root_partition; /* Use this set as / + 0 == no, 1 == yes*/ + int last_unit; /* last unit number (e.g. 0 for /dev/raid0) + of this component. Used for autoconfigure + only. */ + int config_order; /* 0 .. n. The order in which the component + should be auto-configured. E.g. 0 is will + done first, (and would become raid0). + This may be in conflict with last_unit!!?! */ + /* Not currently used. */ + int future_use2[44]; /* More future expansion */ } RF_ComponentLabel_t; typedef struct RF_SingleComponent_s { @@ -79,11 +106,6 @@ typedef struct RF_SingleComponent_s { #ifdef _KERNEL -/* XXX this is *not* the place for these... */ -int rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); -int rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); - - struct raidcinfo { struct vnode *ci_vp; /* component device's vnode */ dev_t ci_dev; /* component device's dev_t */ @@ -93,6 +115,26 @@ int rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); char *ci_path; /* path to component */ size_t ci_pathlen; /* length of component path */ #endif -}; + }; + + +/* XXX probably belongs in a different .h file. 
*/ +typedef struct RF_AutoConfig_s { + char devname[56]; /* the name of this component */ + int flag; /* a general-purpose flag */ + dev_t dev; /* the device for this component */ + struct vnode *vp; /* Mr. Vnode Pointer */ + RF_ComponentLabel_t *clabel; /* the label */ + struct RF_AutoConfig_s *next; /* the next autoconfig structure + in this set. */ +} RF_AutoConfig_t; + +typedef struct RF_ConfigSet_s { + struct RF_AutoConfig_s *ac; /* all of the autoconfig structures for + this config set. */ + int rootable; /* Set to 1 if this set can be root */ + struct RF_ConfigSet_s *next; +} RF_ConfigSet_t; + #endif /* _KERNEL */ #endif /* _RF__RF_NETBSDSTUFF_H_ */ diff --git a/sys/dev/raidframe/rf_netbsdkintf.c b/sys/dev/raidframe/rf_netbsdkintf.c index 2ad2de25273..61f855fdaaa 100644 --- a/sys/dev/raidframe/rf_netbsdkintf.c +++ b/sys/dev/raidframe/rf_netbsdkintf.c @@ -1,4 +1,4 @@ -/* $NetBSD: rf_netbsdkintf.c,v 1.46 2000/01/09 03:39:13 oster Exp $ */ +/* $NetBSD: rf_netbsdkintf.c,v 1.93 2000/07/14 15:26:29 oster Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. 
@@ -133,8 +133,10 @@ #include <sys/lock.h> #include <sys/buf.h> #include <sys/user.h> +#include <sys/reboot.h> #include "raid.h" +#include "opt_raid_autoconfig.h" #include "rf_raid.h" #include "rf_raidframe.h" #include "rf_copyback.h" @@ -151,6 +153,7 @@ #include "rf_parityscan.h" #include "rf_debugprint.h" #include "rf_threadstuff.h" +#include "rf_configure.h" int rf_kdebug_level = 0; @@ -176,7 +179,7 @@ static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag, RF_SectorCount_t numSect, caddr_t buf, void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector, struct proc * b_proc); -static int raidinit __P((dev_t, RF_Raid_t *, int)); +static void raidinit __P((RF_Raid_t *)); void raidattach __P((int)); int raidsize __P((dev_t)); @@ -204,17 +207,17 @@ struct raidbuf { #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp) /* XXX Not sure if the following should be replacing the raidPtrs above, - or if it should be used in conjunction with that... */ + or if it should be used in conjunction with that... +*/ struct raid_softc { int sc_flags; /* flags */ int sc_cflags; /* configuration flags */ size_t sc_size; /* size of the raid device */ - dev_t sc_dev; /* our device.. */ char sc_xname[20]; /* XXX external name */ struct disk sc_dkdev; /* generic disk device info */ struct pool sc_cbufpool; /* component buffer pool */ - struct buf buf_queue; /* used for the device queue */ + struct buf_queue buf_queue; /* used for the device queue */ }; /* sc_flags */ #define RAIDF_INITED 0x01 /* unit has been initialized */ @@ -224,7 +227,7 @@ struct raid_softc { #define RAIDF_LOCKED 0x80 /* unit is locked */ #define raidunit(x) DISKUNIT(x) -static int numraid = 0; +int numraid = 0; /* * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 
@@ -262,6 +265,9 @@ static int raidlock __P((struct raid_softc *)); static void raidunlock __P((struct raid_softc *)); static void rf_markalldirty __P((RF_Raid_t *)); +void rf_mountroot_hook __P((struct device *)); + +struct device *raidrootdev; void rf_ReconThread __P((struct rf_recon_req *)); /* XXX what I want is: */ @@ -269,6 +275,26 @@ void rf_ReconThread __P((struct rf_recon_req *)); void rf_RewriteParityThread __P((RF_Raid_t *raidPtr)); void rf_CopybackThread __P((RF_Raid_t *raidPtr)); void rf_ReconstructInPlaceThread __P((struct rf_recon_req *)); +void rf_buildroothack __P((void *)); + +RF_AutoConfig_t *rf_find_raid_components __P((void)); +RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *)); +static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *)); +static int rf_reasonable_label __P((RF_ComponentLabel_t *)); +void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *, + RF_Raid_t *)); +int rf_set_autoconfig __P((RF_Raid_t *, int)); +int rf_set_rootpartition __P((RF_Raid_t *, int)); +void rf_release_all_vps __P((RF_ConfigSet_t *)); +void rf_cleanup_config_set __P((RF_ConfigSet_t *)); +int rf_have_enough_components __P((RF_ConfigSet_t *)); +int rf_auto_config_set __P((RF_ConfigSet_t *, int *)); + +static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not + allow autoconfig to take place. + Note that this is overridden by having + RAID_AUTOCONFIG as an option in the + kernel config file. */ void raidattach(num) @@ -276,6 +302,8 @@ raidattach(num) { int raidID; int i, rc; + RF_AutoConfig_t *ac_list; /* autoconfig list */ + RF_ConfigSet_t *config_sets; #ifdef DEBUG printf("raidattach: Asked for %d units\n", num); @@ -289,6 +317,8 @@ raidattach(num) } /* This is where all the initialization stuff gets done. */ + numraid = num; + /* Make some space for requested number of units... 
*/ RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **)); @@ -303,7 +333,7 @@ raidattach(num) rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; - for (i = 0; i < numraid; i++) + for (i = 0; i < num; i++) raidPtrs[i] = NULL; rc = rf_BootRaidframe(); if (rc == 0) @@ -316,15 +346,30 @@ raidattach(num) raid_softc = (struct raid_softc *) malloc(num * sizeof(struct raid_softc), - M_RAIDFRAME, M_NOWAIT); + M_RAIDFRAME, M_NOWAIT); if (raid_softc == NULL) { printf("WARNING: no memory for RAIDframe driver\n"); return; } - numraid = num; + bzero(raid_softc, num * sizeof(struct raid_softc)); + raidrootdev = (struct device *)malloc(num * sizeof(struct device), + M_RAIDFRAME, M_NOWAIT); + if (raidrootdev == NULL) { + panic("No memory for RAIDframe driver!!?!?!\n"); + } + for (raidID = 0; raidID < num; raidID++) { + BUFQ_INIT(&raid_softc[raidID].buf_queue); + + raidrootdev[raidID].dv_class = DV_DISK; + raidrootdev[raidID].dv_cfdata = NULL; + raidrootdev[raidID].dv_unit = raidID; + raidrootdev[raidID].dv_parent = NULL; + raidrootdev[raidID].dv_flags = 0; + sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID); + raid_softc[raidID].buf_queue.b_actf = NULL; raid_softc[raidID].buf_queue.b_actb = &raid_softc[raidID].buf_queue.b_actf; @@ -336,6 +381,90 @@ raidattach(num) return; } } + +#if RAID_AUTOCONFIG + raidautoconfig = 1; +#endif + +if (raidautoconfig) { + /* 1. locate all RAID components on the system */ + +#if DEBUG + printf("Searching for raid components...\n"); +#endif + ac_list = rf_find_raid_components(); + + /* 2. sort them into their respective sets */ + + config_sets = rf_create_auto_sets(ac_list); + + /* 3. 
evaluate each set and configure the valid ones + This gets done in rf_buildroothack() */ + + /* schedule the creation of the thread to do the + "/ on RAID" stuff */ + + kthread_create(rf_buildroothack,config_sets); + +#if 0 + mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]); +#endif +} + +} + +void +rf_buildroothack(arg) + void *arg; +{ + RF_ConfigSet_t *config_sets = arg; + RF_ConfigSet_t *cset; + RF_ConfigSet_t *next_cset; + int retcode; + int raidID; + int rootID; + int num_root; + + num_root = 0; + cset = config_sets; + while(cset != NULL ) { + next_cset = cset->next; + if (rf_have_enough_components(cset) && + cset->ac->clabel->autoconfigure==1) { + retcode = rf_auto_config_set(cset,&raidID); + if (!retcode) { + if (cset->rootable) { + rootID = raidID; + num_root++; + } + } else { + /* The autoconfig didn't work :( */ +#if DEBUG + printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID); +#endif + rf_release_all_vps(cset); + } + } else { + /* we're not autoconfiguring this set... + release the associated resources */ + rf_release_all_vps(cset); + } + /* cleanup */ + rf_cleanup_config_set(cset); + cset = next_cset; + } + if (boothowto & RB_ASKNAME) { + /* We don't auto-config... */ + } else { + /* They didn't ask, and we found something bootable... */ + + if (num_root == 1) { + booted_device = &raidrootdev[rootID]; + } else if (num_root > 1) { + /* we can't guess.. require the user to answer... */ + boothowto |= RB_ASKNAME; + } + } } @@ -504,7 +633,11 @@ raidclose(dev, flags, fmt, p) Device shutdown has taken care of setting the clean bits if RAIDF_INITED is not set mark things as clean... */ - rf_update_component_labels( raidPtrs[unit] ); +#if 0 + printf("Last one on raid%d. 
Updating status.\n",unit); +#endif + rf_update_component_labels(raidPtrs[unit], + RF_FINAL_COMPONENT_UPDATE); } raidunlock(rs); @@ -514,15 +647,14 @@ raidclose(dev, flags, fmt, p) void raidstrategy(bp) - register struct buf *bp; + struct buf *bp; { - register int s; + int s; unsigned int raidID = raidunit(bp->b_dev); RF_Raid_t *raidPtr; struct raid_softc *rs = &raid_softc[raidID]; struct disklabel *lp; - struct buf *dp; int wlabel; if ((rs->sc_flags & RAIDF_INITED) ==0) { @@ -572,13 +704,8 @@ raidstrategy(bp) bp->b_resid = 0; /* stuff it onto our queue */ + BUFQ_INSERT_TAIL(&rs->buf_queue, bp); - dp = &rs->buf_queue; - bp->b_actf = NULL; - bp->b_actb = dp->b_actb; - *dp->b_actb = bp; - dp->b_actb = &bp->b_actf; - raidstart(raidPtrs[raidID]); splx(s); @@ -642,6 +769,7 @@ raidioctl(dev, cmd, data, flag, p) struct raid_softc *rs; RF_Config_t *k_cfg, *u_cfg; RF_Raid_t *raidPtr; + RF_RaidDisk_t *diskPtr; RF_AccTotals_t *totals; RF_DeviceConfig_t *d_cfg, **ucfgp; u_char *specific_buf; @@ -649,12 +777,13 @@ raidioctl(dev, cmd, data, flag, p) int row; int column; struct rf_recon_req *rrcopy, *rr; - RF_ComponentLabel_t *component_label; + RF_ComponentLabel_t *clabel; RF_ComponentLabel_t ci_label; - RF_ComponentLabel_t **c_label_ptr; + RF_ComponentLabel_t **clabel_ptr; RF_SingleComponent_t *sparePtr,*componentPtr; RF_SingleComponent_t hot_spare; RF_SingleComponent_t component; + RF_ProgressInfo_t progressInfo, **progressInfoPtr; int i, j, d; if (unit >= numraid) @@ -692,6 +821,7 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_FAIL_DISK: case RAIDFRAME_COPYBACK: case RAIDFRAME_CHECK_RECON_STATUS: + case RAIDFRAME_CHECK_RECON_STATUS_EXT: case RAIDFRAME_GET_COMPONENT_LABEL: case RAIDFRAME_SET_COMPONENT_LABEL: case RAIDFRAME_ADD_HOT_SPARE: @@ -700,7 +830,13 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_REBUILD_IN_PLACE: case RAIDFRAME_CHECK_PARITY: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: + case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: case 
RAIDFRAME_CHECK_COPYBACK_STATUS: + case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: + case RAIDFRAME_SET_AUTOCONFIG: + case RAIDFRAME_SET_ROOT: + case RAIDFRAME_DELETE_COMPONENT: + case RAIDFRAME_INCORPORATE_HOT_SPARE: if ((rs->sc_flags & RAIDF_INITED) == 0) return (ENXIO); } @@ -709,6 +845,13 @@ raidioctl(dev, cmd, data, flag, p) /* configure the system */ case RAIDFRAME_CONFIGURE: + + if (raidPtr->valid) { + /* There is a valid RAID set running on this unit! */ + printf("raid%d: Device already configured!\n",unit); + return(EINVAL); + } + /* copy-in the configuration information */ /* data points to a pointer to the configuration structure */ @@ -759,24 +902,24 @@ raidioctl(dev, cmd, data, flag, p) /* configure the system */ + /* + * Clear the entire RAID descriptor, just to make sure + * there is no stale data left in the case of a + * reconfiguration + */ + bzero((char *) raidPtr, sizeof(RF_Raid_t)); raidPtr->raidid = unit; - retcode = rf_Configure(raidPtr, k_cfg); + retcode = rf_Configure(raidPtr, k_cfg, NULL); if (retcode == 0) { /* allow this many simultaneous IO's to this RAID device */ raidPtr->openings = RAIDOUTSTANDING; - - /* XXX should be moved to rf_Configure() */ - - raidPtr->copyback_in_progress = 0; - raidPtr->parity_rewrite_in_progress = 0; - raidPtr->recon_in_progress = 0; - - retcode = raidinit(dev, raidPtr, unit); - rf_markalldirty( raidPtr ); + + raidinit(raidPtr); + rf_markalldirty(raidPtr); } /* free the buffers. No return code here. 
*/ if (k_cfg->layoutSpecificSize) { @@ -820,49 +963,49 @@ raidioctl(dev, cmd, data, flag, p) return (retcode); case RAIDFRAME_GET_COMPONENT_LABEL: - c_label_ptr = (RF_ComponentLabel_t **) data; + clabel_ptr = (RF_ComponentLabel_t **) data; /* need to read the component label for the disk indicated - by row,column in component_label */ + by row,column in clabel */ /* For practice, let's get it directly fromdisk, rather than from the in-core copy */ - RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ), + RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), (RF_ComponentLabel_t *)); - if (component_label == NULL) + if (clabel == NULL) return (ENOMEM); - bzero((char *) component_label, sizeof(RF_ComponentLabel_t)); + bzero((char *) clabel, sizeof(RF_ComponentLabel_t)); - retcode = copyin( *c_label_ptr, component_label, + retcode = copyin( *clabel_ptr, clabel, sizeof(RF_ComponentLabel_t)); if (retcode) { - RF_Free( component_label, sizeof(RF_ComponentLabel_t)); + RF_Free( clabel, sizeof(RF_ComponentLabel_t)); return(retcode); } - row = component_label->row; - column = component_label->column; + row = clabel->row; + column = clabel->column; if ((row < 0) || (row >= raidPtr->numRow) || - (column < 0) || (column >= raidPtr->numCol)) { - RF_Free( component_label, sizeof(RF_ComponentLabel_t)); + (column < 0) || (column >= raidPtr->numCol + + raidPtr->numSpare)) { + RF_Free( clabel, sizeof(RF_ComponentLabel_t)); return(EINVAL); } - raidread_component_label( - raidPtr->Disks[row][column].dev, - raidPtr->raid_cinfo[row][column].ci_vp, - component_label ); + raidread_component_label(raidPtr->Disks[row][column].dev, + raidPtr->raid_cinfo[row][column].ci_vp, + clabel ); - retcode = copyout((caddr_t) component_label, - (caddr_t) *c_label_ptr, + retcode = copyout((caddr_t) clabel, + (caddr_t) *clabel_ptr, sizeof(RF_ComponentLabel_t)); - RF_Free( component_label, sizeof(RF_ComponentLabel_t)); + RF_Free( clabel, sizeof(RF_ComponentLabel_t)); return (retcode); case 
RAIDFRAME_SET_COMPONENT_LABEL: - component_label = (RF_ComponentLabel_t *) data; + clabel = (RF_ComponentLabel_t *) data; /* XXX check the label for valid stuff... */ /* Note that some things *should not* get modified -- @@ -871,18 +1014,18 @@ raidioctl(dev, cmd, data, flag, p) */ printf("Got component label:\n"); - printf("Version: %d\n",component_label->version); - printf("Serial Number: %d\n",component_label->serial_number); - printf("Mod counter: %d\n",component_label->mod_counter); - printf("Row: %d\n", component_label->row); - printf("Column: %d\n", component_label->column); - printf("Num Rows: %d\n", component_label->num_rows); - printf("Num Columns: %d\n", component_label->num_columns); - printf("Clean: %d\n", component_label->clean); - printf("Status: %d\n", component_label->status); - - row = component_label->row; - column = component_label->column; + printf("Version: %d\n",clabel->version); + printf("Serial Number: %d\n",clabel->serial_number); + printf("Mod counter: %d\n",clabel->mod_counter); + printf("Row: %d\n", clabel->row); + printf("Column: %d\n", clabel->column); + printf("Num Rows: %d\n", clabel->num_rows); + printf("Num Columns: %d\n", clabel->num_columns); + printf("Clean: %d\n", clabel->clean); + printf("Status: %d\n", clabel->status); + + row = clabel->row; + column = clabel->column; if ((row < 0) || (row >= raidPtr->numRow) || (column < 0) || (column >= raidPtr->numCol)) { @@ -890,16 +1033,19 @@ raidioctl(dev, cmd, data, flag, p) } /* XXX this isn't allowed to do anything for now :-) */ + + /* XXX and before it is, we need to fill in the rest + of the fields!?!?!?! */ #if 0 raidwrite_component_label( raidPtr->Disks[row][column].dev, raidPtr->raid_cinfo[row][column].ci_vp, - component_label ); + clabel ); #endif return (0); case RAIDFRAME_INIT_LABELS: - component_label = (RF_ComponentLabel_t *) data; + clabel = (RF_ComponentLabel_t *) data; /* we only want the serial number from the above. 
We get all the rest of the information @@ -907,19 +1053,16 @@ raidioctl(dev, cmd, data, flag, p) set. */ - raidPtr->serial_number = component_label->serial_number; - /* current version number */ - ci_label.version = RF_COMPONENT_LABEL_VERSION; - ci_label.serial_number = component_label->serial_number; - ci_label.mod_counter = raidPtr->mod_counter; - ci_label.num_rows = raidPtr->numRow; - ci_label.num_columns = raidPtr->numCol; - ci_label.clean = RF_RAID_DIRTY; /* not clean */ - ci_label.status = rf_ds_optimal; /* "It's good!" */ + raidPtr->serial_number = clabel->serial_number; + + raid_init_component_label(raidPtr, &ci_label); + ci_label.serial_number = clabel->serial_number; for(row=0;row<raidPtr->numRow;row++) { ci_label.row = row; for(column=0;column<raidPtr->numCol;column++) { + diskPtr = &raidPtr->Disks[row][column]; + ci_label.partitionSize = diskPtr->partitionSize; ci_label.column = column; raidwrite_component_label( raidPtr->Disks[row][column].dev, @@ -929,6 +1072,17 @@ raidioctl(dev, cmd, data, flag, p) } return (retcode); + case RAIDFRAME_SET_AUTOCONFIG: + d = rf_set_autoconfig(raidPtr, *(int *) data); + printf("New autoconfig value is: %d\n", d); + *(int *) data = d; + return (retcode); + + case RAIDFRAME_SET_ROOT: + d = rf_set_rootpartition(raidPtr, *(int *) data); + printf("New rootpartition value is: %d\n", d); + *(int *) data = d; + return (retcode); /* initialize all parity */ case RAIDFRAME_REWRITEPARITY: @@ -953,13 +1107,26 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_ADD_HOT_SPARE: sparePtr = (RF_SingleComponent_t *) data; memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); - printf("Adding spare\n"); retcode = rf_add_hot_spare(raidPtr, &hot_spare); return(retcode); case RAIDFRAME_REMOVE_HOT_SPARE: return(retcode); + case RAIDFRAME_DELETE_COMPONENT: + componentPtr = (RF_SingleComponent_t *)data; + memcpy( &component, componentPtr, + sizeof(RF_SingleComponent_t)); + retcode = rf_delete_component(raidPtr, &component); + 
return(retcode); + + case RAIDFRAME_INCORPORATE_HOT_SPARE: + componentPtr = (RF_SingleComponent_t *)data; + memcpy( &component, componentPtr, + sizeof(RF_SingleComponent_t)); + retcode = rf_incorporate_hot_spare(raidPtr, &component); + return(retcode); + case RAIDFRAME_REBUILD_IN_PLACE: if (raidPtr->Layout.map->faultsTolerated == 0) { @@ -1117,22 +1284,65 @@ raidioctl(dev, cmd, data, flag, p) *(int *) data = raidPtr->reconControl[row]->percentComplete; return (0); + case RAIDFRAME_CHECK_RECON_STATUS_EXT: + progressInfoPtr = (RF_ProgressInfo_t **) data; + row = 0; /* XXX we only consider a single row... */ + if (raidPtr->status[row] != rf_rs_reconstructing) { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } else { + progressInfo.total = + raidPtr->reconControl[row]->numRUsTotal; + progressInfo.completed = + raidPtr->reconControl[row]->numRUsComplete; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } + retcode = copyout((caddr_t) &progressInfo, + (caddr_t) *progressInfoPtr, + sizeof(RF_ProgressInfo_t)); + return (retcode); + case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0 */ - return(EINVAL); + /* This makes no sense on a RAID 0, so tell the + user it's done. 
*/ + *(int *) data = 100; + return(0); } if (raidPtr->parity_rewrite_in_progress == 1) { - *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe; + *(int *) data = 100 * + raidPtr->parity_rewrite_stripes_done / + raidPtr->Layout.numStripe; } else { *(int *) data = 100; } return (0); + case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: + progressInfoPtr = (RF_ProgressInfo_t **) data; + if (raidPtr->parity_rewrite_in_progress == 1) { + progressInfo.total = raidPtr->Layout.numStripe; + progressInfo.completed = + raidPtr->parity_rewrite_stripes_done; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } else { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } + retcode = copyout((caddr_t) &progressInfo, + (caddr_t) *progressInfoPtr, + sizeof(RF_ProgressInfo_t)); + return (retcode); + case RAIDFRAME_CHECK_COPYBACK_STATUS: if (raidPtr->Layout.map->faultsTolerated == 0) { /* This makes no sense on a RAID 0 */ - return(EINVAL); + *(int *) data = 100; + return(0); } if (raidPtr->copyback_in_progress == 1) { *(int *) data = 100 * raidPtr->copyback_stripes_done / raidPtr->Layout.numStripe; @@ -1141,6 +1351,23 @@ raidioctl(dev, cmd, data, flag, p) } return (0); + case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: + progressInfoPtr = (RF_ProgressInfo_t **) data; + if (raidPtr->copyback_in_progress == 1) { + progressInfo.total = raidPtr->Layout.numStripe; + progressInfo.completed = + raidPtr->copyback_stripes_done; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } else { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } + retcode = copyout((caddr_t) &progressInfo, + (caddr_t) *progressInfoPtr, + sizeof(RF_ProgressInfo_t)); + return (retcode); /* the sparetable daemon calls this to wait for the kernel to * need a spare table. 
this ioctl does not return until a @@ -1266,16 +1493,14 @@ raidioctl(dev, cmd, data, flag, p) RAIDframe device. */ -static int -raidinit(dev, raidPtr, unit) - dev_t dev; +static void +raidinit(raidPtr) RF_Raid_t *raidPtr; - int unit; { - int retcode; struct raid_softc *rs; + int unit; - retcode = 0; + unit = raidPtr->raidid; rs = &raid_softc[unit]; pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0, @@ -1299,9 +1524,7 @@ raidinit(dev, raidPtr, unit) * protectedSectors, as used in RAIDframe. */ rs->sc_size = raidPtr->totalSectors; - rs->sc_dev = dev; - return (retcode); } /* wake up the daemon & tell it to get us a spare table @@ -1360,32 +1583,30 @@ raidstart(raidPtr) struct raid_softc *rs; int do_async; struct buf *bp; - struct buf *dp; unit = raidPtr->raidid; rs = &raid_softc[unit]; + /* quick check to see if anything has died recently */ + RF_LOCK_MUTEX(raidPtr->mutex); + if (raidPtr->numNewFailures > 0) { + rf_update_component_labels(raidPtr, + RF_NORMAL_COMPONENT_UPDATE); + raidPtr->numNewFailures--; + } + RF_UNLOCK_MUTEX(raidPtr->mutex); + /* Check to see if we're at the limit... */ RF_LOCK_MUTEX(raidPtr->mutex); while (raidPtr->openings > 0) { RF_UNLOCK_MUTEX(raidPtr->mutex); /* get the next item, if any, from the queue */ - dp = &rs->buf_queue; - bp = dp->b_actf; - if (bp == NULL) { + if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) { /* nothing more to do */ return; } - - /* update structures */ - dp = bp->b_actf; - if (dp != NULL) { - dp->b_actb = bp->b_actb; - } else { - rs->buf_queue.b_actb = bp->b_actb; - } - *bp->b_actb = dp; + BUFQ_REMOVE(&rs->buf_queue, bp); /* Ok, for the bp we have here, bp->b_blkno is relative to the * partition.. Need to make it absolute to the underlying @@ -1456,13 +1677,12 @@ raidstart(raidPtr) need to be? */ - retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 
- RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, - do_async, raid_addr, num_blocks, - bp->b_un.b_addr, bp, NULL, NULL, - RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL); - - + retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? + RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, + do_async, raid_addr, num_blocks, + bp->b_data, bp, NULL, NULL, + RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL); + RF_LOCK_MUTEX(raidPtr->mutex); } RF_UNLOCK_MUTEX(raidPtr->mutex); @@ -1600,7 +1820,7 @@ KernelWakeupFunc(vbp) struct buf *bp; struct raid_softc *rs; int unit; - register int s; + int s; s = splbio(); db1_printf(("recovering the request queue:\n")); @@ -1649,7 +1869,7 @@ KernelWakeupFunc(vbp) rf_ds_failed; queue->raidPtr->status[queue->row] = rf_rs_degraded; queue->raidPtr->numFailures++; - /* XXX here we should bump the version number for each component, and write that data out */ + queue->raidPtr->numNewFailures++; } else { /* Disk is already dead... */ /* printf("Disk already marked as dead!\n"); */ } @@ -1677,18 +1897,19 @@ KernelWakeupFunc(vbp) * initialize a buf structure for doing an I/O in the kernel. */ static void -InitBP( - struct buf * bp, - struct vnode * b_vp, - unsigned rw_flag, - dev_t dev, - RF_SectorNum_t startSect, - RF_SectorCount_t numSect, - caddr_t buf, - void (*cbFunc) (struct buf *), - void *cbArg, - int logBytesPerSector, - struct proc * b_proc) +InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg, + logBytesPerSector, b_proc) + struct buf *bp; + struct vnode *b_vp; + unsigned rw_flag; + dev_t dev; + RF_SectorNum_t startSect; + RF_SectorCount_t numSect; + caddr_t buf; + void (*cbFunc) (struct buf *); + void *cbArg; + int logBytesPerSector; + struct proc *b_proc; { /* bp->b_flags = B_PHYS | rw_flag; */ bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? 
*/ @@ -1696,7 +1917,7 @@ InitBP( bp->b_bufsize = bp->b_bcount; bp->b_error = 0; bp->b_dev = dev; - bp->b_un.b_addr = buf; + bp->b_data = buf; bp->b_blkno = startSect; bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ if (bp->b_bcount == 0) { @@ -1911,11 +2132,11 @@ raidunlock(rs) int raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) { - RF_ComponentLabel_t component_label; - raidread_component_label(dev, b_vp, &component_label); - component_label.mod_counter = mod_counter; - component_label.clean = RF_RAID_CLEAN; - raidwrite_component_label(dev, b_vp, &component_label); + RF_ComponentLabel_t clabel; + raidread_component_label(dev, b_vp, &clabel); + clabel.mod_counter = mod_counter; + clabel.clean = RF_RAID_CLEAN; + raidwrite_component_label(dev, b_vp, &clabel); return(0); } @@ -1923,20 +2144,20 @@ raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) int raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) { - RF_ComponentLabel_t component_label; - raidread_component_label(dev, b_vp, &component_label); - component_label.mod_counter = mod_counter; - component_label.clean = RF_RAID_DIRTY; - raidwrite_component_label(dev, b_vp, &component_label); + RF_ComponentLabel_t clabel; + raidread_component_label(dev, b_vp, &clabel); + clabel.mod_counter = mod_counter; + clabel.clean = RF_RAID_DIRTY; + raidwrite_component_label(dev, b_vp, &clabel); return(0); } /* ARGSUSED */ int -raidread_component_label(dev, b_vp, component_label) +raidread_component_label(dev, b_vp, clabel) dev_t dev; struct vnode *b_vp; - RF_ComponentLabel_t *component_label; + RF_ComponentLabel_t *clabel; { struct buf *bp; int error; @@ -1959,22 +2180,15 @@ raidread_component_label(dev, b_vp, component_label) error = biowait(bp); if (!error) { - memcpy(component_label, bp->b_un.b_addr, + memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t)); #if 0 - printf("raidread_component_label: got component label:\n"); - printf("Version: %d\n",component_label->version); - 
printf("Serial Number: %d\n",component_label->serial_number); - printf("Mod counter: %d\n",component_label->mod_counter); - printf("Row: %d\n", component_label->row); - printf("Column: %d\n", component_label->column); - printf("Num Rows: %d\n", component_label->num_rows); - printf("Num Columns: %d\n", component_label->num_columns); - printf("Clean: %d\n", component_label->clean); - printf("Status: %d\n", component_label->status); + rf_print_component_label( clabel ); #endif } else { +#if 0 printf("Failed to read RAID component label!\n"); +#endif } bp->b_flags = B_INVAL | B_AGE; @@ -1983,10 +2197,10 @@ raidread_component_label(dev, b_vp, component_label) } /* ARGSUSED */ int -raidwrite_component_label(dev, b_vp, component_label) +raidwrite_component_label(dev, b_vp, clabel) dev_t dev; struct vnode *b_vp; - RF_ComponentLabel_t *component_label; + RF_ComponentLabel_t *clabel; { struct buf *bp; int error; @@ -2001,26 +2215,28 @@ raidwrite_component_label(dev, b_vp, component_label) bp->b_flags = B_BUSY | B_WRITE; bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; - memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE ); + memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); - memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t)); + memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); (*bdevsw[major(bp->b_dev)].d_strategy)(bp); error = biowait(bp); bp->b_flags = B_INVAL | B_AGE; brelse(bp); if (error) { +#if 1 printf("Failed to write RAID component info!\n"); +#endif } return(error); } void -rf_markalldirty( raidPtr ) +rf_markalldirty(raidPtr) RF_Raid_t *raidPtr; { - RF_ComponentLabel_t c_label; + RF_ComponentLabel_t clabel; int r,c; raidPtr->mod_counter++; @@ -2030,19 +2246,19 @@ rf_markalldirty( raidPtr ) raidread_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - if (c_label.status == rf_ds_spared) { + &clabel); + if (clabel.status == rf_ds_spared) { /* XXX do something special... 
but whatever you do, don't try to access it!! */ } else { #if 0 - c_label.status = + clabel.status = raidPtr->Disks[r][c].status; raidwrite_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); + &clabel); #endif raidmarkdirty( raidPtr->Disks[r][c].dev, @@ -2088,21 +2304,21 @@ rf_markalldirty( raidPtr ) raidread_component_label( raidPtr->Disks[r][sparecol].dev, raidPtr->raid_cinfo[r][sparecol].ci_vp, - &c_label); + &clabel); /* make sure status is noted */ - c_label.version = RF_COMPONENT_LABEL_VERSION; - c_label.mod_counter = raidPtr->mod_counter; - c_label.serial_number = raidPtr->serial_number; - c_label.row = srow; - c_label.column = scol; - c_label.num_rows = raidPtr->numRow; - c_label.num_columns = raidPtr->numCol; - c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/ - c_label.status = rf_ds_optimal; + clabel.version = RF_COMPONENT_LABEL_VERSION; + clabel.mod_counter = raidPtr->mod_counter; + clabel.serial_number = raidPtr->serial_number; + clabel.row = srow; + clabel.column = scol; + clabel.num_rows = raidPtr->numRow; + clabel.num_columns = raidPtr->numCol; + clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/ + clabel.status = rf_ds_optimal; raidwrite_component_label( raidPtr->Disks[r][sparecol].dev, raidPtr->raid_cinfo[r][sparecol].ci_vp, - &c_label); + &clabel); raidmarkclean( raidPtr->Disks[r][sparecol].dev, raidPtr->raid_cinfo[r][sparecol].ci_vp); } @@ -2113,10 +2329,11 @@ rf_markalldirty( raidPtr ) void -rf_update_component_labels( raidPtr ) +rf_update_component_labels(raidPtr, final) RF_Raid_t *raidPtr; + int final; { - RF_ComponentLabel_t c_label; + RF_ComponentLabel_t clabel; int sparecol; int r,c; int i,j; @@ -2136,43 +2353,26 @@ rf_update_component_labels( raidPtr ) raidread_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); + &clabel); /* make sure status is noted */ - c_label.status = rf_ds_optimal; + clabel.status = rf_ds_optimal; + /* bump the counter */ + 
clabel.mod_counter = raidPtr->mod_counter; + raidwrite_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); + &clabel); + if (final == RF_FINAL_COMPONENT_UPDATE) { + if (raidPtr->parity_good == RF_RAID_CLEAN) { + raidmarkclean( + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, + raidPtr->mod_counter); + } } } /* else we don't touch it.. */ -#if 0 - else if (raidPtr->Disks[r][c].status != - rf_ds_failed) { - raidread_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - /* make sure status is noted */ - c_label.status = - raidPtr->Disks[r][c].status; - raidwrite_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); - } - } -#endif } } @@ -2201,34 +2401,96 @@ rf_update_component_labels( raidPtr ) } } + /* XXX shouldn't *really* need this... 
*/ raidread_component_label( raidPtr->Disks[0][sparecol].dev, raidPtr->raid_cinfo[0][sparecol].ci_vp, - &c_label); + &clabel); /* make sure status is noted */ - c_label.version = RF_COMPONENT_LABEL_VERSION; - c_label.mod_counter = raidPtr->mod_counter; - c_label.serial_number = raidPtr->serial_number; - c_label.row = srow; - c_label.column = scol; - c_label.num_rows = raidPtr->numRow; - c_label.num_columns = raidPtr->numCol; - c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/ - c_label.status = rf_ds_optimal; + + raid_init_component_label(raidPtr, &clabel); + + clabel.mod_counter = raidPtr->mod_counter; + clabel.row = srow; + clabel.column = scol; + clabel.status = rf_ds_optimal; + raidwrite_component_label( raidPtr->Disks[0][sparecol].dev, raidPtr->raid_cinfo[0][sparecol].ci_vp, - &c_label); - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - raidPtr->mod_counter); + &clabel); + if (final == RF_FINAL_COMPONENT_UPDATE) { + if (raidPtr->parity_good == RF_RAID_CLEAN) { + raidmarkclean( raidPtr->Disks[0][sparecol].dev, + raidPtr->raid_cinfo[0][sparecol].ci_vp, + raidPtr->mod_counter); + } } } } /* printf("Component labels updated\n"); */ } +void +rf_close_component(raidPtr, vp, auto_configured) + RF_Raid_t *raidPtr; + struct vnode *vp; + int auto_configured; +{ + struct proc *p; + + p = raidPtr->engine_thread; + + if (vp != NULL) { + if (auto_configured == 1) { + VOP_CLOSE(vp, FREAD, NOCRED, 0); + vput(vp); + + } else { + VOP_UNLOCK(vp, 0); + (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); + } + } else { + printf("vnode was NULL\n"); + } +} + + +void +rf_UnconfigureVnodes(raidPtr) + RF_Raid_t *raidPtr; +{ + int r,c; + struct proc *p; + struct vnode *vp; + int acd; + + + /* We take this opportunity to close the vnodes like we should.. 
*/ + + p = raidPtr->engine_thread; + + for (r = 0; r < raidPtr->numRow; r++) { + for (c = 0; c < raidPtr->numCol; c++) { + printf("Closing vnode for row: %d col: %d\n", r, c); + vp = raidPtr->raid_cinfo[r][c].ci_vp; + acd = raidPtr->Disks[r][c].auto_configured; + rf_close_component(raidPtr, vp, acd); + raidPtr->raid_cinfo[r][c].ci_vp = NULL; + raidPtr->Disks[r][c].auto_configured = 0; + } + } + for (r = 0; r < raidPtr->numSpare; r++) { + printf("Closing vnode for spare: %d\n", r); + vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp; + acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured; + rf_close_component(raidPtr, vp, acd); + raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL; + raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0; + } +} + + void rf_ReconThread(req) struct rf_recon_req *req; @@ -2274,6 +2536,11 @@ rf_RewriteParityThread(raidPtr) } raidPtr->parity_rewrite_in_progress = 0; + /* Anyone waiting for us to stop? If so, inform them... */ + if (raidPtr->waitShutdown) { + wakeup(&raidPtr->parity_rewrite_in_progress); + } + /* That's all... */ kthread_exit(0); /* does not return */ } @@ -2315,3 +2582,700 @@ rf_ReconstructInPlaceThread(req) /* That's all... */ kthread_exit(0); /* does not return */ } + +void +rf_mountroot_hook(dev) + struct device *dev; +{ + +} + + +RF_AutoConfig_t * +rf_find_raid_components() +{ + struct devnametobdevmaj *dtobdm; + struct vnode *vp; + struct disklabel label; + struct device *dv; + char *cd_name; + dev_t dev; + int error; + int i; + int good_one; + RF_ComponentLabel_t *clabel; + RF_AutoConfig_t *ac_list; + RF_AutoConfig_t *ac; + + + /* initialize the AutoConfig list */ + ac_list = NULL; + +if (raidautoconfig) { + + /* we begin by trolling through *all* the devices on the system */ + + for (dv = alldevs.tqh_first; dv != NULL; + dv = dv->dv_list.tqe_next) { + + /* we are only interested in disks... */ + if (dv->dv_class != DV_DISK) + continue; + + /* we don't care about floppies... 
*/ + if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) { + continue; + } + + /* need to find the device_name_to_block_device_major stuff */ + cd_name = dv->dv_cfdata->cf_driver->cd_name; + dtobdm = dev_name2blk; + while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) { + dtobdm++; + } + + /* get a vnode for the raw partition of this disk */ + + dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART); + if (bdevvp(dev, &vp)) + panic("RAID can't alloc vnode"); + + error = VOP_OPEN(vp, FREAD, NOCRED, 0); + + if (error) { + /* "Who cares." Continue looking + for something that exists*/ + vput(vp); + continue; + } + + /* Ok, the disk exists. Go get the disklabel. */ + error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label, + FREAD, NOCRED, 0); + if (error) { + /* + * XXX can't happen - open() would + * have errored out (or faked up one) + */ + printf("can't get label for dev %s%c (%d)!?!?\n", + dv->dv_xname, 'a' + RAW_PART, error); + } + + /* don't need this any more. We'll allocate it again + a little later if we really do... */ + VOP_CLOSE(vp, FREAD, NOCRED, 0); + vput(vp); + + for (i=0; i < label.d_npartitions; i++) { + /* We only support partitions marked as RAID */ + if (label.d_partitions[i].p_fstype != FS_RAID) + continue; + + dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i); + if (bdevvp(dev, &vp)) + panic("RAID can't alloc vnode"); + + error = VOP_OPEN(vp, FREAD, NOCRED, 0); + if (error) { + /* Whatever... */ + vput(vp); + continue; + } + + good_one = 0; + + clabel = (RF_ComponentLabel_t *) + malloc(sizeof(RF_ComponentLabel_t), + M_RAIDFRAME, M_NOWAIT); + if (clabel == NULL) { + /* XXX CLEANUP HERE */ + printf("RAID auto config: out of memory!\n"); + return(NULL); /* XXX probably should panic? */ + } + + if (!raidread_component_label(dev, vp, clabel)) { + /* Got the label. Does it look reasonable? 
*/ + if (rf_reasonable_label(clabel) && + (clabel->partitionSize <= + label.d_partitions[i].p_size)) { +#if DEBUG + printf("Component on: %s%c: %d\n", + dv->dv_xname, 'a'+i, + label.d_partitions[i].p_size); + rf_print_component_label(clabel); +#endif + /* if it's reasonable, add it, + else ignore it. */ + ac = (RF_AutoConfig_t *) + malloc(sizeof(RF_AutoConfig_t), + M_RAIDFRAME, + M_NOWAIT); + if (ac == NULL) { + /* XXX should panic?? */ + return(NULL); + } + + sprintf(ac->devname, "%s%c", + dv->dv_xname, 'a'+i); + ac->dev = dev; + ac->vp = vp; + ac->clabel = clabel; + ac->next = ac_list; + ac_list = ac; + good_one = 1; + } + } + if (!good_one) { + /* cleanup */ + free(clabel, M_RAIDFRAME); + VOP_CLOSE(vp, FREAD, NOCRED, 0); + vput(vp); + } + } + } +} +return(ac_list); +} + +static int +rf_reasonable_label(clabel) + RF_ComponentLabel_t *clabel; +{ + + if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || + (clabel->version==RF_COMPONENT_LABEL_VERSION)) && + ((clabel->clean == RF_RAID_CLEAN) || + (clabel->clean == RF_RAID_DIRTY)) && + clabel->row >=0 && + clabel->column >= 0 && + clabel->num_rows > 0 && + clabel->num_columns > 0 && + clabel->row < clabel->num_rows && + clabel->column < clabel->num_columns && + clabel->blockSize > 0 && + clabel->numBlocks > 0) { + /* label looks reasonable enough... */ + return(1); + } + return(0); +} + + +void +rf_print_component_label(clabel) + RF_ComponentLabel_t *clabel; +{ + printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", + clabel->row, clabel->column, + clabel->num_rows, clabel->num_columns); + printf(" Version: %d Serial Number: %d Mod Counter: %d\n", + clabel->version, clabel->serial_number, + clabel->mod_counter); + printf(" Clean: %s Status: %d\n", + clabel->clean ? 
"Yes" : "No", clabel->status ); + printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", + clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); + printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", + (char) clabel->parityConfig, clabel->blockSize, + clabel->numBlocks); + printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); + printf(" Contains root partition: %s\n", + clabel->root_partition ? "Yes" : "No" ); + printf(" Last configured as: raid%d\n", clabel->last_unit ); +#if 0 + printf(" Config order: %d\n", clabel->config_order); +#endif + +} + +RF_ConfigSet_t * +rf_create_auto_sets(ac_list) + RF_AutoConfig_t *ac_list; +{ + RF_AutoConfig_t *ac; + RF_ConfigSet_t *config_sets; + RF_ConfigSet_t *cset; + RF_AutoConfig_t *ac_next; + + + config_sets = NULL; + + /* Go through the AutoConfig list, and figure out which components + belong to what sets. */ + ac = ac_list; + while(ac!=NULL) { + /* we're going to putz with ac->next, so save it here + for use at the end of the loop */ + ac_next = ac->next; + + if (config_sets == NULL) { + /* will need at least this one... */ + config_sets = (RF_ConfigSet_t *) + malloc(sizeof(RF_ConfigSet_t), + M_RAIDFRAME, M_NOWAIT); + if (config_sets == NULL) { + panic("rf_create_auto_sets: No memory!\n"); + } + /* this one is easy :) */ + config_sets->ac = ac; + config_sets->next = NULL; + config_sets->rootable = 0; + ac->next = NULL; + } else { + /* which set does this component fit into? */ + cset = config_sets; + while(cset!=NULL) { + if (rf_does_it_fit(cset, ac)) { + /* looks like it matches... */ + ac->next = cset->ac; + cset->ac = ac; + break; + } + cset = cset->next; + } + if (cset==NULL) { + /* didn't find a match above... 
new set..*/ + cset = (RF_ConfigSet_t *) + malloc(sizeof(RF_ConfigSet_t), + M_RAIDFRAME, M_NOWAIT); + if (cset == NULL) { + panic("rf_create_auto_sets: No memory!\n"); + } + cset->ac = ac; + ac->next = NULL; + cset->next = config_sets; + cset->rootable = 0; + config_sets = cset; + } + } + ac = ac_next; + } + + + return(config_sets); +} + +static int +rf_does_it_fit(cset, ac) + RF_ConfigSet_t *cset; + RF_AutoConfig_t *ac; +{ + RF_ComponentLabel_t *clabel1, *clabel2; + + /* If this one matches the *first* one in the set, that's good + enough, since the other members of the set would have been + through here too... */ + /* note that we are not checking partitionSize here.. + + Note that we are also not checking the mod_counters here. + If everything else matches execpt the mod_counter, that's + good enough for this test. We will deal with the mod_counters + a little later in the autoconfiguration process. + + (clabel1->mod_counter == clabel2->mod_counter) && + + The reason we don't check for this is that failed disks + will have lower modification counts. If those disks are + not added to the set they used to belong to, then they will + form their own set, which may result in 2 different sets, + for example, competing to be configured at raid0, and + perhaps competing to be the root filesystem set. If the + wrong ones get configured, or both attempt to become /, + weird behaviour and or serious lossage will occur. Thus we + need to bring them into the fold here, and kick them out at + a later point. 
+ + */ + + clabel1 = cset->ac->clabel; + clabel2 = ac->clabel; + if ((clabel1->version == clabel2->version) && + (clabel1->serial_number == clabel2->serial_number) && + (clabel1->num_rows == clabel2->num_rows) && + (clabel1->num_columns == clabel2->num_columns) && + (clabel1->sectPerSU == clabel2->sectPerSU) && + (clabel1->SUsPerPU == clabel2->SUsPerPU) && + (clabel1->SUsPerRU == clabel2->SUsPerRU) && + (clabel1->parityConfig == clabel2->parityConfig) && + (clabel1->maxOutstanding == clabel2->maxOutstanding) && + (clabel1->blockSize == clabel2->blockSize) && + (clabel1->numBlocks == clabel2->numBlocks) && + (clabel1->autoconfigure == clabel2->autoconfigure) && + (clabel1->root_partition == clabel2->root_partition) && + (clabel1->last_unit == clabel2->last_unit) && + (clabel1->config_order == clabel2->config_order)) { + /* if it get's here, it almost *has* to be a match */ + } else { + /* it's not consistent with somebody in the set.. + punt */ + return(0); + } + /* all was fine.. it must fit... */ + return(1); +} + +int +rf_have_enough_components(cset) + RF_ConfigSet_t *cset; +{ + RF_AutoConfig_t *ac; + RF_AutoConfig_t *auto_config; + RF_ComponentLabel_t *clabel; + int r,c; + int num_rows; + int num_cols; + int num_missing; + int mod_counter; + int mod_counter_found; + int even_pair_failed; + char parity_type; + + + /* check to see that we have enough 'live' components + of this set. If so, we can configure it if necessary */ + + num_rows = cset->ac->clabel->num_rows; + num_cols = cset->ac->clabel->num_columns; + parity_type = cset->ac->clabel->parityConfig; + + /* XXX Check for duplicate components!?!?!? */ + + /* Determine what the mod_counter is supposed to be for this set. 
*/ + + mod_counter_found = 0; + ac = cset->ac; + while(ac!=NULL) { + if (mod_counter_found==0) { + mod_counter = ac->clabel->mod_counter; + mod_counter_found = 1; + } else { + if (ac->clabel->mod_counter > mod_counter) { + mod_counter = ac->clabel->mod_counter; + } + } + ac = ac->next; + } + + num_missing = 0; + auto_config = cset->ac; + + for(r=0; r<num_rows; r++) { + even_pair_failed = 0; + for(c=0; c<num_cols; c++) { + ac = auto_config; + while(ac!=NULL) { + if ((ac->clabel->row == r) && + (ac->clabel->column == c) && + (ac->clabel->mod_counter == mod_counter)) { + /* it's this one... */ +#if DEBUG + printf("Found: %s at %d,%d\n", + ac->devname,r,c); +#endif + break; + } + ac=ac->next; + } + if (ac==NULL) { + /* Didn't find one here! */ + /* special case for RAID 1, especially + where there are more than 2 + components (where RAIDframe treats + things a little differently :( ) */ + if (parity_type == '1') { + if (c%2 == 0) { /* even component */ + even_pair_failed = 1; + } else { /* odd component. If + we're failed, and + so is the even + component, it's + "Good Night, Charlie" */ + if (even_pair_failed == 1) { + return(0); + } + } + } else { + /* normal accounting */ + num_missing++; + } + } + if ((parity_type == '1') && (c%2 == 1)) { + /* Just did an even component, and we didn't + bail.. reset the even_pair_failed flag, + and go on to the next component.... 
*/ + even_pair_failed = 0; + } + } + } + + clabel = cset->ac->clabel; + + if (((clabel->parityConfig == '0') && (num_missing > 0)) || + ((clabel->parityConfig == '4') && (num_missing > 1)) || + ((clabel->parityConfig == '5') && (num_missing > 1))) { + /* XXX this needs to be made *much* more general */ + /* Too many failures */ + return(0); + } + /* otherwise, all is well, and we've got enough to take a kick + at autoconfiguring this set */ + return(1); +} + +void +rf_create_configuration(ac,config,raidPtr) + RF_AutoConfig_t *ac; + RF_Config_t *config; + RF_Raid_t *raidPtr; +{ + RF_ComponentLabel_t *clabel; + int i; + + clabel = ac->clabel; + + /* 1. Fill in the common stuff */ + config->numRow = clabel->num_rows; + config->numCol = clabel->num_columns; + config->numSpare = 0; /* XXX should this be set here? */ + config->sectPerSU = clabel->sectPerSU; + config->SUsPerPU = clabel->SUsPerPU; + config->SUsPerRU = clabel->SUsPerRU; + config->parityConfig = clabel->parityConfig; + /* XXX... */ + strcpy(config->diskQueueType,"fifo"); + config->maxOutstandingDiskReqs = clabel->maxOutstanding; + config->layoutSpecificSize = 0; /* XXX ?? 
*/ + + while(ac!=NULL) { + /* row/col values will be in range due to the checks + in reasonable_label() */ + strcpy(config->devnames[ac->clabel->row][ac->clabel->column], + ac->devname); + ac = ac->next; + } + + for(i=0;i<RF_MAXDBGV;i++) { + config->debugVars[i][0] = NULL; + } +} + +int +rf_set_autoconfig(raidPtr, new_value) + RF_Raid_t *raidPtr; + int new_value; +{ + RF_ComponentLabel_t clabel; + struct vnode *vp; + dev_t dev; + int row, column; + + raidPtr->autoconfigure = new_value; + for(row=0; row<raidPtr->numRow; row++) { + for(column=0; column<raidPtr->numCol; column++) { + if (raidPtr->Disks[row][column].status == + rf_ds_optimal) { + dev = raidPtr->Disks[row][column].dev; + vp = raidPtr->raid_cinfo[row][column].ci_vp; + raidread_component_label(dev, vp, &clabel); + clabel.autoconfigure = new_value; + raidwrite_component_label(dev, vp, &clabel); + } + } + } + return(new_value); +} + +int +rf_set_rootpartition(raidPtr, new_value) + RF_Raid_t *raidPtr; + int new_value; +{ + RF_ComponentLabel_t clabel; + struct vnode *vp; + dev_t dev; + int row, column; + + raidPtr->root_partition = new_value; + for(row=0; row<raidPtr->numRow; row++) { + for(column=0; column<raidPtr->numCol; column++) { + if (raidPtr->Disks[row][column].status == + rf_ds_optimal) { + dev = raidPtr->Disks[row][column].dev; + vp = raidPtr->raid_cinfo[row][column].ci_vp; + raidread_component_label(dev, vp, &clabel); + clabel.root_partition = new_value; + raidwrite_component_label(dev, vp, &clabel); + } + } + } + return(new_value); +} + +void +rf_release_all_vps(cset) + RF_ConfigSet_t *cset; +{ + RF_AutoConfig_t *ac; + + ac = cset->ac; + while(ac!=NULL) { + /* Close the vp, and give it back */ + if (ac->vp) { + VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); + vput(ac->vp); + ac->vp = NULL; + } + ac = ac->next; + } +} + + +void +rf_cleanup_config_set(cset) + RF_ConfigSet_t *cset; +{ + RF_AutoConfig_t *ac; + RF_AutoConfig_t *next_ac; + + ac = cset->ac; + while(ac!=NULL) { + next_ac = ac->next; + /* nuke the 
label */ + free(ac->clabel, M_RAIDFRAME); + /* cleanup the config structure */ + free(ac, M_RAIDFRAME); + /* "next.." */ + ac = next_ac; + } + /* and, finally, nuke the config set */ + free(cset, M_RAIDFRAME); +} + + +void +raid_init_component_label(raidPtr, clabel) + RF_Raid_t *raidPtr; + RF_ComponentLabel_t *clabel; +{ + /* current version number */ + clabel->version = RF_COMPONENT_LABEL_VERSION; + clabel->serial_number = raidPtr->serial_number; + clabel->mod_counter = raidPtr->mod_counter; + clabel->num_rows = raidPtr->numRow; + clabel->num_columns = raidPtr->numCol; + clabel->clean = RF_RAID_DIRTY; /* not clean */ + clabel->status = rf_ds_optimal; /* "It's good!" */ + + clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; + clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; + clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; + + clabel->blockSize = raidPtr->bytesPerSector; + clabel->numBlocks = raidPtr->sectorsPerDisk; + + /* XXX not portable */ + clabel->parityConfig = raidPtr->Layout.map->parityConfig; + clabel->maxOutstanding = raidPtr->maxOutstanding; + clabel->autoconfigure = raidPtr->autoconfigure; + clabel->root_partition = raidPtr->root_partition; + clabel->last_unit = raidPtr->raidid; + clabel->config_order = raidPtr->config_order; +} + +int +rf_auto_config_set(cset,unit) + RF_ConfigSet_t *cset; + int *unit; +{ + RF_Raid_t *raidPtr; + RF_Config_t *config; + int raidID; + int retcode; + + printf("RAID autoconfigure\n"); + + retcode = 0; + *unit = -1; + + /* 1. Create a config structure */ + + config = (RF_Config_t *)malloc(sizeof(RF_Config_t), + M_RAIDFRAME, + M_NOWAIT); + if (config==NULL) { + printf("Out of mem!?!?\n"); + /* XXX do something more intelligent here. */ + return(1); + } + + memset(config, 0, sizeof(RF_Config_t)); + + /* XXX raidID needs to be set correctly.. */ + + /* + 2. Figure out what RAID ID this one is supposed to live at + See if we can get the same RAID dev that it was configured + on last time.. 
+ */ + + raidID = cset->ac->clabel->last_unit; + if ((raidID < 0) || (raidID >= numraid)) { + /* let's not wander off into lala land. */ + raidID = numraid - 1; + } + if (raidPtrs[raidID]->valid != 0) { + + /* + Nope... Go looking for an alternative... + Start high so we don't immediately use raid0 if that's + not taken. + */ + + for(raidID = numraid; raidID >= 0; raidID--) { + if (raidPtrs[raidID]->valid == 0) { + /* can use this one! */ + break; + } + } + } + + if (raidID < 0) { + /* punt... */ + printf("Unable to auto configure this set!\n"); + printf("(Out of RAID devs!)\n"); + return(1); + } + printf("Configuring raid%d:\n",raidID); + raidPtr = raidPtrs[raidID]; + + /* XXX all this stuff should be done SOMEWHERE ELSE! */ + raidPtr->raidid = raidID; + raidPtr->openings = RAIDOUTSTANDING; + + /* 3. Build the configuration structure */ + rf_create_configuration(cset->ac, config, raidPtr); + + /* 4. Do the configuration */ + retcode = rf_Configure(raidPtr, config, cset->ac); + + if (retcode == 0) { + + raidinit(raidPtrs[raidID]); + + rf_markalldirty(raidPtrs[raidID]); + raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ + if (cset->ac->clabel->root_partition==1) { + /* everything configured just fine. Make a note + that this set is eligible to be root. */ + cset->rootable = 1; + /* XXX do this here? */ + raidPtrs[raidID]->root_partition = 1; + } + } + + /* 5. 
Cleanup */ + free(config, M_RAIDFRAME); + + *unit = raidID; + return(retcode); +} diff --git a/sys/dev/raidframe/rf_nwayxor.c b/sys/dev/raidframe/rf_nwayxor.c index 465827aeabc..df7604650fb 100644 --- a/sys/dev/raidframe/rf_nwayxor.c +++ b/sys/dev/raidframe/rf_nwayxor.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_nwayxor.c,v 1.2 1999/02/16 00:03:00 niklas Exp $ */ -/* $NetBSD: rf_nwayxor.c,v 1.3 1999/02/05 00:06:13 oster Exp $ */ +/* $OpenBSD: rf_nwayxor.c,v 1.3 2000/08/08 16:07:43 peter Exp $ */ +/* $NetBSD: rf_nwayxor.c,v 1.4 2000/03/30 12:45:41 augustss Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -74,10 +74,10 @@ rf_nWayXor1(src_rbs, dest_rb, len) RF_ReconBuffer_t *dest_rb; int len; { - register unsigned long *src = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *dest = (unsigned long *) dest_rb->buffer; - register unsigned long *end = src + len; - register unsigned long d0, d1, d2, d3, s0, s1, s2, s3; + unsigned long *src = (unsigned long *) src_rbs[0]->buffer; + unsigned long *dest = (unsigned long *) dest_rb->buffer; + unsigned long *end = src + len; + unsigned long d0, d1, d2, d3, s0, s1, s2, s3; callcount[1]++; while (len >= 4) { @@ -108,10 +108,10 @@ rf_nWayXor2(src_rbs, dest_rb, len) RF_ReconBuffer_t *dest_rb; int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *a = dst; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + unsigned long *dst = (unsigned long *) dest_rb->buffer; + unsigned long *a = dst; + unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + unsigned long *c = (unsigned long *) src_rbs[1]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[2]++; @@ -201,10 +201,10 @@ rf_nWayXor3(src_rbs, dest_rb, len) RF_ReconBuffer_t *dest_rb; int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = 
(unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + unsigned long *dst = (unsigned long *) dest_rb->buffer; + unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + unsigned long *d = (unsigned long *) src_rbs[2]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[3]++; @@ -231,11 +231,11 @@ rf_nWayXor4(src_rbs, dest_rb, len) RF_ReconBuffer_t *dest_rb; int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + unsigned long *dst = (unsigned long *) dest_rb->buffer; + unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + unsigned long *e = (unsigned long *) src_rbs[3]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[4]++; @@ -263,12 +263,12 @@ rf_nWayXor5(src_rbs, dest_rb, len) RF_ReconBuffer_t *dest_rb; int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; + unsigned long *dst = (unsigned long *) dest_rb->buffer; + unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + 
unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + unsigned long *f = (unsigned long *) src_rbs[4]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[5]++; @@ -297,13 +297,13 @@ rf_nWayXor6(src_rbs, dest_rb, len) RF_ReconBuffer_t *dest_rb; int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; + unsigned long *dst = (unsigned long *) dest_rb->buffer; + unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + unsigned long *f = (unsigned long *) src_rbs[4]->buffer; + unsigned long *g = (unsigned long *) src_rbs[5]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[6]++; @@ -333,14 +333,14 @@ rf_nWayXor7(src_rbs, dest_rb, len) RF_ReconBuffer_t *dest_rb; int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - register unsigned long *h = (unsigned long *) src_rbs[6]->buffer; + unsigned long *dst = (unsigned long *) dest_rb->buffer; + unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + unsigned long *c = 
(unsigned long *) src_rbs[1]->buffer; + unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + unsigned long *f = (unsigned long *) src_rbs[4]->buffer; + unsigned long *g = (unsigned long *) src_rbs[5]->buffer; + unsigned long *h = (unsigned long *) src_rbs[6]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[7]++; @@ -371,15 +371,15 @@ rf_nWayXor8(src_rbs, dest_rb, len) RF_ReconBuffer_t *dest_rb; int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - register unsigned long *h = (unsigned long *) src_rbs[6]->buffer; - register unsigned long *i = (unsigned long *) src_rbs[7]->buffer; + unsigned long *dst = (unsigned long *) dest_rb->buffer; + unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + unsigned long *f = (unsigned long *) src_rbs[4]->buffer; + unsigned long *g = (unsigned long *) src_rbs[5]->buffer; + unsigned long *h = (unsigned long *) src_rbs[6]->buffer; + unsigned long *i = (unsigned long *) src_rbs[7]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[8]++; @@ -412,16 +412,16 @@ rf_nWayXor9(src_rbs, dest_rb, len) RF_ReconBuffer_t *dest_rb; int len; { - register unsigned long *dst = (unsigned long *) dest_rb->buffer; - register unsigned long *b = (unsigned long *) src_rbs[0]->buffer; - register unsigned long *c = (unsigned long *) src_rbs[1]->buffer; - 
register unsigned long *d = (unsigned long *) src_rbs[2]->buffer; - register unsigned long *e = (unsigned long *) src_rbs[3]->buffer; - register unsigned long *f = (unsigned long *) src_rbs[4]->buffer; - register unsigned long *g = (unsigned long *) src_rbs[5]->buffer; - register unsigned long *h = (unsigned long *) src_rbs[6]->buffer; - register unsigned long *i = (unsigned long *) src_rbs[7]->buffer; - register unsigned long *j = (unsigned long *) src_rbs[8]->buffer; + unsigned long *dst = (unsigned long *) dest_rb->buffer; + unsigned long *b = (unsigned long *) src_rbs[0]->buffer; + unsigned long *c = (unsigned long *) src_rbs[1]->buffer; + unsigned long *d = (unsigned long *) src_rbs[2]->buffer; + unsigned long *e = (unsigned long *) src_rbs[3]->buffer; + unsigned long *f = (unsigned long *) src_rbs[4]->buffer; + unsigned long *g = (unsigned long *) src_rbs[5]->buffer; + unsigned long *h = (unsigned long *) src_rbs[6]->buffer; + unsigned long *i = (unsigned long *) src_rbs[7]->buffer; + unsigned long *j = (unsigned long *) src_rbs[8]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[9]++; diff --git a/sys/dev/raidframe/rf_openbsd.h b/sys/dev/raidframe/rf_openbsd.h index 59feb987ed3..17333cb9647 100644 --- a/sys/dev/raidframe/rf_openbsd.h +++ b/sys/dev/raidframe/rf_openbsd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rf_openbsd.h,v 1.3 1999/07/30 14:45:32 peter Exp $ */ +/* $OpenBSD: rf_openbsd.h,v 1.4 2000/08/08 16:07:43 peter Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. @@ -105,6 +105,33 @@ typedef struct RF_ComponentLabel_s { int num_columns; /* number of columns in this RAID set */ int clean; /* 1 when clean, 0 when dirty */ int status; /* rf_ds_optimal, rf_ds_dist_spared, whatever. 
*/ + /* stuff that will be in version 2 of the label */ + int sectPerSU; /* Sectors per Stripe Unit */ + int SUsPerPU; /* Stripe Units per Parity Units */ + int SUsPerRU; /* Stripe Units per Reconstruction Units */ + int parityConfig; /* '0' == RAID0, '1' == RAID1, etc. */ + int maxOutstanding; /* maxOutstanding disk requests */ + int blockSize; /* size of component block. + (disklabel->d_secsize) */ + int numBlocks; /* number of blocks on this component. May + be smaller than the partition size. */ + int partitionSize; /* number of blocks on this *partition*. + Must exactly match the partition size + from the disklabel. */ + int future_use[33]; /* Future expansion */ + int autoconfigure; /* automatically configure this RAID set. + 0 == no, 1 == yes */ + int root_partition; /* Use this set as / + 0 == no, 1 == yes*/ + int last_unit; /* last unit number (e.g. 0 for /dev/raid0) + of this component. Used for autoconfigure + only. */ + int config_order; /* 0 .. n. The order in which the component + should be auto-configured. E.g. 0 is will + done first, (and would become raid0). + This may be in conflict with last_unit!!?! */ + /* Not currently used. */ + int future_use2[44]; /* More future expansion */ } RF_ComponentLabel_t; typedef struct RF_SingleComponent_s { @@ -115,11 +142,6 @@ typedef struct RF_SingleComponent_s { #ifdef _KERNEL -/* XXX this is *not* the place for these... */ -int rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); -int rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr); - - struct raidcinfo { struct vnode *ci_vp; /* component device's vnode */ dev_t ci_dev; /* component device's dev_t */ @@ -131,5 +153,24 @@ struct raidcinfo { #endif }; +/* XXX probably belongs in a different .h file. */ +typedef struct RF_AutoConfig_s { + char devname[56]; /* the name of this component */ + int flag; /* a general-purpose flag */ + dev_t dev; /* the device for this component */ + struct vnode *vp; /* Mr. 
Vnode Pointer */ + RF_ComponentLabel_t *clabel; /* the label */ + struct RF_AutoConfig_s *next; /* the next autoconfig structure + in this set. */ +} RF_AutoConfig_t; + +typedef struct RF_ConfigSet_s { + struct RF_AutoConfig_s *ac; /* all of the autoconfig structures for + this config set. */ + int rootable; /* Set to 1 if this set can be root */ + struct RF_ConfigSet_s *next; +} RF_ConfigSet_t; + + #endif /* _KERNEL */ #endif /* _RF__RF_OPENBSD_H_ */ diff --git a/sys/dev/raidframe/rf_openbsdkintf.c b/sys/dev/raidframe/rf_openbsdkintf.c index 8ad4dc3364c..268eff0a015 100644 --- a/sys/dev/raidframe/rf_openbsdkintf.c +++ b/sys/dev/raidframe/rf_openbsdkintf.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_openbsdkintf.c,v 1.9 2000/01/11 18:02:22 peter Exp $ */ -/* $NetBSD: rf_netbsdkintf.c,v 1.46 2000/01/09 03:39:13 oster Exp $ */ +/* $OpenBSD: rf_openbsdkintf.c,v 1.10 2000/08/08 16:07:43 peter Exp $ */ +/* $NetBSD: rf_netbsdkintf.c,v 1.93 2000/07/14 15:26:29 oster Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. @@ -130,6 +130,7 @@ #include <sys/lock.h> #include <sys/buf.h> #include <sys/user.h> +#include <sys/reboot.h> #include "raid.h" #include "rf_raid.h" @@ -148,6 +149,7 @@ #include "rf_parityscan.h" #include "rf_debugprint.h" #include "rf_threadstuff.h" +#include "rf_configure.h" int rf_kdebug_level = 0; @@ -172,7 +174,7 @@ void rf_KernelWakeupFunc __P((struct buf *)); void rf_InitBP __P((struct buf *, struct vnode *, unsigned, dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*)(struct buf *), void *, int, struct proc *)); -static int raidinit __P((dev_t, RF_Raid_t *, int)); +static void raidinit __P((RF_Raid_t *)); void raidattach __P((int)); int raidsize __P((dev_t)); @@ -199,16 +201,15 @@ struct raidbuf { /* * XXX Not sure if the following should be replacing the raidPtrs above, - * or if it should be used in conjunction with that... - */ + or if it should be used in conjunction with that... 
+*/ struct raid_softc { int sc_flags; /* flags */ int sc_cflags; /* configuration flags */ size_t sc_size; /* size of the raid device */ - dev_t sc_dev; /* our device..*/ char sc_xname[20]; /* XXX external name */ struct disk sc_dkdev; /* generic disk device info */ - struct buf buf_queue; /* used for the device queue */ + struct buf sc_q; /* used for the device queue */ }; /* sc_flags */ @@ -219,7 +220,7 @@ struct raid_softc { #define RAIDF_LOCKED 0x80 /* unit is locked */ #define raidunit(x) DISKUNIT(x) -static int numraid = 0; +int numraid = 0; /* * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. @@ -257,6 +258,9 @@ int raidlock __P((struct raid_softc *)); void raidunlock __P((struct raid_softc *)); void rf_markalldirty __P((RF_Raid_t *)); +void rf_mountroot_hook __P((struct device *)); + +struct device *raidrootdev; void rf_ReconThread __P((struct rf_recon_req *)); /* XXX what I want is: */ @@ -264,6 +268,30 @@ void rf_ReconThread __P((struct rf_recon_req *)); void rf_RewriteParityThread __P((RF_Raid_t *raidPtr)); void rf_CopybackThread __P((RF_Raid_t *raidPtr)); void rf_ReconstructInPlaceThread __P((struct rf_recon_req *)); +#ifdef RAID_AUTOCONFIG +void rf_buildroothack __P((void *)); +static int rf_reasonable_label __P((RF_ComponentLabel_t *)); +#endif + +RF_AutoConfig_t *rf_find_raid_components __P((void)); +RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *)); +static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *)); +void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *, + RF_Raid_t *)); +int rf_set_autoconfig __P((RF_Raid_t *, int)); +int rf_set_rootpartition __P((RF_Raid_t *, int)); +void rf_release_all_vps __P((RF_ConfigSet_t *)); +void rf_cleanup_config_set __P((RF_ConfigSet_t *)); +int rf_have_enough_components __P((RF_ConfigSet_t *)); +int rf_auto_config_set __P((RF_ConfigSet_t *, int *)); + +#ifdef RAID_AUTOCONFIG +static int raidautoconfig = 0; /* Debugging, mostly. 
Set to 0 to not + allow autoconfig to take place. + Note that this is overridden by having + RAID_AUTOCONFIG as an option in the + kernel config file. */ +#endif void raidattach(num) @@ -271,6 +299,10 @@ raidattach(num) { int raidID; int i, rc; +#ifdef RAID_AUTOCONFIG + RF_AutoConfig_t *ac_list; /* autoconfig list */ + RF_ConfigSet_t *config_sets; +#endif db1_printf(("raidattach: Asked for %d units\n", num)); @@ -283,6 +315,8 @@ raidattach(num) /* This is where all the initialization stuff gets done. */ + numraid = num; + /* Make some space for requested number of units... */ RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **)); if (raidPtrs == NULL) { @@ -296,7 +330,7 @@ raidattach(num) rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; - for (i = 0; i < numraid; i++) + for (i = 0; i < num; i++) raidPtrs[i] = NULL; rc = rf_BootRaidframe(); if (rc == 0) @@ -310,18 +344,33 @@ raidattach(num) */ raid_softc = (struct raid_softc *) - malloc(num * sizeof (struct raid_softc), M_RAIDFRAME, M_NOWAIT); + malloc(num * sizeof(struct raid_softc), + M_RAIDFRAME, M_NOWAIT); if (raid_softc == NULL) { printf("WARNING: no memory for RAIDframe driver\n"); return; } - numraid = num; + bzero(raid_softc, num * sizeof (struct raid_softc)); + raidrootdev = (struct device *)malloc(num * sizeof(struct device), + M_RAIDFRAME, M_NOWAIT); + if (raidrootdev == NULL) { + panic("No memory for RAIDframe driver!!?!?!\n"); + } + for (raidID = 0; raidID < num; raidID++) { - raid_softc[raidID].buf_queue.b_actf = NULL; - raid_softc[raidID].buf_queue.b_actb = - &raid_softc[raidID].buf_queue.b_actf; +#if 0 + SIMPLEQ_INIT(&raid_softc[raidID].sc_q); +#endif + + raidrootdev[raidID].dv_class = DV_DISK; + raidrootdev[raidID].dv_cfdata = NULL; + raidrootdev[raidID].dv_unit = raidID; + raidrootdev[raidID].dv_parent = NULL; + raidrootdev[raidID].dv_flags = 0; + sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID); + RF_Calloc(raidPtrs[raidID], 1, sizeof (RF_Raid_t), (RF_Raid_t *)); if 
(raidPtrs[raidID] == NULL) { @@ -330,7 +379,93 @@ raidattach(num) return; } } + +#if RAID_AUTOCONFIG + raidautoconfig = 1; + +if (raidautoconfig) { + /* 1. locate all RAID components on the system */ + +#if DEBUG + printf("Searching for raid components...\n"); +#endif + ac_list = rf_find_raid_components(); + + /* 2. sort them into their respective sets */ + + config_sets = rf_create_auto_sets(ac_list); + + /* 3. evaluate each set and configure the valid ones + This gets done in rf_buildroothack() */ + + /* schedule the creation of the thread to do the + "/ on RAID" stuff */ + + kthread_create(rf_buildroothack, config_sets, NULL, "raidauto"); + +#if 0 + mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]); +#endif +} +#endif + +} + +#ifdef RAID_AUTOCONFIG +void +rf_buildroothack(arg) + void *arg; +{ + RF_ConfigSet_t *config_sets = arg; + RF_ConfigSet_t *cset; + RF_ConfigSet_t *next_cset; + int retcode; + int raidID; + int rootID; + int num_root; + + num_root = 0; + cset = config_sets; + while(cset != NULL ) { + next_cset = cset->next; + if (rf_have_enough_components(cset) && + cset->ac->clabel->autoconfigure==1) { + retcode = rf_auto_config_set(cset,&raidID); + if (!retcode) { + if (cset->rootable) { + rootID = raidID; + num_root++; + } + } else { + /* The autoconfig didn't work :( */ +#if DEBUG + printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID); +#endif + rf_release_all_vps(cset); + } + } else { + /* we're not autoconfiguring this set... + release the associated resources */ + rf_release_all_vps(cset); + } + /* cleanup */ + rf_cleanup_config_set(cset); + cset = next_cset; + } + if (boothowto & RB_ASKNAME) { + /* We don't auto-config... */ + } else { + /* They didn't ask, and we found something bootable... */ + + if (num_root == 1) { + booted_device = &raidrootdev[rootID]; + } else if (num_root > 1) { + /* we can't guess.. require the user to answer... 
*/ + boothowto |= RB_ASKNAME; + } + } } +#endif int raidsize(dev) @@ -496,7 +631,11 @@ raidclose(dev, flags, fmt, p) Device shutdown has taken care of setting the clean bits if RAIDF_INITED is not set mark things as clean... */ - rf_update_component_labels( raidPtrs[unit] ); +#if 0 + printf("Last one on raid%d. Updating status.\n",unit); +#endif + rf_update_component_labels(raidPtrs[unit], + RF_FINAL_COMPONENT_UPDATE); } raidunlock(rs); @@ -513,7 +652,6 @@ raidstrategy(bp) RF_Raid_t *raidPtr; struct raid_softc *rs = &raid_softc[raidID]; struct disklabel *lp; - struct buf *dp; int wlabel; if ((rs->sc_flags & RAIDF_INITED) ==0) { @@ -563,14 +701,10 @@ raidstrategy(bp) bp->b_resid = 0; - /* stuff it onto our queue */ + bp->b_actf = rs->sc_q.b_actf; + rs->sc_q.b_actf = bp; + rs->sc_q.b_active++; - dp = &rs->buf_queue; - bp->b_actf = NULL; - bp->b_actb = dp->b_actb; - *dp->b_actb = bp; - dp->b_actb = &bp->b_actf; - raidstart(raidPtrs[raidID]); splx(s); @@ -634,6 +768,7 @@ raidioctl(dev, cmd, data, flag, p) struct raid_softc *rs; RF_Config_t *k_cfg, *u_cfg; RF_Raid_t *raidPtr; + RF_RaidDisk_t *diskPtr; RF_AccTotals_t *totals; RF_DeviceConfig_t *d_cfg, **ucfgp; u_char *specific_buf; @@ -641,12 +776,13 @@ raidioctl(dev, cmd, data, flag, p) int row; int column; struct rf_recon_req *rrcopy, *rr; - RF_ComponentLabel_t *component_label; + RF_ComponentLabel_t *clabel; RF_ComponentLabel_t ci_label; - RF_ComponentLabel_t **c_label_ptr; + RF_ComponentLabel_t **clabel_ptr; RF_SingleComponent_t *sparePtr,*componentPtr; RF_SingleComponent_t hot_spare; RF_SingleComponent_t component; + RF_ProgressInfo_t progressInfo, **progressInfoPtr; int i, j, d; if (unit >= numraid) @@ -673,6 +809,7 @@ raidioctl(dev, cmd, data, flag, p) case DIOCWDINFO: case DIOCGPART: case DIOCWLABEL: + case DIOCGPDINFO: case RAIDFRAME_SHUTDOWN: case RAIDFRAME_REWRITEPARITY: case RAIDFRAME_GET_INFO: @@ -683,6 +820,7 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_FAIL_DISK: case RAIDFRAME_COPYBACK: case 
RAIDFRAME_CHECK_RECON_STATUS: + case RAIDFRAME_CHECK_RECON_STATUS_EXT: case RAIDFRAME_GET_COMPONENT_LABEL: case RAIDFRAME_SET_COMPONENT_LABEL: case RAIDFRAME_ADD_HOT_SPARE: @@ -691,7 +829,13 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_REBUILD_IN_PLACE: case RAIDFRAME_CHECK_PARITY: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: + case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: case RAIDFRAME_CHECK_COPYBACK_STATUS: + case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: + case RAIDFRAME_SET_AUTOCONFIG: + case RAIDFRAME_SET_ROOT: + case RAIDFRAME_DELETE_COMPONENT: + case RAIDFRAME_INCORPORATE_HOT_SPARE: if ((rs->sc_flags & RAIDF_INITED) == 0) return (ENXIO); } @@ -699,6 +843,13 @@ raidioctl(dev, cmd, data, flag, p) switch (cmd) { /* Configure the system */ case RAIDFRAME_CONFIGURE: + + if (raidPtr->valid) { + /* There is a valid RAID set running on this unit! */ + printf("raid%d: Device already configured!\n",unit); + return(EINVAL); + } + /* * Copy-in the configuration information * data points to a pointer to the configuration structure. @@ -749,25 +900,26 @@ raidioctl(dev, cmd, data, flag, p) * Store the sum of all the bytes in the last byte? */ + /* + * Clear the entire RAID descriptor, just to make sure + * there is no stale data left in the case of a + * reconfiguration + */ + bzero((char *) raidPtr, sizeof(RF_Raid_t)); + /* configure the system */ raidPtr->raidid = unit; - retcode = rf_Configure(raidPtr, k_cfg); + retcode = rf_Configure(raidPtr, k_cfg, NULL); if (retcode == 0) { /* allow this many simultaneous IO's to this RAID device */ raidPtr->openings = RAIDOUTSTANDING; - - /* XXX should be moved to rf_Configure() */ - - raidPtr->copyback_in_progress = 0; - raidPtr->parity_rewrite_in_progress = 0; - raidPtr->recon_in_progress = 0; - - retcode = raidinit(dev, raidPtr, unit); - rf_markalldirty( raidPtr ); + + raidinit(raidPtr); + rf_markalldirty(raidPtr); } /* Free the buffers. No return code here. 
*/ @@ -811,29 +963,29 @@ raidioctl(dev, cmd, data, flag, p) return (retcode); case RAIDFRAME_GET_COMPONENT_LABEL: - c_label_ptr = (RF_ComponentLabel_t **) data; + clabel_ptr = (RF_ComponentLabel_t **) data; /* need to read the component label for the disk indicated - by row,column in component_label */ + by row,column in clabel */ /* For practice, let's get it directly fromdisk, rather than from the in-core copy */ - RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ), + RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), (RF_ComponentLabel_t *)); - if (component_label == NULL) + if (clabel == NULL) return (ENOMEM); - bzero((char *) component_label, sizeof(RF_ComponentLabel_t)); + bzero((char *) clabel, sizeof(RF_ComponentLabel_t)); - retcode = copyin( *c_label_ptr, component_label, + retcode = copyin( *clabel_ptr, clabel, sizeof(RF_ComponentLabel_t)); if (retcode) { - RF_Free( component_label, sizeof(RF_ComponentLabel_t)); + RF_Free( clabel, sizeof(RF_ComponentLabel_t)); return(retcode); } - row = component_label->row; - column = component_label->column; + row = clabel->row; + column = clabel->column; if ((row < 0) || (row >= raidPtr->numRow) || (column < 0) || (column >= raidPtr->numCol)) { @@ -843,16 +995,16 @@ raidioctl(dev, cmd, data, flag, p) raidread_component_label( raidPtr->Disks[row][column].dev, raidPtr->raid_cinfo[row][column].ci_vp, - component_label ); + clabel ); - retcode = copyout((caddr_t) component_label, - (caddr_t) *c_label_ptr, + retcode = copyout((caddr_t) clabel, + (caddr_t) *clabel_ptr, sizeof(RF_ComponentLabel_t)); - RF_Free( component_label, sizeof(RF_ComponentLabel_t)); + RF_Free( clabel, sizeof(RF_ComponentLabel_t)); return (retcode); case RAIDFRAME_SET_COMPONENT_LABEL: - component_label = (RF_ComponentLabel_t *) data; + clabel = (RF_ComponentLabel_t *) data; /* XXX check the label for valid stuff... 
*/ /* Note that some things *should not* get modified -- @@ -861,22 +1013,22 @@ raidioctl(dev, cmd, data, flag, p) */ printf("Got component label:\n"); - printf("Version: %d\n",component_label->version); - printf("Serial Number: %d\n",component_label->serial_number); - printf("Mod counter: %d\n",component_label->mod_counter); - printf("Row: %d\n", component_label->row); - printf("Column: %d\n", component_label->column); - printf("Num Rows: %d\n", component_label->num_rows); - printf("Num Columns: %d\n", component_label->num_columns); - printf("Clean: %d\n", component_label->clean); - printf("Status: %d\n", component_label->status); - - row = component_label->row; - column = component_label->column; + printf("Version: %d\n",clabel->version); + printf("Serial Number: %d\n",clabel->serial_number); + printf("Mod counter: %d\n",clabel->mod_counter); + printf("Row: %d\n", clabel->row); + printf("Column: %d\n", clabel->column); + printf("Num Rows: %d\n", clabel->num_rows); + printf("Num Columns: %d\n", clabel->num_columns); + printf("Clean: %d\n", clabel->clean); + printf("Status: %d\n", clabel->status); + + row = clabel->row; + column = clabel->column; if ((row < 0) || (row >= raidPtr->numRow) || (column < 0) || (column >= raidPtr->numCol)) { - RF_Free( component_label, sizeof(RF_ComponentLabel_t)); + RF_Free( clabel, sizeof(RF_ComponentLabel_t)); return(EINVAL); } @@ -885,12 +1037,12 @@ raidioctl(dev, cmd, data, flag, p) raidwrite_component_label( raidPtr->Disks[row][column].dev, raidPtr->raid_cinfo[row][column].ci_vp, - component_label ); + clabel ); #endif return (0); case RAIDFRAME_INIT_LABELS: - component_label = (RF_ComponentLabel_t *) data; + clabel = (RF_ComponentLabel_t *) data; /* we only want the serial number from the above. We get all the rest of the information @@ -898,19 +1050,16 @@ raidioctl(dev, cmd, data, flag, p) set. 
*/ - raidPtr->serial_number = component_label->serial_number; - /* current version number */ - ci_label.version = RF_COMPONENT_LABEL_VERSION; - ci_label.serial_number = component_label->serial_number; - ci_label.mod_counter = raidPtr->mod_counter; - ci_label.num_rows = raidPtr->numRow; - ci_label.num_columns = raidPtr->numCol; - ci_label.clean = RF_RAID_DIRTY; /* not clean */ - ci_label.status = rf_ds_optimal; /* "It's good!" */ + raidPtr->serial_number = clabel->serial_number; + + raid_init_component_label(raidPtr, &ci_label); + ci_label.serial_number = clabel->serial_number; for(row=0;row<raidPtr->numRow;row++) { ci_label.row = row; for(column=0;column<raidPtr->numCol;column++) { + diskPtr = &raidPtr->Disks[row][column]; + ci_label.partitionSize = diskPtr->partitionSize; ci_label.column = column; raidwrite_component_label( raidPtr->Disks[row][column].dev, @@ -940,18 +1089,42 @@ raidioctl(dev, cmd, data, flag, p) raidPtr,"raid_parity"); return (retcode); + case RAIDFRAME_SET_AUTOCONFIG: + d = rf_set_autoconfig(raidPtr, *(int *) data); + printf("New autoconfig value is: %d\n", d); + *(int *) data = d; + return (retcode); + + case RAIDFRAME_SET_ROOT: + d = rf_set_rootpartition(raidPtr, *(int *) data); + printf("New rootpartition value is: %d\n", d); + *(int *) data = d; + return (retcode); case RAIDFRAME_ADD_HOT_SPARE: sparePtr = (RF_SingleComponent_t *) data; memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); - printf("Adding spare\n"); retcode = rf_add_hot_spare(raidPtr, &hot_spare); return(retcode); case RAIDFRAME_REMOVE_HOT_SPARE: return(retcode); + case RAIDFRAME_DELETE_COMPONENT: + componentPtr = (RF_SingleComponent_t *)data; + memcpy( &component, componentPtr, + sizeof(RF_SingleComponent_t)); + retcode = rf_delete_component(raidPtr, &component); + return(retcode); + + case RAIDFRAME_INCORPORATE_HOT_SPARE: + componentPtr = (RF_SingleComponent_t *)data; + memcpy( &component, componentPtr, + sizeof(RF_SingleComponent_t)); + retcode = 
rf_incorporate_hot_spare(raidPtr, &component); + return(retcode); + case RAIDFRAME_REBUILD_IN_PLACE: if (raidPtr->Layout.map->faultsTolerated == 0) { @@ -1096,8 +1269,10 @@ raidioctl(dev, cmd, data, flag, p) /* Return the percentage completion of reconstruction */ case RAIDFRAME_CHECK_RECON_STATUS: if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0 */ - return(EINVAL); + /* This makes no sense on a RAID 0, so tell the + user it's done. */ + *(int *) data = 100; + return(0); } row = 0; /* XXX we only consider a single row... */ if (raidPtr->status[row] != rf_rs_reconstructing) @@ -1106,22 +1281,66 @@ raidioctl(dev, cmd, data, flag, p) *(int *)data = raidPtr->reconControl[row]->percentComplete; return (0); + + case RAIDFRAME_CHECK_RECON_STATUS_EXT: + progressInfoPtr = (RF_ProgressInfo_t **) data; + row = 0; /* XXX we only consider a single row... */ + if (raidPtr->status[row] != rf_rs_reconstructing) { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } else { + progressInfo.total = + raidPtr->reconControl[row]->numRUsTotal; + progressInfo.completed = + raidPtr->reconControl[row]->numRUsComplete; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } + retcode = copyout((caddr_t) &progressInfo, + (caddr_t) *progressInfoPtr, + sizeof(RF_ProgressInfo_t)); + return (retcode); + case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0 */ - return(EINVAL); + /* This makes no sense on a RAID 0, so tell the + user it's done. 
*/ + *(int *) data = 100; + return(0); } if (raidPtr->parity_rewrite_in_progress == 1) { - *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe; + *(int *) data = 100 * + raidPtr->parity_rewrite_stripes_done / + raidPtr->Layout.numStripe; } else { *(int *) data = 100; } return (0); + case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: + progressInfoPtr = (RF_ProgressInfo_t **) data; + if (raidPtr->parity_rewrite_in_progress == 1) { + progressInfo.total = raidPtr->Layout.numStripe; + progressInfo.completed = + raidPtr->parity_rewrite_stripes_done; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } else { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } + retcode = copyout((caddr_t) &progressInfo, + (caddr_t) *progressInfoPtr, + sizeof(RF_ProgressInfo_t)); + return (retcode); + case RAIDFRAME_CHECK_COPYBACK_STATUS: if (raidPtr->Layout.map->faultsTolerated == 0) { /* This makes no sense on a RAID 0 */ - return(EINVAL); + *(int *) data = 100; + return(0); } if (raidPtr->copyback_in_progress == 1) { *(int *) data = 100 * raidPtr->copyback_stripes_done / raidPtr->Layout.numStripe; @@ -1130,6 +1349,24 @@ raidioctl(dev, cmd, data, flag, p) } return (0); + case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: + progressInfoPtr = (RF_ProgressInfo_t **) data; + if (raidPtr->copyback_in_progress == 1) { + progressInfo.total = raidPtr->Layout.numStripe; + progressInfo.completed = + raidPtr->copyback_stripes_done; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } else { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } + retcode = copyout((caddr_t) &progressInfo, + (caddr_t) *progressInfoPtr, + sizeof(RF_ProgressInfo_t)); + return (retcode); + #if 0 case RAIDFRAME_SPARET_WAIT: /* @@ -1250,10 +1487,11 @@ raidioctl(dev, cmd, data, flag, p) #if 0 case DIOCGDEFLABEL: +#endif + case DIOCGPDINFO: raidgetdefaultlabel(raidPtr, 
rs, (struct disklabel *) data); break; -#endif default: @@ -1266,16 +1504,14 @@ raidioctl(dev, cmd, data, flag, p) * raidinit -- complete the rest of the initialization for the * RAIDframe device. */ -int -raidinit(dev, raidPtr, unit) - dev_t dev; +static void +raidinit(raidPtr) RF_Raid_t *raidPtr; - int unit; { - int retcode; struct raid_softc *rs; + int unit; - retcode = 0; + unit = raidPtr->raidid; rs = &raid_softc[unit]; @@ -1299,8 +1535,6 @@ raidinit(dev, raidPtr, unit) * protectedSectors, as used in RAIDframe. */ rs->sc_size = raidPtr->totalSectors; - rs->sc_dev = dev; - return (retcode); } /* @@ -1361,34 +1595,31 @@ raidstart(raidPtr) struct raid_softc *rs; int do_async; struct buf *bp; - struct buf *dp; unit = raidPtr->raidid; rs = &raid_softc[unit]; - + /* quick check to see if anything has died recently */ + RF_LOCK_MUTEX(raidPtr->mutex); + if (raidPtr->numNewFailures > 0) { + rf_update_component_labels(raidPtr, + RF_NORMAL_COMPONENT_UPDATE); + raidPtr->numNewFailures--; + } + RF_UNLOCK_MUTEX(raidPtr->mutex); + /* Check to see if we're at the limit... */ RF_LOCK_MUTEX(raidPtr->mutex); while (raidPtr->openings > 0) { RF_UNLOCK_MUTEX(raidPtr->mutex); - /* get the next item, if any, from the queue */ - dp = &rs->buf_queue; - bp = dp->b_actf; + bp = rs->sc_q.b_actf; if (bp == NULL) { /* nothing more to do */ return; } - - /* update structures */ - dp = bp->b_actf; - if (dp != NULL) { - dp->b_actb = bp->b_actb; - } else { - rs->buf_queue.b_actb = bp->b_actb; - } - *bp->b_actb = dp; - + rs->sc_q.b_actf = bp->b_actf; + /* Ok, for the bp we have here, bp->b_blkno is relative to the * partition.. Need to make it absolute to the underlying * device.. */ @@ -1457,14 +1688,12 @@ raidstart(raidPtr) /* XXX we're still at splbio() here... do we *really* need to be? */ - retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 
RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, do_async, raid_addr, num_blocks, - bp->b_un.b_addr, bp, NULL, NULL, + bp->b_data, bp, NULL, NULL, RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL); - RF_LOCK_MUTEX(raidPtr->mutex); } RF_UNLOCK_MUTEX(raidPtr->mutex); @@ -1671,7 +1900,7 @@ rf_KernelWakeupFunc(vbp) rf_ds_failed; queue->raidPtr->status[queue->row] = rf_rs_degraded; queue->raidPtr->numFailures++; - /* XXX here we should bump the version number for each component, and write that data out */ + queue->raidPtr->numNewFailures++; } else { /* Disk is already dead... */ /* printf("Disk already marked as dead!\n"); */ @@ -1716,7 +1945,7 @@ rf_InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg, bp->b_bufsize = bp->b_bcount; bp->b_error = 0; bp->b_dev = dev; - bp->b_un.b_addr = buf; + bp->b_data = buf; bp->b_blkno = startSect; bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ if (bp->b_bcount == 0) { @@ -1761,7 +1990,6 @@ raidgetdefaultlabel(raidPtr, rs, lp) lp->d_magic = DISKMAGIC; lp->d_magic2 = DISKMAGIC; lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); - } /* @@ -1778,6 +2006,8 @@ raidgetdisklabel(dev) struct disklabel *lp = rs->sc_dkdev.dk_label; struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; RF_Raid_t *raidPtr; + int i; + struct partition *pp; db1_printf(("Getting the disklabel...\n")); @@ -1790,34 +2020,33 @@ raidgetdisklabel(dev) /* * Call the generic disklabel extraction routine. */ - errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, - rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel, 0); - if (errstring) - raidmakedisklabel(rs); - else { - int i; - struct partition *pp; + errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, lp, + rs->sc_dkdev.dk_cpulabel, 0); + if (errstring) { + printf("%s: %s\n", rs->sc_xname, errstring); + return; + /*raidmakedisklabel(rs); */ + } - /* - * Sanity check whether the found disklabel is valid. 
- * - * This is necessary since total size of the raid device - * may vary when an interleave is changed even though exactly - * same componets are used, and old disklabel may used - * if that is found. - */ - if (lp->d_secperunit != rs->sc_size) - printf("WARNING: %s: " - "total sector size in disklabel (%d) != " - "the size of raid (%ld)\n", rs->sc_xname, - lp->d_secperunit, (long) rs->sc_size); - for (i = 0; i < lp->d_npartitions; i++) { - pp = &lp->d_partitions[i]; - if (pp->p_offset + pp->p_size > rs->sc_size) - printf("WARNING: %s: end of partition `%c' " - "exceeds the size of raid (%ld)\n", - rs->sc_xname, 'a' + i, (long) rs->sc_size); - } + /* + * Sanity check whether the found disklabel is valid. + * + * This is necessary since total size of the raid device + * may vary when an interleave is changed even though exactly + * same componets are used, and old disklabel may used + * if that is found. + */ + if (lp->d_secperunit != rs->sc_size) + printf("WARNING: %s: " + "total sector size in disklabel (%d) != " + "the size of raid (%ld)\n", rs->sc_xname, + lp->d_secperunit, (long) rs->sc_size); + for (i = 0; i < lp->d_npartitions; i++) { + pp = &lp->d_partitions[i]; + if (pp->p_offset + pp->p_size > rs->sc_size) + printf("WARNING: %s: end of partition `%c' " + "exceeds the size of raid (%ld)\n", + rs->sc_xname, 'a' + i, (long) rs->sc_size); } } @@ -1933,11 +2162,11 @@ raidunlock(rs) int raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) { - RF_ComponentLabel_t component_label; - raidread_component_label(dev, b_vp, &component_label); - component_label.mod_counter = mod_counter; - component_label.clean = RF_RAID_CLEAN; - raidwrite_component_label(dev, b_vp, &component_label); + RF_ComponentLabel_t clabel; + raidread_component_label(dev, b_vp, &clabel); + clabel.mod_counter = mod_counter; + clabel.clean = RF_RAID_CLEAN; + raidwrite_component_label(dev, b_vp, &clabel); return(0); } @@ -1945,20 +2174,20 @@ raidmarkclean(dev_t dev, struct vnode *b_vp, int 
mod_counter) int raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) { - RF_ComponentLabel_t component_label; - raidread_component_label(dev, b_vp, &component_label); - component_label.mod_counter = mod_counter; - component_label.clean = RF_RAID_DIRTY; - raidwrite_component_label(dev, b_vp, &component_label); + RF_ComponentLabel_t clabel; + raidread_component_label(dev, b_vp, &clabel); + clabel.mod_counter = mod_counter; + clabel.clean = RF_RAID_DIRTY; + raidwrite_component_label(dev, b_vp, &clabel); return(0); } /* ARGSUSED */ int -raidread_component_label(dev, b_vp, component_label) +raidread_component_label(dev, b_vp, clabel) dev_t dev; struct vnode *b_vp; - RF_ComponentLabel_t *component_label; + RF_ComponentLabel_t *clabel; { struct buf *bp; int error; @@ -1981,22 +2210,15 @@ raidread_component_label(dev, b_vp, component_label) error = biowait(bp); if (!error) { - memcpy(component_label, bp->b_un.b_addr, + memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t)); #if 0 - printf("raidread_component_label: got component label:\n"); - printf("Version: %d\n",component_label->version); - printf("Serial Number: %d\n",component_label->serial_number); - printf("Mod counter: %d\n",component_label->mod_counter); - printf("Row: %d\n", component_label->row); - printf("Column: %d\n", component_label->column); - printf("Num Rows: %d\n", component_label->num_rows); - printf("Num Columns: %d\n", component_label->num_columns); - printf("Clean: %d\n", component_label->clean); - printf("Status: %d\n", component_label->status); + rf_print_component_label( clabel ); #endif } else { +#if 0 printf("Failed to read RAID component label!\n"); +#endif } bp->b_flags = B_INVAL | B_AGE; @@ -2005,10 +2227,10 @@ raidread_component_label(dev, b_vp, component_label) } /* ARGSUSED */ int -raidwrite_component_label(dev, b_vp, component_label) +raidwrite_component_label(dev, b_vp, clabel) dev_t dev; struct vnode *b_vp; - RF_ComponentLabel_t *component_label; + RF_ComponentLabel_t 
*clabel; { struct buf *bp; int error; @@ -2023,26 +2245,28 @@ raidwrite_component_label(dev, b_vp, component_label) bp->b_flags = B_BUSY | B_WRITE; bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; - memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE ); + memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); - memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t)); + memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); (*bdevsw[major(bp->b_dev)].d_strategy)(bp); error = biowait(bp); bp->b_flags = B_INVAL | B_AGE; brelse(bp); if (error) { +#if 1 printf("Failed to write RAID component info!\n"); +#endif } return(error); } void -rf_markalldirty( raidPtr ) +rf_markalldirty(raidPtr) RF_Raid_t *raidPtr; { - RF_ComponentLabel_t c_label; + RF_ComponentLabel_t clabel; int r,c; raidPtr->mod_counter++; @@ -2052,19 +2276,19 @@ rf_markalldirty( raidPtr ) raidread_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - if (c_label.status == rf_ds_spared) { + &clabel); + if (clabel.status == rf_ds_spared) { /* XXX do something special... but whatever you do, don't try to access it!! 
*/ } else { #if 0 - c_label.status = + clabel.status = raidPtr->Disks[r][c].status; raidwrite_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); + &clabel); #endif raidmarkdirty( raidPtr->Disks[r][c].dev, @@ -2110,21 +2334,21 @@ rf_markalldirty( raidPtr ) raidread_component_label( raidPtr->Disks[r][sparecol].dev, raidPtr->raid_cinfo[r][sparecol].ci_vp, - &c_label); + &clabel); /* make sure status is noted */ - c_label.version = RF_COMPONENT_LABEL_VERSION; - c_label.mod_counter = raidPtr->mod_counter; - c_label.serial_number = raidPtr->serial_number; - c_label.row = srow; - c_label.column = scol; - c_label.num_rows = raidPtr->numRow; - c_label.num_columns = raidPtr->numCol; - c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/ - c_label.status = rf_ds_optimal; + clabel.version = RF_COMPONENT_LABEL_VERSION; + clabel.mod_counter = raidPtr->mod_counter; + clabel.serial_number = raidPtr->serial_number; + clabel.row = srow; + clabel.column = scol; + clabel.num_rows = raidPtr->numRow; + clabel.num_columns = raidPtr->numCol; + clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/ + clabel.status = rf_ds_optimal; raidwrite_component_label( raidPtr->Disks[r][sparecol].dev, raidPtr->raid_cinfo[r][sparecol].ci_vp, - &c_label); + &clabel); raidmarkclean( raidPtr->Disks[r][sparecol].dev, raidPtr->raid_cinfo[r][sparecol].ci_vp); } @@ -2135,10 +2359,11 @@ rf_markalldirty( raidPtr ) void -rf_update_component_labels( raidPtr ) +rf_update_component_labels(raidPtr, final) RF_Raid_t *raidPtr; + int final; { - RF_ComponentLabel_t c_label; + RF_ComponentLabel_t clabel; int sparecol; int r,c; int i,j; @@ -2158,43 +2383,26 @@ rf_update_component_labels( raidPtr ) raidread_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); + &clabel); /* make sure status is noted */ - c_label.status = rf_ds_optimal; + clabel.status = rf_ds_optimal; + /* bump the counter */ + clabel.mod_counter = raidPtr->mod_counter; + 
raidwrite_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); + &clabel); + if (final == RF_FINAL_COMPONENT_UPDATE) { + if (raidPtr->parity_good == RF_RAID_CLEAN) { + raidmarkclean( + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, + raidPtr->mod_counter); + } } } /* else we don't touch it.. */ -#if 0 - else if (raidPtr->Disks[r][c].status != - rf_ds_failed) { - raidread_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - /* make sure status is noted */ - c_label.status = - raidPtr->Disks[r][c].status; - raidwrite_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); - } - } -#endif } } @@ -2223,34 +2431,96 @@ rf_update_component_labels( raidPtr ) } } + /* XXX shouldn't *really* need this... 
*/ raidread_component_label( raidPtr->Disks[0][sparecol].dev, raidPtr->raid_cinfo[0][sparecol].ci_vp, - &c_label); + &clabel); /* make sure status is noted */ - c_label.version = RF_COMPONENT_LABEL_VERSION; - c_label.mod_counter = raidPtr->mod_counter; - c_label.serial_number = raidPtr->serial_number; - c_label.row = srow; - c_label.column = scol; - c_label.num_rows = raidPtr->numRow; - c_label.num_columns = raidPtr->numCol; - c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/ - c_label.status = rf_ds_optimal; + + raid_init_component_label(raidPtr, &clabel); + + clabel.mod_counter = raidPtr->mod_counter; + clabel.row = srow; + clabel.column = scol; + clabel.status = rf_ds_optimal; + raidwrite_component_label( raidPtr->Disks[0][sparecol].dev, raidPtr->raid_cinfo[0][sparecol].ci_vp, - &c_label); - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - raidPtr->mod_counter); + &clabel); + if (final == RF_FINAL_COMPONENT_UPDATE) { + if (raidPtr->parity_good == RF_RAID_CLEAN) { + raidmarkclean( raidPtr->Disks[0][sparecol].dev, + raidPtr->raid_cinfo[0][sparecol].ci_vp, + raidPtr->mod_counter); + } } } } /* printf("Component labels updated\n"); */ } +void +rf_close_component(raidPtr, vp, auto_configured) + RF_Raid_t *raidPtr; + struct vnode *vp; + int auto_configured; +{ + struct proc *p; + + p = raidPtr->engine_thread; + + if (vp != NULL) { + if (auto_configured == 1) { + VOP_CLOSE(vp, FREAD, NOCRED, 0); + vput(vp); + + } else { + VOP_UNLOCK(vp, 0, p); + (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); + } + } else { + printf("vnode was NULL\n"); + } +} + + +void +rf_UnconfigureVnodes(raidPtr) + RF_Raid_t *raidPtr; +{ + int r,c; + struct proc *p; + struct vnode *vp; + int acd; + + + /* We take this opportunity to close the vnodes like we should.. 
*/ + + p = raidPtr->engine_thread; + + for (r = 0; r < raidPtr->numRow; r++) { + for (c = 0; c < raidPtr->numCol; c++) { + printf("Closing vnode for row: %d col: %d\n", r, c); + vp = raidPtr->raid_cinfo[r][c].ci_vp; + acd = raidPtr->Disks[r][c].auto_configured; + rf_close_component(raidPtr, vp, acd); + raidPtr->raid_cinfo[r][c].ci_vp = NULL; + raidPtr->Disks[r][c].auto_configured = 0; + } + } + for (r = 0; r < raidPtr->numSpare; r++) { + printf("Closing vnode for spare: %d\n", r); + vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp; + acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured; + rf_close_component(raidPtr, vp, acd); + raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL; + raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0; + } +} + + void rf_ReconThread(req) struct rf_recon_req *req; @@ -2296,6 +2566,11 @@ rf_RewriteParityThread(raidPtr) } raidPtr->parity_rewrite_in_progress = 0; + /* Anyone waiting for us to stop? If so, inform them... */ + if (raidPtr->waitShutdown) { + wakeup(&raidPtr->parity_rewrite_in_progress); + } + /* That's all... */ kthread_exit(0); /* does not return */ } @@ -2337,3 +2612,705 @@ rf_ReconstructInPlaceThread(req) /* That's all... */ kthread_exit(0); /* does not return */ } + +void +rf_mountroot_hook(dev) + struct device *dev; +{ + +} + + +RF_AutoConfig_t * +rf_find_raid_components() +{ +#ifdef RAID_AUTOCONFIG + struct devnametobdevmaj *dtobdm; + struct vnode *vp; + struct disklabel label; + struct device *dv; + char *cd_name; + dev_t dev; + int error; + int i; + int good_one; + RF_ComponentLabel_t *clabel; + RF_AutoConfig_t *ac; +#endif + RF_AutoConfig_t *ac_list; + + + /* initialize the AutoConfig list */ + ac_list = NULL; + +#if RAID_AUTOCONFIG +if (raidautoconfig) { + + /* we begin by trolling through *all* the devices on the system */ + + for (dv = alldevs.tqh_first; dv != NULL; + dv = dv->dv_list.tqe_next) { + + /* we are only interested in disks... 
*/ + if (dv->dv_class != DV_DISK) + continue; + + /* we don't care about floppies... */ + if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) { + continue; + } + + /* need to find the device_name_to_block_device_major stuff */ + cd_name = dv->dv_cfdata->cf_driver->cd_name; + dtobdm = dev_name2blk; + while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) { + dtobdm++; + } + + /* get a vnode for the raw partition of this disk */ + + dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART); + if (bdevvp(dev, &vp)) + panic("RAID can't alloc vnode"); + + error = VOP_OPEN(vp, FREAD, NOCRED, 0); + + if (error) { + /* "Who cares." Continue looking + for something that exists*/ + vput(vp); + continue; + } + + /* Ok, the disk exists. Go get the disklabel. */ + error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label, + FREAD, NOCRED, 0); + if (error) { + /* + * XXX can't happen - open() would + * have errored out (or faked up one) + */ + printf("can't get label for dev %s%c (%d)!?!?\n", + dv->dv_xname, 'a' + RAW_PART, error); + } + + /* don't need this any more. We'll allocate it again + a little later if we really do... */ + VOP_CLOSE(vp, FREAD, NOCRED, 0); + vput(vp); + + for (i=0; i < label.d_npartitions; i++) { + /* We only support partitions marked as RAID */ + if (label.d_partitions[i].p_fstype != FS_RAID) + continue; + + dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i); + if (bdevvp(dev, &vp)) + panic("RAID can't alloc vnode"); + + error = VOP_OPEN(vp, FREAD, NOCRED, 0); + if (error) { + /* Whatever... */ + vput(vp); + continue; + } + + good_one = 0; + + clabel = (RF_ComponentLabel_t *) + malloc(sizeof(RF_ComponentLabel_t), + M_RAIDFRAME, M_NOWAIT); + if (clabel == NULL) { + /* XXX CLEANUP HERE */ + printf("RAID auto config: out of memory!\n"); + return(NULL); /* XXX probably should panic? */ + } + + if (!raidread_component_label(dev, vp, clabel)) { + /* Got the label. Does it look reasonable? 
*/ + if (rf_reasonable_label(clabel) && + (clabel->partitionSize <= + label.d_partitions[i].p_size)) { +#if DEBUG + printf("Component on: %s%c: %d\n", + dv->dv_xname, 'a'+i, + label.d_partitions[i].p_size); + rf_print_component_label(clabel); +#endif + /* if it's reasonable, add it, + else ignore it. */ + ac = (RF_AutoConfig_t *) + malloc(sizeof(RF_AutoConfig_t), + M_RAIDFRAME, + M_NOWAIT); + if (ac == NULL) { + /* XXX should panic?? */ + return(NULL); + } + + sprintf(ac->devname, "%s%c", + dv->dv_xname, 'a'+i); + ac->dev = dev; + ac->vp = vp; + ac->clabel = clabel; + ac->next = ac_list; + ac_list = ac; + good_one = 1; + } + } + if (!good_one) { + /* cleanup */ + free(clabel, M_RAIDFRAME); + VOP_CLOSE(vp, FREAD, NOCRED, 0); + vput(vp); + } + } + } +} +#endif +return(ac_list); +} + +#ifdef RAID_AUTOCONFIG +static int +rf_reasonable_label(clabel) + RF_ComponentLabel_t *clabel; +{ + + if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || + (clabel->version==RF_COMPONENT_LABEL_VERSION)) && + ((clabel->clean == RF_RAID_CLEAN) || + (clabel->clean == RF_RAID_DIRTY)) && + clabel->row >=0 && + clabel->column >= 0 && + clabel->num_rows > 0 && + clabel->num_columns > 0 && + clabel->row < clabel->num_rows && + clabel->column < clabel->num_columns && + clabel->blockSize > 0 && + clabel->numBlocks > 0) { + /* label looks reasonable enough... */ + return(1); + } + return(0); +} +#endif + +void +rf_print_component_label(clabel) + RF_ComponentLabel_t *clabel; +{ + printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", + clabel->row, clabel->column, + clabel->num_rows, clabel->num_columns); + printf(" Version: %d Serial Number: %d Mod Counter: %d\n", + clabel->version, clabel->serial_number, + clabel->mod_counter); + printf(" Clean: %s Status: %d\n", + clabel->clean ? 
"Yes" : "No", clabel->status ); + printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", + clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); + printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", + (char) clabel->parityConfig, clabel->blockSize, + clabel->numBlocks); + printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); + printf(" Contains root partition: %s\n", + clabel->root_partition ? "Yes" : "No" ); + printf(" Last configured as: raid%d\n", clabel->last_unit ); +#if 0 + printf(" Config order: %d\n", clabel->config_order); +#endif + +} + +RF_ConfigSet_t * +rf_create_auto_sets(ac_list) + RF_AutoConfig_t *ac_list; +{ + RF_AutoConfig_t *ac; + RF_ConfigSet_t *config_sets; + RF_ConfigSet_t *cset; + RF_AutoConfig_t *ac_next; + + + config_sets = NULL; + + /* Go through the AutoConfig list, and figure out which components + belong to what sets. */ + ac = ac_list; + while(ac!=NULL) { + /* we're going to putz with ac->next, so save it here + for use at the end of the loop */ + ac_next = ac->next; + + if (config_sets == NULL) { + /* will need at least this one... */ + config_sets = (RF_ConfigSet_t *) + malloc(sizeof(RF_ConfigSet_t), + M_RAIDFRAME, M_NOWAIT); + if (config_sets == NULL) { + panic("rf_create_auto_sets: No memory!\n"); + } + /* this one is easy :) */ + config_sets->ac = ac; + config_sets->next = NULL; + config_sets->rootable = 0; + ac->next = NULL; + } else { + /* which set does this component fit into? */ + cset = config_sets; + while(cset!=NULL) { + if (rf_does_it_fit(cset, ac)) { + /* looks like it matches... */ + ac->next = cset->ac; + cset->ac = ac; + break; + } + cset = cset->next; + } + if (cset==NULL) { + /* didn't find a match above... 
new set..*/ + cset = (RF_ConfigSet_t *) + malloc(sizeof(RF_ConfigSet_t), + M_RAIDFRAME, M_NOWAIT); + if (cset == NULL) { + panic("rf_create_auto_sets: No memory!\n"); + } + cset->ac = ac; + ac->next = NULL; + cset->next = config_sets; + cset->rootable = 0; + config_sets = cset; + } + } + ac = ac_next; + } + + + return(config_sets); +} + +static int +rf_does_it_fit(cset, ac) + RF_ConfigSet_t *cset; + RF_AutoConfig_t *ac; +{ + RF_ComponentLabel_t *clabel1, *clabel2; + + /* If this one matches the *first* one in the set, that's good + enough, since the other members of the set would have been + through here too... */ + /* note that we are not checking partitionSize here.. + + Note that we are also not checking the mod_counters here. + If everything else matches execpt the mod_counter, that's + good enough for this test. We will deal with the mod_counters + a little later in the autoconfiguration process. + + (clabel1->mod_counter == clabel2->mod_counter) && + + The reason we don't check for this is that failed disks + will have lower modification counts. If those disks are + not added to the set they used to belong to, then they will + form their own set, which may result in 2 different sets, + for example, competing to be configured at raid0, and + perhaps competing to be the root filesystem set. If the + wrong ones get configured, or both attempt to become /, + weird behaviour and or serious lossage will occur. Thus we + need to bring them into the fold here, and kick them out at + a later point. 
+ + */ + + clabel1 = cset->ac->clabel; + clabel2 = ac->clabel; + if ((clabel1->version == clabel2->version) && + (clabel1->serial_number == clabel2->serial_number) && + (clabel1->num_rows == clabel2->num_rows) && + (clabel1->num_columns == clabel2->num_columns) && + (clabel1->sectPerSU == clabel2->sectPerSU) && + (clabel1->SUsPerPU == clabel2->SUsPerPU) && + (clabel1->SUsPerRU == clabel2->SUsPerRU) && + (clabel1->parityConfig == clabel2->parityConfig) && + (clabel1->maxOutstanding == clabel2->maxOutstanding) && + (clabel1->blockSize == clabel2->blockSize) && + (clabel1->numBlocks == clabel2->numBlocks) && + (clabel1->autoconfigure == clabel2->autoconfigure) && + (clabel1->root_partition == clabel2->root_partition) && + (clabel1->last_unit == clabel2->last_unit) && + (clabel1->config_order == clabel2->config_order)) { + /* if it get's here, it almost *has* to be a match */ + } else { + /* it's not consistent with somebody in the set.. + punt */ + return(0); + } + /* all was fine.. it must fit... */ + return(1); +} + +int +rf_have_enough_components(cset) + RF_ConfigSet_t *cset; +{ + RF_AutoConfig_t *ac; + RF_AutoConfig_t *auto_config; + RF_ComponentLabel_t *clabel; + int r,c; + int num_rows; + int num_cols; + int num_missing; + int mod_counter; + int mod_counter_found; + int even_pair_failed; + char parity_type; + + + /* check to see that we have enough 'live' components + of this set. If so, we can configure it if necessary */ + + num_rows = cset->ac->clabel->num_rows; + num_cols = cset->ac->clabel->num_columns; + parity_type = cset->ac->clabel->parityConfig; + + /* XXX Check for duplicate components!?!?!? */ + + /* Determine what the mod_counter is supposed to be for this set. 
*/ + + mod_counter_found = 0; + ac = cset->ac; + while(ac!=NULL) { + if (mod_counter_found==0) { + mod_counter = ac->clabel->mod_counter; + mod_counter_found = 1; + } else { + if (ac->clabel->mod_counter > mod_counter) { + mod_counter = ac->clabel->mod_counter; + } + } + ac = ac->next; + } + + num_missing = 0; + auto_config = cset->ac; + + for(r=0; r<num_rows; r++) { + even_pair_failed = 0; + for(c=0; c<num_cols; c++) { + ac = auto_config; + while(ac!=NULL) { + if ((ac->clabel->row == r) && + (ac->clabel->column == c) && + (ac->clabel->mod_counter == mod_counter)) { + /* it's this one... */ +#if DEBUG + printf("Found: %s at %d,%d\n", + ac->devname,r,c); +#endif + break; + } + ac=ac->next; + } + if (ac==NULL) { + /* Didn't find one here! */ + /* special case for RAID 1, especially + where there are more than 2 + components (where RAIDframe treats + things a little differently :( ) */ + if (parity_type == '1') { + if (c%2 == 0) { /* even component */ + even_pair_failed = 1; + } else { /* odd component. If + we're failed, and + so is the even + component, it's + "Good Night, Charlie" */ + if (even_pair_failed == 1) { + return(0); + } + } + } else { + /* normal accounting */ + num_missing++; + } + } + if ((parity_type == '1') && (c%2 == 1)) { + /* Just did an even component, and we didn't + bail.. reset the even_pair_failed flag, + and go on to the next component.... 
*/ + even_pair_failed = 0; + } + } + } + + clabel = cset->ac->clabel; + + if (((clabel->parityConfig == '0') && (num_missing > 0)) || + ((clabel->parityConfig == '4') && (num_missing > 1)) || + ((clabel->parityConfig == '5') && (num_missing > 1))) { + /* XXX this needs to be made *much* more general */ + /* Too many failures */ + return(0); + } + /* otherwise, all is well, and we've got enough to take a kick + at autoconfiguring this set */ + return(1); +} + +void +rf_create_configuration(ac,config,raidPtr) + RF_AutoConfig_t *ac; + RF_Config_t *config; + RF_Raid_t *raidPtr; +{ + RF_ComponentLabel_t *clabel; + int i; + + clabel = ac->clabel; + + /* 1. Fill in the common stuff */ + config->numRow = clabel->num_rows; + config->numCol = clabel->num_columns; + config->numSpare = 0; /* XXX should this be set here? */ + config->sectPerSU = clabel->sectPerSU; + config->SUsPerPU = clabel->SUsPerPU; + config->SUsPerRU = clabel->SUsPerRU; + config->parityConfig = clabel->parityConfig; + /* XXX... */ + strcpy(config->diskQueueType,"fifo"); + config->maxOutstandingDiskReqs = clabel->maxOutstanding; + config->layoutSpecificSize = 0; /* XXX ?? 
*/ + + while(ac!=NULL) { + /* row/col values will be in range due to the checks + in reasonable_label() */ + strcpy(config->devnames[ac->clabel->row][ac->clabel->column], + ac->devname); + ac = ac->next; + } + + for(i=0;i<RF_MAXDBGV;i++) { + config->debugVars[i][0] = NULL; + } +} + +int +rf_set_autoconfig(raidPtr, new_value) + RF_Raid_t *raidPtr; + int new_value; +{ + RF_ComponentLabel_t clabel; + struct vnode *vp; + dev_t dev; + int row, column; + + raidPtr->autoconfigure = new_value; + for(row=0; row<raidPtr->numRow; row++) { + for(column=0; column<raidPtr->numCol; column++) { + if (raidPtr->Disks[row][column].status == + rf_ds_optimal) { + dev = raidPtr->Disks[row][column].dev; + vp = raidPtr->raid_cinfo[row][column].ci_vp; + raidread_component_label(dev, vp, &clabel); + clabel.autoconfigure = new_value; + raidwrite_component_label(dev, vp, &clabel); + } + } + } + return(new_value); +} + +int +rf_set_rootpartition(raidPtr, new_value) + RF_Raid_t *raidPtr; + int new_value; +{ + RF_ComponentLabel_t clabel; + struct vnode *vp; + dev_t dev; + int row, column; + + raidPtr->root_partition = new_value; + for(row=0; row<raidPtr->numRow; row++) { + for(column=0; column<raidPtr->numCol; column++) { + if (raidPtr->Disks[row][column].status == + rf_ds_optimal) { + dev = raidPtr->Disks[row][column].dev; + vp = raidPtr->raid_cinfo[row][column].ci_vp; + raidread_component_label(dev, vp, &clabel); + clabel.root_partition = new_value; + raidwrite_component_label(dev, vp, &clabel); + } + } + } + return(new_value); +} + +void +rf_release_all_vps(cset) + RF_ConfigSet_t *cset; +{ + RF_AutoConfig_t *ac; + + ac = cset->ac; + while(ac!=NULL) { + /* Close the vp, and give it back */ + if (ac->vp) { + VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); + vput(ac->vp); + ac->vp = NULL; + } + ac = ac->next; + } +} + + +void +rf_cleanup_config_set(cset) + RF_ConfigSet_t *cset; +{ + RF_AutoConfig_t *ac; + RF_AutoConfig_t *next_ac; + + ac = cset->ac; + while(ac!=NULL) { + next_ac = ac->next; + /* nuke the 
label */ + free(ac->clabel, M_RAIDFRAME); + /* cleanup the config structure */ + free(ac, M_RAIDFRAME); + /* "next.." */ + ac = next_ac; + } + /* and, finally, nuke the config set */ + free(cset, M_RAIDFRAME); +} + + +void +raid_init_component_label(raidPtr, clabel) + RF_Raid_t *raidPtr; + RF_ComponentLabel_t *clabel; +{ + /* current version number */ + clabel->version = RF_COMPONENT_LABEL_VERSION; + clabel->serial_number = raidPtr->serial_number; + clabel->mod_counter = raidPtr->mod_counter; + clabel->num_rows = raidPtr->numRow; + clabel->num_columns = raidPtr->numCol; + clabel->clean = RF_RAID_DIRTY; /* not clean */ + clabel->status = rf_ds_optimal; /* "It's good!" */ + + clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; + clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; + clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; + + clabel->blockSize = raidPtr->bytesPerSector; + clabel->numBlocks = raidPtr->sectorsPerDisk; + + /* XXX not portable */ + clabel->parityConfig = raidPtr->Layout.map->parityConfig; + clabel->maxOutstanding = raidPtr->maxOutstanding; + clabel->autoconfigure = raidPtr->autoconfigure; + clabel->root_partition = raidPtr->root_partition; + clabel->last_unit = raidPtr->raidid; + clabel->config_order = raidPtr->config_order; +} + +int +rf_auto_config_set(cset,unit) + RF_ConfigSet_t *cset; + int *unit; +{ + RF_Raid_t *raidPtr; + RF_Config_t *config; + int raidID; + int retcode; + + printf("RAID autoconfigure\n"); + + retcode = 0; + *unit = -1; + + /* 1. Create a config structure */ + + config = (RF_Config_t *)malloc(sizeof(RF_Config_t), + M_RAIDFRAME, + M_NOWAIT); + if (config==NULL) { + printf("Out of mem!?!?\n"); + /* XXX do something more intelligent here. */ + return(1); + } + + memset(config, 0, sizeof(RF_Config_t)); + + /* XXX raidID needs to be set correctly.. */ + + /* + 2. Figure out what RAID ID this one is supposed to live at + See if we can get the same RAID dev that it was configured + on last time.. 
+ */ + + raidID = cset->ac->clabel->last_unit; + if ((raidID < 0) || (raidID >= numraid)) { + /* let's not wander off into lala land. */ + raidID = numraid - 1; + } + if (raidPtrs[raidID]->valid != 0) { + + /* + Nope... Go looking for an alternative... + Start high so we don't immediately use raid0 if that's + not taken. + */ + + for(raidID = numraid; raidID >= 0; raidID--) { + if (raidPtrs[raidID]->valid == 0) { + /* can use this one! */ + break; + } + } + } + + if (raidID < 0) { + /* punt... */ + printf("Unable to auto configure this set!\n"); + printf("(Out of RAID devs!)\n"); + return(1); + } + printf("Configuring raid%d:\n",raidID); + raidPtr = raidPtrs[raidID]; + + /* XXX all this stuff should be done SOMEWHERE ELSE! */ + raidPtr->raidid = raidID; + raidPtr->openings = RAIDOUTSTANDING; + + /* 3. Build the configuration structure */ + rf_create_configuration(cset->ac, config, raidPtr); + + /* 4. Do the configuration */ + retcode = rf_Configure(raidPtr, config, cset->ac); + + if (retcode == 0) { + + raidinit(raidPtrs[raidID]); + + rf_markalldirty(raidPtrs[raidID]); + raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ + if (cset->ac->clabel->root_partition==1) { + /* everything configured just fine. Make a note + that this set is eligible to be root. */ + cset->rootable = 1; + /* XXX do this here? */ + raidPtrs[raidID]->root_partition = 1; + } + } + + /* 5. 
Cleanup */ + free(config, M_RAIDFRAME); + + *unit = raidID; + return(retcode); +} diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.c b/sys/dev/raidframe/rf_paritylogDiskMgr.c index 41c3417c175..6914b19e535 100644 --- a/sys/dev/raidframe/rf_paritylogDiskMgr.c +++ b/sys/dev/raidframe/rf_paritylogDiskMgr.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_paritylogDiskMgr.c,v 1.4 2000/01/11 18:02:22 peter Exp $ */ -/* $NetBSD: rf_paritylogDiskMgr.c,v 1.7 2000/01/08 01:18:36 oster Exp $ */ +/* $OpenBSD: rf_paritylogDiskMgr.c,v 1.5 2000/08/08 16:07:43 peter Exp $ */ +/* $NetBSD: rf_paritylogDiskMgr.c,v 1.10 2000/01/15 01:57:57 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -74,7 +74,7 @@ AcquireReintBuffer(pool) pool->availBuffersIndex = 0; RF_UNLOCK_MUTEX(pool->mutex); } else { - RF_PANIC(); /* should never happen in currect config, + RF_PANIC(); /* should never happen in correct config, * single reint */ RF_WAIT_COND(pool->cond, pool->mutex); } @@ -117,19 +117,23 @@ ReadRegionLog( * * NON-BLOCKING */ - RF_AccTraceEntry_t tracerec; + RF_AccTraceEntry_t *tracerec; RF_DagNode_t *rrd_rdNode; /* create DAG to read region log from disk */ rf_MakeAllocList(*rrd_alloclist); - *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); + *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, + rf_DiskReadFunc, rf_DiskReadUndoFunc, + "Rrl", *rrd_alloclist, + RF_DAG_FLAGS_NONE, + RF_IO_NORMAL_PRIORITY); /* create and initialize PDA for the core log */ /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t * *)); */ *rrd_pda = rf_AllocPDAList(1); - rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector)); + rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), + &((*rrd_pda)->col), &((*rrd_pda)->startSector)); (*rrd_pda)->numSector = 
raidPtr->regionInfo[regionID].capacity; if ((*rrd_pda)->next) { @@ -137,13 +141,15 @@ ReadRegionLog( printf("set rrd_pda->next to NULL\n"); } /* initialize DAG parameters */ - bzero((char *) &tracerec, sizeof(tracerec)); - (*rrd_dag_h)->tracerec = &tracerec; + RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); + (*rrd_dag_h)->tracerec = tracerec; rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; rrd_rdNode->params[0].p = *rrd_pda; /* rrd_rdNode->params[1] = regionBuffer; */ rrd_rdNode->params[2].v = 0; - rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, 0); /* launch region log read dag */ rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, @@ -162,7 +168,7 @@ WriteCoreLog( RF_PhysDiskAddr_t ** fwr_pda) { RF_RegionId_t regionID = log->regionID; - RF_AccTraceEntry_t tracerec; + RF_AccTraceEntry_t *tracerec; RF_SectorNum_t regionOffset; RF_DagNode_t *fwr_wrNode; @@ -173,7 +179,8 @@ WriteCoreLog( /* create DAG to write a core log to a region log disk */ rf_MakeAllocList(*fwr_alloclist); - *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); /* create and initialize PDA for the region log */ @@ -181,17 +188,21 @@ WriteCoreLog( * *)); */ *fwr_pda = rf_AllocPDAList(1); regionOffset = log->diskOffset; - rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector)); + rf_MapLogParityLogging(raidPtr, regionID, regionOffset, + &((*fwr_pda)->row), &((*fwr_pda)->col), + &((*fwr_pda)->startSector)); (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; /* initialize DAG parameters */ - bzero((char *) &tracerec, 
sizeof(tracerec)); - (*fwr_dag_h)->tracerec = &tracerec; + RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); + (*fwr_dag_h)->tracerec = tracerec; fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; fwr_wrNode->params[0].p = *fwr_pda; /* fwr_wrNode->params[1] = log->bufPtr; */ fwr_wrNode->params[2].v = 0; - fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, 0); /* launch the dag to write the core log to disk */ rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, @@ -214,19 +225,23 @@ ReadRegionParity( * * NON-BLOCKING */ - RF_AccTraceEntry_t tracerec; + RF_AccTraceEntry_t *tracerec; RF_DagNode_t *prd_rdNode; /* create DAG to read region parity from disk */ rf_MakeAllocList(*prd_alloclist); - *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc, - "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); + *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, + rf_DiskReadUndoFunc, "Rrp", + *prd_alloclist, RF_DAG_FLAGS_NONE, + RF_IO_NORMAL_PRIORITY); /* create and initialize PDA for region parity */ /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t * *)); */ *prd_pda = rf_AllocPDAList(1); - rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector)); + rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), + &((*prd_pda)->col), &((*prd_pda)->startSector), + &((*prd_pda)->numSector)); if (rf_parityLogDebug) printf("[reading %d sectors of parity from region %d]\n", (int) (*prd_pda)->numSector, regionID); @@ -235,13 +250,15 @@ ReadRegionParity( printf("set prd_pda->next to NULL\n"); } /* initialize DAG parameters */ - bzero((char *) &tracerec, sizeof(tracerec)); - (*prd_dag_h)->tracerec = &tracerec; + 
RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); + (*prd_dag_h)->tracerec = tracerec; prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; prd_rdNode->params[0].p = *prd_pda; prd_rdNode->params[1].p = parityBuffer; prd_rdNode->params[2].v = 0; - prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, 0); if (rf_validateDAGDebug) rf_ValidateDAG(*prd_dag_h); /* launch region parity read dag */ @@ -264,28 +281,35 @@ WriteRegionParity( * * NON-BLOCKING */ - RF_AccTraceEntry_t tracerec; + RF_AccTraceEntry_t *tracerec; RF_DagNode_t *pwr_wrNode; /* create DAG to write region log from disk */ rf_MakeAllocList(*pwr_alloclist); - *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, - "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); + *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + "Wrp", *pwr_alloclist, + RF_DAG_FLAGS_NONE, + RF_IO_NORMAL_PRIORITY); /* create and initialize PDA for region parity */ /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t * *)); */ *pwr_pda = rf_AllocPDAList(1); - rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector)); + rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), + &((*pwr_pda)->col), &((*pwr_pda)->startSector), + &((*pwr_pda)->numSector)); /* initialize DAG parameters */ - bzero((char *) &tracerec, sizeof(tracerec)); - (*pwr_dag_h)->tracerec = &tracerec; + RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); + (*pwr_dag_h)->tracerec = tracerec; pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; pwr_wrNode->params[0].p = *pwr_pda; /* pwr_wrNode->params[1] = 
parityBuffer; */ pwr_wrNode->params[2].v = 0; - pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0); + pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, 0); /* launch the dag to write region parity to disk */ rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, @@ -325,7 +349,8 @@ FlushLogsToDisk( if (rf_parityLogDebug) printf("[initiating write of core log for region %d]\n", regionID); fwr_mcpair->flag = RF_FALSE; - WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda); + WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, + &fwr_alloclist, &fwr_pda); /* wait for the DAG to complete */ while (!fwr_mcpair->flag) @@ -358,9 +383,14 @@ ReintegrateRegion( RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda; caddr_t parityBuffer, regionBuffer = NULL; - /* Reintegrate a region (regionID). 1. acquire region and parity - * buffers 2. read log from disk 3. read parity from disk 4. apply log - * to parity 5. apply core log to parity 6. write new parity to disk + /* Reintegrate a region (regionID). + * + * 1. acquire region and parity buffers + * 2. read log from disk + * 3. read parity from disk + * 4. apply log to parity + * 5. apply core log to parity + * 6. 
write new parity to disk * * BLOCKING */ @@ -369,22 +399,25 @@ ReintegrateRegion( /* initiate read of region parity */ if (rf_parityLogDebug) - printf("[initiating read of parity for region %d]\n", regionID); + printf("[initiating read of parity for region %d]\n",regionID); parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); prd_mcpair = rf_AllocMCPair(); RF_LOCK_MUTEX(prd_mcpair->mutex); prd_mcpair->flag = RF_FALSE; - ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda); + ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, + &prd_dag_h, &prd_alloclist, &prd_pda); /* if region log nonempty, initiate read */ if (raidPtr->regionInfo[regionID].diskCount > 0) { if (rf_parityLogDebug) - printf("[initiating read of disk log for region %d]\n", regionID); + printf("[initiating read of disk log for region %d]\n", + regionID); regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); rrd_mcpair = rf_AllocMCPair(); RF_LOCK_MUTEX(rrd_mcpair->mutex); rrd_mcpair->flag = RF_FALSE; - ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda); + ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, + &rrd_dag_h, &rrd_alloclist, &rrd_pda); } /* wait on read of region parity to complete */ while (!prd_mcpair->flag) { @@ -421,11 +454,13 @@ ReintegrateRegion( } /* write reintegrated parity to disk */ if (rf_parityLogDebug) - printf("[initiating write of parity for region %d]\n", regionID); + printf("[initiating write of parity for region %d]\n", + regionID); pwr_mcpair = rf_AllocMCPair(); RF_LOCK_MUTEX(pwr_mcpair->mutex); pwr_mcpair->flag = RF_FALSE; - WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda); + WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, + &pwr_dag_h, &pwr_alloclist, &pwr_pda); while (!pwr_mcpair->flag) RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex); RF_UNLOCK_MUTEX(pwr_mcpair->mutex); @@ 
-476,10 +511,17 @@ ReintegrateLogs( /* remove all items which are blocked on reintegration of this * region */ RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); - logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE); + logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, + &raidPtr->parityLogDiskQueue.reintBlockHead, + &raidPtr->parityLogDiskQueue.reintBlockTail, + RF_TRUE); logDataList = logData; while (logData) { - logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE); + logData->next = rf_SearchAndDequeueParityLogData( + raidPtr, regionID, + &raidPtr->parityLogDiskQueue.reintBlockHead, + &raidPtr->parityLogDiskQueue.reintBlockTail, + RF_TRUE); logData = logData->next; } RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); @@ -530,7 +572,8 @@ rf_ShutdownLogging(RF_Raid_t * raidPtr) if (rf_forceParityLogReint) { for (regionID = 0; regionID < rf_numParityRegions; regionID++) { RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); - raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE; + raidPtr->regionInfo[regionID].loggingEnabled = + RF_FALSE; log = raidPtr->regionInfo[regionID].coreLog; raidPtr->regionInfo[regionID].coreLog = NULL; diskCount = raidPtr->regionInfo[regionID].diskCount; @@ -553,6 +596,7 @@ rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) { RF_ParityLog_t *reintQueue, *flushQueue; int workNeeded, done = RF_FALSE; + int s; /* Main program for parity logging disk thread. 
This routine waits * for work to appear in either the flush or reintegration queues and @@ -561,6 +605,8 @@ rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) * * BLOCKING */ + s = splbio(); + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); /* @@ -599,7 +645,7 @@ rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) /* empty flushQueue, using free'd log buffers to * process bufTail */ if (flushQueue) - FlushLogsToDisk(raidPtr, flushQueue); + FlushLogsToDisk(raidPtr, flushQueue); /* empty reintQueue, flushing from reintTail as we go */ if (reintQueue) @@ -624,7 +670,8 @@ rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) /* thread enabled, no work needed, so sleep */ if (rf_parityLogDebug) printf("[parity logging disk manager sleeping]\n"); - RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex); + RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, + raidPtr->parityLogDiskQueue.mutex); if (rf_parityLogDebug) printf("[parity logging disk manager just woke up]\n"); flushQueue = raidPtr->parityLogDiskQueue.flushQueue; @@ -642,6 +689,8 @@ rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); + splx(s); + /* * In the Net- & OpenBSD kernel, the thread must exit; returning would * cause the proc trampoline to attempt to return to userspace. diff --git a/sys/dev/raidframe/rf_paritylogging.c b/sys/dev/raidframe/rf_paritylogging.c index 65877664637..c0a04bbd062 100644 --- a/sys/dev/raidframe/rf_paritylogging.c +++ b/sys/dev/raidframe/rf_paritylogging.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_paritylogging.c,v 1.4 2000/01/11 18:02:22 peter Exp $ */ -/* $NetBSD: rf_paritylogging.c,v 1.8 2000/01/09 04:35:13 oster Exp $ */ +/* $OpenBSD: rf_paritylogging.c,v 1.5 2000/08/08 16:07:44 peter Exp $ */ +/* $NetBSD: rf_paritylogging.c,v 1.10 2000/02/12 16:06:27 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -201,6 +201,8 @@ rf_ConfigureParityLogging( raidPtr->numParityLogs = rf_numParityRegions; /* create region information structs */ + printf("Allocating %d bytes for in-core parity region info\n", + (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t))); RF_Malloc(raidPtr->regionInfo, (rf_numParityRegions * sizeof(RF_RegionInfo_t)), (RF_RegionInfo_t *)); @@ -223,6 +225,10 @@ rf_ConfigureParityLogging( regionParityRange++; */ /* build pool of unused parity logs */ + printf("Allocating %d bytes for %d parity logs\n", + raidPtr->numParityLogs * raidPtr->numSectorsPerLog * + raidPtr->bytesPerSector, + raidPtr->numParityLogs); RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * raidPtr->numSectorsPerLog * raidPtr->bytesPerSector, (caddr_t)); @@ -326,6 +332,9 @@ rf_ConfigureParityLogging( raidPtr->regionBufferPool.totalBuffers; raidPtr->regionBufferPool.availBuffersIndex = 0; raidPtr->regionBufferPool.emptyBuffersIndex = 0; + printf("Allocating %d bytes for regionBufferPool\n", + (int) (raidPtr->regionBufferPool.totalBuffers * + sizeof(caddr_t))); RF_Malloc(raidPtr->regionBufferPool.buffers, raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t), (caddr_t *)); @@ -335,6 +344,9 @@ rf_ConfigureParityLogging( return (ENOMEM); } for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) { + printf("Allocating %d bytes for regionBufferPool#%d\n", + (int) (raidPtr->regionBufferPool.bufferSize * + sizeof(char)), i); RF_Malloc(raidPtr->regionBufferPool.buffers[i], raidPtr->regionBufferPool.bufferSize * sizeof(char), (caddr_t)); @@ -390,6 +402,10 @@ rf_ConfigureParityLogging( raidPtr->parityBufferPool.totalBuffers; raidPtr->parityBufferPool.availBuffersIndex = 0; raidPtr->parityBufferPool.emptyBuffersIndex = 0; + printf("Allocating %d bytes for parityBufferPool of %d units\n", + (int) (raidPtr->parityBufferPool.totalBuffers * + sizeof(caddr_t)), + raidPtr->parityBufferPool.totalBuffers ); RF_Malloc(raidPtr->parityBufferPool.buffers, 
raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t), (caddr_t *)); @@ -399,6 +415,9 @@ rf_ConfigureParityLogging( return (ENOMEM); } for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) { + printf("Allocating %d bytes for parityBufferPool#%d\n", + (int) (raidPtr->parityBufferPool.bufferSize * + sizeof(char)),i); RF_Malloc(raidPtr->parityBufferPool.buffers[i], raidPtr->parityBufferPool.bufferSize * sizeof(char), (caddr_t)); @@ -514,6 +533,9 @@ rf_ConfigureParityLogging( RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + raidPtr->regionInfo[i].numSectorsParity <= raidPtr->sectorsPerDisk); + printf("Allocating %d bytes for region %d\n", + (int) (raidPtr->regionInfo[i].capacity * + sizeof(RF_DiskMap_t)), i); RF_Malloc(raidPtr->regionInfo[i].diskMap, (raidPtr->regionInfo[i].capacity * sizeof(RF_DiskMap_t)), diff --git a/sys/dev/raidframe/rf_parityscan.c b/sys/dev/raidframe/rf_parityscan.c index c139ef39504..3a46b610d53 100644 --- a/sys/dev/raidframe/rf_parityscan.c +++ b/sys/dev/raidframe/rf_parityscan.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_parityscan.c,v 1.5 2000/01/07 14:50:22 peter Exp $ */ -/* $NetBSD: rf_parityscan.c,v 1.8 2000/01/05 02:57:28 oster Exp $ */ +/* $OpenBSD: rf_parityscan.c,v 1.6 2000/08/08 16:07:44 peter Exp $ */ +/* $NetBSD: rf_parityscan.c,v 1.9 2000/05/28 03:00:31 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -90,6 +90,11 @@ rf_RewriteParity(raidPtr) for (i = 0; i < raidPtr->totalSectors && rc <= RF_PARITY_CORRECTED; i += layoutPtr->dataSectorsPerStripe) { + if (raidPtr->waitShutdown) { + /* Someone is pulling the plug on this set... 
+ abort the re-write */ + return (1); + } asm_h = rf_MapAccess(raidPtr, i, layoutPtr->dataSectorsPerStripe, NULL, RF_DONT_REMAP); diff --git a/sys/dev/raidframe/rf_raid.h b/sys/dev/raidframe/rf_raid.h index 8efbc6639ee..a78b4a41841 100644 --- a/sys/dev/raidframe/rf_raid.h +++ b/sys/dev/raidframe/rf_raid.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raid.h,v 1.5 2000/01/07 14:50:22 peter Exp $ */ -/* $NetBSD: rf_raid.h,v 1.8 2000/01/05 02:57:29 oster Exp $ */ +/* $OpenBSD: rf_raid.h,v 1.6 2000/08/08 16:07:44 peter Exp $ */ +/* $NetBSD: rf_raid.h,v 1.12 2000/02/24 17:12:10 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -64,7 +64,8 @@ #define RF_MAX_DISKS 128 /* max disks per array */ #define RF_DEV2RAIDID(_dev) (DISKUNIT(_dev)) -#define RF_COMPONENT_LABEL_VERSION 1 +#define RF_COMPONENT_LABEL_VERSION_1 1 +#define RF_COMPONENT_LABEL_VERSION 2 #define RF_RAID_DIRTY 0 #define RF_RAID_CLEAN 1 @@ -123,6 +124,8 @@ struct RF_Raid_s { RF_RaidLayout_t Layout; /* all information related to layout */ RF_RaidDisk_t **Disks; /* all information related to physical disks */ RF_DiskQueue_t **Queues;/* all information related to disk queues */ + RF_DiskQueueSW_t *qType;/* pointer to the DiskQueueSW used for the + component queues. */ /* NOTE: This is an anchor point via which the queues can be * accessed, but the enqueue/dequeue routines in diskqueue.c use a * local copy of this pointer for the actual accesses. 
 */ @@ -135,6 +138,8 @@ struct RF_Raid_s { RF_LockTableEntry_t *lockTable; /* stripe-lock table */ RF_LockTableEntry_t *quiesceLock; /* quiesnce table */ int numFailures; /* total number of failures in the array */ + int numNewFailures; /* number of *new* failures (that haven't + caused a mod_counter update) */ int parity_good; /* !0 if parity is known to be correct */ int serial_number; /* a "serial number" for this set */ @@ -145,6 +150,20 @@ struct RF_Raid_s { simultaneously (high-level - not a per-component limit)*/ + int maxOutstanding; /* maxOutstanding requests (per-component) */ + int autoconfigure; /* automatically configure this RAID set. + 0 == no, 1 == yes */ + int root_partition; /* Use this set as / + 0 == no, 1 == yes*/ + int last_unit; /* last unit number (e.g. 0 for /dev/raid0) + of this component. Used for autoconfigure + only. */ + int config_order; /* 0 .. n. The order in which the component + should be auto-configured. E.g. 0 will be + done first (and would become raid0). + This may be in conflict with last_unit!!?! */ + /* Not currently used. */ + /* * Cleanup stuff */ diff --git a/sys/dev/raidframe/rf_raidframe.h b/sys/dev/raidframe/rf_raidframe.h index bf24693d663..f92a2d6540e 100644 --- a/sys/dev/raidframe/rf_raidframe.h +++ b/sys/dev/raidframe/rf_raidframe.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_raidframe.h,v 1.4 2000/01/07 14:50:22 peter Exp $ */ -/* $NetBSD: rf_raidframe.h,v 1.8 2000/01/05 02:57:29 oster Exp $ */ +/* $OpenBSD: rf_raidframe.h,v 1.5 2000/08/08 16:07:44 peter Exp $ */ +/* $NetBSD: rf_raidframe.h,v 1.11 2000/05/28 00:48:31 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -81,6 +81,12 @@ typedef struct RF_DeviceConfig_s { } RF_DeviceConfig_t; +typedef struct RF_ProgressInfo_s { + RF_uint64 remaining; + RF_uint64 completed; + RF_uint64 total; +} RF_ProgressInfo_t; + /* flags that can be put in the rf_recon_req structure */ #define RF_FDFLAGS_NONE 0x0 /* just fail the disk */ #define RF_FDFLAGS_RECON 0x1 /* fail and initiate recon */ @@ -135,4 +141,13 @@ typedef struct RF_DeviceConfig_s { #define RAIDFRAME_CHECK_PARITY _IOWR ('r', 25, int) #define RAIDFRAME_CHECK_PARITYREWRITE_STATUS _IOWR ('r', 26, int) #define RAIDFRAME_CHECK_COPYBACK_STATUS _IOWR ('r', 27, int) +#define RAIDFRAME_SET_AUTOCONFIG _IOWR ('r', 28, int) +#define RAIDFRAME_SET_ROOT _IOWR ('r', 29, int) +#define RAIDFRAME_DELETE_COMPONENT _IOW ('r', 30, RF_SingleComponent_t) +#define RAIDFRAME_INCORPORATE_HOT_SPARE _IOW ('r', 31, RF_SingleComponent_t) +/* 'Extended' status versions */ +#define RAIDFRAME_CHECK_RECON_STATUS_EXT _IOWR('r', 32, RF_ProgressInfo_t *) +#define RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT _IOWR ('r', 33, RF_ProgressInfo_t *) +#define RAIDFRAME_CHECK_COPYBACK_STATUS_EXT _IOWR ('r', 34, RF_ProgressInfo_t *) + #endif /* !_RF__RF_RAIDFRAME_H_ */ diff --git a/sys/dev/raidframe/rf_reconbuffer.c b/sys/dev/raidframe/rf_reconbuffer.c index 011060ef6db..d1ec64672d5 100644 --- a/sys/dev/raidframe/rf_reconbuffer.c +++ b/sys/dev/raidframe/rf_reconbuffer.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconbuffer.c,v 1.2 1999/02/16 00:03:20 niklas Exp $ */ -/* $NetBSD: rf_reconbuffer.c,v 1.3 1999/02/05 00:06:16 oster Exp $ */ +/* $OpenBSD: rf_reconbuffer.c,v 1.3 2000/08/08 16:07:44 peter Exp $ */ +/* $NetBSD: rf_reconbuffer.c,v 1.4 2000/03/13 23:52:36 soren Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -298,8 +298,7 @@ rf_MultiWayReconXor(raidPtr, pssPtr) nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long)); /* release all the reconstruction buffers except the last one, which - * belongs to the the disk who's submission caused this XOR to take - * place */ + * belongs to the disk whose submission caused this XOR to take place */ for (i = 0; i < numBufs - 1; i++) { if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]); diff --git a/sys/dev/raidframe/rf_reconstruct.c b/sys/dev/raidframe/rf_reconstruct.c index e731bcc42a3..47884abd0c8 100644 --- a/sys/dev/raidframe/rf_reconstruct.c +++ b/sys/dev/raidframe/rf_reconstruct.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconstruct.c,v 1.8 2000/01/11 18:02:23 peter Exp $ */ -/* $NetBSD: rf_reconstruct.c,v 1.14 2000/01/09 03:14:33 oster Exp $ */ +/* $OpenBSD: rf_reconstruct.c,v 1.9 2000/08/08 16:07:45 peter Exp $ */ +/* $NetBSD: rf_reconstruct.c,v 1.26 2000/06/04 02:05:13 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -345,15 +345,13 @@ rf_ReconstructFailedDiskBasic(raidPtr, row, col) raidPtr->raid_cinfo[srow][scol].ci_vp, &c_label); - c_label.version = RF_COMPONENT_LABEL_VERSION; - c_label.mod_counter = raidPtr->mod_counter; - c_label.serial_number = raidPtr->serial_number; + raid_init_component_label( raidPtr, &c_label); c_label.row = row; c_label.column = col; - c_label.num_rows = raidPtr->numRow; - c_label.num_columns = raidPtr->numCol; c_label.clean = RF_RAID_DIRTY; c_label.status = rf_ds_optimal; + + /* XXXX MORE NEEDED HERE */ raidwrite_component_label( raidPtr->raid_cinfo[srow][scol].ci_dev, @@ -391,6 +389,7 @@ rf_ReconstructInPlace(raidPtr, row, col) struct vattr va; struct proc *proc; int retcode; + int ac; lp = raidPtr->Layout.map; if (lp->SubmitReconBuffer) { @@ -425,6 +424,8 @@ rf_ReconstructInPlace(raidPtr, row, col) raidPtr->numFailures++; raidPtr->Disks[row][col].status = rf_ds_failed; raidPtr->status[row] = rf_rs_degraded; + rf_update_component_labels(raidPtr, + RF_NORMAL_COMPONENT_UPDATE); } while (raidPtr->reconInProgress) { @@ -467,11 +468,14 @@ rf_ReconstructInPlace(raidPtr, row, col) if (raidPtr->raid_cinfo[row][col].ci_vp != NULL) { printf("Closing the open device: %s\n", raidPtr->Disks[row][col].devname); - VOP_UNLOCK(raidPtr->raid_cinfo[row][col].ci_vp, 0, proc); - (void) vn_close(raidPtr->raid_cinfo[row][col].ci_vp, - FREAD | FWRITE, proc->p_ucred, proc); + vp = raidPtr->raid_cinfo[row][col].ci_vp; + ac = raidPtr->Disks[row][col].auto_configured; + rf_close_component(raidPtr, vp, ac); raidPtr->raid_cinfo[row][col].ci_vp = NULL; } + /* note that this disk was *not* auto_configured (any longer)*/ + raidPtr->Disks[row][col].auto_configured = 0; + printf("About to (re-)open the device for rebuilding: %s\n", raidPtr->Disks[row][col].devname); @@ -575,16 +579,11 @@ rf_ReconstructInPlace(raidPtr, row, col) raidread_component_label(raidPtr->raid_cinfo[row][col].ci_dev, raidPtr->raid_cinfo[row][col].ci_vp, &c_label); - - c_label.version = 
RF_COMPONENT_LABEL_VERSION; - c_label.mod_counter = raidPtr->mod_counter; - c_label.serial_number = raidPtr->serial_number; + + raid_init_component_label(raidPtr, &c_label); + c_label.row = row; c_label.column = col; - c_label.num_rows = raidPtr->numRow; - c_label.num_columns = raidPtr->numCol; - c_label.clean = RF_RAID_DIRTY; - c_label.status = rf_ds_optimal; raidwrite_component_label(raidPtr->raid_cinfo[row][col].ci_dev, raidPtr->raid_cinfo[row][col].ci_vp, @@ -685,7 +684,14 @@ rf_ContinueReconstructFailedDisk(reconDesc) if (rf_ProcessReconEvent(raidPtr, row, event)) reconDesc->numDisksDone++; - raidPtr->reconControl[row]->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs); + raidPtr->reconControl[row]->numRUsTotal = + mapPtr->totalRUs; + raidPtr->reconControl[row]->numRUsComplete = + mapPtr->totalRUs - + rf_UnitsLeftToReconstruct(mapPtr); + + raidPtr->reconControl[row]->percentComplete = + (raidPtr->reconControl[row]->numRUsComplete * 100 / raidPtr->reconControl[row]->numRUsTotal); if (rf_prReconSched) { rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime)); } @@ -748,7 +754,8 @@ rf_ContinueReconstructFailedDisk(reconDesc) rf_ResumeNewRequests(raidPtr); - printf("Reconstruction of disk at row %d col %d completed and spare disk reassigned\n", row, col); + printf("Reconstruction of disk at row %d col %d completed\n", + row, col); xor_s = raidPtr->accumXorTimeUs / 1000000; xor_resid_us = raidPtr->accumXorTimeUs % 1000000; printf("Recon time was %d.%06d seconds, accumulated XOR time was %ld us (%ld.%06ld)\n", @@ -897,7 +904,7 @@ rf_ProcessReconEvent(raidPtr, frow, event) * head-separation wait request and return. * * ctrl->{ru_count, curPSID, diskOffset} and - * rbuf->failedDiskSectorOffset are maintained to point the the unit + * rbuf->failedDiskSectorOffset are maintained to point to the unit * we're currently accessing. 
Note that this deviates from the * standard C idiom of having counters point to the next thing to be * accessed. This allows us to easily retry when we're blocked by diff --git a/sys/dev/raidframe/rf_reconstruct.h b/sys/dev/raidframe/rf_reconstruct.h index 8b087f514c0..8a0842ce867 100644 --- a/sys/dev/raidframe/rf_reconstruct.h +++ b/sys/dev/raidframe/rf_reconstruct.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_reconstruct.h,v 1.3 1999/07/30 14:45:33 peter Exp $ */ -/* $NetBSD: rf_reconstruct.h,v 1.4 1999/03/02 03:18:48 oster Exp $ */ +/* $OpenBSD: rf_reconstruct.h,v 1.4 2000/08/08 16:07:45 peter Exp $ */ +/* $NetBSD: rf_reconstruct.h,v 1.5 2000/05/28 00:48:30 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -128,6 +128,8 @@ struct RF_ReconCtrl_s { RF_StripeNum_t lastPSID;/* the ID of the last parity stripe we want * reconstructed */ int percentComplete;/* percentage completion of reconstruction */ + int numRUsComplete; /* number of Reconstruction Units done */ + int numRUsTotal; /* total number of Reconstruction Units */ /* reconstruction event queue */ RF_ReconEvent_t *eventQueue; /* queue of pending reconstruction diff --git a/sys/dev/raidframe/rf_revent.c b/sys/dev/raidframe/rf_revent.c index bc9dad41174..e664e361110 100644 --- a/sys/dev/raidframe/rf_revent.c +++ b/sys/dev/raidframe/rf_revent.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_revent.c,v 1.8 2000/01/11 14:38:59 peter Exp $ */ -/* $NetBSD: rf_revent.c,v 1.6 2000/01/07 03:56:14 oster Exp $ */ +/* $OpenBSD: rf_revent.c,v 1.9 2000/08/08 16:07:45 peter Exp $ */ +/* $NetBSD: rf_revent.c,v 1.7 2000/05/30 02:04:29 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. 
@@ -48,10 +48,10 @@ static RF_FreeList_t *rf_revent_freelist; #include <sys/proc.h> +#include <sys/kernel.h> -extern int hz; - -#define DO_WAIT(_rc) tsleep(&(_rc)->eventQueue, PRIBIO, "raidframe eventq", 0) +#define DO_WAIT(_rc) \ + tsleep(&(_rc)->eventQueue, PRIBIO, "raidframe eventq", 0) #define DO_SIGNAL(_rc) wakeup(&(_rc)->eventQueue) @@ -90,11 +90,10 @@ rf_ConfigureReconEvent(listp) (RF_ReconEvent_t *)); return (0); } -/* returns the next reconstruction event, blocking the calling thread until - * one becomes available - */ -/* will now return null if it is blocked or will return an event if it is not */ +/* returns the next reconstruction event, blocking the calling thread + * until one becomes available. will now return null if it is blocked + * or will return an event if it is not */ RF_ReconEvent_t * rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) @@ -109,13 +108,14 @@ rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) RF_ASSERT(row >= 0 && row <= raidPtr->numRow); RF_LOCK_MUTEX(rctrl->eq_mutex); - RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); /* q null and count==0 - * must be equivalent - * conditions */ + /* q null and count==0 must be equivalent conditions */ + RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); rctrl->continueFunc = continueFunc; rctrl->continueArg = continueArg; + /* mpsleep timeout value: secs = timo_val/hz. 
'ticks' here is + defined as cycle-counter ticks, not softclock ticks */ #define MAX_RECON_EXEC_USECS (100 * 1000) /* 100 ms */ #define RECON_DELAY_MS 25 @@ -131,15 +131,18 @@ rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) RF_ETIMER_STOP(reconDesc->recon_exec_timer); RF_ETIMER_EVAL(reconDesc->recon_exec_timer); - reconDesc->reconExecTicks += RF_ETIMER_VAL_US(reconDesc->recon_exec_timer); + reconDesc->reconExecTicks += + RF_ETIMER_VAL_US(reconDesc->recon_exec_timer); if (reconDesc->reconExecTicks > reconDesc->maxReconExecTicks) - reconDesc->maxReconExecTicks = reconDesc->reconExecTicks; + reconDesc->maxReconExecTicks = + reconDesc->reconExecTicks; if (reconDesc->reconExecTicks >= MAX_RECON_EXEC_USECS) { /* we've been running too long - sleep */ #if RF_RECON_STATS > 0 reconDesc->numReconExecDelays++; #endif /* RF_RECON_STATS > 0 */ - status = tsleep(&reconDesc->reconExecTicks, PRIBIO, "recon delay", RECON_TIMO); + status = tsleep(&reconDesc->reconExecTicks, + PRIBIO, "recon delay", RECON_TIMO); RF_ASSERT(status == EWOULDBLOCK); reconDesc->reconExecTicks = 0; } @@ -159,9 +162,9 @@ rf_GetNextReconEvent(reconDesc, row, continueFunc, continueArg) rctrl->eventQueue = event->next; event->next = NULL; rctrl->eq_count--; - RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); /* q null and count==0 - * must be equivalent - * conditions */ + + /* q null and count==0 must be equivalent conditions */ + RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); RF_UNLOCK_MUTEX(rctrl->eq_mutex); return (event); } @@ -182,9 +185,8 @@ rf_CauseReconEvent(raidPtr, row, col, arg, type) } RF_ASSERT(row >= 0 && row <= raidPtr->numRow && col >= 0 && col <= raidPtr->numCol); RF_LOCK_MUTEX(rctrl->eq_mutex); - RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count == 0)); /* q null and count==0 - * must be equivalent - * conditions */ + /* q null and count==0 must be equivalent conditions */ + RF_ASSERT((rctrl->eventQueue == NULL) == (rctrl->eq_count 
== 0)); event->next = rctrl->eventQueue; rctrl->eventQueue = event; rctrl->eq_count++; diff --git a/sys/dev/raidframe/rf_shutdown.c b/sys/dev/raidframe/rf_shutdown.c index 9e776b90958..293ead367e6 100644 --- a/sys/dev/raidframe/rf_shutdown.c +++ b/sys/dev/raidframe/rf_shutdown.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_shutdown.c,v 1.3 2000/01/11 18:02:23 peter Exp $ */ -/* $NetBSD: rf_shutdown.c,v 1.5 2000/01/08 22:57:31 oster Exp $ */ +/* $OpenBSD: rf_shutdown.c,v 1.4 2000/08/08 16:07:45 peter Exp $ */ +/* $NetBSD: rf_shutdown.c,v 1.6 2000/01/13 23:41:18 oster Exp $ */ /* * rf_shutdown.c */ @@ -60,7 +60,10 @@ _rf_ShutdownCreate( * Have to directly allocate memory here, since we start up before * and shutdown after RAIDframe internal allocation system. */ - ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), M_RAIDFRAME, M_WAITOK); + /* ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), + M_RAIDFRAME, M_WAITOK); */ + ent = (RF_ShutdownList_t *) malloc(sizeof(RF_ShutdownList_t), + M_RAIDFRAME, M_NOWAIT); if (ent == NULL) return (ENOMEM); ent->cleanup = cleanup; diff --git a/sys/dev/raidframe/rf_threadstuff.h b/sys/dev/raidframe/rf_threadstuff.h index 033999efa6c..a54ad90c223 100644 --- a/sys/dev/raidframe/rf_threadstuff.h +++ b/sys/dev/raidframe/rf_threadstuff.h @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_threadstuff.h,v 1.5 2000/01/11 18:02:23 peter Exp $ */ -/* $NetBSD: rf_threadstuff.h,v 1.7 2000/01/09 01:34:29 oster Exp $ */ +/* $OpenBSD: rf_threadstuff.h,v 1.6 2000/08/08 16:07:45 peter Exp $ */ +/* $NetBSD: rf_threadstuff.h,v 1.8 2000/06/11 03:35:38 oster Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -86,9 +86,6 @@ typedef void *RF_ThreadArg_t; /* * In Net- and OpenBSD, kernel threads are simply processes which share several * substructures and never run in userspace. - * - * XXX Note, Net- and OpenBSD does not yet have a wakeup_one(), so we always - * XXX get Thundering Herd when a condition occurs. 
*/ #define RF_WAIT_COND(_c_,_m_) { \ RF_UNLOCK_MUTEX(_m_); \ |