diff options
author | Peter Galbavy <peter@cvs.openbsd.org> | 2000-08-08 16:07:46 +0000 |
---|---|---|
committer | Peter Galbavy <peter@cvs.openbsd.org> | 2000-08-08 16:07:46 +0000 |
commit | 32fe17a663470bcff500ec1d022251eb5521ca1d (patch) | |
tree | a9d50274c1f5d004d13fa30644899af9b22328ee /sys/dev/raidframe/rf_openbsdkintf.c | |
parent | ba14061bb6dd00dd21fcd589dca3d5be75d8f752 (diff) |
sync RAIDframe with Greg Oster's work for NetBSD.
This update incorporates changes since January 2000.
RAID1 and RAID5 tested for functionality matching the 2.7 code. A
number of bug fixes (including stopping a parity rebuild when
unconfiguring) have been included. See Greg's RAIDframe info page:
http://www.cs.usask.ca/staff/oster/raid.html
The RAID_AUTOCONFIG feature set does *NOT* yet work. These features
require more work throughout the boot system and as such are a big
task.
IMPORTANT: As with anything that is this near live data on your
systems, please test carefully with existing configurations before
deploying in a live system. Feedback via sendbug or mail direct
to peter@wonderland.org is appreciated.
Diffstat (limited to 'sys/dev/raidframe/rf_openbsdkintf.c')
-rw-r--r-- | sys/dev/raidframe/rf_openbsdkintf.c | 1433 |
1 files changed, 1205 insertions, 228 deletions
diff --git a/sys/dev/raidframe/rf_openbsdkintf.c b/sys/dev/raidframe/rf_openbsdkintf.c index 8ad4dc3364c..268eff0a015 100644 --- a/sys/dev/raidframe/rf_openbsdkintf.c +++ b/sys/dev/raidframe/rf_openbsdkintf.c @@ -1,5 +1,5 @@ -/* $OpenBSD: rf_openbsdkintf.c,v 1.9 2000/01/11 18:02:22 peter Exp $ */ -/* $NetBSD: rf_netbsdkintf.c,v 1.46 2000/01/09 03:39:13 oster Exp $ */ +/* $OpenBSD: rf_openbsdkintf.c,v 1.10 2000/08/08 16:07:43 peter Exp $ */ +/* $NetBSD: rf_netbsdkintf.c,v 1.93 2000/07/14 15:26:29 oster Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. @@ -130,6 +130,7 @@ #include <sys/lock.h> #include <sys/buf.h> #include <sys/user.h> +#include <sys/reboot.h> #include "raid.h" #include "rf_raid.h" @@ -148,6 +149,7 @@ #include "rf_parityscan.h" #include "rf_debugprint.h" #include "rf_threadstuff.h" +#include "rf_configure.h" int rf_kdebug_level = 0; @@ -172,7 +174,7 @@ void rf_KernelWakeupFunc __P((struct buf *)); void rf_InitBP __P((struct buf *, struct vnode *, unsigned, dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*)(struct buf *), void *, int, struct proc *)); -static int raidinit __P((dev_t, RF_Raid_t *, int)); +static void raidinit __P((RF_Raid_t *)); void raidattach __P((int)); int raidsize __P((dev_t)); @@ -199,16 +201,15 @@ struct raidbuf { /* * XXX Not sure if the following should be replacing the raidPtrs above, - * or if it should be used in conjunction with that... - */ + or if it should be used in conjunction with that... 
+*/ struct raid_softc { int sc_flags; /* flags */ int sc_cflags; /* configuration flags */ size_t sc_size; /* size of the raid device */ - dev_t sc_dev; /* our device..*/ char sc_xname[20]; /* XXX external name */ struct disk sc_dkdev; /* generic disk device info */ - struct buf buf_queue; /* used for the device queue */ + struct buf sc_q; /* used for the device queue */ }; /* sc_flags */ @@ -219,7 +220,7 @@ struct raid_softc { #define RAIDF_LOCKED 0x80 /* unit is locked */ #define raidunit(x) DISKUNIT(x) -static int numraid = 0; +int numraid = 0; /* * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. @@ -257,6 +258,9 @@ int raidlock __P((struct raid_softc *)); void raidunlock __P((struct raid_softc *)); void rf_markalldirty __P((RF_Raid_t *)); +void rf_mountroot_hook __P((struct device *)); + +struct device *raidrootdev; void rf_ReconThread __P((struct rf_recon_req *)); /* XXX what I want is: */ @@ -264,6 +268,30 @@ void rf_ReconThread __P((struct rf_recon_req *)); void rf_RewriteParityThread __P((RF_Raid_t *raidPtr)); void rf_CopybackThread __P((RF_Raid_t *raidPtr)); void rf_ReconstructInPlaceThread __P((struct rf_recon_req *)); +#ifdef RAID_AUTOCONFIG +void rf_buildroothack __P((void *)); +static int rf_reasonable_label __P((RF_ComponentLabel_t *)); +#endif + +RF_AutoConfig_t *rf_find_raid_components __P((void)); +RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *)); +static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *)); +void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *, + RF_Raid_t *)); +int rf_set_autoconfig __P((RF_Raid_t *, int)); +int rf_set_rootpartition __P((RF_Raid_t *, int)); +void rf_release_all_vps __P((RF_ConfigSet_t *)); +void rf_cleanup_config_set __P((RF_ConfigSet_t *)); +int rf_have_enough_components __P((RF_ConfigSet_t *)); +int rf_auto_config_set __P((RF_ConfigSet_t *, int *)); + +#ifdef RAID_AUTOCONFIG +static int raidautoconfig = 0; /* Debugging, mostly. 
Set to 0 to not + allow autoconfig to take place. + Note that this is overridden by having + RAID_AUTOCONFIG as an option in the + kernel config file. */ +#endif void raidattach(num) @@ -271,6 +299,10 @@ raidattach(num) { int raidID; int i, rc; +#ifdef RAID_AUTOCONFIG + RF_AutoConfig_t *ac_list; /* autoconfig list */ + RF_ConfigSet_t *config_sets; +#endif db1_printf(("raidattach: Asked for %d units\n", num)); @@ -283,6 +315,8 @@ raidattach(num) /* This is where all the initialization stuff gets done. */ + numraid = num; + /* Make some space for requested number of units... */ RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **)); if (raidPtrs == NULL) { @@ -296,7 +330,7 @@ raidattach(num) rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; - for (i = 0; i < numraid; i++) + for (i = 0; i < num; i++) raidPtrs[i] = NULL; rc = rf_BootRaidframe(); if (rc == 0) @@ -310,18 +344,33 @@ raidattach(num) */ raid_softc = (struct raid_softc *) - malloc(num * sizeof (struct raid_softc), M_RAIDFRAME, M_NOWAIT); + malloc(num * sizeof(struct raid_softc), + M_RAIDFRAME, M_NOWAIT); if (raid_softc == NULL) { printf("WARNING: no memory for RAIDframe driver\n"); return; } - numraid = num; + bzero(raid_softc, num * sizeof (struct raid_softc)); + raidrootdev = (struct device *)malloc(num * sizeof(struct device), + M_RAIDFRAME, M_NOWAIT); + if (raidrootdev == NULL) { + panic("No memory for RAIDframe driver!!?!?!\n"); + } + for (raidID = 0; raidID < num; raidID++) { - raid_softc[raidID].buf_queue.b_actf = NULL; - raid_softc[raidID].buf_queue.b_actb = - &raid_softc[raidID].buf_queue.b_actf; +#if 0 + SIMPLEQ_INIT(&raid_softc[raidID].sc_q); +#endif + + raidrootdev[raidID].dv_class = DV_DISK; + raidrootdev[raidID].dv_cfdata = NULL; + raidrootdev[raidID].dv_unit = raidID; + raidrootdev[raidID].dv_parent = NULL; + raidrootdev[raidID].dv_flags = 0; + sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID); + RF_Calloc(raidPtrs[raidID], 1, sizeof (RF_Raid_t), (RF_Raid_t *)); if 
(raidPtrs[raidID] == NULL) { @@ -330,7 +379,93 @@ raidattach(num) return; } } + +#if RAID_AUTOCONFIG + raidautoconfig = 1; + +if (raidautoconfig) { + /* 1. locate all RAID components on the system */ + +#if DEBUG + printf("Searching for raid components...\n"); +#endif + ac_list = rf_find_raid_components(); + + /* 2. sort them into their respective sets */ + + config_sets = rf_create_auto_sets(ac_list); + + /* 3. evaluate each set and configure the valid ones + This gets done in rf_buildroothack() */ + + /* schedule the creation of the thread to do the + "/ on RAID" stuff */ + + kthread_create(rf_buildroothack, config_sets, NULL, "raidauto"); + +#if 0 + mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]); +#endif +} +#endif + +} + +#ifdef RAID_AUTOCONFIG +void +rf_buildroothack(arg) + void *arg; +{ + RF_ConfigSet_t *config_sets = arg; + RF_ConfigSet_t *cset; + RF_ConfigSet_t *next_cset; + int retcode; + int raidID; + int rootID; + int num_root; + + num_root = 0; + cset = config_sets; + while(cset != NULL ) { + next_cset = cset->next; + if (rf_have_enough_components(cset) && + cset->ac->clabel->autoconfigure==1) { + retcode = rf_auto_config_set(cset,&raidID); + if (!retcode) { + if (cset->rootable) { + rootID = raidID; + num_root++; + } + } else { + /* The autoconfig didn't work :( */ +#if DEBUG + printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID); +#endif + rf_release_all_vps(cset); + } + } else { + /* we're not autoconfiguring this set... + release the associated resources */ + rf_release_all_vps(cset); + } + /* cleanup */ + rf_cleanup_config_set(cset); + cset = next_cset; + } + if (boothowto & RB_ASKNAME) { + /* We don't auto-config... */ + } else { + /* They didn't ask, and we found something bootable... */ + + if (num_root == 1) { + booted_device = &raidrootdev[rootID]; + } else if (num_root > 1) { + /* we can't guess.. require the user to answer... 
*/ + boothowto |= RB_ASKNAME; + } + } } +#endif int raidsize(dev) @@ -496,7 +631,11 @@ raidclose(dev, flags, fmt, p) Device shutdown has taken care of setting the clean bits if RAIDF_INITED is not set mark things as clean... */ - rf_update_component_labels( raidPtrs[unit] ); +#if 0 + printf("Last one on raid%d. Updating status.\n",unit); +#endif + rf_update_component_labels(raidPtrs[unit], + RF_FINAL_COMPONENT_UPDATE); } raidunlock(rs); @@ -513,7 +652,6 @@ raidstrategy(bp) RF_Raid_t *raidPtr; struct raid_softc *rs = &raid_softc[raidID]; struct disklabel *lp; - struct buf *dp; int wlabel; if ((rs->sc_flags & RAIDF_INITED) ==0) { @@ -563,14 +701,10 @@ raidstrategy(bp) bp->b_resid = 0; - /* stuff it onto our queue */ + bp->b_actf = rs->sc_q.b_actf; + rs->sc_q.b_actf = bp; + rs->sc_q.b_active++; - dp = &rs->buf_queue; - bp->b_actf = NULL; - bp->b_actb = dp->b_actb; - *dp->b_actb = bp; - dp->b_actb = &bp->b_actf; - raidstart(raidPtrs[raidID]); splx(s); @@ -634,6 +768,7 @@ raidioctl(dev, cmd, data, flag, p) struct raid_softc *rs; RF_Config_t *k_cfg, *u_cfg; RF_Raid_t *raidPtr; + RF_RaidDisk_t *diskPtr; RF_AccTotals_t *totals; RF_DeviceConfig_t *d_cfg, **ucfgp; u_char *specific_buf; @@ -641,12 +776,13 @@ raidioctl(dev, cmd, data, flag, p) int row; int column; struct rf_recon_req *rrcopy, *rr; - RF_ComponentLabel_t *component_label; + RF_ComponentLabel_t *clabel; RF_ComponentLabel_t ci_label; - RF_ComponentLabel_t **c_label_ptr; + RF_ComponentLabel_t **clabel_ptr; RF_SingleComponent_t *sparePtr,*componentPtr; RF_SingleComponent_t hot_spare; RF_SingleComponent_t component; + RF_ProgressInfo_t progressInfo, **progressInfoPtr; int i, j, d; if (unit >= numraid) @@ -673,6 +809,7 @@ raidioctl(dev, cmd, data, flag, p) case DIOCWDINFO: case DIOCGPART: case DIOCWLABEL: + case DIOCGPDINFO: case RAIDFRAME_SHUTDOWN: case RAIDFRAME_REWRITEPARITY: case RAIDFRAME_GET_INFO: @@ -683,6 +820,7 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_FAIL_DISK: case RAIDFRAME_COPYBACK: case 
RAIDFRAME_CHECK_RECON_STATUS: + case RAIDFRAME_CHECK_RECON_STATUS_EXT: case RAIDFRAME_GET_COMPONENT_LABEL: case RAIDFRAME_SET_COMPONENT_LABEL: case RAIDFRAME_ADD_HOT_SPARE: @@ -691,7 +829,13 @@ raidioctl(dev, cmd, data, flag, p) case RAIDFRAME_REBUILD_IN_PLACE: case RAIDFRAME_CHECK_PARITY: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: + case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: case RAIDFRAME_CHECK_COPYBACK_STATUS: + case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: + case RAIDFRAME_SET_AUTOCONFIG: + case RAIDFRAME_SET_ROOT: + case RAIDFRAME_DELETE_COMPONENT: + case RAIDFRAME_INCORPORATE_HOT_SPARE: if ((rs->sc_flags & RAIDF_INITED) == 0) return (ENXIO); } @@ -699,6 +843,13 @@ raidioctl(dev, cmd, data, flag, p) switch (cmd) { /* Configure the system */ case RAIDFRAME_CONFIGURE: + + if (raidPtr->valid) { + /* There is a valid RAID set running on this unit! */ + printf("raid%d: Device already configured!\n",unit); + return(EINVAL); + } + /* * Copy-in the configuration information * data points to a pointer to the configuration structure. @@ -749,25 +900,26 @@ raidioctl(dev, cmd, data, flag, p) * Store the sum of all the bytes in the last byte? */ + /* + * Clear the entire RAID descriptor, just to make sure + * there is no stale data left in the case of a + * reconfiguration + */ + bzero((char *) raidPtr, sizeof(RF_Raid_t)); + /* configure the system */ raidPtr->raidid = unit; - retcode = rf_Configure(raidPtr, k_cfg); + retcode = rf_Configure(raidPtr, k_cfg, NULL); if (retcode == 0) { /* allow this many simultaneous IO's to this RAID device */ raidPtr->openings = RAIDOUTSTANDING; - - /* XXX should be moved to rf_Configure() */ - - raidPtr->copyback_in_progress = 0; - raidPtr->parity_rewrite_in_progress = 0; - raidPtr->recon_in_progress = 0; - - retcode = raidinit(dev, raidPtr, unit); - rf_markalldirty( raidPtr ); + + raidinit(raidPtr); + rf_markalldirty(raidPtr); } /* Free the buffers. No return code here. 
*/ @@ -811,29 +963,29 @@ raidioctl(dev, cmd, data, flag, p) return (retcode); case RAIDFRAME_GET_COMPONENT_LABEL: - c_label_ptr = (RF_ComponentLabel_t **) data; + clabel_ptr = (RF_ComponentLabel_t **) data; /* need to read the component label for the disk indicated - by row,column in component_label */ + by row,column in clabel */ /* For practice, let's get it directly fromdisk, rather than from the in-core copy */ - RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ), + RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), (RF_ComponentLabel_t *)); - if (component_label == NULL) + if (clabel == NULL) return (ENOMEM); - bzero((char *) component_label, sizeof(RF_ComponentLabel_t)); + bzero((char *) clabel, sizeof(RF_ComponentLabel_t)); - retcode = copyin( *c_label_ptr, component_label, + retcode = copyin( *clabel_ptr, clabel, sizeof(RF_ComponentLabel_t)); if (retcode) { - RF_Free( component_label, sizeof(RF_ComponentLabel_t)); + RF_Free( clabel, sizeof(RF_ComponentLabel_t)); return(retcode); } - row = component_label->row; - column = component_label->column; + row = clabel->row; + column = clabel->column; if ((row < 0) || (row >= raidPtr->numRow) || (column < 0) || (column >= raidPtr->numCol)) { @@ -843,16 +995,16 @@ raidioctl(dev, cmd, data, flag, p) raidread_component_label( raidPtr->Disks[row][column].dev, raidPtr->raid_cinfo[row][column].ci_vp, - component_label ); + clabel ); - retcode = copyout((caddr_t) component_label, - (caddr_t) *c_label_ptr, + retcode = copyout((caddr_t) clabel, + (caddr_t) *clabel_ptr, sizeof(RF_ComponentLabel_t)); - RF_Free( component_label, sizeof(RF_ComponentLabel_t)); + RF_Free( clabel, sizeof(RF_ComponentLabel_t)); return (retcode); case RAIDFRAME_SET_COMPONENT_LABEL: - component_label = (RF_ComponentLabel_t *) data; + clabel = (RF_ComponentLabel_t *) data; /* XXX check the label for valid stuff... 
*/ /* Note that some things *should not* get modified -- @@ -861,22 +1013,22 @@ raidioctl(dev, cmd, data, flag, p) */ printf("Got component label:\n"); - printf("Version: %d\n",component_label->version); - printf("Serial Number: %d\n",component_label->serial_number); - printf("Mod counter: %d\n",component_label->mod_counter); - printf("Row: %d\n", component_label->row); - printf("Column: %d\n", component_label->column); - printf("Num Rows: %d\n", component_label->num_rows); - printf("Num Columns: %d\n", component_label->num_columns); - printf("Clean: %d\n", component_label->clean); - printf("Status: %d\n", component_label->status); - - row = component_label->row; - column = component_label->column; + printf("Version: %d\n",clabel->version); + printf("Serial Number: %d\n",clabel->serial_number); + printf("Mod counter: %d\n",clabel->mod_counter); + printf("Row: %d\n", clabel->row); + printf("Column: %d\n", clabel->column); + printf("Num Rows: %d\n", clabel->num_rows); + printf("Num Columns: %d\n", clabel->num_columns); + printf("Clean: %d\n", clabel->clean); + printf("Status: %d\n", clabel->status); + + row = clabel->row; + column = clabel->column; if ((row < 0) || (row >= raidPtr->numRow) || (column < 0) || (column >= raidPtr->numCol)) { - RF_Free( component_label, sizeof(RF_ComponentLabel_t)); + RF_Free( clabel, sizeof(RF_ComponentLabel_t)); return(EINVAL); } @@ -885,12 +1037,12 @@ raidioctl(dev, cmd, data, flag, p) raidwrite_component_label( raidPtr->Disks[row][column].dev, raidPtr->raid_cinfo[row][column].ci_vp, - component_label ); + clabel ); #endif return (0); case RAIDFRAME_INIT_LABELS: - component_label = (RF_ComponentLabel_t *) data; + clabel = (RF_ComponentLabel_t *) data; /* we only want the serial number from the above. We get all the rest of the information @@ -898,19 +1050,16 @@ raidioctl(dev, cmd, data, flag, p) set. 
*/ - raidPtr->serial_number = component_label->serial_number; - /* current version number */ - ci_label.version = RF_COMPONENT_LABEL_VERSION; - ci_label.serial_number = component_label->serial_number; - ci_label.mod_counter = raidPtr->mod_counter; - ci_label.num_rows = raidPtr->numRow; - ci_label.num_columns = raidPtr->numCol; - ci_label.clean = RF_RAID_DIRTY; /* not clean */ - ci_label.status = rf_ds_optimal; /* "It's good!" */ + raidPtr->serial_number = clabel->serial_number; + + raid_init_component_label(raidPtr, &ci_label); + ci_label.serial_number = clabel->serial_number; for(row=0;row<raidPtr->numRow;row++) { ci_label.row = row; for(column=0;column<raidPtr->numCol;column++) { + diskPtr = &raidPtr->Disks[row][column]; + ci_label.partitionSize = diskPtr->partitionSize; ci_label.column = column; raidwrite_component_label( raidPtr->Disks[row][column].dev, @@ -940,18 +1089,42 @@ raidioctl(dev, cmd, data, flag, p) raidPtr,"raid_parity"); return (retcode); + case RAIDFRAME_SET_AUTOCONFIG: + d = rf_set_autoconfig(raidPtr, *(int *) data); + printf("New autoconfig value is: %d\n", d); + *(int *) data = d; + return (retcode); + + case RAIDFRAME_SET_ROOT: + d = rf_set_rootpartition(raidPtr, *(int *) data); + printf("New rootpartition value is: %d\n", d); + *(int *) data = d; + return (retcode); case RAIDFRAME_ADD_HOT_SPARE: sparePtr = (RF_SingleComponent_t *) data; memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); - printf("Adding spare\n"); retcode = rf_add_hot_spare(raidPtr, &hot_spare); return(retcode); case RAIDFRAME_REMOVE_HOT_SPARE: return(retcode); + case RAIDFRAME_DELETE_COMPONENT: + componentPtr = (RF_SingleComponent_t *)data; + memcpy( &component, componentPtr, + sizeof(RF_SingleComponent_t)); + retcode = rf_delete_component(raidPtr, &component); + return(retcode); + + case RAIDFRAME_INCORPORATE_HOT_SPARE: + componentPtr = (RF_SingleComponent_t *)data; + memcpy( &component, componentPtr, + sizeof(RF_SingleComponent_t)); + retcode = 
rf_incorporate_hot_spare(raidPtr, &component); + return(retcode); + case RAIDFRAME_REBUILD_IN_PLACE: if (raidPtr->Layout.map->faultsTolerated == 0) { @@ -1096,8 +1269,10 @@ raidioctl(dev, cmd, data, flag, p) /* Return the percentage completion of reconstruction */ case RAIDFRAME_CHECK_RECON_STATUS: if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0 */ - return(EINVAL); + /* This makes no sense on a RAID 0, so tell the + user it's done. */ + *(int *) data = 100; + return(0); } row = 0; /* XXX we only consider a single row... */ if (raidPtr->status[row] != rf_rs_reconstructing) @@ -1106,22 +1281,66 @@ raidioctl(dev, cmd, data, flag, p) *(int *)data = raidPtr->reconControl[row]->percentComplete; return (0); + + case RAIDFRAME_CHECK_RECON_STATUS_EXT: + progressInfoPtr = (RF_ProgressInfo_t **) data; + row = 0; /* XXX we only consider a single row... */ + if (raidPtr->status[row] != rf_rs_reconstructing) { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } else { + progressInfo.total = + raidPtr->reconControl[row]->numRUsTotal; + progressInfo.completed = + raidPtr->reconControl[row]->numRUsComplete; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } + retcode = copyout((caddr_t) &progressInfo, + (caddr_t) *progressInfoPtr, + sizeof(RF_ProgressInfo_t)); + return (retcode); + case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: if (raidPtr->Layout.map->faultsTolerated == 0) { - /* This makes no sense on a RAID 0 */ - return(EINVAL); + /* This makes no sense on a RAID 0, so tell the + user it's done. 
*/ + *(int *) data = 100; + return(0); } if (raidPtr->parity_rewrite_in_progress == 1) { - *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe; + *(int *) data = 100 * + raidPtr->parity_rewrite_stripes_done / + raidPtr->Layout.numStripe; } else { *(int *) data = 100; } return (0); + case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: + progressInfoPtr = (RF_ProgressInfo_t **) data; + if (raidPtr->parity_rewrite_in_progress == 1) { + progressInfo.total = raidPtr->Layout.numStripe; + progressInfo.completed = + raidPtr->parity_rewrite_stripes_done; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } else { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } + retcode = copyout((caddr_t) &progressInfo, + (caddr_t) *progressInfoPtr, + sizeof(RF_ProgressInfo_t)); + return (retcode); + case RAIDFRAME_CHECK_COPYBACK_STATUS: if (raidPtr->Layout.map->faultsTolerated == 0) { /* This makes no sense on a RAID 0 */ - return(EINVAL); + *(int *) data = 100; + return(0); } if (raidPtr->copyback_in_progress == 1) { *(int *) data = 100 * raidPtr->copyback_stripes_done / raidPtr->Layout.numStripe; @@ -1130,6 +1349,24 @@ raidioctl(dev, cmd, data, flag, p) } return (0); + case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: + progressInfoPtr = (RF_ProgressInfo_t **) data; + if (raidPtr->copyback_in_progress == 1) { + progressInfo.total = raidPtr->Layout.numStripe; + progressInfo.completed = + raidPtr->copyback_stripes_done; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } else { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } + retcode = copyout((caddr_t) &progressInfo, + (caddr_t) *progressInfoPtr, + sizeof(RF_ProgressInfo_t)); + return (retcode); + #if 0 case RAIDFRAME_SPARET_WAIT: /* @@ -1250,10 +1487,11 @@ raidioctl(dev, cmd, data, flag, p) #if 0 case DIOCGDEFLABEL: +#endif + case DIOCGPDINFO: raidgetdefaultlabel(raidPtr, 
rs, (struct disklabel *) data); break; -#endif default: @@ -1266,16 +1504,14 @@ raidioctl(dev, cmd, data, flag, p) * raidinit -- complete the rest of the initialization for the * RAIDframe device. */ -int -raidinit(dev, raidPtr, unit) - dev_t dev; +static void +raidinit(raidPtr) RF_Raid_t *raidPtr; - int unit; { - int retcode; struct raid_softc *rs; + int unit; - retcode = 0; + unit = raidPtr->raidid; rs = &raid_softc[unit]; @@ -1299,8 +1535,6 @@ raidinit(dev, raidPtr, unit) * protectedSectors, as used in RAIDframe. */ rs->sc_size = raidPtr->totalSectors; - rs->sc_dev = dev; - return (retcode); } /* @@ -1361,34 +1595,31 @@ raidstart(raidPtr) struct raid_softc *rs; int do_async; struct buf *bp; - struct buf *dp; unit = raidPtr->raidid; rs = &raid_softc[unit]; - + /* quick check to see if anything has died recently */ + RF_LOCK_MUTEX(raidPtr->mutex); + if (raidPtr->numNewFailures > 0) { + rf_update_component_labels(raidPtr, + RF_NORMAL_COMPONENT_UPDATE); + raidPtr->numNewFailures--; + } + RF_UNLOCK_MUTEX(raidPtr->mutex); + /* Check to see if we're at the limit... */ RF_LOCK_MUTEX(raidPtr->mutex); while (raidPtr->openings > 0) { RF_UNLOCK_MUTEX(raidPtr->mutex); - /* get the next item, if any, from the queue */ - dp = &rs->buf_queue; - bp = dp->b_actf; + bp = rs->sc_q.b_actf; if (bp == NULL) { /* nothing more to do */ return; } - - /* update structures */ - dp = bp->b_actf; - if (dp != NULL) { - dp->b_actb = bp->b_actb; - } else { - rs->buf_queue.b_actb = bp->b_actb; - } - *bp->b_actb = dp; - + rs->sc_q.b_actf = bp->b_actf; + /* Ok, for the bp we have here, bp->b_blkno is relative to the * partition.. Need to make it absolute to the underlying * device.. */ @@ -1457,14 +1688,12 @@ raidstart(raidPtr) /* XXX we're still at splbio() here... do we *really* need to be? */ - retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 
RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, do_async, raid_addr, num_blocks, - bp->b_un.b_addr, bp, NULL, NULL, + bp->b_data, bp, NULL, NULL, RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL); - RF_LOCK_MUTEX(raidPtr->mutex); } RF_UNLOCK_MUTEX(raidPtr->mutex); @@ -1671,7 +1900,7 @@ rf_KernelWakeupFunc(vbp) rf_ds_failed; queue->raidPtr->status[queue->row] = rf_rs_degraded; queue->raidPtr->numFailures++; - /* XXX here we should bump the version number for each component, and write that data out */ + queue->raidPtr->numNewFailures++; } else { /* Disk is already dead... */ /* printf("Disk already marked as dead!\n"); */ @@ -1716,7 +1945,7 @@ rf_InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg, bp->b_bufsize = bp->b_bcount; bp->b_error = 0; bp->b_dev = dev; - bp->b_un.b_addr = buf; + bp->b_data = buf; bp->b_blkno = startSect; bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ if (bp->b_bcount == 0) { @@ -1761,7 +1990,6 @@ raidgetdefaultlabel(raidPtr, rs, lp) lp->d_magic = DISKMAGIC; lp->d_magic2 = DISKMAGIC; lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); - } /* @@ -1778,6 +2006,8 @@ raidgetdisklabel(dev) struct disklabel *lp = rs->sc_dkdev.dk_label; struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; RF_Raid_t *raidPtr; + int i; + struct partition *pp; db1_printf(("Getting the disklabel...\n")); @@ -1790,34 +2020,33 @@ raidgetdisklabel(dev) /* * Call the generic disklabel extraction routine. */ - errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, - rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel, 0); - if (errstring) - raidmakedisklabel(rs); - else { - int i; - struct partition *pp; + errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, lp, + rs->sc_dkdev.dk_cpulabel, 0); + if (errstring) { + printf("%s: %s\n", rs->sc_xname, errstring); + return; + /*raidmakedisklabel(rs); */ + } - /* - * Sanity check whether the found disklabel is valid. 
- * - * This is necessary since total size of the raid device - * may vary when an interleave is changed even though exactly - * same componets are used, and old disklabel may used - * if that is found. - */ - if (lp->d_secperunit != rs->sc_size) - printf("WARNING: %s: " - "total sector size in disklabel (%d) != " - "the size of raid (%ld)\n", rs->sc_xname, - lp->d_secperunit, (long) rs->sc_size); - for (i = 0; i < lp->d_npartitions; i++) { - pp = &lp->d_partitions[i]; - if (pp->p_offset + pp->p_size > rs->sc_size) - printf("WARNING: %s: end of partition `%c' " - "exceeds the size of raid (%ld)\n", - rs->sc_xname, 'a' + i, (long) rs->sc_size); - } + /* + * Sanity check whether the found disklabel is valid. + * + * This is necessary since total size of the raid device + * may vary when an interleave is changed even though exactly + * same componets are used, and old disklabel may used + * if that is found. + */ + if (lp->d_secperunit != rs->sc_size) + printf("WARNING: %s: " + "total sector size in disklabel (%d) != " + "the size of raid (%ld)\n", rs->sc_xname, + lp->d_secperunit, (long) rs->sc_size); + for (i = 0; i < lp->d_npartitions; i++) { + pp = &lp->d_partitions[i]; + if (pp->p_offset + pp->p_size > rs->sc_size) + printf("WARNING: %s: end of partition `%c' " + "exceeds the size of raid (%ld)\n", + rs->sc_xname, 'a' + i, (long) rs->sc_size); } } @@ -1933,11 +2162,11 @@ raidunlock(rs) int raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) { - RF_ComponentLabel_t component_label; - raidread_component_label(dev, b_vp, &component_label); - component_label.mod_counter = mod_counter; - component_label.clean = RF_RAID_CLEAN; - raidwrite_component_label(dev, b_vp, &component_label); + RF_ComponentLabel_t clabel; + raidread_component_label(dev, b_vp, &clabel); + clabel.mod_counter = mod_counter; + clabel.clean = RF_RAID_CLEAN; + raidwrite_component_label(dev, b_vp, &clabel); return(0); } @@ -1945,20 +2174,20 @@ raidmarkclean(dev_t dev, struct vnode *b_vp, int 
mod_counter) int raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) { - RF_ComponentLabel_t component_label; - raidread_component_label(dev, b_vp, &component_label); - component_label.mod_counter = mod_counter; - component_label.clean = RF_RAID_DIRTY; - raidwrite_component_label(dev, b_vp, &component_label); + RF_ComponentLabel_t clabel; + raidread_component_label(dev, b_vp, &clabel); + clabel.mod_counter = mod_counter; + clabel.clean = RF_RAID_DIRTY; + raidwrite_component_label(dev, b_vp, &clabel); return(0); } /* ARGSUSED */ int -raidread_component_label(dev, b_vp, component_label) +raidread_component_label(dev, b_vp, clabel) dev_t dev; struct vnode *b_vp; - RF_ComponentLabel_t *component_label; + RF_ComponentLabel_t *clabel; { struct buf *bp; int error; @@ -1981,22 +2210,15 @@ raidread_component_label(dev, b_vp, component_label) error = biowait(bp); if (!error) { - memcpy(component_label, bp->b_un.b_addr, + memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t)); #if 0 - printf("raidread_component_label: got component label:\n"); - printf("Version: %d\n",component_label->version); - printf("Serial Number: %d\n",component_label->serial_number); - printf("Mod counter: %d\n",component_label->mod_counter); - printf("Row: %d\n", component_label->row); - printf("Column: %d\n", component_label->column); - printf("Num Rows: %d\n", component_label->num_rows); - printf("Num Columns: %d\n", component_label->num_columns); - printf("Clean: %d\n", component_label->clean); - printf("Status: %d\n", component_label->status); + rf_print_component_label( clabel ); #endif } else { +#if 0 printf("Failed to read RAID component label!\n"); +#endif } bp->b_flags = B_INVAL | B_AGE; @@ -2005,10 +2227,10 @@ raidread_component_label(dev, b_vp, component_label) } /* ARGSUSED */ int -raidwrite_component_label(dev, b_vp, component_label) +raidwrite_component_label(dev, b_vp, clabel) dev_t dev; struct vnode *b_vp; - RF_ComponentLabel_t *component_label; + RF_ComponentLabel_t 
*clabel; { struct buf *bp; int error; @@ -2023,26 +2245,28 @@ raidwrite_component_label(dev, b_vp, component_label) bp->b_flags = B_BUSY | B_WRITE; bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; - memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE ); + memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); - memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t)); + memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); (*bdevsw[major(bp->b_dev)].d_strategy)(bp); error = biowait(bp); bp->b_flags = B_INVAL | B_AGE; brelse(bp); if (error) { +#if 1 printf("Failed to write RAID component info!\n"); +#endif } return(error); } void -rf_markalldirty( raidPtr ) +rf_markalldirty(raidPtr) RF_Raid_t *raidPtr; { - RF_ComponentLabel_t c_label; + RF_ComponentLabel_t clabel; int r,c; raidPtr->mod_counter++; @@ -2052,19 +2276,19 @@ rf_markalldirty( raidPtr ) raidread_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - if (c_label.status == rf_ds_spared) { + &clabel); + if (clabel.status == rf_ds_spared) { /* XXX do something special... but whatever you do, don't try to access it!! 
*/ } else { #if 0 - c_label.status = + clabel.status = raidPtr->Disks[r][c].status; raidwrite_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); + &clabel); #endif raidmarkdirty( raidPtr->Disks[r][c].dev, @@ -2110,21 +2334,21 @@ rf_markalldirty( raidPtr ) raidread_component_label( raidPtr->Disks[r][sparecol].dev, raidPtr->raid_cinfo[r][sparecol].ci_vp, - &c_label); + &clabel); /* make sure status is noted */ - c_label.version = RF_COMPONENT_LABEL_VERSION; - c_label.mod_counter = raidPtr->mod_counter; - c_label.serial_number = raidPtr->serial_number; - c_label.row = srow; - c_label.column = scol; - c_label.num_rows = raidPtr->numRow; - c_label.num_columns = raidPtr->numCol; - c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/ - c_label.status = rf_ds_optimal; + clabel.version = RF_COMPONENT_LABEL_VERSION; + clabel.mod_counter = raidPtr->mod_counter; + clabel.serial_number = raidPtr->serial_number; + clabel.row = srow; + clabel.column = scol; + clabel.num_rows = raidPtr->numRow; + clabel.num_columns = raidPtr->numCol; + clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/ + clabel.status = rf_ds_optimal; raidwrite_component_label( raidPtr->Disks[r][sparecol].dev, raidPtr->raid_cinfo[r][sparecol].ci_vp, - &c_label); + &clabel); raidmarkclean( raidPtr->Disks[r][sparecol].dev, raidPtr->raid_cinfo[r][sparecol].ci_vp); } @@ -2135,10 +2359,11 @@ rf_markalldirty( raidPtr ) void -rf_update_component_labels( raidPtr ) +rf_update_component_labels(raidPtr, final) RF_Raid_t *raidPtr; + int final; { - RF_ComponentLabel_t c_label; + RF_ComponentLabel_t clabel; int sparecol; int r,c; int i,j; @@ -2158,43 +2383,26 @@ rf_update_component_labels( raidPtr ) raidread_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); + &clabel); /* make sure status is noted */ - c_label.status = rf_ds_optimal; + clabel.status = rf_ds_optimal; + /* bump the counter */ + clabel.mod_counter = raidPtr->mod_counter; + 
raidwrite_component_label( raidPtr->Disks[r][c].dev, raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); + &clabel); + if (final == RF_FINAL_COMPONENT_UPDATE) { + if (raidPtr->parity_good == RF_RAID_CLEAN) { + raidmarkclean( + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, + raidPtr->mod_counter); + } } } /* else we don't touch it.. */ -#if 0 - else if (raidPtr->Disks[r][c].status != - rf_ds_failed) { - raidread_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - /* make sure status is noted */ - c_label.status = - raidPtr->Disks[r][c].status; - raidwrite_component_label( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - &c_label); - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( - raidPtr->Disks[r][c].dev, - raidPtr->raid_cinfo[r][c].ci_vp, - raidPtr->mod_counter); - } - } -#endif } } @@ -2223,34 +2431,96 @@ rf_update_component_labels( raidPtr ) } } + /* XXX shouldn't *really* need this... 
*/ raidread_component_label( raidPtr->Disks[0][sparecol].dev, raidPtr->raid_cinfo[0][sparecol].ci_vp, - &c_label); + &clabel); /* make sure status is noted */ - c_label.version = RF_COMPONENT_LABEL_VERSION; - c_label.mod_counter = raidPtr->mod_counter; - c_label.serial_number = raidPtr->serial_number; - c_label.row = srow; - c_label.column = scol; - c_label.num_rows = raidPtr->numRow; - c_label.num_columns = raidPtr->numCol; - c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/ - c_label.status = rf_ds_optimal; + + raid_init_component_label(raidPtr, &clabel); + + clabel.mod_counter = raidPtr->mod_counter; + clabel.row = srow; + clabel.column = scol; + clabel.status = rf_ds_optimal; + raidwrite_component_label( raidPtr->Disks[0][sparecol].dev, raidPtr->raid_cinfo[0][sparecol].ci_vp, - &c_label); - if (raidPtr->parity_good == RF_RAID_CLEAN) { - raidmarkclean( raidPtr->Disks[0][sparecol].dev, - raidPtr->raid_cinfo[0][sparecol].ci_vp, - raidPtr->mod_counter); + &clabel); + if (final == RF_FINAL_COMPONENT_UPDATE) { + if (raidPtr->parity_good == RF_RAID_CLEAN) { + raidmarkclean( raidPtr->Disks[0][sparecol].dev, + raidPtr->raid_cinfo[0][sparecol].ci_vp, + raidPtr->mod_counter); + } } } } /* printf("Component labels updated\n"); */ } +void +rf_close_component(raidPtr, vp, auto_configured) + RF_Raid_t *raidPtr; + struct vnode *vp; + int auto_configured; +{ + struct proc *p; + + p = raidPtr->engine_thread; + + if (vp != NULL) { + if (auto_configured == 1) { + VOP_CLOSE(vp, FREAD, NOCRED, 0); + vput(vp); + + } else { + VOP_UNLOCK(vp, 0, p); + (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); + } + } else { + printf("vnode was NULL\n"); + } +} + + +void +rf_UnconfigureVnodes(raidPtr) + RF_Raid_t *raidPtr; +{ + int r,c; + struct proc *p; + struct vnode *vp; + int acd; + + + /* We take this opportunity to close the vnodes like we should.. 
*/ + + p = raidPtr->engine_thread; + + for (r = 0; r < raidPtr->numRow; r++) { + for (c = 0; c < raidPtr->numCol; c++) { + printf("Closing vnode for row: %d col: %d\n", r, c); + vp = raidPtr->raid_cinfo[r][c].ci_vp; + acd = raidPtr->Disks[r][c].auto_configured; + rf_close_component(raidPtr, vp, acd); + raidPtr->raid_cinfo[r][c].ci_vp = NULL; + raidPtr->Disks[r][c].auto_configured = 0; + } + } + for (r = 0; r < raidPtr->numSpare; r++) { + printf("Closing vnode for spare: %d\n", r); + vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp; + acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured; + rf_close_component(raidPtr, vp, acd); + raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL; + raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0; + } +} + + void rf_ReconThread(req) struct rf_recon_req *req; @@ -2296,6 +2566,11 @@ rf_RewriteParityThread(raidPtr) } raidPtr->parity_rewrite_in_progress = 0; + /* Anyone waiting for us to stop? If so, inform them... */ + if (raidPtr->waitShutdown) { + wakeup(&raidPtr->parity_rewrite_in_progress); + } + /* That's all... */ kthread_exit(0); /* does not return */ } @@ -2337,3 +2612,705 @@ rf_ReconstructInPlaceThread(req) /* That's all... */ kthread_exit(0); /* does not return */ } + +void +rf_mountroot_hook(dev) + struct device *dev; +{ + +} + + +RF_AutoConfig_t * +rf_find_raid_components() +{ +#ifdef RAID_AUTOCONFIG + struct devnametobdevmaj *dtobdm; + struct vnode *vp; + struct disklabel label; + struct device *dv; + char *cd_name; + dev_t dev; + int error; + int i; + int good_one; + RF_ComponentLabel_t *clabel; + RF_AutoConfig_t *ac; +#endif + RF_AutoConfig_t *ac_list; + + + /* initialize the AutoConfig list */ + ac_list = NULL; + +#if RAID_AUTOCONFIG +if (raidautoconfig) { + + /* we begin by trolling through *all* the devices on the system */ + + for (dv = alldevs.tqh_first; dv != NULL; + dv = dv->dv_list.tqe_next) { + + /* we are only interested in disks... 
*/ + if (dv->dv_class != DV_DISK) + continue; + + /* we don't care about floppies... */ + if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) { + continue; + } + + /* need to find the device_name_to_block_device_major stuff */ + cd_name = dv->dv_cfdata->cf_driver->cd_name; + dtobdm = dev_name2blk; + while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) { + dtobdm++; + } + + /* get a vnode for the raw partition of this disk */ + + dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART); + if (bdevvp(dev, &vp)) + panic("RAID can't alloc vnode"); + + error = VOP_OPEN(vp, FREAD, NOCRED, 0); + + if (error) { + /* "Who cares." Continue looking + for something that exists*/ + vput(vp); + continue; + } + + /* Ok, the disk exists. Go get the disklabel. */ + error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label, + FREAD, NOCRED, 0); + if (error) { + /* + * XXX can't happen - open() would + * have errored out (or faked up one) + */ + printf("can't get label for dev %s%c (%d)!?!?\n", + dv->dv_xname, 'a' + RAW_PART, error); + } + + /* don't need this any more. We'll allocate it again + a little later if we really do... */ + VOP_CLOSE(vp, FREAD, NOCRED, 0); + vput(vp); + + for (i=0; i < label.d_npartitions; i++) { + /* We only support partitions marked as RAID */ + if (label.d_partitions[i].p_fstype != FS_RAID) + continue; + + dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i); + if (bdevvp(dev, &vp)) + panic("RAID can't alloc vnode"); + + error = VOP_OPEN(vp, FREAD, NOCRED, 0); + if (error) { + /* Whatever... */ + vput(vp); + continue; + } + + good_one = 0; + + clabel = (RF_ComponentLabel_t *) + malloc(sizeof(RF_ComponentLabel_t), + M_RAIDFRAME, M_NOWAIT); + if (clabel == NULL) { + /* XXX CLEANUP HERE */ + printf("RAID auto config: out of memory!\n"); + return(NULL); /* XXX probably should panic? */ + } + + if (!raidread_component_label(dev, vp, clabel)) { + /* Got the label. Does it look reasonable? 
*/ + if (rf_reasonable_label(clabel) && + (clabel->partitionSize <= + label.d_partitions[i].p_size)) { +#if DEBUG + printf("Component on: %s%c: %d\n", + dv->dv_xname, 'a'+i, + label.d_partitions[i].p_size); + rf_print_component_label(clabel); +#endif + /* if it's reasonable, add it, + else ignore it. */ + ac = (RF_AutoConfig_t *) + malloc(sizeof(RF_AutoConfig_t), + M_RAIDFRAME, + M_NOWAIT); + if (ac == NULL) { + /* XXX should panic?? */ + return(NULL); + } + + sprintf(ac->devname, "%s%c", + dv->dv_xname, 'a'+i); + ac->dev = dev; + ac->vp = vp; + ac->clabel = clabel; + ac->next = ac_list; + ac_list = ac; + good_one = 1; + } + } + if (!good_one) { + /* cleanup */ + free(clabel, M_RAIDFRAME); + VOP_CLOSE(vp, FREAD, NOCRED, 0); + vput(vp); + } + } + } +} +#endif +return(ac_list); +} + +#ifdef RAID_AUTOCONFIG +static int +rf_reasonable_label(clabel) + RF_ComponentLabel_t *clabel; +{ + + if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || + (clabel->version==RF_COMPONENT_LABEL_VERSION)) && + ((clabel->clean == RF_RAID_CLEAN) || + (clabel->clean == RF_RAID_DIRTY)) && + clabel->row >=0 && + clabel->column >= 0 && + clabel->num_rows > 0 && + clabel->num_columns > 0 && + clabel->row < clabel->num_rows && + clabel->column < clabel->num_columns && + clabel->blockSize > 0 && + clabel->numBlocks > 0) { + /* label looks reasonable enough... */ + return(1); + } + return(0); +} +#endif + +void +rf_print_component_label(clabel) + RF_ComponentLabel_t *clabel; +{ + printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", + clabel->row, clabel->column, + clabel->num_rows, clabel->num_columns); + printf(" Version: %d Serial Number: %d Mod Counter: %d\n", + clabel->version, clabel->serial_number, + clabel->mod_counter); + printf(" Clean: %s Status: %d\n", + clabel->clean ? 
"Yes" : "No", clabel->status ); + printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", + clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); + printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", + (char) clabel->parityConfig, clabel->blockSize, + clabel->numBlocks); + printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); + printf(" Contains root partition: %s\n", + clabel->root_partition ? "Yes" : "No" ); + printf(" Last configured as: raid%d\n", clabel->last_unit ); +#if 0 + printf(" Config order: %d\n", clabel->config_order); +#endif + +} + +RF_ConfigSet_t * +rf_create_auto_sets(ac_list) + RF_AutoConfig_t *ac_list; +{ + RF_AutoConfig_t *ac; + RF_ConfigSet_t *config_sets; + RF_ConfigSet_t *cset; + RF_AutoConfig_t *ac_next; + + + config_sets = NULL; + + /* Go through the AutoConfig list, and figure out which components + belong to what sets. */ + ac = ac_list; + while(ac!=NULL) { + /* we're going to putz with ac->next, so save it here + for use at the end of the loop */ + ac_next = ac->next; + + if (config_sets == NULL) { + /* will need at least this one... */ + config_sets = (RF_ConfigSet_t *) + malloc(sizeof(RF_ConfigSet_t), + M_RAIDFRAME, M_NOWAIT); + if (config_sets == NULL) { + panic("rf_create_auto_sets: No memory!\n"); + } + /* this one is easy :) */ + config_sets->ac = ac; + config_sets->next = NULL; + config_sets->rootable = 0; + ac->next = NULL; + } else { + /* which set does this component fit into? */ + cset = config_sets; + while(cset!=NULL) { + if (rf_does_it_fit(cset, ac)) { + /* looks like it matches... */ + ac->next = cset->ac; + cset->ac = ac; + break; + } + cset = cset->next; + } + if (cset==NULL) { + /* didn't find a match above... 
new set..*/ + cset = (RF_ConfigSet_t *) + malloc(sizeof(RF_ConfigSet_t), + M_RAIDFRAME, M_NOWAIT); + if (cset == NULL) { + panic("rf_create_auto_sets: No memory!\n"); + } + cset->ac = ac; + ac->next = NULL; + cset->next = config_sets; + cset->rootable = 0; + config_sets = cset; + } + } + ac = ac_next; + } + + + return(config_sets); +} + +static int +rf_does_it_fit(cset, ac) + RF_ConfigSet_t *cset; + RF_AutoConfig_t *ac; +{ + RF_ComponentLabel_t *clabel1, *clabel2; + + /* If this one matches the *first* one in the set, that's good + enough, since the other members of the set would have been + through here too... */ + /* note that we are not checking partitionSize here.. + + Note that we are also not checking the mod_counters here. + If everything else matches except the mod_counter, that's + good enough for this test. We will deal with the mod_counters + a little later in the autoconfiguration process. + + (clabel1->mod_counter == clabel2->mod_counter) && + + The reason we don't check for this is that failed disks + will have lower modification counts. If those disks are + not added to the set they used to belong to, then they will + form their own set, which may result in 2 different sets, + for example, competing to be configured at raid0, and + perhaps competing to be the root filesystem set. If the + wrong ones get configured, or both attempt to become /, + weird behaviour and/or serious lossage will occur. Thus we + need to bring them into the fold here, and kick them out at + a later point. 
+ + */ + + clabel1 = cset->ac->clabel; + clabel2 = ac->clabel; + if ((clabel1->version == clabel2->version) && + (clabel1->serial_number == clabel2->serial_number) && + (clabel1->num_rows == clabel2->num_rows) && + (clabel1->num_columns == clabel2->num_columns) && + (clabel1->sectPerSU == clabel2->sectPerSU) && + (clabel1->SUsPerPU == clabel2->SUsPerPU) && + (clabel1->SUsPerRU == clabel2->SUsPerRU) && + (clabel1->parityConfig == clabel2->parityConfig) && + (clabel1->maxOutstanding == clabel2->maxOutstanding) && + (clabel1->blockSize == clabel2->blockSize) && + (clabel1->numBlocks == clabel2->numBlocks) && + (clabel1->autoconfigure == clabel2->autoconfigure) && + (clabel1->root_partition == clabel2->root_partition) && + (clabel1->last_unit == clabel2->last_unit) && + (clabel1->config_order == clabel2->config_order)) { + /* if it gets here, it almost *has* to be a match */ + } else { + /* it's not consistent with somebody in the set.. + punt */ + return(0); + } + /* all was fine.. it must fit... */ + return(1); +} + +int +rf_have_enough_components(cset) + RF_ConfigSet_t *cset; +{ + RF_AutoConfig_t *ac; + RF_AutoConfig_t *auto_config; + RF_ComponentLabel_t *clabel; + int r,c; + int num_rows; + int num_cols; + int num_missing; + int mod_counter; + int mod_counter_found; + int even_pair_failed; + char parity_type; + + + /* check to see that we have enough 'live' components + of this set. If so, we can configure it if necessary */ + + num_rows = cset->ac->clabel->num_rows; + num_cols = cset->ac->clabel->num_columns; + parity_type = cset->ac->clabel->parityConfig; + + /* XXX Check for duplicate components!?!?!? */ + + /* Determine what the mod_counter is supposed to be for this set. 
*/ + + mod_counter_found = 0; + ac = cset->ac; + while(ac!=NULL) { + if (mod_counter_found==0) { + mod_counter = ac->clabel->mod_counter; + mod_counter_found = 1; + } else { + if (ac->clabel->mod_counter > mod_counter) { + mod_counter = ac->clabel->mod_counter; + } + } + ac = ac->next; + } + + num_missing = 0; + auto_config = cset->ac; + + for(r=0; r<num_rows; r++) { + even_pair_failed = 0; + for(c=0; c<num_cols; c++) { + ac = auto_config; + while(ac!=NULL) { + if ((ac->clabel->row == r) && + (ac->clabel->column == c) && + (ac->clabel->mod_counter == mod_counter)) { + /* it's this one... */ +#if DEBUG + printf("Found: %s at %d,%d\n", + ac->devname,r,c); +#endif + break; + } + ac=ac->next; + } + if (ac==NULL) { + /* Didn't find one here! */ + /* special case for RAID 1, especially + where there are more than 2 + components (where RAIDframe treats + things a little differently :( ) */ + if (parity_type == '1') { + if (c%2 == 0) { /* even component */ + even_pair_failed = 1; + } else { /* odd component. If + we're failed, and + so is the even + component, it's + "Good Night, Charlie" */ + if (even_pair_failed == 1) { + return(0); + } + } + } else { + /* normal accounting */ + num_missing++; + } + } + if ((parity_type == '1') && (c%2 == 1)) { + /* Just did an even component, and we didn't + bail.. reset the even_pair_failed flag, + and go on to the next component.... 
*/ + even_pair_failed = 0; + } + } + } + + clabel = cset->ac->clabel; + + if (((clabel->parityConfig == '0') && (num_missing > 0)) || + ((clabel->parityConfig == '4') && (num_missing > 1)) || + ((clabel->parityConfig == '5') && (num_missing > 1))) { + /* XXX this needs to be made *much* more general */ + /* Too many failures */ + return(0); + } + /* otherwise, all is well, and we've got enough to take a kick + at autoconfiguring this set */ + return(1); +} + +void +rf_create_configuration(ac,config,raidPtr) + RF_AutoConfig_t *ac; + RF_Config_t *config; + RF_Raid_t *raidPtr; +{ + RF_ComponentLabel_t *clabel; + int i; + + clabel = ac->clabel; + + /* 1. Fill in the common stuff */ + config->numRow = clabel->num_rows; + config->numCol = clabel->num_columns; + config->numSpare = 0; /* XXX should this be set here? */ + config->sectPerSU = clabel->sectPerSU; + config->SUsPerPU = clabel->SUsPerPU; + config->SUsPerRU = clabel->SUsPerRU; + config->parityConfig = clabel->parityConfig; + /* XXX... */ + strcpy(config->diskQueueType,"fifo"); + config->maxOutstandingDiskReqs = clabel->maxOutstanding; + config->layoutSpecificSize = 0; /* XXX ?? 
*/ + + while(ac!=NULL) { + /* row/col values will be in range due to the checks + in reasonable_label() */ + strcpy(config->devnames[ac->clabel->row][ac->clabel->column], + ac->devname); + ac = ac->next; + } + + for(i=0;i<RF_MAXDBGV;i++) { + config->debugVars[i][0] = NULL; + } +} + +int +rf_set_autoconfig(raidPtr, new_value) + RF_Raid_t *raidPtr; + int new_value; +{ + RF_ComponentLabel_t clabel; + struct vnode *vp; + dev_t dev; + int row, column; + + raidPtr->autoconfigure = new_value; + for(row=0; row<raidPtr->numRow; row++) { + for(column=0; column<raidPtr->numCol; column++) { + if (raidPtr->Disks[row][column].status == + rf_ds_optimal) { + dev = raidPtr->Disks[row][column].dev; + vp = raidPtr->raid_cinfo[row][column].ci_vp; + raidread_component_label(dev, vp, &clabel); + clabel.autoconfigure = new_value; + raidwrite_component_label(dev, vp, &clabel); + } + } + } + return(new_value); +} + +int +rf_set_rootpartition(raidPtr, new_value) + RF_Raid_t *raidPtr; + int new_value; +{ + RF_ComponentLabel_t clabel; + struct vnode *vp; + dev_t dev; + int row, column; + + raidPtr->root_partition = new_value; + for(row=0; row<raidPtr->numRow; row++) { + for(column=0; column<raidPtr->numCol; column++) { + if (raidPtr->Disks[row][column].status == + rf_ds_optimal) { + dev = raidPtr->Disks[row][column].dev; + vp = raidPtr->raid_cinfo[row][column].ci_vp; + raidread_component_label(dev, vp, &clabel); + clabel.root_partition = new_value; + raidwrite_component_label(dev, vp, &clabel); + } + } + } + return(new_value); +} + +void +rf_release_all_vps(cset) + RF_ConfigSet_t *cset; +{ + RF_AutoConfig_t *ac; + + ac = cset->ac; + while(ac!=NULL) { + /* Close the vp, and give it back */ + if (ac->vp) { + VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); + vput(ac->vp); + ac->vp = NULL; + } + ac = ac->next; + } +} + + +void +rf_cleanup_config_set(cset) + RF_ConfigSet_t *cset; +{ + RF_AutoConfig_t *ac; + RF_AutoConfig_t *next_ac; + + ac = cset->ac; + while(ac!=NULL) { + next_ac = ac->next; + /* nuke the 
label */ + free(ac->clabel, M_RAIDFRAME); + /* cleanup the config structure */ + free(ac, M_RAIDFRAME); + /* "next.." */ + ac = next_ac; + } + /* and, finally, nuke the config set */ + free(cset, M_RAIDFRAME); +} + + +void +raid_init_component_label(raidPtr, clabel) + RF_Raid_t *raidPtr; + RF_ComponentLabel_t *clabel; +{ + /* current version number */ + clabel->version = RF_COMPONENT_LABEL_VERSION; + clabel->serial_number = raidPtr->serial_number; + clabel->mod_counter = raidPtr->mod_counter; + clabel->num_rows = raidPtr->numRow; + clabel->num_columns = raidPtr->numCol; + clabel->clean = RF_RAID_DIRTY; /* not clean */ + clabel->status = rf_ds_optimal; /* "It's good!" */ + + clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; + clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; + clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; + + clabel->blockSize = raidPtr->bytesPerSector; + clabel->numBlocks = raidPtr->sectorsPerDisk; + + /* XXX not portable */ + clabel->parityConfig = raidPtr->Layout.map->parityConfig; + clabel->maxOutstanding = raidPtr->maxOutstanding; + clabel->autoconfigure = raidPtr->autoconfigure; + clabel->root_partition = raidPtr->root_partition; + clabel->last_unit = raidPtr->raidid; + clabel->config_order = raidPtr->config_order; +} + +int +rf_auto_config_set(cset,unit) + RF_ConfigSet_t *cset; + int *unit; +{ + RF_Raid_t *raidPtr; + RF_Config_t *config; + int raidID; + int retcode; + + printf("RAID autoconfigure\n"); + + retcode = 0; + *unit = -1; + + /* 1. Create a config structure */ + + config = (RF_Config_t *)malloc(sizeof(RF_Config_t), + M_RAIDFRAME, + M_NOWAIT); + if (config==NULL) { + printf("Out of mem!?!?\n"); + /* XXX do something more intelligent here. */ + return(1); + } + + memset(config, 0, sizeof(RF_Config_t)); + + /* XXX raidID needs to be set correctly.. */ + + /* + 2. Figure out what RAID ID this one is supposed to live at + See if we can get the same RAID dev that it was configured + on last time.. 
+ */ + + raidID = cset->ac->clabel->last_unit; + if ((raidID < 0) || (raidID >= numraid)) { + /* let's not wander off into lala land. */ + raidID = numraid - 1; + } + if (raidPtrs[raidID]->valid != 0) { + + /* + Nope... Go looking for an alternative... + Start high so we don't immediately use raid0 if that's + not taken. + */ + + for(raidID = numraid; raidID >= 0; raidID--) { + if (raidPtrs[raidID]->valid == 0) { + /* can use this one! */ + break; + } + } + } + + if (raidID < 0) { + /* punt... */ + printf("Unable to auto configure this set!\n"); + printf("(Out of RAID devs!)\n"); + return(1); + } + printf("Configuring raid%d:\n",raidID); + raidPtr = raidPtrs[raidID]; + + /* XXX all this stuff should be done SOMEWHERE ELSE! */ + raidPtr->raidid = raidID; + raidPtr->openings = RAIDOUTSTANDING; + + /* 3. Build the configuration structure */ + rf_create_configuration(cset->ac, config, raidPtr); + + /* 4. Do the configuration */ + retcode = rf_Configure(raidPtr, config, cset->ac); + + if (retcode == 0) { + + raidinit(raidPtrs[raidID]); + + rf_markalldirty(raidPtrs[raidID]); + raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ + if (cset->ac->clabel->root_partition==1) { + /* everything configured just fine. Make a note + that this set is eligible to be root. */ + cset->rootable = 1; + /* XXX do this here? */ + raidPtrs[raidID]->root_partition = 1; + } + } + + /* 5. Cleanup */ + free(config, M_RAIDFRAME); + + *unit = raidID; + return(retcode); +} |