src - OpenBSD base system

diff options


context:
space:
mode:

author	Peter Galbavy <peter@cvs.openbsd.org>	2000-01-07 14:50:24 +0000
committer	Peter Galbavy <peter@cvs.openbsd.org>	2000-01-07 14:50:24 +0000
commit	d7921cbd0ec1beed703dd2b35ce6236d09fa0f12 (patch)
tree	7193c433c334b83730ba6636e8b5e2007148d29f /sys/dev/raidframe/rf_openbsdkintf.c
parent	67a89310ea3a36e9b3d504382073e820f07a019a (diff)

sync with work by Greg Oster on NetBSD

Please note: This update has *only* been tested on i386 with IDE disks. Could someone with a spare box please make sure all is OK with SCSI and maybe other arches? sparc testing will follow locally. * remove rf_sys.h * many changes to make it more stable * some performance increases * All raid threads now get their own kernel process and the calling raidctl(8) program will show status progress through a meter. * In theory FFS_SOFTUPDATES and RAIDframe will now work together - NOT TESTED YET See http://www.cs.usask.ca/staff/oster/raid.html This update includes Greg's changes to Jan 4th 2000. TODO: * some odd behaviour when running raidctl -c on an already config'ed raid set - problem found, fix being done * progress meter is in raidctl(8) - separate commit, but could do with sync'ing with OpenBSD ftp version

Diffstat (limited to 'sys/dev/raidframe/rf_openbsdkintf.c')

-rw-r--r--

sys/dev/raidframe/rf_openbsdkintf.c

695

1 files changed, 361 insertions, 334 deletions

diff --git a/sys/dev/raidframe/rf_openbsdkintf.c b/sys/dev/raidframe/rf_openbsdkintf.c
index 9799e4325a2..f2ac4fd6810 100644
--- a/sys/dev/raidframe/rf_openbsdkintf.c
+++ b/sys/dev/raidframe/rf_openbsdkintf.c

@@ -1,5 +1,5 @@

-/* $OpenBSD: rf_openbsdkintf.c,v 1.7 1999/12/03 22:38:11 art Exp $ */

+/* $OpenBSD: rf_openbsdkintf.c,v 1.8 2000/01/07 14:50:21 peter Exp $ */

+/* $NetBSD: rf_netbsdkintf.c,v 1.39 2000/01/06 02:06:41 oster Exp $ */

/*-

@@ -150,11 +150,6 @@

int rf_kdebug_level = 0;

-#define RFK_BOOT_NONE 0

-#define RFK_BOOT_GOOD 1

-#define RFK_BOOT_BAD 2

-static int rf_kbooted = RFK_BOOT_NONE;

#ifdef RAIDDEBUG

#define db0_printf(a) printf a

#define db_printf(a) do if (rf_kdebug_level > 0) printf a; while(0)

@@ -182,29 +177,12 @@ static RF_SparetWait_t *rf_sparet_wait_queue;

/* responses from installation process */

static RF_SparetWait_t *rf_sparet_resp_queue;

-/* used to communicate reconstruction requests */

-static struct rf_recon_req *recon_queue = NULL;

-decl_simple_lock_data(, recon_queue_mutex)

-#define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)

-#define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)

/* prototypes */

void rf_KernelWakeupFunc __P((struct buf *));

void rf_InitBP __P((struct buf *, struct vnode *, unsigned, dev_t,

RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*)(struct buf *),

void *, int, struct proc *));

-#define Dprintf0(s) if (rf_queueDebug) \

- rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)

-#define Dprintf1(s,a) if (rf_queueDebug) \

- rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)

-#define Dprintf2(s,a,b) if (rf_queueDebug) \

- rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)

-#define Dprintf3(s,a,b,c) if (rf_queueDebug) \

- rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)

int raidmarkclean(dev_t dev, struct vnode *b_vp, int);

int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);

@@ -251,6 +229,7 @@ struct raid_softc {

dev_t sc_dev; /* our device..*/

char sc_xname[20]; /* XXX external name */

struct disk sc_dkdev; /* generic disk device info */

+ struct buf buf_queue; /* used for the device queue */

};

/* sc_flags */

@@ -266,11 +245,22 @@ static int numraid = 0;

* Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.

* Be aware that large numbers can allow the driver to consume a lot of

- * kernel memory, especially on writes...

+ * kernel memory, especially on writes, and in degraded mode reads.

+ *

+ * For example: with a stripe width of 64 blocks (32k) and 5 disks,

+ * a single 64K write will typically require 64K for the old data,

+ * 64K for the old parity, and 64K for the new parity, for a total

+ * of 192K (if the parity buffer is not re-used immediately).

+ * Even if it is used immediately, that's still 128K, which when multiplied

+ * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.

+ *

+ * Now in degraded mode, for example, a 64K read on the above setup may

+ * require data reconstruction, which will require *all* of the 4 remaining

+ * disks to participate -- 4 * 32K/disk == 128K again.

#ifndef RAIDOUTSTANDING

-#define RAIDOUTSTANDING 10

+#define RAIDOUTSTANDING 6

#endif

#define RAIDLABELDEV(dev) \

@@ -290,6 +280,13 @@ int raidlookup __P((char *, struct proc *p, struct vnode **));

void rf_markalldirty __P((RF_Raid_t *));

+void rf_ReconThread __P((struct rf_recon_req *));

+/* XXX what I want is: */

+/*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */

+void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));

+void rf_CopybackThread __P((RF_Raid_t *raidPtr));

+void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));

void

raidattach(num)

int num;

@@ -320,7 +317,6 @@ raidattach(num)

}

rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

- recon_queue = NULL;

for (i = 0; i < numraid; i++)

raidPtrs[i] = NULL;

@@ -330,8 +326,6 @@ raidattach(num)

else

panic("Serious error booting RAID!!\n");

- rf_kbooted = RFK_BOOT_GOOD;

* Put together some datastructures like the CCD device does..

* This lets us lock the device and what-not when it gets opened.

@@ -347,10 +341,15 @@ raidattach(num)

bzero(raid_softc, num * sizeof (struct raid_softc));

for (raidID = 0; raidID < num; raidID++) {

+ raid_softc[raidID].buf_queue.b_actf = NULL;

+ raid_softc[raidID].buf_queue.b_actb =

+ &raid_softc[raidID].buf_queue.b_actf;

RF_Calloc(raidPtrs[raidID], 1, sizeof (RF_Raid_t),

(RF_Raid_t *));

if (raidPtrs[raidID] == NULL) {

- printf("raidPtrs[%d] is NULL\n", raidID);

+ printf("WARNING: raidPtrs[%d] is NULL\n", raidID);

+ numraid = raidID;

+ return;

}

@@ -536,21 +535,16 @@ raidstrategy(bp)

RF_Raid_t *raidPtr;

struct raid_softc *rs = &raid_softc[raidID];

struct disklabel *lp;

+ struct buf *dp;

int wlabel;

- db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));

- db1_printf(("Strategy(2): bp->b_bufsize %d\n", (int)bp->b_bufsize));

- db1_printf(("bp->b_count=%d\n", (int)bp->b_bcount));

- db1_printf(("bp->b_resid=%d\n", (int)bp->b_resid));

- db1_printf(("bp->b_blkno=%d\n", (int)bp->b_blkno));

- if (bp->b_flags & B_READ)

- db1_printf(("READ\n"));

- else

- db1_printf(("WRITE\n"));

- if (rf_kbooted != RFK_BOOT_GOOD)

- return;

+ if ((rs->sc_flags & RAIDF_INITED) ==0) {

+ bp->b_error = ENXIO;

+ bp->b_flags = B_ERROR;

+ bp->b_resid = bp->b_bcount;

+ biodone(bp);

+ return;

+ }

if (raidID >= numraid || !raidPtrs[raidID]) {

bp->b_error = ENODEV;

bp->b_flags |= B_ERROR;

@@ -587,21 +581,21 @@ raidstrategy(bp)

return;

}

- /* XXX splbio() needed? */

s = splbio();

- db1_printf(("Beginning strategy...\n"));

bp->b_resid = 0;

- bp->b_error =

- rf_DoAccessKernel(raidPtrs[raidID], bp, NULL, NULL, NULL);

- if (bp->b_error) {

- bp->b_flags |= B_ERROR;

- db1_printf(

- ("bp->b_flags HAS B_ERROR SET!!!: %d\n", bp->b_error));

- }

+ /* stuff it onto our queue */

+ dp = &rs->buf_queue;

+ bp->b_actf = NULL;

+ bp->b_actb = dp->b_actb;

+ *dp->b_actb = bp;

+ dp->b_actb = &bp->b_actf;

+ raidstart(raidPtrs[raidID]);

splx(s);

- db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n", bp, bp->b_data,

- (int)bp->b_bcount, (int)bp->b_resid));

}

/* ARGSUSED */

@@ -660,19 +654,11 @@ raidioctl(dev, cmd, data, flag, p)

int error = 0;

int part, pmask;

struct raid_softc *rs;

-#if 0

- int r, c;

-#endif

- /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */

- /* struct ccdbuf *cbp; */

- /* struct raidbuf *raidbp; */

RF_Config_t *k_cfg, *u_cfg;

u_char *specific_buf;

int retcode = 0;

int row;

int column;

- int s;

struct rf_recon_req *rrcopy, *rr;

RF_ComponentLabel_t *component_label;

RF_ComponentLabel_t ci_label;

@@ -713,13 +699,16 @@ raidioctl(dev, cmd, data, flag, p)

case RAIDFRAME_GET_SIZE:

case RAIDFRAME_FAIL_DISK:

case RAIDFRAME_COPYBACK:

- case RAIDFRAME_CHECKRECON:

+ case RAIDFRAME_CHECK_RECON_STATUS:

case RAIDFRAME_GET_COMPONENT_LABEL:

case RAIDFRAME_SET_COMPONENT_LABEL:

case RAIDFRAME_ADD_HOT_SPARE:

case RAIDFRAME_REMOVE_HOT_SPARE:

case RAIDFRAME_INIT_LABELS:

case RAIDFRAME_REBUILD_IN_PLACE:

+ case RAIDFRAME_CHECK_PARITY:

+ case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:

+ case RAIDFRAME_CHECK_COPYBACK_STATUS:

if ((rs->sc_flags & RAIDF_INITED) == 0)

return (ENXIO);

}

@@ -745,6 +734,7 @@ raidioctl(dev, cmd, data, flag, p)

retcode = copyin((caddr_t)u_cfg, (caddr_t)k_cfg,

sizeof (RF_Config_t));

if (retcode) {

+ RF_Free(k_cfg, sizeof(RF_Config_t));

db3_printf(("rf_ioctl: retcode=%d copyin.1\n",

retcode));

return (retcode);

@@ -757,6 +747,7 @@ raidioctl(dev, cmd, data, flag, p)

if (k_cfg->layoutSpecificSize) {

if (k_cfg->layoutSpecificSize > 10000) {

/* sanity check */

+ RF_Free(k_cfg, sizeof(RF_Config_t));

db3_printf(("rf_ioctl: EINVAL %d\n", retcode));

return (EINVAL);

}

@@ -770,6 +761,8 @@ raidioctl(dev, cmd, data, flag, p)

retcode = copyin(k_cfg->layoutSpecific,

(caddr_t)specific_buf, k_cfg->layoutSpecificSize);

if (retcode) {

+ RF_Free(k_cfg, sizeof(RF_Config_t));

+ RF_Free(specific_buf, k_cfg->layoutSpecificSize);

db3_printf(("rf_ioctl: retcode=%d copyin.2\n",

retcode));

return (retcode);

@@ -784,15 +777,6 @@ raidioctl(dev, cmd, data, flag, p)

* Store the sum of all the bytes in the last byte?

- db1_printf(("Considering configuring the system.:%d 0x%x\n",

- unit, p));

- /*

- * We need the pointer to this a little deeper,

- * so stash it here...

- */

- raidPtrs[unit]->proc = p;

/* configure the system */

raidPtrs[unit]->raidid = unit;

@@ -801,6 +785,12 @@ raidioctl(dev, cmd, data, flag, p)

/* allow this many simultaneous IO's to this RAID device */

raidPtrs[unit]->openings = RAIDOUTSTANDING;

+ /* XXX should be moved to rf_Configure() */

+ raidPtrs[unit]->copyback_in_progress = 0;

+ raidPtrs[unit]->parity_rewrite_in_progress = 0;

+ raidPtrs[unit]->recon_in_progress = 0;

if (retcode == 0) {

retcode = raidinit(dev, raidPtrs[unit],unit);

rf_markalldirty( raidPtrs[unit] );

@@ -837,15 +827,8 @@ raidioctl(dev, cmd, data, flag, p)

return (EBUSY);

}

- if (rf_debugKernelAccess) {

- printf("call shutdown\n");

- }

- raidPtrs[unit]->proc = p; /* XXX Necessary evil */

retcode = rf_Shutdown(raidPtrs[unit]);

- db1_printf(("Done main shutdown\n"));

/* It's no longer initialized... */

rs->sc_flags &= ~RAIDF_INITED;

@@ -876,19 +859,17 @@ raidioctl(dev, cmd, data, flag, p)

sizeof(RF_ComponentLabel_t));

if (retcode) {

+ RF_Free( component_label, sizeof(RF_ComponentLabel_t));

return(retcode);

}

row = component_label->row;

- printf("Row: %d\n",row);

- if (row > raidPtrs[unit]->numRow) {

- row = 0; /* XXX */

- }

column = component_label->column;

- printf("Column: %d\n",column);

- if (column > raidPtrs[unit]->numCol) {

- column = 0; /* XXX */

- }

+ if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||

+ (column < 0) || (column >= raidPtrs[unit]->numCol)) {

+ return(EINVAL);

+ }

raidread_component_label(

raidPtrs[unit]->Disks[row][column].dev,

@@ -924,8 +905,9 @@ raidioctl(dev, cmd, data, flag, p)

row = component_label->row;

column = component_label->column;

- if ((row < 0) || (row > raidPtrs[unit]->numRow) ||

- (column < 0) || (column > raidPtrs[unit]->numCol)) {

+ if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||

+ (column < 0) || (column >= raidPtrs[unit]->numCol)) {

+ RF_Free( component_label, sizeof(RF_ComponentLabel_t));

return(EINVAL);

}

@@ -978,19 +960,18 @@ raidioctl(dev, cmd, data, flag, p)

return(0);

}

+ if (raidPtrs[unit]->parity_rewrite_in_progress == 1) {

+ /* Re-write is already in progress! */

+ return(EINVAL);

+ }

/* borrow the thread of the requesting process */

- raidPtrs[unit]->proc = p; /* Blah... :-p GO */

- retcode = rf_RewriteParity(raidPtrs[unit]);

- /* return I/O Error if the parity rewrite fails */

- if (retcode) {

- retcode = EIO;

- } else {

- /* set the clean bit! If we shutdown correctly,

- the clean bit on each component label will get

- set */

- raidPtrs[unit]->parity_good = RF_RAID_CLEAN;

- }

+ retcode = RF_CREATE_THREAD(raidPtrs[unit]->parity_rewrite_thread,

+ rf_RewriteParityThread,

+ raidPtrs[unit],"raid_parity");

return (retcode);

@@ -998,7 +979,6 @@ raidioctl(dev, cmd, data, flag, p)

sparePtr = (RF_SingleComponent_t *) data;

memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));

printf("Adding spare\n");

- raidPtrs[unit]->proc = p; /* Blah... :-p GO */

retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);

return(retcode);

@@ -1006,35 +986,41 @@ raidioctl(dev, cmd, data, flag, p)

return(retcode);

case RAIDFRAME_REBUILD_IN_PLACE:

+ if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {

+ /* Can't do this on a RAID 0!! */

+ return(EINVAL);

+ }

+ if (raidPtrs[unit]->recon_in_progress == 1) {

+ /* a reconstruct is already in progress! */

+ return(EINVAL);

+ }

componentPtr = (RF_SingleComponent_t *) data;

memcpy( &component, componentPtr,

sizeof(RF_SingleComponent_t));

row = component.row;

column = component.column;

printf("Rebuild: %d %d\n",row, column);

- if ((row < 0) || (row > raidPtrs[unit]->numRow) ||

- (column < 0) || (column > raidPtrs[unit]->numCol)) {

+ if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||

+ (column < 0) || (column >= raidPtrs[unit]->numCol)) {

return(EINVAL);

}

- printf("Attempting a rebuild in place\n");

- s = splbio();

- raidPtrs[unit]->proc = p; /* Blah... :-p GO */

- retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);

- splx(s);

- return (retcode);

+ RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));

+ if (rrcopy == NULL)

+ return(ENOMEM);

-#if 0 /* XXX not supported yet (ever?) */

- case RAIDFRAME_TUR:

- /*

- * Issue a test-unit-ready through raidframe to the

- * indicated device.

- */

+ rrcopy->raidPtr = (void *) raidPtrs[unit];

+ rrcopy->row = row;

+ rrcopy->col = column;

+ retcode = RF_CREATE_THREAD(raidPtrs[unit]->recon_thread,

+ rf_ReconstructInPlaceThread,

+ rrcopy,"raid_reconip");

- /* debug only */

- retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *)data);

return (retcode);

-#endif

case RAIDFRAME_GET_INFO:

{

@@ -1054,12 +1040,12 @@ raidioctl(dev, cmd, data, flag, p)

cfg->cols = raid->numCol;

cfg->ndevs = raid->numRow * raid->numCol;

if (cfg->ndevs >= RF_MAX_DISKS) {

- cfg->ndevs = 0;

+ RF_Free(cfg, sizeof(RF_DeviceConfig_t));

return (ENOMEM);

}

cfg->nspares = raid->numSpare;

if (cfg->nspares >= RF_MAX_DISKS) {

- cfg->nspares = 0;

+ RF_Free(cfg, sizeof(RF_DeviceConfig_t));

return (ENOMEM);

}

cfg->maxqdepth = raid->maxQueueDepth;

@@ -1079,8 +1065,10 @@ raidioctl(dev, cmd, data, flag, p)

return (retcode);

}

- break;

+ break;

+ case RAIDFRAME_CHECK_PARITY:

+ *(int *) data = raidPtrs[unit]->parity_good;

+ return (0);

case RAIDFRAME_RESET_ACCTOTALS:

{

RF_Raid_t *raid = raidPtrs[unit];

@@ -1114,10 +1102,6 @@ raidioctl(dev, cmd, data, flag, p)

*(int *) data = raidPtrs[unit]->totalSectors;

return (0);

-#define RAIDFRAME_RECON 1

- /* XXX The above should probably be set somewhere else!! GO */

-#if RAIDFRAME_RECON > 0

/* fail a disk & optionally start reconstruction */

case RAIDFRAME_FAIL_DISK:

rr = (struct rf_recon_req *)data;

@@ -1134,15 +1118,14 @@ raidioctl(dev, cmd, data, flag, p)

* rely on the user's buffer

RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));

+ if (rrcopy == NULL)

+ return(ENOMEM);

bcopy(rr, rrcopy, sizeof(*rr));

rrcopy->raidPtr = (void *)raidPtrs[unit];

- LOCK_RECON_Q_MUTEX();

- rrcopy->next = recon_queue;

- recon_queue = rrcopy;

- wakeup(&recon_queue);

- UNLOCK_RECON_Q_MUTEX();

+ retcode = RF_CREATE_THREAD(raidPtrs[unit]->recon_thread,

+ rf_ReconThread,

+ rrcopy,"raid_recon");

return (0);

@@ -1150,25 +1133,49 @@ raidioctl(dev, cmd, data, flag, p)

* disk needs it, if any.

case RAIDFRAME_COPYBACK:

- /* Borrow the current thread to get this done */

- raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */

- s = splbio();

- rf_CopybackReconstructedData(raidPtrs[unit]);

- splx(s);

- return (0);

+ if (raidPtrs[unit]->copyback_in_progress == 1) {

+ /* Copyback is already in progress! */

+ return(EINVAL);

+ }

+ retcode = RF_CREATE_THREAD(raidPtrs[unit]->copyback_thread,

+ rf_CopybackThread,

+ raidPtrs[unit],"raid_copyback");

+ return (retcode);

/* Return the percentage completion of reconstruction */

- case RAIDFRAME_CHECKRECON:

- row = *(int *)data;

- if (row < 0 || row >= raidPtrs[unit]->numRow)

- return (EINVAL);

+ case RAIDFRAME_CHECK_RECON_STATUS:

+ row = 0; /* XXX we only consider a single row... */

if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)

*(int *)data = 100;

else

*(int *)data =

raidPtrs[unit]->reconControl[row]->percentComplete;

return (0);

+ case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:

+ if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {

+ /* This makes no sense on a RAID 0 */

+ return(EINVAL);

+ }

+ if (raidPtrs[unit]->parity_rewrite_in_progress == 1) {

+ *(int *) data = 100 * raidPtrs[unit]->parity_rewrite_stripes_done / raidPtrs[unit]->Layout.numStripe;

+ } else {

+ *(int *) data = 100;

+ }

+ return (0);

+ case RAIDFRAME_CHECK_COPYBACK_STATUS:

+ if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {

+ /* This makes no sense on a RAID 0 */

+ return(EINVAL);

+ }

+ if (raidPtrs[unit]->copyback_in_progress == 1) {

+ *(int *) data = 100 * raidPtrs[unit]->copyback_stripes_done / raidPtrs[unit]->Layout.numStripe;

+ } else {

+ *(int *) data = 100;

+ }

+ return (0);

#if 0

case RAIDFRAME_SPARET_WAIT:

@@ -1234,8 +1241,6 @@ raidioctl(dev, cmd, data, flag, p)

return (retcode);

#endif

-#endif /* RAIDFRAME_RECON > 0 */

default:

/* fall through to the os-specific code below */

break;

@@ -1249,23 +1254,17 @@ raidioctl(dev, cmd, data, flag, p)

switch (cmd) {

case DIOCGDINFO:

- db1_printf(

- ("DIOCGDINFO %d %d\n", (int)dev, (int)DISKPART(dev)));

*(struct disklabel *)data = *(rs->sc_dkdev.dk_label);

break;

case DIOCGPART:

- db1_printf(

- ("DIOCGPART: %d %d\n", (int)dev, (int)DISKPART(dev)));

((struct partinfo *)data)->disklab = rs->sc_dkdev.dk_label;

((struct partinfo *)data)->part =

&rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];

break;

case DIOCWDINFO:

- db1_printf(("DIOCWDINFO\n"));

case DIOCSDINFO:

- db1_printf(("DIOCSDINFO\n"));

if ((error = raidlock(rs)) != 0)

return (error);

@@ -1289,15 +1288,22 @@ raidioctl(dev, cmd, data, flag, p)

break;

case DIOCWLABEL:

- db1_printf(("DIOCWLABEL\n"));

if (*(int *)data != 0)

rs->sc_flags |= RAIDF_WLABEL;

else

rs->sc_flags &= ~RAIDF_WLABEL;

break;

+#if 0

+ case DIOCGDEFLABEL:

+ raidgetdefaultlabel(raidPtrs[unit], rs,

+ (struct disklabel *) data);

+ break;

+#endif

default:

- retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */

+ retcode = ENOTTY;

}

return (retcode);

}

@@ -1313,8 +1319,6 @@ raidinit(dev, raidPtr, unit)

int unit;

{

int retcode;

- /* int ix; */

- /* struct raidbuf *raidbp; */

struct raid_softc *rs;

retcode = 0;

@@ -1346,53 +1350,14 @@ raidinit(dev, raidPtr, unit)

}

- * This kernel thread never exits. It is created once, and persists

- * until the system reboots.

- */

-void

-rf_ReconKernelThread()

- struct rf_recon_req *req;

- int s;

- /*

- * XXX not sure what spl() level we should be at here...

- * probably splbio()

- */

- s = splbio();

- while (1) {

- /* grab the next reconstruction request from the queue */

- LOCK_RECON_Q_MUTEX();

- while (!recon_queue) {

- UNLOCK_RECON_Q_MUTEX();

- tsleep(&recon_queue, PRIBIO,

- "raidframe recon", 0);

- LOCK_RECON_Q_MUTEX();

- }

- req = recon_queue;

- recon_queue = recon_queue->next;

- UNLOCK_RECON_Q_MUTEX();

- /*

- * If flags specifies that we should start recon, this call

- * will not return until reconstruction completes, fails, or

- * is aborted.

- */

- rf_FailDisk((RF_Raid_t *)req->raidPtr, req->row, req->col,

- ((req->flags&RF_FDFLAGS_RECON) ? 1 : 0));

- RF_Free(req, sizeof *req);

- }

-/*

* Wake up the daemon & tell it to get us a spare table

* XXX

* The entries in the queues should be tagged with the raidPtr so that in the

* extremely rare case that two recons happen at once, we know for

* which device were requesting a spare table.

* XXX

+ *

+ * XXX This code is not currently used. GO

int

rf_GetSpareTableFromDaemon(req)

@@ -1409,11 +1374,6 @@ rf_GetSpareTableFromDaemon(req)

while (!rf_sparet_resp_queue) {

tsleep(&rf_sparet_resp_queue, PRIBIO,

"raidframe getsparetable", 0);

-#if 0

- mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,

- (void *)simple_lock_addr(rf_sparet_wait_mutex),

- MS_LOCK_SIMPLE);

-#endif

}

req = rf_sparet_resp_queue;

rf_sparet_resp_queue = req->next;

@@ -1431,14 +1391,12 @@ rf_GetSpareTableFromDaemon(req)

* Any calls originating in the kernel must use non-blocking I/O

* do some extra sanity checking to return "appropriate" error values for

* certain conditions (to make some standard utilities work)

+ *

+ * Formerly known as: rf_DoAccessKernel

-int

-rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)

+void

+raidstart(raidPtr)

RF_Raid_t *raidPtr;

- struct buf *bp;

- RF_RaidAccessFlags_t flags;

- void (*cbFunc)(struct buf *);

- void *cbArg;

{

RF_SectorCount_t num_blocks, pb, sum;

RF_RaidAddr_t raid_addr;

@@ -1448,95 +1406,114 @@ rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)

int unit;

struct raid_softc *rs;

int do_async;

- /* XXX The dev_t used here should be for /dev/[r]raid* !!! */

+ struct buf *bp;

+ struct buf *dp;

unit = raidPtr->raidid;

rs = &raid_softc[unit];

- /*

- * Ok, for the bp we have here, bp->b_blkno is relative to the

- * partition.. Need to make it absolute to the underlying device..

- */

- blocknum = bp->b_blkno;

- if (DISKPART(bp->b_dev) != RAW_PART) {

- pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];

- blocknum += pp->p_offset;

- db1_printf(

- ("updated: %d %d\n", DISKPART(bp->b_dev), pp->p_offset));

- } else {

- db1_printf(("Is raw..\n"));

- }

- db1_printf(("Blocks: %d, %d\n", (int)bp->b_blkno, (int)blocknum));

- db1_printf(("bp->b_bcount = %d\n", (int)bp->b_bcount));

- db1_printf(("bp->b_resid = %d\n", (int)bp->b_resid));

- /*

- * *THIS* is where we adjust what block we're going to... but

- * DO NOT TOUCH bp->b_blkno!!!

- */

- raid_addr = blocknum;

- num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;

- pb = (bp->b_bcount&raidPtr->sectorMask) ? 1 : 0;

- sum = raid_addr + num_blocks + pb;

- if (1 || rf_debugKernelAccess) {

- db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",

- (int)raid_addr, (int)sum, (int)num_blocks, (int)pb,

- (int)bp->b_resid));

- }

+ /* Check to see if we're at the limit... */

+ RF_LOCK_MUTEX(raidPtr->mutex);

+ while (raidPtr->openings > 0) {

+ RF_UNLOCK_MUTEX(raidPtr->mutex);

+ /* get the next item, if any, from the queue */

+ dp = &rs->buf_queue;

+ bp = dp->b_actf;

+ if (bp == NULL) {

+ /* nothing more to do */

+ return;

+ }

+ /* update structures */

+ dp = bp->b_actf;

+ if (dp != NULL) {

+ dp->b_actb = bp->b_actb;

+ } else {

+ rs->buf_queue.b_actb = bp->b_actb;

+ }

+ *bp->b_actb = dp;

+ /* Ok, for the bp we have here, bp->b_blkno is relative to the

+ * partition.. Need to make it absolute to the underlying

+ * device.. */

+ blocknum = bp->b_blkno;

+ if (DISKPART(bp->b_dev) != RAW_PART) {

+ pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];

+ blocknum += pp->p_offset;

+ }

+ db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,

+ (int) blocknum));

+ db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));

+ db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

+ /* *THIS* is where we adjust what block we're going to...

+ * but DO NOT TOUCH bp->b_blkno!!! */

+ raid_addr = blocknum;

+ num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;

+ pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;

+ sum = raid_addr + num_blocks + pb;

+ if (1 || rf_debugKernelAccess) {

+ db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",

+ (int) raid_addr, (int) sum, (int) num_blocks,

+ (int) pb, (int) bp->b_resid));

+ }

+ if ((sum > raidPtr->totalSectors) || (sum < raid_addr)

+ || (sum < num_blocks) || (sum < pb)) {

+ bp->b_error = ENOSPC;

+ bp->b_flags |= B_ERROR;

+ bp->b_resid = bp->b_bcount;

+ biodone(bp);

+ RF_LOCK_MUTEX(raidPtr->mutex);

+ continue;

+ }

+ /*

+ * XXX rf_DoAccess() should do this, not just DoAccessKernel()

+ */

+ if (bp->b_bcount & raidPtr->sectorMask) {

+ bp->b_error = EINVAL;

+ bp->b_flags |= B_ERROR;

+ bp->b_resid = bp->b_bcount;

+ biodone(bp);

+ RF_LOCK_MUTEX(raidPtr->mutex);

+ continue;

+ }

+ db1_printf(("Calling DoAccess..\n"));

+ RF_LOCK_MUTEX(raidPtr->mutex);

+ raidPtr->openings--;

+ RF_UNLOCK_MUTEX(raidPtr->mutex);

- if ((sum > raidPtr->totalSectors) || (sum < raid_addr) ||

- (sum < num_blocks) || (sum < pb)) {

- bp->b_error = ENOSPC;

- bp->b_flags |= B_ERROR;

- bp->b_resid = bp->b_bcount;

- biodone(bp);

- return (bp->b_error);

- }

- /*

- * XXX rf_DoAccess() should do this, not just DoAccessKernel()

- */

- if (bp->b_bcount & raidPtr->sectorMask) {

- bp->b_error = EINVAL;

- bp->b_flags |= B_ERROR;

- bp->b_resid = bp->b_bcount;

- biodone(bp);

- return (bp->b_error);

- }

- db1_printf(("Calling DoAccess..\n"));

+ /*

+ * Everything is async.

+ */

+ do_async = 1;

+ /* don't ever condition on bp->b_flags & B_WRITE.

+ * always condition on B_READ instead */

+ /* XXX we're still at splbio() here... do we *really*

+ need to be? */

- /* Put a throttle on the number of requests we handle simultanously */

+ retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?

+ RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,

+ do_async, raid_addr, num_blocks,

+ bp->b_un.b_addr, bp, NULL, NULL,

+ RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);

- RF_LOCK_MUTEX(raidPtr->mutex);

- while(raidPtr->openings <= 0) {

- RF_UNLOCK_MUTEX(raidPtr->mutex);

- (void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);

RF_LOCK_MUTEX(raidPtr->mutex);

}

- raidPtr->openings--;

RF_UNLOCK_MUTEX(raidPtr->mutex);

- /*

- * Everything is async.

- */

- do_async = 1;

- /*

- * Don't ever condition on bp->b_flags & B_WRITE.

- * always condition on B_READ instead.

- */

- retcode = rf_DoAccess(raidPtr,

- (bp->b_flags & B_READ) ? RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,

- 0, raid_addr, num_blocks, bp->b_un.b_addr, bp, NULL, NULL,

- RF_DAG_NONBLOCKING_IO|flags, NULL, cbFunc, cbArg);

- db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp, bp->b_data,

- (int)bp->b_resid));

- return (retcode);

}

/* Invoke an I/O from kernel mode. Disk queue should be locked upon entry */

@@ -1551,6 +1528,10 @@ rf_DispatchKernelIO(queue, req)

struct raidbuf *raidbp = NULL;

struct raid_softc *rs;

int unit;

+ int s;

+ s=0;

+ /* s = splbio();*/ /* want to test this */

* XXX along with the vnode, we also need the softc associated with

@@ -1600,6 +1581,8 @@ rf_DispatchKernelIO(queue, req)

raidbp->rf_obp = bp;

raidbp->req = req;

+ LIST_INIT(&raidbp->rf_buf.b_dep);

switch (req->type) {

case RF_IO_TYPE_NOP:

/* Used primarily to unlock a locked queue. */

@@ -1663,6 +1646,7 @@ rf_DispatchKernelIO(queue, req)

panic("bad req->type in rf_DispatchKernelIO");

}

db1_printf(("Exiting from DispatchKernelIO\n"));

+ /* splx(s); */ /* want to test this */

return (0);

}

@@ -1682,31 +1666,20 @@ rf_KernelWakeupFunc(vbp)

int unit;

int s;

- s = splbio(); /* XXX */

+ s = splbio();

db1_printf(("recovering the request queue:\n"));

req = raidbp->req;

bp = raidbp->rf_obp;

- db1_printf(("bp=0x%x\n", bp));

queue = (RF_DiskQueue_t *)req->queue;

if (raidbp->rf_buf.b_flags & B_ERROR) {

- db1_printf(

- ("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error));

bp->b_flags |= B_ERROR;

bp->b_error =

raidbp->rf_buf.b_error ? raidbp->rf_buf.b_error : EIO;

}

- db1_printf(("raidbp->rf_buf.b_bcount=%d\n",

- (int)raidbp->rf_buf.b_bcount));

- db1_printf(("raidbp->rf_buf.b_bufsize=%d\n",

- (int)raidbp->rf_buf.b_bufsize));

- db1_printf(("raidbp->rf_buf.b_resid=%d\n",

- (int)raidbp->rf_buf.b_resid));

- db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));

#if 1

/* XXX Methinks this could be wrong... */

bp->b_resid = raidbp->rf_buf.b_resid;

@@ -1728,7 +1701,6 @@ rf_KernelWakeupFunc(vbp)

unit = queue->raidPtr->raidid; /* *Much* simpler :-> */

-#if 1

* XXX Ok, let's get aggressive... If B_ERROR is set, let's go

* ballistic, and mark the component as hosed...

@@ -1751,26 +1723,19 @@ rf_KernelWakeupFunc(vbp)

/* printf("Disk already marked as dead!\n"); */

}

-#endif

rs = &raid_softc[unit];

RAIDPUTBUF(raidbp);

if (bp->b_resid==0) {

- db1_printf((

- "Disk is no longer busy for this buffer... %d %ld %ld\n",

- unit, bp->b_resid, bp->b_bcount));

/* XXX is this the right place for a disk_unbusy()??!??!?!? */

disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));

- } else {

- db1_printf(("b_resid is still %ld\n", bp->b_resid));

}

rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);

(req->CompleteFunc)(req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

- /* printf("Exiting rf_KernelWakeupFunc\n"); */

- splx(s); /* XXX */

+ splx(s);

}

@@ -1797,13 +1762,9 @@ rf_InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,

bp->b_bufsize = bp->b_bcount;

bp->b_error = 0;

bp->b_dev = dev;

- db1_printf(("bp->b_dev is %d\n", dev));

bp->b_un.b_addr = buf;

- db1_printf(("bp->b_data=0x%x\n", bp->b_data));

bp->b_blkno = startSect;

bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */

- db1_printf(("b_bcount is: %d\n", (int)bp->b_bcount));

if (bp->b_bcount == 0) {

panic("bp->b_bcount is zero in rf_InitBP!!\n");

}

@@ -1813,29 +1774,6 @@ rf_InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,

LIST_INIT(&bp->b_dep);

}

-/* Extras... */

-#if 0

-unsigned int

-rpcc()

- /* XXX no clue what this is supposed to do.. my guess is

- that it's supposed to read the CPU cycle counter... */

- /* db1_printf("this is supposed to do something useful too!??\n"); */

- return (0);

-int

-rf_GetSpareTableFromDaemon(req)

- RF_SparetWait_t *req;

- int retcode=1;

- printf("This is supposed to do something useful!!\n"); /* XXX */

- return (retcode);

-#endif

void

raidgetdefaultlabel(raidPtr, rs, lp)

RF_Raid_t *raidPtr;

@@ -1970,7 +1908,9 @@ raidlookup(path, p, vpp)

NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);

if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {

- db1_printf(("RAIDframe: vn_open returned %d\n", error));

+#ifdef DEBUG

+ printf("RAIDframe: vn_open returned %d\n", error);

+#endif

return (error);

}

vp = nd.ni_vp;

@@ -2355,3 +2295,90 @@ rf_update_component_labels( raidPtr )

}

/* printf("Component labels updated\n"); */

}

+void

+rf_ReconThread(req)

+ struct rf_recon_req *req;

+ int s;

+ RF_Raid_t *raidPtr;

+ s = splbio();

+ raidPtr = (RF_Raid_t *) req->raidPtr;

+ raidPtr->recon_in_progress = 1;

+ rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,

+ ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

+ /* XXX get rid of this! we don't need it at all.. */

+ RF_Free(req, sizeof(*req));

+ raidPtr->recon_in_progress = 0;

+ splx(s);

+ /* That's all... */

+ kthread_exit(0); /* does not return */

+void

+rf_RewriteParityThread(raidPtr)

+ RF_Raid_t *raidPtr;

+ int retcode;

+ int s;

+ raidPtr->parity_rewrite_in_progress = 1;

+ s = splbio();

+ retcode = rf_RewriteParity(raidPtr);

+ splx(s);

+ if (retcode) {

+ printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);

+ } else {

+ /* set the clean bit! If we shutdown correctly,

+ the clean bit on each component label will get

+ set */

+ raidPtr->parity_good = RF_RAID_CLEAN;

+ }

+ raidPtr->parity_rewrite_in_progress = 0;

+ /* That's all... */

+ kthread_exit(0); /* does not return */

+void

+rf_CopybackThread(raidPtr)

+ RF_Raid_t *raidPtr;

+ int s;

+ raidPtr->copyback_in_progress = 1;

+ s = splbio();

+ rf_CopybackReconstructedData(raidPtr);

+ splx(s);

+ raidPtr->copyback_in_progress = 0;

+ /* That's all... */

+ kthread_exit(0); /* does not return */

+void

+rf_ReconstructInPlaceThread(req)

+ struct rf_recon_req *req;

+ int retcode;

+ int s;

+ RF_Raid_t *raidPtr;

+ s = splbio();

+ raidPtr = req->raidPtr;

+ raidPtr->recon_in_progress = 1;

+ retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);

+ RF_Free(req, sizeof(*req));

+ raidPtr->recon_in_progress = 0;

+ splx(s);

+ /* That's all... */

+ kthread_exit(0); /* does not return */