summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/dev/softraid.c284
-rw-r--r--sys/dev/softraid_raid1.c32
-rw-r--r--sys/dev/softraidvar.h10
3 files changed, 285 insertions, 41 deletions
diff --git a/sys/dev/softraid.c b/sys/dev/softraid.c
index 4c88b0a940a..90b9f241eaf 100644
--- a/sys/dev/softraid.c
+++ b/sys/dev/softraid.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraid.c,v 1.134 2009/05/30 21:20:34 marco Exp $ */
+/* $OpenBSD: softraid.c,v 1.135 2009/06/02 00:58:16 marco Exp $ */
/*
* Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
* Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
@@ -38,6 +38,7 @@
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/workq.h>
+#include <sys/kthread.h>
#ifdef AOE
#include <sys/mbuf.h>
@@ -54,7 +55,6 @@
#include <dev/rndvar.h>
/* #define SR_FANCY_STATS */
-/* #define SR_UNIT_TEST */
#ifdef SR_DEBUG
#define SR_FANCY_STATS
@@ -117,6 +117,8 @@ void sr_checksum(struct sr_softc *, void *, void *,
u_int32_t);
int sr_boot_assembly(struct sr_softc *);
int sr_already_assembled(struct sr_discipline *);
+void sr_rebuild(void *);
+void sr_rebuild_thread(void *);
/* don't include these on RAMDISK */
#ifndef SMALL_KERNEL
@@ -1434,6 +1436,7 @@ sr_wu_put(struct sr_workunit *wu)
wu->swu_blk_end = 0;
wu->swu_collider = NULL;
wu->swu_fake = 0;
+ wu->swu_flags = 0;
while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
@@ -1708,6 +1711,7 @@ sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
{
int i, vol, rv = EINVAL;
struct sr_discipline *sd;
+ daddr64_t rb, sz;
for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
/* XXX this will not work when we stagger disciplines */
@@ -1721,6 +1725,11 @@ sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT;
bv->bv_level = sd->sd_meta->ssdi.ssd_level;
bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no;
+ if (bv->bv_status == BIOC_SVREBUILD) {
+ sz = sd->sd_meta->ssdi.ssd_size;
+ rb = sd->sd_meta->ssd_rebuild;
+ bv->bv_percent = 100 - ((sz * 100 - rb * 100) / sz);
+ }
strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname,
sizeof(bv->bv_dev));
strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor,
@@ -1766,13 +1775,18 @@ sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd)
int
sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs)
{
- int rv = EINVAL;
-
-#ifdef SR_UNIT_TEST
- int i, vol, state, found, tg;
+ int rv = EINVAL, part;
+ int i, c, found, vol, open = 0;
struct sr_discipline *sd;
- struct sr_chunk *ch_entry;
- struct sr_chunk_head *cl;
+ char devname[32];
+ struct bdevsw *bdsw;
+ dev_t dev;
+ daddr64_t size, csize;
+ struct disklabel label;
+ struct sr_meta_chunk *old, *new;
+
+ /* XXX disabled for now */
+ goto done;
if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED)
goto done;
@@ -1785,46 +1799,109 @@ sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs)
continue;
sd = sc->sc_dis[i];
- found = 0;
- tg = 0;
- cl = &sd->sd_vol.sv_chunk_list;
- SLIST_FOREACH(ch_entry, cl, src_link) {
- if (ch_entry->src_dev_mm == bs->bs_other_id) {
- found = 1;
+ /* XXX check that we can even do a rebuild on this discipline */
+
+ /* make sure volume is in the right state */
+ if (sd->sd_vol_status == BIOC_SVREBUILD) {
+ printf("%s: rebuild already in progres\n", DEVNAME(sc));
+ goto done;
+ }
+ if (sd->sd_vol_status != BIOC_SVDEGRADED) {
+ printf("%s: %s not degraded\n", DEVNAME(sc),
+ sd->sd_meta->ssd_devname);
+ goto done;
+ }
+
+ /* find offline chunk */
+ for (c = 0, found = -1; c < sd->sd_meta->ssdi.ssd_chunk_no; c++)
+ if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status ==
+ BIOC_SDOFFLINE) {
+ found = c;
+ new = &sd->sd_vol.sv_chunks[c]->src_meta;
break;
+ } else {
+ csize = sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_size;
+ old = &sd->sd_vol.sv_chunks[c]->src_meta;
}
- tg++;
+ if (found == -1) {
+ printf("%s: no offline chunks available for rebuild\n",
+ DEVNAME(sc));
+ goto done;
}
- if (found == 0)
+
+ /* populate meta entry */
+ dev = (dev_t)bs->bs_other_id;
+ sr_meta_getdevname(sc, dev, devname, sizeof(devname));
+ bdsw = bdevsw_lookup(dev);
+
+ if (bdsw->d_open(dev, FREAD | FWRITE , S_IFBLK, curproc)) {
+ DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't "
+ "open %s\n", DEVNAME(sc), devname);
goto done;
+ }
+ open = 1; /* close dev on error */
+
+ /* get partition */
+ part = DISKPART(dev);
+ if ((*bdsw->d_ioctl)(dev, DIOCGDINFO, (void *)&label, FREAD,
+ curproc)) {
+ DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl "
+ "failed\n", DEVNAME(sc));
+ goto done;
+ }
+ if (label.d_partitions[part].p_fstype != FS_RAID) {
+ printf("%s: %s partition not of type RAID (%d)\n",
+ DEVNAME(sc) , devname,
+ label.d_partitions[part].p_fstype);
+ goto done;
+ }
- switch (bs->bs_status) {
- case BIOC_SSONLINE:
- state = BIOC_SDONLINE;
- break;
- case BIOC_SSOFFLINE:
- state = BIOC_SDOFFLINE;
- break;
- case BIOC_SSHOTSPARE:
- state = BIOC_SDHOTSPARE;
- break;
- case BIOC_SSREBUILD:
- state = BIOC_SDREBUILD;
- break;
- default:
- printf("invalid state %d\n", bs->bs_status);
+ /* is partition large enough? */
+ size = DL_GETPSIZE(&label.d_partitions[part]) -
+ SR_META_SIZE - SR_META_OFFSET;
+ if (size < csize) {
+ printf("%s: partition too small, at least %llu B "
+ "required\n", DEVNAME(sc), csize << DEV_BSHIFT);
+ goto done;
+ } else if (size > csize)
+ printf("%s: partition too large, wasting %llu B\n",
+ DEVNAME(sc), (size - csize) << DEV_BSHIFT);
+
+ /* XXX make sure we are not stomping on some other partition */
+
+ /* recreate metadata */
+ open = 0; /* leave dev open from here on out */
+ sd->sd_vol.sv_chunks[found]->src_dev_mm = dev;
+ new->scmi.scm_volid = old->scmi.scm_volid;
+ new->scmi.scm_chunk_id = found;
+ strlcpy(new->scmi.scm_devname, devname,
+ sizeof new->scmi.scm_devname);
+ new->scmi.scm_size = size;
+ new->scmi.scm_coerced_size = old->scmi.scm_coerced_size;
+ bcopy(&old->scmi.scm_uuid, &new->scmi.scm_uuid,
+ sizeof new->scmi.scm_uuid);
+ sr_checksum(sc, new, &new->scm_checksum,
+ sizeof(struct sr_meta_chunk_invariant));
+ sd->sd_set_chunk_state(sd, found, BIOC_SDREBUILD);
+ if (sr_meta_save(sd, SR_META_DIRTY)) {
+ printf("%s: could not save metadata to %s\n",
+ DEVNAME(sc), devname);
goto done;
}
- sd->sd_set_chunk_state(sd, tg, bs->bs_status);
+ printf("%s: trying rebuild %s from %s\n", DEVNAME(sc),
+ sd->sd_meta->ssd_devname, devname);
- rv = 0;
+ kthread_create_deferred(sr_rebuild, sd);
break;
}
+ rv = 0;
done:
-#endif
+ if (open)
+ (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc);
+
return (rv);
}
@@ -2153,6 +2230,12 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
rv = sr_meta_save(sd, SR_META_DIRTY);
sd->sd_shutdownhook = shutdownhook_establish(sr_shutdown, sd);
+ if (sd->sd_vol_status == BIOC_SVREBUILD) {
+ printf("%s: resuming rebuild on %s\n", DEVNAME(sc),
+ sd->sd_meta->ssd_devname);
+ kthread_create_deferred(sr_rebuild, sd);
+ }
+
return (rv);
unwind:
sr_discipline_shutdown(sd);
@@ -2706,6 +2789,139 @@ queued:
return (1);
}
+/*
+ * Callback handed to kthread_create_deferred(): spawn the kernel thread
+ * that runs sr_rebuild_thread() for the discipline passed in arg.
+ *
+ * The new proc is stored in sd->sd_background_proc.  There is nobody to
+ * return an error to, so a failed kthread_create() is only reported on
+ * the console.
+ */
+void
+sr_rebuild(void *arg)
+{
+	struct sr_discipline	*sd = arg;
+	struct sr_softc		*sc = sd->sd_sc;
+
+	if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc,
+	    DEVNAME(sc)) != 0)
+		printf("%s: unable to start background operation\n",
+		    DEVNAME(sc));
+}
+
+/*
+ * Kernel thread body that rebuilds a volume.
+ *
+ * The volume (ssd_size blocks) is walked in pieces of SR_REBUILD_IO_SIZE
+ * blocks; only the last piece may be shorter.  For every piece a READ_16
+ * and a WRITE_16 work unit, both flagged SR_WUF_REBUILD and sharing one
+ * bounce buffer, are pushed through the discipline's sd_scsi_rw hook.
+ * The write is parked on sd_wu_defq as the collider of the read, and the
+ * thread sleeps until the write work unit is marked SR_WUF_REBUILDIOCOMP.
+ * Progress is recorded in sd_meta->ssd_rebuild after each piece.
+ *
+ * When the walk is finished ssd_rebuild is reset to 0, the first chunk
+ * found in state BIOC_SDREBUILD is switched to BIOC_SDONLINE, metadata is
+ * saved and the thread exits.
+ *
+ * arg is the struct sr_discipline being rebuilt.
+ */
+void
+sr_rebuild_thread(void *arg)
+{
+	struct sr_discipline	*sd = arg;
+	struct sr_softc		*sc = sd->sd_sc;
+	daddr64_t		whole_blk, partial_blk, blk, sz, lba;
+	struct sr_workunit	*wu_r, *wu_w;
+	struct scsi_xfer	xs_r, xs_w;
+	struct scsi_rw_16	cr, cw;
+	int			c, s, slept;
+	u_int8_t		*buf;
+
+	whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE;
+	partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE;
+
+	buf = malloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, M_DEVBUF, M_WAITOK);
+	for (blk = 0; blk <= whole_blk; blk++) {
+		/*
+		 * Every piece starts on a full-stride boundary; the start
+		 * address must not depend on the (possibly short) length
+		 * of the piece itself.
+		 */
+		lba = blk * SR_REBUILD_IO_SIZE;
+		if (blk == whole_blk)
+			sz = partial_blk;
+		else
+			sz = SR_REBUILD_IO_SIZE;
+
+		/* size is a multiple of the stride, there is no tail */
+		if (sz == 0)
+			break;
+
+		/* XXX be nicer than panic */
+		if ((wu_r = sr_wu_get(sd)) == NULL)
+			panic("%s: rebuild exhausted wu_r", DEVNAME(sc));
+
+		/* setup read io */
+		bzero(&xs_r, sizeof xs_r);
+		bzero(&cr, sizeof cr);
+		xs_r.error = XS_NOERROR;
+		xs_r.flags = SCSI_DATA_IN;
+		xs_r.datalen = sz << DEV_BSHIFT;
+		xs_r.data = buf;
+		xs_r.cmdlen = 16;
+		cr.opcode = READ_16;
+		_lto4b(sz, cr.length);
+		_lto8b(lba, cr.addr);
+		xs_r.cmd = (struct scsi_generic *)&cr;
+		wu_r->swu_flags = SR_WUF_REBUILD;
+		wu_r->swu_xs = &xs_r;
+		/* XXX be nicer than panic */
+		if (sd->sd_scsi_rw(wu_r))
+			panic("read failed");
+
+		/* XXX be nicer than panic */
+		if ((wu_w = sr_wu_get(sd)) == NULL)
+			panic("%s: rebuild exhausted wu_w", DEVNAME(sc));
+
+		/* setup write io */
+		bzero(&xs_w, sizeof xs_w);
+		bzero(&cw, sizeof cw);
+		xs_w.error = XS_NOERROR;
+		xs_w.flags = SCSI_DATA_OUT;
+		xs_w.datalen = sz << DEV_BSHIFT;
+		xs_w.data = buf;
+		xs_w.cmdlen = 16;
+		cw.opcode = WRITE_16;
+		_lto4b(sz, cw.length);
+		_lto8b(lba, cw.addr);
+		xs_w.cmd = (struct scsi_generic *)&cw;
+		wu_w->swu_flags = SR_WUF_REBUILD;
+		wu_w->swu_xs = &xs_w;
+		/* XXX be nicer than panic */
+		if (sd->sd_scsi_rw(wu_w))
+			panic("write failed");
+
+		/*
+		 * collide with the read io so that we get automatically
+		 * started when the read is done
+		 */
+		wu_w->swu_state = SR_WU_DEFERRED;
+		wu_r->swu_collider = wu_w;
+		s = splbio();
+		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
+
+		/* schedule io unless it collided and got queued */
+		if (!sr_check_io_collision(wu_r))
+			sr_raid_startwu(wu_r);
+
+		/*
+		 * Wait for the write to complete.  Stay at splbio while
+		 * testing the flag and going to sleep so that the wakeup
+		 * issued from the interrupt path cannot slip in between
+		 * the test and the tsleep and get lost.
+		 */
+		slept = 0;
+		while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) {
+			tsleep(wu_w, PRIBIO, "sr_rebuild", 0);
+			slept = 1;
+		}
+		splx(s);
+
+		/* yield if we didn't sleep */
+		if (slept == 0)
+			tsleep(sc, PWAIT, "sr_yield", 1);
+
+		sr_wu_put(wu_r);
+		sr_wu_put(wu_w);
+
+		/* remember how far we got for progress reporting */
+		sd->sd_meta->ssd_rebuild = lba;
+		/* XXX save metadata periodically */
+	}
+
+	/* all done */
+	sd->sd_meta->ssd_rebuild = 0;
+
+	/* bring the first chunk that was being rebuilt back online */
+	for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++)
+		if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status ==
+		    BIOC_SDREBUILD) {
+			sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE);
+			break;
+		}
+
+	if (sr_meta_save(sd, SR_META_DIRTY))
+		printf("%s: could not save metadata to %s\n",
+		    DEVNAME(sc), sd->sd_meta->ssd_devname);
+
+	free(buf, M_DEVBUF);
+	kthread_exit(0);
+}
+
#ifndef SMALL_KERNEL
int
sr_sensors_create(struct sr_discipline *sd)
diff --git a/sys/dev/softraid_raid1.c b/sys/dev/softraid_raid1.c
index 9edb0783cfe..f705e1befa1 100644
--- a/sys/dev/softraid_raid1.c
+++ b/sys/dev/softraid_raid1.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraid_raid1.c,v 1.9 2009/05/11 14:06:21 jsing Exp $ */
+/* $OpenBSD: softraid_raid1.c,v 1.10 2009/06/02 00:58:16 marco Exp $ */
/*
* Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
*
@@ -244,6 +244,7 @@ sr_raid1_set_vol_state(struct sr_discipline *sd)
case BIOC_SVONLINE: /* can go to same state */
case BIOC_SVOFFLINE:
case BIOC_SVDEGRADED:
+ case BIOC_SVREBUILD: /* happens on boot */
break;
default:
goto die;
@@ -427,6 +428,10 @@ ragain:
s = splbio();
+ /* rebuild io, let rebuild routine deal with it */
+ if (wu->swu_flags & SR_WUF_REBUILD)
+ goto queued;
+
/* current io failed, restart */
if (wu->swu_state == SR_WU_RESTART)
goto start;
@@ -541,9 +546,17 @@ sr_raid1_intr(struct buf *bp)
printf("%s: wu: %p not on pending queue\n",
DEVNAME(sc), wu);
- /* do not change the order of these 2 functions */
- sr_wu_put(wu);
- sr_scsi_done(sd, xs);
+ if (wu->swu_flags & SR_WUF_REBUILD) {
+ if (wu->swu_xs->flags & SCSI_DATA_OUT) {
+ //printf("waking up write\n");
+ wu->swu_flags |= SR_WUF_REBUILDIOCOMP;
+ wakeup(wu);
+ }
+ } else {
+ /* do not change the order of these 2 functions */
+ sr_wu_put(wu);
+ scsi_done(xs);
+ }
if (sd->sd_sync && sd->sd_wu_pending == 0)
wakeup(sd);
@@ -555,8 +568,15 @@ retry:
bad:
xs->error = XS_DRIVER_STUFFUP;
xs->flags |= ITSDONE;
- sr_wu_put(wu);
- sr_scsi_done(sd, xs);
+ if (wu->swu_flags & SR_WUF_REBUILD) {
+ wu->swu_flags |= SR_WUF_REBUILDIOCOMP;
+ wakeup(wu);
+ } else {
+ /* do not change the order of these 2 functions */
+ sr_wu_put(wu);
+ scsi_done(xs);
+ }
+
splx(s);
}
diff --git a/sys/dev/softraidvar.h b/sys/dev/softraidvar.h
index 4c1b246fdf7..5c3a72c1ebd 100644
--- a/sys/dev/softraidvar.h
+++ b/sys/dev/softraidvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraidvar.h,v 1.67 2009/05/11 14:06:21 jsing Exp $ */
+/* $OpenBSD: softraidvar.h,v 1.68 2009/06/02 00:58:16 marco Exp $ */
/*
* Copyright (c) 2006 Marco Peereboom <marco@peereboom.us>
* Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
@@ -215,6 +215,7 @@ extern u_int32_t sr_debug;
#define SR_MAX_CMDS 16
#define SR_MAX_STATES 7
#define SR_VM_IGNORE_DIRTY 1
+#define SR_REBUILD_IO_SIZE 128 /* blocks */
/* forward define to prevent dependency goo */
struct sr_softc;
@@ -254,6 +255,10 @@ struct sr_workunit {
#define SR_WU_RESTART 7
#define SR_WU_REQUEUE 8
+ int swu_flags; /* additional hints */
+#define SR_WUF_REBUILD (1<<0)
+#define SR_WUF_REBUILDIOCOMP (1<<1)
+
int swu_fake; /* faked wu */
/* workunit io range */
daddr64_t swu_blk_start;
@@ -425,6 +430,9 @@ struct sr_discipline {
int (*sd_scsi_inquiry)(struct sr_workunit *);
int (*sd_scsi_read_cap)(struct sr_workunit *);
int (*sd_scsi_req_sense)(struct sr_workunit *);
+
+ /* background operation */
+ struct proc *sd_background_proc;
};
struct sr_softc {