summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoel Sing <jsing@cvs.openbsd.org>2009-06-26 14:50:45 +0000
committerJoel Sing <jsing@cvs.openbsd.org>2009-06-26 14:50:45 +0000
commit0fcb8707b5be671dd4ae8de7278040d06c479451 (patch)
treef3e3bf0faadd7cbbf1e8a32dfe765a5cc0789615
parent81a987313749100cc29b57228dc112955b933448 (diff)
Overhaul sr_boot_assembly(). The original code had a number of problems,
including a "chunk id already in use" bug that occurred when assembling more than one volume at boot. Volumes are now kept in order, as are the chunks which they are assembled from. Duplicated chunk IDs are now handled appropriately, with the chunk that has the most recent ondisk metadata being used. Also tested by nicm@ who got bitten by the "chunk id already in use" bug. ok marco@
-rw-r--r--sys/dev/softraid.c194
-rw-r--r--sys/dev/softraidvar.h17
2 files changed, 164 insertions, 47 deletions
diff --git a/sys/dev/softraid.c b/sys/dev/softraid.c
index 999e5341e6b..458df7de334 100644
--- a/sys/dev/softraid.c
+++ b/sys/dev/softraid.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraid.c,v 1.160 2009/06/24 12:08:15 jsing Exp $ */
+/* $OpenBSD: softraid.c,v 1.161 2009/06/26 14:50:44 jsing Exp $ */
/*
* Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
* Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
@@ -955,12 +955,17 @@ int
sr_boot_assembly(struct sr_softc *sc)
{
struct device *dv;
- struct sr_metadata_list_head mlh;
- struct sr_metadata_list *mle, *mle2;
- struct sr_metadata *m1, *m2;
struct bioc_createraid bc;
- int rv = 0, no_dev, i;
- dev_t *dt = NULL;
+ struct sr_metadata_list_head mlh;
+ struct sr_metadata_list *mle, *mlenext, *mle1, *mle2;
+ struct sr_metadata *metadata;
+ struct sr_boot_volume_head bvh;
+ struct sr_boot_volume *vol, *vp1, *vp2;
+ u_int32_t chunk_id;
+ u_int64_t *ondisk = NULL;
+ dev_t *devs = NULL;
+ char devname[32];
+ int rv = 0, i;
DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc));
@@ -984,72 +989,169 @@ sr_boot_assembly(struct sr_softc *sc)
}
/*
- * XXX poor mans hack that doesn't keep disks in order and does not
- * roam disks correctly. replace this with something smarter that
- * orders disks by volid, chunkid and uuid.
+ * Create a list of volumes and associate chunks with each volume.
*/
- dt = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF, M_WAITOK);
- for (i = 0; i < BIOC_CRMAXLEN; i++)
- dt[i] = NODEV; /* mark device as illegal */
- SLIST_FOREACH(mle, &mlh, sml_link) {
- /* chunk used already? */
- if (mle->sml_used)
- continue;
+ SLIST_INIT(&bvh);
+ for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mlenext) {
- no_dev = 0;
- m1 = (struct sr_metadata *)&mle->sml_metadata;
- SLIST_FOREACH(mle2, &mlh, sml_link) {
- /* chunk used already? */
- if (mle2->sml_used)
- continue;
+ mlenext = SLIST_NEXT(mle, sml_link);
+ SLIST_REMOVE(&mlh, mle, sr_metadata_list, sml_link);
- m2 = (struct sr_metadata *)&mle2->sml_metadata;
+ metadata = (struct sr_metadata *)&mle->sml_metadata;
+ mle->sml_chunk_id = metadata->ssdi.ssd_chunk_id;
- /* are we the same volume? */
- if (m1->ssdi.ssd_volid != m2->ssdi.ssd_volid)
- continue;
-
- /* same uuid? */
- if (bcmp(&m1->ssdi.ssd_uuid, &m2->ssdi.ssd_uuid,
- sizeof(m1->ssdi.ssd_uuid)))
- continue;
+ SLIST_FOREACH(vol, &bvh, sbv_link) {
+ if (bcmp(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid,
+ sizeof(metadata->ssdi.ssd_uuid)) == 0)
+ break;
+ }
- /* sanity */
- if (dt[m2->ssdi.ssd_chunk_id] != NODEV) {
- printf("%s: chunk id already in use; can not "
- "assemble volume\n", DEVNAME(sc));
+ if (vol == NULL) {
+ vol = malloc(sizeof(struct sr_boot_volume),
+ M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO);
+ if (vol == NULL) {
+ printf("%s: failed to allocate boot volume!\n",
+ DEVNAME(sc));
goto unwind;
}
- dt[m2->ssdi.ssd_chunk_id] = mle2->sml_mm;
- no_dev++;
- mle2->sml_used = 1;
+
+ vol->sbv_level = metadata->ssdi.ssd_level;
+ vol->sbv_volid = metadata->ssdi.ssd_volid;
+ vol->sbv_chunk_no = metadata->ssdi.ssd_chunk_no;
+ bcopy(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid,
+ sizeof(metadata->ssdi.ssd_uuid));
+ SLIST_INIT(&vol->sml);
+
+ /* Maintain volume order. */
+ vp2 = NULL;
+ SLIST_FOREACH(vp1, &bvh, sbv_link) {
+ if (vp1->sbv_volid > vol->sbv_volid)
+ break;
+ vp2 = vp1;
+ }
+ if (vp2 == NULL) {
+ DNPRINTF(SR_D_META, "%s: insert volume %u "
+ "at head\n", DEVNAME(sc), vol->sbv_volid);
+ SLIST_INSERT_HEAD(&bvh, vol, sbv_link);
+ } else {
+ DNPRINTF(SR_D_META, "%s: insert volume %u "
+ "after %u\n", DEVNAME(sc), vol->sbv_volid,
+ vp2->sbv_volid);
+ SLIST_INSERT_AFTER(vp2, vol, sbv_link);
+ }
}
- if (m1->ssdi.ssd_chunk_no != no_dev) {
+
+ /* Maintain chunk order. */
+ mle2 = NULL;
+ SLIST_FOREACH(mle1, &vol->sml, sml_link) {
+ if (mle1->sml_chunk_id > mle->sml_chunk_id)
+ break;
+ mle2 = mle1;
+ }
+ if (mle2 == NULL) {
+ DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
+ "at head\n", DEVNAME(sc), vol->sbv_volid,
+ mle->sml_chunk_id);
+ SLIST_INSERT_HEAD(&vol->sml, mle, sml_link);
+ } else {
+ DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
+ "after %u\n", DEVNAME(sc), vol->sbv_volid,
+ mle->sml_chunk_id, mle2->sml_chunk_id);
+ SLIST_INSERT_AFTER(mle2, mle, sml_link);
+ }
+
+ vol->sbv_dev_no++;
+ }
+
+ /* Allocate memory for device and ondisk version arrays. */
+ devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF,
+ M_NOWAIT | M_CANFAIL);
+ if (devs == NULL) {
+ printf("%s: failed to allocate device array\n", DEVNAME(sc));
+ goto unwind;
+ }
+ ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF,
+ M_NOWAIT | M_CANFAIL);
+ if (ondisk == NULL) {
+ printf("%s: failed to allocate ondisk array\n", DEVNAME(sc));
+ goto unwind;
+ }
+
+ SLIST_FOREACH(vol, &bvh, sbv_link) {
+
+#ifdef SR_DEBUG
+ DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc));
+ if (sr_debug & SR_D_META)
+ sr_uuid_print(&vol->sbv_uuid, 0);
+ DNPRINTF(SR_D_META, " volid %u with %u chunks\n",
+ vol->sbv_volid, vol->sbv_chunk_no);
+#endif
+
+ for (i = 0; i < BIOC_CRMAXLEN; i++) {
+ devs[i] = NODEV; /* mark device as illegal */
+ ondisk[i] = 0;
+ }
+
+ SLIST_FOREACH(mle, &vol->sml, sml_link) {
+ metadata = (struct sr_metadata *)&mle->sml_metadata;
+ chunk_id = metadata->ssdi.ssd_chunk_id;
+
+ if (devs[chunk_id] != NODEV) {
+ vol->sbv_dev_no--;
+ sr_meta_getdevname(sc, mle->sml_mm, devname,
+ sizeof(devname));
+ printf("%s: found duplicate chunk %u for "
+ "volume %u on device %s\n", DEVNAME(sc),
+ chunk_id, vol->sbv_volid, devname);
+ }
+
+ if (devs[chunk_id] == NODEV ||
+ metadata->ssd_ondisk > ondisk[chunk_id]) {
+ devs[chunk_id] = mle->sml_mm;
+ ondisk[chunk_id] = metadata->ssd_ondisk;
+ DNPRINTF(SR_D_META, "%s: using ondisk "
+ "metadata version %llu for chunk %u\n",
+ DEVNAME(sc), ondisk[chunk_id], chunk_id);
+ }
+ }
+
+ if (vol->sbv_chunk_no != vol->sbv_dev_no) {
printf("%s: not all chunks were provided; "
"attempting to bring volume %d online\n",
- DEVNAME(sc), m1->ssdi.ssd_volid);
- no_dev = m1->ssdi.ssd_chunk_no;
+ DEVNAME(sc), vol->sbv_volid);
}
bzero(&bc, sizeof(bc));
- bc.bc_level = m1->ssdi.ssd_level;
- bc.bc_dev_list_len = no_dev * sizeof(dev_t);
- bc.bc_dev_list = dt;
+ bc.bc_level = vol->sbv_level;
+ bc.bc_dev_list_len = vol->sbv_chunk_no * sizeof(dev_t);
+ bc.bc_dev_list = devs;
bc.bc_flags = BIOC_SCDEVT;
sr_ioctl_createraid(sc, &bc, 0);
+
rv++;
}
/* done with metadata */
unwind:
+ for (vp1 = SLIST_FIRST(&bvh); vp1 != SLIST_END(&bvh); vp1 = vp2) {
+ vp2 = SLIST_NEXT(vp1, sbv_link);
+ for (mle1 = SLIST_FIRST(&vp1->sml);
+ mle1 != SLIST_END(&vp1->sml); mle1 = mle2) {
+ mle2 = SLIST_NEXT(mle1, sml_link);
+ free(mle1, M_DEVBUF);
+ }
+ free(vp1, M_DEVBUF);
+ }
for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) {
mle2 = SLIST_NEXT(mle, sml_link);
free(mle, M_DEVBUF);
}
SLIST_INIT(&mlh);
- if (dt)
- free(dt, M_DEVBUF);
+ if (devs)
+ free(devs, M_DEVBUF);
+ if (ondisk)
+ free(ondisk, M_DEVBUF);
return (rv);
}
diff --git a/sys/dev/softraidvar.h b/sys/dev/softraidvar.h
index 07134bfdc24..b27997916cd 100644
--- a/sys/dev/softraidvar.h
+++ b/sys/dev/softraidvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraidvar.h,v 1.77 2009/06/24 12:06:00 jsing Exp $ */
+/* $OpenBSD: softraidvar.h,v 1.78 2009/06/26 14:50:44 jsing Exp $ */
/*
* Copyright (c) 2006 Marco Peereboom <marco@peereboom.us>
* Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
@@ -330,6 +330,7 @@ struct sr_aoe {
struct sr_metadata_list {
u_int8_t sml_metadata[SR_META_SIZE * 512];
dev_t sml_mm;
+ u_int32_t sml_chunk_id;
int sml_used;
SLIST_ENTRY(sr_metadata_list) sml_link;
@@ -337,6 +338,20 @@ struct sr_metadata_list {
SLIST_HEAD(sr_metadata_list_head, sr_metadata_list);
+struct sr_boot_volume {
+ struct sr_uuid sbv_uuid; /* Volume UUID. */
+ u_int32_t sbv_level; /* Level. */
+ u_int32_t sbv_volid; /* Volume ID. */
+ u_int32_t sbv_chunk_no; /* Number of chunks. */
+ u_int32_t sbv_dev_no; /* Number of devs discovered. */
+
+ struct sr_metadata_list_head sml; /* List of metadata. */
+
+ SLIST_ENTRY(sr_boot_volume) sbv_link;
+};
+
+SLIST_HEAD(sr_boot_volume_head, sr_boot_volume);
+
struct sr_chunk {
struct sr_meta_chunk src_meta; /* chunk meta data */
struct sr_meta_opt src_opt; /* optional metadata */