diff options
author | Joel Sing <jsing@cvs.openbsd.org> | 2009-06-26 14:50:45 +0000 |
---|---|---|
committer | Joel Sing <jsing@cvs.openbsd.org> | 2009-06-26 14:50:45 +0000 |
commit | 0fcb8707b5be671dd4ae8de7278040d06c479451 (patch) | |
tree | f3e3bf0faadd7cbbf1e8a32dfe765a5cc0789615 /sys/dev | |
parent | 81a987313749100cc29b57228dc112955b933448 (diff) |
Overhaul sr_boot_assembly(). The original code had a number of problems,
including a "chunk id already in use" bug that occurred when assembling
more than one volume at boot. Volumes are now kept in order, as are the
chunks which they are assembled from. Duplicated chunk IDs are now handled
appropriately, with the chunk that has the most recent ondisk metadata
being used.
Also tested by nicm@ who got bitten by the "chunk id already in use" bug.
ok marco@
Diffstat (limited to 'sys/dev')
-rw-r--r-- | sys/dev/softraid.c | 194 | ||||
-rw-r--r-- | sys/dev/softraidvar.h | 17 |
2 files changed, 164 insertions, 47 deletions
diff --git a/sys/dev/softraid.c b/sys/dev/softraid.c index 999e5341e6b..458df7de334 100644 --- a/sys/dev/softraid.c +++ b/sys/dev/softraid.c @@ -1,4 +1,4 @@ -/* $OpenBSD: softraid.c,v 1.160 2009/06/24 12:08:15 jsing Exp $ */ +/* $OpenBSD: softraid.c,v 1.161 2009/06/26 14:50:44 jsing Exp $ */ /* * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org> @@ -955,12 +955,17 @@ int sr_boot_assembly(struct sr_softc *sc) { struct device *dv; - struct sr_metadata_list_head mlh; - struct sr_metadata_list *mle, *mle2; - struct sr_metadata *m1, *m2; struct bioc_createraid bc; - int rv = 0, no_dev, i; - dev_t *dt = NULL; + struct sr_metadata_list_head mlh; + struct sr_metadata_list *mle, *mlenext, *mle1, *mle2; + struct sr_metadata *metadata; + struct sr_boot_volume_head bvh; + struct sr_boot_volume *vol, *vp1, *vp2; + u_int32_t chunk_id; + u_int64_t *ondisk = NULL; + dev_t *devs = NULL; + char devname[32]; + int rv = 0, i; DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc)); @@ -984,72 +989,169 @@ sr_boot_assembly(struct sr_softc *sc) } /* - * XXX poor mans hack that doesn't keep disks in order and does not - * roam disks correctly. replace this with something smarter that - * orders disks by volid, chunkid and uuid. + * Create a list of volumes and associate chunks with each volume. */ - dt = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF, M_WAITOK); - for (i = 0; i < BIOC_CRMAXLEN; i++) - dt[i] = NODEV; /* mark device as illegal */ - SLIST_FOREACH(mle, &mlh, sml_link) { - /* chunk used already? */ - if (mle->sml_used) - continue; + SLIST_INIT(&bvh); + for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mlenext) { - no_dev = 0; - m1 = (struct sr_metadata *)&mle->sml_metadata; - SLIST_FOREACH(mle2, &mlh, sml_link) { - /* chunk used already? */ - if (mle2->sml_used) - continue; + mlenext = SLIST_NEXT(mle, sml_link); + SLIST_REMOVE(&mlh, mle, sr_metadata_list, sml_link); - m2 = (struct sr_metadata *)&mle2->sml_metadata; + metadata = (struct sr_metadata *)&mle->sml_metadata; + mle->sml_chunk_id = metadata->ssdi.ssd_chunk_id; - /* are we the same volume? */ - if (m1->ssdi.ssd_volid != m2->ssdi.ssd_volid) - continue; - - /* same uuid? */ - if (bcmp(&m1->ssdi.ssd_uuid, &m2->ssdi.ssd_uuid, - sizeof(m1->ssdi.ssd_uuid))) - continue; + SLIST_FOREACH(vol, &bvh, sbv_link) { + if (bcmp(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, + sizeof(metadata->ssdi.ssd_uuid)) == 0) + break; + } - /* sanity */ - if (dt[m2->ssdi.ssd_chunk_id] != NODEV) { - printf("%s: chunk id already in use; can not " - "assemble volume\n", DEVNAME(sc)); + if (vol == NULL) { + vol = malloc(sizeof(struct sr_boot_volume), + M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO); + if (vol == NULL) { + printf("%s: failed to allocate boot volume!\n", + DEVNAME(sc)); goto unwind; } - dt[m2->ssdi.ssd_chunk_id] = mle2->sml_mm; - no_dev++; - mle2->sml_used = 1; + + vol->sbv_level = metadata->ssdi.ssd_level; + vol->sbv_volid = metadata->ssdi.ssd_volid; + vol->sbv_chunk_no = metadata->ssdi.ssd_chunk_no; + bcopy(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, + sizeof(metadata->ssdi.ssd_uuid)); + SLIST_INIT(&vol->sml); + + /* Maintain volume order. */ + vp2 = NULL; + SLIST_FOREACH(vp1, &bvh, sbv_link) { + if (vp1->sbv_volid > vol->sbv_volid) + break; + vp2 = vp1; + } + if (vp2 == NULL) { + DNPRINTF(SR_D_META, "%s: insert volume %u " + "at head\n", DEVNAME(sc), vol->sbv_volid); + SLIST_INSERT_HEAD(&bvh, vol, sbv_link); + } else { + DNPRINTF(SR_D_META, "%s: insert volume %u " + "after %u\n", DEVNAME(sc), vol->sbv_volid, + vp2->sbv_volid); + SLIST_INSERT_AFTER(vp2, vol, sbv_link); + } } - if (m1->ssdi.ssd_chunk_no != no_dev) { + + /* Maintain chunk order. */ + mle2 = NULL; + SLIST_FOREACH(mle1, &vol->sml, sml_link) { + if (mle1->sml_chunk_id > mle->sml_chunk_id) + break; + mle2 = mle1; + } + if (mle2 == NULL) { + DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " + "at head\n", DEVNAME(sc), vol->sbv_volid, + mle->sml_chunk_id); + SLIST_INSERT_HEAD(&vol->sml, mle, sml_link); + } else { + DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " + "after %u\n", DEVNAME(sc), vol->sbv_volid, + mle->sml_chunk_id, mle2->sml_chunk_id); + SLIST_INSERT_AFTER(mle2, mle, sml_link); + } + + vol->sbv_dev_no++; + } + + /* Allocate memory for device and ondisk version arrays. */ + devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF, + M_NOWAIT | M_CANFAIL); + if (devs == NULL) { + printf("%s: failed to allocate device array\n", DEVNAME(sc)); + goto unwind; + } + ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF, + M_NOWAIT | M_CANFAIL); + if (ondisk == NULL) { + printf("%s: failed to allocate ondisk array\n", DEVNAME(sc)); + goto unwind; + } + + SLIST_FOREACH(vol, &bvh, sbv_link) { + +#ifdef SR_DEBUG + DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc)); + if (sr_debug & SR_D_META) + sr_uuid_print(&vol->sbv_uuid, 0); + DNPRINTF(SR_D_META, " volid %u with %u chunks\n", + vol->sbv_volid, vol->sbv_chunk_no); +#endif + + for (i = 0; i < BIOC_CRMAXLEN; i++) { + devs[i] = NODEV; /* mark device as illegal */ + ondisk[i] = 0; + } + + SLIST_FOREACH(mle, &vol->sml, sml_link) { + metadata = (struct sr_metadata *)&mle->sml_metadata; + chunk_id = metadata->ssdi.ssd_chunk_id; + + if (devs[chunk_id] != NODEV) { + vol->sbv_dev_no--; + sr_meta_getdevname(sc, mle->sml_mm, devname, + sizeof(devname)); + printf("%s: found duplicate chunk %u for " + "volume %u on device %s\n", DEVNAME(sc), + chunk_id, vol->sbv_volid, devname); + } + + if (devs[chunk_id] == NODEV || + metadata->ssd_ondisk > ondisk[chunk_id]) { + devs[chunk_id] = mle->sml_mm; + ondisk[chunk_id] = metadata->ssd_ondisk; + DNPRINTF(SR_D_META, "%s: using ondisk " + "metadata version %llu for chunk %u\n", + DEVNAME(sc), ondisk[chunk_id], chunk_id); + } + } + + if (vol->sbv_chunk_no != vol->sbv_dev_no) { printf("%s: not all chunks were provided; " "attempting to bring volume %d online\n", - DEVNAME(sc), m1->ssdi.ssd_volid); - no_dev = m1->ssdi.ssd_chunk_no; + DEVNAME(sc), vol->sbv_volid); } bzero(&bc, sizeof(bc)); - bc.bc_level = m1->ssdi.ssd_level; - bc.bc_dev_list_len = no_dev * sizeof(dev_t); - bc.bc_dev_list = dt; + bc.bc_level = vol->sbv_level; + bc.bc_dev_list_len = vol->sbv_chunk_no * sizeof(dev_t); + bc.bc_dev_list = devs; bc.bc_flags = BIOC_SCDEVT; sr_ioctl_createraid(sc, &bc, 0); + rv++; } /* done with metadata */ unwind: + for (vp1 = SLIST_FIRST(&bvh); vp1 != SLIST_END(&bvh); vp1 = vp2) { + vp2 = SLIST_NEXT(vp1, sbv_link); + for (mle1 = SLIST_FIRST(&vp1->sml); + mle1 != SLIST_END(&vp1->sml); mle1 = mle2) { + mle2 = SLIST_NEXT(mle1, sml_link); + free(mle1, M_DEVBUF); + } + free(vp1, M_DEVBUF); + } for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) { mle2 = SLIST_NEXT(mle, sml_link); free(mle, M_DEVBUF); } SLIST_INIT(&mlh); - if (dt) - free(dt, M_DEVBUF); + if (devs) + free(devs, M_DEVBUF); + if (ondisk) + free(ondisk, M_DEVBUF); return (rv); } diff --git a/sys/dev/softraidvar.h b/sys/dev/softraidvar.h index 07134bfdc24..b27997916cd 100644 --- a/sys/dev/softraidvar.h +++ b/sys/dev/softraidvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: softraidvar.h,v 1.77 2009/06/24 12:06:00 jsing Exp $ */ +/* $OpenBSD: softraidvar.h,v 1.78 2009/06/26 14:50:44 jsing Exp $ */ /* * Copyright (c) 2006 Marco Peereboom <marco@peereboom.us> * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org> @@ -330,6 +330,7 @@ struct sr_aoe { struct sr_metadata_list { u_int8_t sml_metadata[SR_META_SIZE * 512]; dev_t sml_mm; + u_int32_t sml_chunk_id; int sml_used; SLIST_ENTRY(sr_metadata_list) sml_link; @@ -337,6 +338,20 @@ struct sr_metadata_list { SLIST_HEAD(sr_metadata_list_head, sr_metadata_list); +struct sr_boot_volume { + struct sr_uuid sbv_uuid; /* Volume UUID. */ + u_int32_t sbv_level; /* Level. */ + u_int32_t sbv_volid; /* Volume ID. */ + u_int32_t sbv_chunk_no; /* Number of chunks. */ + u_int32_t sbv_dev_no; /* Number of devs discovered. */ + + struct sr_metadata_list_head sml; /* List of metadata. */ + + SLIST_ENTRY(sr_boot_volume) sbv_link; +}; + +SLIST_HEAD(sr_boot_volume_head, sr_boot_volume); + struct sr_chunk { struct sr_meta_chunk src_meta; /* chunk meta data */ struct sr_meta_opt src_opt; /* optional metadata */ |