diff options
author | Jordan Hargrave <jordan@cvs.openbsd.org> | 2009-08-06 22:39:41 +0000 |
---|---|---|
committer | Jordan Hargrave <jordan@cvs.openbsd.org> | 2009-08-06 22:39:41 +0000 |
commit | f723a78a7706ede006f780f9ae18194567284925 (patch) | |
tree | 5ff24891a5f26dc58b6322659a2d94d8c466db02 /sys/dev | |
parent | 1cc21b2bfac371283ef92b44a84bdd9218afe035 (diff) |
Handle failed disk I/O for RAID6
RAID6 still disabled in softraid.c
Diffstat (limited to 'sys/dev')
-rw-r--r-- | sys/dev/softraid_raid6.c | 260 |
1 files changed, 121 insertions, 139 deletions
diff --git a/sys/dev/softraid_raid6.c b/sys/dev/softraid_raid6.c index 2e60630973f..63a02580c95 100644 --- a/sys/dev/softraid_raid6.c +++ b/sys/dev/softraid_raid6.c @@ -1,4 +1,4 @@ -/* $OpenBSD: softraid_raid6.c,v 1.2 2009/08/04 20:17:14 jordan Exp $ */ +/* $OpenBSD: softraid_raid6.c,v 1.3 2009/08/06 22:39:40 jordan Exp $ */ /* * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us> * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org> @@ -62,6 +62,7 @@ int sr_raid6_addio(struct sr_workunit *wu, int, daddr64_t, daddr64_t, void *, int, int, void *, void *, int); void sr_dump(void *, int); void sr_raid6_scrub(struct sr_discipline *); +int sr_failio(struct sr_workunit *); void *sr_get_block(struct sr_discipline *, int); void sr_put_block(struct sr_discipline *, void *); @@ -433,78 +434,42 @@ sr_raid6_rw(struct sr_workunit *wu) wu->swu_blk_end = ((chunk_offs + (no_chunk << strip_bits)) >> DEV_BSHIFT) - 1; fail = 0; - - /* Get P-fail flag */ - scp = sd->sd_vol.sv_chunks[pchunk]; - switch (scp->src_meta.scm_status) { - case BIOC_SDOFFLINE: - case BIOC_SDREBUILD: - case BIOC_SDHOTSPARE: - fail |= SR_FAILP; - break; - } - - /* Get Q-fail flag */ - scp = sd->sd_vol.sv_chunks[qchunk]; - switch (scp->src_meta.scm_status) { - case BIOC_SDOFFLINE: - case BIOC_SDREBUILD: - case BIOC_SDHOTSPARE: - fail |= SR_FAILQ; - break; - } - - /* Get disk-fail flag */ - scp = sd->sd_vol.sv_chunks[chunk]; - switch (scp->src_meta.scm_status) { - case BIOC_SDOFFLINE: - case BIOC_SDREBUILD: - case BIOC_SDHOTSPARE: - fail |= SR_FAILX; - - /* Check for dual-drive failure */ - if (!(fail & (SR_FAILP|SR_FAILQ)) && - (sd->sd_vol_status == BIOC_SVDEGRADED)) - fail |= SR_FAILY; - break; + fchunk = -1; + + /* Get disk-fail flags */ + for (i=0; i< no_chunk+2; i++) { + scp = sd->sd_vol.sv_chunks[i]; + switch (scp->src_meta.scm_status) { + case BIOC_SDOFFLINE: + case BIOC_SDREBUILD: + case BIOC_SDHOTSPARE: + if (i == qchunk) + fail |= SR_FAILQ; + else if (i == pchunk) + fail |= SR_FAILP; + else if (i == chunk) + fail |= SR_FAILX; + else { + /* dual data-disk failure */ + fail |= SR_FAILY; + fchunk = i; + } + break; + } } - if (xs->flags & SCSI_DATA_IN) { - switch (fail) { - case SR_NOFAIL: + if (!(fail & SR_FAILX)) { /* drive is good. issue single read request */ if (sr_raid6_addio(wu, chunk, lba, length, data, xs->flags, 0, NULL, NULL, 0)) goto bad; - break; - case SR_FAILX: - case SR_FAILX+SR_FAILQ: - /* Dx, (Q) failed: Dx = Dz ^ P (same as RAID5) */ - printf("Disk %llx offline, " - "regenerating Dx+Q\n", chunk); - - /* Calculate: Dx = P^Dz - * P: sr_raid6_xorp(data, ---, length); - * Dz: sr_raid6_xorp(data, ---, length); - */ - memset(data, 0, length); - for (i = 0; i < no_chunk+2; i++) { - if (i != chunk && i != qchunk) { - /* Read Dz */ - if (sr_raid6_addio(wu, i, lba, length, - NULL, SCSI_DATA_IN, SR_CCBF_FREEBUF, - data, NULL, 0)) - goto bad; - } - } - break; - case SR_FAILX+SR_FAILP: + } else if (fail & SR_FAILP) { /* Dx, P failed */ printf("Disk %llx offline, " "regenerating Dx+P\n", chunk); - pbuf = sr_get_block(sd, length); - if (pbuf == NULL) + qbuf = sr_get_block(sd, length); + if (qbuf == NULL) goto bad; /* Calculate: Dx*gx = Q^(Dz*gz) @@ -515,44 +480,33 @@ sr_raid6_rw(struct sr_workunit *wu) for (i = 0; i < no_chunk+2; i++) { if (i == qchunk) { /* Read Q */ - if (sr_raid6_addio(wu, i, lba, length, - NULL, SCSI_DATA_IN, SR_CCBF_FREEBUF, - pbuf, NULL, 0)) - goto bad; + if (sr_raid6_addio(wu, i, lba, + length, NULL, SCSI_DATA_IN, + SR_CCBF_FREEBUF, qbuf, + NULL, 0)) + goto bad; } else if (i != chunk && i != pchunk) { /* Read Dz * gz */ - if (sr_raid6_addio(wu, i, lba, length, - NULL, SCSI_DATA_IN, SR_CCBF_FREEBUF, - NULL, pbuf, gf_pow[i])) - goto bad; + if (sr_raid6_addio(wu, i, lba, + length, NULL, SCSI_DATA_IN, + SR_CCBF_FREEBUF, NULL, + qbuf, gf_pow[i])) + goto bad; } } - /* XXX: bag of fail */ - wu->swu_flags |= SR_WUF_FAIL; - sr_raid_startwu(wu); - while ((wu->swu_flags & SR_WUF_FAILIOCOMP) == 0) { - tsleep(wu, PRIBIO, "sr_getdata", 0); - } - - /* On completion, pbuf = Dx*gx */ - sr_raid6_xorq(data, pbuf, length, gf_inv(gf_pow[chunk])); - sr_put_block(sd, pbuf); - - sr_wu_put(wu); - scsi_done(xs); - return(0); + /* run fake wu when read i/o is complete */ + if (wu_w == NULL && + (wu_w = sr_wu_get(sd, 0)) == NULL) + goto bad; - break; - case SR_FAILX+SR_FAILY: + wu_w->swu_flags |= SR_WUF_FAIL; + if (sr_raid6_addio(wu_w, 0, 0, length, qbuf, 0, + SR_CCBF_FREEBUF, NULL, data, + gf_inv(gf_pow[chunk]))) + goto bad; + } else if (fail & SR_FAILY) { /* Dx, Dy failed */ - - /* cheat.. get other failed drive */ - for (fchunk=0; fchunk<no_chunk+2; fchunk++) { - if (fchunk != chunk && fchunk != qchunk && fchunk != pchunk) - break; - } - printf("Disk %llx & %llx offline, " "regenerating Dx+Dy\n", chunk, fchunk); qbuf = sr_get_block(sd, length); @@ -572,55 +526,71 @@ sr_raid6_rw(struct sr_workunit *wu) for (i = 0; i < no_chunk+2; i++) { if (i == qchunk) { /* read Q */ - if (sr_raid6_addio(wu, i, lba, length, - NULL, SCSI_DATA_IN, SR_CCBF_FREEBUF, - qbuf, NULL, 0)) - goto bad; + if (sr_raid6_addio(wu, i, lba, + length, NULL, SCSI_DATA_IN, + SR_CCBF_FREEBUF, qbuf, + NULL, 0)) + goto bad; } else if (i == pchunk) { /* read P */ - if (sr_raid6_addio(wu, i, lba, length, - NULL, SCSI_DATA_IN, SR_CCBF_FREEBUF, - pbuf, NULL, 0)) - goto bad; + if (sr_raid6_addio(wu, i, lba, + length, NULL, SCSI_DATA_IN, + SR_CCBF_FREEBUF, pbuf, + NULL, 0)) + goto bad; } else if (i != chunk) { /* read Dz * gz */ - if (sr_raid6_addio(wu, i, lba, length, - NULL, SCSI_DATA_IN, SR_CCBF_FREEBUF, - pbuf, qbuf, gf_pow[i])) - goto bad; + if (sr_raid6_addio(wu, i, lba, + length, NULL, SCSI_DATA_IN, + SR_CCBF_FREEBUF, pbuf, + qbuf, gf_pow[i])) + goto bad; } } + /* run fake wu when read i/o is complete */ + if (wu_w == NULL && + (wu_w = sr_wu_get(sd, 0)) == NULL) + goto bad; - /* XXX: bag of fail */ - wu->swu_flags |= SR_WUF_FAIL; - sr_raid_startwu(wu); - while ((wu->swu_flags & SR_WUF_FAILIOCOMP) == 0) { - tsleep(wu, PRIBIO, "sr_getdata", 0); - } - - /* On completion, pbuf = Dx ^ Dy; qbuf = Dx*gx ^ Dy*gy */ - sr_raid6_xorq(data, qbuf, length, - gf_inv(gf_pow[chunk] ^ gf_pow[fchunk])); - sr_raid6_xorq(data, pbuf, length, - gf_inv(gf_pow[255+chunk-fchunk] ^ 1)); // Dx - - sr_put_block(sd, pbuf); - sr_put_block(sd, qbuf); + wu_w->swu_flags |= SR_WUF_FAIL; + if (sr_raid6_addio(wu_w, 0, 0, length, pbuf, 0, + SR_CCBF_FREEBUF, NULL, data, + gf_inv(gf_pow[255+chunk-fchunk] ^ 1))) + goto bad; + if (sr_raid6_addio(wu_w, 0, 0, length, qbuf, 0, + SR_CCBF_FREEBUF, NULL, data, + gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]))) + goto bad; + } else { + /* Two cases: single disk (Dx) or (Dx+Q) + * Dx = Dz ^ P (same as RAID5) + */ + printf("Disk %llx offline, " + "regenerating Dx%s\n", chunk, + fail & SR_FAILQ ? "+Q" : " single"); - sr_wu_put(wu); - scsi_done(xs); - return(0); + /* Calculate: Dx = P^Dz + * P: sr_raid6_xorp(data, ---, length); + * Dz: sr_raid6_xorp(data, ---, length); + */ + memset(data, 0, length); + for (i = 0; i < no_chunk+2; i++) { + if (i != chunk && i != qchunk) { + /* Read Dz */ + if (sr_raid6_addio(wu, i, lba, + length, NULL, SCSI_DATA_IN, + SR_CCBF_FREEBUF, data, + NULL, 0)) + goto bad; + } + } - break; - default: - printf("%s: is offline, can't read\n", - DEVNAME(sd->sd_sc)); - goto bad; + /* data will contain correct value on completion */ } } else { /* XXX handle writes to failed/offline disk? */ - if (scp->src_meta.scm_status == BIOC_SDOFFLINE) + if (fail & (SR_FAILX|SR_FAILQ|SR_FAILP)) goto bad; /* @@ -724,6 +694,23 @@ bad: return (1); } +/* Handle failure I/O completion */ +int +sr_failio(struct sr_workunit *wu) +{ + struct sr_discipline *sd = wu->swu_dis; + struct sr_ccb *ccb; + + if (!(wu->swu_flags & SR_WUF_FAIL)) + return (0); + + /* Wu is a 'fake'.. don't do real I/O just intr */ + TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); + TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) + sr_raid6_intr(&ccb->ccb_buf); + return (1); +} + void sr_raid6_intr(struct buf *bp) { @@ -832,7 +819,8 @@ sr_raid6_intr(struct buf *bp) SR_WU_INPROGRESS; TAILQ_REMOVE(&sd->sd_wu_defq, wu->swu_collider, swu_link); - sr_raid_startwu(wu->swu_collider); + if (sr_failio(wu->swu_collider) == 0) + sr_raid_startwu(wu->swu_collider); } break; } @@ -842,11 +830,7 @@ sr_raid6_intr(struct buf *bp) printf("%s: wu: %p not on pending queue\n", DEVNAME(sc), wu); - if (wu->swu_flags & SR_WUF_FAIL) { - wu->swu_flags |= SR_WUF_FAILIOCOMP; - wakeup(wu); - } - else if (wu->swu_flags & SR_WUF_REBUILD) { + if (wu->swu_flags & SR_WUF_REBUILD) { if (wu->swu_xs->flags & SCSI_DATA_OUT) { wu->swu_flags |= SR_WUF_REBUILDIOCOMP; wakeup(wu); @@ -987,10 +971,8 @@ sr_raid6_addio(struct sr_workunit *wu, int dsk, daddr64_t blk, daddr64_t len, void sr_raid6_xorp(void *p, void *d, int len) { - uint32_t *pbuf = p, *data = d; + uint8_t *pbuf = p, *data = d; - /* Faster, X bytes at a time */ - len >>= 4; while (len--) pbuf[len] ^= data[len]; } |