diff options
author | Niklas Hallqvist <niklas@cvs.openbsd.org> | 1999-01-11 14:29:56 +0000 |
---|---|---|
committer | Niklas Hallqvist <niklas@cvs.openbsd.org> | 1999-01-11 14:29:56 +0000 |
commit | 5a29b52d01b420bb61a3112d2d44740a0fa99601 (patch) | |
tree | 7d6238740f53a56f5c76ba8256c785b13caaa24a /sys/dev/raidframe/rf_pq.c | |
parent | 799a3ea9a9c07e091f5f4e62273c6f105cf86191 (diff) |
Import of CMU's RAIDframe via NetBSD.
Diffstat (limited to 'sys/dev/raidframe/rf_pq.c')
-rw-r--r-- | sys/dev/raidframe/rf_pq.c | 1026 |
1 files changed, 1026 insertions, 0 deletions
diff --git a/sys/dev/raidframe/rf_pq.c b/sys/dev/raidframe/rf_pq.c new file mode 100644 index 00000000000..ebbc7917b26 --- /dev/null +++ b/sys/dev/raidframe/rf_pq.c @@ -0,0 +1,1026 @@ +/* $OpenBSD: rf_pq.c,v 1.1 1999/01/11 14:29:38 niklas Exp $ */ +/* $NetBSD: rf_pq.c,v 1.1 1998/11/13 04:20:32 oster Exp $ */ +/* + * Copyright (c) 1995 Carnegie-Mellon University. + * All rights reserved. + * + * Author: Daniel Stodolsky + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +/* + * Code for RAID level 6 (P + Q) disk array architecture. + * + * : + * Log: rf_pq.c,v + * Revision 1.33 1996/11/05 21:10:40 jimz + * failed pda generalization + * + * Revision 1.32 1996/07/31 16:29:50 jimz + * "fix" math on 32-bit machines using RF_LONGSHIFT + * (may be incorrect) + * + * Revision 1.31 1996/07/31 15:35:01 jimz + * evenodd changes; bugfixes for double-degraded archs, generalize + * some formerly PQ-only functions + * + * Revision 1.30 1996/07/27 23:36:08 jimz + * Solaris port of simulator + * + * Revision 1.29 1996/07/22 19:52:16 jimz + * switched node params to RF_DagParam_t, a union of + * a 64-bit int and a void *, for better portability + * attempted hpux port, but failed partway through for + * lack of a single C compiler capable of compiling all + * source files + * + * Revision 1.28 1996/06/09 02:36:46 jimz + * lots of little crufty cleanup- fixup whitespace + * issues, comment #ifdefs, improve typing in some + * places (esp size-related) + * + * Revision 1.27 1996/06/07 21:33:04 jimz + * begin using consistent types for sector numbers, + * stripe numbers, row+col numbers, recon unit numbers + * + * Revision 1.26 1996/06/02 17:31:48 jimz + * Moved a lot of global stuff into array structure, where it belongs. + * Fixed up paritylogging, pss modules in this manner. Some general + * code cleanup. Removed lots of dead code, some dead files. + * + * Revision 1.25 1996/05/31 22:26:54 jimz + * fix a lot of mapping problems, memory allocation problems + * found some weird lock issues, fixed 'em + * more code cleanup + * + * Revision 1.24 1996/05/30 23:22:16 jimz + * bugfixes of serialization, timing problems + * more cleanup + * + * Revision 1.23 1996/05/30 12:59:18 jimz + * make etimer happier, more portable + * + * Revision 1.22 1996/05/27 18:56:37 jimz + * more code cleanup + * better typing + * compiles in all 3 environments + * + * Revision 1.21 1996/05/24 22:17:04 jimz + * continue code + namespace cleanup + * typed a bunch of flags + * + * Revision 1.20 1996/05/24 04:28:55 jimz + * release cleanup ckpt + * + * Revision 1.19 1996/05/23 21:46:35 jimz + * checkpoint in code cleanup (release prep) + * lots of types, function names have been fixed + * + * Revision 1.18 1996/05/23 00:33:23 jimz + * code cleanup: move all debug decls to rf_options.c, all extern + * debug decls to rf_options.h, all debug vars preceded by rf_ + * + * Revision 1.17 1996/05/18 19:51:34 jimz + * major code cleanup- fix syntax, make some types consistent, + * add prototypes, clean out dead code, et cetera + * + * Revision 1.16 1996/05/17 14:52:04 wvcii + * added prototyping to QDelta() + * - changed buf params from volatile unsigned long * to char * + * changed QDelta for kernel + * - just bzero the buf since kernel doesn't include pq decode table + * + * Revision 1.15 1996/05/03 19:40:20 wvcii + * added includes for dag library + * + * Revision 1.14 1995/12/12 18:10:06 jimz + * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT + * fix 80-column brain damage in comments + * + * Revision 1.13 1995/11/30 16:19:55 wvcii + * added copyright info + * + * Revision 1.12 1995/11/07 16:13:47 wvcii + * changed PQDagSelect prototype + * function no longer returns numHdrSucc, numTermAnt + * note: this file contains node functions which should be + * moved to rf_dagfuncs.c so that all node funcs are bundled together + * + * Revision 1.11 1995/10/04 03:50:33 wvcii + * removed panics, minor code cleanup in dag selection + * + * + */ + +#include "rf_archs.h" +#include "rf_types.h" +#include "rf_raid.h" +#include "rf_dag.h" +#include "rf_dagffrd.h" +#include "rf_dagffwr.h" +#include "rf_dagdegrd.h" +#include "rf_dagdegwr.h" +#include "rf_dagutils.h" +#include "rf_dagfuncs.h" +#include "rf_threadid.h" +#include "rf_etimer.h" +#include "rf_pqdeg.h" +#include "rf_general.h" +#include "rf_map.h" +#include "rf_pq.h" +#include "rf_sys.h" + +RF_RedFuncs_t rf_pFuncs = { rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P" }; +RF_RedFuncs_t rf_pRecoveryFuncs = { rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func" }; + +int rf_RegularONPFunc(node) + RF_DagNode_t *node; +{ + return(rf_RegularXorFunc(node)); +} + +/* + same as simpleONQ func, but the coefficient is always 1 +*/ + +int rf_SimpleONPFunc(node) + RF_DagNode_t *node; +{ + return(rf_SimpleXorFunc(node)); +} + +int rf_RecoveryPFunc(node) +RF_DagNode_t *node; +{ + return(rf_RecoveryXorFunc(node)); +} + +int rf_RegularPFunc(node) + RF_DagNode_t *node; +{ + return(rf_RegularXorFunc(node)); +} + +#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) + +static void QDelta(char *dest, char *obuf, char *nbuf, unsigned length, + unsigned char coeff); +static void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, + unsigned length, unsigned coeff); + +RF_RedFuncs_t rf_qFuncs = { rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q" }; +RF_RedFuncs_t rf_qRecoveryFuncs = { rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func" }; +RF_RedFuncs_t rf_pqRecoveryFuncs = { rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func" }; + +void rf_PQDagSelect( + RF_Raid_t *raidPtr, + RF_IoType_t type, + RF_AccessStripeMap_t *asmap, + RF_VoidFuncPtr *createFunc) +{ + RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); + unsigned ndfail = asmap->numDataFailed; + unsigned npfail = asmap->numParityFailed; + unsigned ntfail = npfail + ndfail; + + RF_ASSERT(RF_IO_IS_R_OR_W(type)); + if (ntfail > 2) + { + RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); + /* *infoFunc = */ *createFunc = NULL; + return; + } + + /* ok, we can do this I/O */ + if (type == RF_IO_TYPE_READ) + { + switch (ndfail) + { + case 0: + /* fault free read */ + *createFunc = rf_CreateFaultFreeReadDAG; /* same as raid 5 */ + break; + case 1: + /* lost a single data unit */ + /* two cases: + (1) parity is not lost. + do a normal raid 5 reconstruct read. + (2) parity is lost. + do a reconstruct read using "q". + */ + if (ntfail == 2) /* also lost redundancy */ + { + if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) + *createFunc = rf_PQ_110_CreateReadDAG; + else + *createFunc = rf_PQ_101_CreateReadDAG; + } + else + { + /* P and Q are ok. But is there a failure + in some unaccessed data unit? + */ + if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2) + *createFunc = rf_PQ_200_CreateReadDAG; + else + *createFunc = rf_PQ_100_CreateReadDAG; + } + break; + case 2: + /* lost two data units */ + /* *infoFunc = PQOneTwo; */ + *createFunc = rf_PQ_200_CreateReadDAG; + break; + } + return; + } + + /* a write */ + switch (ntfail) + { + case 0: /* fault free */ + if (rf_suppressLocksAndLargeWrites || + (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || + (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { + + *createFunc = rf_PQCreateSmallWriteDAG; + } + else { + *createFunc = rf_PQCreateLargeWriteDAG; + } + break; + + case 1: /* single disk fault */ + if (npfail==1) + { + RF_ASSERT ((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); + if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) + { /* q died, treat like normal mode raid5 write.*/ + if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) + || rf_NumFailedDataUnitsInStripe(raidPtr,asmap)) + *createFunc = rf_PQ_001_CreateSmallWriteDAG; + else + *createFunc = rf_PQ_001_CreateLargeWriteDAG; + } + else + { /* parity died, small write only updating Q */ + if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) + || rf_NumFailedDataUnitsInStripe(raidPtr,asmap)) + *createFunc = rf_PQ_010_CreateSmallWriteDAG; + else + *createFunc = rf_PQ_010_CreateLargeWriteDAG; + } + } + else + { /* data missing. + Do a P reconstruct write if only a single data unit + is lost in the stripe, otherwise a PQ reconstruct + write. */ + if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2) + *createFunc = rf_PQ_200_CreateWriteDAG; + else + *createFunc = rf_PQ_100_CreateWriteDAG; + } + break; + + case 2: /* two disk faults */ + switch (npfail) + { + case 2: /* both p and q dead */ + *createFunc = rf_PQ_011_CreateWriteDAG; + break; + case 1: /* either p or q and dead data */ + RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); + RF_ASSERT ((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); + if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) + *createFunc = rf_PQ_101_CreateWriteDAG; + else + *createFunc = rf_PQ_110_CreateWriteDAG; + break; + case 0: /* double data loss */ + *createFunc = rf_PQ_200_CreateWriteDAG; + break; + } + break; + + default: /* more than 2 disk faults */ + *createFunc = NULL; + RF_PANIC(); + } + return; +} + +/* + Used as a stop gap info function +*/ +static void PQOne(raidPtr, nSucc, nAnte, asmap) + RF_Raid_t *raidPtr; + int *nSucc; + int *nAnte; + RF_AccessStripeMap_t *asmap; +{ + *nSucc = *nAnte = 1; +} + +static void PQOneTwo(raidPtr, nSucc, nAnte, asmap) + RF_Raid_t *raidPtr; + int *nSucc; + int *nAnte; + RF_AccessStripeMap_t *asmap; +{ + *nSucc = 1; + *nAnte = 2; +} + +RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG) +{ + rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, + rf_RegularPQFunc, RF_FALSE); +} + +int rf_RegularONQFunc(node) + RF_DagNode_t *node; +{ + int np = node->numParams; + int d; + RF_Raid_t *raidPtr = (RF_Raid_t *)node->params[np-1].p; + int i; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + char *qbuf, *qpbuf; + char *obuf, *nbuf; + RF_PhysDiskAddr_t *old, *new; + unsigned long coeff; + unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; + + RF_ETIMER_START(timer); + + d = (np-3)/4; + RF_ASSERT (4*d+3 == np); + qbuf = (char *) node->params[2*d+1].p; /* q buffer*/ + for (i=0; i < d; i++) + { + old = (RF_PhysDiskAddr_t *) node->params[2*i].p; + obuf = (char *) node->params[2*i+1].p; + new = (RF_PhysDiskAddr_t *) node->params[2*(d+1+i)].p; + nbuf = (char *) node->params[2*(d+1+i)+1].p; + RF_ASSERT (new->numSector == old->numSector); + RF_ASSERT (new->raidAddress == old->raidAddress); + /* the stripe unit within the stripe tells us the coefficient to use + for the multiply. */ + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),new->raidAddress); + /* compute the data unit offset within the column, then add one */ + coeff = (coeff % raidPtr->Layout.numDataCol); + qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,old->startSector % secPerSU); + QDelta(qpbuf,obuf,nbuf, rf_RaidAddressToByte(raidPtr, old->numSector),coeff); + } + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); + rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no I/O in this node */ + return(0); +} + +/* + See the SimpleXORFunc for the difference between a simple and regular func. + These Q functions should be used for + + new q = Q(data,old data,old q) + + style updates and not for + + q = ( new data, new data, .... ) + + computations. + + The simple q takes 2(2d+1)+1 params, where d is the number + of stripes written. The order of params is + old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d + [2d] old q pda_0, old q buffer + [2d_2] new data pda_0, new data buffer_0, ... new data pda_d, new data buffer_d + raidPtr +*/ + +int rf_SimpleONQFunc(node) + RF_DagNode_t *node; +{ + int np = node->numParams; + int d; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p; + int i; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + char *qbuf; + char *obuf, *nbuf; + RF_PhysDiskAddr_t *old, *new; + unsigned long coeff; + + RF_ETIMER_START(timer); + + d = (np-3)/4; + RF_ASSERT (4*d+3 == np); + qbuf = (char *) node->params[2*d+1].p; /* q buffer*/ + for (i=0; i < d; i++) + { + old = (RF_PhysDiskAddr_t *) node->params[2*i].p; + obuf = (char *) node->params[2*i+1].p; + new = (RF_PhysDiskAddr_t *) node->params[2*(d+1+i)].p; + nbuf = (char *) node->params[2*(d+1+i)+1].p; + RF_ASSERT (new->numSector == old->numSector); + RF_ASSERT (new->raidAddress == old->raidAddress); + /* the stripe unit within the stripe tells us the coefficient to use + for the multiply. */ + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),new->raidAddress); + /* compute the data unit offset within the column, then add one */ + coeff = (coeff % raidPtr->Layout.numDataCol); + QDelta(qbuf,obuf,nbuf, rf_RaidAddressToByte(raidPtr, old->numSector),coeff); + } + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); + rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no I/O in this node */ + return(0); +} + +RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG) +{ + rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs); +} + +static void RegularQSubr(node,qbuf) + RF_DagNode_t *node; + char *qbuf; +{ + int np = node->numParams; + int d; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p; + unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; + int i; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + char *obuf, *qpbuf; + RF_PhysDiskAddr_t *old; + unsigned long coeff; + + RF_ETIMER_START(timer); + + d = (np-1)/2; + RF_ASSERT (2*d+1 == np); + for (i=0; i < d; i++) + { + old = (RF_PhysDiskAddr_t *) node->params[2*i].p; + obuf = (char *) node->params[2*i+1].p; + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress); + /* compute the data unit offset within the column, then add one */ + coeff = (coeff % raidPtr->Layout.numDataCol); + /* the input buffers may not all be aligned with the start of the + stripe. so shift by their sector offset within the stripe unit */ + qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,old->startSector % secPerSU); + rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff); + } + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); +} + +/* + used in degraded writes. +*/ + +static void DegrQSubr(node) + RF_DagNode_t *node; +{ + int np = node->numParams; + int d; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p; + unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; + int i; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + char *qbuf = node->results[1]; + char *obuf, *qpbuf; + RF_PhysDiskAddr_t *old; + unsigned long coeff; + unsigned fail_start; + int j; + + old = (RF_PhysDiskAddr_t *)node->params[np-2].p; + fail_start = old->startSector % secPerSU; + + RF_ETIMER_START(timer); + + d = (np-2)/2; + RF_ASSERT (2*d+2 == np); + for (i=0; i < d; i++) + { + old = (RF_PhysDiskAddr_t *) node->params[2*i].p; + obuf = (char *) node->params[2*i+1].p; + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress); + /* compute the data unit offset within the column, then add one */ + coeff = (coeff % raidPtr->Layout.numDataCol); + /* the input buffers may not all be aligned with the start of the + stripe. so shift by their sector offset within the stripe unit */ + j = old->startSector % secPerSU; + RF_ASSERT(j >= fail_start); + qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,j - fail_start); + rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff); + } + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); +} + +/* + Called by large write code to compute the new parity and the new q. + + structure of the params: + + pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol + raidPtr + + for a total of 2d+1 arguments. + The result buffers results[0], results[1] are the buffers for the p and q, + respectively. + + We compute Q first, then compute P. The P calculation may try to reuse + one of the input buffers for its output, so if we computed P first, we would + corrupt the input for the q calculation. +*/ + +int rf_RegularPQFunc(node) + RF_DagNode_t *node; +{ + RegularQSubr(node,node->results[1]); + return(rf_RegularXorFunc(node)); /* does the wakeup */ +} + +int rf_RegularQFunc(node) + RF_DagNode_t *node; +{ + /* Almost ... adjust Qsubr args */ + RegularQSubr(node, node->results[0]); + rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no I/O in this node */ + return(0); +} + +/* + Called by singly degraded write code to compute the new parity and the new q. + + structure of the params: + + pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d + failedPDA raidPtr + + for a total of 2d+2 arguments. + The result buffers results[0], results[1] are the buffers for the parity and q, + respectively. + + We compute Q first, then compute parity. The parity calculation may try to reuse + one of the input buffers for its output, so if we computed parity first, we would + corrupt the input for the q calculation. + + We treat this identically to the regularPQ case, ignoring the failedPDA extra argument. +*/ + +void rf_Degraded_100_PQFunc(node) + RF_DagNode_t *node; +{ + int np = node->numParams; + + RF_ASSERT (np >= 2); + DegrQSubr(node); + rf_RecoveryXorFunc(node); +} + + +/* + The two below are used when reading a stripe with a single lost data unit. + The parameters are + + pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr + + and results[0] contains the data buffer. Which is originally zero-filled. + +*/ + +/* this Q func is used by the degraded-mode dag functions to recover lost data. + * the second-to-last parameter is the PDA for the failed portion of the access. + * the code here looks at this PDA and assumes that the xor target buffer is + * equal in size to the number of sectors in the failed PDA. It then uses + * the other PDAs in the parameter list to determine where within the target + * buffer the corresponding data should be xored. + * + * Recall the basic equation is + * + * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256 + * + * so to recover data_j we need + * + * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256 + * + * So the coefficient for each buffer is (255 - data_col), and j should be initialized by + * copying Q into it. Then we need to do a table lookup to convert to solve + * data_j /= J + * + * + */ +int rf_RecoveryQFunc(node) + RF_DagNode_t *node; +{ + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p; + RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout; + RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p; + int i; + RF_PhysDiskAddr_t *pda; + RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr,failedPDA->startSector); + char *srcbuf, *destbuf; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + unsigned long coeff; + + RF_ETIMER_START(timer); + /* start by copying Q into the buffer */ + bcopy(node->params[node->numParams-3].p,node->results[0], + rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); + for (i=0; i<node->numParams-4; i+=2) + { + RF_ASSERT (node->params[i+1].p != node->results[0]); + pda = (RF_PhysDiskAddr_t *) node->params[i].p; + srcbuf = (char *) node->params[i+1].p; + suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); + destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset-failedSUOffset); + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),pda->raidAddress); + /* compute the data unit offset within the column */ + coeff = (coeff % raidPtr->Layout.numDataCol); + rf_IncQ((unsigned long *)destbuf, (unsigned long *)srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); + } + /* Do the nasty inversion now */ + coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),failedPDA->startSector) % raidPtr->Layout.numDataCol); + rf_InvertQ(node->results[0],node->results[0],rf_RaidAddressToByte(raidPtr,pda->numSector),coeff); + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); + rf_GenericWakeupFunc(node, 0); + return(0); +} + +int rf_RecoveryPQFunc(node) + RF_DagNode_t *node; +{ + RF_PANIC(); + return(1); +} + +/* + Degraded write Q subroutine. + Used when P is dead. + Large-write style Q computation. + Parameters + + (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr. + + We ignore failedPDA. + + This is a "simple style" recovery func. +*/ + +void rf_PQ_DegradedWriteQFunc(node) + RF_DagNode_t *node; +{ + int np = node->numParams; + int d; + RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p; + unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; + int i; + RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; + RF_Etimer_t timer; + char *qbuf = node->results[0]; + char *obuf, *qpbuf; + RF_PhysDiskAddr_t *old; + unsigned long coeff; + int fail_start,j; + + old = (RF_PhysDiskAddr_t *) node->params[np-2].p; + fail_start = old->startSector % secPerSU; + + RF_ETIMER_START(timer); + + d = (np-2)/2; + RF_ASSERT (2*d+2 == np); + + for (i=0; i < d; i++) + { + old = (RF_PhysDiskAddr_t *) node->params[2*i].p; + obuf = (char *) node->params[2*i+1].p; + coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress); + /* compute the data unit offset within the column, then add one */ + coeff = (coeff % raidPtr->Layout.numDataCol); + j = old->startSector % secPerSU; + RF_ASSERT(j >= fail_start); + qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,j - fail_start); + rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff); + } + + RF_ETIMER_STOP(timer); + RF_ETIMER_EVAL(timer); + tracerec->q_us += RF_ETIMER_VAL_US(timer); + rf_GenericWakeupFunc(node, 0); +} + + + + +/* Q computations */ + +/* + coeff - colummn; + + compute dest ^= qfor[28-coeff][rn[coeff+1] a] + + on 5-bit basis; + length in bytes; +*/ + +void rf_IncQ(dest,buf,length,coeff) + unsigned long *dest; + unsigned long *buf; + unsigned length; + unsigned coeff; +{ + unsigned long a, d, new; + unsigned long a1, a2; + unsigned int *q = &(rf_qfor[28-coeff][0]); + unsigned r = rf_rn[coeff+1]; + +#define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f) +#define INSERT(a,i) (a << (5L*i)) + + length /= 8; + /* 13 5 bit quants in a 64 bit word */ + while (length) + { + a = *buf++; + d = *dest; + a1 = EXTRACT(a,0) ^ r; + a2 = EXTRACT(a,1) ^ r; + new = INSERT(a2,1) | a1 ; + a1 = EXTRACT(a,2) ^ r; + a2 = EXTRACT(a,3) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,2) | INSERT (a2,3); + a1 = EXTRACT(a,4) ^ r; + a2 = EXTRACT(a,5) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,4) | INSERT (a2,5); + a1 = EXTRACT(a,5) ^ r; + a2 = EXTRACT(a,6) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,5) | INSERT (a2,6); +#if RF_LONGSHIFT > 2 + a1 = EXTRACT(a,7) ^ r; + a2 = EXTRACT(a,8) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,7) | INSERT (a2,8); + a1 = EXTRACT(a,9) ^ r; + a2 = EXTRACT(a,10) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,9) | INSERT (a2,10); + a1 = EXTRACT(a,11) ^ r; + a2 = EXTRACT(a,12) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,11) | INSERT (a2,12); +#endif /* RF_LONGSHIFT > 2 */ + d ^= new; + *dest++ = d; + length--; + } +} + +/* + compute + + dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ] + + on a five bit basis. + optimization: compute old ^ new on 64 bit basis. + + length in bytes. +*/ + +static void QDelta( + char *dest, + char *obuf, + char *nbuf, + unsigned length, + unsigned char coeff) +{ + unsigned long a, d, new; + unsigned long a1, a2; + unsigned int *q = &(rf_qfor[28-coeff][0]); + unsigned r = rf_rn[coeff+1]; + +#ifdef KERNEL + /* PQ in kernel currently not supported because the encoding/decoding table is not present */ + bzero(dest, length); +#else /* KERNEL */ + /* this code probably doesn't work and should be rewritten -wvcii */ + /* 13 5 bit quants in a 64 bit word */ + length /= 8; + while (length) + { + a = *obuf++; /* XXX need to reorg to avoid cache conflicts */ + a ^= *nbuf++; + d = *dest; + a1 = EXTRACT(a,0) ^ r; + a2 = EXTRACT(a,1) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = INSERT(a2,1) | a1 ; + a1 = EXTRACT(a,2) ^ r; + a2 = EXTRACT(a,3) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,2) | INSERT (a2,3); + a1 = EXTRACT(a,4) ^ r; + a2 = EXTRACT(a,5) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,4) | INSERT (a2,5); + a1 = EXTRACT(a,5) ^ r; + a2 = EXTRACT(a,6) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,5) | INSERT (a2,6); +#if RF_LONGSHIFT > 2 + a1 = EXTRACT(a,7) ^ r; + a2 = EXTRACT(a,8) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,7) | INSERT (a2,8); + a1 = EXTRACT(a,9) ^ r; + a2 = EXTRACT(a,10) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,9) | INSERT (a2,10); + a1 = EXTRACT(a,11) ^ r; + a2 = EXTRACT(a,12) ^ r; + a1 = q[a1]; + a2 = q[a2]; + new = new | INSERT(a1,11) | INSERT (a2,12); +#endif /* RF_LONGSHIFT > 2 */ + d ^= new; + *dest++ = d; + length--; + } +#endif /* KERNEL */ +} + +/* + recover columns a and b from the given p and q into + bufs abuf and bbuf. All bufs are word aligned. + Length is in bytes. +*/ + + +/* + * XXX + * + * Everything about this seems wrong. + */ +void rf_PQ_recover(pbuf,qbuf,abuf,bbuf,length,coeff_a,coeff_b) + unsigned long *pbuf; + unsigned long *qbuf; + unsigned long *abuf; + unsigned long *bbuf; + unsigned length; + unsigned coeff_a; + unsigned coeff_b; +{ + unsigned long p, q, a, a0, a1; + int col = (29 * coeff_a) + coeff_b; + unsigned char *q0 = & (rf_qinv[col][0]); + + length /= 8; + while (length) + { + p = *pbuf++; + q = *qbuf++; + a0 = EXTRACT(p,0); + a1 = EXTRACT(q,0); + a = q0[a0<<5 | a1]; +#define MF(i) \ + a0 = EXTRACT(p,i); \ + a1 = EXTRACT(q,i); \ + a = a | INSERT(q0[a0<<5 | a1],i) + + MF(1); + MF(2); + MF(3); + MF(4); + MF(5); + MF(6); +#if 0 + MF(7); + MF(8); + MF(9); + MF(10); + MF(11); + MF(12); +#endif /* 0 */ + *abuf++ = a; + *bbuf++ = a ^ p; + length--; + } +} + +/* + Lost parity and a data column. Recover that data column. + Assume col coeff is lost. Let q the contents of Q after + all surviving data columns have been q-xored out of it. + Then we have the equation + + q[28-coeff][a_i ^ r_i+1] = q + + but q is cyclic with period 31. + So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] = + q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} . + + so a_i = r_{coeff+1} ^ q[3+coeff][q] + + The routine is passed q buffer and the buffer + the data is to be recoverd into. They can be the same. +*/ + + + +static void rf_InvertQ( + unsigned long *qbuf, + unsigned long *abuf, + unsigned length, + unsigned coeff) +{ + unsigned long a, new; + unsigned long a1, a2; + unsigned int *q = &(rf_qfor[3+coeff][0]); + unsigned r = rf_rn[coeff+1]; + + /* 13 5 bit quants in a 64 bit word */ + length /= 8; + while (length) + { + a = *qbuf++; + a1 = EXTRACT(a,0); + a2 = EXTRACT(a,1); + a1 = r ^ q[a1]; + a2 = r ^ q[a2]; + new = INSERT(a2,1) | a1; +#define M(i,j) \ + a1 = EXTRACT(a,i); \ + a2 = EXTRACT(a,j); \ + a1 = r ^ q[a1]; \ + a2 = r ^ q[a2]; \ + new = new | INSERT(a1,i) | INSERT(a2,j) + + M(2,3); + M(4,5); + M(5,6); +#if RF_LONGSHIFT > 2 + M(7,8); + M(9,10); + M(11,12); +#endif /* RF_LONGSHIFT > 2 */ + *abuf++ = new; + length--; + } +} + +#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */ |