summaryrefslogtreecommitdiff
path: root/sys/dev/raidframe/rf_raid.h
blob: 278cc9f507a20380a8f62c775c367116ed9459f7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
/*	$OpenBSD: rf_raid.h,v 1.1 1999/01/11 14:29:41 niklas Exp $	*/
/*	$NetBSD: rf_raid.h,v 1.1 1998/11/13 04:20:32 oster Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/**********************************************
 * rf_raid.h -- main header file for RAID driver
 **********************************************/

/*
 * :  
 * Log: rf_raid.h,v 
 * Revision 1.48  1996/08/20 22:33:54  jimz
 * make hist_diskreq a doubly-indexed array
 *
 * Revision 1.47  1996/07/15  05:40:41  jimz
 * some recon datastructure cleanup
 * better handling of multiple failures
 * added undocumented double-recon test
 *
 * Revision 1.46  1996/07/10  22:28:51  jimz
 * get rid of obsolete row statuses (dead,degraded2)
 *
 * Revision 1.45  1996/06/14  14:56:29  jimz
 * make engine threading stuff ifndef SIMULATE
 *
 * Revision 1.44  1996/06/14  14:16:54  jimz
 * move in engine node queue, atomicity control
 *
 * Revision 1.43  1996/06/12  04:41:26  jimz
 * tweaks to make genplot work with user-level driver
 * (mainly change stat collection)
 *
 * Revision 1.42  1996/06/11  10:57:17  jimz
 * add recon_done_procs, recon_done_proc_mutex
 *
 * Revision 1.41  1996/06/11  01:26:48  jimz
 * added mechanism for user-level to sync diskthread startup,
 * shutdown
 *
 * Revision 1.40  1996/06/10  14:18:58  jimz
 * move user, throughput stats into per-array structure
 *
 * Revision 1.39  1996/06/10  11:55:47  jimz
 * Straightened out some per-array/not-per-array distinctions, fixed
 * a couple bugs related to confusion. Added shutdown lists. Removed
 * layout shutdown function (now subsumed by shutdown lists).
 *
 * Revision 1.38  1996/06/07  21:33:04  jimz
 * begin using consistent types for sector numbers,
 * stripe numbers, row+col numbers, recon unit numbers
 *
 * Revision 1.37  1996/06/05  19:38:32  jimz
 * fixed up disk queueing types config
 * added sstf disk queueing
 * fixed exit bug on diskthreads (ref-ing bad mem)
 *
 * Revision 1.36  1996/06/05  18:06:02  jimz
 * Major code cleanup. The Great Renaming is now done.
 * Better modularity. Better typing. Fixed a bunch of
 * synchronization bugs. Made a lot of global stuff
 * per-desc or per-array. Removed dead code.
 *
 * Revision 1.35  1996/06/03  23:28:26  jimz
 * more bugfixes
 * check in tree to sync for IPDS runs with current bugfixes
 * there still may be a problem with threads in the script test
 * getting I/Os stuck- not trivially reproducible (runs ~50 times
 * in a row without getting stuck)
 *
 * Revision 1.34  1996/06/02  17:31:48  jimz
 * Moved a lot of global stuff into array structure, where it belongs.
 * Fixed up paritylogging, pss modules in this manner. Some general
 * code cleanup. Removed lots of dead code, some dead files.
 *
 * Revision 1.33  1996/05/30  23:22:16  jimz
 * bugfixes of serialization, timing problems
 * more cleanup
 *
 * Revision 1.32  1996/05/30  11:29:41  jimz
 * Numerous bug fixes. Stripe lock release code disagreed with the taking code
 * about when stripes should be locked (I made it consistent: no parity, no lock)
 * There was a lot of extra serialization of I/Os which I've removed- a lot of
 * it was to calculate values for the cache code, which is no longer with us.
 * More types, function, macro cleanup. Added code to properly quiesce the array
 * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
 * before. Fixed memory allocation, freeing bugs.
 *
 * Revision 1.31  1996/05/27  18:56:37  jimz
 * more code cleanup
 * better typing
 * compiles in all 3 environments
 *
 * Revision 1.30  1996/05/24  22:17:04  jimz
 * continue code + namespace cleanup
 * typed a bunch of flags
 *
 * Revision 1.29  1996/05/23  21:46:35  jimz
 * checkpoint in code cleanup (release prep)
 * lots of types, function names have been fixed
 *
 * Revision 1.28  1996/05/23  00:33:23  jimz
 * code cleanup: move all debug decls to rf_options.c, all extern
 * debug decls to rf_options.h, all debug vars preceded by rf_
 *
 * Revision 1.27  1996/05/18  19:51:34  jimz
 * major code cleanup- fix syntax, make some types consistent,
 * add prototypes, clean out dead code, et cetera
 *
 * Revision 1.26  1996/05/08  21:01:24  jimz
 * fixed up enum type names that were conflicting with other
 * enums and function names (ie, "panic")
 * future naming trends will be towards RF_ and rf_ for
 * everything raidframe-related
 *
 * Revision 1.25  1996/05/02  14:57:55  jimz
 * add sectorMask
 *
 * Revision 1.24  1996/04/22  15:53:13  jimz
 * MAX_RAIDS -> NRAIDFRAME
 *
 * Revision 1.23  1995/12/14  18:39:46  jimz
 * convert to rf_types.h types
 *
 * Revision 1.22  1995/12/06  15:02:26  root
 * added copyright info
 *
 * Revision 1.21  1995/10/09  17:39:24  jimz
 * added info for tracking number of outstanding accesses
 * at user-level
 *
 * Revision 1.20  1995/09/30  20:37:46  jimz
 * added acc_totals to Raid for kernel
 *
 * Revision 1.19  1995/09/19  22:57:14  jimz
 * add cache of raidid for kernel
 *
 * Revision 1.18  1995/09/18  16:50:04  jimz
 * added RF_MAX_DISKS (for config ioctls)
 *
 * Revision 1.17  1995/09/07  19:02:31  jimz
 * mods to get raidframe to compile and link
 * in kernel environment
 *
 * Revision 1.16  1995/07/21  19:29:51  robby
 * added some info for the idler to the Raid
 *
 * Revision 1.15  1995/07/16  03:19:14  cfb
 * added cachePtr to *raidPtr
 *
 * Revision 1.14  1995/06/23  13:39:36  robby
 * updated to prototypes in rf_layout.h
 *
 */

#ifndef _RF__RF_RAID_H_
#define _RF__RF_RAID_H_

/*
 * This header tests both _KERNEL and KERNEL in its conditionals.
 * Define KERNEL whenever _KERNEL is defined so that both spellings
 * select the kernel variant of the declarations below.
 */
#ifdef _KERNEL
#define KERNEL
#endif

#include "rf_archs.h"
#include "rf_types.h"
#include "rf_threadstuff.h"

#ifdef _KERNEL
#if defined(__NetBSD__)
#include "rf_netbsd.h"
#elif defined(__OpenBSD__)
#include "rf_openbsd.h"
#endif
#endif

#ifdef KERNEL
/* XXX Needs to be added.  GO
#include <raidframe.h>
*/
#include <sys/disklabel.h>
#else /* KERNEL */
#include <stdio.h>
#include <assert.h>
#endif /* KERNEL */
#include <sys/types.h>

#include "rf_alloclist.h"
#include "rf_stripelocks.h"
#include "rf_layout.h"
#include "rf_disks.h"
#include "rf_debugMem.h"
#include "rf_diskqueue.h"
#include "rf_reconstruct.h"
#include "rf_acctrace.h"

#if RF_INCLUDE_PARITYLOGGING > 0
#include "rf_paritylog.h"
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */

/*
 * RF_MAX_DISKS:   upper bound on the number of disks in one array.
 *
 * RF_DEV2RAIDID:  convert a device number (_dev) into a RAID id.
 *   - On NetBSD and OpenBSD the id is whatever DISKUNIT(_dev) yields
 *     (DISKUNIT is not defined in this header).
 *   - Everywhere else the id is minor(_dev) shifted right by 6 bits,
 *     i.e. the low 6 bits of the minor number are not part of the id.
 */
#define RF_MAX_DISKS 128 /* max disks per array */
#if defined(__NetBSD__) || defined(__OpenBSD__)
#define RF_DEV2RAIDID(_dev)  (DISKUNIT(_dev))
#else
#define RF_DEV2RAIDID(_dev)  (minor(_dev)>>6)     /* convert dev_t to raid id */
#endif

/*
 * Every row of the array forms a distinct parity group, so every row
 * carries its own status.  The status is always exactly one of the
 * values below; the numeric values are spelled out explicitly and are
 * the same ones the compiler would assign implicitly.
 */
typedef enum RF_RowStatus_e {
  rf_rs_optimal        = 0,
  rf_rs_degraded       = 1,
  rf_rs_reconstructing = 2,
  rf_rs_reconfigured   = 3
} RF_RowStatus_t;

/*
 * Cumulative I/O statistics: a start/stop timestamp pair plus running
 * totals of response time, I/O count and sectors transferred.
 * struct RF_Raid_s holds one of these in its userstats member (declared
 * there as RF_CumulativeStats_t, presumably a typedef of this struct
 * provided by another header -- confirm).
 */
struct RF_CumulativeStats_s {
  struct timeval start;     /* the time when the stats were last started */
  struct timeval stop;      /* the time when the stats were last stopped */
  long sum_io_us;           /* sum of all user response times (us) */
  long num_ios;             /* total number of I/Os serviced */
  long num_sect_moved;      /* total number of sectors read or written */
};

/*
 * Throughput statistics: a timer that, per the member comments below,
 * runs while at least one request is outstanding, plus counters of
 * total and outstanding I/Os.
 * struct RF_Raid_s only contains a member of this kind (throughputstats)
 * when neither KERNEL nor SIMULATE is defined.
 *
 * The mutex is declared through RF_DECLARE_MUTEX, which is used here
 * without a trailing semicolon.
 */
struct RF_ThroughputStats_s {
  RF_DECLARE_MUTEX(mutex)/* a mutex used to lock the configuration stuff */
                         /* NOTE(review): wording looks carried over from elsewhere; presumably this guards the fields of this struct -- confirm */
  struct timeval start;  /* timer started when numOutstandingRequests moves from 0 to 1 */
  struct timeval stop;   /* timer stopped when numOutstandingRequests moves from 1 to 0 */
  RF_uint64 sum_io_us;   /* total time timer is enabled */
  RF_uint64 num_ios;     /* total number of ios processed by RAIDframe */
  long num_out_ios;      /* number of outstanding ios */
};

#ifdef SIMULATE
/*
 * List node identifying one (row, col) position in the array, chained
 * to further nodes through `next`.  Only compiled when SIMULATE is
 * defined; struct RF_Raid_s keeps a pointer to such a node in its
 * pendingRecon member.
 * NOTE(review): presumably a list of reconstructions waiting to be
 * started -- confirm against the code that walks it.
 */
typedef struct RF_PendingRecon_s RF_PendingRecon_t;
struct RF_PendingRecon_s {
  RF_RowCol_t         row;   /* row index of the entry */
  RF_RowCol_t         col;   /* column index of the entry */
  RF_PendingRecon_t  *next;  /* next node in the list */
};
#endif /* SIMULATE */

/*
 * Per-array state of the RAID driver.
 *
 * The structure is split in two parts:
 *   - a leading portion (geometry, layout, disk and queue anchors) that
 *     does not change and may be read without locking, and
 *   - everything from `mutex` onwards, which can change and requires
 *     locking on reads and updates.
 *
 * Several members only exist in particular build flavours, selected by
 * KERNEL / _KERNEL, SIMULATE and RF_INCLUDE_PARITYLOGGING, so the layout
 * of this structure differs between those builds.
 *
 * Mutexes and condition variables are declared through RF_DECLARE_MUTEX
 * and RF_DECLARE_COND, which are used without a trailing semicolon.
 */
struct RF_Raid_s {
  /* This portion never changes, and can be accessed without locking */
  /* an exception is Disks[][].status, which requires locking when it is changed */
  u_int numRow;             /* number of rows of disks, typically == # of ranks */
  u_int numCol;             /* number of columns of disks, typically == # of disks/rank */
  u_int numSpare;           /* number of spare disks */
  int   maxQueueDepth;      /* max disk queue depth */
  RF_SectorCount_t  totalSectors;   /* total number of sectors in the array */
  RF_SectorCount_t  sectorsPerDisk; /* number of sectors on each disk */
  u_int logBytesPerSector;  /* base-2 log of the number of bytes in a sector */
  u_int bytesPerSector;     /* bytes in a sector */
  RF_int32  sectorMask;     /* mask of bytes-per-sector */

  RF_RaidLayout_t   Layout; /* all information related to layout */
  RF_RaidDisk_t   **Disks;  /* all information related to physical disks */
  RF_DiskQueue_t  **Queues; /* all information related to disk queues */
     /* NOTE:  This is an anchor point via which the queues can be accessed,
      * but the enqueue/dequeue routines in diskqueue.c use a local copy of
      * this pointer for the actual accesses.
      */
  /* The remainder of the structure can change, and therefore requires locking on reads and updates */
  RF_DECLARE_MUTEX(mutex)        /* mutex used to serialize access to the fields below */
  RF_RowStatus_t  *status;       /* the status of each row in the array */
  int              valid;        /* indicates successful configuration */
  RF_LockTableEntry_t *lockTable;   /* stripe-lock table */
  RF_LockTableEntry_t *quiesceLock; /* quiescence table */
  int                  numFailures; /* total number of failures in the array */

  /*
   * Cleanup stuff
   */
  RF_ShutdownList_t  *shutdownList; /* shutdown activities */
  RF_AllocListElem_t *cleanupList;  /* memory to be freed at shutdown time */

  /*
   * Recon stuff
   */
  RF_HeadSepLimit_t headSepLimit;
  int numFloatingReconBufs;
  int reconInProgress;
#ifdef SIMULATE
  RF_PendingRecon_t *pendingRecon; /* head of the RF_PendingRecon_t list (SIMULATE builds only) */
#endif /* SIMULATE */
  RF_DECLARE_COND(waitForReconCond)
  RF_RaidReconDesc_t *reconDesc; /* reconstruction descriptor */
  RF_ReconCtrl_t **reconControl; /* reconstruction control structure pointers for each row in the array */

#if !defined(KERNEL) && !defined(SIMULATE)
  /*
   * Disk thread stuff
   * (only present when neither KERNEL nor SIMULATE is defined)
   */
  int diskthreads_created;
  int diskthreads_running;
  int diskthreads_shutdown;
  RF_DECLARE_MUTEX(diskthread_count_mutex)
  RF_DECLARE_COND(diskthread_count_cond)
#endif /* !KERNEL && !SIMULATE */

  /*
   * Array-quiescence stuff
   */
  RF_DECLARE_MUTEX(access_suspend_mutex)
  RF_DECLARE_COND(quiescent_cond)
  RF_IoCount_t accesses_suspended;
  RF_IoCount_t accs_in_flight;
  int access_suspend_release;
  int waiting_for_quiescence;
  RF_CallbackDesc_t *quiesce_wait_list;

  /*
   * Statistics
   */
#if !defined(KERNEL) && !defined(SIMULATE)
  RF_ThroughputStats_t throughputstats;
#endif /* !KERNEL && !SIMULATE */
  RF_CumulativeStats_t userstats;

  /*
   * Engine thread control
   */
  RF_DECLARE_MUTEX(node_queue_mutex)
  RF_DECLARE_COND(node_queue_cond)
  RF_DagNode_t *node_queue;
#ifndef SIMULATE
  RF_Thread_t engine_thread;
  RF_ThreadGroup_t engine_tg;
#endif /* !SIMULATE */
  int shutdown_engine;
  int dags_in_flight; /* debug */

  /*
   * PSS (Parity Stripe Status) stuff
   */
  RF_FreeList_t *pss_freelist;
  long pssTableSize;

  /*
   * Reconstruction stuff
   */
  int procsInBufWait;
  int numFullReconBuffers;
  RF_AccTraceEntry_t *recon_tracerecs;
  unsigned long accumXorTimeUs;
  RF_ReconDoneProc_t *recon_done_procs;
  RF_DECLARE_MUTEX(recon_done_proc_mutex)

#if !defined(KERNEL) && !defined(SIMULATE)
  RF_Thread_t **diskthreads, *sparediskthreads;  /* thread descriptors for disk threads in user-level version */
#endif /* !KERNEL && !SIMULATE */

  /*
   * nAccOutstanding, waitShutdown protected by desc freelist lock
   * (This may seem strange, since that's a central serialization point
   * for a per-array piece of data, but otherwise, it'd be an extra
   * per-array lock, and that'd only be less efficient...)
   */
  RF_DECLARE_COND(outstandingCond)
  int waitShutdown;
  int nAccOutstanding;

  RF_DiskId_t **diskids;
  RF_DiskId_t  *sparediskids;

#ifdef KERNEL
	int           raidid;
#endif /* KERNEL */
	RF_AccTotals_t  acc_totals;
	int           keep_acc_totals;

  /*
   * Note: this conditional tests _KERNEL while the raidid member above
   * tests KERNEL; this header defines KERNEL whenever _KERNEL is defined.
   */
#ifdef _KERNEL
        struct raidcinfo **raid_cinfo; /* array of component info */
        struct proc *proc; /* XXX shouldn't be needed here.. :-p */
#endif

  int terminate_disk_queues;

  /*
   * XXX
   *
   * config-specific information should be moved
   * somewhere else, or at least hung off this
   * in some generic way
   */

  /* used by rf_compute_workload_shift */
  RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL]; /* doubly-indexed: [row][col] */

  /* used by declustering */
  int noRotate;

#if RF_INCLUDE_PARITYLOGGING > 0
  /* used by parity logging */
  RF_SectorCount_t          regionLogCapacity;
  RF_ParityLogQueue_t       parityLogPool;       /* pool of unused parity logs */
  RF_RegionInfo_t          *regionInfo;          /* array of region state */
  int                       numParityLogs;
  int                       numSectorsPerLog;
  int                       regionParityRange;
  int                       logsInUse;           /* debugging */
  RF_ParityLogDiskQueue_t   parityLogDiskQueue;  /* state of parity logging disk work */
  RF_RegionBufferQueue_t    regionBufferPool;    /* buffers for holding region log */
  RF_RegionBufferQueue_t    parityBufferPool;    /* buffers for holding parity */
  caddr_t                   parityLogBufferHeap; /* pool of unused parity logs */
                                                 /* NOTE(review): same wording as parityLogPool above; presumably the raw memory backing those logs -- confirm */
#ifndef SIMULATE
  RF_Thread_t               pLogDiskThreadHandle;
#endif /* !SIMULATE */

#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
};

#endif /* !_RF__RF_RAID_H_ */