summaryrefslogtreecommitdiff
path: root/sys/dev/raidframe/rf_raid.h
blob: d1277a723ca355e3faa9c6551c3bda8b12baa4ce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
/*	$OpenBSD: rf_raid.h,v 1.7 2002/12/16 07:01:04 tdeval Exp $	*/
/*	$NetBSD: rf_raid.h,v 1.12 2000/02/24 17:12:10 oster Exp $	*/

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*************************************************
 * rf_raid.h -- Main header file for RAID driver.
 *************************************************/


#ifndef	_RF__RF_RAID_H_
#define	_RF__RF_RAID_H_

#include "rf_archs.h"
#include "rf_types.h"
#include "rf_threadstuff.h"

#if	defined(__NetBSD__)
#include "rf_netbsd.h"
#elif	defined(__OpenBSD__)
#include "rf_openbsd.h"
#endif

#include <sys/disklabel.h>
#include <sys/types.h>

#include "rf_alloclist.h"
#include "rf_stripelocks.h"
#include "rf_layout.h"
#include "rf_disks.h"
#include "rf_debugMem.h"
#include "rf_diskqueue.h"
#include "rf_reconstruct.h"
#include "rf_acctrace.h"

#if	RF_INCLUDE_PARITYLOGGING > 0
#include "rf_paritylog.h"
#endif	/* RF_INCLUDE_PARITYLOGGING > 0 */

/* Upper bound on the number of disks in a single array. */
#define	RF_MAX_DISKS			128	/* Max disks per array. */
/* Map a device number to a RAID id; expands to DISKUNIT(_dev). */
#define	RF_DEV2RAIDID(_dev)		(DISKUNIT(_dev))

/*
 * Component label version numbers.
 * NOTE(review): presumably RF_COMPONENT_LABEL_VERSION is the current
 * on-disk label version and _VERSION_1 the older one - confirm against
 * the label handling code.
 */
#define	RF_COMPONENT_LABEL_VERSION_1	1
#define	RF_COMPONENT_LABEL_VERSION	2
/*
 * Dirty/clean markers.
 * NOTE(review): presumably the values stored in the "clean" bit of an
 * array or component label - confirm against the users of these macros.
 */
#define	RF_RAID_DIRTY			0
#define	RF_RAID_CLEAN			1

/*
 * Each row in the array is a distinct parity group, so
 * each has its own status, which is one of the following.
 *
 * The "status" member of struct RF_Raid_s points at these per-row values.
 * The enumerators carry the default values 0..3 in declaration order, so
 * their order must not be changed casually.
 */
typedef enum RF_RowStatus_e {
	rf_rs_optimal,
	rf_rs_degraded,
	rf_rs_reconstructing,
	rf_rs_reconfigured
} RF_RowStatus_t;

/*
 * Cumulative user I/O statistics for one array.
 * Embedded in struct RF_Raid_s as the "userstats" member.
 * NOTE(review): the RF_CumulativeStats_t typedef is not declared in this
 * header; presumably it lives in rf_types.h - confirm.
 */
struct RF_CumulativeStats_s {
	struct timeval		start;		/*
						 * The time when the stats were
						 * last started.
						 */
	struct timeval		stop;		/*
						 * The time when the stats were
						 * last stopped.
						 */
	long			sum_io_us;	/*
						 * Sum of all user response
						 * times, in microseconds (us).
						 */
	long			num_ios;	/*
						 * Total number of I/Os
						 * serviced.
						 */
	long			num_sect_moved;	/*
						 * Total number of sectors read
						 * or written.
						 */
};

/*
 * Throughput statistics for one array.
 * Only embedded in struct RF_Raid_s (as "throughputstats") when neither
 * _KERNEL nor SIMULATE is defined.
 */
struct RF_ThroughputStats_s {
	RF_DECLARE_MUTEX	(mutex);	/*
						 * A mutex used to lock the
						 * configuration stuff.
						 * NOTE(review): presumably it
						 * guards the members of this
						 * structure - confirm.
						 */
	struct timeval		 start;		/*
						 * Timer started when
						 * numOutstandingRequests
						 * moves from 0 to 1.
						 */
	struct timeval		 stop;		/*
						 * Timer stopped when
						 * numOutstandingRequests
						 * moves from 1 to 0.
						 */
	RF_uint64		 sum_io_us;	/*
						 * Total time the timer has
						 * been enabled.
						 */
	RF_uint64		 num_ios;	/*
						 * Total number of I/Os
						 * processed by RAIDframe.
						 */
	long			 num_out_ios;	/*
						 * Number of outstanding I/Os.
						 */
};

/*
 * Per-array descriptor for one RAID set.
 *
 * Holds the array geometry, layout, component disks and disk queues,
 * followed by the mutable state: row status, failure counters, component
 * label bookkeeping, reconstruction and quiescence state, statistics,
 * engine-thread control and, when RF_INCLUDE_PARITYLOGGING > 0, the
 * parity-logging state.
 */
struct RF_Raid_s {
	/* This portion never changes, and can be accessed without locking. */
	/*
	 * An exception is Disks[][].status, which requires locking when it is
	 * changed. XXX This is no longer true. numSpare and friends can
	 * change now.
	 */
	u_int			  numRow;	/*
						 * Number of rows of disks,
						 * typically == # of ranks.
						 */
	u_int			  numCol;	/*
						 * Number of columns of disks,
						 * typically == # of disks/rank.
						 */
	u_int			  numSpare;	/* Number of spare disks. */
	int			  maxQueueDepth;/* Max disk queue depth. */
	RF_SectorCount_t	  totalSectors;	/*
						 * Total number of sectors
						 * in the array.
						 */
	RF_SectorCount_t	  sectorsPerDisk;
						/*
						 * Number of sectors on each
						 * disk.
						 */
	u_int			  logBytesPerSector;
						/*
						 * Base-2 log of the number
						 * of bytes in a sector.
						 */
	u_int			  bytesPerSector;
						/* Bytes in a sector. */
	RF_int32		  sectorMask;	/* Mask of bytes-per-sector. */

	RF_RaidLayout_t		  Layout;	/*
						 * All information related to
						 * layout.
						 */
	RF_RaidDisk_t		**Disks;	/*
						 * All information related to
						 * physical disks.
						 */
	RF_DiskQueue_t		**Queues;	/*
						 * All information related to
						 * disk queues.
						 */
	RF_DiskQueueSW_t	 *qType;	/*
						 * Pointer to the DiskQueueSW
						 * used for the component
						 * queues.
						 */
	/*
	 * NOTE:  This is an anchor point via which the queues can be
	 * accessed, but the enqueue/dequeue routines in diskqueue.c use a
	 * local copy of this pointer for the actual accesses.
	 */
	/*
	 * The remainder of the structure can change, and therefore requires
	 * locking on reads and updates.
	 */
	RF_DECLARE_MUTEX	 (mutex);	/*
						 * Mutex used to serialize
						 * access to the fields below.
						 */
	RF_RowStatus_t		 *status;	/*
						 * The status of each row in
						 * the array.
						 */
	int			  valid;	/*
						 * Indicates successful
						 * configuration.
						 */
	RF_LockTableEntry_t	 *lockTable;	/* Stripe-lock table. */
	RF_LockTableEntry_t	 *quiesceLock;	/* Quiescence table. */
	int			  numFailures;	/*
						 * Total number of failures
						 * in the array.
						 */
	int			  numNewFailures;
						/*
						 * Number of *new* failures
						 * (that haven't caused a
						 * mod_counter update).
						 */

	int			  parity_good;	/*
						 * !0 if parity is known to be
						 * correct.
						 */
	int			  serial_number;/*
						 * A "serial number" for this
						 * set.
						 */
	int			  mod_counter;	/*
						 * Modification counter for
						 * component labels.
						 */
	int			  clean;	/*
						 * The clean bit for this array.
						 */

	int			  openings;	/*
						 * Number of I/Os that can be
						 * scheduled simultaneously
						 * (high-level - not a
						 * per-component limit).
						 */

	int			  maxOutstanding;
						/*
						 * Maximum number of
						 * outstanding requests
						 * (per-component).
						 */
	int			  autoconfigure;
						/*
						 * Automatically configure
						 * this RAID set.
						 * 0 == no, 1 == yes.
						 */
	int			  root_partition;
						/*
						 * Use this set as the root
						 * partition (wording inferred
						 * from the field name; the
						 * original comment was
						 * truncated).
						 * 0 == no, 1 == yes.
						 */
	int			  last_unit;	/*
						 * Last unit number (e.g. 0
						 * for /dev/raid0) of this
						 * component. Used for
						 * autoconfigure only.
						 */
	int			  config_order;	/*
						 * 0 .. n. The order in which
						 * the component should be
						 * auto-configured.
						 * E.g. 0 will be done first
						 * (and would become raid0).
						 * This may be in conflict
						 * with last_unit !!?!
						 */
						/* Not currently used. */

	/*
	 * Cleanup stuff.
	 */
	RF_ShutdownList_t	 *shutdownList;	/* Shutdown activities. */
	RF_AllocListElem_t	 *cleanupList;	/*
						 * Memory to be freed at
						 * shutdown time.
						 */

	/*
	 * Recon stuff.
	 */
	RF_HeadSepLimit_t	  headSepLimit;
	int			  numFloatingReconBufs;
	int			  reconInProgress;
	RF_DECLARE_COND		 (waitForReconCond);
	RF_RaidReconDesc_t	 *reconDesc;	/* Reconstruction descriptor. */
	RF_ReconCtrl_t		**reconControl;	/*
						 * Reconstruction control
						 * structure pointers for each
						 * row in the array.
						 */

	/*
	 * Array-quiescence stuff.
	 */
	RF_DECLARE_MUTEX	 (access_suspend_mutex);
	RF_DECLARE_COND		 (quiescent_cond);
	RF_IoCount_t		  accesses_suspended;
	RF_IoCount_t		  accs_in_flight;
	int			  access_suspend_release;
	int			  waiting_for_quiescence;
	RF_CallbackDesc_t	 *quiesce_wait_list;

	/*
	 * Statistics.
	 * The throughput statistics are only compiled in when neither
	 * _KERNEL nor SIMULATE is defined.
	 */
#if	!defined(_KERNEL) && !defined(SIMULATE)
	RF_ThroughputStats_t	  throughputstats;
#endif	/* !_KERNEL && !SIMULATE */
	RF_CumulativeStats_t	  userstats;
	int			  parity_rewrite_stripes_done;
	int			  recon_stripes_done;
	int			  copyback_stripes_done;

	/*
	 * NOTE(review): recon_in_progress is a separate member from
	 * reconInProgress above; which one a given caller consults cannot
	 * be determined from this header - confirm before relying on either.
	 */
	int			  recon_in_progress;
	int			  parity_rewrite_in_progress;
	int			  copyback_in_progress;

	/*
	 * Engine thread control.
	 */
	RF_DECLARE_MUTEX	 (node_queue_mutex);
	RF_DECLARE_COND		 (node_queue_cond);
	RF_DagNode_t		 *node_queue;
	RF_Thread_t		  parity_rewrite_thread;
	RF_Thread_t		  copyback_thread;
	RF_Thread_t		  engine_thread;
	RF_Thread_t		  recon_thread;
	RF_ThreadGroup_t	  engine_tg;
	int			  shutdown_engine;
	int			  dags_in_flight;	/* Debug. */

	/*
	 * PSS (Parity Stripe Status) stuff.
	 */
	RF_FreeList_t		 *pss_freelist;
	long			  pssTableSize;

	/*
	 * Reconstruction stuff.
	 */
	int			  procsInBufWait;
	int			  numFullReconBuffers;
	RF_AccTraceEntry_t	 *recon_tracerecs;
	unsigned long		  accumXorTimeUs;
	RF_ReconDoneProc_t	 *recon_done_procs;
	RF_DECLARE_MUTEX	 (recon_done_proc_mutex);
	/*
	 * nAccOutstanding and waitShutdown are protected by the desc
	 * freelist lock. (This may seem strange, since that's a central
	 * serialization point for a per-array piece of data, but otherwise,
	 * it'd be an extra per-array lock, and that'd only be less
	 * efficient...)
	 */
	RF_DECLARE_COND		 (outstandingCond);
	int			  waitShutdown;
	int			  nAccOutstanding;

	RF_DiskId_t		**diskids;
	RF_DiskId_t		 *sparediskids;

	int			  raidid;
	RF_AccTotals_t		  acc_totals;
	int			  keep_acc_totals;

	struct raidcinfo	**raid_cinfo;	/* Array of component info. */

	int			  terminate_disk_queues;

	/*
	 * XXX
	 *
	 * Config-specific information should be moved
	 * somewhere else, or at least hung off this
	 * in some generic way.
	 */

	/* Used by rf_compute_workload_shift. */
	RF_RowCol_t		  hist_diskreq[RF_MAXROW][RF_MAXCOL];

	/* Used by declustering. */
	int			  noRotate;

#if	RF_INCLUDE_PARITYLOGGING > 0
	/* Used by parity logging. */
	RF_SectorCount_t	  regionLogCapacity;
	RF_ParityLogQueue_t	  parityLogPool;/*
						 * Pool of unused parity logs.
						 */
	RF_RegionInfo_t		 *regionInfo;	/* Array of region state. */
	int			  numParityLogs;
	int			  numSectorsPerLog;
	int			  regionParityRange;
	int			  logsInUse;	/* Debugging. */
	RF_ParityLogDiskQueue_t	  parityLogDiskQueue;
						/*
						 * State of parity logging
						 * disk work.
						 */
	RF_RegionBufferQueue_t	  regionBufferPool;
					 	/*
						 * Buffers for holding region
						 * log.
						 */
	RF_RegionBufferQueue_t	  parityBufferPool;
						/*
						 * Buffers for holding parity.
						 */
	caddr_t			  parityLogBufferHeap;
						/*
						 * Pool of unused parity logs.
						 * NOTE(review): this text
						 * duplicates the comment on
						 * parityLogPool; presumably
						 * this is the backing memory
						 * for the parity log buffers
						 * - confirm.
						 */
	RF_Thread_t		  pLogDiskThreadHandle;

#endif	/* RF_INCLUDE_PARITYLOGGING > 0 */
};

#endif	/* !_RF__RF_RAID_H_ */