diff options
author | Tobias Weingartner <weingart@cvs.openbsd.org> | 1999-05-22 21:22:35 +0000 |
---|---|---|
committer | Tobias Weingartner <weingart@cvs.openbsd.org> | 1999-05-22 21:22:35 +0000 |
commit | 35c377bf5315fb3e23e1c5b7e8af00733bed7db0 (patch) | |
tree | 5ab464baa96068a0b4eeb167b4514387057f3f90 | |
parent | aa079fadbadf6efd9c150afdd60894563611277c (diff) |
Add new vm_swap code for dynamic swap. From NetBSD, munged some by me and
others. Syscall commit pending.
-rw-r--r-- | sys/arch/alpha/alpha/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/amiga/amiga/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/arc/arc/machdep.c | 5 | ||||
-rw-r--r-- | sys/arch/arm32/arm32/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/atari/atari/machdep.c | 1 | ||||
-rw-r--r-- | sys/arch/hp300/hp300/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/i386/i386/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/kbus/kbus/machdep.c | 1 | ||||
-rw-r--r-- | sys/arch/mac68k/mac68k/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/mvme68k/mvme68k/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/mvme88k/mvme88k/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/pc532/pc532/machdep.c | 1 | ||||
-rw-r--r-- | sys/arch/pmax/pmax/machdep.c | 1 | ||||
-rw-r--r-- | sys/arch/powerpc/powerpc/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/sparc/sparc/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/sun3/sun3/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/vax/vax/machdep.c | 3 | ||||
-rw-r--r-- | sys/arch/wgrisc/wgrisc/machdep.c | 5 | ||||
-rw-r--r-- | sys/compat/common/Makefile | 8 | ||||
-rw-r--r-- | sys/compat/common/compat_vm.c | 73 | ||||
-rw-r--r-- | sys/sys/map.h | 11 | ||||
-rw-r--r-- | sys/sys/swap.h | 72 | ||||
-rw-r--r-- | sys/vm/swap_pager.c | 63 | ||||
-rw-r--r-- | sys/vm/vm_swap.c | 1539 |
24 files changed, 1341 insertions, 475 deletions
diff --git a/sys/arch/alpha/alpha/machdep.c b/sys/arch/alpha/alpha/machdep.c index 828f3f42ef9..50facc3c27e 100644 --- a/sys/arch/alpha/alpha/machdep.c +++ b/sys/arch/alpha/alpha/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.26 1999/03/24 22:56:13 alex Exp $ */ +/* $OpenBSD: machdep.c,v 1.27 1999/05/22 21:22:17 weingart Exp $ */ /* $NetBSD: machdep.c,v 1.61 1996/12/07 01:54:49 cgd Exp $ */ /* @@ -445,7 +445,6 @@ unknown_cputype: valloc(cfree, struct cblock, nclist); #endif valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/amiga/amiga/machdep.c b/sys/arch/amiga/amiga/machdep.c index 26f83ac34f0..e9de602d790 100644 --- a/sys/arch/amiga/amiga/machdep.c +++ b/sys/arch/amiga/amiga/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.31 1999/01/20 12:06:52 niklas Exp $ */ +/* $OpenBSD: machdep.c,v 1.32 1999/05/22 21:22:18 weingart Exp $ */ /* $NetBSD: machdep.c,v 1.95 1997/08/27 18:31:17 is Exp $ */ /* @@ -363,7 +363,6 @@ again: (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) /* valloc(cfree, struct cblock, nclist); */ valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/arc/arc/machdep.c b/sys/arch/arc/arc/machdep.c index 8c6386a2cd7..f991f33ae9c 100644 --- a/sys/arch/arc/arc/machdep.c +++ b/sys/arch/arc/arc/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.35 1999/01/30 22:39:31 imp Exp $ */ +/* $OpenBSD: machdep.c,v 1.36 1999/05/22 21:22:19 weingart Exp $ */ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1992, 1993 @@ -38,7 +38,7 @@ * SUCH DAMAGE. 
* * from: @(#)machdep.c 8.3 (Berkeley) 1/12/94 - * $Id: machdep.c,v 1.35 1999/01/30 22:39:31 imp Exp $ + * $Id: machdep.c,v 1.36 1999/05/22 21:22:19 weingart Exp $ */ /* from: Utah Hdr: machdep.c 1.63 91/04/24 */ @@ -483,7 +483,6 @@ mips_init(argc, argv, envv) valloc(cfree, struct cblock, nclist); #endif valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/arm32/arm32/machdep.c b/sys/arch/arm32/arm32/machdep.c index bb2f341abf1..91c7331ee35 100644 --- a/sys/arch/arm32/arm32/machdep.c +++ b/sys/arch/arm32/arm32/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.4 1999/01/11 05:11:11 millert Exp $ */ +/* $OpenBSD: machdep.c,v 1.5 1999/05/22 21:22:20 weingart Exp $ */ /* $NetBSD: machdep.c,v 1.6 1996/03/13 21:32:39 mark Exp $ */ /* @@ -1472,7 +1472,6 @@ allocsys(v) v = (caddr_t)((name) + (num)); valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); diff --git a/sys/arch/atari/atari/machdep.c b/sys/arch/atari/atari/machdep.c index 9e57142defa..8bebb10f257 100644 --- a/sys/arch/atari/atari/machdep.c +++ b/sys/arch/atari/atari/machdep.c @@ -213,7 +213,6 @@ again: (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) /* valloc(cfree, struct cblock, nclist); */ valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/hp300/hp300/machdep.c b/sys/arch/hp300/hp300/machdep.c index 50660fa8df9..0f269f318d9 100644 --- a/sys/arch/hp300/hp300/machdep.c +++ b/sys/arch/hp300/hp300/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.36 1999/04/23 03:11:55 downsj Exp $ */ +/* $OpenBSD: machdep.c,v 1.37 1999/05/22 21:22:22 weingart Exp $ */ /* $NetBSD: machdep.c,v 1.94 1997/06/12 15:46:29 mrg Exp $ */ 
/* @@ -401,7 +401,6 @@ allocsys(v) valloc(cfree, struct cblock, nclist); #endif valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index c4f7bf6325d..57b9aeaa243 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.106 1999/05/09 15:09:04 mickey Exp $ */ +/* $OpenBSD: machdep.c,v 1.107 1999/05/22 21:22:23 weingart Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -447,7 +447,6 @@ allocsys(v) valloc(cfree, struct cblock, nclist); #endif valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/kbus/kbus/machdep.c b/sys/arch/kbus/kbus/machdep.c index da0cd04a5c6..f7876bb310d 100644 --- a/sys/arch/kbus/kbus/machdep.c +++ b/sys/arch/kbus/kbus/machdep.c @@ -508,7 +508,6 @@ allocsys(v) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/mac68k/mac68k/machdep.c b/sys/arch/mac68k/mac68k/machdep.c index 008cc7bb57d..8c313de02a5 100644 --- a/sys/arch/mac68k/mac68k/machdep.c +++ b/sys/arch/mac68k/mac68k/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.58 1999/04/23 05:15:28 downsj Exp $ */ +/* $OpenBSD: machdep.c,v 1.59 1999/05/22 21:22:25 weingart Exp $ */ /* $NetBSD: machdep.c,v 1.134 1997/02/14 06:15:30 scottr Exp $ */ /* @@ -363,7 +363,6 @@ again: valloc(cfree, struct cblock, nclist); #endif valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM 
valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/mvme68k/mvme68k/machdep.c b/sys/arch/mvme68k/mvme68k/machdep.c index a30e4a14e13..3d7ebc53607 100644 --- a/sys/arch/mvme68k/mvme68k/machdep.c +++ b/sys/arch/mvme68k/mvme68k/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.25 1999/02/04 23:00:26 niklas Exp $ */ +/* $OpenBSD: machdep.c,v 1.26 1999/05/22 21:22:26 weingart Exp $ */ /* * Copyright (c) 1995 Theo de Raadt @@ -264,7 +264,6 @@ again: valloc(cfree, struct cblock, nclist); #endif valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/mvme88k/mvme88k/machdep.c b/sys/arch/mvme88k/mvme88k/machdep.c index 33b121bdcb4..27085bb3023 100644 --- a/sys/arch/mvme88k/mvme88k/machdep.c +++ b/sys/arch/mvme88k/mvme88k/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.12 1999/04/11 03:26:28 smurph Exp $ */ +/* $OpenBSD: machdep.c,v 1.13 1999/05/22 21:22:27 weingart Exp $ */ /* * Copyright (c) 1998 Steve Murphree, Jr. 
* Copyright (c) 1996 Nivas Madhur @@ -627,7 +627,6 @@ allocsys(v) valloc(cfree, struct cblock, nclist); #endif valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/pc532/pc532/machdep.c b/sys/arch/pc532/pc532/machdep.c index 277237a481d..9462904d85d 100644 --- a/sys/arch/pc532/pc532/machdep.c +++ b/sys/arch/pc532/pc532/machdep.c @@ -350,7 +350,6 @@ again: #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/pmax/pmax/machdep.c b/sys/arch/pmax/pmax/machdep.c index 81adb5a7775..29a7914ed30 100644 --- a/sys/arch/pmax/pmax/machdep.c +++ b/sys/arch/pmax/pmax/machdep.c @@ -701,7 +701,6 @@ mach_init(argc, argv, code, cv) valloc(cfree, struct cblock, nclist); #endif valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/powerpc/powerpc/machdep.c b/sys/arch/powerpc/powerpc/machdep.c index e6c147b3c57..d2d1037e04b 100644 --- a/sys/arch/powerpc/powerpc/machdep.c +++ b/sys/arch/powerpc/powerpc/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.22 1999/01/11 05:11:54 millert Exp $ */ +/* $OpenBSD: machdep.c,v 1.23 1999/05/22 21:22:29 weingart Exp $ */ /* $NetBSD: machdep.c,v 1.4 1996/10/16 19:33:11 ws Exp $ */ /* @@ -444,7 +444,6 @@ allocsys(v) v = (caddr_t)(((name) = (type *)v) + (num)) valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/sparc/sparc/machdep.c b/sys/arch/sparc/sparc/machdep.c index 75d25f3d240..4222480e53c 100644 
--- a/sys/arch/sparc/sparc/machdep.c +++ b/sys/arch/sparc/sparc/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.35 1999/04/22 18:51:49 art Exp $ */ +/* $OpenBSD: machdep.c,v 1.36 1999/05/22 21:22:30 weingart Exp $ */ /* $NetBSD: machdep.c,v 1.85 1997/09/12 08:55:02 pk Exp $ */ /* @@ -410,7 +410,6 @@ allocsys(v) #define valloc(name, type, num) \ v = (caddr_t)(((name) = (type *)v) + (num)) valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/sun3/sun3/machdep.c b/sys/arch/sun3/sun3/machdep.c index 3f060f938a8..1b0190a635b 100644 --- a/sys/arch/sun3/sun3/machdep.c +++ b/sys/arch/sun3/sun3/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.22 1999/02/04 23:00:26 niklas Exp $ */ +/* $OpenBSD: machdep.c,v 1.23 1999/05/22 21:22:31 weingart Exp $ */ /* $NetBSD: machdep.c,v 1.77 1996/10/13 03:47:51 christos Exp $ */ /* @@ -189,7 +189,6 @@ allocsys(v) valloc(cfree, struct cblock, nclist); #endif valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/vax/vax/machdep.c b/sys/arch/vax/vax/machdep.c index e1a2b55d96f..2ea640c80eb 100644 --- a/sys/arch/vax/vax/machdep.c +++ b/sys/arch/vax/vax/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.17 1997/10/02 19:53:20 niklas Exp $ */ +/* $OpenBSD: machdep.c,v 1.18 1999/05/22 21:22:32 weingart Exp $ */ /* $NetBSD: machdep.c,v 1.45 1997/07/26 10:12:49 ragge Exp $ */ /* @@ -342,7 +342,6 @@ allocsys(v) valloc(cfree, struct cblock, nclist); #endif valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/arch/wgrisc/wgrisc/machdep.c b/sys/arch/wgrisc/wgrisc/machdep.c index 5d90de97c66..7135ba0199c 100644 --- 
a/sys/arch/wgrisc/wgrisc/machdep.c +++ b/sys/arch/wgrisc/wgrisc/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.4 1997/08/24 12:01:15 pefo Exp $ */ +/* $OpenBSD: machdep.c,v 1.5 1999/05/22 21:22:32 weingart Exp $ */ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1992, 1993 @@ -38,7 +38,7 @@ * SUCH DAMAGE. * * from: @(#)machdep.c 8.3 (Berkeley) 1/12/94 - * $Id: machdep.c,v 1.4 1997/08/24 12:01:15 pefo Exp $ + * $Id: machdep.c,v 1.5 1999/05/22 21:22:32 weingart Exp $ */ /* from: Utah Hdr: machdep.c 1.63 91/04/24 */ @@ -418,7 +418,6 @@ mips_init(argc, argv, code) valloc(cfree, struct cblock, nclist); #endif valloc(callout, struct callout, ncallout); - valloc(swapmap, struct map, nswapmap = maxproc * 2); #ifdef SYSVSHM valloc(shmsegs, struct shmid_ds, shminfo.shmmni); #endif diff --git a/sys/compat/common/Makefile b/sys/compat/common/Makefile index e681d104d97..9c53d691d07 100644 --- a/sys/compat/common/Makefile +++ b/sys/compat/common/Makefile @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile,v 1.6 1997/05/30 09:40:57 niklas Exp $ +# $OpenBSD: Makefile,v 1.7 1999/05/22 21:22:33 weingart Exp $ # $NetBSD: Makefile,v 1.8 1996/05/18 15:52:19 christos Exp $ LIB= compat @@ -6,9 +6,9 @@ NOPIC= .PATH: ${COMPATDIR} -SRCS= compat_exec.c compat_util.c kern_exit_43.c kern_info_09.c \ - kern_info_43.c kern_prot_43.c kern_resource_43.c kern_sig_43.c \ - tty_43.c uipc_syscalls_43.c vfs_syscalls_43.c vm_43.c +SRCS= compat_exec.c compat_util.c compat_vm.c kern_exit_43.c \ + kern_info_09.c kern_info_43.c kern_prot_43.c kern_resource_43.c \ + kern_sig_43.c tty_43.c uipc_syscalls_43.c vfs_syscalls_43.c vm_43.c # really, all machines were sizeof(int) != sizeof(long) .if (${MACHINE_ARCH} != "alpha") diff --git a/sys/compat/common/compat_vm.c b/sys/compat/common/compat_vm.c new file mode 100644 index 00000000000..68ce0889d69 --- /dev/null +++ b/sys/compat/common/compat_vm.c @@ -0,0 +1,73 @@ +/* $OpenBSD: compat_vm.c,v 1.1 1999/05/22 21:22:33 weingart Exp $ */ +/* $NetBSD: vm_12.c,v 
1.8 1997/10/20 22:05:22 thorpej Exp $ */ + +/* + * Copyright (c) 1997 Matthew R. Green + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mount.h> /* needed for next include! 
*/ +#include <sys/syscallargs.h> + +#include <sys/swap.h> +#include <sys/mman.h> + +int +sys_swapon(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + struct sys_swapctl_args ua; + struct sys_swapon_args /* { + syscallarg(const char *) name; + } */ *uap = v; + + SCARG(&ua, cmd) = SWAP_ON; + SCARG(&ua, arg) = (void *)SCARG(uap, name); + SCARG(&ua, misc) = 0; /* priority */ + return (sys_swapctl(p, &ua, retval)); +} + +int +sys_omsync(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + struct sys_msync_args ua; + struct sys_omsync_args /* { + syscallarg(caddr_t) addr; + syscallarg(size_t) len; + } */ *uap = v; + + SCARG(&ua, addr) = SCARG(uap, addr);; + SCARG(&ua, len) = SCARG(uap, len);; + SCARG(&ua, flags) = MS_SYNC | MS_INVALIDATE; + return (sys_msync(p, &ua, retval)); +} diff --git a/sys/sys/map.h b/sys/sys/map.h index 57989b8a8bd..a476f19f54e 100644 --- a/sys/sys/map.h +++ b/sys/sys/map.h @@ -1,4 +1,4 @@ -/* $OpenBSD: map.h,v 1.2 1996/03/03 12:11:59 niklas Exp $ */ +/* $OpenBSD: map.h,v 1.3 1999/05/22 21:22:33 weingart Exp $ */ /* $NetBSD: map.h,v 1.10 1995/09/15 05:32:45 jtc Exp $ */ /*- @@ -64,6 +64,10 @@ * N.B.: The address 0 in the resource address space is not available * as it is used internally by the resource map routines. 
*/ + +#ifndef _SYS_MAP_H_ +#define _SYS_MAP_H_ + struct map { struct mapent *m_limit; /* first slot beyond map */ char *m_name; /* name of resource, for messages */ @@ -75,10 +79,9 @@ struct mapent { }; #ifdef _KERNEL -struct map *swapmap; -int nswapmap; long rmalloc __P((struct map *, long)); void rmfree __P((struct map *, long, long)); void rminit __P((struct map *, long, long, char *, int)); -#endif +#endif /* _KERNEL */ +#endif /* _SYS_MAP_H_ */ diff --git a/sys/sys/swap.h b/sys/sys/swap.h new file mode 100644 index 00000000000..212eea90768 --- /dev/null +++ b/sys/sys/swap.h @@ -0,0 +1,72 @@ +/* $OpenBSD: swap.h,v 1.1 1999/05/22 21:22:34 weingart Exp $ */ +/* $NetBSD: swap.h,v 1.2 1998/09/13 14:46:24 christos Exp $ */ + +/* + * Copyright (c) 1995, 1996, 1998 Matthew R. Green, Tobias Weingartner + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* NOTE: This is the current swap.h from NetBSD. Since we are "upgrading" + * to the new vm_swap code now, we will not keep compatibility with the + * old vm_swap code that was in NetBSD. This means that we do not have + * an oswapent structure, but instead use a "new" swapent structure, with + * no overlay. + * + * --Toby. + */ + +#ifndef _SYS_SWAP_H_ +#define _SYS_SWAP_H_ + +#include <sys/syslimits.h> + +/* These structures are used to return swap information for userland */ +struct swapent { + dev_t se_dev; /* device id */ + int se_flags; /* flags */ + int se_nblks; /* total blocks */ + int se_inuse; /* blocks in use */ + int se_priority; /* priority of this device */ + char se_path[PATH_MAX+1]; /* path name */ +}; + +#define SWAP_ON 1 /* begin swapping on device */ +#define SWAP_OFF 2 /* (stop swapping on device) */ +#define SWAP_NSWAP 3 /* how many swap devices ? 
*/ +#define SWAP_STATS 4 /* get device info */ +#define SWAP_CTL 5 /* change priority on device */ + +#define SWF_INUSE 0x00000001 /* in use: we have swapped here */ +#define SWF_ENABLE 0x00000002 /* enabled: we can swap here */ +#define SWF_BUSY 0x00000004 /* busy: I/O happening here */ +#define SWF_FAKE 0x00000008 /* fake: still being built */ + +#if defined(_KERNEL) && !defined(UVM) +daddr_t swap_alloc __P((int size)); +void swap_free __P((int size, daddr_t addr)); +void swapinit __P((void)); +#endif + +#endif /* _SYS_SWAP_H_ */ diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 8eb5e60d2b8..dd1c8b6f717 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1,4 +1,4 @@ -/* $OpenBSD: swap_pager.c,v 1.15 1999/02/08 01:10:58 art Exp $ */ +/* $OpenBSD: swap_pager.c,v 1.16 1999/05/22 21:22:34 weingart Exp $ */ /* $NetBSD: swap_pager.c,v 1.27 1996/03/16 23:15:20 christos Exp $ */ /* @@ -55,8 +55,10 @@ #include <sys/proc.h> #include <sys/buf.h> #include <sys/map.h> +#include <sys/simplelock.h> #include <sys/vnode.h> #include <sys/malloc.h> +#include <sys/swap.h> #include <miscfs/specfs/specdev.h> @@ -65,6 +67,7 @@ #include <vm/vm_pageout.h> #include <vm/swap_pager.h> +/* XXX this makes the max swap devices 16 */ #define NSWSIZES 16 /* size of swtab */ #define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */ #ifndef NPENDINGIO @@ -169,10 +172,8 @@ struct pagerops swappagerops = { static void swap_pager_init() { - register swp_clean_t spc; - register int i, bsize; - extern int dmmin, dmmax; - int maxbsize; + swp_clean_t spc; + int i, maxbsize, bsize; #ifdef DEBUG if (swpagerdebug & (SDB_FOLLOW|SDB_INIT)) @@ -201,42 +202,34 @@ swap_pager_init() spc->spc_flags = SPC_FREE; } +/* this needs to be at least ctod(1) for all ports for vtod() to work */ +#define DMMIN 32 /* - * Calculate the swap allocation constants. 
- */ - if (dmmin == 0) { - dmmin = DMMIN; - if (dmmin < CLBYTES/DEV_BSIZE) - dmmin = CLBYTES/DEV_BSIZE; - } - if (dmmax == 0) - dmmax = DMMAX; - - /* - * Fill in our table of object size vs. allocation size + * Fill in our table of object size vs. allocation size. bsize needs + * to be at least ctod(1) for all ports for vtod() to work, with a + * bare minimum of 32. */ - bsize = btodb(PAGE_SIZE); - if (bsize < dmmin) - bsize = dmmin; +#define max(a, b) ((a) > (b) ? (a) : (b)) + bsize = max(32, max(ctod(1), btodb(PAGE_SIZE))); maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE); - if (maxbsize > dmmax) - maxbsize = dmmax; + if (maxbsize > NBPG) + maxbsize = NBPG; for (i = 0; i < NSWSIZES; i++) { - swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); - swtab[i].st_bsize = bsize; if (bsize <= btodb(MAXPHYS)) swap_pager_maxcluster = dbtob(bsize); + swtab[i].st_bsize = bsize; + if (bsize >= maxbsize) { + swtab[i].st_osize = 0; + break; + } + swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); #ifdef DEBUG if (swpagerdebug & SDB_INIT) printf("swpg_init: ix %d, size %lx, bsize %x\n", i, swtab[i].st_osize, swtab[i].st_bsize); #endif - if (bsize >= maxbsize) - break; bsize *= 2; } - swtab[i].st_osize = 0; - swtab[i].st_bsize = bsize; } /* @@ -407,7 +400,7 @@ swap_pager_dealloc(pager) printf("swpg_dealloc: blk %x\n", bp->swb_block); #endif - rmfree(swapmap, swp->sw_bsize, bp->swb_block); + swap_free(swp->sw_bsize, bp->swb_block); } /* * Free swap management resources @@ -462,7 +455,6 @@ swap_pager_putpage(pager, mlist, npages, sync) int npages; boolean_t sync; { - int flags; #ifdef DEBUG if (swpagerdebug & SDB_FOLLOW) @@ -473,11 +465,8 @@ swap_pager_putpage(pager, mlist, npages, sync) swap_pager_clean(B_WRITE); return (VM_PAGER_OK); /* ??? */ } - flags = B_WRITE; - if (!sync) - flags |= B_ASYNC; return (swap_pager_io((sw_pager_t)pager->pg_data, mlist, npages, - flags)); + B_WRITE | (sync ? 
0 : B_ASYNC))); } static boolean_t @@ -656,7 +645,7 @@ swap_pager_io(swp, mlist, npages, flags) * Allocate a swap block if necessary. */ if (swb->swb_block == 0) { - swb->swb_block = rmalloc(swapmap, swp->sw_bsize); + swb->swb_block = swap_alloc(swp->sw_bsize); if (swb->swb_block == 0) { #ifdef DEBUG if (swpagerdebug & SDB_FAIL) @@ -727,8 +716,8 @@ swap_pager_io(swp, mlist, npages, flags) bp->b_dirtyoff = 0; bp->b_dirtyend = npages * PAGE_SIZE; s = splbio(); - swp->sw_poip++; swapdev_vp->v_numoutput++; + swp->sw_poip++; splx(s); mask = (~(~0 << npages)) << atop(off); #ifdef DEBUG @@ -1139,7 +1128,7 @@ swap_pager_remove(pager, from, to) * means no pages are left in the block, free it. */ if ((swb->swb_mask &= mask) == 0) { - rmfree(swapmap, swp->sw_bsize, swb->swb_block); + swap_free(swp->sw_bsize, swb->swb_block); swb->swb_block = 0; } } diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c index 4a8f1026b73..1d80eb8e421 100644 --- a/sys/vm/vm_swap.c +++ b/sys/vm/vm_swap.c @@ -1,9 +1,9 @@ -/* $OpenBSD: vm_swap.c,v 1.8 1997/12/02 16:55:52 csapuntz Exp $ */ -/* $NetBSD: vm_swap.c,v 1.32 1996/02/05 01:54:09 christos Exp $ */ +/* $OpenBSD: vm_swap.c,v 1.9 1999/05/22 21:22:34 weingart Exp $ */ +/* $NetBSD: vm_swap.c,v 1.64 1998/11/08 19:45:17 mycroft Exp $ */ /* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. + * Copyright (c) 1995, 1996, 1997 Matthew R. Green, Tobias Weingartner + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,27 +13,19 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/param.h> @@ -41,225 +33,804 @@ #include <sys/buf.h> #include <sys/proc.h> #include <sys/namei.h> -#include <sys/dmap.h> /* XXX */ +#include <sys/disklabel.h> +#include <sys/dmap.h> +#include <sys/errno.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/lock.h> #include <sys/vnode.h> #include <sys/map.h> #include <sys/file.h> -#include <sys/mman.h> - +#include <sys/stat.h> +#include <sys/extent.h> +#include <sys/swap.h> #include <sys/mount.h> #include <sys/syscallargs.h> -#include <vm/vm.h> +#include <machine/vmparam.h> + #include <vm/vm_conf.h> #include <miscfs/specfs/specdev.h> /* - * Indirect driver for multi-controller paging. + * The idea here is to provide a single interface for multiple swap devices, + * of any kind and priority in a simple and fast way. + * + * Each swap device has these properties: + * * swap in use. + * * swap enabled. + * * map information in `/dev/drum'. + * * vnode pointer. + * Files have these additional properties: + * * block size. + * * maximum byte count in buffer. + * * buffer. + * * credentials. + * + * The arguments to swapctl(2) are: + * int cmd; + * void *arg; + * int misc; + * The cmd can be one of: + * SWAP_NSWAP - swapctl(2) returns the number of swap devices currently in + * use. + * SWAP_STATS - swapctl(2) takes a struct ent * in (void *arg) and writes + * misc or fewer (to zero) entries of configured swap devices, + * and returns the number of entries written or -1 on error. 
+ * SWAP_ON - swapctl(2) takes a (char *) in arg to be the pathname of a + * device or file to begin swapping on, with its priority in + * misc, returning 0 on success and -1 on error. + * SWAP_OFF - swapctl(2) takes a (char *) in arg to be the pathname of a + * device or file to stop swapping on, returning 0 or -1. + * XXX unwritten. + * SWAP_CTL - swapctl(2) changes the priority of a swap device, using the + * misc value. + */ + +#ifdef SWAPDEBUG +#define STATIC +#define VMSDB_SWON 0x0001 +#define VMSDB_SWOFF 0x0002 +#define VMSDB_SWINIT 0x0004 +#define VMSDB_SWALLOC 0x0008 +#define VMSDB_SWFLOW 0x0010 +#define VMSDB_INFO 0x0020 +int vmswapdebug = 0; +int vmswap_domount = 1; + +#define DPRINTF(f, m) do { \ + if (vmswapdebug & (f)) \ + printf m; \ +} while(0) +#else +#define STATIC static +#define DPRINTF(f, m) +#endif + +#define SWAP_TO_FILES + +struct swapdev { + struct swapent swd_se; +#define swd_dev swd_se.se_dev +#define swd_flags swd_se.se_flags +#define swd_nblks swd_se.se_nblks +#define swd_inuse swd_se.se_inuse +#define swd_priority swd_se.se_priority +#define swd_path swd_se.se_path + daddr_t swd_mapoffset; + int swd_mapsize; + struct extent *swd_ex; + struct vnode *swd_vp; + CIRCLEQ_ENTRY(swapdev) swd_next; + +#ifdef SWAP_TO_FILES + int swd_bsize; + int swd_maxactive; + struct buf swd_tab; + struct ucred *swd_cred; +#endif +}; + +/* + * Swap device priority entry; the list is kept sorted on `spi_priority'. */ +struct swappri { + int spi_priority; + CIRCLEQ_HEAD(spi_swapdev, swapdev) spi_swapdev; + LIST_ENTRY(swappri) spi_swappri; +}; + + + + +/* + * The following two structures are used to keep track of data transfers + * on swap devices associated with regular files. + * NOTE: this code is more or less a copy of vnd.c; we use the same + * structure names here to ease porting.
+ */ + + +struct vndxfer { + struct buf *vx_bp; /* Pointer to parent buffer */ + struct swapdev *vx_sdp; + int vx_error; + int vx_pending; /* # of pending aux buffers */ + int vx_flags; +#define VX_BUSY 1 +#define VX_DEAD 2 +}; + + +struct vndbuf { + struct buf vb_buf; + struct vndxfer *vb_xfer; +}; -int nswap, nswdev; -#ifdef SEQSWAP -int niswdev; /* number of interleaved swap devices */ -int niswap; /* size of interleaved swap area */ +/* To get from a buffer to the encapsulating vndbuf */ +#define BUF_TO_VNDBUF(bp) \ + ((struct vndbuf *)((long)bp - ((long)&((struct vndbuf *)0)->vb_buf))) + +/* vnd macro stuff, rewritten to use malloc()/free() */ +#define getvndxfer() \ + (struct vndxfer *)malloc(sizeof(struct vndxfer), M_VMSWAP, M_WAITOK); + +#define putvndxfer(vnx) \ + free(vnx, M_VMSWAP) + +#define getvndbuf() \ + (struct vndbuf *)malloc(sizeof(struct vndbuf), M_VMSWAP, M_WAITOK); + +#define putvndbuf(vbp) \ + free(vbp, M_VMSWAP) + + +int nswapdev; +int swflags; +struct extent *swapmap; +LIST_HEAD(swap_priority, swappri) swap_priority; + +STATIC int swap_on __P((struct proc *, struct swapdev *)); +#ifdef SWAP_OFF_WORKS +STATIC int swap_off __P((struct proc *, struct swapdev *)); #endif +STATIC struct swapdev *swap_getsdpfromaddr __P((daddr_t)); +STATIC void swap_addmap __P((struct swapdev *, int)); -int swfree __P((struct proc *, int)); +#ifdef SWAP_TO_FILES +STATIC void sw_reg_strategy __P((struct swapdev *, struct buf *, int)); +STATIC void sw_reg_iodone __P((struct buf *)); +STATIC void sw_reg_start __P((struct swapdev *)); +#endif + +STATIC void insert_swapdev __P((struct swapdev *, int)); +STATIC struct swapdev *find_swapdev __P((struct vnode *, int)); +STATIC void swaplist_trim __P((void)); + +STATIC void swapmount __P((void)); + +/* + * We use two locks to protect the swap device lists. + * The long-term lock is used only used to prevent races in + * concurrently executing swapctl(2) system calls. 
+ */ +struct simplelock swaplist_lock; +struct lock swaplist_change_lock; /* - * Set up swap devices. - * Initialize linked list of free swap - * headers. These do not actually point - * to buffers, but rather to pages that - * are being swapped in and out. + * Insert a swap device on the priority list. */ void -swapinit() +insert_swapdev(sdp, priority) + struct swapdev *sdp; + int priority; { - register int i; - register struct buf *sp = swbuf; - register struct proc *p = &proc0; /* XXX */ - struct swdevt *swp; - int error; + struct swappri *spp, *pspp; + +again: + simple_lock(&swaplist_lock); /* - * Count swap devices, and adjust total swap space available. - * Some of the space will not be countable until later (dynamically - * configurable devices) and some of the counted space will not be - * available until a swapon() system call is issued, both usually - * happen when the system goes multi-user. - * - * If using NFS for swap, swdevt[0] will already be bdevvp'd. XXX - */ -#ifdef SEQSWAP - nswdev = niswdev = 0; - nswap = niswap = 0; - /* - * All interleaved devices must come first + * Find entry at or after which to insert the new device. 
*/ - for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) { - if (swp->sw_flags & SW_SEQUENTIAL) + for (pspp = NULL, spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + if (priority <= spp->spi_priority) break; - niswdev++; - if (swp->sw_nblks > niswap) - niswap = swp->sw_nblks; + pspp = spp; } - niswap = roundup(niswap, dmmax); - niswap *= niswdev; - if (swdevt[0].sw_vp == NULL && - bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp)) - panic("swapvp"); - /* - * The remainder must be sequential - */ - for ( ; swp->sw_dev != NODEV; swp++) { - if ((swp->sw_flags & SW_SEQUENTIAL) == 0) - panic("binit: mis-ordered swap devices"); - nswdev++; - if (swp->sw_nblks > 0) { - if (swp->sw_nblks % dmmax) - swp->sw_nblks -= (swp->sw_nblks % dmmax); - nswap += swp->sw_nblks; + + if (spp == NULL || spp->spi_priority != priority) { + spp = (struct swappri *) + malloc(sizeof *spp, M_VMSWAP, M_NOWAIT); + + if (spp == NULL) { + simple_unlock(&swaplist_lock); + tsleep((caddr_t)&lbolt, PSWP, "memory", 0); + goto again; } + DPRINTF(VMSDB_SWFLOW, + ("sw: had to create a new swappri = %d\n", priority)); + + spp->spi_priority = priority; + CIRCLEQ_INIT(&spp->spi_swapdev); + + if (pspp) + LIST_INSERT_AFTER(pspp, spp, spi_swappri); + else + LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri); + } - nswdev += niswdev; - if (nswdev == 0) - panic("swapinit"); - nswap += niswap; -#else - nswdev = 0; - nswap = 0; - for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) { - nswdev++; - if (swp->sw_nblks > nswap) - nswap = swp->sw_nblks; - } - if (nswdev == 0) - panic("swapinit"); - if (nswdev > 1) - nswap = ((nswap + dmmax - 1) / dmmax) * dmmax; - nswap *= nswdev; - if (swdevt[0].sw_vp == NULL && - bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp)) - panic("swapvp"); -#endif - if (nswap == 0) - printf("WARNING: no swap space found\n"); - else if ((error = swfree(p, 0)) == ENXIO) - printf("WARNING: primary swap device not configured\n"); - else if 
(error) { - printf("swfree errno %d\n", error); /* XXX */ - panic("swapinit swfree 0"); - } + /* Onto priority list */ + CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); + sdp->swd_priority = priority; + simple_unlock(&swaplist_lock); +} - /* - * Now set up swap buffer headers. - */ - bswlist.b_actf = sp; - for (i = 0; i < nswbuf - 1; i++, sp++) { - sp->b_actf = sp + 1; - sp->b_rcred = sp->b_wcred = p->p_ucred; - sp->b_vnbufs.le_next = NOLIST; +/* + * Find and optionally remove a swap device from the priority list. + */ +struct swapdev * +find_swapdev(vp, remove) + struct vnode *vp; + int remove; +{ + struct swapdev *sdp; + struct swappri *spp; + + simple_lock(&swaplist_lock); + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) + if (sdp->swd_vp == vp) { + if (remove) + CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, + swd_next); + simple_unlock(&swaplist_lock); + return (sdp); + } } - sp->b_rcred = sp->b_wcred = p->p_ucred; - sp->b_vnbufs.le_next = NOLIST; - sp->b_actf = NULL; + simple_unlock(&swaplist_lock); + return (NULL); } +/* + * Scan priority list for empty priority entries. 
+ */ void -swstrategy(bp) - register struct buf *bp; +swaplist_trim() +{ + struct swappri *spp; + + simple_lock(&swaplist_lock); +restart: + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + if (spp->spi_swapdev.cqh_first != (void *)&spp->spi_swapdev) + continue; + LIST_REMOVE(spp, spi_swappri); + free((caddr_t)spp, M_VMSWAP); + goto restart; + } + simple_unlock(&swaplist_lock); +} + +int +sys_swapctl(p, v, retval) + struct proc *p; + void *v; + register_t *retval; { - int s, sz, off, seg, index; - register struct swdevt *sp; + struct sys_swapctl_args /* { + syscallarg(int) cmd; + syscallarg(const void *) arg; + syscallarg(int) misc; + } */ *uap = (struct sys_swapctl_args *)v; struct vnode *vp; + struct nameidata nd; + struct swappri *spp; + struct swapdev *sdp; + struct swapent *sep; + char userpath[PATH_MAX + 1]; + int count, error, misc; + size_t len; + int priority; - sz = howmany(bp->b_bcount, DEV_BSIZE); - if (bp->b_blkno + sz > nswap) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; + misc = SCARG(uap, misc); + + DPRINTF(VMSDB_SWFLOW, ("entering sys_swapctl\n")); + + /* how many swap devices */ + if (SCARG(uap, cmd) == SWAP_NSWAP) { + DPRINTF(VMSDB_SWFLOW,("did SWAP_NSWAP: leaving sys_swapctl\n")); + *retval = nswapdev; + return (0); } - if (nswdev > 1) { -#ifdef SEQSWAP - if (bp->b_blkno < niswap) { - if (niswdev > 1) { - off = bp->b_blkno % dmmax; - if (off+sz > dmmax) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; - } - seg = bp->b_blkno / dmmax; - index = seg % niswdev; - seg /= niswdev; - bp->b_blkno = seg*dmmax + off; - } else - index = 0; - } else { - register struct swdevt *swp; - - bp->b_blkno -= niswap; - for (index = niswdev, swp = &swdevt[niswdev]; - swp->sw_dev != NODEV; - swp++, index++) { - if (bp->b_blkno < swp->sw_nblks) - break; - bp->b_blkno -= swp->sw_nblks; - } - if (swp->sw_dev == NODEV || - bp->b_blkno+sz > swp->sw_nblks) { - bp->b_error = 
swp->sw_dev == NODEV ? - ENODEV : EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; + + /* stats on the swap devices. */ + if (SCARG(uap, cmd) == SWAP_STATS) { + sep = (struct swapent *)SCARG(uap, arg); + count = 0; + + error = lockmgr(&swaplist_change_lock, LK_SHARED, (void *)0, p); + if (error) + return (error); + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev && misc-- > 0; + sdp = sdp->swd_next.cqe_next, sep++, count++) { + /* + * We do not do NetBSD 1.3 compat call. + */ + error = copyout((caddr_t)&sdp->swd_se, + (caddr_t)sep, sizeof(struct swapent)); + + if (error) + goto out; } } +out: + (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p); + if (error) + return (error); + + DPRINTF(VMSDB_SWFLOW,("did SWAP_STATS: leaving sys_swapctl\n")); + + *retval = count; + return (0); + } + if ((error = suser(p->p_ucred, &p->p_acflag))) + return (error); + + if (SCARG(uap, arg) == NULL) { + /* XXX - interface - arg==NULL: miniroot */ + vp = rootvp; + if (vget(vp, LK_EXCLUSIVE, p)) + return (EBUSY); + if (SCARG(uap, cmd) == SWAP_ON && + copystr("miniroot", userpath, sizeof userpath, &len)) + panic("swapctl: miniroot copy failed"); + } else { + int space; + char *where; + + if (SCARG(uap, cmd) == SWAP_ON) { + if ((error = copyinstr(SCARG(uap, arg), userpath, + sizeof userpath, &len))) + return (error); + space = UIO_SYSSPACE; + where = userpath; + } else { + space = UIO_USERSPACE; + where = (char *)SCARG(uap, arg); + } + NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, space, where, p); + if ((error = namei(&nd))) + return (error); + + vp = nd.ni_vp; + } + + error = lockmgr(&swaplist_change_lock, LK_EXCLUSIVE, (void *)0, p); + if (error) + goto bad2; + + switch(SCARG(uap, cmd)) { + case SWAP_CTL: + priority = SCARG(uap, misc); + if ((sdp = find_swapdev(vp, 1)) == NULL) { + error = ENOENT; + break; + } + insert_swapdev(sdp, priority); + swaplist_trim(); + 
break; + + case SWAP_ON: + priority = SCARG(uap, misc); + + /* Check for duplicates */ + if ((sdp = find_swapdev(vp, 0)) != NULL) { + if (!bcmp(sdp->swd_path, "swap_device", 12)) { + copystr(userpath, sdp->swd_path, len, 0); + error = 0; + } else + error = EBUSY; + goto bad; + } + + sdp = (struct swapdev *) + malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); + bzero(sdp, sizeof(*sdp)); + + sdp->swd_vp = vp; + sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV; + + if ((error = swap_on(p, sdp)) != 0) { + free((caddr_t)sdp, M_VMSWAP); + break; + } +#ifdef SWAP_TO_FILES + /* + * XXX Is NFS elaboration necessary? + */ + if (vp->v_type == VREG) + sdp->swd_cred = crdup(p->p_ucred); +#endif + if (copystr(userpath, sdp->swd_path, len, 0) != 0) + panic("swapctl: copystr"); + insert_swapdev(sdp, priority); + + /* Keep reference to vnode */ + vref(vp); + break; + + case SWAP_OFF: + DPRINTF(VMSDB_SWFLOW, ("doing SWAP_OFF...\n")); +#ifdef SWAP_OFF_WORKS + if ((sdp = find_swapdev(vp, 0)) == NULL) { + error = ENXIO; + break; + } + /* + * If a device isn't in use or enabled, we + * can't stop swapping from it (again). + */ + if ((sdp->swd_flags & + (SWF_INUSE|SWF_ENABLE)) == 0) { + error = EBUSY; + goto bad; + } + if ((error = swap_off(p, sdp)) != 0) + goto bad; + + /* Find again and remove this time */ + if ((sdp = find_swapdev(vp, 1)) == NULL) { + error = ENXIO; + break; + } + free((caddr_t)sdp, M_VMSWAP); #else - off = bp->b_blkno % dmmax; - if (off+sz > dmmax) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; + error = ENODEV; +#endif + break; + + default: + DPRINTF(VMSDB_SWFLOW, + ("unhandled command: %x\n", SCARG(uap, cmd))); + error = EINVAL; + } + +bad: + (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p); +bad2: + vput(vp); + + DPRINTF(VMSDB_SWFLOW, ("leaving sys_swapctl: error %d\n", error)); + return (error); +} + +/* + * swap_on() attempts to begin swapping on a swapdev. 
we check that this + * device is OK to swap from, miss the start of any disk (to avoid any + * disk labels that may exist). + */ +STATIC int +swap_on(p, sdp) + struct proc *p; + struct swapdev *sdp; +{ + static int count = 0; + struct vnode *vp = sdp->swd_vp; + int error, nblks, size; + long addr; + char *storage; + int storagesize; +#ifdef SWAP_TO_FILES + struct vattr va; +#endif +#ifdef NFS + extern int (**nfsv2_vnodeop_p) __P((void *)); +#endif /* NFS */ + dev_t dev = sdp->swd_dev; + char *name; + + + /* If root on swap, then the skip open/close operations. */ + if (vp != rootvp) { + if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p))) + return (error); + vp->v_writecount++; + } + + DPRINTF(VMSDB_INFO, + ("swap_on: dev = %d, major(dev) = %d\n", dev, major(dev))); + + switch (vp->v_type) { + case VBLK: + if (bdevsw[major(dev)].d_psize == 0 || + (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) { + error = ENXIO; + goto bad; } - seg = bp->b_blkno / dmmax; - index = seg % nswdev; - seg /= nswdev; - bp->b_blkno = seg*dmmax + off; + break; + +#ifdef SWAP_TO_FILES + case VREG: + if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p))) + goto bad; + nblks = (int)btodb(va.va_size); + if ((error = + VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0) + goto bad; + + sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize; +#ifdef NFS + if (vp->v_op == nfsv2_vnodeop_p) + sdp->swd_maxactive = 2; /* XXX */ + else +#endif /* NFS */ + sdp->swd_maxactive = 8; /* XXX */ + break; #endif - } else - index = 0; - sp = &swdevt[index]; - if (sp->sw_vp == NULL) { - bp->b_error = ENODEV; - bp->b_flags |= B_ERROR; - biodone(bp); - return; + + default: + error = ENXIO; + goto bad; } - if ((bp->b_dev = sp->sw_dev) == NODEV && sp->sw_vp->v_type != VREG) - panic("swstrategy"); - VHOLD(sp->sw_vp); - s = splbio(); - if ((bp->b_flags & B_READ) == 0) { - if ((vp = bp->b_vp) != NULL) { - vp->v_numoutput--; - if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { - vp->v_flag &= ~VBWAIT; - 
wakeup((caddr_t)&vp->v_numoutput); + if (nblks == 0) { + DPRINTF(VMSDB_SWFLOW, ("swap_on: nblks == 0\n")); + error = EINVAL; + goto bad; + } + + sdp->swd_flags |= SWF_INUSE; + sdp->swd_nblks = nblks; + + /* + * skip over first cluster of a device in case of labels or + * boot blocks. + */ + if (vp->v_type == VBLK) { + size = (int)(nblks - ctod(CLSIZE)); + addr = (long)ctod(CLSIZE); + } else { + size = (int)nblks; + addr = (long)0; + } + + DPRINTF(VMSDB_SWON, + ("swap_on: dev %x: size %d, addr %ld\n", dev, size, addr)); + + name = malloc(12, M_VMSWAP, M_WAITOK); + sprintf(name, "swap0x%04x", count++); + /* XXX make this based on ram as well. */ + storagesize = EXTENT_FIXED_STORAGE_SIZE(maxproc * 2); + storage = malloc(storagesize, M_VMSWAP, M_WAITOK); + sdp->swd_ex = extent_create(name, 0, nblks, M_VMSWAP, + storage, storagesize, EX_WAITOK); + if (addr) { + if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK)) + panic("disklabel region"); + sdp->swd_inuse += addr; + } + + + if (vp == rootvp) { + struct mount *mp; + struct statfs *sp; + int rootblks; + + /* Get size from root FS (mountroot did statfs) */ + mp = rootvnode->v_mount; + sp = &mp->mnt_stat; + rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE); + if (rootblks > nblks) + panic("miniroot size"); + + if (extent_alloc_region(sdp->swd_ex, addr, rootblks, EX_WAITOK)) + panic("miniroot region"); + + printf("Preserved %d blocks, leaving %d pages of swap\n", + rootblks, dtoc(size - rootblks)); + } + + swap_addmap(sdp, size); + nswapdev++; + sdp->swd_flags |= SWF_ENABLE; + return (0); + +bad: + if (vp != rootvp) { + vp->v_writecount--; + (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); + } + return (error); +} + +#ifdef SWAP_OFF_WORKS +STATIC int +swap_off(p, sdp) + struct proc *p; + struct swapdev *sdp; +{ + char *name; + + /* turn off the enable flag */ + sdp->swd_flags &= ~SWF_ENABLE; + + DPRINTF(VMSDB_SWOFF, ("swap_off: %x\n", sdp->swd_dev)); + + /* + * XXX write me + * + * the idea is to find out which 
processes are using this swap + * device, and page them all in. + * + * eventually, we should try to move them out to other swap areas + * if available. + * + * The alternative is to create a redirection map for this swap + * device. This should work by moving all the pages of data from + * the ex-swap device to another one, and making an entry in the + * redirection map for it. locking is going to be important for + * this! + * + * There might be an easier way to do a "soft" swapoff. First + * we mark the particular swap partition as not desirable anymore. + * Then we use the pager to page a couple of pages in, each time + * it has the memory, and the chance to do so. Thereby moving pages + * back into memory. Once they are in memory, when they get paged + * out again, they do not go back onto the "undesirable" device + * anymore, but to good devices. This might take longer, but it + * can certainly work. If need be, the user process can sleep on + * the particular sdp entry, and the swapper can then wake him up + * when everything is done. + */ + + /* until the above code is written, we must ENODEV */ + return ENODEV; + + extent_free(swapmap, sdp->swd_mapoffset, sdp->swd_mapsize, EX_WAITOK); + nswapdev--; + name = sdp->swd_ex->ex_name; + extent_destroy(sdp->swd_ex); + free(name, M_VMSWAP); + free((caddr_t)sdp->swd_ex, M_VMSWAP); + if (sdp->swp_vp != rootvp) { + vp->v_writecount--; + (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p); + } + if (sdp->swd_vp) + vrele(sdp->swd_vp); + free((caddr_t)sdp, M_VMSWAP); + return (0); +} +#endif + +/* + * To decide where to allocate what part of swap, we must "round robin" + * the swap devices in swap_priority of the same priority until they are + * full. we do this with a list of swap priorities that have circle + * queues of swapdevs. + * + * The following functions control allocation and freeing of part of the + * swap area. you call swap_alloc() with a size and it returns an address. 
+ * later you call swap_free() and it frees the use of that swap area. + * + * daddr_t swap_alloc(int size); + * void swap_free(int size, daddr_t addr); + */ + +daddr_t +swap_alloc(size) + int size; +{ + struct swapdev *sdp; + struct swappri *spp; + u_long result; + + if (nswapdev < 1) + return 0; + + simple_lock(&swaplist_lock); + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) { + /* if it's not enabled, then we can't swap from it */ + if ((sdp->swd_flags & SWF_ENABLE) == 0 || + /* XXX IS THIS CORRECT ? */ +#if 1 + (sdp->swd_inuse + size > sdp->swd_nblks) || +#endif + extent_alloc(sdp->swd_ex, size, EX_NOALIGN, + EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT, + &result) != 0) { + continue; } + CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next); + CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); + sdp->swd_inuse += size; + simple_unlock(&swaplist_lock); + return (daddr_t)(result + sdp->swd_mapoffset); } - sp->sw_vp->v_numoutput++; } - if (bp->b_vp != NULL) - brelvp(bp); - splx(s); - bp->b_vp = sp->sw_vp; - VOP_STRATEGY(bp); + simple_unlock(&swaplist_lock); + return 0; +} + +void +swap_free(size, addr) + int size; + daddr_t addr; +{ + struct swapdev *sdp = swap_getsdpfromaddr(addr); + +#ifdef DIAGNOSTIC + if (sdp == NULL) + panic("swap_free: unmapped address\n"); + if (nswapdev < 1) + panic("swap_free: nswapdev < 1\n"); +#endif + extent_free(sdp->swd_ex, addr - sdp->swd_mapoffset, size, + EX_MALLOCOK|EX_NOWAIT); + sdp->swd_inuse -= size; +#ifdef DIAGNOSTIC + if (sdp->swd_inuse < 0) + panic("swap_free: inuse < 0"); +#endif +} + +/* + * We have a physical -> virtual mapping to address here. There are several + * different physical address spaces (one for each swap partition) that are + * to be mapped onto a single virtual address space. 
+ */ +#define ADDR_IN_MAP(addr, sdp) \ + (((addr) >= (sdp)->swd_mapoffset) && \ + ((addr) < ((sdp)->swd_mapoffset + (sdp)->swd_mapsize))) + +struct swapdev * +swap_getsdpfromaddr(addr) + daddr_t addr; +{ + struct swapdev *sdp; + struct swappri *spp; + + simple_lock(&swaplist_lock); + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) + if (ADDR_IN_MAP(addr, sdp)) { + simple_unlock(&swaplist_lock); + return sdp; + } + simple_unlock(&swaplist_lock); + return NULL; +} + +void +swap_addmap(sdp, size) + struct swapdev *sdp; + int size; +{ + u_long result; + + if (extent_alloc(swapmap, size, EX_NOALIGN, EX_NOBOUNDARY, + EX_WAITOK, &result)) + panic("swap_addmap"); + + sdp->swd_mapoffset = result; + sdp->swd_mapsize = size; } /*ARGSUSED*/ @@ -284,232 +855,408 @@ swwrite(dev, uio, ioflag) return (physio(swstrategy, NULL, dev, B_WRITE, minphys, uio)); } -/* - * System call swapon(name) enables swapping on device name, - * which must be in the swdevsw. Return EBUSY - * if already swapping on this device. 
- */ -/* ARGSUSED */ -int -sys_swapon(p, v, retval) - struct proc *p; - void *v; - register_t *retval; +void +swstrategy(bp) + struct buf *bp; { - struct sys_swapon_args /* { - syscallarg(char *) name; - } */ *uap = v; - register struct vnode *vp; - register struct swdevt *sp; - dev_t dev; - int error; - struct nameidata nd; + struct swapdev *sdp; + struct vnode *vp; + daddr_t bn; - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) - return (error); - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, name), p); - if ((error = namei(&nd)) != 0) - return (error); - vp = nd.ni_vp; - if (vp->v_type != VBLK) { - vrele(vp); - return (ENOTBLK); - } - dev = (dev_t)vp->v_rdev; - if (major(dev) >= nblkdev) { - vrele(vp); - return (ENXIO); + bn = bp->b_blkno; + sdp = swap_getsdpfromaddr(bn); + if (sdp == NULL) { + bp->b_error = EINVAL; + bp->b_flags |= B_ERROR; + biodone(bp); + return; } - for (sp = &swdevt[0]; sp->sw_dev != NODEV; sp++) { - if (sp->sw_dev == dev) { - if (sp->sw_flags & SW_FREED) { - vrele(vp); - return (EBUSY); - } - sp->sw_vp = vp; - if ((error = swfree(p, sp - swdevt)) != 0) { - vrele(vp); - return (error); - } - return (0); + + bn -= sdp->swd_mapoffset; + + DPRINTF(VMSDB_SWFLOW, + ("swstrategy(%s): mapoff %x, bn %x, bcount %ld\n", + ((bp->b_flags & B_READ) == 0) ? "write" : "read", + sdp->swd_mapoffset, bn, bp->b_bcount)); + + switch (sdp->swd_vp->v_type) { + default: + panic("swstrategy: vnode type %x", sdp->swd_vp->v_type); + case VBLK: + bp->b_blkno = bn + ctod(CLSIZE); + vp = sdp->swd_vp; + bp->b_dev = sdp->swd_dev; + VHOLD(vp); + if ((bp->b_flags & B_READ) == 0) { + int s = splbio(); + vwakeup(bp); + vp->v_numoutput++; + splx(s); } -#ifdef SEQSWAP - /* - * If we have reached a non-freed sequential device without - * finding what we are looking for, it is an error. - * That is because all interleaved devices must come first - * and sequential devices must be freed in order. 
- */ - if ((sp->sw_flags & (SW_SEQUENTIAL|SW_FREED)) == SW_SEQUENTIAL) - break; + + if (bp->b_vp != NULL) + brelvp(bp); + + bp->b_vp = vp; + VOP_STRATEGY(bp); + return; +#ifdef SWAP_TO_FILES + case VREG: + sw_reg_strategy(sdp, bp, bn); + return; #endif } - vrele(vp); - return (EINVAL); + /* NOTREACHED */ } -/* - * Swfree(index) frees the index'th portion of the swap map. - * Each of the nswdev devices provides 1/nswdev'th of the swap - * space, which is laid out with blocks of dmmax pages circularly - * among the devices. - */ -int -swfree(p, index) - struct proc *p; - int index; +#ifdef SWAP_TO_FILES + +STATIC void +sw_reg_strategy(sdp, bp, bn) + struct swapdev *sdp; + struct buf *bp; + int bn; { - register struct swdevt *sp; - register swblk_t vsbase; - register long blk; - struct vnode *vp; - register swblk_t dvbase; - register int nblks; - int error; + struct vnode *vp; + struct vndxfer *vnx; + daddr_t nbn; + caddr_t addr; + int s, off, nra, error, sz, resid; - sp = &swdevt[index]; - vp = sp->sw_vp; - /* If root on swap, then the skip open/close operations. */ - if (vp != rootvp) { - if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)) != 0) - return (error); - } - sp->sw_flags |= SW_FREED; - nblks = sp->sw_nblks; /* - * Some devices may not exist til after boot time. - * If so, their nblk count will be 0. + * Translate the device logical block numbers into physical + * block numbers of the underlying filesystem device. 
*/ - if (nblks <= 0) { - int perdev; - dev_t dev = sp->sw_dev; + bp->b_resid = bp->b_bcount; + addr = bp->b_data; + bn = dbtob(bn); - if (bdevsw[major(dev)].d_psize == 0 || - (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) { - if (vp != rootvp) - (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); - sp->sw_flags &= ~SW_FREED; - return (ENXIO); + /* Allocate a header for this transfer and link it to the buffer */ + vnx = getvndxfer(); + vnx->vx_flags = VX_BUSY; + vnx->vx_error = 0; + vnx->vx_pending = 0; + vnx->vx_bp = bp; + vnx->vx_sdp = sdp; + + error = 0; + for (resid = bp->b_resid; resid; resid -= sz) { + struct vndbuf *nbp; + + nra = 0; + error = VOP_BMAP(sdp->swd_vp, bn / sdp->swd_bsize, + &vp, &nbn, &nra); + + if (error == 0 && (long)nbn == -1) + error = EIO; + + /* + * If there was an error or a hole in the file...punt. + * Note that we may have to wait for any operations + * that we have already fired off before releasing + * the buffer. + * + * XXX we could deal with holes here but it would be + * a hassle (in the write case). 
+ */ + if (error) { + s = splbio(); + vnx->vx_error = error; + goto out; + } + + if ((off = bn % sdp->swd_bsize) != 0) + sz = sdp->swd_bsize - off; + else + sz = (1 + nra) * sdp->swd_bsize; + + if (resid < sz) + sz = resid; + + DPRINTF(VMSDB_SWFLOW, + ("sw_reg_strategy: vp %p/%p bn 0x%x/0x%x" + " sz 0x%x\n", sdp->swd_vp, vp, bn, nbn, sz)); + + nbp = getvndbuf(); + nbp->vb_buf.b_flags = bp->b_flags | B_NOCACHE | B_CALL; + nbp->vb_buf.b_bcount = sz; + nbp->vb_buf.b_bufsize = bp->b_bufsize; + nbp->vb_buf.b_error = 0; + nbp->vb_buf.b_data = addr; + nbp->vb_buf.b_blkno = nbn + btodb(off); + nbp->vb_buf.b_proc = bp->b_proc; + nbp->vb_buf.b_iodone = sw_reg_iodone; + nbp->vb_buf.b_vp = NULLVP; + nbp->vb_buf.b_rcred = sdp->swd_cred; + nbp->vb_buf.b_wcred = sdp->swd_cred; + if (bp->b_dirtyend == 0) { + nbp->vb_buf.b_dirtyoff = 0; + nbp->vb_buf.b_dirtyend = sz; + } else { + nbp->vb_buf.b_dirtyoff = + max(0, bp->b_dirtyoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_dirtyend = + min(sz, + max(0, bp->b_dirtyend - (bp->b_bcount-resid))); } -#ifdef SEQSWAP - if (index < niswdev) { - perdev = niswap / niswdev; - if (nblks > perdev) - nblks = perdev; + if (bp->b_validend == 0) { + nbp->vb_buf.b_validoff = 0; + nbp->vb_buf.b_validend = sz; } else { - if (nblks % dmmax) - nblks -= (nblks % dmmax); - nswap += nblks; + nbp->vb_buf.b_validoff = + max(0, bp->b_validoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_validend = + min(sz, + max(0, bp->b_validend - (bp->b_bcount-resid))); } -#else - if (nswap > 0) { - perdev = nswap / nswdev; - if (nblks > perdev) - nblks = perdev; - } else - nswap = nblks; -#endif - sp->sw_nblks = nblks; + + nbp->vb_xfer = vnx; + + /* + * Just sort by block number + */ + nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno; + s = splbio(); + if (vnx->vx_error != 0) { + putvndbuf(nbp); + goto out; + } + vnx->vx_pending++; + bgetvp(vp, &nbp->vb_buf); + disksort(&sdp->swd_tab, &nbp->vb_buf); + sw_reg_start(sdp); + splx(s); + + bn += sz; + addr += sz; } - if (nblks == 0) { - if 
(vp != rootvp) - (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); - sp->sw_flags &= ~SW_FREED; - return (0); /* XXX error? */ + + s = splbio(); + +out: /* Arrive here at splbio */ + vnx->vx_flags &= ~VX_BUSY; + if (vnx->vx_pending == 0) { + if (vnx->vx_error != 0) { + bp->b_error = vnx->vx_error; + bp->b_flags |= B_ERROR; + } + putvndxfer(vnx); + biodone(bp); } -#ifdef SEQSWAP - if (sp->sw_flags & SW_SEQUENTIAL) { - register struct swdevt *swp; - - blk = niswap; - for (swp = &swdevt[niswdev]; swp != sp; swp++) - blk += swp->sw_nblks; - rmfree(swapmap, nblks, blk); - return (0); + splx(s); +} + +/* + * Feed requests sequentially. + * We do it this way to keep from flooding NFS servers if we are connected + * to an NFS file. This places the burden on the client rather than the + * server. + */ +STATIC void +sw_reg_start(sdp) + struct swapdev *sdp; +{ + struct buf *bp; + + if ((sdp->swd_flags & SWF_BUSY) != 0) + /* Recursion control */ + return; + + sdp->swd_flags |= SWF_BUSY; + + while (sdp->swd_tab.b_active < sdp->swd_maxactive) { + bp = sdp->swd_tab.b_actf; + if (bp == NULL) + break; + sdp->swd_tab.b_actf = bp->b_actf; + sdp->swd_tab.b_active++; + + DPRINTF(VMSDB_SWFLOW, + ("sw_reg_start: bp %p vp %p blkno %x addr %p cnt %lx\n", + bp, bp->b_vp, bp->b_blkno,bp->b_data, bp->b_bcount)); + + if ((bp->b_flags & B_READ) == 0) + bp->b_vp->v_numoutput++; + VOP_STRATEGY(bp); } -#endif - for (dvbase = 0; dvbase < nblks; dvbase += dmmax) { - blk = nblks - dvbase; -#ifdef SEQSWAP - if ((vsbase = index*dmmax + dvbase*niswdev) >= niswap) - panic("swfree"); -#else - if ((vsbase = index*dmmax + dvbase*nswdev) >= nswap) - panic("swfree"); -#endif - if (blk > dmmax) - blk = dmmax; - if (vsbase == 0) { - /* - * First of all chunks... initialize the swapmap. - * Don't use the first cluster of the device - * in case it starts with a label or boot block. 
- */ - rminit(swapmap, blk - ctod(btoc(SWAPSKIPBYTES)), - vsbase + ctod(btoc(SWAPSKIPBYTES)), "swap", nswapmap); - } else if (dvbase == 0) { - /* - * Don't use the first cluster of the device - * in case it starts with a label or boot block. - */ - rmfree(swapmap, blk - ctod(btoc(SWAPSKIPBYTES)), - vsbase + ctod(btoc(SWAPSKIPBYTES))); - } else - rmfree(swapmap, blk, vsbase); + sdp->swd_flags &= ~SWF_BUSY; +} + +STATIC void +sw_reg_iodone(bp) + struct buf *bp; +{ + register struct vndbuf *vbp = BUF_TO_VNDBUF(bp); + register struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer; + register struct buf *pbp = vnx->vx_bp; + struct swapdev *sdp = vnx->vx_sdp; + int s, resid; + + DPRINTF(VMSDB_SWFLOW, + ("sw_reg_iodone: vbp %p vp %p blkno %x addr %p " + "cnt %lx(%lx)\n", + vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, + vbp->vb_buf.b_data, vbp->vb_buf.b_bcount, + vbp->vb_buf.b_resid)); + + s = splbio(); + resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; + pbp->b_resid -= resid; + vnx->vx_pending--; + + if (vbp->vb_buf.b_error) { + DPRINTF(VMSDB_INFO, ("sw_reg_iodone: vbp %p error %d\n", vbp, + vbp->vb_buf.b_error)); + + vnx->vx_error = vbp->vb_buf.b_error; } + if (vbp->vb_buf.b_vp != NULLVP) + brelvp(&vbp->vb_buf); + + putvndbuf(vbp); + /* - * Preserve the mini-root if appropriate: - * Note: this requires !SEQSWAP && nswdev==1 - * - * A mini-root gets copied into the front of the swap - * and we run over top of the swap area just long - * enough for us to do a mkfs and restor of the real - * root (sure beats rewriting standalone restor). + * Wrap up this transaction if it has run to completion or, in + * case of an error, when all auxiliary buffers have returned. 
*/ - if (vp == rootvp) { -#ifndef MINIROOTSIZE - struct mount *mp; - struct statfs *sp; -#endif - long firstblk; - int rootblks; + if (vnx->vx_error != 0) { + pbp->b_flags |= B_ERROR; + pbp->b_error = vnx->vx_error; + if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { -#ifdef MINIROOTSIZE - rootblks = MINIROOTSIZE; -#else - /* Get size from root FS (mountroot did statfs) */ - mp = rootvnode->v_mount; - sp = &mp->mnt_stat; - rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE); + DPRINTF(VMSDB_SWFLOW, + ("swiodone: pbp %p iodone: error %d\n", + pbp, vnx->vx_error)); + putvndxfer(vnx); + biodone(pbp); + } + } else if (pbp->b_resid == 0) { + +#ifdef DIAGNOSTIC + if (vnx->vx_pending != 0) + panic("swiodone: vnx pending: %d", vnx->vx_pending); #endif - if (rootblks > nblks) - panic("swfree miniroot size"); - /* First ctod(btoc(SWAPSKIPBYTES)) blocks are not in the map. */ - firstblk = rmalloc(swapmap, rootblks - ctod(btoc(SWAPSKIPBYTES))); - if (firstblk != ctod(btoc(SWAPSKIPBYTES))) - panic("swfree miniroot save"); - printf("Preserved %d blocks of miniroot leaving %d pages of swap\n", - rootblks, dtoc(nblks - rootblks)); + + if ((vnx->vx_flags & VX_BUSY) == 0) { + DPRINTF(VMSDB_SWFLOW, + ("swiodone: pbp %p iodone\n", pbp)); + putvndxfer(vnx); + biodone(pbp); + } } - return (0); + sdp->swd_tab.b_active--; + sw_reg_start(sdp); + + splx(s); } +#endif /* SWAP_TO_FILES */ -int -sys_omsync(p, v, retval) - struct proc *p; - void *v; - register_t *retval; +void +swapinit() { - struct sys_msync_args ua; - struct sys_omsync_args /* { - syscallarg(caddr_t) addr; - syscallarg(size_t) len; - } */ *uap = v; - - SCARG(&ua, addr) = SCARG(uap, addr);; - SCARG(&ua, len) = SCARG(uap, len);; - SCARG(&ua, flags) = MS_SYNC | MS_INVALIDATE; - return (sys_msync(p, &ua, retval)); + struct buf *sp = swbuf; + struct proc *p = &proc0; /* XXX */ + int i; + + DPRINTF(VMSDB_SWINIT, ("swapinit\n")); + + nswapdev = 0; + if (bdevvp(swapdev, &swapdev_vp)) + panic("swapinit: can not setup 
swapdev_vp"); + + simple_lock_init(&swaplist_lock); + lockinit(&swaplist_change_lock, PSWP, "swap change", 0, 0); + LIST_INIT(&swap_priority); + + /* + * Create swap block resource map. The range [1..INT_MAX] allows + * for a grand total of 2 gigablocks of swap resource. + * (start at 1 because "block #0" will be interpreted as + * an allocation failure). + */ + swapmap = extent_create("swapmap", 1, INT_MAX, + M_VMSWAP, 0, 0, EX_WAITOK); + if (swapmap == 0) + panic("swapinit: extent_create failed"); + + /* + * Now set up swap buffer headers. + */ + bswlist.b_actf = sp; + for (i = 0; i < nswbuf - 1; i++, sp++) { + sp->b_actf = sp + 1; + sp->b_rcred = sp->b_wcred = p->p_ucred; + sp->b_vnbufs.le_next = NOLIST; + } + sp->b_rcred = sp->b_wcred = p->p_ucred; + sp->b_vnbufs.le_next = NOLIST; + sp->b_actf = NULL; + + /* Mount primary swap if available */ +#ifdef SWAPDEBUG + if(vmswap_domount) +#endif + swapmount(); + + DPRINTF(VMSDB_SWINIT, ("leaving swapinit\n")); +} + +/* + * Mount the primary swap device pointed to by 'swdevt[0]'. + */ +STATIC void +swapmount() +{ + extern int getdevvp(dev_t, struct vnode **, enum vtype); + struct swapdev *sdp; + struct vnode *vp = NULL; + struct proc *p = curproc; + dev_t swap_dev = swdevt[0].sw_dev; + + /* Make sure we have a device */ + if (swap_dev == NODEV) { + printf("swapmount: No swap device!\n"); + return; + } + + /* Malloc needed things */ + sdp = (struct swapdev *)malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); + bzero(sdp, sizeof(*sdp)); + + /* Do swap_on() stuff */ + if(bdevvp(swap_dev, &vp)){ + printf("swapmount: bdevvp() failed\n"); + return; + } + +#ifdef SWAPDEBUG + vprint("swapmount", vp); +#endif + + sdp->swd_vp = vp; + sdp->swd_dev = (vp->v_type == VBLK) ? 
vp->v_rdev : NODEV; + if(copystr("swap_device", sdp->swd_path, sizeof sdp->swd_path, 0) != 0){ + printf("swapmount: copystr() failed\n"); + return; + } + + /* Look for a swap device */ + printf("Adding swap(%d, %d):", major(swap_dev), minor(swap_dev)); + + if (swap_on(p, sdp) != 0) { + printf(" failed!\n"); + free((caddr_t)sdp, M_VMSWAP); + return; + } else + printf(" done.\n"); +#ifdef SWAP_TO_FILES + /* + * XXX Is NFS elaboration necessary? + */ + if (vp->v_type == VREG) + sdp->swd_cred = crdup(p->p_ucred); +#endif + insert_swapdev(sdp, 0); } |