summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTobias Weingartner <weingart@cvs.openbsd.org>1999-05-22 21:22:35 +0000
committerTobias Weingartner <weingart@cvs.openbsd.org>1999-05-22 21:22:35 +0000
commit35c377bf5315fb3e23e1c5b7e8af00733bed7db0 (patch)
tree5ab464baa96068a0b4eeb167b4514387057f3f90
parentaa079fadbadf6efd9c150afdd60894563611277c (diff)
Add new vm_swap code for dynamic swap. From netbsd, munged some by me, and
others. syscall commit pending.
-rw-r--r--sys/arch/alpha/alpha/machdep.c3
-rw-r--r--sys/arch/amiga/amiga/machdep.c3
-rw-r--r--sys/arch/arc/arc/machdep.c5
-rw-r--r--sys/arch/arm32/arm32/machdep.c3
-rw-r--r--sys/arch/atari/atari/machdep.c1
-rw-r--r--sys/arch/hp300/hp300/machdep.c3
-rw-r--r--sys/arch/i386/i386/machdep.c3
-rw-r--r--sys/arch/kbus/kbus/machdep.c1
-rw-r--r--sys/arch/mac68k/mac68k/machdep.c3
-rw-r--r--sys/arch/mvme68k/mvme68k/machdep.c3
-rw-r--r--sys/arch/mvme88k/mvme88k/machdep.c3
-rw-r--r--sys/arch/pc532/pc532/machdep.c1
-rw-r--r--sys/arch/pmax/pmax/machdep.c1
-rw-r--r--sys/arch/powerpc/powerpc/machdep.c3
-rw-r--r--sys/arch/sparc/sparc/machdep.c3
-rw-r--r--sys/arch/sun3/sun3/machdep.c3
-rw-r--r--sys/arch/vax/vax/machdep.c3
-rw-r--r--sys/arch/wgrisc/wgrisc/machdep.c5
-rw-r--r--sys/compat/common/Makefile8
-rw-r--r--sys/compat/common/compat_vm.c73
-rw-r--r--sys/sys/map.h11
-rw-r--r--sys/sys/swap.h72
-rw-r--r--sys/vm/swap_pager.c63
-rw-r--r--sys/vm/vm_swap.c1539
24 files changed, 1341 insertions, 475 deletions
diff --git a/sys/arch/alpha/alpha/machdep.c b/sys/arch/alpha/alpha/machdep.c
index 828f3f42ef9..50facc3c27e 100644
--- a/sys/arch/alpha/alpha/machdep.c
+++ b/sys/arch/alpha/alpha/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.26 1999/03/24 22:56:13 alex Exp $ */
+/* $OpenBSD: machdep.c,v 1.27 1999/05/22 21:22:17 weingart Exp $ */
/* $NetBSD: machdep.c,v 1.61 1996/12/07 01:54:49 cgd Exp $ */
/*
@@ -445,7 +445,6 @@ unknown_cputype:
valloc(cfree, struct cblock, nclist);
#endif
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/amiga/amiga/machdep.c b/sys/arch/amiga/amiga/machdep.c
index 26f83ac34f0..e9de602d790 100644
--- a/sys/arch/amiga/amiga/machdep.c
+++ b/sys/arch/amiga/amiga/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.31 1999/01/20 12:06:52 niklas Exp $ */
+/* $OpenBSD: machdep.c,v 1.32 1999/05/22 21:22:18 weingart Exp $ */
/* $NetBSD: machdep.c,v 1.95 1997/08/27 18:31:17 is Exp $ */
/*
@@ -363,7 +363,6 @@ again:
(name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
/* valloc(cfree, struct cblock, nclist); */
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/arc/arc/machdep.c b/sys/arch/arc/arc/machdep.c
index 8c6386a2cd7..f991f33ae9c 100644
--- a/sys/arch/arc/arc/machdep.c
+++ b/sys/arch/arc/arc/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.35 1999/01/30 22:39:31 imp Exp $ */
+/* $OpenBSD: machdep.c,v 1.36 1999/05/22 21:22:19 weingart Exp $ */
/*
* Copyright (c) 1988 University of Utah.
* Copyright (c) 1992, 1993
@@ -38,7 +38,7 @@
* SUCH DAMAGE.
*
* from: @(#)machdep.c 8.3 (Berkeley) 1/12/94
- * $Id: machdep.c,v 1.35 1999/01/30 22:39:31 imp Exp $
+ * $Id: machdep.c,v 1.36 1999/05/22 21:22:19 weingart Exp $
*/
/* from: Utah Hdr: machdep.c 1.63 91/04/24 */
@@ -483,7 +483,6 @@ mips_init(argc, argv, envv)
valloc(cfree, struct cblock, nclist);
#endif
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/arm32/arm32/machdep.c b/sys/arch/arm32/arm32/machdep.c
index bb2f341abf1..91c7331ee35 100644
--- a/sys/arch/arm32/arm32/machdep.c
+++ b/sys/arch/arm32/arm32/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.4 1999/01/11 05:11:11 millert Exp $ */
+/* $OpenBSD: machdep.c,v 1.5 1999/05/22 21:22:20 weingart Exp $ */
/* $NetBSD: machdep.c,v 1.6 1996/03/13 21:32:39 mark Exp $ */
/*
@@ -1472,7 +1472,6 @@ allocsys(v)
v = (caddr_t)((name) + (num));
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
diff --git a/sys/arch/atari/atari/machdep.c b/sys/arch/atari/atari/machdep.c
index 9e57142defa..8bebb10f257 100644
--- a/sys/arch/atari/atari/machdep.c
+++ b/sys/arch/atari/atari/machdep.c
@@ -213,7 +213,6 @@ again:
(name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
/* valloc(cfree, struct cblock, nclist); */
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/hp300/hp300/machdep.c b/sys/arch/hp300/hp300/machdep.c
index 50660fa8df9..0f269f318d9 100644
--- a/sys/arch/hp300/hp300/machdep.c
+++ b/sys/arch/hp300/hp300/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.36 1999/04/23 03:11:55 downsj Exp $ */
+/* $OpenBSD: machdep.c,v 1.37 1999/05/22 21:22:22 weingart Exp $ */
/* $NetBSD: machdep.c,v 1.94 1997/06/12 15:46:29 mrg Exp $ */
/*
@@ -401,7 +401,6 @@ allocsys(v)
valloc(cfree, struct cblock, nclist);
#endif
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c
index c4f7bf6325d..57b9aeaa243 100644
--- a/sys/arch/i386/i386/machdep.c
+++ b/sys/arch/i386/i386/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.106 1999/05/09 15:09:04 mickey Exp $ */
+/* $OpenBSD: machdep.c,v 1.107 1999/05/22 21:22:23 weingart Exp $ */
/* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */
/*-
@@ -447,7 +447,6 @@ allocsys(v)
valloc(cfree, struct cblock, nclist);
#endif
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/kbus/kbus/machdep.c b/sys/arch/kbus/kbus/machdep.c
index da0cd04a5c6..f7876bb310d 100644
--- a/sys/arch/kbus/kbus/machdep.c
+++ b/sys/arch/kbus/kbus/machdep.c
@@ -508,7 +508,6 @@ allocsys(v)
#define valloclim(name, type, num, lim) \
(name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/mac68k/mac68k/machdep.c b/sys/arch/mac68k/mac68k/machdep.c
index 008cc7bb57d..8c313de02a5 100644
--- a/sys/arch/mac68k/mac68k/machdep.c
+++ b/sys/arch/mac68k/mac68k/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.58 1999/04/23 05:15:28 downsj Exp $ */
+/* $OpenBSD: machdep.c,v 1.59 1999/05/22 21:22:25 weingart Exp $ */
/* $NetBSD: machdep.c,v 1.134 1997/02/14 06:15:30 scottr Exp $ */
/*
@@ -363,7 +363,6 @@ again:
valloc(cfree, struct cblock, nclist);
#endif
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/mvme68k/mvme68k/machdep.c b/sys/arch/mvme68k/mvme68k/machdep.c
index a30e4a14e13..3d7ebc53607 100644
--- a/sys/arch/mvme68k/mvme68k/machdep.c
+++ b/sys/arch/mvme68k/mvme68k/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.25 1999/02/04 23:00:26 niklas Exp $ */
+/* $OpenBSD: machdep.c,v 1.26 1999/05/22 21:22:26 weingart Exp $ */
/*
* Copyright (c) 1995 Theo de Raadt
@@ -264,7 +264,6 @@ again:
valloc(cfree, struct cblock, nclist);
#endif
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/mvme88k/mvme88k/machdep.c b/sys/arch/mvme88k/mvme88k/machdep.c
index 33b121bdcb4..27085bb3023 100644
--- a/sys/arch/mvme88k/mvme88k/machdep.c
+++ b/sys/arch/mvme88k/mvme88k/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.12 1999/04/11 03:26:28 smurph Exp $ */
+/* $OpenBSD: machdep.c,v 1.13 1999/05/22 21:22:27 weingart Exp $ */
/*
* Copyright (c) 1998 Steve Murphree, Jr.
* Copyright (c) 1996 Nivas Madhur
@@ -627,7 +627,6 @@ allocsys(v)
valloc(cfree, struct cblock, nclist);
#endif
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/pc532/pc532/machdep.c b/sys/arch/pc532/pc532/machdep.c
index 277237a481d..9462904d85d 100644
--- a/sys/arch/pc532/pc532/machdep.c
+++ b/sys/arch/pc532/pc532/machdep.c
@@ -350,7 +350,6 @@ again:
#define valloclim(name, type, num, lim) \
(name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/pmax/pmax/machdep.c b/sys/arch/pmax/pmax/machdep.c
index 81adb5a7775..29a7914ed30 100644
--- a/sys/arch/pmax/pmax/machdep.c
+++ b/sys/arch/pmax/pmax/machdep.c
@@ -701,7 +701,6 @@ mach_init(argc, argv, code, cv)
valloc(cfree, struct cblock, nclist);
#endif
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/powerpc/powerpc/machdep.c b/sys/arch/powerpc/powerpc/machdep.c
index e6c147b3c57..d2d1037e04b 100644
--- a/sys/arch/powerpc/powerpc/machdep.c
+++ b/sys/arch/powerpc/powerpc/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.22 1999/01/11 05:11:54 millert Exp $ */
+/* $OpenBSD: machdep.c,v 1.23 1999/05/22 21:22:29 weingart Exp $ */
/* $NetBSD: machdep.c,v 1.4 1996/10/16 19:33:11 ws Exp $ */
/*
@@ -444,7 +444,6 @@ allocsys(v)
v = (caddr_t)(((name) = (type *)v) + (num))
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/sparc/sparc/machdep.c b/sys/arch/sparc/sparc/machdep.c
index 75d25f3d240..4222480e53c 100644
--- a/sys/arch/sparc/sparc/machdep.c
+++ b/sys/arch/sparc/sparc/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.35 1999/04/22 18:51:49 art Exp $ */
+/* $OpenBSD: machdep.c,v 1.36 1999/05/22 21:22:30 weingart Exp $ */
/* $NetBSD: machdep.c,v 1.85 1997/09/12 08:55:02 pk Exp $ */
/*
@@ -410,7 +410,6 @@ allocsys(v)
#define valloc(name, type, num) \
v = (caddr_t)(((name) = (type *)v) + (num))
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/sun3/sun3/machdep.c b/sys/arch/sun3/sun3/machdep.c
index 3f060f938a8..1b0190a635b 100644
--- a/sys/arch/sun3/sun3/machdep.c
+++ b/sys/arch/sun3/sun3/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.22 1999/02/04 23:00:26 niklas Exp $ */
+/* $OpenBSD: machdep.c,v 1.23 1999/05/22 21:22:31 weingart Exp $ */
/* $NetBSD: machdep.c,v 1.77 1996/10/13 03:47:51 christos Exp $ */
/*
@@ -189,7 +189,6 @@ allocsys(v)
valloc(cfree, struct cblock, nclist);
#endif
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/vax/vax/machdep.c b/sys/arch/vax/vax/machdep.c
index e1a2b55d96f..2ea640c80eb 100644
--- a/sys/arch/vax/vax/machdep.c
+++ b/sys/arch/vax/vax/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.17 1997/10/02 19:53:20 niklas Exp $ */
+/* $OpenBSD: machdep.c,v 1.18 1999/05/22 21:22:32 weingart Exp $ */
/* $NetBSD: machdep.c,v 1.45 1997/07/26 10:12:49 ragge Exp $ */
/*
@@ -342,7 +342,6 @@ allocsys(v)
valloc(cfree, struct cblock, nclist);
#endif
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/arch/wgrisc/wgrisc/machdep.c b/sys/arch/wgrisc/wgrisc/machdep.c
index 5d90de97c66..7135ba0199c 100644
--- a/sys/arch/wgrisc/wgrisc/machdep.c
+++ b/sys/arch/wgrisc/wgrisc/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.4 1997/08/24 12:01:15 pefo Exp $ */
+/* $OpenBSD: machdep.c,v 1.5 1999/05/22 21:22:32 weingart Exp $ */
/*
* Copyright (c) 1988 University of Utah.
* Copyright (c) 1992, 1993
@@ -38,7 +38,7 @@
* SUCH DAMAGE.
*
* from: @(#)machdep.c 8.3 (Berkeley) 1/12/94
- * $Id: machdep.c,v 1.4 1997/08/24 12:01:15 pefo Exp $
+ * $Id: machdep.c,v 1.5 1999/05/22 21:22:32 weingart Exp $
*/
/* from: Utah Hdr: machdep.c 1.63 91/04/24 */
@@ -418,7 +418,6 @@ mips_init(argc, argv, code)
valloc(cfree, struct cblock, nclist);
#endif
valloc(callout, struct callout, ncallout);
- valloc(swapmap, struct map, nswapmap = maxproc * 2);
#ifdef SYSVSHM
valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
diff --git a/sys/compat/common/Makefile b/sys/compat/common/Makefile
index e681d104d97..9c53d691d07 100644
--- a/sys/compat/common/Makefile
+++ b/sys/compat/common/Makefile
@@ -1,4 +1,4 @@
-# $OpenBSD: Makefile,v 1.6 1997/05/30 09:40:57 niklas Exp $
+# $OpenBSD: Makefile,v 1.7 1999/05/22 21:22:33 weingart Exp $
# $NetBSD: Makefile,v 1.8 1996/05/18 15:52:19 christos Exp $
LIB= compat
@@ -6,9 +6,9 @@ NOPIC=
.PATH: ${COMPATDIR}
-SRCS= compat_exec.c compat_util.c kern_exit_43.c kern_info_09.c \
- kern_info_43.c kern_prot_43.c kern_resource_43.c kern_sig_43.c \
- tty_43.c uipc_syscalls_43.c vfs_syscalls_43.c vm_43.c
+SRCS= compat_exec.c compat_util.c compat_vm.c kern_exit_43.c \
+ kern_info_09.c kern_info_43.c kern_prot_43.c kern_resource_43.c \
+ kern_sig_43.c tty_43.c uipc_syscalls_43.c vfs_syscalls_43.c vm_43.c
# really, all machines were sizeof(int) != sizeof(long)
.if (${MACHINE_ARCH} != "alpha")
diff --git a/sys/compat/common/compat_vm.c b/sys/compat/common/compat_vm.c
new file mode 100644
index 00000000000..68ce0889d69
--- /dev/null
+++ b/sys/compat/common/compat_vm.c
@@ -0,0 +1,73 @@
+/* $OpenBSD: compat_vm.c,v 1.1 1999/05/22 21:22:33 weingart Exp $ */
+/* $NetBSD: vm_12.c,v 1.8 1997/10/20 22:05:22 thorpej Exp $ */
+
+/*
+ * Copyright (c) 1997 Matthew R. Green
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h> /* needed for next include! */
+#include <sys/syscallargs.h>
+
+#include <sys/swap.h>
+#include <sys/mman.h>
+
+int
+sys_swapon(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ struct sys_swapctl_args ua;
+ struct sys_swapon_args /* {
+ syscallarg(const char *) name;
+ } */ *uap = v;
+
+ SCARG(&ua, cmd) = SWAP_ON;
+ SCARG(&ua, arg) = (void *)SCARG(uap, name);
+ SCARG(&ua, misc) = 0; /* priority */
+ return (sys_swapctl(p, &ua, retval));
+}
+
+int
+sys_omsync(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ struct sys_msync_args ua;
+ struct sys_omsync_args /* {
+ syscallarg(caddr_t) addr;
+ syscallarg(size_t) len;
+ } */ *uap = v;
+
+ SCARG(&ua, addr) = SCARG(uap, addr);
+ SCARG(&ua, len) = SCARG(uap, len);
+ SCARG(&ua, flags) = MS_SYNC | MS_INVALIDATE; /* omsync args carry no flags; fixed here */
+ return (sys_msync(p, &ua, retval));
+}
diff --git a/sys/sys/map.h b/sys/sys/map.h
index 57989b8a8bd..a476f19f54e 100644
--- a/sys/sys/map.h
+++ b/sys/sys/map.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: map.h,v 1.2 1996/03/03 12:11:59 niklas Exp $ */
+/* $OpenBSD: map.h,v 1.3 1999/05/22 21:22:33 weingart Exp $ */
/* $NetBSD: map.h,v 1.10 1995/09/15 05:32:45 jtc Exp $ */
/*-
@@ -64,6 +64,10 @@
* N.B.: The address 0 in the resource address space is not available
* as it is used internally by the resource map routines.
*/
+
+#ifndef _SYS_MAP_H_
+#define _SYS_MAP_H_
+
struct map {
struct mapent *m_limit; /* first slot beyond map */
char *m_name; /* name of resource, for messages */
@@ -75,10 +79,9 @@ struct mapent {
};
#ifdef _KERNEL
-struct map *swapmap;
-int nswapmap;
long rmalloc __P((struct map *, long));
void rmfree __P((struct map *, long, long));
void rminit __P((struct map *, long, long, char *, int));
-#endif
+#endif /* _KERNEL */
+#endif /* _SYS_MAP_H_ */
diff --git a/sys/sys/swap.h b/sys/sys/swap.h
new file mode 100644
index 00000000000..212eea90768
--- /dev/null
+++ b/sys/sys/swap.h
@@ -0,0 +1,72 @@
+/* $OpenBSD: swap.h,v 1.1 1999/05/22 21:22:34 weingart Exp $ */
+/* $NetBSD: swap.h,v 1.2 1998/09/13 14:46:24 christos Exp $ */
+
+/*
+ * Copyright (c) 1995, 1996, 1998 Matthew R. Green, Tobias Weingartner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* NOTE: This is the current swap.h from NetBSD. Since we are "upgrading"
+ * to the new vm_swap code now, we will not keep compatibility with the
+ * old vm_swap code that was in NetBSD. This means that we do not have
+ * an oswapent structure, but instead use a "new" swapent structure, with
+ * no overlay.
+ *
+ * --Toby.
+ */
+
+#ifndef _SYS_SWAP_H_
+#define _SYS_SWAP_H_
+
+#include <sys/syslimits.h>
+
+/* These structures are used to return swap information for userland */
+struct swapent {
+ dev_t se_dev; /* device id */
+ int se_flags; /* flags */
+ int se_nblks; /* total blocks */
+ int se_inuse; /* blocks in use */
+ int se_priority; /* priority of this device */
+ char se_path[PATH_MAX+1]; /* path name */
+};
+
+#define SWAP_ON 1 /* begin swapping on device */
+#define SWAP_OFF 2 /* (stop swapping on device) */
+#define SWAP_NSWAP 3 /* how many swap devices ? */
+#define SWAP_STATS 4 /* get device info */
+#define SWAP_CTL 5 /* change priority on device */
+
+#define SWF_INUSE 0x00000001 /* in use: we have swapped here */
+#define SWF_ENABLE 0x00000002 /* enabled: we can swap here */
+#define SWF_BUSY 0x00000004 /* busy: I/O happening here */
+#define SWF_FAKE 0x00000008 /* fake: still being built */
+
+#if defined(_KERNEL) && !defined(UVM)
+daddr_t swap_alloc __P((int size));
+void swap_free __P((int size, daddr_t addr));
+void swapinit __P((void));
+#endif
+
+#endif /* _SYS_SWAP_H_ */
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 8eb5e60d2b8..dd1c8b6f717 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: swap_pager.c,v 1.15 1999/02/08 01:10:58 art Exp $ */
+/* $OpenBSD: swap_pager.c,v 1.16 1999/05/22 21:22:34 weingart Exp $ */
/* $NetBSD: swap_pager.c,v 1.27 1996/03/16 23:15:20 christos Exp $ */
/*
@@ -55,8 +55,10 @@
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/map.h>
+#include <sys/simplelock.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
+#include <sys/swap.h>
#include <miscfs/specfs/specdev.h>
@@ -65,6 +67,7 @@
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
+/* XXX this makes the max swap devices 16 */
#define NSWSIZES 16 /* size of swtab */
#define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */
#ifndef NPENDINGIO
@@ -169,10 +172,8 @@ struct pagerops swappagerops = {
static void
swap_pager_init()
{
- register swp_clean_t spc;
- register int i, bsize;
- extern int dmmin, dmmax;
- int maxbsize;
+ swp_clean_t spc;
+ int i, maxbsize, bsize;
#ifdef DEBUG
if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
@@ -201,42 +202,34 @@ swap_pager_init()
spc->spc_flags = SPC_FREE;
}
+/* this needs to be at least ctod(1) for all ports for vtod() to work */
+#define DMMIN 32
/*
- * Calculate the swap allocation constants.
- */
- if (dmmin == 0) {
- dmmin = DMMIN;
- if (dmmin < CLBYTES/DEV_BSIZE)
- dmmin = CLBYTES/DEV_BSIZE;
- }
- if (dmmax == 0)
- dmmax = DMMAX;
-
- /*
- * Fill in our table of object size vs. allocation size
+ * Fill in our table of object size vs. allocation size. bsize needs
+ * to be at least ctod(1) for all ports for vtod() to work, with a
+ * bare minimum of 32.
*/
- bsize = btodb(PAGE_SIZE);
- if (bsize < dmmin)
- bsize = dmmin;
+#define max(a, b) ((a) > (b) ? (a) : (b))
+ bsize = max(32, max(ctod(1), btodb(PAGE_SIZE)));
maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
- if (maxbsize > dmmax)
- maxbsize = dmmax;
+ if (maxbsize > NBPG)
+ maxbsize = NBPG;
for (i = 0; i < NSWSIZES; i++) {
- swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
- swtab[i].st_bsize = bsize;
if (bsize <= btodb(MAXPHYS))
swap_pager_maxcluster = dbtob(bsize);
+ swtab[i].st_bsize = bsize;
+ if (bsize >= maxbsize) {
+ swtab[i].st_osize = 0;
+ break;
+ }
+ swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
#ifdef DEBUG
if (swpagerdebug & SDB_INIT)
printf("swpg_init: ix %d, size %lx, bsize %x\n",
i, swtab[i].st_osize, swtab[i].st_bsize);
#endif
- if (bsize >= maxbsize)
- break;
bsize *= 2;
}
- swtab[i].st_osize = 0;
- swtab[i].st_bsize = bsize;
}
/*
@@ -407,7 +400,7 @@ swap_pager_dealloc(pager)
printf("swpg_dealloc: blk %x\n",
bp->swb_block);
#endif
- rmfree(swapmap, swp->sw_bsize, bp->swb_block);
+ swap_free(swp->sw_bsize, bp->swb_block);
}
/*
* Free swap management resources
@@ -462,7 +455,6 @@ swap_pager_putpage(pager, mlist, npages, sync)
int npages;
boolean_t sync;
{
- int flags;
#ifdef DEBUG
if (swpagerdebug & SDB_FOLLOW)
@@ -473,11 +465,8 @@ swap_pager_putpage(pager, mlist, npages, sync)
swap_pager_clean(B_WRITE);
return (VM_PAGER_OK); /* ??? */
}
- flags = B_WRITE;
- if (!sync)
- flags |= B_ASYNC;
return (swap_pager_io((sw_pager_t)pager->pg_data, mlist, npages,
- flags));
+ B_WRITE | (sync ? 0 : B_ASYNC)));
}
static boolean_t
@@ -656,7 +645,7 @@ swap_pager_io(swp, mlist, npages, flags)
* Allocate a swap block if necessary.
*/
if (swb->swb_block == 0) {
- swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
+ swb->swb_block = swap_alloc(swp->sw_bsize);
if (swb->swb_block == 0) {
#ifdef DEBUG
if (swpagerdebug & SDB_FAIL)
@@ -727,8 +716,8 @@ swap_pager_io(swp, mlist, npages, flags)
bp->b_dirtyoff = 0;
bp->b_dirtyend = npages * PAGE_SIZE;
s = splbio();
- swp->sw_poip++;
swapdev_vp->v_numoutput++;
+ swp->sw_poip++;
splx(s);
mask = (~(~0 << npages)) << atop(off);
#ifdef DEBUG
@@ -1139,7 +1128,7 @@ swap_pager_remove(pager, from, to)
* means no pages are left in the block, free it.
*/
if ((swb->swb_mask &= mask) == 0) {
- rmfree(swapmap, swp->sw_bsize, swb->swb_block);
+ swap_free(swp->sw_bsize, swb->swb_block);
swb->swb_block = 0;
}
}
diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c
index 4a8f1026b73..1d80eb8e421 100644
--- a/sys/vm/vm_swap.c
+++ b/sys/vm/vm_swap.c
@@ -1,9 +1,9 @@
-/* $OpenBSD: vm_swap.c,v 1.8 1997/12/02 16:55:52 csapuntz Exp $ */
-/* $NetBSD: vm_swap.c,v 1.32 1996/02/05 01:54:09 christos Exp $ */
+/* $OpenBSD: vm_swap.c,v 1.9 1999/05/22 21:22:34 weingart Exp $ */
+/* $NetBSD: vm_swap.c,v 1.64 1998/11/08 19:45:17 mycroft Exp $ */
/*
- * Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1995, 1996, 1997 Matthew R. Green, Tobias Weingartner
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,27 +13,19 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
@@ -41,225 +33,804 @@
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
-#include <sys/dmap.h> /* XXX */
+#include <sys/disklabel.h>
+#include <sys/dmap.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
#include <sys/vnode.h>
#include <sys/map.h>
#include <sys/file.h>
-#include <sys/mman.h>
-
+#include <sys/stat.h>
+#include <sys/extent.h>
+#include <sys/swap.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
-#include <vm/vm.h>
+#include <machine/vmparam.h>
+
#include <vm/vm_conf.h>
#include <miscfs/specfs/specdev.h>
/*
- * Indirect driver for multi-controller paging.
+ * The idea here is to provide a single interface for multiple swap devices,
+ * of any kind and priority in a simple and fast way.
+ *
+ * Each swap device has these properties:
+ * * swap in use.
+ * * swap enabled.
+ * * map information in `/dev/drum'.
+ * * vnode pointer.
+ * Files have these additional properties:
+ * * block size.
+ * * maximum byte count in buffer.
+ * * buffer.
+ * * credentials.
+ *
+ * The arguments to swapctl(2) are:
+ * int cmd;
+ * void *arg;
+ * int misc;
+ * The cmd can be one of:
+ * SWAP_NSWAP - swapctl(2) returns the number of swap devices currently in
+ * use.
+ * SWAP_STATS - swapctl(2) takes a struct swapent * in (void *arg) and writes
+ * misc or fewer (to zero) entries of configured swap devices,
+ * and returns the number of entries written or -1 on error.
+ * SWAP_ON - swapctl(2) takes a (char *) in arg to be the pathname of a
+ * device or file to begin swapping on, with its priority in
+ * misc, returning 0 on success and -1 on error.
+ * SWAP_OFF - swapctl(2) takes a (char *) in arg to be the pathname of a
+ * device or file to stop swapping on, returning 0 or -1.
+ * XXX unwritten.
+ * SWAP_CTL - swapctl(2) changes the priority of a swap device, using the
+ * misc value.
+ */
+
+#ifdef SWAPDEBUG
+#define STATIC
+#define VMSDB_SWON 0x0001
+#define VMSDB_SWOFF 0x0002
+#define VMSDB_SWINIT 0x0004
+#define VMSDB_SWALLOC 0x0008
+#define VMSDB_SWFLOW 0x0010
+#define VMSDB_INFO 0x0020
+int vmswapdebug = 0;
+int vmswap_domount = 1;
+
+#define DPRINTF(f, m) do { \
+ if (vmswapdebug & (f)) \
+ printf m; \
+} while(0)
+#else
+#define STATIC static
+#define DPRINTF(f, m)
+#endif
+
+#define SWAP_TO_FILES
+
+struct swapdev {
+ struct swapent swd_se;
+#define swd_dev swd_se.se_dev
+#define swd_flags swd_se.se_flags
+#define swd_nblks swd_se.se_nblks
+#define swd_inuse swd_se.se_inuse
+#define swd_priority swd_se.se_priority
+#define swd_path swd_se.se_path
+ daddr_t swd_mapoffset;
+ int swd_mapsize;
+ struct extent *swd_ex;
+ struct vnode *swd_vp;
+ CIRCLEQ_ENTRY(swapdev) swd_next;
+
+#ifdef SWAP_TO_FILES
+ int swd_bsize;
+ int swd_maxactive;
+ struct buf swd_tab;
+ struct ucred *swd_cred;
+#endif
+};
+
+/*
+ * Swap device priority entry; the list is kept sorted on `spi_priority'.
*/
+struct swappri {
+ int spi_priority;
+ CIRCLEQ_HEAD(spi_swapdev, swapdev) spi_swapdev;
+ LIST_ENTRY(swappri) spi_swappri;
+};
+
+
+
+
+/*
+ * The following two structures are used to keep track of data transfers
+ * on swap devices associated with regular files.
+ * NOTE: this code is more or less a copy of vnd.c; we use the same
+ * structure names here to ease porting.
+ */
+
+
+struct vndxfer {
+ struct buf *vx_bp; /* Pointer to parent buffer */
+ struct swapdev *vx_sdp;
+ int vx_error;
+ int vx_pending; /* # of pending aux buffers */
+ int vx_flags;
+#define VX_BUSY 1
+#define VX_DEAD 2
+};
+
+
+struct vndbuf {
+ struct buf vb_buf;
+ struct vndxfer *vb_xfer;
+};
-int nswap, nswdev;
-#ifdef SEQSWAP
-int niswdev; /* number of interleaved swap devices */
-int niswap; /* size of interleaved swap area */
+/* To get from a buffer to the encapsulating vndbuf (vb_buf's offset is subtracted) */
+#define BUF_TO_VNDBUF(bp) \
+ ((struct vndbuf *)((long)(bp) - ((long)&((struct vndbuf *)0)->vb_buf)))
+
+/* vnd macro stuff, rewritten to use malloc()/free(); callers supply the ';' */
+#define getvndxfer() \
+ ((struct vndxfer *)malloc(sizeof(struct vndxfer), M_VMSWAP, M_WAITOK))
+
+#define putvndxfer(vnx) \
+ free(vnx, M_VMSWAP)
+
+#define getvndbuf() \
+ ((struct vndbuf *)malloc(sizeof(struct vndbuf), M_VMSWAP, M_WAITOK))
+
+#define putvndbuf(vbp) \
+ free(vbp, M_VMSWAP)
+
+
+int nswapdev; /* number of swap devices enabled by swap_on() */
+int swflags;
+struct extent *swapmap; /* global swap block map; swap_addmap() carves a range per device */
+LIST_HEAD(swap_priority, swappri) swap_priority; /* swappri entries, sorted by priority */
+
+STATIC int swap_on __P((struct proc *, struct swapdev *));
+#ifdef SWAP_OFF_WORKS
+STATIC int swap_off __P((struct proc *, struct swapdev *));
#endif
+STATIC struct swapdev *swap_getsdpfromaddr __P((daddr_t));
+STATIC void swap_addmap __P((struct swapdev *, int));
-int swfree __P((struct proc *, int));
+#ifdef SWAP_TO_FILES
+STATIC void sw_reg_strategy __P((struct swapdev *, struct buf *, int));
+STATIC void sw_reg_iodone __P((struct buf *));
+STATIC void sw_reg_start __P((struct swapdev *));
+#endif
+
+STATIC void insert_swapdev __P((struct swapdev *, int));
+STATIC struct swapdev *find_swapdev __P((struct vnode *, int));
+STATIC void swaplist_trim __P((void));
+
+STATIC void swapmount __P((void));
+
+/*
+ * We use two locks to protect the swap device lists.
+ * The long-term lock is only used to prevent races in
+ * concurrently executing swapctl(2) system calls.
+ */
+struct simplelock swaplist_lock; /* held while walking or changing swap_priority */
+struct lock swaplist_change_lock; /* long-term lock taken by sys_swapctl() */
/*
- * Set up swap devices.
- * Initialize linked list of free swap
- * headers. These do not actually point
- * to buffers, but rather to pages that
- * are being swapped in and out.
+ * Insert a swap device on the priority list.
+ * A swappri entry for `priority' is created if none exists yet; when the
+ * M_NOWAIT allocation fails the lock is dropped and the search is retried
+ * after a sleep. The device is appended to the tail of the entry's queue.
*/
void
-swapinit()
+insert_swapdev(sdp, priority)
+ struct swapdev *sdp;
+ int priority;
{
- register int i;
- register struct buf *sp = swbuf;
- register struct proc *p = &proc0; /* XXX */
- struct swdevt *swp;
- int error;
+ struct swappri *spp, *pspp;
+
+again:
+ simple_lock(&swaplist_lock);
/*
- * Count swap devices, and adjust total swap space available.
- * Some of the space will not be countable until later (dynamically
- * configurable devices) and some of the counted space will not be
- * available until a swapon() system call is issued, both usually
- * happen when the system goes multi-user.
- *
- * If using NFS for swap, swdevt[0] will already be bdevvp'd. XXX
- */
-#ifdef SEQSWAP
- nswdev = niswdev = 0;
- nswap = niswap = 0;
- /*
- * All interleaved devices must come first
+ * Find entry at or after which to insert the new device.
*/
- for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) {
- if (swp->sw_flags & SW_SEQUENTIAL)
+ for (pspp = NULL, spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ if (priority <= spp->spi_priority) /* first entry not below the requested priority */
break;
- niswdev++;
- if (swp->sw_nblks > niswap)
- niswap = swp->sw_nblks;
+ pspp = spp; /* last entry with a lower priority value */
}
- niswap = roundup(niswap, dmmax);
- niswap *= niswdev;
- if (swdevt[0].sw_vp == NULL &&
- bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp))
- panic("swapvp");
- /*
- * The remainder must be sequential
- */
- for ( ; swp->sw_dev != NODEV; swp++) {
- if ((swp->sw_flags & SW_SEQUENTIAL) == 0)
- panic("binit: mis-ordered swap devices");
- nswdev++;
- if (swp->sw_nblks > 0) {
- if (swp->sw_nblks % dmmax)
- swp->sw_nblks -= (swp->sw_nblks % dmmax);
- nswap += swp->sw_nblks;
+
+ if (spp == NULL || spp->spi_priority != priority) {
+ spp = (struct swappri *)
+ malloc(sizeof *spp, M_VMSWAP, M_NOWAIT);
+
+ if (spp == NULL) {
+ simple_unlock(&swaplist_lock);
+ tsleep((caddr_t)&lbolt, PSWP, "memory", 0);
+ goto again; /* list may have changed while unlocked: search again */
}
+ DPRINTF(VMSDB_SWFLOW,
+ ("sw: had to create a new swappri = %d\n", priority));
+
+ spp->spi_priority = priority;
+ CIRCLEQ_INIT(&spp->spi_swapdev);
+
+ if (pspp)
+ LIST_INSERT_AFTER(pspp, spp, spi_swappri);
+ else
+ LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri);
+
}
- nswdev += niswdev;
- if (nswdev == 0)
- panic("swapinit");
- nswap += niswap;
-#else
- nswdev = 0;
- nswap = 0;
- for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) {
- nswdev++;
- if (swp->sw_nblks > nswap)
- nswap = swp->sw_nblks;
- }
- if (nswdev == 0)
- panic("swapinit");
- if (nswdev > 1)
- nswap = ((nswap + dmmax - 1) / dmmax) * dmmax;
- nswap *= nswdev;
- if (swdevt[0].sw_vp == NULL &&
- bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp))
- panic("swapvp");
-#endif
- if (nswap == 0)
- printf("WARNING: no swap space found\n");
- else if ((error = swfree(p, 0)) == ENXIO)
- printf("WARNING: primary swap device not configured\n");
- else if (error) {
- printf("swfree errno %d\n", error); /* XXX */
- panic("swapinit swfree 0");
- }
+ /* Onto priority list */
+ CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
+ sdp->swd_priority = priority;
+ simple_unlock(&swaplist_lock);
+}
- /*
- * Now set up swap buffer headers.
- */
- bswlist.b_actf = sp;
- for (i = 0; i < nswbuf - 1; i++, sp++) {
- sp->b_actf = sp + 1;
- sp->b_rcred = sp->b_wcred = p->p_ucred;
- sp->b_vnbufs.le_next = NOLIST;
+/*
+ * Find and optionally remove a swap device from the priority list.
+ * Returns the swapdev whose vnode is `vp', or NULL when no listed device
+ * uses that vnode. When `remove' is non-zero the device is unlinked from
+ * its priority queue; the swappri entry itself is left in place.
+ */
+struct swapdev *
+find_swapdev(vp, remove)
+ struct vnode *vp;
+ int remove;
+{
+ struct swapdev *sdp;
+ struct swappri *spp;
+
+ simple_lock(&swaplist_lock);
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev;
+ sdp = sdp->swd_next.cqe_next)
+ if (sdp->swd_vp == vp) {
+ if (remove)
+ CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp,
+ swd_next);
+ simple_unlock(&swaplist_lock);
+ return (sdp);
+ }
}
- sp->b_rcred = sp->b_wcred = p->p_ucred;
- sp->b_vnbufs.le_next = NOLIST;
- sp->b_actf = NULL;
+ simple_unlock(&swaplist_lock);
+ return (NULL);
}
+/*
+ * Scan priority list for empty priority entries.
+ * Every swappri whose device queue is empty is unlinked and freed.
+ */
void
-swstrategy(bp)
- register struct buf *bp;
+swaplist_trim()
+{
+ struct swappri *spp;
+
+ simple_lock(&swaplist_lock);
+restart:
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ if (spp->spi_swapdev.cqh_first != (void *)&spp->spi_swapdev)
+ continue; /* queue still has at least one device */
+ LIST_REMOVE(spp, spi_swappri);
+ free((caddr_t)spp, M_VMSWAP);
+ goto restart; /* spp is gone, so rescan from the list head */
+ }
+ simple_unlock(&swaplist_lock);
+}
+
+int
+sys_swapctl(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
{
- int s, sz, off, seg, index;
- register struct swdevt *sp;
+ struct sys_swapctl_args /* {
+ syscallarg(int) cmd;
+ syscallarg(const void *) arg;
+ syscallarg(int) misc;
+ } */ *uap = (struct sys_swapctl_args *)v;
struct vnode *vp;
+ struct nameidata nd;
+ struct swappri *spp;
+ struct swapdev *sdp;
+ struct swapent *sep;
+ char userpath[PATH_MAX + 1];
+ int count, error, misc;
+ size_t len;
+ int priority;
- sz = howmany(bp->b_bcount, DEV_BSIZE);
- if (bp->b_blkno + sz > nswap) {
- bp->b_error = EINVAL;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return;
+ misc = SCARG(uap, misc); /* SWAP_STATS: max entries to copy out; SWAP_ON/SWAP_CTL: priority */
+
+ DPRINTF(VMSDB_SWFLOW, ("entering sys_swapctl\n"));
+
+ /* how many swap devices (answered before the superuser check) */
+ if (SCARG(uap, cmd) == SWAP_NSWAP) {
+ DPRINTF(VMSDB_SWFLOW,("did SWAP_NSWAP: leaving sys_swapctl\n"));
+ *retval = nswapdev;
+ return (0);
}
- if (nswdev > 1) {
-#ifdef SEQSWAP
- if (bp->b_blkno < niswap) {
- if (niswdev > 1) {
- off = bp->b_blkno % dmmax;
- if (off+sz > dmmax) {
- bp->b_error = EINVAL;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return;
- }
- seg = bp->b_blkno / dmmax;
- index = seg % niswdev;
- seg /= niswdev;
- bp->b_blkno = seg*dmmax + off;
- } else
- index = 0;
- } else {
- register struct swdevt *swp;
-
- bp->b_blkno -= niswap;
- for (index = niswdev, swp = &swdevt[niswdev];
- swp->sw_dev != NODEV;
- swp++, index++) {
- if (bp->b_blkno < swp->sw_nblks)
- break;
- bp->b_blkno -= swp->sw_nblks;
- }
- if (swp->sw_dev == NODEV ||
- bp->b_blkno+sz > swp->sw_nblks) {
- bp->b_error = swp->sw_dev == NODEV ?
- ENODEV : EINVAL;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return;
+
+ /* stats on the swap devices: copy out at most `misc' swapent records. */
+ if (SCARG(uap, cmd) == SWAP_STATS) {
+ sep = (struct swapent *)SCARG(uap, arg);
+ count = 0;
+
+ error = lockmgr(&swaplist_change_lock, LK_SHARED, (void *)0, p);
+ if (error)
+ return (error);
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev && misc-- > 0;
+ sdp = sdp->swd_next.cqe_next, sep++, count++) {
+ /*
+ * We do not do NetBSD 1.3 compat call.
+ */
+ error = copyout((caddr_t)&sdp->swd_se,
+ (caddr_t)sep, sizeof(struct swapent));
+
+ if (error)
+ goto out;
}
}
+out:
+ (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p);
+ if (error)
+ return (error);
+
+ DPRINTF(VMSDB_SWFLOW,("did SWAP_STATS: leaving sys_swapctl\n"));
+
+ *retval = count; /* number of entries actually copied out */
+ return (0);
+ }
+ if ((error = suser(p->p_ucred, &p->p_acflag))) /* remaining commands need superuser */
+ return (error);
+
+ if (SCARG(uap, arg) == NULL) {
+ /* XXX - interface - arg==NULL: miniroot */
+ vp = rootvp;
+ if (vget(vp, LK_EXCLUSIVE, p))
+ return (EBUSY);
+ if (SCARG(uap, cmd) == SWAP_ON &&
+ copystr("miniroot", userpath, sizeof userpath, &len))
+ panic("swapctl: miniroot copy failed");
+ } else {
+ int space;
+ char *where;
+
+ if (SCARG(uap, cmd) == SWAP_ON) {
+ if ((error = copyinstr(SCARG(uap, arg), userpath,
+ sizeof userpath, &len)))
+ return (error);
+ space = UIO_SYSSPACE; /* path already copied into userpath */
+ where = userpath;
+ } else {
+ space = UIO_USERSPACE;
+ where = (char *)SCARG(uap, arg);
+ }
+ NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, space, where, p);
+ if ((error = namei(&nd)))
+ return (error);
+
+ vp = nd.ni_vp;
+ }
+
+ error = lockmgr(&swaplist_change_lock, LK_EXCLUSIVE, (void *)0, p);
+ if (error)
+ goto bad2; /* lock not held: only release the vnode */
+
+ switch(SCARG(uap, cmd)) {
+ case SWAP_CTL:
+ priority = SCARG(uap, misc);
+ if ((sdp = find_swapdev(vp, 1)) == NULL) {
+ error = ENOENT;
+ break;
+ }
+ insert_swapdev(sdp, priority); /* re-queue under the new priority */
+ swaplist_trim(); /* drop the old swappri if it is now empty */
+ break;
+
+ case SWAP_ON:
+ priority = SCARG(uap, misc);
+
+ /* Check for duplicates */
+ if ((sdp = find_swapdev(vp, 0)) != NULL) {
+ if (!bcmp(sdp->swd_path, "swap_device", 12)) {
+ copystr(userpath, sdp->swd_path, len, 0);
+ error = 0;
+ } else
+ error = EBUSY;
+ goto bad;
+ }
+
+ sdp = (struct swapdev *)
+ malloc(sizeof *sdp, M_VMSWAP, M_WAITOK);
+ bzero(sdp, sizeof(*sdp));
+
+ sdp->swd_vp = vp;
+ sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
+
+ if ((error = swap_on(p, sdp)) != 0) {
+ free((caddr_t)sdp, M_VMSWAP);
+ break;
+ }
+#ifdef SWAP_TO_FILES
+ /*
+ * XXX Is NFS elaboration necessary?
+ */
+ if (vp->v_type == VREG)
+ sdp->swd_cred = crdup(p->p_ucred);
+#endif
+ if (copystr(userpath, sdp->swd_path, len, 0) != 0)
+ panic("swapctl: copystr");
+ insert_swapdev(sdp, priority);
+
+ /* Keep reference to vnode */
+ vref(vp);
+ break;
+
+ case SWAP_OFF:
+ DPRINTF(VMSDB_SWFLOW, ("doing SWAP_OFF...\n"));
+#ifdef SWAP_OFF_WORKS
+ if ((sdp = find_swapdev(vp, 0)) == NULL) {
+ error = ENXIO;
+ break;
+ }
+ /*
+ * If a device isn't in use or enabled, we
+ * can't stop swapping from it (again).
+ */
+ if ((sdp->swd_flags &
+ (SWF_INUSE|SWF_ENABLE)) == 0) {
+ error = EBUSY;
+ goto bad;
+ }
+ if ((error = swap_off(p, sdp)) != 0)
+ goto bad;
+
+ /* Find again and remove this time */
+ if ((sdp = find_swapdev(vp, 1)) == NULL) {
+ error = ENXIO;
+ break;
+ }
+ free((caddr_t)sdp, M_VMSWAP);
#else
- off = bp->b_blkno % dmmax;
- if (off+sz > dmmax) {
- bp->b_error = EINVAL;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return;
+ error = ENODEV;
+#endif
+ break;
+
+ default:
+ DPRINTF(VMSDB_SWFLOW,
+ ("unhandled command: %x\n", SCARG(uap, cmd)));
+ error = EINVAL;
+ }
+
+bad:
+ (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p);
+bad2:
+ vput(vp); /* releases the vnode obtained via vget() or the LOCKLEAF lookup */
+
+ DPRINTF(VMSDB_SWFLOW, ("leaving sys_swapctl: error %d\n", error));
+ return (error);
+}
+
+/*
+ * swap_on() attempts to begin swapping on a swapdev. We check that this
+ * device is OK to swap from, and skip the first cluster of a block device
+ * (to avoid any disk labels that may exist).
+ * Returns 0 on success or an errno value; on failure the vnode is closed
+ * again unless it is rootvp.
+ */
+STATIC int
+swap_on(p, sdp)
+ struct proc *p;
+ struct swapdev *sdp;
+{
+ static int count = 0; /* sequence number used to name each device's extent */
+ struct vnode *vp = sdp->swd_vp;
+ int error, nblks, size;
+ long addr;
+ char *storage;
+ int storagesize;
+#ifdef SWAP_TO_FILES
+ struct vattr va;
+#endif
+#ifdef NFS
+ extern int (**nfsv2_vnodeop_p) __P((void *));
+#endif /* NFS */
+ dev_t dev = sdp->swd_dev;
+ char *name;
+
+
+ /* If root on swap, then skip the open/close operations. */
+ if (vp != rootvp) {
+ if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)))
+ return (error);
+ vp->v_writecount++;
+ }
+
+ DPRINTF(VMSDB_INFO,
+ ("swap_on: dev = %d, major(dev) = %d\n", dev, major(dev)));
+
+ switch (vp->v_type) {
+ case VBLK:
+ if (bdevsw[major(dev)].d_psize == 0 ||
+ (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
+ error = ENXIO;
+ goto bad;
}
- seg = bp->b_blkno / dmmax;
- index = seg % nswdev;
- seg /= nswdev;
- bp->b_blkno = seg*dmmax + off;
+ break;
+
+#ifdef SWAP_TO_FILES
+ case VREG:
+ if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
+ goto bad;
+ nblks = (int)btodb(va.va_size); /* file size expressed in disk blocks */
+ if ((error =
+ VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0)
+ goto bad;
+
+ sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize;
+#ifdef NFS
+ if (vp->v_op == nfsv2_vnodeop_p)
+ sdp->swd_maxactive = 2; /* XXX */
+ else
+#endif /* NFS */
+ sdp->swd_maxactive = 8; /* XXX */
+ break;
#endif
- } else
- index = 0;
- sp = &swdevt[index];
- if (sp->sw_vp == NULL) {
- bp->b_error = ENODEV;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return;
+
+ default:
+ error = ENXIO;
+ goto bad;
}
- if ((bp->b_dev = sp->sw_dev) == NODEV && sp->sw_vp->v_type != VREG)
- panic("swstrategy");
- VHOLD(sp->sw_vp);
- s = splbio();
- if ((bp->b_flags & B_READ) == 0) {
- if ((vp = bp->b_vp) != NULL) {
- vp->v_numoutput--;
- if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
- vp->v_flag &= ~VBWAIT;
- wakeup((caddr_t)&vp->v_numoutput);
+ if (nblks == 0) {
+ DPRINTF(VMSDB_SWFLOW, ("swap_on: nblks == 0\n"));
+ error = EINVAL;
+ goto bad;
+ }
+
+ sdp->swd_flags |= SWF_INUSE;
+ sdp->swd_nblks = nblks;
+
+ /*
+ * skip over first cluster of a device in case of labels or
+ * boot blocks.
+ */
+ if (vp->v_type == VBLK) {
+ size = (int)(nblks - ctod(CLSIZE));
+ addr = (long)ctod(CLSIZE);
+ } else {
+ size = (int)nblks;
+ addr = (long)0;
+ }
+
+ DPRINTF(VMSDB_SWON,
+ ("swap_on: dev %x: size %d, addr %ld\n", dev, size, addr));
+
+ name = malloc(12, M_VMSWAP, M_WAITOK); /* NOTE(review): "swap0x%04x" needs 11 bytes only while count <= 0xffff */
+ sprintf(name, "swap0x%04x", count++);
+ /* XXX make this based on ram as well. */
+ storagesize = EXTENT_FIXED_STORAGE_SIZE(maxproc * 2);
+ storage = malloc(storagesize, M_VMSWAP, M_WAITOK);
+ sdp->swd_ex = extent_create(name, 0, nblks, M_VMSWAP,
+ storage, storagesize, EX_WAITOK);
+ if (addr) {
+ if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK))
+ panic("disklabel region");
+ sdp->swd_inuse += addr; /* the reserved leading blocks count as in use */
+ }
+
+
+ if (vp == rootvp) {
+ struct mount *mp;
+ struct statfs *sp;
+ int rootblks;
+
+ /* Get size from root FS (mountroot did statfs) */
+ mp = rootvnode->v_mount;
+ sp = &mp->mnt_stat;
+ rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE);
+ if (rootblks > nblks)
+ panic("miniroot size");
+
+ if (extent_alloc_region(sdp->swd_ex, addr, rootblks, EX_WAITOK))
+ panic("miniroot region");
+
+ printf("Preserved %d blocks, leaving %d pages of swap\n",
+ rootblks, dtoc(size - rootblks));
+ }
+
+ swap_addmap(sdp, size); /* reserve `size' blocks of the global swapmap for this device */
+ nswapdev++;
+ sdp->swd_flags |= SWF_ENABLE;
+ return (0);
+
+bad:
+ if (vp != rootvp) {
+ vp->v_writecount--;
+ (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
+ }
+ return (error);
+}
+
+#ifdef SWAP_OFF_WORKS
+/*
+ * swap_off() is meant to stop swapping on a swapdev. It is not implemented
+ * yet: it clears SWF_ENABLE and then always returns ENODEV. The teardown
+ * code after the early return is unreachable and is kept only as a sketch,
+ * but it must still compile when SWAP_OFF_WORKS is defined.
+ */
+STATIC int
+swap_off(p, sdp)
+ struct proc *p;
+ struct swapdev *sdp;
+{
+ char *name;
+
+ /* turn off the enable flag */
+ sdp->swd_flags &= ~SWF_ENABLE;
+
+ DPRINTF(VMSDB_SWOFF, ("swap_off: %x\n", sdp->swd_dev));
+
+ /*
+ * XXX write me
+ *
+ * the idea is to find out which processes are using this swap
+ * device, and page them all in.
+ *
+ * eventually, we should try to move them out to other swap areas
+ * if available.
+ *
+ * The alternative is to create a redirection map for this swap
+ * device. This should work by moving all the pages of data from
+ * the ex-swap device to another one, and making an entry in the
+ * redirection map for it. locking is going to be important for
+ * this!
+ *
+ * There might be an easier way to do a "soft" swapoff. First
+ * we mark the particular swap partition as not desirable anymore.
+ * Then we use the pager to page a couple of pages in, each time
+ * it has the memory, and the chance to do so. Thereby moving pages
+ * back into memory. Once they are in memory, when they get paged
+ * out again, they do not go back onto the "undesirable" device
+ * anymore, but to good devices. This might take longer, but it
+ * can certainly work. If need be, the user process can sleep on
+ * the particular sdp entry, and the swapper can then wake him up
+ * when everything is done.
+ */
+
+ /* until the above code is written, we must ENODEV */
+ return ENODEV;
+
+ /*
+ * NOTREACHED until the code above is written.
+ * NOTE(review): sys_swapctl() also unlinks and frees sdp after a
+ * successful swap_off(); decide which side owns the final free
+ * before enabling this path.
+ */
+ extent_free(swapmap, sdp->swd_mapoffset, sdp->swd_mapsize, EX_WAITOK);
+ nswapdev--;
+ name = sdp->swd_ex->ex_name;
+ extent_destroy(sdp->swd_ex);
+ free(name, M_VMSWAP);
+ free((caddr_t)sdp->swd_ex, M_VMSWAP);
+ if (sdp->swd_vp != rootvp) {
+ sdp->swd_vp->v_writecount--;
+ (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
+ }
+ if (sdp->swd_vp)
+ vrele(sdp->swd_vp);
+ free((caddr_t)sdp, M_VMSWAP);
+ return (0);
+}
+#endif
+
+/*
+ * To decide where to allocate what part of swap, we must "round robin"
+ * the swap devices in swap_priority of the same priority until they are
+ * full. we do this with a list of swap priorities that have circle
+ * queues of swapdevs.
+ *
+ * The following functions control allocation and freeing of part of the
+ * swap area. you call swap_alloc() with a size and it returns an address.
+ * later you call swap_free() and it frees the use of that swap area.
+ * swap_alloc() returns 0 when no enabled device can satisfy the request.
+ *
+ * daddr_t swap_alloc(int size);
+ * void swap_free(int size, daddr_t addr);
+ */
+
+daddr_t
+swap_alloc(size)
+ int size;
+{
+ struct swapdev *sdp;
+ struct swappri *spp;
+ u_long result;
+
+ if (nswapdev < 1)
+ return 0;
+
+ simple_lock(&swaplist_lock);
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev;
+ sdp = sdp->swd_next.cqe_next) {
+ /* if it's not enabled, then we can't swap from it */
+ if ((sdp->swd_flags & SWF_ENABLE) == 0 ||
+ /* XXX IS THIS CORRECT ? */
+#if 1
+ (sdp->swd_inuse + size > sdp->swd_nblks) ||
+#endif
+ extent_alloc(sdp->swd_ex, size, EX_NOALIGN,
+ EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT,
+ &result) != 0) {
+ continue;
}
+ CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next); /* rotate: move this device ... */
+ CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); /* ... to the tail of its queue */
+ sdp->swd_inuse += size;
+ simple_unlock(&swaplist_lock);
+ return (daddr_t)(result + sdp->swd_mapoffset); /* device-relative block -> swapmap address */
}
- sp->sw_vp->v_numoutput++;
}
- if (bp->b_vp != NULL)
- brelvp(bp);
- splx(s);
- bp->b_vp = sp->sw_vp;
- VOP_STRATEGY(bp);
+ simple_unlock(&swaplist_lock);
+ return 0;
+}
+
+void
+swap_free(size, addr)
+ int size;
+ daddr_t addr;
+{
+ struct swapdev *sdp = swap_getsdpfromaddr(addr); /* device whose map range contains addr */
+
+#ifdef DIAGNOSTIC
+ if (sdp == NULL)
+ panic("swap_free: unmapped address\n");
+ if (nswapdev < 1)
+ panic("swap_free: nswapdev < 1\n");
+#endif
+ extent_free(sdp->swd_ex, addr - sdp->swd_mapoffset, size,
+ EX_MALLOCOK|EX_NOWAIT); /* NOTE(review): sdp is dereferenced unchecked when DIAGNOSTIC is off */
+ sdp->swd_inuse -= size;
+#ifdef DIAGNOSTIC
+ if (sdp->swd_inuse < 0)
+ panic("swap_free: inuse < 0");
+#endif
+}
+
+/*
+ * We have a physical -> virtual mapping to address here. There are several
+ * different physical address spaces (one for each swap partition) that are
+ * to be mapped onto a single virtual address space.
+ *
+ * ADDR_IN_MAP() is true when `addr' lies in the half-open range
+ * [swd_mapoffset, swd_mapoffset + swd_mapsize) of `sdp'.
+ * swap_getsdpfromaddr() returns the first listed device for which that
+ * holds, or NULL when no device maps `addr'.
+ */
+#define ADDR_IN_MAP(addr, sdp) \
+ (((addr) >= (sdp)->swd_mapoffset) && \
+ ((addr) < ((sdp)->swd_mapoffset + (sdp)->swd_mapsize)))
+
+struct swapdev *
+swap_getsdpfromaddr(addr)
+ daddr_t addr;
+{
+ struct swapdev *sdp;
+ struct swappri *spp;
+
+ simple_lock(&swaplist_lock);
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next)
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev;
+ sdp = sdp->swd_next.cqe_next)
+ if (ADDR_IN_MAP(addr, sdp)) {
+ simple_unlock(&swaplist_lock);
+ return sdp;
+ }
+ simple_unlock(&swaplist_lock);
+ return NULL;
+}
+
+void
+swap_addmap(sdp, size)
+ struct swapdev *sdp;
+ int size;
+{
+ u_long result; /* start of the range allocated from swapmap */
+
+ if (extent_alloc(swapmap, size, EX_NOALIGN, EX_NOBOUNDARY,
+ EX_WAITOK, &result))
+ panic("swap_addmap"); /* no range of `size' blocks could be allocated */
+
+ sdp->swd_mapoffset = result; /* record the range so ADDR_IN_MAP() can match it */
+ sdp->swd_mapsize = size;
}
/*ARGSUSED*/
@@ -284,232 +855,408 @@ swwrite(dev, uio, ioflag)
return (physio(swstrategy, NULL, dev, B_WRITE, minphys, uio));
}
-/*
- * System call swapon(name) enables swapping on device name,
- * which must be in the swdevsw. Return EBUSY
- * if already swapping on this device.
- */
-/* ARGSUSED */
-int
-sys_swapon(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
+void
+swstrategy(bp)
+ struct buf *bp;
{
- struct sys_swapon_args /* {
- syscallarg(char *) name;
- } */ *uap = v;
- register struct vnode *vp;
- register struct swdevt *sp;
- dev_t dev;
- int error;
- struct nameidata nd;
+ struct swapdev *sdp;
+ struct vnode *vp;
+ daddr_t bn;
- if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
- return (error);
- NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, name), p);
- if ((error = namei(&nd)) != 0)
- return (error);
- vp = nd.ni_vp;
- if (vp->v_type != VBLK) {
- vrele(vp);
- return (ENOTBLK);
- }
- dev = (dev_t)vp->v_rdev;
- if (major(dev) >= nblkdev) {
- vrele(vp);
- return (ENXIO);
+ bn = bp->b_blkno;
+ sdp = swap_getsdpfromaddr(bn); /* find the device whose map range holds this block */
+ if (sdp == NULL) {
+ bp->b_error = EINVAL;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
}
- for (sp = &swdevt[0]; sp->sw_dev != NODEV; sp++) {
- if (sp->sw_dev == dev) {
- if (sp->sw_flags & SW_FREED) {
- vrele(vp);
- return (EBUSY);
- }
- sp->sw_vp = vp;
- if ((error = swfree(p, sp - swdevt)) != 0) {
- vrele(vp);
- return (error);
- }
- return (0);
+
+ bn -= sdp->swd_mapoffset; /* swapmap address -> block relative to this device's range */
+
+ DPRINTF(VMSDB_SWFLOW,
+ ("swstrategy(%s): mapoff %x, bn %x, bcount %ld\n",
+ ((bp->b_flags & B_READ) == 0) ? "write" : "read",
+ sdp->swd_mapoffset, bn, bp->b_bcount));
+
+ switch (sdp->swd_vp->v_type) {
+ default:
+ panic("swstrategy: vnode type %x", sdp->swd_vp->v_type);
+ case VBLK:
+ bp->b_blkno = bn + ctod(CLSIZE); /* NOTE(review): swap_on() already reserves the first cluster in the device extent; confirm the skip is not applied twice */
+ vp = sdp->swd_vp;
+ bp->b_dev = sdp->swd_dev;
+ VHOLD(vp);
+ if ((bp->b_flags & B_READ) == 0) {
+ int s = splbio();
+ vwakeup(bp);
+ vp->v_numoutput++;
+ splx(s);
}
-#ifdef SEQSWAP
- /*
- * If we have reached a non-freed sequential device without
- * finding what we are looking for, it is an error.
- * That is because all interleaved devices must come first
- * and sequential devices must be freed in order.
- */
- if ((sp->sw_flags & (SW_SEQUENTIAL|SW_FREED)) == SW_SEQUENTIAL)
- break;
+
+ if (bp->b_vp != NULL)
+ brelvp(bp);
+
+ bp->b_vp = vp; /* re-target the buffer at the swap device vnode */
+ VOP_STRATEGY(bp);
+ return;
+#ifdef SWAP_TO_FILES
+ case VREG:
+ sw_reg_strategy(sdp, bp, bn);
+ return;
#endif
}
- vrele(vp);
- return (EINVAL);
+ /* NOTREACHED */
}
-/*
- * Swfree(index) frees the index'th portion of the swap map.
- * Each of the nswdev devices provides 1/nswdev'th of the swap
- * space, which is laid out with blocks of dmmax pages circularly
- * among the devices.
- */
-int
-swfree(p, index)
- struct proc *p;
- int index;
+#ifdef SWAP_TO_FILES
+
+STATIC void
+sw_reg_strategy(sdp, bp, bn)
+ struct swapdev *sdp;
+ struct buf *bp;
+ int bn;
{
- register struct swdevt *sp;
- register swblk_t vsbase;
- register long blk;
- struct vnode *vp;
- register swblk_t dvbase;
- register int nblks;
- int error;
+ struct vnode *vp;
+ struct vndxfer *vnx;
+ daddr_t nbn;
+ caddr_t addr;
+ int s, off, nra, error, sz, resid;
- sp = &swdevt[index];
- vp = sp->sw_vp;
- /* If root on swap, then the skip open/close operations. */
- if (vp != rootvp) {
- if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)) != 0)
- return (error);
- }
- sp->sw_flags |= SW_FREED;
- nblks = sp->sw_nblks;
/*
- * Some devices may not exist til after boot time.
- * If so, their nblk count will be 0.
+ * Translate the device logical block numbers into physical
+ * block numbers of the underlying filesystem device.
*/
- if (nblks <= 0) {
- int perdev;
- dev_t dev = sp->sw_dev;
+ bp->b_resid = bp->b_bcount;
+ addr = bp->b_data;
+ bn = dbtob(bn); /* from here on bn is a byte offset, not a block number */
- if (bdevsw[major(dev)].d_psize == 0 ||
- (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
- if (vp != rootvp)
- (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
- sp->sw_flags &= ~SW_FREED;
- return (ENXIO);
+ /* Allocate a header for this transfer and link it to the buffer */
+ vnx = getvndxfer();
+ vnx->vx_flags = VX_BUSY; /* sw_reg_iodone() will not finish the transfer while this is set */
+ vnx->vx_error = 0;
+ vnx->vx_pending = 0;
+ vnx->vx_bp = bp;
+ vnx->vx_sdp = sdp;
+
+ error = 0;
+ for (resid = bp->b_resid; resid; resid -= sz) {
+ struct vndbuf *nbp;
+
+ nra = 0;
+ error = VOP_BMAP(sdp->swd_vp, bn / sdp->swd_bsize,
+ &vp, &nbn, &nra);
+
+ if (error == 0 && (long)nbn == -1)
+ error = EIO;
+
+ /*
+ * If there was an error or a hole in the file...punt.
+ * Note that we may have to wait for any operations
+ * that we have already fired off before releasing
+ * the buffer.
+ *
+ * XXX we could deal with holes here but it would be
+ * a hassle (in the write case).
+ */
+ if (error) {
+ s = splbio();
+ vnx->vx_error = error;
+ goto out;
+ }
+
+ if ((off = bn % sdp->swd_bsize) != 0)
+ sz = sdp->swd_bsize - off; /* finish the partially covered block */
+ else
+ sz = (1 + nra) * sdp->swd_bsize; /* this block plus the nra blocks reported by VOP_BMAP() */
+
+ if (resid < sz)
+ sz = resid;
+
+ DPRINTF(VMSDB_SWFLOW,
+ ("sw_reg_strategy: vp %p/%p bn 0x%x/0x%x"
+ " sz 0x%x\n", sdp->swd_vp, vp, bn, nbn, sz));
+
+ nbp = getvndbuf();
+ nbp->vb_buf.b_flags = bp->b_flags | B_NOCACHE | B_CALL;
+ nbp->vb_buf.b_bcount = sz;
+ nbp->vb_buf.b_bufsize = bp->b_bufsize;
+ nbp->vb_buf.b_error = 0;
+ nbp->vb_buf.b_data = addr;
+ nbp->vb_buf.b_blkno = nbn + btodb(off);
+ nbp->vb_buf.b_proc = bp->b_proc;
+ nbp->vb_buf.b_iodone = sw_reg_iodone;
+ nbp->vb_buf.b_vp = NULLVP;
+ nbp->vb_buf.b_rcred = sdp->swd_cred;
+ nbp->vb_buf.b_wcred = sdp->swd_cred;
+ if (bp->b_dirtyend == 0) {
+ nbp->vb_buf.b_dirtyoff = 0;
+ nbp->vb_buf.b_dirtyend = sz;
+ } else {
+ nbp->vb_buf.b_dirtyoff =
+ max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
+ nbp->vb_buf.b_dirtyend =
+ min(sz,
+ max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
}
-#ifdef SEQSWAP
- if (index < niswdev) {
- perdev = niswap / niswdev;
- if (nblks > perdev)
- nblks = perdev;
+ if (bp->b_validend == 0) {
+ nbp->vb_buf.b_validoff = 0;
+ nbp->vb_buf.b_validend = sz;
} else {
- if (nblks % dmmax)
- nblks -= (nblks % dmmax);
- nswap += nblks;
+ nbp->vb_buf.b_validoff =
+ max(0, bp->b_validoff - (bp->b_bcount-resid));
+ nbp->vb_buf.b_validend =
+ min(sz,
+ max(0, bp->b_validend - (bp->b_bcount-resid)));
}
-#else
- if (nswap > 0) {
- perdev = nswap / nswdev;
- if (nblks > perdev)
- nblks = perdev;
- } else
- nswap = nblks;
-#endif
- sp->sw_nblks = nblks;
+
+ nbp->vb_xfer = vnx;
+
+ /*
+ * Just sort by block number
+ */
+ nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
+ s = splbio();
+ if (vnx->vx_error != 0) {
+ putvndbuf(nbp);
+ goto out;
+ }
+ vnx->vx_pending++;
+ bgetvp(vp, &nbp->vb_buf);
+ disksort(&sdp->swd_tab, &nbp->vb_buf);
+ sw_reg_start(sdp);
+ splx(s);
+
+ bn += sz;
+ addr += sz;
}
- if (nblks == 0) {
- if (vp != rootvp)
- (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
- sp->sw_flags &= ~SW_FREED;
- return (0); /* XXX error? */
+
+ s = splbio();
+
+out: /* Arrive here at splbio */
+ vnx->vx_flags &= ~VX_BUSY;
+ if (vnx->vx_pending == 0) {
+ if (vnx->vx_error != 0) {
+ bp->b_error = vnx->vx_error;
+ bp->b_flags |= B_ERROR;
+ }
+ putvndxfer(vnx);
+ biodone(bp);
}
-#ifdef SEQSWAP
- if (sp->sw_flags & SW_SEQUENTIAL) {
- register struct swdevt *swp;
-
- blk = niswap;
- for (swp = &swdevt[niswdev]; swp != sp; swp++)
- blk += swp->sw_nblks;
- rmfree(swapmap, nblks, blk);
- return (0);
+ splx(s);
+}
+
+/*
+ * Feed requests sequentially.
+ * We do it this way to keep from flooding NFS servers if we are connected
+ * to an NFS file. This places the burden on the client rather than the
+ * server.
+ * At most swd_maxactive requests from the device queue are outstanding.
+ */
+STATIC void
+sw_reg_start(sdp)
+ struct swapdev *sdp;
+{
+ struct buf *bp;
+
+ if ((sdp->swd_flags & SWF_BUSY) != 0)
+ /* Recursion control */
+ return;
+
+ sdp->swd_flags |= SWF_BUSY;
+
+ while (sdp->swd_tab.b_active < sdp->swd_maxactive) {
+ bp = sdp->swd_tab.b_actf;
+ if (bp == NULL)
+ break;
+ sdp->swd_tab.b_actf = bp->b_actf; /* dequeue the head request */
+ sdp->swd_tab.b_active++;
+
+ DPRINTF(VMSDB_SWFLOW,
+ ("sw_reg_start: bp %p vp %p blkno %x addr %p cnt %lx\n",
+ bp, bp->b_vp, bp->b_blkno,bp->b_data, bp->b_bcount));
+
+ if ((bp->b_flags & B_READ) == 0)
+ bp->b_vp->v_numoutput++;
+ VOP_STRATEGY(bp);
}
-#endif
- for (dvbase = 0; dvbase < nblks; dvbase += dmmax) {
- blk = nblks - dvbase;
-#ifdef SEQSWAP
- if ((vsbase = index*dmmax + dvbase*niswdev) >= niswap)
- panic("swfree");
-#else
- if ((vsbase = index*dmmax + dvbase*nswdev) >= nswap)
- panic("swfree");
-#endif
- if (blk > dmmax)
- blk = dmmax;
- if (vsbase == 0) {
- /*
- * First of all chunks... initialize the swapmap.
- * Don't use the first cluster of the device
- * in case it starts with a label or boot block.
- */
- rminit(swapmap, blk - ctod(btoc(SWAPSKIPBYTES)),
- vsbase + ctod(btoc(SWAPSKIPBYTES)), "swap", nswapmap);
- } else if (dvbase == 0) {
- /*
- * Don't use the first cluster of the device
- * in case it starts with a label or boot block.
- */
- rmfree(swapmap, blk - ctod(btoc(SWAPSKIPBYTES)),
- vsbase + ctod(btoc(SWAPSKIPBYTES)));
- } else
- rmfree(swapmap, blk, vsbase);
+ sdp->swd_flags &= ~SWF_BUSY;
+}
+
+STATIC void
+sw_reg_iodone(bp)
+ struct buf *bp;
+{
+ register struct vndbuf *vbp = BUF_TO_VNDBUF(bp);
+ register struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
+ register struct buf *pbp = vnx->vx_bp; /* parent buffer handed to sw_reg_strategy() */
+ struct swapdev *sdp = vnx->vx_sdp;
+ int s, resid;
+
+ DPRINTF(VMSDB_SWFLOW,
+ ("sw_reg_iodone: vbp %p vp %p blkno %x addr %p "
+ "cnt %lx(%lx)\n",
+ vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno,
+ vbp->vb_buf.b_data, vbp->vb_buf.b_bcount,
+ vbp->vb_buf.b_resid));
+
+ s = splbio();
+ resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; /* bytes this aux buffer transferred */
+ pbp->b_resid -= resid;
+ vnx->vx_pending--;
+
+ if (vbp->vb_buf.b_error) {
+ DPRINTF(VMSDB_INFO, ("sw_reg_iodone: vbp %p error %d\n", vbp,
+ vbp->vb_buf.b_error));
+
+ vnx->vx_error = vbp->vb_buf.b_error;
}
+ if (vbp->vb_buf.b_vp != NULLVP)
+ brelvp(&vbp->vb_buf);
+
+ putvndbuf(vbp);
+
/*
- * Preserve the mini-root if appropriate:
- * Note: this requires !SEQSWAP && nswdev==1
- *
- * A mini-root gets copied into the front of the swap
- * and we run over top of the swap area just long
- * enough for us to do a mkfs and restor of the real
- * root (sure beats rewriting standalone restor).
+ * Wrap up this transaction if it has run to completion or, in
+ * case of an error, when all auxiliary buffers have returned.
*/
- if (vp == rootvp) {
-#ifndef MINIROOTSIZE
- struct mount *mp;
- struct statfs *sp;
-#endif
- long firstblk;
- int rootblks;
+ if (vnx->vx_error != 0) {
+ pbp->b_flags |= B_ERROR;
+ pbp->b_error = vnx->vx_error;
+ if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {
-#ifdef MINIROOTSIZE
- rootblks = MINIROOTSIZE;
-#else
- /* Get size from root FS (mountroot did statfs) */
- mp = rootvnode->v_mount;
- sp = &mp->mnt_stat;
- rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE);
+ DPRINTF(VMSDB_SWFLOW,
+ ("swiodone: pbp %p iodone: error %d\n",
+ pbp, vnx->vx_error));
+ putvndxfer(vnx);
+ biodone(pbp);
+ }
+ } else if (pbp->b_resid == 0) {
+
+#ifdef DIAGNOSTIC
+ if (vnx->vx_pending != 0)
+ panic("swiodone: vnx pending: %d", vnx->vx_pending);
#endif
- if (rootblks > nblks)
- panic("swfree miniroot size");
- /* First ctod(btoc(SWAPSKIPBYTES)) blocks are not in the map. */
- firstblk = rmalloc(swapmap, rootblks - ctod(btoc(SWAPSKIPBYTES)));
- if (firstblk != ctod(btoc(SWAPSKIPBYTES)))
- panic("swfree miniroot save");
- printf("Preserved %d blocks of miniroot leaving %d pages of swap\n",
- rootblks, dtoc(nblks - rootblks));
+
+ if ((vnx->vx_flags & VX_BUSY) == 0) {
+ DPRINTF(VMSDB_SWFLOW,
+ ("swiodone: pbp %p iodone\n", pbp));
+ putvndxfer(vnx);
+ biodone(pbp);
+ }
}
- return (0);
+ sdp->swd_tab.b_active--; /* one fewer request outstanding on this device */
+ sw_reg_start(sdp); /* start the next queued request, if any */
+
+ splx(s);
}
+#endif /* SWAP_TO_FILES */
-int
-sys_omsync(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
+/*
+ * swapinit: set up the swap subsystem's global state.
+ *
+ * Zeroes nswapdev, obtains a vnode for 'swapdev' into swapdev_vp,
+ * initializes the swap list locks and the swap_priority list, creates
+ * the "swapmap" extent, threads the swbuf[] buffer headers onto bswlist
+ * and finally calls swapmount() for the primary swap device.
+ *
+ * Panics if bdevvp() fails or if extent_create() returns 0.
+ */
+void
+swapinit()
{
- struct sys_msync_args ua;
- struct sys_omsync_args /* {
- syscallarg(caddr_t) addr;
- syscallarg(size_t) len;
- } */ *uap = v;
-
- SCARG(&ua, addr) = SCARG(uap, addr);;
- SCARG(&ua, len) = SCARG(uap, len);;
- SCARG(&ua, flags) = MS_SYNC | MS_INVALIDATE;
- return (sys_msync(p, &ua, retval));
+ struct buf *sp = swbuf;
+ struct proc *p = &proc0; /* XXX */
+ int i;
+
+ DPRINTF(VMSDB_SWINIT, ("swapinit\n"));
+
+ /* Start with nswapdev at zero and get a vnode for swapdev. */
+ nswapdev = 0;
+ if (bdevvp(swapdev, &swapdev_vp))
+ panic("swapinit: can not setup swapdev_vp");
+
+ /* Initialize both swap list locks and the (empty) priority list. */
+ simple_lock_init(&swaplist_lock);
+ lockinit(&swaplist_change_lock, PSWP, "swap change", 0, 0);
+ LIST_INIT(&swap_priority);
+
+ /*
+ * Create swap block resource map. The range [1..INT_MAX] allows
+ * for a grand total of 2 gigablocks of swap resource.
+ * (start at 1 because "block #0" will be interpreted as
+ * an allocation failure).
+ */
+ swapmap = extent_create("swapmap", 1, INT_MAX,
+ M_VMSWAP, 0, 0, EX_WAITOK);
+ if (swapmap == 0)
+ panic("swapinit: extent_create failed");
+
+ /*
+ * Now set up swap buffer headers.
+ *
+ * bswlist.b_actf points at the first header; the loop links headers
+ * 0 .. nswbuf-2 to their successor through b_actf, gives each one
+ * proc0's credentials for both read and write, and sets its
+ * b_vnbufs.le_next to NOLIST (presumably "on no vnode buffer list";
+ * confirm against the NOLIST definition).
+ */
+ bswlist.b_actf = sp;
+ for (i = 0; i < nswbuf - 1; i++, sp++) {
+ sp->b_actf = sp + 1;
+ sp->b_rcred = sp->b_wcred = p->p_ucred;
+ sp->b_vnbufs.le_next = NOLIST;
+ }
+ /* sp now points at the last header: same setup, but end the chain. */
+ sp->b_rcred = sp->b_wcred = p->p_ucred;
+ sp->b_vnbufs.le_next = NOLIST;
+ sp->b_actf = NULL;
+
+ /*
+ * Mount primary swap if available.  When SWAPDEBUG is defined the
+ * call is made only if vmswap_domount is non-zero; otherwise it is
+ * unconditional.
+ */
+#ifdef SWAPDEBUG
+ if(vmswap_domount)
+#endif
+ swapmount();
+
+ DPRINTF(VMSDB_SWINIT, ("leaving swapinit\n"));
+}
+
+/*
+ * swapmount: mount the primary swap device pointed to by 'swdevt[0]'.
+ *
+ * Allocates and zeroes a swapdev, obtains a vnode for swdevt[0].sw_dev
+ * with bdevvp(), labels the swapdev "swap_device", enables it through
+ * swap_on() and, on success, hands it to insert_swapdev(sdp, 0).
+ *
+ * Every failure is reported with printf() and is non-fatal: the
+ * function returns without registering a swap device.  The swapdev
+ * allocation is released on every failure path.
+ *
+ * NOTE(review): the vnode returned by bdevvp() is not released when
+ * copystr() or swap_on() fails; confirm whether a vrele() is required.
+ */
+STATIC void
+swapmount()
+{
+	extern int getdevvp(dev_t, struct vnode **, enum vtype);
+	struct swapdev *sdp;
+	struct vnode *vp = NULL;
+	struct proc *p = curproc;
+	dev_t swap_dev = swdevt[0].sw_dev;
+
+	/* Make sure we have a device */
+	if (swap_dev == NODEV) {
+		printf("swapmount: No swap device!\n");
+		return;
+	}
+
+	/* Malloc needed things */
+	sdp = (struct swapdev *)malloc(sizeof *sdp, M_VMSWAP, M_WAITOK);
+	bzero(sdp, sizeof(*sdp));
+
+	/* Do swap_on() stuff */
+	if (bdevvp(swap_dev, &vp)) {
+		printf("swapmount: bdevvp() failed\n");
+		/* Do not leak the swapdev allocated above. */
+		free((caddr_t)sdp, M_VMSWAP);
+		return;
+	}
+
+#ifdef SWAPDEBUG
+	vprint("swapmount", vp);
+#endif
+
+	sdp->swd_vp = vp;
+	sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
+	if (copystr("swap_device", sdp->swd_path, sizeof sdp->swd_path,
+	    NULL) != 0) {
+		printf("swapmount: copystr() failed\n");
+		/* Do not leak the swapdev allocated above. */
+		free((caddr_t)sdp, M_VMSWAP);
+		return;
+	}
+
+	/* Look for a swap device */
+	printf("Adding swap(%d, %d):", major(swap_dev), minor(swap_dev));
+
+	if (swap_on(p, sdp) != 0) {
+		printf(" failed!\n");
+		free((caddr_t)sdp, M_VMSWAP);
+		return;
+	}
+	printf(" done.\n");
+
+#ifdef SWAP_TO_FILES
+	/*
+	 * XXX Is NFS elaboration necessary?
+	 */
+	if (vp->v_type == VREG)
+		sdp->swd_cred = crdup(p->p_ucred);
+#endif
+	insert_swapdev(sdp, 0);
 }