path: root/sys/vm/vm_swap.c
author	Tobias Weingartner <weingart@cvs.openbsd.org>	1999-05-22 21:22:35 +0000
committer	Tobias Weingartner <weingart@cvs.openbsd.org>	1999-05-22 21:22:35 +0000
commit	35c377bf5315fb3e23e1c5b7e8af00733bed7db0 (patch)
tree	5ab464baa96068a0b4eeb167b4514387057f3f90	/sys/vm/vm_swap.c
parent	aa079fadbadf6efd9c150afdd60894563611277c (diff)
Add new vm_swap code for dynamic swap. From NetBSD, munged some by me and
others. Syscall commit pending.
Diffstat (limited to 'sys/vm/vm_swap.c')
-rw-r--r--	sys/vm/vm_swap.c	1539
1 file changed, 1143 insertions, 396 deletions
diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c
index 4a8f1026b73..1d80eb8e421 100644
--- a/sys/vm/vm_swap.c
+++ b/sys/vm/vm_swap.c
@@ -1,9 +1,9 @@
-/* $OpenBSD: vm_swap.c,v 1.8 1997/12/02 16:55:52 csapuntz Exp $ */
-/* $NetBSD: vm_swap.c,v 1.32 1996/02/05 01:54:09 christos Exp $ */
+/* $OpenBSD: vm_swap.c,v 1.9 1999/05/22 21:22:34 weingart Exp $ */
+/* $NetBSD: vm_swap.c,v 1.64 1998/11/08 19:45:17 mycroft Exp $ */
/*
- * Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1995, 1996, 1997 Matthew R. Green, Tobias Weingartner
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,27 +13,19 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
@@ -41,225 +33,804 @@
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
-#include <sys/dmap.h> /* XXX */
+#include <sys/disklabel.h>
+#include <sys/dmap.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
#include <sys/vnode.h>
#include <sys/map.h>
#include <sys/file.h>
-#include <sys/mman.h>
-
+#include <sys/stat.h>
+#include <sys/extent.h>
+#include <sys/swap.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
-#include <vm/vm.h>
+#include <machine/vmparam.h>
+
#include <vm/vm_conf.h>
#include <miscfs/specfs/specdev.h>
/*
- * Indirect driver for multi-controller paging.
+ * The idea here is to provide a single interface for multiple swap devices,
+ * of any kind and priority in a simple and fast way.
+ *
+ * Each swap device has these properties:
+ * * swap in use.
+ * * swap enabled.
+ * * map information in `/dev/drum'.
+ * * vnode pointer.
+ * Files have these additional properties:
+ * * block size.
+ * * maximum byte count in buffer.
+ * * buffer.
+ * * credentials.
+ *
+ * The arguments to swapctl(2) are:
+ * int cmd;
+ * void *arg;
+ * int misc;
+ * The cmd can be one of:
+ * SWAP_NSWAP - swapctl(2) returns the number of swap devices currently in
+ * use.
+ *	SWAP_STATS - swapctl(2) takes a struct swapent * in (void *arg) and writes
+ *		misc or fewer (down to zero) entries of configured swap devices,
+ * and returns the number of entries written or -1 on error.
+ * SWAP_ON - swapctl(2) takes a (char *) in arg to be the pathname of a
+ *		device or file to begin swapping on, with its priority in
+ * misc, returning 0 on success and -1 on error.
+ *	SWAP_OFF - swapctl(2) takes a (char *) in arg to be the pathname of a
+ *		device or file to stop swapping on, returning 0 or -1.
+ * XXX unwritten.
+ * SWAP_CTL - swapctl(2) changes the priority of a swap device, using the
+ * misc value.
+ */
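
To make the interface above concrete, here is a hedged userland sketch (not part of
this commit) of how a caller might use swapctl(2) once the pending syscall is
committed; it assumes the SWAP_NSWAP and SWAP_STATS commands and the struct swapent
fields declared in <sys/swap.h> elsewhere in this change:

#include <sys/types.h>
#include <sys/swap.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	struct swapent *sep;
	int i, n;

	/* SWAP_NSWAP: number of swap devices currently configured. */
	n = swapctl(SWAP_NSWAP, NULL, 0);
	if (n < 1)
		return (0);

	if ((sep = calloc(n, sizeof(*sep))) == NULL)
		return (1);

	/* SWAP_STATS: copy out up to n entries; returns the count written. */
	n = swapctl(SWAP_STATS, sep, n);
	for (i = 0; i < n; i++)
		printf("%s: %d of %d blocks in use, priority %d\n",
		    sep[i].se_path, sep[i].se_inuse, sep[i].se_nblks,
		    sep[i].se_priority);
	free(sep);
	return (0);
}
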
+
+#ifdef SWAPDEBUG
+#define STATIC
+#define VMSDB_SWON 0x0001
+#define VMSDB_SWOFF 0x0002
+#define VMSDB_SWINIT 0x0004
+#define VMSDB_SWALLOC 0x0008
+#define VMSDB_SWFLOW 0x0010
+#define VMSDB_INFO 0x0020
+int vmswapdebug = 0;
+int vmswap_domount = 1;
+
+#define DPRINTF(f, m) do { \
+ if (vmswapdebug & (f)) \
+ printf m; \
+} while(0)
+#else
+#define STATIC static
+#define DPRINTF(f, m)
+#endif
+
+#define SWAP_TO_FILES
+
+struct swapdev {
+ struct swapent swd_se;
+#define swd_dev swd_se.se_dev
+#define swd_flags swd_se.se_flags
+#define swd_nblks swd_se.se_nblks
+#define swd_inuse swd_se.se_inuse
+#define swd_priority swd_se.se_priority
+#define swd_path swd_se.se_path
+ daddr_t swd_mapoffset;
+ int swd_mapsize;
+ struct extent *swd_ex;
+ struct vnode *swd_vp;
+ CIRCLEQ_ENTRY(swapdev) swd_next;
+
+#ifdef SWAP_TO_FILES
+ int swd_bsize;
+ int swd_maxactive;
+ struct buf swd_tab;
+ struct ucred *swd_cred;
+#endif
+};
+
+/*
+ * Swap device priority entry; the list is kept sorted on `spi_priority'.
*/
+struct swappri {
+ int spi_priority;
+ CIRCLEQ_HEAD(spi_swapdev, swapdev) spi_swapdev;
+ LIST_ENTRY(swappri) spi_swappri;
+};
+
+
+
+
+/*
+ * The following two structures are used to keep track of data transfers
+ * on swap devices associated with regular files.
+ * NOTE: this code is more or less a copy of vnd.c; we use the same
+ * structure names here to ease porting.
+ */
+
+
+struct vndxfer {
+ struct buf *vx_bp; /* Pointer to parent buffer */
+ struct swapdev *vx_sdp;
+ int vx_error;
+ int vx_pending; /* # of pending aux buffers */
+ int vx_flags;
+#define VX_BUSY 1
+#define VX_DEAD 2
+};
+
+
+struct vndbuf {
+ struct buf vb_buf;
+ struct vndxfer *vb_xfer;
+};
-int nswap, nswdev;
-#ifdef SEQSWAP
-int niswdev; /* number of interleaved swap devices */
-int niswap; /* size of interleaved swap area */
+/* To get from a buffer to the encapsulating vndbuf */
+#define BUF_TO_VNDBUF(bp) \
+ ((struct vndbuf *)((long)bp - ((long)&((struct vndbuf *)0)->vb_buf)))
+
+/* vnd macro stuff, rewritten to use malloc()/free() */
+#define getvndxfer() \
+ (struct vndxfer *)malloc(sizeof(struct vndxfer), M_VMSWAP, M_WAITOK);
+
+#define putvndxfer(vnx) \
+ free(vnx, M_VMSWAP)
+
+#define getvndbuf() \
+ (struct vndbuf *)malloc(sizeof(struct vndbuf), M_VMSWAP, M_WAITOK);
+
+#define putvndbuf(vbp) \
+ free(vbp, M_VMSWAP)
+
+
+int nswapdev;
+int swflags;
+struct extent *swapmap;
+LIST_HEAD(swap_priority, swappri) swap_priority;
+
+STATIC int swap_on __P((struct proc *, struct swapdev *));
+#ifdef SWAP_OFF_WORKS
+STATIC int swap_off __P((struct proc *, struct swapdev *));
#endif
+STATIC struct swapdev *swap_getsdpfromaddr __P((daddr_t));
+STATIC void swap_addmap __P((struct swapdev *, int));
-int swfree __P((struct proc *, int));
+#ifdef SWAP_TO_FILES
+STATIC void sw_reg_strategy __P((struct swapdev *, struct buf *, int));
+STATIC void sw_reg_iodone __P((struct buf *));
+STATIC void sw_reg_start __P((struct swapdev *));
+#endif
+
+STATIC void insert_swapdev __P((struct swapdev *, int));
+STATIC struct swapdev *find_swapdev __P((struct vnode *, int));
+STATIC void swaplist_trim __P((void));
+
+STATIC void swapmount __P((void));
+
+/*
+ * We use two locks to protect the swap device lists.
+ * The long-term lock is only used to prevent races in
+ * concurrently executing swapctl(2) system calls.
+ */
+struct simplelock swaplist_lock;
+struct lock swaplist_change_lock;
/*
- * Set up swap devices.
- * Initialize linked list of free swap
- * headers. These do not actually point
- * to buffers, but rather to pages that
- * are being swapped in and out.
+ * Insert a swap device on the priority list.
*/
void
-swapinit()
+insert_swapdev(sdp, priority)
+ struct swapdev *sdp;
+ int priority;
{
- register int i;
- register struct buf *sp = swbuf;
- register struct proc *p = &proc0; /* XXX */
- struct swdevt *swp;
- int error;
+ struct swappri *spp, *pspp;
+
+again:
+ simple_lock(&swaplist_lock);
/*
- * Count swap devices, and adjust total swap space available.
- * Some of the space will not be countable until later (dynamically
- * configurable devices) and some of the counted space will not be
- * available until a swapon() system call is issued, both usually
- * happen when the system goes multi-user.
- *
- * If using NFS for swap, swdevt[0] will already be bdevvp'd. XXX
- */
-#ifdef SEQSWAP
- nswdev = niswdev = 0;
- nswap = niswap = 0;
- /*
- * All interleaved devices must come first
+ * Find entry at or after which to insert the new device.
*/
- for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) {
- if (swp->sw_flags & SW_SEQUENTIAL)
+ for (pspp = NULL, spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ if (priority <= spp->spi_priority)
break;
- niswdev++;
- if (swp->sw_nblks > niswap)
- niswap = swp->sw_nblks;
+ pspp = spp;
}
- niswap = roundup(niswap, dmmax);
- niswap *= niswdev;
- if (swdevt[0].sw_vp == NULL &&
- bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp))
- panic("swapvp");
- /*
- * The remainder must be sequential
- */
- for ( ; swp->sw_dev != NODEV; swp++) {
- if ((swp->sw_flags & SW_SEQUENTIAL) == 0)
- panic("binit: mis-ordered swap devices");
- nswdev++;
- if (swp->sw_nblks > 0) {
- if (swp->sw_nblks % dmmax)
- swp->sw_nblks -= (swp->sw_nblks % dmmax);
- nswap += swp->sw_nblks;
+
+ if (spp == NULL || spp->spi_priority != priority) {
+ spp = (struct swappri *)
+ malloc(sizeof *spp, M_VMSWAP, M_NOWAIT);
+
+ if (spp == NULL) {
+ simple_unlock(&swaplist_lock);
+ tsleep((caddr_t)&lbolt, PSWP, "memory", 0);
+ goto again;
}
+ DPRINTF(VMSDB_SWFLOW,
+ ("sw: had to create a new swappri = %d\n", priority));
+
+ spp->spi_priority = priority;
+ CIRCLEQ_INIT(&spp->spi_swapdev);
+
+ if (pspp)
+ LIST_INSERT_AFTER(pspp, spp, spi_swappri);
+ else
+ LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri);
+
}
- nswdev += niswdev;
- if (nswdev == 0)
- panic("swapinit");
- nswap += niswap;
-#else
- nswdev = 0;
- nswap = 0;
- for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) {
- nswdev++;
- if (swp->sw_nblks > nswap)
- nswap = swp->sw_nblks;
- }
- if (nswdev == 0)
- panic("swapinit");
- if (nswdev > 1)
- nswap = ((nswap + dmmax - 1) / dmmax) * dmmax;
- nswap *= nswdev;
- if (swdevt[0].sw_vp == NULL &&
- bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp))
- panic("swapvp");
-#endif
- if (nswap == 0)
- printf("WARNING: no swap space found\n");
- else if ((error = swfree(p, 0)) == ENXIO)
- printf("WARNING: primary swap device not configured\n");
- else if (error) {
- printf("swfree errno %d\n", error); /* XXX */
- panic("swapinit swfree 0");
- }
+ /* Onto priority list */
+ CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
+ sdp->swd_priority = priority;
+ simple_unlock(&swaplist_lock);
+}
- /*
- * Now set up swap buffer headers.
- */
- bswlist.b_actf = sp;
- for (i = 0; i < nswbuf - 1; i++, sp++) {
- sp->b_actf = sp + 1;
- sp->b_rcred = sp->b_wcred = p->p_ucred;
- sp->b_vnbufs.le_next = NOLIST;
+/*
+ * Find and optionally remove a swap device from the priority list.
+ */
+struct swapdev *
+find_swapdev(vp, remove)
+ struct vnode *vp;
+ int remove;
+{
+ struct swapdev *sdp;
+ struct swappri *spp;
+
+ simple_lock(&swaplist_lock);
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev;
+ sdp = sdp->swd_next.cqe_next)
+ if (sdp->swd_vp == vp) {
+ if (remove)
+ CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp,
+ swd_next);
+ simple_unlock(&swaplist_lock);
+ return (sdp);
+ }
}
- sp->b_rcred = sp->b_wcred = p->p_ucred;
- sp->b_vnbufs.le_next = NOLIST;
- sp->b_actf = NULL;
+ simple_unlock(&swaplist_lock);
+ return (NULL);
}
+/*
+ * Scan the priority list for empty priority entries and remove them.
+ */
void
-swstrategy(bp)
- register struct buf *bp;
+swaplist_trim()
+{
+ struct swappri *spp;
+
+ simple_lock(&swaplist_lock);
+restart:
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ if (spp->spi_swapdev.cqh_first != (void *)&spp->spi_swapdev)
+ continue;
+ LIST_REMOVE(spp, spi_swappri);
+ free((caddr_t)spp, M_VMSWAP);
+ goto restart;
+ }
+ simple_unlock(&swaplist_lock);
+}
+
+int
+sys_swapctl(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
{
- int s, sz, off, seg, index;
- register struct swdevt *sp;
+ struct sys_swapctl_args /* {
+ syscallarg(int) cmd;
+ syscallarg(const void *) arg;
+ syscallarg(int) misc;
+ } */ *uap = (struct sys_swapctl_args *)v;
struct vnode *vp;
+ struct nameidata nd;
+ struct swappri *spp;
+ struct swapdev *sdp;
+ struct swapent *sep;
+ char userpath[PATH_MAX + 1];
+ int count, error, misc;
+ size_t len;
+ int priority;
- sz = howmany(bp->b_bcount, DEV_BSIZE);
- if (bp->b_blkno + sz > nswap) {
- bp->b_error = EINVAL;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return;
+ misc = SCARG(uap, misc);
+
+ DPRINTF(VMSDB_SWFLOW, ("entering sys_swapctl\n"));
+
+ /* how many swap devices */
+ if (SCARG(uap, cmd) == SWAP_NSWAP) {
+ DPRINTF(VMSDB_SWFLOW,("did SWAP_NSWAP: leaving sys_swapctl\n"));
+ *retval = nswapdev;
+ return (0);
}
- if (nswdev > 1) {
-#ifdef SEQSWAP
- if (bp->b_blkno < niswap) {
- if (niswdev > 1) {
- off = bp->b_blkno % dmmax;
- if (off+sz > dmmax) {
- bp->b_error = EINVAL;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return;
- }
- seg = bp->b_blkno / dmmax;
- index = seg % niswdev;
- seg /= niswdev;
- bp->b_blkno = seg*dmmax + off;
- } else
- index = 0;
- } else {
- register struct swdevt *swp;
-
- bp->b_blkno -= niswap;
- for (index = niswdev, swp = &swdevt[niswdev];
- swp->sw_dev != NODEV;
- swp++, index++) {
- if (bp->b_blkno < swp->sw_nblks)
- break;
- bp->b_blkno -= swp->sw_nblks;
- }
- if (swp->sw_dev == NODEV ||
- bp->b_blkno+sz > swp->sw_nblks) {
- bp->b_error = swp->sw_dev == NODEV ?
- ENODEV : EINVAL;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return;
+
+ /* stats on the swap devices. */
+ if (SCARG(uap, cmd) == SWAP_STATS) {
+ sep = (struct swapent *)SCARG(uap, arg);
+ count = 0;
+
+ error = lockmgr(&swaplist_change_lock, LK_SHARED, (void *)0, p);
+ if (error)
+ return (error);
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev && misc-- > 0;
+ sdp = sdp->swd_next.cqe_next, sep++, count++) {
+ /*
+ * We do not do NetBSD 1.3 compat call.
+ */
+ error = copyout((caddr_t)&sdp->swd_se,
+ (caddr_t)sep, sizeof(struct swapent));
+
+ if (error)
+ goto out;
}
}
+out:
+ (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p);
+ if (error)
+ return (error);
+
+ DPRINTF(VMSDB_SWFLOW,("did SWAP_STATS: leaving sys_swapctl\n"));
+
+ *retval = count;
+ return (0);
+ }
+ if ((error = suser(p->p_ucred, &p->p_acflag)))
+ return (error);
+
+ if (SCARG(uap, arg) == NULL) {
+ /* XXX - interface - arg==NULL: miniroot */
+ vp = rootvp;
+ if (vget(vp, LK_EXCLUSIVE, p))
+ return (EBUSY);
+ if (SCARG(uap, cmd) == SWAP_ON &&
+ copystr("miniroot", userpath, sizeof userpath, &len))
+ panic("swapctl: miniroot copy failed");
+ } else {
+ int space;
+ char *where;
+
+ if (SCARG(uap, cmd) == SWAP_ON) {
+ if ((error = copyinstr(SCARG(uap, arg), userpath,
+ sizeof userpath, &len)))
+ return (error);
+ space = UIO_SYSSPACE;
+ where = userpath;
+ } else {
+ space = UIO_USERSPACE;
+ where = (char *)SCARG(uap, arg);
+ }
+ NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, space, where, p);
+ if ((error = namei(&nd)))
+ return (error);
+
+ vp = nd.ni_vp;
+ }
+
+ error = lockmgr(&swaplist_change_lock, LK_EXCLUSIVE, (void *)0, p);
+ if (error)
+ goto bad2;
+
+ switch(SCARG(uap, cmd)) {
+ case SWAP_CTL:
+ priority = SCARG(uap, misc);
+ if ((sdp = find_swapdev(vp, 1)) == NULL) {
+ error = ENOENT;
+ break;
+ }
+ insert_swapdev(sdp, priority);
+ swaplist_trim();
+ break;
+
+ case SWAP_ON:
+ priority = SCARG(uap, misc);
+
+ /* Check for duplicates */
+ if ((sdp = find_swapdev(vp, 0)) != NULL) {
+ if (!bcmp(sdp->swd_path, "swap_device", 12)) {
+ copystr(userpath, sdp->swd_path, len, 0);
+ error = 0;
+ } else
+ error = EBUSY;
+ goto bad;
+ }
+
+ sdp = (struct swapdev *)
+ malloc(sizeof *sdp, M_VMSWAP, M_WAITOK);
+ bzero(sdp, sizeof(*sdp));
+
+ sdp->swd_vp = vp;
+ sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
+
+ if ((error = swap_on(p, sdp)) != 0) {
+ free((caddr_t)sdp, M_VMSWAP);
+ break;
+ }
+#ifdef SWAP_TO_FILES
+ /*
+ * XXX Is NFS elaboration necessary?
+ */
+ if (vp->v_type == VREG)
+ sdp->swd_cred = crdup(p->p_ucred);
+#endif
+ if (copystr(userpath, sdp->swd_path, len, 0) != 0)
+ panic("swapctl: copystr");
+ insert_swapdev(sdp, priority);
+
+ /* Keep reference to vnode */
+ vref(vp);
+ break;
+
+ case SWAP_OFF:
+ DPRINTF(VMSDB_SWFLOW, ("doing SWAP_OFF...\n"));
+#ifdef SWAP_OFF_WORKS
+ if ((sdp = find_swapdev(vp, 0)) == NULL) {
+ error = ENXIO;
+ break;
+ }
+ /*
+ * If a device isn't in use or enabled, we
+ * can't stop swapping from it (again).
+ */
+ if ((sdp->swd_flags &
+ (SWF_INUSE|SWF_ENABLE)) == 0) {
+ error = EBUSY;
+ goto bad;
+ }
+ if ((error = swap_off(p, sdp)) != 0)
+ goto bad;
+
+ /* Find again and remove this time */
+ if ((sdp = find_swapdev(vp, 1)) == NULL) {
+ error = ENXIO;
+ break;
+ }
+ free((caddr_t)sdp, M_VMSWAP);
#else
- off = bp->b_blkno % dmmax;
- if (off+sz > dmmax) {
- bp->b_error = EINVAL;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return;
+ error = ENODEV;
+#endif
+ break;
+
+ default:
+ DPRINTF(VMSDB_SWFLOW,
+ ("unhandled command: %x\n", SCARG(uap, cmd)));
+ error = EINVAL;
+ }
+
+bad:
+ (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p);
+bad2:
+ vput(vp);
+
+ DPRINTF(VMSDB_SWFLOW, ("leaving sys_swapctl: error %d\n", error));
+ return (error);
+}
+
+/*
+ * swap_on() attempts to begin swapping on a swapdev.  We check that this
+ * device is OK to swap from, and skip the start of the disk (to avoid any
+ * disk labels that may exist).
+ */
+STATIC int
+swap_on(p, sdp)
+ struct proc *p;
+ struct swapdev *sdp;
+{
+ static int count = 0;
+ struct vnode *vp = sdp->swd_vp;
+ int error, nblks, size;
+ long addr;
+ char *storage;
+ int storagesize;
+#ifdef SWAP_TO_FILES
+ struct vattr va;
+#endif
+#ifdef NFS
+ extern int (**nfsv2_vnodeop_p) __P((void *));
+#endif /* NFS */
+ dev_t dev = sdp->swd_dev;
+ char *name;
+
+
+	/* If root is on swap, then skip the open/close operations. */
+ if (vp != rootvp) {
+ if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)))
+ return (error);
+ vp->v_writecount++;
+ }
+
+ DPRINTF(VMSDB_INFO,
+ ("swap_on: dev = %d, major(dev) = %d\n", dev, major(dev)));
+
+ switch (vp->v_type) {
+ case VBLK:
+ if (bdevsw[major(dev)].d_psize == 0 ||
+ (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
+ error = ENXIO;
+ goto bad;
}
- seg = bp->b_blkno / dmmax;
- index = seg % nswdev;
- seg /= nswdev;
- bp->b_blkno = seg*dmmax + off;
+ break;
+
+#ifdef SWAP_TO_FILES
+ case VREG:
+ if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
+ goto bad;
+ nblks = (int)btodb(va.va_size);
+ if ((error =
+ VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0)
+ goto bad;
+
+ sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize;
+#ifdef NFS
+ if (vp->v_op == nfsv2_vnodeop_p)
+ sdp->swd_maxactive = 2; /* XXX */
+ else
+#endif /* NFS */
+ sdp->swd_maxactive = 8; /* XXX */
+ break;
#endif
- } else
- index = 0;
- sp = &swdevt[index];
- if (sp->sw_vp == NULL) {
- bp->b_error = ENODEV;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return;
+
+ default:
+ error = ENXIO;
+ goto bad;
}
- if ((bp->b_dev = sp->sw_dev) == NODEV && sp->sw_vp->v_type != VREG)
- panic("swstrategy");
- VHOLD(sp->sw_vp);
- s = splbio();
- if ((bp->b_flags & B_READ) == 0) {
- if ((vp = bp->b_vp) != NULL) {
- vp->v_numoutput--;
- if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
- vp->v_flag &= ~VBWAIT;
- wakeup((caddr_t)&vp->v_numoutput);
+ if (nblks == 0) {
+ DPRINTF(VMSDB_SWFLOW, ("swap_on: nblks == 0\n"));
+ error = EINVAL;
+ goto bad;
+ }
+
+ sdp->swd_flags |= SWF_INUSE;
+ sdp->swd_nblks = nblks;
+
+ /*
+ * skip over first cluster of a device in case of labels or
+ * boot blocks.
+ */
+ if (vp->v_type == VBLK) {
+ size = (int)(nblks - ctod(CLSIZE));
+ addr = (long)ctod(CLSIZE);
+ } else {
+ size = (int)nblks;
+ addr = (long)0;
+ }
+
+ DPRINTF(VMSDB_SWON,
+ ("swap_on: dev %x: size %d, addr %ld\n", dev, size, addr));
+
+ name = malloc(12, M_VMSWAP, M_WAITOK);
+ sprintf(name, "swap0x%04x", count++);
+ /* XXX make this based on ram as well. */
+ storagesize = EXTENT_FIXED_STORAGE_SIZE(maxproc * 2);
+ storage = malloc(storagesize, M_VMSWAP, M_WAITOK);
+ sdp->swd_ex = extent_create(name, 0, nblks, M_VMSWAP,
+ storage, storagesize, EX_WAITOK);
+ if (addr) {
+ if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK))
+ panic("disklabel region");
+ sdp->swd_inuse += addr;
+ }
+
+
+ if (vp == rootvp) {
+ struct mount *mp;
+ struct statfs *sp;
+ int rootblks;
+
+ /* Get size from root FS (mountroot did statfs) */
+ mp = rootvnode->v_mount;
+ sp = &mp->mnt_stat;
+ rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE);
+ if (rootblks > nblks)
+ panic("miniroot size");
+
+ if (extent_alloc_region(sdp->swd_ex, addr, rootblks, EX_WAITOK))
+ panic("miniroot region");
+
+ printf("Preserved %d blocks, leaving %d pages of swap\n",
+ rootblks, dtoc(size - rootblks));
+ }
+
+ swap_addmap(sdp, size);
+ nswapdev++;
+ sdp->swd_flags |= SWF_ENABLE;
+ return (0);
+
+bad:
+ if (vp != rootvp) {
+ vp->v_writecount--;
+ (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
+ }
+ return (error);
+}
+
+#ifdef SWAP_OFF_WORKS
+STATIC int
+swap_off(p, sdp)
+ struct proc *p;
+ struct swapdev *sdp;
+{
+ char *name;
+
+ /* turn off the enable flag */
+ sdp->swd_flags &= ~SWF_ENABLE;
+
+ DPRINTF(VMSDB_SWOFF, ("swap_off: %x\n", sdp->swd_dev));
+
+ /*
+ * XXX write me
+ *
+ * the idea is to find out which processes are using this swap
+ * device, and page them all in.
+ *
+ * eventually, we should try to move them out to other swap areas
+ * if available.
+ *
+ * The alternative is to create a redirection map for this swap
+ * device. This should work by moving all the pages of data from
+ * the ex-swap device to another one, and making an entry in the
+ * redirection map for it. locking is going to be important for
+ * this!
+ *
+ * There might be an easier way to do a "soft" swapoff. First
+ * we mark the particular swap partition as not desirable anymore.
+ * Then we use the pager to page a couple of pages in, each time
+ * it has the memory, and the chance to do so. Thereby moving pages
+ * back into memory. Once they are in memory, when they get paged
+ * out again, they do not go back onto the "undesirable" device
+ * anymore, but to good devices. This might take longer, but it
+ * can certainly work. If need be, the user process can sleep on
+ * the particular sdp entry, and the swapper can then wake him up
+ * when everything is done.
+ */
+
+ /* until the above code is written, we must ENODEV */
+ return ENODEV;
+
+ extent_free(swapmap, sdp->swd_mapoffset, sdp->swd_mapsize, EX_WAITOK);
+ nswapdev--;
+ name = sdp->swd_ex->ex_name;
+ extent_destroy(sdp->swd_ex);
+ free(name, M_VMSWAP);
+ free((caddr_t)sdp->swd_ex, M_VMSWAP);
+	if (sdp->swd_vp != rootvp) {
+		sdp->swd_vp->v_writecount--;
+ (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
+ }
+ if (sdp->swd_vp)
+ vrele(sdp->swd_vp);
+ free((caddr_t)sdp, M_VMSWAP);
+ return (0);
+}
+#endif
+
+/*
+ * To decide where to allocate what part of swap, we must "round robin"
+ * the swap devices in swap_priority of the same priority until they are
+ * full.  We do this with a list of swap priorities that have circular
+ * queues of swapdevs.
+ *
+ * The following functions control allocation and freeing of part of the
+ * swap area.  You call swap_alloc() with a size and it returns an address;
+ * later you call swap_free() and it frees the use of that swap area.
+ *
+ * daddr_t swap_alloc(int size);
+ * void swap_free(int size, daddr_t addr);
+ */
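
As a hedged kernel-side sketch of the allocation interface just described (the
caller, names, and sizes below are hypothetical and not part of this commit):

static int
example_reserve(void)
{
	int	nblks = ctod(4);	/* hypothetical: 4 pages -> DEV_BSIZE blocks */
	daddr_t	blk;

	blk = swap_alloc(nblks);
	if (blk == 0)
		return (ENOMEM);	/* no enabled swap device had room */

	/* ... hand `blk' to swstrategy() via bp->b_blkno for the real I/O ... */

	swap_free(nblks, blk);
	return (0);
}
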
+
+daddr_t
+swap_alloc(size)
+ int size;
+{
+ struct swapdev *sdp;
+ struct swappri *spp;
+ u_long result;
+
+ if (nswapdev < 1)
+ return 0;
+
+ simple_lock(&swaplist_lock);
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev;
+ sdp = sdp->swd_next.cqe_next) {
+ /* if it's not enabled, then we can't swap from it */
+ if ((sdp->swd_flags & SWF_ENABLE) == 0 ||
+ /* XXX IS THIS CORRECT ? */
+#if 1
+ (sdp->swd_inuse + size > sdp->swd_nblks) ||
+#endif
+ extent_alloc(sdp->swd_ex, size, EX_NOALIGN,
+ EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT,
+ &result) != 0) {
+ continue;
}
+ CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
+ CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
+ sdp->swd_inuse += size;
+ simple_unlock(&swaplist_lock);
+ return (daddr_t)(result + sdp->swd_mapoffset);
}
- sp->sw_vp->v_numoutput++;
}
- if (bp->b_vp != NULL)
- brelvp(bp);
- splx(s);
- bp->b_vp = sp->sw_vp;
- VOP_STRATEGY(bp);
+ simple_unlock(&swaplist_lock);
+ return 0;
+}
+
+void
+swap_free(size, addr)
+ int size;
+ daddr_t addr;
+{
+ struct swapdev *sdp = swap_getsdpfromaddr(addr);
+
+#ifdef DIAGNOSTIC
+ if (sdp == NULL)
+ panic("swap_free: unmapped address\n");
+ if (nswapdev < 1)
+ panic("swap_free: nswapdev < 1\n");
+#endif
+ extent_free(sdp->swd_ex, addr - sdp->swd_mapoffset, size,
+ EX_MALLOCOK|EX_NOWAIT);
+ sdp->swd_inuse -= size;
+#ifdef DIAGNOSTIC
+ if (sdp->swd_inuse < 0)
+ panic("swap_free: inuse < 0");
+#endif
+}
+
+/*
+ * We have a physical -> virtual mapping to address here. There are several
+ * different physical address spaces (one for each swap partition) that are
+ * to be mapped onto a single virtual address space.
+ */
+#define ADDR_IN_MAP(addr, sdp) \
+ (((addr) >= (sdp)->swd_mapoffset) && \
+ ((addr) < ((sdp)->swd_mapoffset + (sdp)->swd_mapsize)))
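
A small worked example of this mapping, with made-up numbers (nothing below is
taken from the commit itself):

/*
 * Suppose swap_addmap() handed out these ranges in the shared "drum"
 * address space:
 *
 *	device A: swd_mapoffset = 1,     swd_mapsize = 65536
 *	device B: swd_mapoffset = 65537, swd_mapsize = 32768
 *
 * A drum address of 70000 satisfies ADDR_IN_MAP(70000, B), so
 * swap_getsdpfromaddr() returns B, and callers such as swap_free() and
 * swstrategy() subtract B's swd_mapoffset to get 70000 - 65537 = 4463,
 * the block number used against B's private extent.
 */
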
+
+struct swapdev *
+swap_getsdpfromaddr(addr)
+ daddr_t addr;
+{
+ struct swapdev *sdp;
+ struct swappri *spp;
+
+ simple_lock(&swaplist_lock);
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next)
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev;
+ sdp = sdp->swd_next.cqe_next)
+ if (ADDR_IN_MAP(addr, sdp)) {
+ simple_unlock(&swaplist_lock);
+ return sdp;
+ }
+ simple_unlock(&swaplist_lock);
+ return NULL;
+}
+
+void
+swap_addmap(sdp, size)
+ struct swapdev *sdp;
+ int size;
+{
+ u_long result;
+
+ if (extent_alloc(swapmap, size, EX_NOALIGN, EX_NOBOUNDARY,
+ EX_WAITOK, &result))
+ panic("swap_addmap");
+
+ sdp->swd_mapoffset = result;
+ sdp->swd_mapsize = size;
}
/*ARGSUSED*/
@@ -284,232 +855,408 @@ swwrite(dev, uio, ioflag)
return (physio(swstrategy, NULL, dev, B_WRITE, minphys, uio));
}
-/*
- * System call swapon(name) enables swapping on device name,
- * which must be in the swdevsw. Return EBUSY
- * if already swapping on this device.
- */
-/* ARGSUSED */
-int
-sys_swapon(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
+void
+swstrategy(bp)
+ struct buf *bp;
{
- struct sys_swapon_args /* {
- syscallarg(char *) name;
- } */ *uap = v;
- register struct vnode *vp;
- register struct swdevt *sp;
- dev_t dev;
- int error;
- struct nameidata nd;
+ struct swapdev *sdp;
+ struct vnode *vp;
+ daddr_t bn;
- if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
- return (error);
- NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, name), p);
- if ((error = namei(&nd)) != 0)
- return (error);
- vp = nd.ni_vp;
- if (vp->v_type != VBLK) {
- vrele(vp);
- return (ENOTBLK);
- }
- dev = (dev_t)vp->v_rdev;
- if (major(dev) >= nblkdev) {
- vrele(vp);
- return (ENXIO);
+ bn = bp->b_blkno;
+ sdp = swap_getsdpfromaddr(bn);
+ if (sdp == NULL) {
+ bp->b_error = EINVAL;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
}
- for (sp = &swdevt[0]; sp->sw_dev != NODEV; sp++) {
- if (sp->sw_dev == dev) {
- if (sp->sw_flags & SW_FREED) {
- vrele(vp);
- return (EBUSY);
- }
- sp->sw_vp = vp;
- if ((error = swfree(p, sp - swdevt)) != 0) {
- vrele(vp);
- return (error);
- }
- return (0);
+
+ bn -= sdp->swd_mapoffset;
+
+ DPRINTF(VMSDB_SWFLOW,
+ ("swstrategy(%s): mapoff %x, bn %x, bcount %ld\n",
+ ((bp->b_flags & B_READ) == 0) ? "write" : "read",
+ sdp->swd_mapoffset, bn, bp->b_bcount));
+
+ switch (sdp->swd_vp->v_type) {
+ default:
+ panic("swstrategy: vnode type %x", sdp->swd_vp->v_type);
+ case VBLK:
+ bp->b_blkno = bn + ctod(CLSIZE);
+ vp = sdp->swd_vp;
+ bp->b_dev = sdp->swd_dev;
+ VHOLD(vp);
+ if ((bp->b_flags & B_READ) == 0) {
+ int s = splbio();
+ vwakeup(bp);
+ vp->v_numoutput++;
+ splx(s);
}
-#ifdef SEQSWAP
- /*
- * If we have reached a non-freed sequential device without
- * finding what we are looking for, it is an error.
- * That is because all interleaved devices must come first
- * and sequential devices must be freed in order.
- */
- if ((sp->sw_flags & (SW_SEQUENTIAL|SW_FREED)) == SW_SEQUENTIAL)
- break;
+
+ if (bp->b_vp != NULL)
+ brelvp(bp);
+
+ bp->b_vp = vp;
+ VOP_STRATEGY(bp);
+ return;
+#ifdef SWAP_TO_FILES
+ case VREG:
+ sw_reg_strategy(sdp, bp, bn);
+ return;
#endif
}
- vrele(vp);
- return (EINVAL);
+ /* NOTREACHED */
}
-/*
- * Swfree(index) frees the index'th portion of the swap map.
- * Each of the nswdev devices provides 1/nswdev'th of the swap
- * space, which is laid out with blocks of dmmax pages circularly
- * among the devices.
- */
-int
-swfree(p, index)
- struct proc *p;
- int index;
+#ifdef SWAP_TO_FILES
+
+STATIC void
+sw_reg_strategy(sdp, bp, bn)
+ struct swapdev *sdp;
+ struct buf *bp;
+ int bn;
{
- register struct swdevt *sp;
- register swblk_t vsbase;
- register long blk;
- struct vnode *vp;
- register swblk_t dvbase;
- register int nblks;
- int error;
+ struct vnode *vp;
+ struct vndxfer *vnx;
+ daddr_t nbn;
+ caddr_t addr;
+ int s, off, nra, error, sz, resid;
- sp = &swdevt[index];
- vp = sp->sw_vp;
- /* If root on swap, then the skip open/close operations. */
- if (vp != rootvp) {
- if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)) != 0)
- return (error);
- }
- sp->sw_flags |= SW_FREED;
- nblks = sp->sw_nblks;
/*
- * Some devices may not exist til after boot time.
- * If so, their nblk count will be 0.
+ * Translate the device logical block numbers into physical
+ * block numbers of the underlying filesystem device.
*/
- if (nblks <= 0) {
- int perdev;
- dev_t dev = sp->sw_dev;
+ bp->b_resid = bp->b_bcount;
+ addr = bp->b_data;
+ bn = dbtob(bn);
- if (bdevsw[major(dev)].d_psize == 0 ||
- (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
- if (vp != rootvp)
- (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
- sp->sw_flags &= ~SW_FREED;
- return (ENXIO);
+ /* Allocate a header for this transfer and link it to the buffer */
+ vnx = getvndxfer();
+ vnx->vx_flags = VX_BUSY;
+ vnx->vx_error = 0;
+ vnx->vx_pending = 0;
+ vnx->vx_bp = bp;
+ vnx->vx_sdp = sdp;
+
+ error = 0;
+ for (resid = bp->b_resid; resid; resid -= sz) {
+ struct vndbuf *nbp;
+
+ nra = 0;
+ error = VOP_BMAP(sdp->swd_vp, bn / sdp->swd_bsize,
+ &vp, &nbn, &nra);
+
+ if (error == 0 && (long)nbn == -1)
+ error = EIO;
+
+ /*
+ * If there was an error or a hole in the file...punt.
+ * Note that we may have to wait for any operations
+ * that we have already fired off before releasing
+ * the buffer.
+ *
+ * XXX we could deal with holes here but it would be
+ * a hassle (in the write case).
+ */
+ if (error) {
+ s = splbio();
+ vnx->vx_error = error;
+ goto out;
+ }
+
+ if ((off = bn % sdp->swd_bsize) != 0)
+ sz = sdp->swd_bsize - off;
+ else
+ sz = (1 + nra) * sdp->swd_bsize;
+
+ if (resid < sz)
+ sz = resid;
+
+ DPRINTF(VMSDB_SWFLOW,
+ ("sw_reg_strategy: vp %p/%p bn 0x%x/0x%x"
+ " sz 0x%x\n", sdp->swd_vp, vp, bn, nbn, sz));
+
+ nbp = getvndbuf();
+ nbp->vb_buf.b_flags = bp->b_flags | B_NOCACHE | B_CALL;
+ nbp->vb_buf.b_bcount = sz;
+ nbp->vb_buf.b_bufsize = bp->b_bufsize;
+ nbp->vb_buf.b_error = 0;
+ nbp->vb_buf.b_data = addr;
+ nbp->vb_buf.b_blkno = nbn + btodb(off);
+ nbp->vb_buf.b_proc = bp->b_proc;
+ nbp->vb_buf.b_iodone = sw_reg_iodone;
+ nbp->vb_buf.b_vp = NULLVP;
+ nbp->vb_buf.b_rcred = sdp->swd_cred;
+ nbp->vb_buf.b_wcred = sdp->swd_cred;
+ if (bp->b_dirtyend == 0) {
+ nbp->vb_buf.b_dirtyoff = 0;
+ nbp->vb_buf.b_dirtyend = sz;
+ } else {
+ nbp->vb_buf.b_dirtyoff =
+ max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
+ nbp->vb_buf.b_dirtyend =
+ min(sz,
+ max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
}
-#ifdef SEQSWAP
- if (index < niswdev) {
- perdev = niswap / niswdev;
- if (nblks > perdev)
- nblks = perdev;
+ if (bp->b_validend == 0) {
+ nbp->vb_buf.b_validoff = 0;
+ nbp->vb_buf.b_validend = sz;
} else {
- if (nblks % dmmax)
- nblks -= (nblks % dmmax);
- nswap += nblks;
+ nbp->vb_buf.b_validoff =
+ max(0, bp->b_validoff - (bp->b_bcount-resid));
+ nbp->vb_buf.b_validend =
+ min(sz,
+ max(0, bp->b_validend - (bp->b_bcount-resid)));
}
-#else
- if (nswap > 0) {
- perdev = nswap / nswdev;
- if (nblks > perdev)
- nblks = perdev;
- } else
- nswap = nblks;
-#endif
- sp->sw_nblks = nblks;
+
+ nbp->vb_xfer = vnx;
+
+ /*
+ * Just sort by block number
+ */
+ nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
+ s = splbio();
+ if (vnx->vx_error != 0) {
+ putvndbuf(nbp);
+ goto out;
+ }
+ vnx->vx_pending++;
+ bgetvp(vp, &nbp->vb_buf);
+ disksort(&sdp->swd_tab, &nbp->vb_buf);
+ sw_reg_start(sdp);
+ splx(s);
+
+ bn += sz;
+ addr += sz;
}
- if (nblks == 0) {
- if (vp != rootvp)
- (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
- sp->sw_flags &= ~SW_FREED;
- return (0); /* XXX error? */
+
+ s = splbio();
+
+out: /* Arrive here at splbio */
+ vnx->vx_flags &= ~VX_BUSY;
+ if (vnx->vx_pending == 0) {
+ if (vnx->vx_error != 0) {
+ bp->b_error = vnx->vx_error;
+ bp->b_flags |= B_ERROR;
+ }
+ putvndxfer(vnx);
+ biodone(bp);
}
-#ifdef SEQSWAP
- if (sp->sw_flags & SW_SEQUENTIAL) {
- register struct swdevt *swp;
-
- blk = niswap;
- for (swp = &swdevt[niswdev]; swp != sp; swp++)
- blk += swp->sw_nblks;
- rmfree(swapmap, nblks, blk);
- return (0);
+ splx(s);
+}
+
+/*
+ * Feed requests sequentially.
+ * We do it this way to keep from flooding NFS servers if we are connected
+ * to an NFS file. This places the burden on the client rather than the
+ * server.
+ */
+STATIC void
+sw_reg_start(sdp)
+ struct swapdev *sdp;
+{
+ struct buf *bp;
+
+ if ((sdp->swd_flags & SWF_BUSY) != 0)
+ /* Recursion control */
+ return;
+
+ sdp->swd_flags |= SWF_BUSY;
+
+ while (sdp->swd_tab.b_active < sdp->swd_maxactive) {
+ bp = sdp->swd_tab.b_actf;
+ if (bp == NULL)
+ break;
+ sdp->swd_tab.b_actf = bp->b_actf;
+ sdp->swd_tab.b_active++;
+
+ DPRINTF(VMSDB_SWFLOW,
+ ("sw_reg_start: bp %p vp %p blkno %x addr %p cnt %lx\n",
+ bp, bp->b_vp, bp->b_blkno,bp->b_data, bp->b_bcount));
+
+ if ((bp->b_flags & B_READ) == 0)
+ bp->b_vp->v_numoutput++;
+ VOP_STRATEGY(bp);
}
-#endif
- for (dvbase = 0; dvbase < nblks; dvbase += dmmax) {
- blk = nblks - dvbase;
-#ifdef SEQSWAP
- if ((vsbase = index*dmmax + dvbase*niswdev) >= niswap)
- panic("swfree");
-#else
- if ((vsbase = index*dmmax + dvbase*nswdev) >= nswap)
- panic("swfree");
-#endif
- if (blk > dmmax)
- blk = dmmax;
- if (vsbase == 0) {
- /*
- * First of all chunks... initialize the swapmap.
- * Don't use the first cluster of the device
- * in case it starts with a label or boot block.
- */
- rminit(swapmap, blk - ctod(btoc(SWAPSKIPBYTES)),
- vsbase + ctod(btoc(SWAPSKIPBYTES)), "swap", nswapmap);
- } else if (dvbase == 0) {
- /*
- * Don't use the first cluster of the device
- * in case it starts with a label or boot block.
- */
- rmfree(swapmap, blk - ctod(btoc(SWAPSKIPBYTES)),
- vsbase + ctod(btoc(SWAPSKIPBYTES)));
- } else
- rmfree(swapmap, blk, vsbase);
+ sdp->swd_flags &= ~SWF_BUSY;
+}
+
+STATIC void
+sw_reg_iodone(bp)
+ struct buf *bp;
+{
+ register struct vndbuf *vbp = BUF_TO_VNDBUF(bp);
+ register struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
+ register struct buf *pbp = vnx->vx_bp;
+ struct swapdev *sdp = vnx->vx_sdp;
+ int s, resid;
+
+ DPRINTF(VMSDB_SWFLOW,
+ ("sw_reg_iodone: vbp %p vp %p blkno %x addr %p "
+ "cnt %lx(%lx)\n",
+ vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno,
+ vbp->vb_buf.b_data, vbp->vb_buf.b_bcount,
+ vbp->vb_buf.b_resid));
+
+ s = splbio();
+ resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
+ pbp->b_resid -= resid;
+ vnx->vx_pending--;
+
+ if (vbp->vb_buf.b_error) {
+ DPRINTF(VMSDB_INFO, ("sw_reg_iodone: vbp %p error %d\n", vbp,
+ vbp->vb_buf.b_error));
+
+ vnx->vx_error = vbp->vb_buf.b_error;
}
+ if (vbp->vb_buf.b_vp != NULLVP)
+ brelvp(&vbp->vb_buf);
+
+ putvndbuf(vbp);
+
/*
- * Preserve the mini-root if appropriate:
- * Note: this requires !SEQSWAP && nswdev==1
- *
- * A mini-root gets copied into the front of the swap
- * and we run over top of the swap area just long
- * enough for us to do a mkfs and restor of the real
- * root (sure beats rewriting standalone restor).
+ * Wrap up this transaction if it has run to completion or, in
+ * case of an error, when all auxiliary buffers have returned.
*/
- if (vp == rootvp) {
-#ifndef MINIROOTSIZE
- struct mount *mp;
- struct statfs *sp;
-#endif
- long firstblk;
- int rootblks;
+ if (vnx->vx_error != 0) {
+ pbp->b_flags |= B_ERROR;
+ pbp->b_error = vnx->vx_error;
+ if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {
-#ifdef MINIROOTSIZE
- rootblks = MINIROOTSIZE;
-#else
- /* Get size from root FS (mountroot did statfs) */
- mp = rootvnode->v_mount;
- sp = &mp->mnt_stat;
- rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE);
+ DPRINTF(VMSDB_SWFLOW,
+ ("swiodone: pbp %p iodone: error %d\n",
+ pbp, vnx->vx_error));
+ putvndxfer(vnx);
+ biodone(pbp);
+ }
+ } else if (pbp->b_resid == 0) {
+
+#ifdef DIAGNOSTIC
+ if (vnx->vx_pending != 0)
+ panic("swiodone: vnx pending: %d", vnx->vx_pending);
#endif
- if (rootblks > nblks)
- panic("swfree miniroot size");
- /* First ctod(btoc(SWAPSKIPBYTES)) blocks are not in the map. */
- firstblk = rmalloc(swapmap, rootblks - ctod(btoc(SWAPSKIPBYTES)));
- if (firstblk != ctod(btoc(SWAPSKIPBYTES)))
- panic("swfree miniroot save");
- printf("Preserved %d blocks of miniroot leaving %d pages of swap\n",
- rootblks, dtoc(nblks - rootblks));
+
+ if ((vnx->vx_flags & VX_BUSY) == 0) {
+ DPRINTF(VMSDB_SWFLOW,
+ ("swiodone: pbp %p iodone\n", pbp));
+ putvndxfer(vnx);
+ biodone(pbp);
+ }
}
- return (0);
+ sdp->swd_tab.b_active--;
+ sw_reg_start(sdp);
+
+ splx(s);
}
+#endif /* SWAP_TO_FILES */
-int
-sys_omsync(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
+void
+swapinit()
{
- struct sys_msync_args ua;
- struct sys_omsync_args /* {
- syscallarg(caddr_t) addr;
- syscallarg(size_t) len;
- } */ *uap = v;
-
- SCARG(&ua, addr) = SCARG(uap, addr);;
- SCARG(&ua, len) = SCARG(uap, len);;
- SCARG(&ua, flags) = MS_SYNC | MS_INVALIDATE;
- return (sys_msync(p, &ua, retval));
+ struct buf *sp = swbuf;
+ struct proc *p = &proc0; /* XXX */
+ int i;
+
+ DPRINTF(VMSDB_SWINIT, ("swapinit\n"));
+
+ nswapdev = 0;
+ if (bdevvp(swapdev, &swapdev_vp))
+ panic("swapinit: can not setup swapdev_vp");
+
+ simple_lock_init(&swaplist_lock);
+ lockinit(&swaplist_change_lock, PSWP, "swap change", 0, 0);
+ LIST_INIT(&swap_priority);
+
+ /*
+ * Create swap block resource map. The range [1..INT_MAX] allows
+ * for a grand total of 2 gigablocks of swap resource.
+ * (start at 1 because "block #0" will be interpreted as
+ * an allocation failure).
+ */
+ swapmap = extent_create("swapmap", 1, INT_MAX,
+ M_VMSWAP, 0, 0, EX_WAITOK);
+ if (swapmap == 0)
+ panic("swapinit: extent_create failed");
+
+ /*
+ * Now set up swap buffer headers.
+ */
+ bswlist.b_actf = sp;
+ for (i = 0; i < nswbuf - 1; i++, sp++) {
+ sp->b_actf = sp + 1;
+ sp->b_rcred = sp->b_wcred = p->p_ucred;
+ sp->b_vnbufs.le_next = NOLIST;
+ }
+ sp->b_rcred = sp->b_wcred = p->p_ucred;
+ sp->b_vnbufs.le_next = NOLIST;
+ sp->b_actf = NULL;
+
+ /* Mount primary swap if available */
+#ifdef SWAPDEBUG
+ if(vmswap_domount)
+#endif
+ swapmount();
+
+ DPRINTF(VMSDB_SWINIT, ("leaving swapinit\n"));
+}
+
+/*
+ * Mount the primary swap device pointed to by 'swdevt[0]'.
+ */
+STATIC void
+swapmount()
+{
+ extern int getdevvp(dev_t, struct vnode **, enum vtype);
+ struct swapdev *sdp;
+ struct vnode *vp = NULL;
+ struct proc *p = curproc;
+ dev_t swap_dev = swdevt[0].sw_dev;
+
+ /* Make sure we have a device */
+ if (swap_dev == NODEV) {
+ printf("swapmount: No swap device!\n");
+ return;
+ }
+
+ /* Malloc needed things */
+ sdp = (struct swapdev *)malloc(sizeof *sdp, M_VMSWAP, M_WAITOK);
+ bzero(sdp, sizeof(*sdp));
+
+ /* Do swap_on() stuff */
+ if(bdevvp(swap_dev, &vp)){
+ printf("swapmount: bdevvp() failed\n");
+ return;
+ }
+
+#ifdef SWAPDEBUG
+ vprint("swapmount", vp);
+#endif
+
+ sdp->swd_vp = vp;
+ sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
+ if(copystr("swap_device", sdp->swd_path, sizeof sdp->swd_path, 0) != 0){
+ printf("swapmount: copystr() failed\n");
+ return;
+ }
+
+ /* Look for a swap device */
+ printf("Adding swap(%d, %d):", major(swap_dev), minor(swap_dev));
+
+ if (swap_on(p, sdp) != 0) {
+ printf(" failed!\n");
+ free((caddr_t)sdp, M_VMSWAP);
+ return;
+ } else
+ printf(" done.\n");
+#ifdef SWAP_TO_FILES
+ /*
+ * XXX Is NFS elaboration necessary?
+ */
+ if (vp->v_type == VREG)
+ sdp->swd_cred = crdup(p->p_ucred);
+#endif
+ insert_swapdev(sdp, 0);
}