author     Tobias Weingartner <weingart@cvs.openbsd.org>    1999-05-22 21:22:35 +0000
committer  Tobias Weingartner <weingart@cvs.openbsd.org>    1999-05-22 21:22:35 +0000
commit     35c377bf5315fb3e23e1c5b7e8af00733bed7db0 (patch)
tree       5ab464baa96068a0b4eeb167b4514387057f3f90 /sys/vm/vm_swap.c
parent     aa079fadbadf6efd9c150afdd60894563611277c (diff)
Add new vm_swap code for dynamic swap. From NetBSD, munged some by me and
others. Syscall commit pending.
Diffstat (limited to 'sys/vm/vm_swap.c')
-rw-r--r--  sys/vm/vm_swap.c  1539
1 file changed, 1143 insertions, 396 deletions
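The new file documents a swapctl(2) interface (SWAP_NSWAP, SWAP_STATS, SWAP_ON, SWAP_OFF, SWAP_CTL) in the comment block near its top, replacing the old swapon(2)-only path. The sketch below shows how a userland consumer might drive that interface once the pending syscall commit lands. The swapctl() prototype, the SWAP_* commands, and the struct swapent fields are taken from that comment block and the <sys/swap.h> it includes; everything else (the program flow, the optional device-path argument) is invented for illustration.

/*
 * Sketch: query and extend swap via the swapctl(2) interface described
 * in the new vm_swap.c.  Assumes swapctl(), SWAP_* and struct swapent
 * as referenced by this commit; the syscall itself is noted as a
 * pending, separate commit.
 */
#include <sys/types.h>
#include <sys/swap.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(int argc, char *argv[])
{
	struct swapent *sep;
	int i, nswap;

	/* SWAP_NSWAP: number of swap devices currently configured. */
	nswap = swapctl(SWAP_NSWAP, NULL, 0);
	if (nswap < 1) {
		printf("no swap devices configured\n");
		return (0);
	}

	/* SWAP_STATS: write up to `misc' swapent entries into `arg'. */
	sep = calloc(nswap, sizeof(*sep));
	if (sep == NULL)
		err(1, "calloc");
	nswap = swapctl(SWAP_STATS, sep, nswap);
	if (nswap == -1)
		err(1, "SWAP_STATS");

	for (i = 0; i < nswap; i++)
		printf("%s: %d blocks, %d in use, priority %d\n",
		    sep[i].se_path, sep[i].se_nblks, sep[i].se_inuse,
		    sep[i].se_priority);
	free(sep);

	/* SWAP_ON: start swapping on a device or file, priority in `misc'. */
	if (argc > 1 && swapctl(SWAP_ON, argv[1], 0) == -1)
		err(1, "SWAP_ON %s", argv[1]);

	return (0);
}

SWAP_OFF is documented but flagged "XXX unwritten" (the in-kernel swap_off() is compiled only under SWAP_OFF_WORKS and the command otherwise returns ENODEV), so it is left out of the sketch.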
diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c index 4a8f1026b73..1d80eb8e421 100644 --- a/sys/vm/vm_swap.c +++ b/sys/vm/vm_swap.c @@ -1,9 +1,9 @@ -/* $OpenBSD: vm_swap.c,v 1.8 1997/12/02 16:55:52 csapuntz Exp $ */ -/* $NetBSD: vm_swap.c,v 1.32 1996/02/05 01:54:09 christos Exp $ */ +/* $OpenBSD: vm_swap.c,v 1.9 1999/05/22 21:22:34 weingart Exp $ */ +/* $NetBSD: vm_swap.c,v 1.64 1998/11/08 19:45:17 mycroft Exp $ */ /* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. + * Copyright (c) 1995, 1996, 1997 Matthew R. Green, Tobias Weingartner + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,27 +13,19 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include <sys/param.h> @@ -41,225 +33,804 @@ #include <sys/buf.h> #include <sys/proc.h> #include <sys/namei.h> -#include <sys/dmap.h> /* XXX */ +#include <sys/disklabel.h> +#include <sys/dmap.h> +#include <sys/errno.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/lock.h> #include <sys/vnode.h> #include <sys/map.h> #include <sys/file.h> -#include <sys/mman.h> - +#include <sys/stat.h> +#include <sys/extent.h> +#include <sys/swap.h> #include <sys/mount.h> #include <sys/syscallargs.h> -#include <vm/vm.h> +#include <machine/vmparam.h> + #include <vm/vm_conf.h> #include <miscfs/specfs/specdev.h> /* - * Indirect driver for multi-controller paging. + * The idea here is to provide a single interface for multiple swap devices, + * of any kind and priority in a simple and fast way. + * + * Each swap device has these properties: + * * swap in use. + * * swap enabled. + * * map information in `/dev/drum'. + * * vnode pointer. + * Files have these additional properties: + * * block size. + * * maximum byte count in buffer. + * * buffer. + * * credentials. + * + * The arguments to swapctl(2) are: + * int cmd; + * void *arg; + * int misc; + * The cmd can be one of: + * SWAP_NSWAP - swapctl(2) returns the number of swap devices currently in + * use. + * SWAP_STATS - swapctl(2) takes a struct ent * in (void *arg) and writes + * misc or fewer (to zero) entries of configured swap devices, + * and returns the number of entries written or -1 on error. + * SWAP_ON - swapctl(2) takes a (char *) in arg to be the pathname of a + * device or file to begin swapping on, with it's priority in + * misc, returning 0 on success and -1 on error. + * SWAP_OFF - swapctl(2) takes a (char *) n arg to be the pathname of a + * device or file to stop swapping on. returning 0 or -1. + * XXX unwritten. + * SWAP_CTL - swapctl(2) changes the priority of a swap device, using the + * misc value. + */ + +#ifdef SWAPDEBUG +#define STATIC +#define VMSDB_SWON 0x0001 +#define VMSDB_SWOFF 0x0002 +#define VMSDB_SWINIT 0x0004 +#define VMSDB_SWALLOC 0x0008 +#define VMSDB_SWFLOW 0x0010 +#define VMSDB_INFO 0x0020 +int vmswapdebug = 0; +int vmswap_domount = 1; + +#define DPRINTF(f, m) do { \ + if (vmswapdebug & (f)) \ + printf m; \ +} while(0) +#else +#define STATIC static +#define DPRINTF(f, m) +#endif + +#define SWAP_TO_FILES + +struct swapdev { + struct swapent swd_se; +#define swd_dev swd_se.se_dev +#define swd_flags swd_se.se_flags +#define swd_nblks swd_se.se_nblks +#define swd_inuse swd_se.se_inuse +#define swd_priority swd_se.se_priority +#define swd_path swd_se.se_path + daddr_t swd_mapoffset; + int swd_mapsize; + struct extent *swd_ex; + struct vnode *swd_vp; + CIRCLEQ_ENTRY(swapdev) swd_next; + +#ifdef SWAP_TO_FILES + int swd_bsize; + int swd_maxactive; + struct buf swd_tab; + struct ucred *swd_cred; +#endif +}; + +/* + * Swap device priority entry; the list is kept sorted on `spi_priority'. */ +struct swappri { + int spi_priority; + CIRCLEQ_HEAD(spi_swapdev, swapdev) spi_swapdev; + LIST_ENTRY(swappri) spi_swappri; +}; + + + + +/* + * The following two structures are used to keep track of data transfers + * on swap devices associated with regular files. + * NOTE: this code is more or less a copy of vnd.c; we use the same + * structure names here to ease porting.. 
+ */ + + +struct vndxfer { + struct buf *vx_bp; /* Pointer to parent buffer */ + struct swapdev *vx_sdp; + int vx_error; + int vx_pending; /* # of pending aux buffers */ + int vx_flags; +#define VX_BUSY 1 +#define VX_DEAD 2 +}; + + +struct vndbuf { + struct buf vb_buf; + struct vndxfer *vb_xfer; +}; -int nswap, nswdev; -#ifdef SEQSWAP -int niswdev; /* number of interleaved swap devices */ -int niswap; /* size of interleaved swap area */ +/* To get from a buffer to the encapsulating vndbuf */ +#define BUF_TO_VNDBUF(bp) \ + ((struct vndbuf *)((long)bp - ((long)&((struct vndbuf *)0)->vb_buf))) + +/* vnd macro stuff, rewritten to use malloc()/free() */ +#define getvndxfer() \ + (struct vndxfer *)malloc(sizeof(struct vndxfer), M_VMSWAP, M_WAITOK); + +#define putvndxfer(vnx) \ + free(vnx, M_VMSWAP) + +#define getvndbuf() \ + (struct vndbuf *)malloc(sizeof(struct vndbuf), M_VMSWAP, M_WAITOK); + +#define putvndbuf(vbp) \ + free(vbp, M_VMSWAP) + + +int nswapdev; +int swflags; +struct extent *swapmap; +LIST_HEAD(swap_priority, swappri) swap_priority; + +STATIC int swap_on __P((struct proc *, struct swapdev *)); +#ifdef SWAP_OFF_WORKS +STATIC int swap_off __P((struct proc *, struct swapdev *)); #endif +STATIC struct swapdev *swap_getsdpfromaddr __P((daddr_t)); +STATIC void swap_addmap __P((struct swapdev *, int)); -int swfree __P((struct proc *, int)); +#ifdef SWAP_TO_FILES +STATIC void sw_reg_strategy __P((struct swapdev *, struct buf *, int)); +STATIC void sw_reg_iodone __P((struct buf *)); +STATIC void sw_reg_start __P((struct swapdev *)); +#endif + +STATIC void insert_swapdev __P((struct swapdev *, int)); +STATIC struct swapdev *find_swapdev __P((struct vnode *, int)); +STATIC void swaplist_trim __P((void)); + +STATIC void swapmount __P((void)); + +/* + * We use two locks to protect the swap device lists. + * The long-term lock is used only used to prevent races in + * concurrently executing swapctl(2) system calls. + */ +struct simplelock swaplist_lock; +struct lock swaplist_change_lock; /* - * Set up swap devices. - * Initialize linked list of free swap - * headers. These do not actually point - * to buffers, but rather to pages that - * are being swapped in and out. + * Insert a swap device on the priority list. */ void -swapinit() +insert_swapdev(sdp, priority) + struct swapdev *sdp; + int priority; { - register int i; - register struct buf *sp = swbuf; - register struct proc *p = &proc0; /* XXX */ - struct swdevt *swp; - int error; + struct swappri *spp, *pspp; + +again: + simple_lock(&swaplist_lock); /* - * Count swap devices, and adjust total swap space available. - * Some of the space will not be countable until later (dynamically - * configurable devices) and some of the counted space will not be - * available until a swapon() system call is issued, both usually - * happen when the system goes multi-user. - * - * If using NFS for swap, swdevt[0] will already be bdevvp'd. XXX - */ -#ifdef SEQSWAP - nswdev = niswdev = 0; - nswap = niswap = 0; - /* - * All interleaved devices must come first + * Find entry at or after which to insert the new device. 
*/ - for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) { - if (swp->sw_flags & SW_SEQUENTIAL) + for (pspp = NULL, spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + if (priority <= spp->spi_priority) break; - niswdev++; - if (swp->sw_nblks > niswap) - niswap = swp->sw_nblks; + pspp = spp; } - niswap = roundup(niswap, dmmax); - niswap *= niswdev; - if (swdevt[0].sw_vp == NULL && - bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp)) - panic("swapvp"); - /* - * The remainder must be sequential - */ - for ( ; swp->sw_dev != NODEV; swp++) { - if ((swp->sw_flags & SW_SEQUENTIAL) == 0) - panic("binit: mis-ordered swap devices"); - nswdev++; - if (swp->sw_nblks > 0) { - if (swp->sw_nblks % dmmax) - swp->sw_nblks -= (swp->sw_nblks % dmmax); - nswap += swp->sw_nblks; + + if (spp == NULL || spp->spi_priority != priority) { + spp = (struct swappri *) + malloc(sizeof *spp, M_VMSWAP, M_NOWAIT); + + if (spp == NULL) { + simple_unlock(&swaplist_lock); + tsleep((caddr_t)&lbolt, PSWP, "memory", 0); + goto again; } + DPRINTF(VMSDB_SWFLOW, + ("sw: had to create a new swappri = %d\n", priority)); + + spp->spi_priority = priority; + CIRCLEQ_INIT(&spp->spi_swapdev); + + if (pspp) + LIST_INSERT_AFTER(pspp, spp, spi_swappri); + else + LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri); + } - nswdev += niswdev; - if (nswdev == 0) - panic("swapinit"); - nswap += niswap; -#else - nswdev = 0; - nswap = 0; - for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) { - nswdev++; - if (swp->sw_nblks > nswap) - nswap = swp->sw_nblks; - } - if (nswdev == 0) - panic("swapinit"); - if (nswdev > 1) - nswap = ((nswap + dmmax - 1) / dmmax) * dmmax; - nswap *= nswdev; - if (swdevt[0].sw_vp == NULL && - bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp)) - panic("swapvp"); -#endif - if (nswap == 0) - printf("WARNING: no swap space found\n"); - else if ((error = swfree(p, 0)) == ENXIO) - printf("WARNING: primary swap device not configured\n"); - else if (error) { - printf("swfree errno %d\n", error); /* XXX */ - panic("swapinit swfree 0"); - } + /* Onto priority list */ + CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); + sdp->swd_priority = priority; + simple_unlock(&swaplist_lock); +} - /* - * Now set up swap buffer headers. - */ - bswlist.b_actf = sp; - for (i = 0; i < nswbuf - 1; i++, sp++) { - sp->b_actf = sp + 1; - sp->b_rcred = sp->b_wcred = p->p_ucred; - sp->b_vnbufs.le_next = NOLIST; +/* + * Find and optionally remove a swap device from the priority list. + */ +struct swapdev * +find_swapdev(vp, remove) + struct vnode *vp; + int remove; +{ + struct swapdev *sdp; + struct swappri *spp; + + simple_lock(&swaplist_lock); + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) + if (sdp->swd_vp == vp) { + if (remove) + CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, + swd_next); + simple_unlock(&swaplist_lock); + return (sdp); + } } - sp->b_rcred = sp->b_wcred = p->p_ucred; - sp->b_vnbufs.le_next = NOLIST; - sp->b_actf = NULL; + simple_unlock(&swaplist_lock); + return (NULL); } +/* + * Scan priority list for empty priority entries. 
+ */ void -swstrategy(bp) - register struct buf *bp; +swaplist_trim() +{ + struct swappri *spp; + + simple_lock(&swaplist_lock); +restart: + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + if (spp->spi_swapdev.cqh_first != (void *)&spp->spi_swapdev) + continue; + LIST_REMOVE(spp, spi_swappri); + free((caddr_t)spp, M_VMSWAP); + goto restart; + } + simple_unlock(&swaplist_lock); +} + +int +sys_swapctl(p, v, retval) + struct proc *p; + void *v; + register_t *retval; { - int s, sz, off, seg, index; - register struct swdevt *sp; + struct sys_swapctl_args /* { + syscallarg(int) cmd; + syscallarg(const void *) arg; + syscallarg(int) misc; + } */ *uap = (struct sys_swapctl_args *)v; struct vnode *vp; + struct nameidata nd; + struct swappri *spp; + struct swapdev *sdp; + struct swapent *sep; + char userpath[PATH_MAX + 1]; + int count, error, misc; + size_t len; + int priority; - sz = howmany(bp->b_bcount, DEV_BSIZE); - if (bp->b_blkno + sz > nswap) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; + misc = SCARG(uap, misc); + + DPRINTF(VMSDB_SWFLOW, ("entering sys_swapctl\n")); + + /* how many swap devices */ + if (SCARG(uap, cmd) == SWAP_NSWAP) { + DPRINTF(VMSDB_SWFLOW,("did SWAP_NSWAP: leaving sys_swapctl\n")); + *retval = nswapdev; + return (0); } - if (nswdev > 1) { -#ifdef SEQSWAP - if (bp->b_blkno < niswap) { - if (niswdev > 1) { - off = bp->b_blkno % dmmax; - if (off+sz > dmmax) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; - } - seg = bp->b_blkno / dmmax; - index = seg % niswdev; - seg /= niswdev; - bp->b_blkno = seg*dmmax + off; - } else - index = 0; - } else { - register struct swdevt *swp; - - bp->b_blkno -= niswap; - for (index = niswdev, swp = &swdevt[niswdev]; - swp->sw_dev != NODEV; - swp++, index++) { - if (bp->b_blkno < swp->sw_nblks) - break; - bp->b_blkno -= swp->sw_nblks; - } - if (swp->sw_dev == NODEV || - bp->b_blkno+sz > swp->sw_nblks) { - bp->b_error = swp->sw_dev == NODEV ? - ENODEV : EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; + + /* stats on the swap devices. */ + if (SCARG(uap, cmd) == SWAP_STATS) { + sep = (struct swapent *)SCARG(uap, arg); + count = 0; + + error = lockmgr(&swaplist_change_lock, LK_SHARED, (void *)0, p); + if (error) + return (error); + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev && misc-- > 0; + sdp = sdp->swd_next.cqe_next, sep++, count++) { + /* + * We do not do NetBSD 1.3 compat call. 
+ */ + error = copyout((caddr_t)&sdp->swd_se, + (caddr_t)sep, sizeof(struct swapent)); + + if (error) + goto out; } } +out: + (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p); + if (error) + return (error); + + DPRINTF(VMSDB_SWFLOW,("did SWAP_STATS: leaving sys_swapctl\n")); + + *retval = count; + return (0); + } + if ((error = suser(p->p_ucred, &p->p_acflag))) + return (error); + + if (SCARG(uap, arg) == NULL) { + /* XXX - interface - arg==NULL: miniroot */ + vp = rootvp; + if (vget(vp, LK_EXCLUSIVE, p)) + return (EBUSY); + if (SCARG(uap, cmd) == SWAP_ON && + copystr("miniroot", userpath, sizeof userpath, &len)) + panic("swapctl: miniroot copy failed"); + } else { + int space; + char *where; + + if (SCARG(uap, cmd) == SWAP_ON) { + if ((error = copyinstr(SCARG(uap, arg), userpath, + sizeof userpath, &len))) + return (error); + space = UIO_SYSSPACE; + where = userpath; + } else { + space = UIO_USERSPACE; + where = (char *)SCARG(uap, arg); + } + NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, space, where, p); + if ((error = namei(&nd))) + return (error); + + vp = nd.ni_vp; + } + + error = lockmgr(&swaplist_change_lock, LK_EXCLUSIVE, (void *)0, p); + if (error) + goto bad2; + + switch(SCARG(uap, cmd)) { + case SWAP_CTL: + priority = SCARG(uap, misc); + if ((sdp = find_swapdev(vp, 1)) == NULL) { + error = ENOENT; + break; + } + insert_swapdev(sdp, priority); + swaplist_trim(); + break; + + case SWAP_ON: + priority = SCARG(uap, misc); + + /* Check for duplicates */ + if ((sdp = find_swapdev(vp, 0)) != NULL) { + if (!bcmp(sdp->swd_path, "swap_device", 12)) { + copystr(userpath, sdp->swd_path, len, 0); + error = 0; + } else + error = EBUSY; + goto bad; + } + + sdp = (struct swapdev *) + malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); + bzero(sdp, sizeof(*sdp)); + + sdp->swd_vp = vp; + sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV; + + if ((error = swap_on(p, sdp)) != 0) { + free((caddr_t)sdp, M_VMSWAP); + break; + } +#ifdef SWAP_TO_FILES + /* + * XXX Is NFS elaboration necessary? + */ + if (vp->v_type == VREG) + sdp->swd_cred = crdup(p->p_ucred); +#endif + if (copystr(userpath, sdp->swd_path, len, 0) != 0) + panic("swapctl: copystr"); + insert_swapdev(sdp, priority); + + /* Keep reference to vnode */ + vref(vp); + break; + + case SWAP_OFF: + DPRINTF(VMSDB_SWFLOW, ("doing SWAP_OFF...\n")); +#ifdef SWAP_OFF_WORKS + if ((sdp = find_swapdev(vp, 0)) == NULL) { + error = ENXIO; + break; + } + /* + * If a device isn't in use or enabled, we + * can't stop swapping from it (again). + */ + if ((sdp->swd_flags & + (SWF_INUSE|SWF_ENABLE)) == 0) { + error = EBUSY; + goto bad; + } + if ((error = swap_off(p, sdp)) != 0) + goto bad; + + /* Find again and remove this time */ + if ((sdp = find_swapdev(vp, 1)) == NULL) { + error = ENXIO; + break; + } + free((caddr_t)sdp, M_VMSWAP); #else - off = bp->b_blkno % dmmax; - if (off+sz > dmmax) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; + error = ENODEV; +#endif + break; + + default: + DPRINTF(VMSDB_SWFLOW, + ("unhandled command: %x\n", SCARG(uap, cmd))); + error = EINVAL; + } + +bad: + (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p); +bad2: + vput(vp); + + DPRINTF(VMSDB_SWFLOW, ("leaving sys_swapctl: error %d\n", error)); + return (error); +} + +/* + * swap_on() attempts to begin swapping on a swapdev. we check that this + * device is OK to swap from, miss the start of any disk (to avoid any + * disk labels that may exist). 
+ */ +STATIC int +swap_on(p, sdp) + struct proc *p; + struct swapdev *sdp; +{ + static int count = 0; + struct vnode *vp = sdp->swd_vp; + int error, nblks, size; + long addr; + char *storage; + int storagesize; +#ifdef SWAP_TO_FILES + struct vattr va; +#endif +#ifdef NFS + extern int (**nfsv2_vnodeop_p) __P((void *)); +#endif /* NFS */ + dev_t dev = sdp->swd_dev; + char *name; + + + /* If root on swap, then the skip open/close operations. */ + if (vp != rootvp) { + if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p))) + return (error); + vp->v_writecount++; + } + + DPRINTF(VMSDB_INFO, + ("swap_on: dev = %d, major(dev) = %d\n", dev, major(dev))); + + switch (vp->v_type) { + case VBLK: + if (bdevsw[major(dev)].d_psize == 0 || + (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) { + error = ENXIO; + goto bad; } - seg = bp->b_blkno / dmmax; - index = seg % nswdev; - seg /= nswdev; - bp->b_blkno = seg*dmmax + off; + break; + +#ifdef SWAP_TO_FILES + case VREG: + if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p))) + goto bad; + nblks = (int)btodb(va.va_size); + if ((error = + VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0) + goto bad; + + sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize; +#ifdef NFS + if (vp->v_op == nfsv2_vnodeop_p) + sdp->swd_maxactive = 2; /* XXX */ + else +#endif /* NFS */ + sdp->swd_maxactive = 8; /* XXX */ + break; #endif - } else - index = 0; - sp = &swdevt[index]; - if (sp->sw_vp == NULL) { - bp->b_error = ENODEV; - bp->b_flags |= B_ERROR; - biodone(bp); - return; + + default: + error = ENXIO; + goto bad; } - if ((bp->b_dev = sp->sw_dev) == NODEV && sp->sw_vp->v_type != VREG) - panic("swstrategy"); - VHOLD(sp->sw_vp); - s = splbio(); - if ((bp->b_flags & B_READ) == 0) { - if ((vp = bp->b_vp) != NULL) { - vp->v_numoutput--; - if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { - vp->v_flag &= ~VBWAIT; - wakeup((caddr_t)&vp->v_numoutput); + if (nblks == 0) { + DPRINTF(VMSDB_SWFLOW, ("swap_on: nblks == 0\n")); + error = EINVAL; + goto bad; + } + + sdp->swd_flags |= SWF_INUSE; + sdp->swd_nblks = nblks; + + /* + * skip over first cluster of a device in case of labels or + * boot blocks. + */ + if (vp->v_type == VBLK) { + size = (int)(nblks - ctod(CLSIZE)); + addr = (long)ctod(CLSIZE); + } else { + size = (int)nblks; + addr = (long)0; + } + + DPRINTF(VMSDB_SWON, + ("swap_on: dev %x: size %d, addr %ld\n", dev, size, addr)); + + name = malloc(12, M_VMSWAP, M_WAITOK); + sprintf(name, "swap0x%04x", count++); + /* XXX make this based on ram as well. 
*/ + storagesize = EXTENT_FIXED_STORAGE_SIZE(maxproc * 2); + storage = malloc(storagesize, M_VMSWAP, M_WAITOK); + sdp->swd_ex = extent_create(name, 0, nblks, M_VMSWAP, + storage, storagesize, EX_WAITOK); + if (addr) { + if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK)) + panic("disklabel region"); + sdp->swd_inuse += addr; + } + + + if (vp == rootvp) { + struct mount *mp; + struct statfs *sp; + int rootblks; + + /* Get size from root FS (mountroot did statfs) */ + mp = rootvnode->v_mount; + sp = &mp->mnt_stat; + rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE); + if (rootblks > nblks) + panic("miniroot size"); + + if (extent_alloc_region(sdp->swd_ex, addr, rootblks, EX_WAITOK)) + panic("miniroot region"); + + printf("Preserved %d blocks, leaving %d pages of swap\n", + rootblks, dtoc(size - rootblks)); + } + + swap_addmap(sdp, size); + nswapdev++; + sdp->swd_flags |= SWF_ENABLE; + return (0); + +bad: + if (vp != rootvp) { + vp->v_writecount--; + (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); + } + return (error); +} + +#ifdef SWAP_OFF_WORKS +STATIC int +swap_off(p, sdp) + struct proc *p; + struct swapdev *sdp; +{ + char *name; + + /* turn off the enable flag */ + sdp->swd_flags &= ~SWF_ENABLE; + + DPRINTF(VMSDB_SWOFF, ("swap_off: %x\n", sdp->swd_dev)); + + /* + * XXX write me + * + * the idea is to find out which processes are using this swap + * device, and page them all in. + * + * eventually, we should try to move them out to other swap areas + * if available. + * + * The alternative is to create a redirection map for this swap + * device. This should work by moving all the pages of data from + * the ex-swap device to another one, and making an entry in the + * redirection map for it. locking is going to be important for + * this! + * + * There might be an easier way to do a "soft" swapoff. First + * we mark the particular swap partition as not desirable anymore. + * Then we use the pager to page a couple of pages in, each time + * it has the memory, and the chance to do so. Thereby moving pages + * back into memory. Once they are in memory, when they get paged + * out again, they do not go back onto the "undesirable" device + * anymore, but to good devices. This might take longer, but it + * can certainly work. If need be, the user process can sleep on + * the particular sdp entry, and the swapper can then wake him up + * when everything is done. + */ + + /* until the above code is written, we must ENODEV */ + return ENODEV; + + extent_free(swapmap, sdp->swd_mapoffset, sdp->swd_mapsize, EX_WAITOK); + nswapdev--; + name = sdp->swd_ex->ex_name; + extent_destroy(sdp->swd_ex); + free(name, M_VMSWAP); + free((caddr_t)sdp->swd_ex, M_VMSWAP); + if (sdp->swp_vp != rootvp) { + vp->v_writecount--; + (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p); + } + if (sdp->swd_vp) + vrele(sdp->swd_vp); + free((caddr_t)sdp, M_VMSWAP); + return (0); +} +#endif + +/* + * To decide where to allocate what part of swap, we must "round robin" + * the swap devices in swap_priority of the same priority until they are + * full. we do this with a list of swap priorities that have circle + * queues of swapdevs. + * + * The following functions control allocation and freeing of part of the + * swap area. you call swap_alloc() with a size and it returns an address. + * later you call swap_free() and it frees the use of that swap area. 
+ * + * daddr_t swap_alloc(int size); + * void swap_free(int size, daddr_t addr); + */ + +daddr_t +swap_alloc(size) + int size; +{ + struct swapdev *sdp; + struct swappri *spp; + u_long result; + + if (nswapdev < 1) + return 0; + + simple_lock(&swaplist_lock); + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) { + /* if it's not enabled, then we can't swap from it */ + if ((sdp->swd_flags & SWF_ENABLE) == 0 || + /* XXX IS THIS CORRECT ? */ +#if 1 + (sdp->swd_inuse + size > sdp->swd_nblks) || +#endif + extent_alloc(sdp->swd_ex, size, EX_NOALIGN, + EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT, + &result) != 0) { + continue; } + CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next); + CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); + sdp->swd_inuse += size; + simple_unlock(&swaplist_lock); + return (daddr_t)(result + sdp->swd_mapoffset); } - sp->sw_vp->v_numoutput++; } - if (bp->b_vp != NULL) - brelvp(bp); - splx(s); - bp->b_vp = sp->sw_vp; - VOP_STRATEGY(bp); + simple_unlock(&swaplist_lock); + return 0; +} + +void +swap_free(size, addr) + int size; + daddr_t addr; +{ + struct swapdev *sdp = swap_getsdpfromaddr(addr); + +#ifdef DIAGNOSTIC + if (sdp == NULL) + panic("swap_free: unmapped address\n"); + if (nswapdev < 1) + panic("swap_free: nswapdev < 1\n"); +#endif + extent_free(sdp->swd_ex, addr - sdp->swd_mapoffset, size, + EX_MALLOCOK|EX_NOWAIT); + sdp->swd_inuse -= size; +#ifdef DIAGNOSTIC + if (sdp->swd_inuse < 0) + panic("swap_free: inuse < 0"); +#endif +} + +/* + * We have a physical -> virtual mapping to address here. There are several + * different physical address spaces (one for each swap partition) that are + * to be mapped onto a single virtual address space. + */ +#define ADDR_IN_MAP(addr, sdp) \ + (((addr) >= (sdp)->swd_mapoffset) && \ + ((addr) < ((sdp)->swd_mapoffset + (sdp)->swd_mapsize))) + +struct swapdev * +swap_getsdpfromaddr(addr) + daddr_t addr; +{ + struct swapdev *sdp; + struct swappri *spp; + + simple_lock(&swaplist_lock); + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) + if (ADDR_IN_MAP(addr, sdp)) { + simple_unlock(&swaplist_lock); + return sdp; + } + simple_unlock(&swaplist_lock); + return NULL; +} + +void +swap_addmap(sdp, size) + struct swapdev *sdp; + int size; +{ + u_long result; + + if (extent_alloc(swapmap, size, EX_NOALIGN, EX_NOBOUNDARY, + EX_WAITOK, &result)) + panic("swap_addmap"); + + sdp->swd_mapoffset = result; + sdp->swd_mapsize = size; } /*ARGSUSED*/ @@ -284,232 +855,408 @@ swwrite(dev, uio, ioflag) return (physio(swstrategy, NULL, dev, B_WRITE, minphys, uio)); } -/* - * System call swapon(name) enables swapping on device name, - * which must be in the swdevsw. Return EBUSY - * if already swapping on this device. 
- */ -/* ARGSUSED */ -int -sys_swapon(p, v, retval) - struct proc *p; - void *v; - register_t *retval; +void +swstrategy(bp) + struct buf *bp; { - struct sys_swapon_args /* { - syscallarg(char *) name; - } */ *uap = v; - register struct vnode *vp; - register struct swdevt *sp; - dev_t dev; - int error; - struct nameidata nd; + struct swapdev *sdp; + struct vnode *vp; + daddr_t bn; - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) - return (error); - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, name), p); - if ((error = namei(&nd)) != 0) - return (error); - vp = nd.ni_vp; - if (vp->v_type != VBLK) { - vrele(vp); - return (ENOTBLK); - } - dev = (dev_t)vp->v_rdev; - if (major(dev) >= nblkdev) { - vrele(vp); - return (ENXIO); + bn = bp->b_blkno; + sdp = swap_getsdpfromaddr(bn); + if (sdp == NULL) { + bp->b_error = EINVAL; + bp->b_flags |= B_ERROR; + biodone(bp); + return; } - for (sp = &swdevt[0]; sp->sw_dev != NODEV; sp++) { - if (sp->sw_dev == dev) { - if (sp->sw_flags & SW_FREED) { - vrele(vp); - return (EBUSY); - } - sp->sw_vp = vp; - if ((error = swfree(p, sp - swdevt)) != 0) { - vrele(vp); - return (error); - } - return (0); + + bn -= sdp->swd_mapoffset; + + DPRINTF(VMSDB_SWFLOW, + ("swstrategy(%s): mapoff %x, bn %x, bcount %ld\n", + ((bp->b_flags & B_READ) == 0) ? "write" : "read", + sdp->swd_mapoffset, bn, bp->b_bcount)); + + switch (sdp->swd_vp->v_type) { + default: + panic("swstrategy: vnode type %x", sdp->swd_vp->v_type); + case VBLK: + bp->b_blkno = bn + ctod(CLSIZE); + vp = sdp->swd_vp; + bp->b_dev = sdp->swd_dev; + VHOLD(vp); + if ((bp->b_flags & B_READ) == 0) { + int s = splbio(); + vwakeup(bp); + vp->v_numoutput++; + splx(s); } -#ifdef SEQSWAP - /* - * If we have reached a non-freed sequential device without - * finding what we are looking for, it is an error. - * That is because all interleaved devices must come first - * and sequential devices must be freed in order. - */ - if ((sp->sw_flags & (SW_SEQUENTIAL|SW_FREED)) == SW_SEQUENTIAL) - break; + + if (bp->b_vp != NULL) + brelvp(bp); + + bp->b_vp = vp; + VOP_STRATEGY(bp); + return; +#ifdef SWAP_TO_FILES + case VREG: + sw_reg_strategy(sdp, bp, bn); + return; #endif } - vrele(vp); - return (EINVAL); + /* NOTREACHED */ } -/* - * Swfree(index) frees the index'th portion of the swap map. - * Each of the nswdev devices provides 1/nswdev'th of the swap - * space, which is laid out with blocks of dmmax pages circularly - * among the devices. - */ -int -swfree(p, index) - struct proc *p; - int index; +#ifdef SWAP_TO_FILES + +STATIC void +sw_reg_strategy(sdp, bp, bn) + struct swapdev *sdp; + struct buf *bp; + int bn; { - register struct swdevt *sp; - register swblk_t vsbase; - register long blk; - struct vnode *vp; - register swblk_t dvbase; - register int nblks; - int error; + struct vnode *vp; + struct vndxfer *vnx; + daddr_t nbn; + caddr_t addr; + int s, off, nra, error, sz, resid; - sp = &swdevt[index]; - vp = sp->sw_vp; - /* If root on swap, then the skip open/close operations. */ - if (vp != rootvp) { - if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)) != 0) - return (error); - } - sp->sw_flags |= SW_FREED; - nblks = sp->sw_nblks; /* - * Some devices may not exist til after boot time. - * If so, their nblk count will be 0. + * Translate the device logical block numbers into physical + * block numbers of the underlying filesystem device. 
*/ - if (nblks <= 0) { - int perdev; - dev_t dev = sp->sw_dev; + bp->b_resid = bp->b_bcount; + addr = bp->b_data; + bn = dbtob(bn); - if (bdevsw[major(dev)].d_psize == 0 || - (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) { - if (vp != rootvp) - (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); - sp->sw_flags &= ~SW_FREED; - return (ENXIO); + /* Allocate a header for this transfer and link it to the buffer */ + vnx = getvndxfer(); + vnx->vx_flags = VX_BUSY; + vnx->vx_error = 0; + vnx->vx_pending = 0; + vnx->vx_bp = bp; + vnx->vx_sdp = sdp; + + error = 0; + for (resid = bp->b_resid; resid; resid -= sz) { + struct vndbuf *nbp; + + nra = 0; + error = VOP_BMAP(sdp->swd_vp, bn / sdp->swd_bsize, + &vp, &nbn, &nra); + + if (error == 0 && (long)nbn == -1) + error = EIO; + + /* + * If there was an error or a hole in the file...punt. + * Note that we may have to wait for any operations + * that we have already fired off before releasing + * the buffer. + * + * XXX we could deal with holes here but it would be + * a hassle (in the write case). + */ + if (error) { + s = splbio(); + vnx->vx_error = error; + goto out; + } + + if ((off = bn % sdp->swd_bsize) != 0) + sz = sdp->swd_bsize - off; + else + sz = (1 + nra) * sdp->swd_bsize; + + if (resid < sz) + sz = resid; + + DPRINTF(VMSDB_SWFLOW, + ("sw_reg_strategy: vp %p/%p bn 0x%x/0x%x" + " sz 0x%x\n", sdp->swd_vp, vp, bn, nbn, sz)); + + nbp = getvndbuf(); + nbp->vb_buf.b_flags = bp->b_flags | B_NOCACHE | B_CALL; + nbp->vb_buf.b_bcount = sz; + nbp->vb_buf.b_bufsize = bp->b_bufsize; + nbp->vb_buf.b_error = 0; + nbp->vb_buf.b_data = addr; + nbp->vb_buf.b_blkno = nbn + btodb(off); + nbp->vb_buf.b_proc = bp->b_proc; + nbp->vb_buf.b_iodone = sw_reg_iodone; + nbp->vb_buf.b_vp = NULLVP; + nbp->vb_buf.b_rcred = sdp->swd_cred; + nbp->vb_buf.b_wcred = sdp->swd_cred; + if (bp->b_dirtyend == 0) { + nbp->vb_buf.b_dirtyoff = 0; + nbp->vb_buf.b_dirtyend = sz; + } else { + nbp->vb_buf.b_dirtyoff = + max(0, bp->b_dirtyoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_dirtyend = + min(sz, + max(0, bp->b_dirtyend - (bp->b_bcount-resid))); } -#ifdef SEQSWAP - if (index < niswdev) { - perdev = niswap / niswdev; - if (nblks > perdev) - nblks = perdev; + if (bp->b_validend == 0) { + nbp->vb_buf.b_validoff = 0; + nbp->vb_buf.b_validend = sz; } else { - if (nblks % dmmax) - nblks -= (nblks % dmmax); - nswap += nblks; + nbp->vb_buf.b_validoff = + max(0, bp->b_validoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_validend = + min(sz, + max(0, bp->b_validend - (bp->b_bcount-resid))); } -#else - if (nswap > 0) { - perdev = nswap / nswdev; - if (nblks > perdev) - nblks = perdev; - } else - nswap = nblks; -#endif - sp->sw_nblks = nblks; + + nbp->vb_xfer = vnx; + + /* + * Just sort by block number + */ + nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno; + s = splbio(); + if (vnx->vx_error != 0) { + putvndbuf(nbp); + goto out; + } + vnx->vx_pending++; + bgetvp(vp, &nbp->vb_buf); + disksort(&sdp->swd_tab, &nbp->vb_buf); + sw_reg_start(sdp); + splx(s); + + bn += sz; + addr += sz; } - if (nblks == 0) { - if (vp != rootvp) - (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); - sp->sw_flags &= ~SW_FREED; - return (0); /* XXX error? 
*/ + + s = splbio(); + +out: /* Arrive here at splbio */ + vnx->vx_flags &= ~VX_BUSY; + if (vnx->vx_pending == 0) { + if (vnx->vx_error != 0) { + bp->b_error = vnx->vx_error; + bp->b_flags |= B_ERROR; + } + putvndxfer(vnx); + biodone(bp); } -#ifdef SEQSWAP - if (sp->sw_flags & SW_SEQUENTIAL) { - register struct swdevt *swp; - - blk = niswap; - for (swp = &swdevt[niswdev]; swp != sp; swp++) - blk += swp->sw_nblks; - rmfree(swapmap, nblks, blk); - return (0); + splx(s); +} + +/* + * Feed requests sequentially. + * We do it this way to keep from flooding NFS servers if we are connected + * to an NFS file. This places the burden on the client rather than the + * server. + */ +STATIC void +sw_reg_start(sdp) + struct swapdev *sdp; +{ + struct buf *bp; + + if ((sdp->swd_flags & SWF_BUSY) != 0) + /* Recursion control */ + return; + + sdp->swd_flags |= SWF_BUSY; + + while (sdp->swd_tab.b_active < sdp->swd_maxactive) { + bp = sdp->swd_tab.b_actf; + if (bp == NULL) + break; + sdp->swd_tab.b_actf = bp->b_actf; + sdp->swd_tab.b_active++; + + DPRINTF(VMSDB_SWFLOW, + ("sw_reg_start: bp %p vp %p blkno %x addr %p cnt %lx\n", + bp, bp->b_vp, bp->b_blkno,bp->b_data, bp->b_bcount)); + + if ((bp->b_flags & B_READ) == 0) + bp->b_vp->v_numoutput++; + VOP_STRATEGY(bp); } -#endif - for (dvbase = 0; dvbase < nblks; dvbase += dmmax) { - blk = nblks - dvbase; -#ifdef SEQSWAP - if ((vsbase = index*dmmax + dvbase*niswdev) >= niswap) - panic("swfree"); -#else - if ((vsbase = index*dmmax + dvbase*nswdev) >= nswap) - panic("swfree"); -#endif - if (blk > dmmax) - blk = dmmax; - if (vsbase == 0) { - /* - * First of all chunks... initialize the swapmap. - * Don't use the first cluster of the device - * in case it starts with a label or boot block. - */ - rminit(swapmap, blk - ctod(btoc(SWAPSKIPBYTES)), - vsbase + ctod(btoc(SWAPSKIPBYTES)), "swap", nswapmap); - } else if (dvbase == 0) { - /* - * Don't use the first cluster of the device - * in case it starts with a label or boot block. - */ - rmfree(swapmap, blk - ctod(btoc(SWAPSKIPBYTES)), - vsbase + ctod(btoc(SWAPSKIPBYTES))); - } else - rmfree(swapmap, blk, vsbase); + sdp->swd_flags &= ~SWF_BUSY; +} + +STATIC void +sw_reg_iodone(bp) + struct buf *bp; +{ + register struct vndbuf *vbp = BUF_TO_VNDBUF(bp); + register struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer; + register struct buf *pbp = vnx->vx_bp; + struct swapdev *sdp = vnx->vx_sdp; + int s, resid; + + DPRINTF(VMSDB_SWFLOW, + ("sw_reg_iodone: vbp %p vp %p blkno %x addr %p " + "cnt %lx(%lx)\n", + vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, + vbp->vb_buf.b_data, vbp->vb_buf.b_bcount, + vbp->vb_buf.b_resid)); + + s = splbio(); + resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; + pbp->b_resid -= resid; + vnx->vx_pending--; + + if (vbp->vb_buf.b_error) { + DPRINTF(VMSDB_INFO, ("sw_reg_iodone: vbp %p error %d\n", vbp, + vbp->vb_buf.b_error)); + + vnx->vx_error = vbp->vb_buf.b_error; } + if (vbp->vb_buf.b_vp != NULLVP) + brelvp(&vbp->vb_buf); + + putvndbuf(vbp); + /* - * Preserve the mini-root if appropriate: - * Note: this requires !SEQSWAP && nswdev==1 - * - * A mini-root gets copied into the front of the swap - * and we run over top of the swap area just long - * enough for us to do a mkfs and restor of the real - * root (sure beats rewriting standalone restor). + * Wrap up this transaction if it has run to completion or, in + * case of an error, when all auxiliary buffers have returned. 
*/ - if (vp == rootvp) { -#ifndef MINIROOTSIZE - struct mount *mp; - struct statfs *sp; -#endif - long firstblk; - int rootblks; + if (vnx->vx_error != 0) { + pbp->b_flags |= B_ERROR; + pbp->b_error = vnx->vx_error; + if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { -#ifdef MINIROOTSIZE - rootblks = MINIROOTSIZE; -#else - /* Get size from root FS (mountroot did statfs) */ - mp = rootvnode->v_mount; - sp = &mp->mnt_stat; - rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE); + DPRINTF(VMSDB_SWFLOW, + ("swiodone: pbp %p iodone: error %d\n", + pbp, vnx->vx_error)); + putvndxfer(vnx); + biodone(pbp); + } + } else if (pbp->b_resid == 0) { + +#ifdef DIAGNOSTIC + if (vnx->vx_pending != 0) + panic("swiodone: vnx pending: %d", vnx->vx_pending); #endif - if (rootblks > nblks) - panic("swfree miniroot size"); - /* First ctod(btoc(SWAPSKIPBYTES)) blocks are not in the map. */ - firstblk = rmalloc(swapmap, rootblks - ctod(btoc(SWAPSKIPBYTES))); - if (firstblk != ctod(btoc(SWAPSKIPBYTES))) - panic("swfree miniroot save"); - printf("Preserved %d blocks of miniroot leaving %d pages of swap\n", - rootblks, dtoc(nblks - rootblks)); + + if ((vnx->vx_flags & VX_BUSY) == 0) { + DPRINTF(VMSDB_SWFLOW, + ("swiodone: pbp %p iodone\n", pbp)); + putvndxfer(vnx); + biodone(pbp); + } } - return (0); + sdp->swd_tab.b_active--; + sw_reg_start(sdp); + + splx(s); } +#endif /* SWAP_TO_FILES */ -int -sys_omsync(p, v, retval) - struct proc *p; - void *v; - register_t *retval; +void +swapinit() { - struct sys_msync_args ua; - struct sys_omsync_args /* { - syscallarg(caddr_t) addr; - syscallarg(size_t) len; - } */ *uap = v; - - SCARG(&ua, addr) = SCARG(uap, addr);; - SCARG(&ua, len) = SCARG(uap, len);; - SCARG(&ua, flags) = MS_SYNC | MS_INVALIDATE; - return (sys_msync(p, &ua, retval)); + struct buf *sp = swbuf; + struct proc *p = &proc0; /* XXX */ + int i; + + DPRINTF(VMSDB_SWINIT, ("swapinit\n")); + + nswapdev = 0; + if (bdevvp(swapdev, &swapdev_vp)) + panic("swapinit: can not setup swapdev_vp"); + + simple_lock_init(&swaplist_lock); + lockinit(&swaplist_change_lock, PSWP, "swap change", 0, 0); + LIST_INIT(&swap_priority); + + /* + * Create swap block resource map. The range [1..INT_MAX] allows + * for a grand total of 2 gigablocks of swap resource. + * (start at 1 because "block #0" will be interpreted as + * an allocation failure). + */ + swapmap = extent_create("swapmap", 1, INT_MAX, + M_VMSWAP, 0, 0, EX_WAITOK); + if (swapmap == 0) + panic("swapinit: extent_create failed"); + + /* + * Now set up swap buffer headers. + */ + bswlist.b_actf = sp; + for (i = 0; i < nswbuf - 1; i++, sp++) { + sp->b_actf = sp + 1; + sp->b_rcred = sp->b_wcred = p->p_ucred; + sp->b_vnbufs.le_next = NOLIST; + } + sp->b_rcred = sp->b_wcred = p->p_ucred; + sp->b_vnbufs.le_next = NOLIST; + sp->b_actf = NULL; + + /* Mount primary swap if available */ +#ifdef SWAPDEBUG + if(vmswap_domount) +#endif + swapmount(); + + DPRINTF(VMSDB_SWINIT, ("leaving swapinit\n")); +} + +/* + * Mount the primary swap device pointed to by 'swdevt[0]'. 
+ */ +STATIC void +swapmount() +{ + extern int getdevvp(dev_t, struct vnode **, enum vtype); + struct swapdev *sdp; + struct vnode *vp = NULL; + struct proc *p = curproc; + dev_t swap_dev = swdevt[0].sw_dev; + + /* Make sure we have a device */ + if (swap_dev == NODEV) { + printf("swapmount: No swap device!\n"); + return; + } + + /* Malloc needed things */ + sdp = (struct swapdev *)malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); + bzero(sdp, sizeof(*sdp)); + + /* Do swap_on() stuff */ + if(bdevvp(swap_dev, &vp)){ + printf("swapmount: bdevvp() failed\n"); + return; + } + +#ifdef SWAPDEBUG + vprint("swapmount", vp); +#endif + + sdp->swd_vp = vp; + sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV; + if(copystr("swap_device", sdp->swd_path, sizeof sdp->swd_path, 0) != 0){ + printf("swapmount: copystr() failed\n"); + return; + } + + /* Look for a swap device */ + printf("Adding swap(%d, %d):", major(swap_dev), minor(swap_dev)); + + if (swap_on(p, sdp) != 0) { + printf(" failed!\n"); + free((caddr_t)sdp, M_VMSWAP); + return; + } else + printf(" done.\n"); +#ifdef SWAP_TO_FILES + /* + * XXX Is NFS elaboration necessary? + */ + if (vp->v_type == VREG) + sdp->swd_cred = crdup(p->p_ucred); +#endif + insert_swapdev(sdp, 0); } |
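The comment above swap_alloc() in the new code spells out the allocation policy: scan the priority list in ascending order and, within one priority level, rotate the circular queue of devices so that equal-priority devices fill up round-robin. The userland sketch below models only that selection step, with no extent maps, locking, or drum offsets; the structure, field, and device names are invented for illustration, and it uses TAILQ where the kernel uses CIRCLEQ.

/*
 * Model of the swap_alloc() selection policy: lowest priority number
 * first, round-robin among devices of equal priority, falling back to
 * the next priority level only when every device at this level is full.
 * Illustrative only -- not the kernel data structures.
 */
#include <sys/queue.h>
#include <stdio.h>

struct dev {
	const char *name;
	int prio;		/* priority level (lower is preferred) */
	int nblks;		/* total blocks */
	int inuse;		/* blocks already allocated */
	TAILQ_ENTRY(dev) link;
};

TAILQ_HEAD(devq, dev);

static struct dev *
pick(struct devq *queues, int nprio, int size)
{
	struct dev *d;
	int p;

	for (p = 0; p < nprio; p++) {
		TAILQ_FOREACH(d, &queues[p], link) {
			if (d->inuse + size > d->nblks)
				continue;	/* full: try the next device */
			/* Rotate to the tail so the next request tries another device. */
			TAILQ_REMOVE(&queues[p], d, link);
			TAILQ_INSERT_TAIL(&queues[p], d, link);
			d->inuse += size;
			return (d);
		}
	}
	return (NULL);		/* every device at every priority is full */
}

int
main(void)
{
	struct devq queues[2];
	struct dev a = { "sd0b", 0, 1000, 0 };
	struct dev b = { "sd1b", 0, 1000, 0 };
	struct dev c = { "wd0b", 1, 1000, 0 };
	struct dev *d;
	int i;

	TAILQ_INIT(&queues[0]);
	TAILQ_INIT(&queues[1]);
	TAILQ_INSERT_TAIL(&queues[0], &a, link);
	TAILQ_INSERT_TAIL(&queues[0], &b, link);
	TAILQ_INSERT_TAIL(&queues[1], &c, link);

	/* sd0b and sd1b alternate; wd0b is touched only once both are full. */
	for (i = 0; i < 6; i++) {
		d = pick(queues, 2, 400);
		if (d == NULL) {
			printf("%d: no space left\n", i);
			continue;
		}
		printf("%d: %s (prio %d, %d/%d in use)\n",
		    i, d->name, d->prio, d->inuse, d->nblks);
	}
	return (0);
}

In the kernel the same rotation is done with CIRCLEQ_REMOVE/CIRCLEQ_INSERT_TAIL under swaplist_lock, and the chosen device's extent map (swd_ex) supplies the actual block range, offset by swd_mapoffset into the shared drum address space.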