author    | Artur Grabowski <art@cvs.openbsd.org> | 2001-06-27 04:53:33 +0000
----------|----------------------------------------|---------------------------
committer | Artur Grabowski <art@cvs.openbsd.org>  | 2001-06-27 04:53:33 +0000
commit    | 23908c853b9acc0b1320aca6a88554f9fa3e0345 |
tree      | b1c414d1c8d188c5f376dabf99916885e274d73d /sys/vm |
parent    | 736c4adc1cd88f788e32174b7575db53904c92c8 |
Die!
Diffstat (limited to 'sys/vm')
-rw-r--r-- | sys/vm/device_pager.c |  370
-rw-r--r-- | sys/vm/swap_pager.c   | 1267
-rw-r--r-- | sys/vm/vm_fault.c     | 1015
-rw-r--r-- | sys/vm/vm_glue.c      |  532
-rw-r--r-- | sys/vm/vm_init.c      |  127
-rw-r--r-- | sys/vm/vm_kern.c      |  465
-rw-r--r-- | sys/vm/vm_map.c       | 2746
-rw-r--r-- | sys/vm/vm_meter.c     |  236
-rw-r--r-- | sys/vm/vm_mmap.c      | 1054
-rw-r--r-- | sys/vm/vm_object.c    | 1887
-rw-r--r-- | sys/vm/vm_page.c      | 1881
-rw-r--r-- | sys/vm/vm_pageout.c   |  620
-rw-r--r-- | sys/vm/vm_pager.c     |  426
-rw-r--r-- | sys/vm/vm_swap.c      | 1248
-rw-r--r-- | sys/vm/vm_unix.c      |  254
-rw-r--r-- | sys/vm/vm_user.c      |  340
-rw-r--r-- | sys/vm/vnode_pager.c  |  591
17 files changed, 0 insertions, 15059 deletions
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c deleted file mode 100644 index 34423923334..00000000000 --- a/sys/vm/device_pager.c +++ /dev/null @@ -1,370 +0,0 @@ -/* $OpenBSD: device_pager.c,v 1.6 2001/05/16 12:54:34 ho Exp $ */ -/* $NetBSD: device_pager.c,v 1.24 1997/01/03 18:03:14 mrg Exp $ */ - -/* - * Copyright (c) 1990 University of Utah. - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)device_pager.c 8.5 (Berkeley) 1/12/94 - */ - -/* - * Page to/from special files. 
- */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/conf.h> -#include <sys/mman.h> -#include <sys/malloc.h> -#include <sys/proc.h> - -#include <vm/vm.h> -#include <vm/vm_kern.h> -#include <vm/vm_page.h> -#include <vm/device_pager.h> - -struct pagerlst dev_pager_list; /* list of managed devices */ -struct pglist dev_pager_fakelist; /* list of available vm_page_t's */ - -#ifdef DEBUG -int dpagerdebug = 0; -#define DDB_FOLLOW 0x01 -#define DDB_INIT 0x02 -#define DDB_ALLOC 0x04 -#define DDB_FAIL 0x08 -#endif - -static vm_pager_t dev_pager_alloc - __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); -static void dev_pager_dealloc __P((vm_pager_t)); -static int dev_pager_getpage - __P((vm_pager_t, vm_page_t *, int, boolean_t)); -static boolean_t dev_pager_haspage __P((vm_pager_t, vm_offset_t)); -static void dev_pager_init __P((void)); -static int dev_pager_putpage - __P((vm_pager_t, vm_page_t *, int, boolean_t)); -static vm_page_t dev_pager_getfake __P((vm_offset_t)); -static void dev_pager_putfake __P((vm_page_t)); - -struct pagerops devicepagerops = { - dev_pager_init, - dev_pager_alloc, - dev_pager_dealloc, - dev_pager_getpage, - dev_pager_putpage, - dev_pager_haspage, - vm_pager_clusternull -}; - -static void -dev_pager_init() -{ -#ifdef DEBUG - if (dpagerdebug & DDB_FOLLOW) - printf("dev_pager_init()\n"); -#endif - TAILQ_INIT(&dev_pager_list); - TAILQ_INIT(&dev_pager_fakelist); -} - -static vm_pager_t -dev_pager_alloc(handle, size, prot, foff) - caddr_t handle; - vm_size_t size; - vm_prot_t prot; - vm_offset_t foff; -{ - dev_t dev; - vm_pager_t pager; - int (*mapfunc) __P((dev_t, int, int)); - vm_object_t object; - dev_pager_t devp; - int npages, off; - -#ifdef DEBUG - if (dpagerdebug & DDB_FOLLOW) - printf("dev_pager_alloc(%p, %lx, %x, %lx)\n", - handle, size, prot, foff); -#endif -#ifdef DIAGNOSTIC - /* - * Pageout to device, should never happen. - */ - if (handle == NULL) - panic("dev_pager_alloc called"); -#endif - - /* - * Make sure this device can be mapped. - */ - dev = (dev_t)(long)handle; - mapfunc = cdevsw[major(dev)].d_mmap; - if (mapfunc == NULL || - mapfunc == (int (*) __P((dev_t, int, int))) enodev || - mapfunc == (int (*) __P((dev_t, int, int))) nullop) - return(NULL); - - /* - * Offset should be page aligned. - */ - if (foff & PAGE_MASK) - return(NULL); - - /* - * Check that the specified range of the device allows the - * desired protection. - * - * XXX assumes VM_PROT_* == PROT_* - */ - npages = atop(round_page(size)); - for (off = foff; npages--; off += PAGE_SIZE) - if ((*mapfunc)(dev, off, (int)prot) == -1) - return(NULL); - - /* - * Look up pager, creating as necessary. - */ -top: - pager = vm_pager_lookup(&dev_pager_list, handle); - if (pager == NULL) { - /* - * Allocate and initialize pager structs - */ - pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK); - devp = (dev_pager_t)malloc(sizeof *devp, M_VMPGDATA, M_WAITOK); - pager->pg_handle = handle; - pager->pg_ops = &devicepagerops; - pager->pg_type = PG_DEVICE; - pager->pg_flags = 0; - pager->pg_data = devp; - TAILQ_INIT(&devp->devp_pglist); - /* - * Allocate object and associate it with the pager. - */ - object = devp->devp_object = vm_object_allocate(0); - vm_object_enter(object, pager); - vm_object_setpager(object, pager, (vm_offset_t)0, FALSE); - /* - * Finally, put it on the managed list so other can find it. - * First we re-lookup in case someone else beat us to this - * point (due to blocking in the various mallocs). If so, - * we free everything and start over. 
- */ - if (vm_pager_lookup(&dev_pager_list, handle)) { - free((caddr_t)devp, M_VMPGDATA); - free((caddr_t)pager, M_VMPAGER); - goto top; - } - TAILQ_INSERT_TAIL(&dev_pager_list, pager, pg_list); -#ifdef DEBUG - if (dpagerdebug & DDB_ALLOC) { - printf("dev_pager_alloc: pager %p devp %p object %p\n", - pager, devp, object); - vm_object_print(object, FALSE); - } -#endif - } else { - /* - * vm_object_lookup() gains a reference and also - * removes the object from the cache. - */ - object = vm_object_lookup(pager); -#ifdef DIAGNOSTIC - devp = (dev_pager_t)pager->pg_data; - if (object != devp->devp_object) - panic("dev_pager_setup: bad object"); -#endif - } - return(pager); -} - -static void -dev_pager_dealloc(pager) - vm_pager_t pager; -{ - dev_pager_t devp; - vm_object_t object; - vm_page_t m; - -#ifdef DEBUG - if (dpagerdebug & DDB_FOLLOW) - printf("dev_pager_dealloc(%p)\n", pager); -#endif - TAILQ_REMOVE(&dev_pager_list, pager, pg_list); - /* - * Get the object. - * Note: cannot use vm_object_lookup since object has already - * been removed from the hash chain. - */ - devp = (dev_pager_t)pager->pg_data; - object = devp->devp_object; -#ifdef DEBUG - if (dpagerdebug & DDB_ALLOC) - printf("dev_pager_dealloc: devp %p object %p\n", devp, object); -#endif - /* - * Free up our fake pages. - */ - while ((m = devp->devp_pglist.tqh_first) != NULL) { - TAILQ_REMOVE(&devp->devp_pglist, m, pageq); - dev_pager_putfake(m); - } - free((caddr_t)devp, M_VMPGDATA); - free((caddr_t)pager, M_VMPAGER); -} - -static int -dev_pager_getpage(pager, mlist, npages, sync) - vm_pager_t pager; - vm_page_t *mlist; - int npages; - boolean_t sync; -{ - register vm_object_t object; - vm_offset_t offset, paddr; - vm_page_t page; - dev_t dev; - int (*mapfunc) __P((dev_t, int, int)), prot; - vm_page_t m; - -#ifdef DEBUG - if (dpagerdebug & DDB_FOLLOW) - printf("dev_pager_getpage(%p, %p, %x, %x)\n", - pager, mlist, npages, sync); -#endif - - if (npages != 1) - panic("dev_pager_getpage: cannot handle multiple pages"); - m = *mlist; - - object = m->object; - dev = (dev_t)(long)pager->pg_handle; - offset = m->offset + object->paging_offset; - prot = PROT_READ; /* XXX should pass in? */ - mapfunc = cdevsw[major(dev)].d_mmap; -#ifdef DIAGNOSTIC - if (mapfunc == NULL || - mapfunc == (int (*) __P((dev_t, int, int))) enodev || - mapfunc == (int (*) __P((dev_t, int, int))) nullop) - panic("dev_pager_getpage: no map function"); -#endif - paddr = pmap_phys_address((*mapfunc)(dev, (int)offset, prot)); -#ifdef DIAGNOSTIC - if (paddr == -1) - panic("dev_pager_getpage: map function returns error"); -#endif - /* - * Replace the passed in page with our own fake page and free - * up the original. 
- */ - page = dev_pager_getfake(paddr); - TAILQ_INSERT_TAIL(&((dev_pager_t)pager->pg_data)->devp_pglist, page, - pageq); - vm_object_lock(object); - vm_page_lock_queues(); - vm_page_free(m); - vm_page_insert(page, object, offset); - vm_page_unlock_queues(); - PAGE_WAKEUP(m); - if (offset + PAGE_SIZE > object->size) - object->size = offset + PAGE_SIZE; /* XXX anal */ - vm_object_unlock(object); - - return(VM_PAGER_OK); -} - -static int -dev_pager_putpage(pager, mlist, npages, sync) - vm_pager_t pager; - vm_page_t *mlist; - int npages; - boolean_t sync; -{ -#ifdef DEBUG - if (dpagerdebug & DDB_FOLLOW) - printf("dev_pager_putpage(%p, %p, %x, %x)\n", - pager, mlist, npages, sync); -#endif - if (pager == NULL) - return (FALSE); - panic("dev_pager_putpage called"); -} - -static boolean_t -dev_pager_haspage(pager, offset) - vm_pager_t pager; - vm_offset_t offset; -{ -#ifdef DEBUG - if (dpagerdebug & DDB_FOLLOW) - printf("dev_pager_haspage(%p, %lx)\n", pager, offset); -#endif - return(TRUE); -} - -static vm_page_t -dev_pager_getfake(paddr) - vm_offset_t paddr; -{ - vm_page_t m; - int i; - - if (dev_pager_fakelist.tqh_first == NULL) { - m = (vm_page_t)malloc(PAGE_SIZE, M_VMPGDATA, M_WAITOK); - for (i = PAGE_SIZE / sizeof(*m); i > 0; i--) { - TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq); - m++; - } - } - m = dev_pager_fakelist.tqh_first; - TAILQ_REMOVE(&dev_pager_fakelist, m, pageq); - m->flags = PG_BUSY | PG_CLEAN | PG_FAKE | PG_FICTITIOUS; - m->phys_addr = paddr; - m->wire_count = 1; - return(m); -} - -static void -dev_pager_putfake(m) - vm_page_t m; -{ -#ifdef DIAGNOSTIC - if (!(m->flags & PG_FICTITIOUS)) - panic("dev_pager_putfake: bad page"); -#endif - TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq); -} diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c deleted file mode 100644 index f7ee9045449..00000000000 --- a/sys/vm/swap_pager.c +++ /dev/null @@ -1,1267 +0,0 @@ -/* $OpenBSD: swap_pager.c,v 1.17 2001/02/28 20:32:40 csapuntz Exp $ */ -/* $NetBSD: swap_pager.c,v 1.27 1996/03/16 23:15:20 christos Exp $ */ - -/* - * Copyright (c) 1990 University of Utah. - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ - * - * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 - */ - -/* - * Quick hack to page to dedicated partition(s). - * TODO: - * Add multiprocessor locks - * Deal with async writes in a better fashion - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/buf.h> -#include <sys/map.h> -#include <sys/simplelock.h> -#include <sys/vnode.h> -#include <sys/malloc.h> -#include <sys/swap.h> - -#include <miscfs/specfs/specdev.h> - -#include <vm/vm.h> -#include <vm/vm_page.h> -#include <vm/vm_pageout.h> -#include <vm/swap_pager.h> - -/* XXX this makes the max swap devices 16 */ -#define NSWSIZES 16 /* size of swtab */ -#define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */ -#ifndef NPENDINGIO -#define NPENDINGIO 64 /* max # of pending cleans */ -#endif - -#ifdef DEBUG -int swpagerdebug = 0x100; -#define SDB_FOLLOW 0x001 -#define SDB_INIT 0x002 -#define SDB_ALLOC 0x004 -#define SDB_IO 0x008 -#define SDB_WRITE 0x010 -#define SDB_FAIL 0x020 -#define SDB_ALLOCBLK 0x040 -#define SDB_FULL 0x080 -#define SDB_ANOM 0x100 -#define SDB_ANOMPANIC 0x200 -#define SDB_CLUSTER 0x400 -#define SDB_PARANOIA 0x800 -#endif - -TAILQ_HEAD(swpclean, swpagerclean); - -struct swpagerclean { - TAILQ_ENTRY(swpagerclean) spc_list; - int spc_flags; - struct buf *spc_bp; - sw_pager_t spc_swp; - vm_offset_t spc_kva; - vm_page_t spc_m; - int spc_npages; -} swcleanlist[NPENDINGIO]; -typedef struct swpagerclean *swp_clean_t; - -/* spc_flags values */ -#define SPC_FREE 0x00 -#define SPC_BUSY 0x01 -#define SPC_DONE 0x02 -#define SPC_ERROR 0x04 - -struct swtab { - vm_size_t st_osize; /* size of object (bytes) */ - int st_bsize; /* vs. 
size of swap block (DEV_BSIZE units) */ -#ifdef DEBUG - u_long st_inuse; /* number in this range in use */ - u_long st_usecnt; /* total used of this size */ -#endif -} swtab[NSWSIZES+1]; - -#ifdef DEBUG -int swap_pager_poip; /* pageouts in progress */ -int swap_pager_piip; /* pageins in progress */ -#endif - -int swap_pager_maxcluster; /* maximum cluster size */ -int swap_pager_npendingio; /* number of pager clean structs */ - -struct swpclean swap_pager_inuse; /* list of pending page cleans */ -struct swpclean swap_pager_free; /* list of free pager clean structs */ -struct pagerlst swap_pager_list; /* list of "named" anon regions */ - -extern struct buf bswlist; /* import from vm_swap.c */ - -static void swap_pager_init __P((void)); -static vm_pager_t swap_pager_alloc - __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); -static void swap_pager_clean __P((int)); -#ifdef DEBUG -static void swap_pager_clean_check __P((vm_page_t *, int, int)); -#endif -static void swap_pager_cluster - __P((vm_pager_t, vm_offset_t, - vm_offset_t *, vm_offset_t *)); -static void swap_pager_dealloc __P((vm_pager_t)); -static int swap_pager_remove - __P((vm_pager_t, vm_offset_t, vm_offset_t)); -static vm_offset_t swap_pager_next __P((vm_pager_t, vm_offset_t)); -static int swap_pager_count __P((vm_pager_t)); -static int swap_pager_getpage - __P((vm_pager_t, vm_page_t *, int, boolean_t)); -static boolean_t swap_pager_haspage __P((vm_pager_t, vm_offset_t)); -static int swap_pager_io __P((sw_pager_t, vm_page_t *, int, int)); -static void swap_pager_iodone __P((struct buf *)); -static int swap_pager_putpage - __P((vm_pager_t, vm_page_t *, int, boolean_t)); -static int count_bits __P((u_int)); - -struct pagerops swappagerops = { - swap_pager_init, - swap_pager_alloc, - swap_pager_dealloc, - swap_pager_getpage, - swap_pager_putpage, - swap_pager_haspage, - swap_pager_cluster, - swap_pager_remove, - swap_pager_next, - swap_pager_count -}; - -static void -swap_pager_init() -{ - swp_clean_t spc; - int i, maxbsize, bsize; - -#ifdef DEBUG - if (swpagerdebug & (SDB_FOLLOW|SDB_INIT)) - printf("swpg_init()\n"); -#endif - dfltpagerops = &swappagerops; - TAILQ_INIT(&swap_pager_list); - - /* - * Allocate async IO structures. - * - * XXX it would be nice if we could do this dynamically based on - * the value of nswbuf (since we are ultimately limited by that) - * but neither nswbuf or malloc has been initialized yet. So the - * structs are statically allocated above. - */ - swap_pager_npendingio = NPENDINGIO; - - /* - * Initialize clean lists - */ - TAILQ_INIT(&swap_pager_inuse); - TAILQ_INIT(&swap_pager_free); - for (i = 0, spc = swcleanlist; i < swap_pager_npendingio; i++, spc++) { - TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); - spc->spc_flags = SPC_FREE; - } - -/* this needs to be at least ctod(1) for all ports for vtod() to work */ -#define DMMIN 32 - /* - * Fill in our table of object size vs. allocation size. bsize needs - * to be at least ctod(1) for all ports for vtod() to work, with a - * bare minimum of 32. - */ -#define max(a, b) ((a) > (b) ? 
(a) : (b)) - bsize = max(32, max(ctod(1), btodb(PAGE_SIZE))); - maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE); - if (maxbsize > NBPG) - maxbsize = NBPG; - for (i = 0; i < NSWSIZES; i++) { - if (bsize <= btodb(MAXPHYS)) - swap_pager_maxcluster = dbtob(bsize); - swtab[i].st_bsize = bsize; - if (bsize >= maxbsize) { - swtab[i].st_osize = 0; - break; - } - swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); -#ifdef DEBUG - if (swpagerdebug & SDB_INIT) - printf("swpg_init: ix %d, size %lx, bsize %x\n", - i, swtab[i].st_osize, swtab[i].st_bsize); -#endif - bsize *= 2; - } -} - -/* - * Allocate a pager structure and associated resources. - * Note that if we are called from the pageout daemon (handle == NULL) - * we should not wait for memory as it could resulting in deadlock. - */ -static vm_pager_t -swap_pager_alloc(handle, size, prot, foff) - caddr_t handle; - register vm_size_t size; - vm_prot_t prot; - vm_offset_t foff; -{ - register vm_pager_t pager; - register sw_pager_t swp; - struct swtab *swt; - int waitok; - -#ifdef DEBUG - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) - printf("swpg_alloc(%p, %lx, %x)\n", handle, size, prot); -#endif - /* - * If this is a "named" anonymous region, look it up and - * return the appropriate pager if it exists. - */ - if (handle) { - pager = vm_pager_lookup(&swap_pager_list, handle); - if (pager != NULL) { - /* - * Use vm_object_lookup to gain a reference - * to the object and also to remove from the - * object cache. - */ - if (vm_object_lookup(pager) == NULL) - panic("swap_pager_alloc: bad object"); - return (pager); - } - } - /* - * Pager doesn't exist, allocate swap management resources - * and initialize. - */ - waitok = handle ? M_WAITOK : M_NOWAIT; - pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); - if (pager == NULL) - return (NULL); - swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); - if (swp == NULL) { -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("swpg_alloc: swpager malloc failed\n"); -#endif - free((caddr_t)pager, M_VMPAGER); - return (NULL); - } - size = round_page(size); - for (swt = swtab; swt->st_osize; swt++) - if (size <= swt->st_osize) - break; -#ifdef DEBUG - swt->st_inuse++; - swt->st_usecnt++; -#endif - swp->sw_osize = size; - swp->sw_bsize = swt->st_bsize; - swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize; - swp->sw_blocks = (sw_blk_t)malloc(swp->sw_nblocks * - sizeof(*swp->sw_blocks), M_VMPGDATA, M_NOWAIT); - if (swp->sw_blocks == NULL) { - free((caddr_t)swp, M_VMPGDATA); - free((caddr_t)pager, M_VMPAGER); -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("swpg_alloc: sw_blocks malloc failed\n"); - swt->st_inuse--; - swt->st_usecnt--; -#endif - return (FALSE); - } - bzero((caddr_t)swp->sw_blocks, - swp->sw_nblocks * sizeof(*swp->sw_blocks)); - swp->sw_poip = swp->sw_cnt = 0; - if (handle) { - vm_object_t object; - - swp->sw_flags = SW_NAMED; - TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list); - /* - * Consistant with other pagers: return with object - * referenced. Can't do this with handle == NULL - * since it might be the pageout daemon calling. 
- */ - object = vm_object_allocate(size); - vm_object_enter(object, pager); - vm_object_setpager(object, pager, 0, FALSE); - } else { - swp->sw_flags = 0; - pager->pg_list.tqe_next = NULL; - pager->pg_list.tqe_prev = NULL; - } - pager->pg_handle = handle; - pager->pg_ops = &swappagerops; - pager->pg_type = PG_SWAP; - pager->pg_flags = PG_CLUSTERPUT; - pager->pg_data = swp; - -#ifdef DEBUG - if (swpagerdebug & SDB_ALLOC) - printf("swpg_alloc: pg_data %p, %x of %x at %p\n", - swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks); -#endif - return (pager); -} - -static void -swap_pager_dealloc(pager) - vm_pager_t pager; -{ - register int i; - register sw_blk_t bp; - register sw_pager_t swp; - int s; -#ifdef DEBUG - struct swtab *swt; - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return; - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) - printf("swpg_dealloc(%p)\n", pager); -#endif - /* - * Remove from list right away so lookups will fail if we - * block for pageout completion. - */ - swp = (sw_pager_t) pager->pg_data; - if (swp->sw_flags & SW_NAMED) { - TAILQ_REMOVE(&swap_pager_list, pager, pg_list); - swp->sw_flags &= ~SW_NAMED; - } -#ifdef DEBUG - for (swt = swtab; swt->st_osize; swt++) - if (swp->sw_osize <= swt->st_osize) - break; - swt->st_inuse--; -#endif - - /* - * Wait for all pageouts to finish and remove - * all entries from cleaning list. - */ - s = splbio(); - while (swp->sw_poip) { - swp->sw_flags |= SW_WANTED; - (void) tsleep(swp, PVM, "swpgdealloc", 0); - } - splx(s); - swap_pager_clean(B_WRITE); - - /* - * Free left over swap blocks - */ - for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) - if (bp->swb_block) { -#ifdef DEBUG - if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL)) - printf("swpg_dealloc: blk %x\n", - bp->swb_block); -#endif - swap_free(swp->sw_bsize, bp->swb_block); - } - /* - * Free swap management resources - */ - free((caddr_t)swp->sw_blocks, M_VMPGDATA); - free((caddr_t)swp, M_VMPGDATA); - free((caddr_t)pager, M_VMPAGER); -} - -static int -swap_pager_getpage(pager, mlist, npages, sync) - vm_pager_t pager; - vm_page_t *mlist; - int npages; - boolean_t sync; -{ - register int rv; -#ifdef DIAGNOSTIC - vm_page_t m; - int i; -#endif - -#ifdef DEBUG - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_getpage(%p, %p, %x, %x)\n", - pager, mlist, npages, sync); -#endif -#ifdef DIAGNOSTIC - for (i = 0; i < npages; i++) { - m = mlist[i]; - - if (m->flags & PG_FAULTING) - panic("swap_pager_getpage: page is already faulting"); - m->flags |= PG_FAULTING; - } -#endif - rv = swap_pager_io((sw_pager_t)pager->pg_data, mlist, npages, B_READ); -#ifdef DIAGNOSTIC - for (i = 0; i < npages; i++) { - m = mlist[i]; - - m->flags &= ~PG_FAULTING; - } -#endif - return (rv); -} - -static int -swap_pager_putpage(pager, mlist, npages, sync) - vm_pager_t pager; - vm_page_t *mlist; - int npages; - boolean_t sync; -{ - -#ifdef DEBUG - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_putpage(%p, %p, %x, %x)\n", - pager, mlist, npages, sync); -#endif - if (pager == NULL) { - swap_pager_clean(B_WRITE); - return (VM_PAGER_OK); /* ??? */ - } - return (swap_pager_io((sw_pager_t)pager->pg_data, mlist, npages, - B_WRITE | (sync ? 
0 : B_ASYNC))); -} - -static boolean_t -swap_pager_haspage(pager, offset) - vm_pager_t pager; - vm_offset_t offset; -{ - register sw_pager_t swp; - register sw_blk_t swb; - int ix; - -#ifdef DEBUG - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) - printf("swpg_haspage(%p, %lx) ", pager, offset); -#endif - swp = (sw_pager_t) pager->pg_data; - ix = offset / dbtob(swp->sw_bsize); - if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { -#ifdef DEBUG - if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK)) - printf("swpg_haspage: %p bad offset %lx, ix %x\n", - swp->sw_blocks, offset, ix); -#endif - return (FALSE); - } - swb = &swp->sw_blocks[ix]; - if (swb->swb_block) - ix = atop(offset % dbtob(swp->sw_bsize)); -#ifdef DEBUG - if (swpagerdebug & SDB_ALLOCBLK) - printf("%p blk %x+%x ", swp->sw_blocks, swb->swb_block, ix); - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) - printf("-> %c\n", - "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]); -#endif - if (swb->swb_block && (swb->swb_mask & (1 << ix))) - return (TRUE); - return (FALSE); -} - -static void -swap_pager_cluster(pager, offset, loffset, hoffset) - vm_pager_t pager; - vm_offset_t offset; - vm_offset_t *loffset; - vm_offset_t *hoffset; -{ - sw_pager_t swp; - register int bsize; - vm_offset_t loff, hoff; - -#ifdef DEBUG - if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER)) - printf("swpg_cluster(%p, %lx) ", pager, offset); -#endif - swp = (sw_pager_t) pager->pg_data; - bsize = dbtob(swp->sw_bsize); - if (bsize > swap_pager_maxcluster) - bsize = swap_pager_maxcluster; - - loff = offset - (offset % bsize); -#ifdef DIAGNOSTIC - if (loff >= swp->sw_osize) - panic("swap_pager_cluster: bad offset"); -#endif - - hoff = loff + bsize; - if (hoff > swp->sw_osize) - hoff = swp->sw_osize; - - *loffset = loff; - *hoffset = hoff; -#ifdef DEBUG - if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER)) - printf("returns [%lx-%lx]\n", loff, hoff); -#endif -} - -/* - * Scaled down version of swap(). - * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed. - * BOGUS: lower level IO routines expect a KVA so we have to map our - * provided physical page into the KVA to keep them happy. - */ -static int -swap_pager_io(swp, mlist, npages, flags) - register sw_pager_t swp; - vm_page_t *mlist; - int npages; - int flags; -{ - register struct buf *bp; - register sw_blk_t swb; - register int s; - int ix; - u_int mask; - boolean_t rv; - vm_offset_t kva, off; - swp_clean_t spc; - vm_page_t m; - -#ifdef DEBUG - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return (VM_PAGER_FAIL); /* XXX: correct return? */ - if (swpagerdebug & (SDB_FOLLOW|SDB_IO)) - printf("swpg_io(%p, %p, %x, %x)\n", swp, mlist, npages, flags); - if (flags & B_READ) { - if (flags & B_ASYNC) - panic("swap_pager_io: cannot do ASYNC reads"); - if (npages != 1) - panic("swap_pager_io: cannot do clustered reads"); - } -#endif - - /* - * First determine if the page exists in the pager if this is - * a sync read. This quickly handles cases where we are - * following shadow chains looking for the top level object - * with the page. 
- */ - m = *mlist; - off = m->offset + m->object->paging_offset; - ix = off / dbtob(swp->sw_bsize); - if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { -#ifdef DEBUG - if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) { - printf("swap_pager_io: no swap block on write\n"); - return (VM_PAGER_BAD); - } -#endif - return (VM_PAGER_FAIL); - } - swb = &swp->sw_blocks[ix]; - off = off % dbtob(swp->sw_bsize); - if ((flags & B_READ) && - (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0)) - return (VM_PAGER_FAIL); - - /* - * For reads (pageins) and synchronous writes, we clean up - * all completed async pageouts. - */ - if ((flags & B_ASYNC) == 0) { - s = splbio(); - swap_pager_clean(flags&B_READ); -#ifdef DEBUG - if (swpagerdebug & SDB_PARANOIA) - swap_pager_clean_check(mlist, npages, flags&B_READ); -#endif - splx(s); - } - /* - * For async writes (pageouts), we cleanup completed pageouts so - * that all available resources are freed. Also tells us if this - * page is already being cleaned. If it is, or no resources - * are available, we try again later. - */ - else { - swap_pager_clean(B_WRITE); -#ifdef DEBUG - if (swpagerdebug & SDB_PARANOIA) - swap_pager_clean_check(mlist, npages, B_WRITE); -#endif - if (swap_pager_free.tqh_first == NULL) { -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("%s: no available io headers\n", - "swap_pager_io"); -#endif - return (VM_PAGER_AGAIN); - } - } - - /* - * Allocate a swap block if necessary. - */ - if (swb->swb_block == 0) { - swb->swb_block = swap_alloc(swp->sw_bsize); - if (swb->swb_block == 0) { -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("swpg_io: rmalloc of %x failed\n", - swp->sw_bsize); -#endif - /* - * XXX this is technically a resource shortage that - * should return AGAIN, but the situation isn't likely - * to be remedied just by delaying a little while and - * trying again (the pageout daemon's current response - * to AGAIN) so we just return FAIL. - */ - return (VM_PAGER_FAIL); - } -#ifdef DEBUG - if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK)) - printf("swpg_io: %p alloc blk %x at ix %x\n", - swp->sw_blocks, swb->swb_block, ix); -#endif - } - - /* - * Allocate a kernel virtual address and initialize so that PTE - * is available for lower level IO drivers. - */ - kva = vm_pager_map_pages(mlist, npages, !(flags & B_ASYNC)); - if (kva == NULL) { -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("%s: no KVA space to map pages\n", - "swap_pager_io"); -#endif - return (VM_PAGER_AGAIN); - } - - /* - * Get a swap buffer header and initialize it. - */ - s = splbio(); - while (bswlist.b_actf == NULL) { -#ifdef DEBUG - if (swpagerdebug & SDB_IO) /* XXX what should this be? */ - printf("swap_pager_io: wait on swbuf for %p (%d)\n", - m, flags); -#endif - bswlist.b_flags |= B_WANTED; - tsleep((caddr_t)&bswlist, PSWP+1, "swpgiobuf", 0); - } - bp = bswlist.b_actf; - bswlist.b_actf = bp->b_actf; - splx(s); - bp->b_flags = B_BUSY | (flags & B_READ); - bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ - bp->b_data = (caddr_t)kva; - bp->b_blkno = swb->swb_block + btodb(off); - bp->b_vp = 0; - buf_replacevnode(bp, swapdev_vp); - bp->b_bcount = npages * PAGE_SIZE; - - /* - * For writes we set up additional buffer fields, record a pageout - * in progress and mark that these swap blocks are now allocated. 
- */ - if ((bp->b_flags & B_READ) == 0) { - bp->b_dirtyoff = 0; - bp->b_dirtyend = npages * PAGE_SIZE; - s = splbio(); - swp->sw_poip++; - splx(s); - mask = (~(~0 << npages)) << atop(off); -#ifdef DEBUG - swap_pager_poip++; - if (swpagerdebug & SDB_WRITE) - printf("swpg_io: write: bp=%p swp=%p poip=%d\n", - bp, swp, swp->sw_poip); - if ((swpagerdebug & SDB_ALLOCBLK) && - (swb->swb_mask & mask) != mask) - printf("swpg_io: %p write %d pages at %x+%lx\n", - swp->sw_blocks, npages, swb->swb_block, atop(off)); - if (swpagerdebug & SDB_CLUSTER) - printf("swpg_io: off=%lx, npg=%x, mask=%x, bmask=%x\n", - off, npages, mask, swb->swb_mask); -#endif - swp->sw_cnt += count_bits(mask & ~swb->swb_mask); - swb->swb_mask |= mask; - } - /* - * If this is an async write we set up still more buffer fields - * and place a "cleaning" entry on the inuse queue. - */ - if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { -#ifdef DIAGNOSTIC - if (swap_pager_free.tqh_first == NULL) - panic("swpg_io: lost spc"); -#endif - spc = swap_pager_free.tqh_first; - TAILQ_REMOVE(&swap_pager_free, spc, spc_list); -#ifdef DIAGNOSTIC - if (spc->spc_flags != SPC_FREE) - panic("swpg_io: bad free spc"); -#endif - spc->spc_flags = SPC_BUSY; - spc->spc_bp = bp; - spc->spc_swp = swp; - spc->spc_kva = kva; - /* - * Record the first page. This allows swap_pager_clean - * to efficiently handle the common case of a single page. - * For clusters, it allows us to locate the object easily - * and we then reconstruct the rest of the mlist from spc_kva. - */ - spc->spc_m = m; - spc->spc_npages = npages; - bp->b_flags |= B_CALL; - bp->b_iodone = swap_pager_iodone; - s = splbio(); - TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list); - splx(s); - } - - /* - * Finally, start the IO operation. - * If it is async we are all done, otherwise we must wait for - * completion and cleanup afterwards. - */ -#ifdef DEBUG - if (swpagerdebug & SDB_IO) - printf("swpg_io: IO start: bp %p, db %lx, va %lx, pa %lx\n", - bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m)); -#endif - VOP_STRATEGY(bp); - if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { -#ifdef DEBUG - if (swpagerdebug & SDB_IO) - printf("swpg_io: IO started: bp %p\n", bp); -#endif - return (VM_PAGER_PEND); - } - s = splbio(); -#ifdef DEBUG - if (flags & B_READ) - swap_pager_piip++; - else - swap_pager_poip++; -#endif - while ((bp->b_flags & B_DONE) == 0) - (void) tsleep(bp, PVM, "swpgio", 0); - if ((flags & B_READ) == 0) - --swp->sw_poip; -#ifdef DEBUG - if (flags & B_READ) - --swap_pager_piip; - else - --swap_pager_poip; -#endif - rv = (bp->b_flags & B_ERROR) ? 
VM_PAGER_ERROR : VM_PAGER_OK; - bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); - bp->b_actf = bswlist.b_actf; - bswlist.b_actf = bp; - if (bp->b_vp) - brelvp(bp); - if (bswlist.b_flags & B_WANTED) { - bswlist.b_flags &= ~B_WANTED; - wakeup(&bswlist); - } - if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) { - m->flags |= PG_CLEAN; - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); - } - splx(s); -#ifdef DEBUG - if (swpagerdebug & SDB_IO) - printf("swpg_io: IO done: bp %p, rv %d\n", bp, rv); - if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR) - printf("swpg_io: IO error\n"); -#endif - vm_pager_unmap_pages(kva, npages); - return (rv); -} - -static void -swap_pager_clean(rw) - int rw; -{ - register swp_clean_t spc; - register int s, i; - vm_object_t object; - vm_page_t m; - -#ifdef DEBUG - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return; - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_clean(%x)\n", rw); -#endif - - for (;;) { - /* - * Look up and removal from inuse list must be done - * at splbio() to avoid conflicts with swap_pager_iodone. - */ - s = splbio(); - for (spc = swap_pager_inuse.tqh_first; - spc != NULL; - spc = spc->spc_list.tqe_next) { - /* - * If the operation is done, remove it from the - * list and process it. - * - * XXX if we can't get the object lock we also - * leave it on the list and try again later. - * Is there something better we could do? - */ - if ((spc->spc_flags & SPC_DONE) && - vm_object_lock_try(spc->spc_m->object)) { - TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list); - break; - } - } - splx(s); - - /* - * No operations done, thats all we can do for now. - */ - if (spc == NULL) - break; - - /* - * Found a completed operation so finish it off. - * Note: no longer at splbio since entry is off the list. - */ - m = spc->spc_m; - object = m->object; - - /* - * Process each page in the cluster. - * The first page is explicitly kept in the cleaning - * entry, others must be reconstructed from the KVA. - */ - for (i = 0; i < spc->spc_npages; i++) { - if (i) - m = vm_pager_atop(spc->spc_kva + ptoa(i)); - /* - * If no error mark as clean and inform the pmap - * system. If there was an error, mark as dirty - * so we will try again. - * - * XXX could get stuck doing this, should give up - * after awhile. - */ - if (spc->spc_flags & SPC_ERROR) { - printf("%s: clean of page %lx failed\n", - "swap_pager_clean", VM_PAGE_TO_PHYS(m)); - m->flags |= PG_LAUNDRY; - } else { - m->flags |= PG_CLEAN; - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); - } - m->flags &= ~PG_BUSY; - PAGE_WAKEUP(m); - } - - /* - * Done with the object, decrement the paging count - * and unlock it. - */ - vm_object_paging_end(object); - vm_object_unlock(object); - - /* - * Free up KVM used and put the entry back on the list. 
- */ - vm_pager_unmap_pages(spc->spc_kva, spc->spc_npages); - spc->spc_flags = SPC_FREE; - TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); -#ifdef DEBUG - if (swpagerdebug & SDB_WRITE) - printf("swpg_clean: free spc %p\n", spc); -#endif - } -} - -#ifdef DEBUG -static void -swap_pager_clean_check(mlist, npages, rw) - vm_page_t *mlist; - int npages; - int rw; -{ - register swp_clean_t spc; - boolean_t bad; - int i, j, s; - vm_page_t m; - - if (panicstr) - return; - - bad = FALSE; - s = splbio(); - for (spc = swap_pager_inuse.tqh_first; - spc != NULL; - spc = spc->spc_list.tqe_next) { - for (j = 0; j < spc->spc_npages; j++) { - m = vm_pager_atop(spc->spc_kva + ptoa(j)); - for (i = 0; i < npages; i++) - if (m == mlist[i]) { - if (swpagerdebug & SDB_ANOM) - printf( - "swpg_clean_check: %s: page %p on list, flags %x\n", - rw == B_WRITE ? "write" : "read", mlist[i], spc->spc_flags); - bad = TRUE; - } - } - } - splx(s); - if (bad) - panic("swpg_clean_check"); -} -#endif - -static void -swap_pager_iodone(bp) - register struct buf *bp; -{ - register swp_clean_t spc; - daddr_t blk; - int s; - -#ifdef DEBUG - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return; - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_iodone(%p)\n", bp); -#endif - s = splbio(); - for (spc = swap_pager_inuse.tqh_first; - spc != NULL; - spc = spc->spc_list.tqe_next) - if (spc->spc_bp == bp) - break; -#ifdef DIAGNOSTIC - if (spc == NULL) - panic("swap_pager_iodone: bp not found"); -#endif - - spc->spc_flags &= ~SPC_BUSY; - spc->spc_flags |= SPC_DONE; - if (bp->b_flags & B_ERROR) - spc->spc_flags |= SPC_ERROR; - spc->spc_bp = NULL; - blk = bp->b_blkno; - -#ifdef DEBUG - --swap_pager_poip; - if (swpagerdebug & SDB_WRITE) - printf("swpg_iodone: bp=%p swp=%p flags=%x spc=%p poip=%x\n", - bp, spc->spc_swp, spc->spc_swp->sw_flags, - spc, spc->spc_swp->sw_poip); -#endif - - spc->spc_swp->sw_poip--; - if (spc->spc_swp->sw_flags & SW_WANTED) { - spc->spc_swp->sw_flags &= ~SW_WANTED; - wakeup(spc->spc_swp); - } - - bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); - bp->b_actf = bswlist.b_actf; - bswlist.b_actf = bp; - if (bp->b_vp) - brelvp(bp); - if (bswlist.b_flags & B_WANTED) { - bswlist.b_flags &= ~B_WANTED; - wakeup(&bswlist); - } - wakeup(&vm_pages_needed); - splx(s); -} - -/* - * swap_pager_remove: - * - * This is called via the vm_pager_remove path and - * will remove any pages inside the range [from, to) - * backed by us. It is assumed that both addresses - * are multiples of PAGE_SIZE. The special case - * where TO is zero means: remove to end of object. - */ -static int -swap_pager_remove(pager, from, to) - vm_pager_t pager; - vm_offset_t from, to; -{ - sw_pager_t swp; - sw_blk_t swb; - int bsize, blk, bit, to_blk, to_bit, mask, cnt = 0; - -#ifdef DEBUG - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_remove()\n"); -#endif - - /* Special case stupid ranges. */ - if (to > 0 && from >= to) - return (0); - - swp = (sw_pager_t)pager->pg_data; - - /* - * If we back no pages, just return. XXX Can this - * ever be the case? At least all remove calls should - * be through vm_object_remove_from_pager which also - * deallocates the pager when it no longer backs any - * pages. Left is the initial case: can a swap-pager - * be created without any pages put into it? - */ - if (swp->sw_cnt == 0) - return (0); - - bsize = dbtob(swp->sw_bsize); - blk = from / bsize; - - /* Another fast one.. no blocks in range. 
*/ - if (blk >= swp->sw_nblocks) - return (0); - bit = atop(from % bsize); - - /* - * Deal with the special case with TO == 0. - * XXX Perhaps the code might be improved if we - * made to_blk & to_bit signify the inclusive end - * of range instead (i.e. to - 1). - */ - if (to) { - to_blk = to / bsize; - if (to_blk >= swp->sw_nblocks) { - to_blk = swp->sw_nblocks; - to_bit = 0; - } else - to_bit = atop(to % bsize); - } else { - to_blk = swp->sw_nblocks; - to_bit = 0; - } - - /* - * Loop over the range, remove pages as we find them. - * If all pages in a block get freed, deallocate the - * swap block as well. - */ - for (swb = &swp->sw_blocks[blk], mask = (1 << bit) - 1; - blk < to_blk || (blk == to_blk && to_bit); - blk++, swb++, mask = 0) { - - /* Don't bother if the block is already cleared. */ - if (swb->swb_block == 0) - continue; - - /* - * When coming to the end-block we need to - * adjust the mask in the other end, as well as - * ensuring this will be the last iteration. - */ - if (blk == to_blk) { - mask |= ~((1 << to_bit) - 1); - to_bit = 0; - } - - /* Count pages that will be removed. */ - cnt += count_bits(swb->swb_mask & ~mask); - - /* - * Remove pages by applying our mask, and if this - * means no pages are left in the block, free it. - */ - if ((swb->swb_mask &= mask) == 0) { - swap_free(swp->sw_bsize, swb->swb_block); - swb->swb_block = 0; - } - } - - /* Adjust the page count and return the removed count. */ - swp->sw_cnt -= cnt; -#ifdef DIAGNOSTIC - if (swp->sw_cnt < 0) - panic("swap_pager_remove: sw_cnt < 0"); -#endif - return (cnt); -} - -/* - * swap_pager_next: - * - * This is called via the vm_pager_next path and - * will return the offset of the next page (addresswise) - * which this pager is backing. If there are no more - * pages we will return the size of the pager's managed - * space (which by definition is larger than any page's - * offset). - */ -static vm_offset_t -swap_pager_next(pager, offset) - vm_pager_t pager; - vm_offset_t offset; -{ - sw_pager_t swp; - sw_blk_t swb; - int bsize, blk, bit, to_blk, to_bit, mask; - -#ifdef DEBUG - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_next()\n"); -#endif - - swp = (sw_pager_t)pager->pg_data; - - /* - * If we back no pages, just return our size. XXX Can - * this ever be the case? At least all remove calls - * should be through vm_object_remove_from_pager which - * also deallocates the pager when it no longer backs any - * pages. Left is the initial case: can a swap-pager - * be created without any pages put into it? - */ - if (swp->sw_cnt == 0) - return (swp->sw_osize); - - bsize = dbtob(swp->sw_bsize); - blk = offset / bsize; - - /* Another fast one.. no blocks in range. */ - if (blk >= swp->sw_nblocks) - return (swp->sw_osize); - bit = atop(offset % bsize); - to_blk = swp->sw_osize / bsize; - to_bit = atop(swp->sw_osize % bsize); - - /* - * Loop over the remaining blocks, returning as soon - * as we find a page. - */ - swb = &swp->sw_blocks[blk]; - mask = ~((1 << bit) - 1); - for (;;) { - if (blk == to_blk) { - /* Nothing to be done in this end-block? */ - if (to_bit == 0) - break; - mask &= (1 << to_bit) - 1; - } - - /* - * Check this block for a backed page and return - * its offset if there. - */ - mask &= swb->swb_mask; - if (mask) - return (blk * bsize + (ffs (mask) - 1) * PAGE_SIZE); - - /* - * If we handled the end of range now, this - * means we are ready. - */ - if (blk == to_blk) - break; - - /* Get on with the next block. 
*/ - blk++; - swb++; - mask = ~0; - } - return (swp->sw_osize); -} - -/* - * swap_pager_count: - * - * Just returns the count of pages backed by this pager. - */ -int -swap_pager_count(pager) - vm_pager_t pager; -{ -#ifndef notyet - return ((sw_pager_t)pager->pg_data)->sw_cnt; -#else - sw_pager_t swp; - sw_blk_t swb; - int i, cnt = 0; - - swp = (sw_pager_t)pager->pg_data; - if (swp->sw_blocks == NULL) - return (0); - for (i = 0; i < swp->sw_nblocks; i++) - cnt += count_bits(swp->sw_blocks[i].swb_mask); - return (cnt); -#endif -} - -/* - * count_bits: - * - * Counts the number of set bits in a word. - */ -static int -count_bits(x) - u_int x; -{ - int cnt = 0; - - while (x) { - cnt += x & 1; - x >>= 1; - } - return (cnt); -} diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c deleted file mode 100644 index 40cfd7821f4..00000000000 --- a/sys/vm/vm_fault.c +++ /dev/null @@ -1,1015 +0,0 @@ -/* $OpenBSD: vm_fault.c,v 1.19 2001/06/08 08:09:43 art Exp $ */ -/* $NetBSD: vm_fault.c,v 1.21 1998/01/31 04:02:39 ross Exp $ */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_fault.c 8.5 (Berkeley) 1/9/95 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Page fault handling module. - */ - -#include <sys/param.h> -#include <sys/proc.h> -#include <sys/systm.h> -#include <sys/user.h> - -#include <vm/vm.h> -#include <vm/vm_page.h> -#include <vm/vm_pageout.h> - -/* - * vm_fault: - * - * Handle a page fault occuring at the given address, - * requiring the given permissions, in the map specified. - * If successful, the page is inserted into the - * associated physical map. - * - * NOTE: the given address should be truncated to the - * proper page address. - * - * KERN_SUCCESS is returned if the page fault is handled; otherwise, - * a standard error specifying why the fault is fatal is returned. - * - * - * The map in question must be referenced, and remains so. - * Caller may hold no locks. - */ -int -vm_fault(map, vaddr, fault_type, change_wiring) - vm_map_t map; - vm_offset_t vaddr; - vm_prot_t fault_type; - boolean_t change_wiring; -{ - vm_object_t first_object; - vm_offset_t first_offset; - vm_map_entry_t entry; - register vm_object_t object; - register vm_offset_t offset; - register vm_page_t m; - vm_page_t first_m; - vm_prot_t prot; - int result; - boolean_t wired; - boolean_t su; - boolean_t lookup_still_valid; - boolean_t page_exists; - vm_page_t old_m; - vm_object_t next_object; - - cnt.v_faults++; /* needs lock XXX */ - -/* - * Recovery actions - */ -#define FREE_PAGE(m) { \ - PAGE_WAKEUP(m); \ - vm_page_lock_queues(); \ - vm_page_free(m); \ - vm_page_unlock_queues(); \ -} - -#define RELEASE_PAGE(m) { \ - PAGE_WAKEUP(m); \ - vm_page_lock_queues(); \ - vm_page_activate(m); \ - vm_page_unlock_queues(); \ -} - -#define UNLOCK_MAP { \ - if (lookup_still_valid) { \ - vm_map_lookup_done(map, entry); \ - lookup_still_valid = FALSE; \ - } \ -} - -#define UNLOCK_THINGS { \ - vm_object_paging_end(object); \ - vm_object_unlock(object); \ - if (object != first_object) { \ - vm_object_lock(first_object); \ - FREE_PAGE(first_m); \ - vm_object_paging_end(first_object); \ - vm_object_unlock(first_object); \ - } \ - UNLOCK_MAP; \ -} - -#define UNLOCK_AND_DEALLOCATE { \ - UNLOCK_THINGS; \ - vm_object_deallocate(first_object); \ -} - - RetryFault: ; - - /* - * Find the backing store object and offset into - * it to begin the search. - */ - - if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry, - &first_object, &first_offset, &prot, &wired, &su)) != - KERN_SUCCESS) { - return (result); - } - lookup_still_valid = TRUE; - - if (wired) - fault_type = prot; - - first_m = NULL; - - /* - * Make a reference to this object to - * prevent its disposal while we are messing with - * it. Once we have the reference, the map is free - * to be diddled. Since objects reference their - * shadows (and copies), they will stay around as well. 
- */ - - vm_object_lock(first_object); - - first_object->ref_count++; - vm_object_paging_begin(first_object); - - /* - * INVARIANTS (through entire routine): - * - * 1) At all times, we must either have the object lock or a busy - * page in some object to prevent some other thread from trying - * to bring in the same page. - * - * Note that we cannot hold any locks during the pager access or - * when waiting for memory, so we use a busy page then. - * - * Note also that we aren't as concerned about more than one thead - * attempting to pager_data_unlock the same page at once, so we - * don't hold the page as busy then, but do record the highest - * unlock value so far. [Unlock requests may also be delivered - * out of order.] - * - * 2) Once we have a busy page, we must remove it from the pageout - * queues, so that the pageout daemon will not grab it away. - * - * 3) To prevent another thread from racing us down the shadow chain - * and entering a new page in the top object before we do, we must - * keep a busy page in the top object while following the shadow - * chain. - * - * 4) We must increment paging_in_progress on any object for which we - * have a busy page, to prevent vm_object_collapse from removing - * the busy page without our noticing. - */ - - /* - * Search for the page at object/offset. - */ - object = first_object; - offset = first_offset; - - /* - * See whether this page is resident - */ - while (TRUE) { - m = vm_page_lookup(object, offset); - if (m != NULL) { - /* - * If the page is being brought in, - * wait for it and then retry. - */ - if (m->flags & PG_BUSY) { -#ifdef DOTHREADS - int wait_result; - - PAGE_ASSERT_WAIT(m, !change_wiring); - UNLOCK_THINGS; - thread_block("mFltbsy"); - wait_result = current_thread()->wait_result; - vm_object_deallocate(first_object); - if (wait_result != THREAD_AWAKENED) - return (KERN_SUCCESS); - goto RetryFault; -#else - PAGE_ASSERT_WAIT(m, !change_wiring); - UNLOCK_THINGS; - cnt.v_intrans++; - thread_block("mFltbsy2"); - vm_object_deallocate(first_object); - goto RetryFault; -#endif - } - - /* - * Remove the page from the pageout daemon's - * reach while we play with it. - */ - - vm_page_lock_queues(); - if (m->flags & PG_INACTIVE) { - TAILQ_REMOVE(&vm_page_queue_inactive, m, - pageq); - m->flags &= ~PG_INACTIVE; - cnt.v_inactive_count--; - cnt.v_reactivated++; - } - - if (m->flags & PG_ACTIVE) { - TAILQ_REMOVE(&vm_page_queue_active, m, pageq); - m->flags &= ~PG_ACTIVE; - cnt.v_active_count--; - } - vm_page_unlock_queues(); - - /* - * Mark page busy for other threads. - */ - m->flags |= PG_BUSY; - if (curproc != &proc0) - curproc->p_addr->u_stats.p_ru.ru_minflt++; - break; - } - - if (((object->pager != NULL) && (!change_wiring || wired)) - || (object == first_object)) { - - /* - * Allocate a new page for this object/offset - * pair. - */ - m = vm_page_alloc(object, offset); - - if (m == NULL) { - UNLOCK_AND_DEALLOCATE; - vm_wait("fVfault1"); - goto RetryFault; - } - } - - if (object->pager != NULL && (!change_wiring || wired)) { - int rv; - - /* - * Now that we have a busy page, we can - * release the object lock. - */ - vm_object_unlock(object); - - /* - * Call the pager to retrieve the data, if any, - * after releasing the lock on the map. - */ - UNLOCK_MAP; - cnt.v_pageins++; - rv = vm_pager_get(object->pager, m, TRUE); - - /* - * Reaquire the object lock to preserve our - * invariant. - */ - vm_object_lock(object); - - /* - * Found the page. - * Leave it busy while we play with it. 
- */ - if (rv == VM_PAGER_OK) { - /* - * Relookup in case pager changed page. - * Pager is responsible for disposition - * of old page if moved. - */ - m = vm_page_lookup(object, offset); - - cnt.v_pgpgin++; - m->flags &= ~PG_FAKE; - m->flags |= PG_CLEAN; - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); - if (curproc != &proc0) - curproc->p_addr-> - u_stats.p_ru.ru_majflt++; - break; - } - - /* - * IO error or page outside the range of the pager: - * cleanup and return an error. - */ - if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { - FREE_PAGE(m); - UNLOCK_AND_DEALLOCATE; - return (KERN_PROTECTION_FAILURE); /* XXX */ - } - /* - * rv == VM_PAGER_FAIL: - * - * Page does not exist at this object/offset. - * Free the bogus page (waking up anyone waiting - * for it) and continue on to the next object. - * - * If this is the top-level object, we must - * leave the busy page to prevent another - * thread from rushing past us, and inserting - * the page in that object at the same time - * that we are. - */ - if (object != first_object) { - FREE_PAGE(m); - /* note that `m' is not used after this */ - } - } - - /* - * We get here if the object has no pager (or unwiring) - * or the pager doesn't have the page. - */ - if (object == first_object) - first_m = m; - - /* - * Move on to the next object. Lock the next - * object before unlocking the current one. - */ - - offset += object->shadow_offset; - next_object = object->shadow; - if (next_object == NULL) { - /* - * If there's no object left, fill the page - * in the top object with zeros. - */ - if (object != first_object) { - vm_object_paging_end(object); - vm_object_unlock(object); - - object = first_object; - offset = first_offset; - m = first_m; - vm_object_lock(object); - } - first_m = NULL; - - vm_page_zero_fill(m); - cnt.v_zfod++; - m->flags &= ~PG_FAKE; - if (curproc != &proc0) - curproc->p_addr->u_stats.p_ru.ru_minflt++; - break; - } - else { - vm_object_lock(next_object); - if (object != first_object) - vm_object_paging_end(object); - vm_object_unlock(object); - object = next_object; - vm_object_paging_begin(object); - } - } - - if ((m->flags & (PG_ACTIVE | PG_INACTIVE | PG_BUSY)) != PG_BUSY) - panic("vm_fault: active, inactive or !busy after main loop"); - - /* - * PAGE HAS BEEN FOUND. - * [Loop invariant still holds -- the object lock is held.] - */ - old_m = m; /* save page that would be copied */ - - /* - * If the page is being written, but isn't already owned by the - * top-level object, we have to copy it into a new page owned - * by the top-level object. - */ - if (object != first_object) { - /* - * We only really need to copy if we want to write it. - */ - if (fault_type & VM_PROT_WRITE) { - - /* - * If we try to collapse first_object at this - * point, we may deadlock when we try to get - * the lock on an intermediate object (since we - * have the bottom object locked). We can't - * unlock the bottom object, because the page - * we found may move (by collapse) if we do. - * - * Instead, we first copy the page. Then, when - * we have no more use for the bottom object, - * we unlock it and try to collapse. - * - * Note that we copy the page even if we didn't - * need to... that's the breaks. - */ - - /* - * We already have an empty page in - * first_object - use it. 
- */ - vm_page_copy(m, first_m); - first_m->flags &= ~PG_FAKE; - - /* - * If another map is truly sharing this - * page with us, we have to flush all - * uses of the original page, since we - * can't distinguish those which want the - * original from those which need the - * new copy. - * - * XXX If we know that only one map has - * access to this page, then we could - * avoid the pmap_page_protect() call. - */ - vm_page_lock_queues(); - vm_page_deactivate(m); - pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); - vm_page_unlock_queues(); - - /* - * We no longer need the old page or object. - */ - PAGE_WAKEUP(m); - vm_object_paging_end(object); - vm_object_unlock(object); - - /* - * Only use the new page below... - */ - cnt.v_cow_faults++; - m = first_m; - object = first_object; - offset = first_offset; - - /* - * Now that we've gotten the copy out of the - * way, let's try to collapse the top object. - */ - vm_object_lock(object); - /* - * But we have to play ugly games with - * paging_in_progress to do that... - */ - vm_object_paging_end(object); - vm_object_collapse(object); - vm_object_paging_begin(object); - } else { - prot &= ~VM_PROT_WRITE; - m->flags |= PG_COPYONWRITE; - } - } - - if (m->flags & (PG_ACTIVE|PG_INACTIVE)) - panic("%s: active or inactive before copy object handling", - "vm_fault"); - - /* - * If the page is being written, but hasn't been - * copied to the copy-object, we have to copy it there. - */ - RetryCopy: - if (first_object->copy != NULL) { - vm_object_t copy_object = first_object->copy; - vm_offset_t copy_offset; - vm_page_t copy_m; - - /* - * We only need to copy if we want to write it. - */ - if ((fault_type & VM_PROT_WRITE) == 0) { - prot &= ~VM_PROT_WRITE; - m->flags |= PG_COPYONWRITE; - } - else { - /* - * Try to get the lock on the copy_object. - */ - if (!vm_object_lock_try(copy_object)) { - vm_object_unlock(object); - /* should spin a bit here... */ - vm_object_lock(object); - goto RetryCopy; - } - - /* - * Make another reference to the copy-object, - * to keep it from disappearing during the - * copy. - */ - copy_object->ref_count++; - - /* - * Does the page exist in the copy? - */ - copy_offset = first_offset - - copy_object->shadow_offset; - copy_m = vm_page_lookup(copy_object, copy_offset); - if ((page_exists = (copy_m != NULL)) != 0) { - if (copy_m->flags & PG_BUSY) { -#ifdef DOTHREADS - int wait_result; - - /* - * If the page is being brought - * in, wait for it and then retry. - */ - PAGE_ASSERT_WAIT(copy_m, - !change_wiring); - RELEASE_PAGE(m); - copy_object->ref_count--; - vm_object_unlock(copy_object); - UNLOCK_THINGS; - thread_block("mCpybsy"); - wait_result = - current_thread()->wait_result; - vm_object_deallocate(first_object); - if (wait_result != THREAD_AWAKENED) - return (KERN_SUCCESS); - goto RetryFault; -#else - /* - * If the page is being brought - * in, wait for it and then retry. - */ - PAGE_ASSERT_WAIT(copy_m, - !change_wiring); - RELEASE_PAGE(m); - copy_object->ref_count--; - vm_object_unlock(copy_object); - UNLOCK_THINGS; - thread_block("mCpybsy2"); - vm_object_deallocate(first_object); - goto RetryFault; -#endif - } - } - - /* - * If the page is not in memory (in the object) - * and the object has a pager, we have to check - * if the pager has the data in secondary - * storage. - */ - if (!page_exists) { - - /* - * If we don't allocate a (blank) page - * here... another thread could try - * to page it in, allocate a page, and - * then block on the busy page in its - * shadow (first_object). 
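The write-fault branch above copies the backing page into the page already reserved in first_object, revokes every mapping of the original, and continues with the copy; a read fault instead maps the original with write permission stripped and PG_COPYONWRITE set, deferring the copy to a later write fault. A compact model of that decision; the helper name, the copy_on_write field and the prot constants are invented for the sketch:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE  4096
#define PROT_READ  0x1
#define PROT_WRITE 0x2

struct page {
	char data[PAGE_SIZE];
	int copy_on_write;	/* stands in for PG_COPYONWRITE */
};

/*
 * Resolve a fault on `orig', which lives in a backing object.
 * `first' is the page reserved in the top-level object.
 * Returns the page to map and clamps *prot for read faults.
 */
static struct page *
resolve_cow(struct page *orig, struct page *first, int fault_write, int *prot)
{
	if (fault_write) {
		memcpy(first->data, orig->data, PAGE_SIZE); /* vm_page_copy() */
		/* all mappings of `orig' would be revoked here */
		return first;		/* only the copy is used from now on */
	}
	*prot &= ~PROT_WRITE;		/* share it read-only for now */
	orig->copy_on_write = 1;	/* a later write fault will copy */
	return orig;
}

int
main(void)
{
	static struct page orig, first;
	int prot = PROT_READ | PROT_WRITE;
	struct page *m;

	strcpy(orig.data, "backing data");
	m = resolve_cow(&orig, &first, 0, &prot);
	printf("read fault: mapped %s page, prot=%#x\n",
	    m == &orig ? "original" : "copied", prot);

	prot = PROT_READ | PROT_WRITE;
	m = resolve_cow(&orig, &first, 1, &prot);
	printf("write fault: mapped %s page: \"%s\"\n",
	    m == &first ? "copied" : "original", m->data);
	return 0;
}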
Then we'd - * trip over the busy page after we - * found that the copy_object's pager - * doesn't have the page... - */ - copy_m = - vm_page_alloc(copy_object, copy_offset); - if (copy_m == NULL) { - /* - * Wait for a page, then retry. - */ - RELEASE_PAGE(m); - copy_object->ref_count--; - vm_object_unlock(copy_object); - UNLOCK_AND_DEALLOCATE; - vm_wait("fCopy"); - goto RetryFault; - } - - if (copy_object->pager != NULL) { - vm_object_unlock(object); - vm_object_unlock(copy_object); - UNLOCK_MAP; - - page_exists = vm_pager_has_page( - copy_object->pager, - (copy_offset + - copy_object->paging_offset)); - - vm_object_lock(copy_object); - - /* - * Since the map is unlocked, someone - * else could have copied this object - * and put a different copy_object - * between the two. Or, the last - * reference to the copy-object (other - * than the one we have) may have - * disappeared - if that has happened, - * we don't need to make the copy. - */ - if (copy_object->shadow != object || - copy_object->ref_count == 1) { - /* - * Gaah... start over! - */ - FREE_PAGE(copy_m); - vm_object_unlock(copy_object); - /* may block */ - vm_object_deallocate( - copy_object); - vm_object_lock(object); - goto RetryCopy; - } - vm_object_lock(object); - - if (page_exists) { - /* - * We didn't need the page - */ - FREE_PAGE(copy_m); - } - } - } - if (!page_exists) { - /* - * Must copy page into copy-object. - */ - vm_page_copy(m, copy_m); - copy_m->flags &= ~PG_FAKE; - - /* - * Things to remember: - * 1. The copied page must be marked 'dirty' - * so it will be paged out to the copy - * object. - * 2. If the old page was in use by any users - * of the copy-object, it must be removed - * from all pmaps. (We can't know which - * pmaps use it.) - */ - vm_page_lock_queues(); - pmap_page_protect(VM_PAGE_TO_PHYS(old_m), - VM_PROT_NONE); - copy_m->flags &= ~PG_CLEAN; - vm_page_activate(copy_m); /* XXX */ - vm_page_unlock_queues(); - - PAGE_WAKEUP(copy_m); - } - /* - * The reference count on copy_object must be - * at least 2: one for our extra reference, - * and at least one from the outside world - * (we checked that when we last locked - * copy_object). - */ - copy_object->ref_count--; - vm_object_unlock(copy_object); - m->flags &= ~PG_COPYONWRITE; - } - } - - if (m->flags & (PG_ACTIVE | PG_INACTIVE)) - panic("vm_fault: active or inactive before retrying lookup"); - - /* - * We must verify that the maps have not changed - * since our last lookup. - */ - if (!lookup_still_valid) { - vm_object_t retry_object; - vm_offset_t retry_offset; - vm_prot_t retry_prot; - - /* - * Since map entries may be pageable, make sure we can - * take a page fault on them. - */ - vm_object_unlock(object); - - /* - * To avoid trying to write_lock the map while another - * thread has it read_locked (in vm_map_pageable), we - * do not try for write permission. If the page is - * still writable, we will get write permission. If it - * is not, or has been marked needs_copy, we enter the - * mapping without write permission, and will merely - * take another fault. - */ - result = vm_map_lookup(&map, vaddr, - fault_type & ~VM_PROT_WRITE, &entry, &retry_object, - &retry_offset, &retry_prot, &wired, &su); - - vm_object_lock(object); - - /* - * If we don't need the page any longer, put it on the - * active list (the easiest thing to do here). If no - * one needs it, pageout will grab it eventually. 
- */ - - if (result != KERN_SUCCESS) { - RELEASE_PAGE(m); - UNLOCK_AND_DEALLOCATE; - return (result); - } - - lookup_still_valid = TRUE; - - if ((retry_object != first_object) || - (retry_offset != first_offset)) { - RELEASE_PAGE(m); - UNLOCK_AND_DEALLOCATE; - goto RetryFault; - } - - /* - * Check whether the protection has changed or the object - * has been copied while we left the map unlocked. - * Changing from read to write permission is OK - we leave - * the page write-protected, and catch the write fault. - * Changing from write to read permission means that we - * can't mark the page write-enabled after all. - */ - prot &= retry_prot; - if (m->flags & PG_COPYONWRITE) - prot &= ~VM_PROT_WRITE; - } - - /* - * (the various bits we're fiddling with here are locked by - * the object's lock) - */ - - /* XXX This distorts the meaning of the copy_on_write bit */ - - if (prot & VM_PROT_WRITE) - m->flags &= ~PG_COPYONWRITE; - - /* - * It's critically important that a wired-down page be faulted - * only once in each map for which it is wired. - */ - - if (m->flags & (PG_ACTIVE | PG_INACTIVE)) - panic("vm_fault: active or inactive before pmap_enter"); - - vm_object_unlock(object); - - /* - * Put this page into the physical map. - * We had to do the unlock above because pmap_enter - * may cause other faults. We don't put the - * page back on the active queue until later so - * that the page-out daemon won't find us (yet). - */ - - pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired, 0); - - /* - * If the page is not wired down, then put it where the - * pageout daemon can find it. - */ - vm_object_lock(object); - vm_page_lock_queues(); - if (change_wiring) { - if (wired) - vm_page_wire(m); - else - vm_page_unwire(m); - } - else - vm_page_activate(m); - vm_page_unlock_queues(); - - /* - * Unlock everything, and return - */ - - PAGE_WAKEUP(m); - UNLOCK_AND_DEALLOCATE; - - return (KERN_SUCCESS); -} - -/* - * vm_fault_wire: - * - * Wire down a range of virtual addresses in a map. - */ -int -vm_fault_wire(map, start, end) - vm_map_t map; - vm_offset_t start, end; -{ - register vm_offset_t va; - register pmap_t pmap; - int rv; - - pmap = vm_map_pmap(map); - - /* - * We simulate a fault to get the page and enter it - * in the physical map. - */ - - for (va = start; va < end; va += PAGE_SIZE) { - rv = vm_fault(map, va, VM_PROT_NONE, TRUE); - if (rv) { - if (va != start) - vm_fault_unwire(map, start, va); - return (rv); - } - } - return (KERN_SUCCESS); -} - - -/* - * vm_fault_unwire: - * - * Unwire a range of virtual addresses in a map. - */ -void -vm_fault_unwire(map, start, end) - vm_map_t map; - vm_offset_t start, end; -{ - - register vm_offset_t va; - vm_offset_t pa; - register pmap_t pmap; - - pmap = vm_map_pmap(map); - - /* - * Since the pages are wired down, we must be able to - * get their mappings from the physical map system. - */ - vm_page_lock_queues(); - - for (va = start; va < end; va += PAGE_SIZE) { - if (pmap_extract(pmap, va, &pa) == FALSE) { - panic("unwire: page not in pmap"); - } - pmap_unwire(pmap, va); - vm_page_unwire(PHYS_TO_VM_PAGE(pa)); - } - vm_page_unlock_queues(); -} - -/* - * Routine: - * vm_fault_copy_entry - * Function: - * Copy all of the pages from a wired-down map entry to another. - * - * In/out conditions: - * The source and destination maps must be locked for write. - * The source map entry must be wired down (or be a sharing map - * entry corresponding to a main map entry that is wired down). 
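vm_fault_wire above simply faults the range in one page at a time, and if any page fails it unwires exactly the prefix that already succeeded before returning the error. The same partial-rollback pattern reduced to plain C; fault_one() and unwire_one() are invented stand-ins for vm_fault() and vm_fault_unwire(), with the failure simulated by a "bad" address:

#include <stdio.h>

#define PAGE_SIZE 4096

/* Pretend the page at `va' can be wired unless it is the "bad" page. */
static int
fault_one(unsigned long va, unsigned long bad_va)
{
	return (va == bad_va) ? -1 : 0;
}

static void
unwire_one(unsigned long va)
{
	printf("  unwire %#lx\n", va);
}

/* Wire [start, end); on failure, roll back what was already wired. */
static int
wire_range(unsigned long start, unsigned long end, unsigned long bad_va)
{
	unsigned long va;

	for (va = start; va < end; va += PAGE_SIZE) {
		if (fault_one(va, bad_va) != 0) {
			for (unsigned long undo = start; undo < va;
			    undo += PAGE_SIZE)
				unwire_one(undo);
			return -1;
		}
	}
	return 0;
}

int
main(void)
{
	if (wire_range(0x1000, 0x6000, 0x4000) != 0)
		printf("wiring failed, earlier pages rolled back\n");
	return 0;
}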
- */ -void -vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) - vm_map_t dst_map; - vm_map_t src_map; - vm_map_entry_t dst_entry; - vm_map_entry_t src_entry; -{ - - vm_object_t dst_object; - vm_object_t src_object; - vm_offset_t dst_offset; - vm_offset_t src_offset; - vm_prot_t prot; - vm_offset_t vaddr; - vm_page_t dst_m; - vm_page_t src_m; - -#ifdef lint - src_map++; -#endif - - src_object = src_entry->object.vm_object; - src_offset = src_entry->offset; - - /* - * Create the top-level object for the destination entry. - * (Doesn't actually shadow anything - we copy the pages - * directly.) - */ - dst_object = - vm_object_allocate((vm_size_t)(dst_entry->end - dst_entry->start)); - - dst_entry->object.vm_object = dst_object; - dst_entry->offset = 0; - - prot = dst_entry->max_protection; - - /* - * Loop through all of the pages in the entry's range, copying - * each one from the source object (it should be there) to the - * destination object. - */ - for (vaddr = dst_entry->start, dst_offset = 0; - vaddr < dst_entry->end; - vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { - - /* - * Allocate a page in the destination object - */ - vm_object_lock(dst_object); - do { - dst_m = vm_page_alloc(dst_object, dst_offset); - if (dst_m == NULL) { - vm_object_unlock(dst_object); - vm_wait("fVm_copy"); - vm_object_lock(dst_object); - } - } while (dst_m == NULL); - - /* - * Find the page in the source object, and copy it in. - * (Because the source is wired down, the page will be - * in memory.) - */ - vm_object_lock(src_object); - src_m = vm_page_lookup(src_object, dst_offset + src_offset); - if (src_m == NULL) - panic("vm_fault_copy_wired: page missing"); - - vm_page_copy(src_m, dst_m); - - /* - * Enter it in the pmap... - */ - vm_object_unlock(src_object); - vm_object_unlock(dst_object); - - pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), prot, - FALSE, 0); - - /* - * Mark it no longer busy, and put it on the active list. - */ - vm_object_lock(dst_object); - vm_page_lock_queues(); - vm_page_activate(dst_m); - vm_page_unlock_queues(); - PAGE_WAKEUP(dst_m); - vm_object_unlock(dst_object); - } - -} diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c deleted file mode 100644 index 9b86e5c7b37..00000000000 --- a/sys/vm/vm_glue.c +++ /dev/null @@ -1,532 +0,0 @@ -/* $OpenBSD: vm_glue.c,v 1.38 2001/06/08 08:09:43 art Exp $ */ -/* $NetBSD: vm_glue.c,v 1.55.4.1 1996/06/13 17:25:45 cgd Exp $ */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_glue.c 8.9 (Berkeley) 3/4/95 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/resourcevar.h> -#include <sys/buf.h> -#include <sys/user.h> -#ifdef SYSVSHM -#include <sys/shm.h> -#endif - -#include <vm/vm.h> -#include <vm/vm_extern.h> -#include <vm/vm_page.h> -#include <vm/vm_kern.h> - -#include <machine/cpu.h> - -int avefree = 0; /* XXX */ -unsigned maxdmap = MAXDSIZ; /* XXX */ -unsigned maxsmap = MAXSSIZ; /* XXX */ -int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool */ - -int -kernacc(addr, len, rw) - caddr_t addr; - int len, rw; -{ - boolean_t rv; - vm_offset_t saddr, eaddr; - vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; - - saddr = trunc_page((vaddr_t)addr); - eaddr = round_page((vaddr_t)addr+len); - rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot); - /* - * XXX there are still some things (e.g. the buffer cache) that - * are managed behind the VM system's back so even though an - * address is accessible in the mind of the VM system, there may - * not be physical pages where the VM thinks there is. This can - * lead to bogus allocation of pages in the kernel address space - * or worse, inconsistencies at the pmap level. We only worry - * about the buffer cache for now. - */ - if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers && - saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf)) - rv = FALSE; - return (rv == TRUE); -} - -int -useracc(addr, len, rw) - caddr_t addr; - int len, rw; -{ - boolean_t rv; - vm_prot_t prot = rw == B_READ ? 
VM_PROT_READ : VM_PROT_WRITE; - -#if defined(i386) || defined(pc532) - /* - * XXX - specially disallow access to user page tables - they are - * in the map. This is here until i386 & pc532 pmaps are fixed... - */ - if ((vm_offset_t) addr >= VM_MAXUSER_ADDRESS - || (vm_offset_t) addr + len > VM_MAXUSER_ADDRESS - || (vm_offset_t) addr + len <= (vm_offset_t) addr) - return (FALSE); -#endif - - rv = vm_map_check_protection(&curproc->p_vmspace->vm_map, - trunc_page((vaddr_t)addr), round_page((vaddr_t)addr+len), prot); - return (rv == TRUE); -} - -#ifdef KGDB -/* - * Change protections on kernel pages from addr to addr+len - * (presumably so debugger can plant a breakpoint). - * - * We force the protection change at the pmap level. If we were - * to use vm_map_protect a change to allow writing would be lazily- - * applied meaning we would still take a protection fault, something - * we really don't want to do. It would also fragment the kernel - * map unnecessarily. We cannot use pmap_protect since it also won't - * enforce a write-enable request. Using pmap_enter is the only way - * we can ensure the change takes place properly. - */ -void -chgkprot(addr, len, rw) - register caddr_t addr; - int len, rw; -{ - vm_prot_t prot; - vm_offset_t pa, sva, eva; - - prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE; - eva = round_page((vaddr_t)addr + len); - for (sva = trunc_page((vaddr_t)addr); sva < eva; sva += PAGE_SIZE) { - /* - * Extract physical address for the page. - */ - if (pmap_extract(pmap_kernel(), sva, &pa) == FALSE) - panic("chgkprot: invalid page"); - pmap_enter(pmap_kernel(), sva, pa, prot, TRUE, 0); - } -} -#endif - -int -vslock(addr, len) - caddr_t addr; - u_int len; -{ -#ifdef __i386__ - pmap_prefault(&curproc->p_vmspace->vm_map, (vm_offset_t)addr, len); -#endif - return (vm_map_pageable(&curproc->p_vmspace->vm_map, - trunc_page((vaddr_t)addr), - round_page((vaddr_t)addr+len), FALSE)); -} - -int -vsunlock(addr, len) - caddr_t addr; - u_int len; -{ - return (vm_map_pageable(&curproc->p_vmspace->vm_map, - trunc_page((vaddr_t)addr), - round_page((vaddr_t)addr+len), TRUE)); -} - -/* - * Implement fork's actions on an address space. - * Here we arrange for the address space to be copied or referenced, - * allocate a user struct (pcb and kernel stack), then call the - * machine-dependent layer to fill those in and make the new process - * ready to run. - * NOTE: the kernel stack may be at a different location in the child - * process, and thus addresses of automatic variables may be invalid - * after cpu_fork returns in the child process. We do nothing here - * after cpu_fork returns. - */ -#ifdef __FORK_BRAINDAMAGE -int -#else -void -#endif -vm_fork(p1, p2, stack, stacksize) - register struct proc *p1, *p2; - void *stack; - size_t stacksize; -{ - register struct user *up = p2->p_addr; - -#if defined(i386) || defined(pc532) - /* - * avoid copying any of the parent's pagetables or other per-process - * objects that reside in the map by marking all of them non-inheritable - */ - (void)vm_map_inherit(&p1->p_vmspace->vm_map, - VM_MAXUSER_ADDRESS, VM_MAX_ADDRESS, VM_INHERIT_NONE); -#endif - p2->p_vmspace = vmspace_fork(p1->p_vmspace); - -#ifdef SYSVSHM - if (p1->p_vmspace->vm_shm) - shmfork(p1->p_vmspace, p2->p_vmspace); -#endif - - vm_map_pageable(kernel_map, (vm_offset_t)up, - (vm_offset_t)up + USPACE, FALSE); - - /* - * p_stats currently point at fields in the user struct. Copy - * parts of p_stats, and zero out the rest. 
- */ - p2->p_stats = &up->u_stats; - bzero(&up->u_stats.pstat_startzero, - (unsigned) ((caddr_t)&up->u_stats.pstat_endzero - - (caddr_t)&up->u_stats.pstat_startzero)); - bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy, - ((caddr_t)&up->u_stats.pstat_endcopy - - (caddr_t)&up->u_stats.pstat_startcopy)); - -#if defined(i386) || defined(pc532) - { - vm_offset_t addr = VM_MAXUSER_ADDRESS; struct vm_map *vp; - - /* ream out old pagetables and kernel stack */ - vp = &p2->p_vmspace->vm_map; - (void)vm_deallocate(vp, addr, VM_MAX_ADDRESS - addr); - (void)vm_allocate(vp, &addr, VM_MAX_ADDRESS - addr, FALSE); - (void)vm_map_inherit(vp, addr, VM_MAX_ADDRESS, - VM_INHERIT_NONE); - } -#endif - -#ifdef __FORK_BRAINDAMAGE - /* - * cpu_fork will copy and update the kernel stack and pcb, - * and make the child ready to run. It marks the child - * so that it can return differently than the parent. - * It returns twice, once in the parent process and - * once in the child. - */ - return (cpu_fork(p1, p2, stack, stacksize)); -#else - /* - * cpu_fork will copy and update the kernel stack and pcb, - * and make the child ready to run. The child will exit - * directly to user mode on its first time slice, and will - * not return here. - */ - cpu_fork(p1, p2, stack, stacksize); -#endif -} - -/* - * Set default limits for VM system. - * Called for proc 0, and then inherited by all others. - */ -void -vm_init_limits(p) - register struct proc *p; -{ - - /* - * Set up the initial limits on process VM. - * Set the maximum resident set size to be all - * of (reasonably) available memory. This causes - * any single, large process to start random page - * replacement once it fills memory. - */ - p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ; - p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ; - p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ; - p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ; - p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(cnt.v_free_count); -} - -#include <vm/vm_pageout.h> - -#ifdef DEBUG -int enableswap = 1; -int swapdebug = 0; -#define SDB_FOLLOW 1 -#define SDB_SWAPIN 2 -#define SDB_SWAPOUT 4 -#endif - -/* - * Swap in a process's u-area. - */ -void -swapin(p) - struct proc *p; -{ - vm_offset_t addr; - int s; - - addr = (vm_offset_t)p->p_addr; - vm_map_pageable(kernel_map, addr, addr + USPACE, FALSE); - /* - * Some architectures need to be notified when the - * user area has moved to new physical page(s) (e.g. - * see pmax/pmax/vm_machdep.c). - */ - cpu_swapin(p); - s = splstatclock(); - if (p->p_stat == SRUN) - setrunqueue(p); - p->p_flag |= P_INMEM; - splx(s); - p->p_swtime = 0; - ++cnt.v_swpin; -} - -/* - * Brutally simple: - * 1. Attempt to swapin every swaped-out, runnable process in - * order of priority. - * 2. If not enough memory, wake the pageout daemon and let it - * clear some space. 
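The bzero/bcopy pair above depends on marker fields inside the per-process statistics: everything from pstat_startzero up to pstat_endzero is cleared in the child, and everything from pstat_startcopy up to pstat_endcopy is inherited from the parent. The same address-arithmetic trick in miniature; the struct and its field names are illustrative, not the real struct pstats:

#include <stdio.h>
#include <string.h>

struct stats {
	int start_zero;		/* first field cleared on fork */
	long faults;
	long syscalls;
	int end_zero;		/* first field NOT cleared */

	int start_copy;		/* first field copied from the parent */
	long start_time;
	int end_copy;		/* first field NOT copied */
};

static void
fork_stats(const struct stats *parent, struct stats *child)
{
	memset(&child->start_zero, 0,
	    (size_t)((char *)&child->end_zero - (char *)&child->start_zero));
	memcpy(&child->start_copy, &parent->start_copy,
	    (size_t)((char *)&parent->end_copy - (char *)&parent->start_copy));
}

int
main(void)
{
	struct stats parent = { 0, 123, 456, 0, 0, 999, 0 };
	struct stats child;

	memset(&child, 0xff, sizeof(child));	/* garbage, as freshly allocated */
	fork_stats(&parent, &child);
	printf("child: faults=%ld syscalls=%ld start_time=%ld\n",
	    child.faults, child.syscalls, child.start_time);
	return 0;
}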
- */ -void -scheduler() -{ - register struct proc *p; - register int pri; - struct proc *pp; - int ppri; - -loop: -#ifdef DEBUG - while (!enableswap) { - panic ("swap disabled??"); - tsleep((caddr_t)&proc0, PVM, "noswap", 0); - } -#endif - pp = NULL; - ppri = INT_MIN; - for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) { - pri = p->p_swtime + p->p_slptime - p->p_nice * 8; - if (pri > ppri) { - pp = p; - ppri = pri; - } - } - } -#ifdef DEBUG - if (swapdebug & SDB_FOLLOW) - printf("scheduler: running, procp %p pri %d\n", pp, ppri); -#endif - /* - * Nothing to do, back to sleep - */ - if ((p = pp) == NULL) { - tsleep((caddr_t)&proc0, PVM, "scheduler", 0); - goto loop; - } - - /* - * We would like to bring someone in. - * This part is really bogus cuz we could deadlock on memory - * despite our feeble check. - * XXX should require at least vm_swrss / 2 - */ - if (cnt.v_free_count > atop(USPACE)) { -#ifdef DEBUG - if (swapdebug & SDB_SWAPIN) - printf("swapin: pid %d(%s)@%p, pri %d free %d\n", - p->p_pid, p->p_comm, p->p_addr, ppri, - cnt.v_free_count); -#endif -#if defined(arc) || defined(pica) - vm_map_pageable(kernel_map, (vm_offset_t)p->p_addr, - (vm_offset_t)p->p_addr + atop(USPACE), FALSE); -#endif - swapin(p); - goto loop; - } - /* - * Not enough memory, jab the pageout daemon and wait til the - * coast is clear. - */ -#ifdef DEBUG - if (swapdebug & SDB_FOLLOW) - printf("scheduler: no room for pid %d(%s), free %d\n", - p->p_pid, p->p_comm, cnt.v_free_count); -#endif - (void)splhigh(); - vm_wait("fLowmem"); - (void)spl0(); -#ifdef DEBUG - if (swapdebug & SDB_FOLLOW) - printf("scheduler: room again, free %d\n", cnt.v_free_count); -#endif - goto loop; -} - -#define swappable(p) \ - (((p)->p_flag & (P_SYSTEM | P_INMEM | P_WEXIT)) == P_INMEM && \ - (p)->p_holdcnt == 0) - -/* - * Swapout is driven by the pageout daemon. Very simple, we find eligible - * procs and unwire their u-areas. We try to always "swap" at least one - * process in case we need the room for a swapin. - * If any procs have been sleeping/stopped for at least maxslp seconds, - * they are swapped. Else, we swap the longest-sleeping or stopped process, - * if any, otherwise the longest-resident process. - */ -void -swapout_threads() -{ - register struct proc *p; - struct proc *outp, *outp2; - int outpri, outpri2; - int didswap = 0; - extern int maxslp; - -#ifdef DEBUG - if (!enableswap) - return; -#endif - outp = outp2 = NULL; - outpri = outpri2 = 0; - for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - if (!swappable(p)) - continue; - switch (p->p_stat) { - case SRUN: - if (p->p_swtime > outpri2) { - outp2 = p; - outpri2 = p->p_swtime; - } - continue; - - case SSLEEP: - case SSTOP: - if (p->p_slptime >= maxslp) { - swapout(p); - didswap++; - } else if (p->p_slptime > outpri) { - outp = p; - outpri = p->p_slptime; - } - continue; - } - } - /* - * If we didn't get rid of any real duds, toss out the next most - * likely sleeping/stopped or running candidate. We only do this - * if we are real low on memory since we don't gain much by doing - * it (USPACE bytes). 
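The swap-in policy in scheduler() above is one linear scan: among runnable processes that are not in core, the candidate with the largest p_swtime + p_slptime - p_nice * 8 is brought in first. A tiny model of that selection; the struct, constants and process names are invented for the sketch:

#include <limits.h>
#include <stdio.h>

#define SRUN    1
#define P_INMEM 0x1

struct proc {
	const char *comm;
	int stat, flag;
	int swtime, slptime, nice;
};

/* Pick the swapped-out runnable process most deserving of swap-in. */
static struct proc *
choose_swapin(struct proc *procs, int n)
{
	struct proc *best = NULL;
	int bestpri = INT_MIN;

	for (int i = 0; i < n; i++) {
		struct proc *p = &procs[i];
		if (p->stat != SRUN || (p->flag & P_INMEM))
			continue;
		int pri = p->swtime + p->slptime - p->nice * 8;
		if (pri > bestpri) {
			best = p;
			bestpri = pri;
		}
	}
	return best;
}

int
main(void)
{
	struct proc procs[] = {
		{ "in-core", SRUN, P_INMEM, 50, 0, 0 },
		{ "patient", SRUN, 0,       40, 5, 0 },
		{ "niced",   SRUN, 0,       60, 0, 10 },
	};
	struct proc *p = choose_swapin(procs, 3);

	printf("swap in: %s\n", p ? p->comm : "(none)");
	return 0;
}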
- */ - if (didswap == 0 && - cnt.v_free_count <= atop(round_page(USPACE))) { - if ((p = outp) == 0) - p = outp2; -#ifdef DEBUG - if (swapdebug & SDB_SWAPOUT) - printf("swapout_threads: no duds, try procp %p\n", p); -#endif - if (p) - swapout(p); - } -} - -void -swapout(p) - register struct proc *p; -{ - vm_offset_t addr; - int s; - -#ifdef DEBUG - if (swapdebug & SDB_SWAPOUT) - printf("swapout: pid %d(%s)@%p, stat %x pri %d free %d\n", - p->p_pid, p->p_comm, p->p_addr, p->p_stat, p->p_slptime, - cnt.v_free_count); -#endif - - /* - * Do any machine-specific actions necessary before swapout. - * This can include saving floating point state, etc. - */ - cpu_swapout(p); - - /* - * Unwire the to-be-swapped process's user struct and kernel stack. - */ - addr = (vm_offset_t)p->p_addr; - vm_map_pageable(kernel_map, addr, addr + USPACE, TRUE); - pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map)); - - /* - * Mark it as (potentially) swapped out. - */ - s = splstatclock(); - p->p_flag &= ~P_INMEM; - if (p->p_stat == SRUN) - remrunqueue(p); - splx(s); - p->p_swtime = 0; - ++cnt.v_swpout; -} diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c deleted file mode 100644 index 6b46acbf01d..00000000000 --- a/sys/vm/vm_init.c +++ /dev/null @@ -1,127 +0,0 @@ -/* $OpenBSD: vm_init.c,v 1.3 1998/03/01 00:38:06 niklas Exp $ */ -/* $NetBSD: vm_init.c,v 1.11 1998/01/09 06:00:50 thorpej Exp $ */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_init.c 8.1 (Berkeley) 6/11/93 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. 
- * - * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Initialize the Virtual Memory subsystem. - */ - -#include <sys/param.h> -#include <sys/systm.h> - -#include <vm/vm.h> -#include <vm/vm_page.h> -#include <vm/vm_kern.h> - -/* - * vm_init initializes the virtual memory system. - * This is done only by the first cpu up. - * - * The start and end address of physical memory is passed in. - */ - -void vm_mem_init() -{ -#if !defined(MACHINE_NONCONTIG) && !defined(MACHINE_NEW_NONCONTIG) - extern vm_offset_t avail_start, avail_end; - extern vm_offset_t virtual_avail, virtual_end; -#else - vm_offset_t start, end; -#endif - - /* - * Initializes resident memory structures. - * From here on, all physical memory is accounted for, - * and we use only virtual addresses. - */ - if (page_shift == 0) { - printf("vm_mem_init: WARN: MD code did not set page size\n"); - vm_set_page_size(); - } -#if !defined(MACHINE_NONCONTIG) && !defined(MACHINE_NEW_NONCONTIG) - vm_page_startup(&avail_start, &avail_end); -#else - vm_page_bootstrap(&start, &end); -#endif - - /* - * Initialize other VM packages - */ -#if !defined(MACHINE_NONCONTIG) && !defined(MACHINE_NEW_NONCONTIG) - vm_object_init(virtual_end - VM_MIN_KERNEL_ADDRESS); -#else - vm_object_init(end - VM_MIN_KERNEL_ADDRESS); -#endif - vm_map_startup(); -#if !defined(MACHINE_NONCONTIG) && !defined(MACHINE_NEW_NONCONTIG) - kmem_init(virtual_avail, virtual_end); - pmap_init(avail_start, avail_end); -#else - kmem_init(start, end); - pmap_init(); -#endif - vm_pager_init(); -} diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c deleted file mode 100644 index c2da9722860..00000000000 --- a/sys/vm/vm_kern.c +++ /dev/null @@ -1,465 +0,0 @@ -/* $OpenBSD: vm_kern.c,v 1.11 1999/09/03 18:02:27 art Exp $ */ -/* $NetBSD: vm_kern.c,v 1.17.6.1 1996/06/13 17:21:28 cgd Exp $ */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_kern.c 8.4 (Berkeley) 1/9/95 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Kernel memory management. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> - -#include <vm/vm.h> -#include <vm/vm_extern.h> -#include <vm/vm_page.h> -#include <vm/vm_pageout.h> -#include <vm/vm_kern.h> - -/* - * kmem_alloc_pageable: - * - * Allocate pageable memory to the kernel's address map. - * map must be "kernel_map" below. - */ -vm_offset_t -kmem_alloc_pageable(map, size) - vm_map_t map; - register vm_size_t size; -{ - vm_offset_t addr; - register int result; - -#if 0 - if (map != kernel_map) - panic("kmem_alloc_pageable: not called with kernel_map"); -#endif - - size = round_page(size); - - addr = vm_map_min(map); - result = vm_map_find(map, NULL, (vm_offset_t)0, &addr, size, TRUE); - if (result != KERN_SUCCESS) { - return (0); - } - - return (addr); -} - -/* - * Allocate wired-down memory in the kernel's address map - * or a submap. - */ -vm_offset_t -kmem_alloc(map, size) - register vm_map_t map; - register vm_size_t size; -{ - vm_offset_t addr; - register vm_offset_t offset; - extern vm_object_t kernel_object; - vm_offset_t i; - - size = round_page(size); - - /* - * Use the kernel object for wired-down kernel pages. 
- * Assume that no region of the kernel object is - * referenced more than once. - */ - - /* - * Locate sufficient space in the map. This will give us the - * final virtual address for the new memory, and thus will tell - * us the offset within the kernel map. - */ - vm_map_lock(map); - if (vm_map_findspace(map, 0, size, &addr)) { - vm_map_unlock(map); - return (0); - } - offset = addr - VM_MIN_KERNEL_ADDRESS; - vm_object_reference(kernel_object); - vm_map_insert(map, kernel_object, offset, addr, addr + size); - vm_map_unlock(map); - - /* - * Guarantee that there are pages already in this object - * before calling vm_map_pageable. This is to prevent the - * following scenario: - * - * 1) Threads have swapped out, so that there is a - * pager for the kernel_object. - * 2) The kmsg zone is empty, and so we are kmem_allocing - * a new page for it. - * 3) vm_map_pageable calls vm_fault; there is no page, - * but there is a pager, so we call - * pager_data_request. But the kmsg zone is empty, - * so we must kmem_alloc. - * 4) goto 1 - * 5) Even if the kmsg zone is not empty: when we get - * the data back from the pager, it will be (very - * stale) non-zero data. kmem_alloc is defined to - * return zero-filled memory. - * - * We're intentionally not activating the pages we allocate - * to prevent a race with page-out. vm_map_pageable will wire - * the pages. - */ - - vm_object_lock(kernel_object); - for (i = 0; i < size; i += PAGE_SIZE) { - vm_page_t mem; - - while ((mem = vm_page_alloc(kernel_object, offset + i)) == - NULL) { - vm_object_unlock(kernel_object); - vm_wait("fKmwire"); - vm_object_lock(kernel_object); - } - vm_page_zero_fill(mem); - mem->flags &= ~PG_BUSY; - } - vm_object_unlock(kernel_object); - - /* - * And finally, mark the data as non-pageable. - */ - - (void)vm_map_pageable(map, (vm_offset_t)addr, addr + size, FALSE); - - /* - * Try to coalesce the map - */ - - vm_map_simplify(map, addr); - - return (addr); -} - -/* - * kmem_free: - * - * Release a region of kernel virtual memory allocated - * with kmem_alloc, and return the physical pages - * associated with that region. - */ -void -kmem_free(map, addr, size) - vm_map_t map; - register vm_offset_t addr; - vm_size_t size; -{ - (void)vm_map_remove(map, trunc_page(addr), round_page(addr + size)); -} - -/* - * kmem_suballoc: - * - * Allocates a map to manage a subrange - * of the kernel virtual address space. - * - * Arguments are as follows: - * - * parent Map to take range from - * size Size of range to find - * min, max Returned endpoints of map - * pageable Can the region be paged - */ -vm_map_t -kmem_suballoc(parent, min, max, size, pageable) - register vm_map_t parent; - vm_offset_t *min, *max; - register vm_size_t size; - boolean_t pageable; -{ - register int ret; - vm_map_t result; - - size = round_page(size); - - *min = (vm_offset_t)vm_map_min(parent); - ret = vm_map_find(parent, NULL, (vm_offset_t)0, min, size, TRUE); - if (ret != KERN_SUCCESS) { - printf("kmem_suballoc: bad status return of %d.\n", ret); - panic("kmem_suballoc"); - } - *max = *min + size; - pmap_reference(vm_map_pmap(parent)); - result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable); - if (result == NULL) - panic("kmem_suballoc: cannot create submap"); - if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS) - panic("kmem_suballoc: unable to change range to submap"); - return (result); -} - -/* - * Allocate wired-down memory in the kernel's address map for the higher - * level kernel memory allocator (kern/kern_malloc.c). 
We cannot use - * kmem_alloc() because we may need to allocate memory at interrupt - * level where we cannot block (canwait == FALSE). - * - * This routine has its own private kernel submap (kmem_map) and object - * (kmem_object). This, combined with the fact that only malloc uses - * this routine, ensures that we will never block in map or object waits. - * - * Note that this still only works in a uni-processor environment and - * when called at splimp(). - * - * We don't worry about expanding the map (adding entries) since entries - * for wired maps are statically allocated. - */ -vm_offset_t -kmem_malloc(map, size, canwait) - register vm_map_t map; - register vm_size_t size; - boolean_t canwait; -{ - register vm_offset_t offset, i; - vm_map_entry_t entry; - vm_offset_t addr; - vm_page_t m; - extern vm_object_t kmem_object; - - if (map != kmem_map && map != mb_map) - panic("kern_malloc_alloc: map != {kmem,mb}_map"); - - size = round_page(size); - addr = vm_map_min(map); - - /* - * Locate sufficient space in the map. This will give us the - * final virtual address for the new memory, and thus will tell - * us the offset within the kernel map. - */ - vm_map_lock(map); - if (vm_map_findspace(map, 0, size, &addr)) { - vm_map_unlock(map); - /* - * Should wait, but that makes no sense since we will - * likely never wake up unless action to free resources - * is taken by the calling subsystem. - * - * We return NULL, and if the caller was able to wait - * then they should take corrective action and retry. - */ - return (0); - } - offset = addr - vm_map_min(kmem_map); - vm_object_reference(kmem_object); - vm_map_insert(map, kmem_object, offset, addr, addr + size); - - /* - * If we can wait, just mark the range as wired - * (will fault pages as necessary). - */ - if (canwait) { - vm_map_unlock(map); - (void)vm_map_pageable(map, (vm_offset_t)addr, addr + size, - FALSE); - vm_map_simplify(map, addr); - return (addr); - } - - /* - * If we cannot wait then we must allocate all memory up front, - * pulling it off the active queue to prevent pageout. - */ - vm_object_lock(kmem_object); - for (i = 0; i < size; i += PAGE_SIZE) { - m = vm_page_alloc(kmem_object, offset + i); - - /* - * Ran out of space, free everything up and return. - * Don't need to lock page queues here as we know - * that the pages we got aren't on any queues. - */ - if (m == NULL) { - while (i != 0) { - i -= PAGE_SIZE; - m = vm_page_lookup(kmem_object, offset + i); - vm_page_free(m); - } - vm_object_unlock(kmem_object); - vm_map_delete(map, addr, addr + size); - vm_map_unlock(map); - return (0); - } -#if 0 - vm_page_zero_fill(m); -#endif - m->flags &= ~PG_BUSY; - } - vm_object_unlock(kmem_object); - - /* - * Mark map entry as non-pageable. - * Assert: vm_map_insert() will never be able to extend the previous - * entry so there will be a new entry exactly corresponding to this - * address range and it will have wired_count == 0. - */ - if (!vm_map_lookup_entry(map, addr, &entry) || - entry->start != addr || entry->end != addr + size || - entry->wired_count) - panic("kmem_malloc: entry not found or misaligned"); - entry->wired_count++; - - /* - * Loop thru pages, entering them in the pmap. - * (We cannot add them to the wired count without - * wrapping the vm_page_queue_lock in splimp...) 
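For the canwait == FALSE case, kmem_malloc above allocates every page before entering anything in the pmap; if any allocation fails it walks back over the pages it already obtained, frees them, and deletes the map entry. That allocate-all-or-undo pattern, reduced to user-space C; alloc_page() and free_page() are stand-ins, and the failure is simulated with a simple counter:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096

static int budget;	/* fail after `budget' pages to hit the rollback path */

static void *
alloc_page(void)
{
	if (budget-- <= 0)
		return NULL;
	return calloc(1, PAGE_SIZE);
}

static void
free_page(void *p)
{
	free(p);
}

/* Allocate size/PAGE_SIZE pages up front; on failure undo everything. */
static int
alloc_nowait(void **pages, size_t size)
{
	size_t i;

	for (i = 0; i < size; i += PAGE_SIZE) {
		pages[i / PAGE_SIZE] = alloc_page();
		if (pages[i / PAGE_SIZE] == NULL) {
			while (i != 0) {	/* same walk-back as kmem_malloc */
				i -= PAGE_SIZE;
				free_page(pages[i / PAGE_SIZE]);
			}
			return -1;
		}
	}
	return 0;
}

int
main(void)
{
	void *pages[4];

	budget = 2;
	if (alloc_nowait(pages, 4 * PAGE_SIZE) != 0)
		printf("allocation failed, partial pages rolled back\n");
	return 0;
}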
- */ - for (i = 0; i < size; i += PAGE_SIZE) { - vm_object_lock(kmem_object); - m = vm_page_lookup(kmem_object, offset + i); - vm_object_unlock(kmem_object); - pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m), - VM_PROT_DEFAULT, TRUE, 0); - } - vm_map_unlock(map); - - vm_map_simplify(map, addr); - return (addr); -} - -/* - * kmem_alloc_wait - * - * Allocates pageable memory from a sub-map of the kernel. If the submap - * has no room, the caller sleeps waiting for more memory in the submap. - * - */ -vm_offset_t -kmem_alloc_wait(map, size) - vm_map_t map; - vm_size_t size; -{ - vm_offset_t addr; - - size = round_page(size); - - for (;;) { - /* - * To make this work for more than one map, - * use the map's lock to lock out sleepers/wakers. - */ - vm_map_lock(map); - if (vm_map_findspace(map, 0, size, &addr) == 0) - break; - /* no space now; see if we can ever get space */ - if (vm_map_max(map) - vm_map_min(map) < size) { - vm_map_unlock(map); - return (0); - } - assert_wait(map, TRUE); - vm_map_unlock(map); - thread_block("mKmwait"); - } - vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size); - vm_map_unlock(map); - return (addr); -} - -/* - * kmem_free_wakeup - * - * Returns memory to a submap of the kernel, and wakes up any threads - * waiting for memory in that map. - */ -void -kmem_free_wakeup(map, addr, size) - vm_map_t map; - vm_offset_t addr; - vm_size_t size; -{ - vm_map_lock(map); - (void)vm_map_delete(map, trunc_page(addr), round_page(addr + size)); - thread_wakeup(map); - vm_map_unlock(map); -} - -/* - * Create the kernel map; insert a mapping covering kernel text, data, bss, - * and all space allocated thus far (`boostrap' data). The new map will thus - * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and - * the range between `start' and `end' as free. - */ -void -kmem_init(start, end) - vm_offset_t start, end; -{ - register vm_map_t m; - - m = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_ADDRESS, end, FALSE); - vm_map_lock(m); - /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ - kernel_map = m; - (void)vm_map_insert(m, NULL, (vm_offset_t)0, VM_MIN_KERNEL_ADDRESS, - start); - /* ... and ending with the completion of the above `insert' */ - vm_map_unlock(m); -} diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c deleted file mode 100644 index d3dac7a832a..00000000000 --- a/sys/vm/vm_map.c +++ /dev/null @@ -1,2746 +0,0 @@ -/* $OpenBSD: vm_map.c,v 1.20 2001/05/16 12:54:34 ho Exp $ */ -/* $NetBSD: vm_map.c,v 1.23 1996/02/10 00:08:08 christos Exp $ */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_map.c 8.9 (Berkeley) 5/17/95 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Virtual memory mapping module. - */ - -#include <sys/param.h> -#include <sys/proc.h> -#include <sys/systm.h> -#include <sys/malloc.h> - -#include <vm/vm.h> -#include <vm/vm_kern.h> -#include <vm/vm_page.h> - -/* - * Virtual memory maps provide for the mapping, protection, - * and sharing of virtual memory objects. In addition, - * this module provides for an efficient virtual copy of - * memory from one map to another. - * - * Synchronization is required prior to most operations. - * - * Maps consist of an ordered doubly-linked list of simple - * entries; a single hint is used to speed up lookups. - * - * In order to properly represent the sharing of virtual - * memory regions among maps, the map structure is bi-level. - * Top-level ("address") maps refer to regions of sharable - * virtual memory. These regions are implemented as - * ("sharing") maps, which then refer to the actual virtual - * memory objects. When two address maps "share" memory, - * their top-level maps both have references to the same - * sharing map. When memory is virtual-copied from one - * address map to another, the references in the sharing - * maps are actually copied -- no copying occurs at the - * virtual memory object level. - * - * Since portions of maps are specified by start/end addreses, - * which may not align with existing map entries, all - * routines merely "clip" entries to these start/end values. 
- * [That is, an entry is split into two, bordering at a - * start or end value.] Note that these clippings may not - * always be necessary (as the two resulting entries are then - * not changed); however, the clipping is done for convenience. - * No attempt is currently made to "glue back together" two - * abutting entries. - * - * As mentioned above, virtual copy operations are performed - * by copying VM object references from one sharing map to - * another, and then marking both regions as copy-on-write. - * It is important to note that only one writeable reference - * to a VM object region exists in any map -- this means that - * shadow object creation can be delayed until a write operation - * occurs. - */ - -/* - * vm_map_startup: - * - * Initialize the vm_map module. Must be called before - * any other vm_map routines. - * - * Map and entry structures are allocated from the general - * purpose memory pool with some exceptions: - * - * - The kernel map and kmem submap are allocated statically. - * - Kernel map entries are allocated out of a static pool. - * - * These restrictions are necessary since malloc() uses the - * maps and requires map entries. - */ - -#if defined(MACHINE_NEW_NONCONTIG) -u_int8_t kentry_data_store[MAX_KMAP*sizeof(struct vm_map) + - MAX_KMAPENT*sizeof(struct vm_map_entry)]; -vm_offset_t kentry_data = (vm_offset_t) kentry_data_store; -vm_size_t kentry_data_size = sizeof(kentry_data_store); -#else -/* NUKE NUKE NUKE */ -vm_offset_t kentry_data; -vm_size_t kentry_data_size; -#endif -vm_map_entry_t kentry_free; -vm_map_t kmap_free; - -static int kentry_count; -static vm_offset_t mapvm_start, mapvm, mapvmmax; -static int mapvmpgcnt; - -static struct vm_map_entry *mappool; -static int mappoolcnt; -#define KENTRY_LOW_WATER 128 - -static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t)); -static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t)); - -void -vm_map_startup() -{ - register int i; - register vm_map_entry_t mep; - vm_map_t mp; - - /* - * zero kentry area - * XXX necessary? - */ - bzero((caddr_t)kentry_data, kentry_data_size); - - /* - * Static map structures for allocation before initialization of - * kernel map or kmem map. vm_map_create knows how to deal with them. - */ - kmap_free = mp = (vm_map_t) kentry_data; - i = MAX_KMAP; - while (--i > 0) { - mp->header.next = (vm_map_entry_t) (mp + 1); - mp++; - } - mp++->header.next = NULL; - - /* - * Form a free list of statically allocated kernel map entries - * with the rest. - */ - kentry_free = mep = (vm_map_entry_t) mp; - kentry_count = i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep; - while (--i > 0) { - mep->next = mep + 1; - mep++; - } - mep->next = NULL; -} - -/* - * Allocate a vmspace structure, including a vm_map and pmap, - * and initialize those structures. The refcnt is set to 1. - * The remaining fields must be initialized by the caller. 
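vm_map_startup above threads free lists through a single static buffer so that the kernel map and its first entries can be handed out before malloc() is usable: the first MAX_KMAP slots become spare maps, the remainder become spare map entries. A reduced model of carving a static array into such a free list; the entry type and pool size are invented for the sketch:

#include <stdio.h>

#define NENTRIES 8

struct entry {
	struct entry *next;
	int payload;
};

static struct entry store[NENTRIES];	/* static pool, usable before malloc */
static struct entry *freelist;

static void
pool_init(void)
{
	int i;

	for (i = 0; i < NENTRIES - 1; i++)
		store[i].next = &store[i + 1];
	store[NENTRIES - 1].next = NULL;
	freelist = &store[0];
}

static struct entry *
pool_get(void)
{
	struct entry *e = freelist;

	if (e != NULL)
		freelist = e->next;
	return e;
}

static void
pool_put(struct entry *e)
{
	e->next = freelist;
	freelist = e;
}

int
main(void)
{
	pool_init();
	struct entry *a = pool_get(), *b = pool_get();
	printf("got %p and %p from the static pool\n", (void *)a, (void *)b);
	pool_put(a);
	pool_put(b);
	return 0;
}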
- */ -struct vmspace * -vmspace_alloc(min, max, pageable) - vm_offset_t min, max; - int pageable; -{ - register struct vmspace *vm; - - if (mapvmpgcnt == 0 && mapvm == 0) { -#if defined(MACHINE_NEW_NONCONTIG) - int vm_page_count = 0; - int lcv; - - for (lcv = 0; lcv < vm_nphysseg; lcv++) - vm_page_count += (vm_physmem[lcv].end - - vm_physmem[lcv].start); - - mapvmpgcnt = (vm_page_count * - sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE; - -#elif defined(MACHINE_NONCONTIG) - mapvmpgcnt = (vm_page_count * - sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE; -#else /* must be contig */ - mapvmpgcnt = ((last_page-first_page) * - sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE; -#endif /* contig */ - mapvm_start = mapvm = kmem_alloc_pageable(kernel_map, - mapvmpgcnt * PAGE_SIZE); - mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE; - if (!mapvm) - mapvmpgcnt = 0; - } - MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK); - bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm); - vm_map_init(&vm->vm_map, min, max, pageable); - vm->vm_map.pmap = pmap_create(0); - vm->vm_refcnt = 1; - return (vm); -} - -void -vmspace_free(vm) - register struct vmspace *vm; -{ - - if (--vm->vm_refcnt == 0) { - /* - * Lock the map, to wait out all other references to it. - * Delete all of the mappings and pages they hold, - * then call the pmap module to reclaim anything left. - */ - vm_map_lock(&vm->vm_map); - (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, - vm->vm_map.max_offset); - pmap_destroy(vm->vm_map.pmap); - FREE(vm, M_VMMAP); - } -} - -/* - * vm_map_create: - * - * Creates and returns a new empty VM map with - * the given physical map structure, and having - * the given lower and upper address bounds. - */ -vm_map_t -vm_map_create(pmap, min, max, pageable) - pmap_t pmap; - vm_offset_t min, max; - boolean_t pageable; -{ - register vm_map_t result; - extern vm_map_t kmem_map; - - if (kmem_map == NULL) { - result = kmap_free; - if (result == NULL) - panic("vm_map_create: out of maps"); - kmap_free = (vm_map_t) result->header.next; - } else - MALLOC(result, vm_map_t, sizeof(struct vm_map), - M_VMMAP, M_WAITOK); - - vm_map_init(result, min, max, pageable); - result->pmap = pmap; - return(result); -} - -/* - * Initialize an existing vm_map structure - * such as that in the vmspace structure. - * The pmap is set elsewhere. - */ -void -vm_map_init(map, min, max, pageable) - register struct vm_map *map; - vm_offset_t min, max; - boolean_t pageable; -{ - map->header.next = map->header.prev = &map->header; - map->nentries = 0; - map->size = 0; - map->ref_count = 1; - map->is_main_map = TRUE; - map->min_offset = min; - map->max_offset = max; - map->entries_pageable = pageable; - map->first_free = &map->header; - map->hint = &map->header; - map->timestamp = 0; - lockinit(&map->lock, PVM, "thrd_sleep", 0, 0); - simple_lock_init(&map->ref_lock); - simple_lock_init(&map->hint_lock); -} - -/* - * vm_map_entry_create: [ internal use only ] - * - * Allocates a VM map entry for insertion. - * No entry fields are filled in. This routine is - */ -vm_map_entry_t -vm_map_entry_create(map) - vm_map_t map; -{ - vm_map_entry_t entry; - int i, s; - - /* - * This is a *very* nasty (and sort of incomplete) hack!!!! 
- */ - if (kentry_count < KENTRY_LOW_WATER) { - s = splimp(); - if (mapvmpgcnt && mapvm) { - vm_page_t m; - - m = vm_page_alloc(kernel_object, - mapvm - VM_MIN_KERNEL_ADDRESS); - - if (m) { - int newentries; - - newentries = (PAGE_SIZE / sizeof(struct vm_map_entry)); -#ifdef DIAGNOSTIC - printf("vm_map_entry_create: allocated %d new entries.\n", newentries); -#endif - - /* XXX */ - vm_page_wire(m); - PAGE_WAKEUP(m); - pmap_enter(pmap_kernel(), mapvm, - VM_PAGE_TO_PHYS(m), - VM_PROT_READ|VM_PROT_WRITE, FALSE, 0); - - entry = (vm_map_entry_t) mapvm; - mapvm += PAGE_SIZE; - --mapvmpgcnt; - - for (i = 0; i < newentries; i++) { - vm_map_entry_dispose(kernel_map, entry); - entry++; - } - } - } - splx(s); - } - - if (map->entries_pageable) { - if ((entry = mappool) != NULL) { - mappool = mappool->next; - --mappoolcnt; - } else { - MALLOC(entry, vm_map_entry_t, - sizeof(struct vm_map_entry), M_VMMAPENT, M_WAITOK); - } - } else { - s = splimp(); - if ((entry = kentry_free) != NULL) { - kentry_free = kentry_free->next; - --kentry_count; - } - if (entry == NULL) - panic("vm_map_entry_create: out of map entries for kernel"); - splx(s); - } - - return(entry); -} - -/* - * vm_map_entry_dispose: [ internal use only ] - * - * Inverse of vm_map_entry_create. - */ -void -vm_map_entry_dispose(map, entry) - vm_map_t map; - vm_map_entry_t entry; -{ - int s; - - if (map->entries_pageable) { - entry->next = mappool; - mappool = entry; - ++mappoolcnt; - } else { - s = splimp(); - entry->next = kentry_free; - kentry_free = entry; - ++kentry_count; - splx(s); - } -} - -/* - * vm_map_entry_{un,}link: - * - * Insert/remove entries from maps. - */ -#define vm_map_entry_link(map, after_where, entry) \ - { \ - (map)->nentries++; \ - (entry)->prev = (after_where); \ - (entry)->next = (after_where)->next; \ - (entry)->prev->next = (entry); \ - (entry)->next->prev = (entry); \ - } -#define vm_map_entry_unlink(map, entry) \ - { \ - (map)->nentries--; \ - (entry)->next->prev = (entry)->prev; \ - (entry)->prev->next = (entry)->next; \ - } - -/* - * vm_map_reference: - * - * Creates another valid reference to the given map. - * - */ -void -vm_map_reference(map) - register vm_map_t map; -{ - if (map == NULL) - return; - - simple_lock(&map->ref_lock); - map->ref_count++; - simple_unlock(&map->ref_lock); -} - -/* - * vm_map_deallocate: - * - * Removes a reference from the specified map, - * destroying it if no references remain. - * The map should not be locked. - */ -void -vm_map_deallocate(map) - register vm_map_t map; -{ - register int c; - - if (map == NULL) - return; - - simple_lock(&map->ref_lock); - c = --map->ref_count; - - if (c > 0) { - simple_unlock(&map->ref_lock); - return; - } - - /* - * Lock the map, to wait out all other references - * to it. - */ - - vm_map_lock_drain_interlock(map); - - (void) vm_map_delete(map, map->min_offset, map->max_offset); - - pmap_destroy(map->pmap); - - vm_map_unlock(map); - - FREE(map, M_VMMAP); -} - -/* - * vm_map_insert: - * - * Inserts the given whole VM object into the target - * map at the specified address range. The object's - * size should match that of the address range. - * - * Requires that the map be locked, and leaves it so. - */ -int -vm_map_insert(map, object, offset, start, end) - vm_map_t map; - vm_object_t object; - vm_offset_t offset; - vm_offset_t start; - vm_offset_t end; -{ - register vm_map_entry_t new_entry; - register vm_map_entry_t prev_entry; - vm_map_entry_t temp_entry; - - /* - * Check that the start and end points are not bogus. 
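The vm_map_entry_link()/vm_map_entry_unlink() macros above maintain a circular doubly-linked list threaded through a header sentinel, so insertion and removal never need a special case for an empty list. A compact userland sketch of the same structure, with illustrative names and simplified stand-in types:

#include <stdio.h>

struct entry {
	struct entry *prev, *next;
	int id;
};

struct map {
	struct entry header;	/* sentinel: the list is circular through it */
	int nentries;
};

static void
map_init(struct map *m)
{
	m->header.next = m->header.prev = &m->header;
	m->nentries = 0;
}

/* Same shape as the vm_map_entry_link() macro above. */
static void
entry_link(struct map *m, struct entry *after, struct entry *e)
{
	m->nentries++;
	e->prev = after;
	e->next = after->next;
	e->prev->next = e;
	e->next->prev = e;
}

static void
entry_unlink(struct map *m, struct entry *e)
{
	m->nentries--;
	e->next->prev = e->prev;
	e->prev->next = e->next;
}

int
main(void)
{
	struct map m;
	struct entry a = { 0, 0, 1 }, b = { 0, 0, 2 };
	struct entry *e;

	map_init(&m);
	entry_link(&m, &m.header, &a);	/* insert at the head */
	entry_link(&m, &a, &b);		/* insert after a */
	entry_unlink(&m, &a);
	for (e = m.header.next; e != &m.header; e = e->next)
		printf("entry %d\n", e->id);
	return (0);
}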
- */ - - if ((start < map->min_offset) || (end > map->max_offset) || - (start >= end)) - return(KERN_INVALID_ADDRESS); - - /* - * Find the entry prior to the proposed - * starting address; if it's part of an - * existing entry, this range is bogus. - */ - - if (vm_map_lookup_entry(map, start, &temp_entry)) - return(KERN_NO_SPACE); - - prev_entry = temp_entry; - - /* - * Assert that the next entry doesn't overlap the - * end point. - */ - - if ((prev_entry->next != &map->header) && - (prev_entry->next->start < end)) - return(KERN_NO_SPACE); - - /* - * See if we can avoid creating a new entry by - * extending one of our neighbors. - */ - - if (object == NULL) { - if ((prev_entry != &map->header) && - (prev_entry->end == start) && - (map->is_main_map) && - (prev_entry->is_a_map == FALSE) && - (prev_entry->is_sub_map == FALSE) && - (prev_entry->inheritance == VM_INHERIT_DEFAULT) && - (prev_entry->protection == VM_PROT_DEFAULT) && - (prev_entry->max_protection == VM_PROT_DEFAULT) && - (prev_entry->wired_count == 0)) { - - if (vm_object_coalesce(prev_entry->object.vm_object, - NULL, - prev_entry->offset, - (vm_offset_t) 0, - (vm_size_t)(prev_entry->end - - prev_entry->start), - (vm_size_t)(end - prev_entry->end))) { - /* - * Coalesced the two objects - can extend - * the previous map entry to include the - * new range. - */ - map->size += (end - prev_entry->end); - prev_entry->end = end; - return(KERN_SUCCESS); - } - } - } - - /* - * Create a new entry - */ - - new_entry = vm_map_entry_create(map); - new_entry->start = start; - new_entry->end = end; - - new_entry->is_a_map = FALSE; - new_entry->is_sub_map = FALSE; - new_entry->object.vm_object = object; - new_entry->offset = offset; - - new_entry->copy_on_write = FALSE; - new_entry->needs_copy = FALSE; - - if (map->is_main_map) { - new_entry->inheritance = VM_INHERIT_DEFAULT; - new_entry->protection = VM_PROT_DEFAULT; - new_entry->max_protection = VM_PROT_DEFAULT; - new_entry->wired_count = 0; - } - - /* - * Insert the new entry into the list - */ - - vm_map_entry_link(map, prev_entry, new_entry); - map->size += new_entry->end - new_entry->start; - - /* - * Update the free space hint - */ - - if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start)) - map->first_free = new_entry; - - return(KERN_SUCCESS); -} - -/* - * SAVE_HINT: - * - * Saves the specified entry as the hint for - * future lookups. Performs necessary interlocks. - */ -#define SAVE_HINT(map,value) \ - simple_lock(&(map)->hint_lock); \ - (map)->hint = (value); \ - simple_unlock(&(map)->hint_lock); - -/* - * vm_map_lookup_entry: [ internal use only ] - * - * Finds the map entry containing (or - * immediately preceding) the specified address - * in the given map; the entry is returned - * in the "entry" parameter. The boolean - * result indicates whether the address is - * actually contained in the map. - */ -boolean_t -vm_map_lookup_entry(map, address, entry) - register vm_map_t map; - register vm_offset_t address; - vm_map_entry_t *entry; /* OUT */ -{ - register vm_map_entry_t cur; - register vm_map_entry_t last; - - /* - * Start looking either from the head of the - * list, or from the hint. - */ - - simple_lock(&map->hint_lock); - cur = map->hint; - simple_unlock(&map->hint_lock); - - if (cur == &map->header) - cur = cur->next; - - if (address >= cur->start) { - /* - * Go from hint to end of list. - * - * But first, make a quick check to see if - * we are already looking at the entry we - * want (which is usually the case). 
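vm_map_lookup_entry() first tries the per-map hint, the entry saved by the previous lookup, and only falls back to the linear walk shown just below when the hint misses. A simplified sketch of that hint-then-scan pattern over a sorted array (illustrative names, no locking):

#include <stdio.h>

struct range {
	unsigned long start, end;	/* [start, end) */
};

static int hint;	/* index of the last entry returned */

static int
lookup(const struct range *r, int n, unsigned long addr)
{
	int i;

	/* Most lookups hit the same entry as the previous one. */
	if (hint < n && addr >= r[hint].start && addr < r[hint].end)
		return (hint);
	for (i = 0; i < n; i++) {
		if (addr >= r[i].start && addr < r[i].end) {
			hint = i;	/* save for the next lookup */
			return (i);
		}
	}
	return (-1);
}

int
main(void)
{
	struct range r[2] = { { 0x1000, 0x2000 }, { 0x4000, 0x9000 } };

	printf("%d\n", lookup(r, 2, 0x4800));
	printf("%d\n", lookup(r, 2, 0x5000));	/* satisfied by the hint */
	return (0);
}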
- * Note also that we don't need to save the hint - * here... it is the same hint (unless we are - * at the header, in which case the hint didn't - * buy us anything anyway). - */ - last = &map->header; - if ((cur != last) && (cur->end > address)) { - *entry = cur; - return(TRUE); - } - } - else { - /* - * Go from start to hint, *inclusively* - */ - last = cur->next; - cur = map->header.next; - } - - /* - * Search linearly - */ - - while (cur != last) { - if (cur->end > address) { - if (address >= cur->start) { - /* - * Save this lookup for future - * hints, and return - */ - - *entry = cur; - SAVE_HINT(map, cur); - return(TRUE); - } - break; - } - cur = cur->next; - } - *entry = cur->prev; - SAVE_HINT(map, *entry); - return(FALSE); -} - -/* - * Find sufficient space for `length' bytes in the given map, starting at - * `start'. The map must be locked. Returns 0 on success, 1 on no space. - */ -int -vm_map_findspace(map, start, length, addr) - register vm_map_t map; - register vm_offset_t start; - vm_size_t length; - vm_offset_t *addr; -{ - register vm_map_entry_t entry, next; - register vm_offset_t end; - - if (start < map->min_offset) - start = map->min_offset; - if (start > map->max_offset) - return (1); - - /* - * Look for the first possible address; if there's already - * something at this address, we have to start after it. - */ - if (start == map->min_offset) { - if ((entry = map->first_free) != &map->header) - start = entry->end; - } else { - vm_map_entry_t tmp; - if (vm_map_lookup_entry(map, start, &tmp)) - start = tmp->end; - entry = tmp; - } - - /* - * Look through the rest of the map, trying to fit a new region in - * the gap between existing regions, or after the very last region. - */ - for (;; start = (entry = next)->end) { - /* - * Find the end of the proposed new region. Be sure we didn't - * go beyond the end of the map, or wrap around the address; - * if so, we lose. Otherwise, if this is the last entry, or - * if the proposed new region fits before the next entry, we - * win. - */ - end = start + length; - if (end > map->max_offset || end < start) - return (1); - next = entry->next; - if (next == &map->header || next->start >= end) - break; - } - SAVE_HINT(map, entry); - *addr = start; - return (0); -} - -/* - * vm_map_find finds an unallocated region in the target address - * map with the given length. The search is defined to be - * first-fit from the specified address; the region found is - * returned in the same parameter. - * - */ -int -vm_map_find(map, object, offset, addr, length, find_space) - vm_map_t map; - vm_object_t object; - vm_offset_t offset; - vm_offset_t *addr; /* IN/OUT */ - vm_size_t length; - boolean_t find_space; -{ - register vm_offset_t start; - int result; - - start = *addr; - vm_map_lock(map); - if (find_space) { - if (vm_map_findspace(map, start, length, addr)) { - vm_map_unlock(map); - return (KERN_NO_SPACE); - } - start = *addr; - } - result = vm_map_insert(map, object, offset, start, start + length); - vm_map_unlock(map); - return (result); -} - -/* - * vm_map_simplify_entry: [ internal use only ] - * - * Simplify the given map entry by: - * removing extra sharing maps - * [XXX maybe later] merging with a neighbor - */ -void -vm_map_simplify_entry(map, entry) - vm_map_t map; - vm_map_entry_t entry; -{ -#ifdef lint - map++; -#endif - - /* - * If this entry corresponds to a sharing map, then - * see if we can remove the level of indirection. 
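vm_map_findspace() above is a first-fit search: starting at the requested address, it tries the gap after each entry in turn until a gap of the requested length appears or the map's upper limit is reached. A standalone sketch of the same scan over a sorted array of allocated ranges (names and types here are illustrative, not the kernel's):

#include <stdio.h>

struct range {
	unsigned long start, end;	/* allocated [start, end) */
};

static int
findspace(const struct range *r, int n, unsigned long min, unsigned long max,
    unsigned long length, unsigned long *addr)
{
	unsigned long start = min;
	int i;

	for (i = 0; i <= n; i++) {
		unsigned long end = start + length;

		if (end > max || end < start)	/* off the end, or wrapped */
			return (1);
		/* Fits before the next allocated range (or after the last)? */
		if (i == n || r[i].start >= end) {
			*addr = start;
			return (0);
		}
		if (r[i].end > start)
			start = r[i].end;	/* try the following gap */
	}
	return (1);
}

int
main(void)
{
	struct range used[2] = { { 0x1000, 0x3000 }, { 0x5000, 0x6000 } };
	unsigned long addr;

	if (findspace(used, 2, 0x1000, 0x10000, 0x2000, &addr) == 0)
		printf("found space at 0x%lx\n", addr);
	return (0);
}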
- * If it's not a sharing map, then it points to - * a VM object, so see if we can merge with either - * of our neighbors. - */ - - if (entry->is_sub_map) - return; - if (entry->is_a_map) { -#if 0 - vm_map_t my_share_map; - int count; - - my_share_map = entry->object.share_map; - simple_lock(&my_share_map->ref_lock); - count = my_share_map->ref_count; - simple_unlock(&my_share_map->ref_lock); - - if (count == 1) { - /* Can move the region from - * entry->start to entry->end (+ entry->offset) - * in my_share_map into place of entry. - * Later. - */ - } -#endif - } - else { - /* - * Try to merge with our neighbors. - * - * Conditions for merge are: - * - * 1. entries are adjacent. - * 2. both entries point to objects - * with null pagers. - * - * If a merge is possible, we replace the two - * entries with a single entry, then merge - * the two objects into a single object. - * - * Now, all that is left to do is write the - * code! - */ - } -} - -/* - * vm_map_clip_start: [ internal use only ] - * - * Asserts that the given entry begins at or after - * the specified address; if necessary, - * it splits the entry into two. - */ -#define vm_map_clip_start(map, entry, startaddr) \ -{ \ - if (startaddr > entry->start) \ - _vm_map_clip_start(map, entry, startaddr); \ -} - -/* - * This routine is called only when it is known that - * the entry must be split. - */ -static void -_vm_map_clip_start(map, entry, start) - register vm_map_t map; - register vm_map_entry_t entry; - register vm_offset_t start; -{ - register vm_map_entry_t new_entry; - - /* - * See if we can simplify this entry first - */ - - vm_map_simplify_entry(map, entry); - - /* - * Split off the front portion -- - * note that we must insert the new - * entry BEFORE this one, so that - * this entry has the specified starting - * address. - */ - - new_entry = vm_map_entry_create(map); - *new_entry = *entry; - - new_entry->end = start; - entry->offset += (start - entry->start); - entry->start = start; - - vm_map_entry_link(map, entry->prev, new_entry); - - if (entry->is_a_map || entry->is_sub_map) - vm_map_reference(new_entry->object.share_map); - else - vm_object_reference(new_entry->object.vm_object); -} - -/* - * vm_map_clip_end: [ internal use only ] - * - * Asserts that the given entry ends at or before - * the specified address; if necessary, - * it splits the entry into two. - */ - -#define vm_map_clip_end(map, entry, endaddr) \ -{ \ - if (endaddr < entry->end) \ - _vm_map_clip_end(map, entry, endaddr); \ -} - -/* - * This routine is called only when it is known that - * the entry must be split. - */ -static void -_vm_map_clip_end(map, entry, end) - register vm_map_t map; - register vm_map_entry_t entry; - register vm_offset_t end; -{ - register vm_map_entry_t new_entry; - - /* - * Create a new entry and insert it - * AFTER the specified entry - */ - - new_entry = vm_map_entry_create(map); - *new_entry = *entry; - - new_entry->start = entry->end = end; - new_entry->offset += (end - entry->start); - - vm_map_entry_link(map, entry, new_entry); - - if (entry->is_a_map || entry->is_sub_map) - vm_map_reference(new_entry->object.share_map); - else - vm_object_reference(new_entry->object.vm_object); -} - -/* - * VM_MAP_RANGE_CHECK: [ internal use only ] - * - * Asserts that the starting and ending region - * addresses fall within the valid range of the map. 
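_vm_map_clip_start() and _vm_map_clip_end() above split one entry into two at a boundary address while keeping the backing-object offset consistent on both halves. A minimal sketch of the same split, assuming a simplified entry that carries only start/end/offset (illustrative only):

#include <stdio.h>
#include <stdlib.h>

struct entry {
	unsigned long start, end, offset;	/* [start, end) at object offset */
};

/* Split *e at addr (start < addr < end): *e keeps [addr, end) and the
 * returned piece covers [start, addr), so the caller's entry begins at
 * the requested address, as _vm_map_clip_start() arranges above. */
static struct entry *
clip_start(struct entry *e, unsigned long addr)
{
	struct entry *front = malloc(sizeof(*front));

	if (front == NULL)
		return (NULL);
	*front = *e;
	front->end = addr;
	e->offset += addr - e->start;	/* back half starts deeper in the object */
	e->start = addr;
	return (front);
}

int
main(void)
{
	struct entry e = { 0x1000, 0x4000, 0 };
	struct entry *front = clip_start(&e, 0x2000);

	if (front == NULL)
		return (1);
	printf("front [%lx,%lx) off %lx, back [%lx,%lx) off %lx\n",
	    front->start, front->end, front->offset, e.start, e.end, e.offset);
	free(front);
	return (0);
}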
- */ -#define VM_MAP_RANGE_CHECK(map, start, end) \ - { \ - if (start < vm_map_min(map)) \ - start = vm_map_min(map); \ - if (end > vm_map_max(map)) \ - end = vm_map_max(map); \ - if (start > end) \ - start = end; \ - } - -/* - * vm_map_submap: [ kernel use only ] - * - * Mark the given range as handled by a subordinate map. - * - * This range must have been created with vm_map_find, - * and no other operations may have been performed on this - * range prior to calling vm_map_submap. - * - * Only a limited number of operations can be performed - * within this range after calling vm_map_submap: - * vm_fault - * [Don't try vm_map_copy!] - * - * To remove a submapping, one must first remove the - * range from the superior map, and then destroy the - * submap (if desired). [Better yet, don't try it.] - */ -int -vm_map_submap(map, start, end, submap) - register vm_map_t map; - register vm_offset_t start; - register vm_offset_t end; - vm_map_t submap; -{ - vm_map_entry_t entry; - register int result = KERN_INVALID_ARGUMENT; - - vm_map_lock(map); - - VM_MAP_RANGE_CHECK(map, start, end); - - if (vm_map_lookup_entry(map, start, &entry)) { - vm_map_clip_start(map, entry, start); - } - else - entry = entry->next; - - vm_map_clip_end(map, entry, end); - - if ((entry->start == start) && (entry->end == end) && - (!entry->is_a_map) && - (entry->object.vm_object == NULL) && - (!entry->copy_on_write)) { - entry->is_a_map = FALSE; - entry->is_sub_map = TRUE; - vm_map_reference(entry->object.sub_map = submap); - result = KERN_SUCCESS; - } - vm_map_unlock(map); - - return(result); -} - -/* - * vm_map_protect: - * - * Sets the protection of the specified address - * region in the target map. If "set_max" is - * specified, the maximum protection is to be set; - * otherwise, only the current protection is affected. - */ -int -vm_map_protect(map, start, end, new_prot, set_max) - register vm_map_t map; - register vm_offset_t start; - register vm_offset_t end; - register vm_prot_t new_prot; - register boolean_t set_max; -{ - register vm_map_entry_t current; - vm_map_entry_t entry; - - vm_map_lock(map); - - VM_MAP_RANGE_CHECK(map, start, end); - - if (vm_map_lookup_entry(map, start, &entry)) { - vm_map_clip_start(map, entry, start); - } - else - entry = entry->next; - - /* - * Make a first pass to check for protection - * violations. - */ - - current = entry; - while ((current != &map->header) && (current->start < end)) { - if (current->is_sub_map) - return(KERN_INVALID_ARGUMENT); - if ((new_prot & current->max_protection) != new_prot) { - vm_map_unlock(map); - return(KERN_PROTECTION_FAILURE); - } - - current = current->next; - } - - /* - * Go back and fix up protections. - * [Note that clipping is not necessary the second time.] - */ - - current = entry; - - while ((current != &map->header) && (current->start < end)) { - vm_prot_t old_prot; - - vm_map_clip_end(map, current, end); - - old_prot = current->protection; - if (set_max) - current->protection = - (current->max_protection = new_prot) & - old_prot; - else - current->protection = new_prot; - - /* - * Update physical map if necessary. - * Worry about copy-on-write here -- CHECK THIS XXX - */ - - if (current->protection != old_prot) { - -#define MASK(entry) ((entry)->copy_on_write ? ~VM_PROT_WRITE : \ - VM_PROT_ALL) -#define max(a,b) ((a) > (b) ? 
(a) : (b))
-
- if (current->is_a_map) {
- vm_map_entry_t share_entry;
- vm_offset_t share_end;
-
- vm_map_lock(current->object.share_map);
- (void) vm_map_lookup_entry(
- current->object.share_map,
- current->offset,
- &share_entry);
- share_end = current->offset +
- (current->end - current->start);
- while ((share_entry !=
- &current->object.share_map->header) &&
- (share_entry->start < share_end)) {
-
- pmap_protect(map->pmap,
- (max(share_entry->start,
- current->offset) -
- current->offset +
- current->start),
- min(share_entry->end,
- share_end) -
- current->offset +
- current->start,
- current->protection &
- MASK(share_entry));
-
- share_entry = share_entry->next;
- }
- vm_map_unlock(current->object.share_map);
- }
- else
- pmap_protect(map->pmap, current->start,
- current->end,
- current->protection & MASK(entry));
-#undef max
-#undef MASK
- }
- current = current->next;
- }
-
- vm_map_unlock(map);
- return(KERN_SUCCESS);
-}
-
-/*
- * vm_map_inherit:
- *
- * Sets the inheritance of the specified address
- * range in the target map. Inheritance
- * affects how the map will be shared with
- * child maps at the time of vm_map_fork.
- */
-int
-vm_map_inherit(map, start, end, new_inheritance)
- register vm_map_t map;
- register vm_offset_t start;
- register vm_offset_t end;
- register vm_inherit_t new_inheritance;
-{
- register vm_map_entry_t entry;
- vm_map_entry_t temp_entry;
-
- switch (new_inheritance) {
- case VM_INHERIT_NONE:
- case VM_INHERIT_COPY:
- case VM_INHERIT_SHARE:
- break;
- default:
- return(KERN_INVALID_ARGUMENT);
- }
-
- vm_map_lock(map);
-
- VM_MAP_RANGE_CHECK(map, start, end);
-
- if (vm_map_lookup_entry(map, start, &temp_entry)) {
- entry = temp_entry;
- vm_map_clip_start(map, entry, start);
- }
- else
- entry = temp_entry->next;
-
- while ((entry != &map->header) && (entry->start < end)) {
- vm_map_clip_end(map, entry, end);
-
- entry->inheritance = new_inheritance;
-
- entry = entry->next;
- }
-
- vm_map_unlock(map);
- return(KERN_SUCCESS);
-}
-
-/*
- * vm_map_pageable:
- *
- * Sets the pageability of the specified address
- * range in the target map. Regions specified
- * as not pageable require locked-down physical
- * memory and physical page maps.
- *
- * The map must not be locked, but a reference
- * must remain to the map throughout the call.
- */
-int
-vm_map_pageable(map, start, end, new_pageable)
- register vm_map_t map;
- register vm_offset_t start;
- register vm_offset_t end;
- register boolean_t new_pageable;
-{
- register vm_map_entry_t entry;
- vm_map_entry_t start_entry;
- register vm_offset_t failed = 0;
- int rv;
-
- vm_map_lock(map);
-
- VM_MAP_RANGE_CHECK(map, start, end);
-
- /*
- * Only one pageability change may take place at one
- * time, since vm_fault assumes it will be called
- * only once for each wiring/unwiring. Therefore, we
- * have to make sure we're actually changing the pageability
- * for the entire region. We do so before making any changes.
- */
-
- if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
- vm_map_unlock(map);
- return(KERN_INVALID_ADDRESS);
- }
- entry = start_entry;
-
- /*
- * Actions are rather different for wiring and unwiring,
- * so we have two separate cases.
- */
-
- if (new_pageable) {
-
- vm_map_clip_start(map, entry, start);
-
- /*
- * Unwiring. First ensure that the range to be
- * unwired is really wired down and that there
- * are no holes.
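The unwiring check that follows, and the wiring pass further below, both reject a range containing a hole, i.e. a gap between one entry's end and the next entry's start before the requested end is reached. A small sketch of that hole check over a sorted entry list (illustrative; it assumes the caller already found an entry containing the range start, as vm_map_lookup_entry does above):

#include <stdio.h>

struct entry {
	unsigned long start, end;
	struct entry *next;
};

/* Return 1 if [start, end) is not covered back-to-back by the entries. */
static int
range_has_hole(struct entry *head, unsigned long start, unsigned long end)
{
	struct entry *e;

	(void)start;	/* head is assumed to contain `start` already */
	for (e = head; e != NULL && e->start < end; e = e->next) {
		if (e->end < end &&
		    (e->next == NULL || e->next->start > e->end))
			return (1);	/* gap before the range is covered */
	}
	return (0);
}

int
main(void)
{
	struct entry c = { 0x3000, 0x5000, NULL };
	struct entry b = { 0x2000, 0x3000, &c };
	struct entry a = { 0x1000, 0x2000, &b };

	printf("hole: %d\n", range_has_hole(&a, 0x1000, 0x5000));
	return (0);
}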
- */ - while ((entry != &map->header) && (entry->start < end)) { - - if (entry->wired_count == 0 || - (entry->end < end && - (entry->next == &map->header || - entry->next->start > entry->end))) { - vm_map_unlock(map); - return(KERN_INVALID_ARGUMENT); - } - entry = entry->next; - } - - /* - * Now decrement the wiring count for each region. - * If a region becomes completely unwired, - * unwire its physical pages and mappings. - */ - vm_map_set_recursive(&map->lock); - - entry = start_entry; - while ((entry != &map->header) && (entry->start < end)) { - vm_map_clip_end(map, entry, end); - - entry->wired_count--; - if (entry->wired_count == 0) - vm_fault_unwire(map, entry->start, entry->end); - - entry = entry->next; - } - vm_map_clear_recursive(&map->lock); - } - - else { - /* - * Wiring. We must do this in two passes: - * - * 1. Holding the write lock, we create any shadow - * or zero-fill objects that need to be created. - * Then we clip each map entry to the region to be - * wired and increment its wiring count. We - * create objects before clipping the map entries - * to avoid object proliferation. - * - * 2. We downgrade to a read lock, and call - * vm_fault_wire to fault in the pages for any - * newly wired area (wired_count is 1). - * - * Downgrading to a read lock for vm_fault_wire avoids - * a possible deadlock with another thread that may have - * faulted on one of the pages to be wired (it would mark - * the page busy, blocking us, then in turn block on the - * map lock that we hold). Because of problems in the - * recursive lock package, we cannot upgrade to a write - * lock in vm_map_lookup. Thus, any actions that require - * the write lock must be done beforehand. Because we - * keep the read lock on the map, the copy-on-write status - * of the entries we modify here cannot change. - */ - - /* - * Pass 1. - */ - while ((entry != &map->header) && (entry->start < end)) { - if (entry->wired_count == 0) { - - /* - * Perform actions of vm_map_lookup that need - * the write lock on the map: create a shadow - * object for a copy-on-write region, or an - * object for a zero-fill region. - * - * We don't have to do this for entries that - * point to sharing maps, because we won't hold - * the lock on the sharing map. - */ - if (!entry->is_a_map) { - if (entry->needs_copy && - ((entry->protection & VM_PROT_WRITE) != 0)) { - - vm_object_shadow(&entry->object.vm_object, - &entry->offset, - (vm_size_t)(entry->end - - entry->start)); - entry->needs_copy = FALSE; - } - else if (entry->object.vm_object == NULL) { - entry->object.vm_object = - vm_object_allocate((vm_size_t)(entry->end - - entry->start)); - entry->offset = (vm_offset_t)0; - } - } - } - vm_map_clip_start(map, entry, start); - vm_map_clip_end(map, entry, end); - entry->wired_count++; - - /* - * Check for holes - */ - if (entry->end < end && - (entry->next == &map->header || - entry->next->start > entry->end)) { - /* - * Found one. Object creation actions - * do not need to be undone, but the - * wired counts need to be restored. - */ - while (entry != &map->header && entry->end > start) { - entry->wired_count--; - entry = entry->prev; - } - vm_map_unlock(map); - return(KERN_INVALID_ARGUMENT); - } - entry = entry->next; - } - - /* - * Pass 2. - */ - - /* - * HACK HACK HACK HACK - * - * If we are wiring in the kernel map or a submap of it, - * unlock the map to avoid deadlocks. 
We trust that the - * kernel threads are well-behaved, and therefore will - * not do anything destructive to this region of the map - * while we have it unlocked. We cannot trust user threads - * to do the same. - * - * HACK HACK HACK HACK - */ - if (vm_map_pmap(map) == pmap_kernel()) { - vm_map_unlock(map); /* trust me ... */ - } - else { - vm_map_set_recursive(&map->lock); - lockmgr(&map->lock, LK_DOWNGRADE, (void *)0, curproc); - } - - rv = 0; - entry = start_entry; - while (entry != &map->header && entry->start < end) { - /* - * If vm_fault_wire fails for any page we need to - * undo what has been done. We decrement the wiring - * count for those pages which have not yet been - * wired (now) and unwire those that have (later). - * - * XXX this violates the locking protocol on the map, - * needs to be fixed. - */ - if (rv) - entry->wired_count--; - else if (entry->wired_count == 1) { - rv = vm_fault_wire(map, entry->start, entry->end); - if (rv) { - failed = entry->start; - entry->wired_count--; - } - } - entry = entry->next; - } - - if (vm_map_pmap(map) == pmap_kernel()) { - vm_map_lock(map); - } - else { - vm_map_clear_recursive(&map->lock); - } - if (rv) { - vm_map_unlock(map); - (void) vm_map_pageable(map, start, failed, TRUE); - return(rv); - } - } - - vm_map_unlock(map); - - return(KERN_SUCCESS); -} - -/* - * vm_map_clean - * - * Push any dirty cached pages in the address range to their pager. - * If syncio is TRUE, dirty pages are written synchronously. - * If invalidate is TRUE, any cached pages are freed as well. - * - * Returns an error if any part of the specified range is not mapped. - */ -int -vm_map_clean(map, start, end, syncio, invalidate) - vm_map_t map; - vm_offset_t start; - vm_offset_t end; - boolean_t syncio; - boolean_t invalidate; -{ - register vm_map_entry_t current; - vm_map_entry_t entry; - vm_size_t size; - vm_object_t object; - vm_offset_t offset; - - vm_map_lock_read(map); - VM_MAP_RANGE_CHECK(map, start, end); - if (!vm_map_lookup_entry(map, start, &entry)) { - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); - } - - /* - * Make a first pass to check for holes, and (if invalidating) - * wired pages. - */ - for (current = entry; current->start < end; current = current->next) { - if (current->is_sub_map) { - vm_map_unlock_read(map); - return(KERN_INVALID_ARGUMENT); - } - if (end > current->end && - (current->next == &map->header || - current->end != current->next->start)) { - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); - } - if (current->wired_count) { - vm_map_unlock_read(map); - return(KERN_PAGES_LOCKED); - } - } - - /* - * Make a second pass, cleaning/uncaching pages from the indicated - * objects as we go. - */ - for (current = entry; current->start < end; current = current->next) { - offset = current->offset + (start - current->start); - size = (end <= current->end ? end : current->end) - start; - if (current->is_a_map) { - register vm_map_t smap; - vm_map_entry_t tentry; - vm_size_t tsize; - - smap = current->object.share_map; - vm_map_lock_read(smap); - (void) vm_map_lookup_entry(smap, offset, &tentry); - tsize = tentry->end - offset; - if (tsize < size) - size = tsize; - object = tentry->object.vm_object; - offset = tentry->offset + (offset - tentry->start); - vm_object_lock(object); - vm_map_unlock_read(smap); - } else { - object = current->object.vm_object; - vm_object_lock(object); - } - /* - * XXX should we continue on an error? 
- */ - if (!vm_object_page_clean(object, offset, offset+size, syncio, - FALSE)) { - vm_object_unlock(object); - vm_map_unlock_read(map); - return(KERN_FAILURE); - } - if (invalidate) - vm_object_page_remove(object, offset, offset+size); - vm_object_unlock(object); - start += size; - } - - vm_map_unlock_read(map); - return(KERN_SUCCESS); -} - -/* - * vm_map_entry_unwire: [ internal use only ] - * - * Make the region specified by this entry pageable. - * - * The map in question should be locked. - * [This is the reason for this routine's existence.] - */ -void -vm_map_entry_unwire(map, entry) - vm_map_t map; - register vm_map_entry_t entry; -{ - vm_fault_unwire(map, entry->start, entry->end); - entry->wired_count = 0; -} - -/* - * vm_map_entry_delete: [ internal use only ] - * - * Deallocate the given entry from the target map. - */ -void -vm_map_entry_delete(map, entry) - register vm_map_t map; - register vm_map_entry_t entry; -{ - if (entry->wired_count != 0) - vm_map_entry_unwire(map, entry); - - vm_map_entry_unlink(map, entry); - map->size -= entry->end - entry->start; - - if (entry->is_a_map || entry->is_sub_map) - vm_map_deallocate(entry->object.share_map); - else - vm_object_deallocate(entry->object.vm_object); - - vm_map_entry_dispose(map, entry); -} - -/* - * vm_map_delete: [ internal use only ] - * - * Deallocates the given address range from the target - * map. - * - * When called with a sharing map, removes pages from - * that region from all physical maps. - */ -int -vm_map_delete(map, start, end) - register vm_map_t map; - vm_offset_t start; - register vm_offset_t end; -{ - register vm_map_entry_t entry; - vm_map_entry_t first_entry; - - /* - * Find the start of the region, and clip it - */ - - if (!vm_map_lookup_entry(map, start, &first_entry)) - entry = first_entry->next; - else { - entry = first_entry; - vm_map_clip_start(map, entry, start); - - /* - * Fix the lookup hint now, rather than each - * time though the loop. - */ - - SAVE_HINT(map, entry->prev); - } - - /* - * Save the free space hint - */ - - if (map->first_free->start >= start) - map->first_free = entry->prev; - - /* - * Step through all entries in this region - */ - - while ((entry != &map->header) && (entry->start < end)) { - vm_map_entry_t next; - register vm_offset_t s, e; - register vm_object_t object; - - vm_map_clip_end(map, entry, end); - - next = entry->next; - s = entry->start; - e = entry->end; - - /* - * Unwire before removing addresses from the pmap; - * otherwise, unwiring will put the entries back in - * the pmap. - */ - - object = entry->object.vm_object; - if (entry->wired_count != 0) - vm_map_entry_unwire(map, entry); - - /* - * If this is a sharing map, we must remove - * *all* references to this data, since we can't - * find all of the physical maps which are sharing - * it. - */ - - if (object == kernel_object || object == kmem_object) - vm_object_page_remove(object, entry->offset, - entry->offset + (e - s)); - else if (!map->is_main_map) - vm_object_pmap_remove(object, - entry->offset, - entry->offset + (e - s)); - else - pmap_remove(map->pmap, s, e); - - /* - * Delete the entry (which may delete the object) - * only after removing all pmap entries pointing - * to its pages. (Otherwise, its page frames may - * be reallocated, and any modify bits will be - * set in the wrong object!) - */ - - vm_map_entry_delete(map, entry); - entry = next; - } - return(KERN_SUCCESS); -} - -/* - * vm_map_remove: - * - * Remove the given address range from the target map. 
- * This is the exported form of vm_map_delete. - */ -int -vm_map_remove(map, start, end) - register vm_map_t map; - register vm_offset_t start; - register vm_offset_t end; -{ - register int result; - - vm_map_lock(map); - VM_MAP_RANGE_CHECK(map, start, end); - result = vm_map_delete(map, start, end); - vm_map_unlock(map); - - return(result); -} - -/* - * vm_map_check_protection: - * - * Assert that the target map allows the specified - * privilege on the entire address region given. - * The entire region must be allocated. - */ -boolean_t -vm_map_check_protection(map, start, end, protection) - register vm_map_t map; - register vm_offset_t start; - register vm_offset_t end; - register vm_prot_t protection; -{ - register vm_map_entry_t entry; - vm_map_entry_t tmp_entry; - - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - return(FALSE); - } - - entry = tmp_entry; - - while (start < end) { - if (entry == &map->header) { - return(FALSE); - } - - /* - * No holes allowed! - */ - - if (start < entry->start) { - return(FALSE); - } - - /* - * Check protection associated with entry. - */ - - if ((entry->protection & protection) != protection) { - return(FALSE); - } - - /* go to next entry */ - - start = entry->end; - entry = entry->next; - } - return(TRUE); -} - -/* - * vm_map_copy_entry: - * - * Copies the contents of the source entry to the destination - * entry. The entries *must* be aligned properly. - */ -void -vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) - vm_map_t src_map, dst_map; - register vm_map_entry_t src_entry, dst_entry; -{ - vm_object_t temp_object; - - if (src_entry->is_sub_map || dst_entry->is_sub_map) - return; - - if (dst_entry->object.vm_object != NULL && - (dst_entry->object.vm_object->flags & OBJ_INTERNAL) == 0) - printf("vm_map_copy_entry: copying over permanent data!\n"); - - /* - * If our destination map was wired down, - * unwire it now. - */ - - if (dst_entry->wired_count != 0) - vm_map_entry_unwire(dst_map, dst_entry); - - /* - * If we're dealing with a sharing map, we - * must remove the destination pages from - * all maps (since we cannot know which maps - * this sharing map belongs in). - */ - - if (dst_map->is_main_map) - pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end); - else - vm_object_pmap_remove(dst_entry->object.vm_object, - dst_entry->offset, - dst_entry->offset + - (dst_entry->end - dst_entry->start)); - - if (src_entry->wired_count == 0) { - - boolean_t src_needs_copy; - - /* - * If the source entry is marked needs_copy, - * it is already write-protected. - */ - if (!src_entry->needs_copy) { - - boolean_t su; - - /* - * If the source entry has only one mapping, - * we can just protect the virtual address - * range. - */ - if (!(su = src_map->is_main_map)) { - simple_lock(&src_map->ref_lock); - su = (src_map->ref_count == 1); - simple_unlock(&src_map->ref_lock); - } - - if (su) { - pmap_protect(src_map->pmap, - src_entry->start, - src_entry->end, - src_entry->protection & ~VM_PROT_WRITE); - } - else { - vm_object_pmap_copy(src_entry->object.vm_object, - src_entry->offset, - src_entry->offset + (src_entry->end - -src_entry->start)); - } - } - - /* - * Make a copy of the object. 
- */ - temp_object = dst_entry->object.vm_object; - vm_object_copy(src_entry->object.vm_object, src_entry->offset, - (vm_size_t)(src_entry->end - src_entry->start), - &dst_entry->object.vm_object, &dst_entry->offset, - &src_needs_copy); - - /* - * If we didn't get a copy-object now, mark the - * source map entry so that a shadow will be created - * to hold its changed pages. - */ - if (src_needs_copy) - src_entry->needs_copy = TRUE; - - /* - * The destination always needs to have a shadow - * created, unless it's a zero-fill entry. - */ - if (dst_entry->object.vm_object != NULL) - dst_entry->needs_copy = TRUE; - else - dst_entry->needs_copy = FALSE; - - /* - * Mark the entries copy-on-write, so that write-enabling - * the entry won't make copy-on-write pages writable. - */ - src_entry->copy_on_write = TRUE; - dst_entry->copy_on_write = TRUE; - /* - * Get rid of the old object. - */ - vm_object_deallocate(temp_object); - - pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, - dst_entry->end - dst_entry->start, src_entry->start); - } - else { - /* - * Of course, wired down pages can't be set copy-on-write. - * Cause wired pages to be copied into the new - * map by simulating faults (the new pages are - * pageable) - */ - vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry); - } -} - -/* - * vm_map_copy: - * - * Perform a virtual memory copy from the source - * address map/range to the destination map/range. - * - * If src_destroy or dst_alloc is requested, - * the source and destination regions should be - * disjoint, not only in the top-level map, but - * in the sharing maps as well. [The best way - * to guarantee this is to use a new intermediate - * map to make copies. This also reduces map - * fragmentation.] - */ -int -vm_map_copy(dst_map, src_map, - dst_addr, len, src_addr, - dst_alloc, src_destroy) - vm_map_t dst_map; - vm_map_t src_map; - vm_offset_t dst_addr; - vm_size_t len; - vm_offset_t src_addr; - boolean_t dst_alloc; - boolean_t src_destroy; -{ - register - vm_map_entry_t src_entry; - register - vm_map_entry_t dst_entry; - vm_map_entry_t tmp_entry; - vm_offset_t src_start; - vm_offset_t src_end; - vm_offset_t dst_start; - vm_offset_t dst_end; - vm_offset_t src_clip; - vm_offset_t dst_clip; - int result; - boolean_t old_src_destroy; - - /* - * XXX While we figure out why src_destroy screws up, - * we'll do it by explicitly vm_map_delete'ing at the end. - */ - - old_src_destroy = src_destroy; - src_destroy = FALSE; - - /* - * Compute start and end of region in both maps - */ - - src_start = src_addr; - src_end = src_start + len; - dst_start = dst_addr; - dst_end = dst_start + len; - - /* - * Check that the region can exist in both source - * and destination. - */ - - if ((dst_end < dst_start) || (src_end < src_start)) - return(KERN_NO_SPACE); - - /* - * Lock the maps in question -- we avoid deadlock - * by ordering lock acquisition by map value - */ - - if (src_map == dst_map) { - vm_map_lock(src_map); - } - else if ((long) src_map < (long) dst_map) { - vm_map_lock(src_map); - vm_map_lock(dst_map); - } else { - vm_map_lock(dst_map); - vm_map_lock(src_map); - } - - result = KERN_SUCCESS; - - /* - * Check protections... source must be completely readable and - * destination must be completely writable. [Note that if we're - * allocating the destination region, we don't have to worry - * about protection, but instead about whether the region - * exists.] 
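vm_map_copy() above avoids deadlock when it must hold two maps at once by always acquiring the locks in a fixed order, here by comparing the map pointers. A userland sketch of the same lock-ordering rule using pthreads (the types are illustrative, not the kernel's lock primitives):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct map {
	pthread_mutex_t lock;
};

/* Always take the lower-addressed lock first, so two threads copying
 * between the same pair of maps in opposite directions cannot deadlock. */
static void
lock_pair(struct map *a, struct map *b)
{
	if (a == b) {
		pthread_mutex_lock(&a->lock);
	} else if ((uintptr_t)a < (uintptr_t)b) {
		pthread_mutex_lock(&a->lock);
		pthread_mutex_lock(&b->lock);
	} else {
		pthread_mutex_lock(&b->lock);
		pthread_mutex_lock(&a->lock);
	}
}

static void
unlock_pair(struct map *a, struct map *b)
{
	pthread_mutex_unlock(&a->lock);
	if (a != b)
		pthread_mutex_unlock(&b->lock);
}

int
main(void)
{
	struct map m1, m2;

	pthread_mutex_init(&m1.lock, NULL);
	pthread_mutex_init(&m2.lock, NULL);
	lock_pair(&m1, &m2);
	printf("both maps locked\n");
	unlock_pair(&m1, &m2);
	return (0);
}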
- */ - - if (src_map->is_main_map && dst_map->is_main_map) { - if (!vm_map_check_protection(src_map, src_start, src_end, - VM_PROT_READ)) { - result = KERN_PROTECTION_FAILURE; - goto Return; - } - - if (dst_alloc) { - /* XXX Consider making this a vm_map_find instead */ - if ((result = vm_map_insert(dst_map, NULL, - (vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS) - goto Return; - } - else if (!vm_map_check_protection(dst_map, dst_start, dst_end, - VM_PROT_WRITE)) { - result = KERN_PROTECTION_FAILURE; - goto Return; - } - } - - /* - * Find the start entries and clip. - * - * Note that checking protection asserts that the - * lookup cannot fail. - * - * Also note that we wait to do the second lookup - * until we have done the first clip, as the clip - * may affect which entry we get! - */ - - (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry); - src_entry = tmp_entry; - vm_map_clip_start(src_map, src_entry, src_start); - - (void) vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry); - dst_entry = tmp_entry; - vm_map_clip_start(dst_map, dst_entry, dst_start); - - /* - * If both source and destination entries are the same, - * retry the first lookup, as it may have changed. - */ - - if (src_entry == dst_entry) { - (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry); - src_entry = tmp_entry; - } - - /* - * If source and destination entries are still the same, - * a null copy is being performed. - */ - - if (src_entry == dst_entry) - goto Return; - - /* - * Go through entries until we get to the end of the - * region. - */ - - while (src_start < src_end) { - /* - * Clip the entries to the endpoint of the entire region. - */ - - vm_map_clip_end(src_map, src_entry, src_end); - vm_map_clip_end(dst_map, dst_entry, dst_end); - - /* - * Clip each entry to the endpoint of the other entry. - */ - - src_clip = src_entry->start + (dst_entry->end - dst_entry->start); - vm_map_clip_end(src_map, src_entry, src_clip); - - dst_clip = dst_entry->start + (src_entry->end - src_entry->start); - vm_map_clip_end(dst_map, dst_entry, dst_clip); - - /* - * Both entries now match in size and relative endpoints. - * - * If both entries refer to a VM object, we can - * deal with them now. - */ - - if (!src_entry->is_a_map && !dst_entry->is_a_map) { - vm_map_copy_entry(src_map, dst_map, src_entry, - dst_entry); - } - else { - register vm_map_t new_dst_map; - vm_offset_t new_dst_start; - vm_size_t new_size; - vm_map_t new_src_map; - vm_offset_t new_src_start; - - /* - * We have to follow at least one sharing map. - */ - - new_size = (dst_entry->end - dst_entry->start); - - if (src_entry->is_a_map) { - new_src_map = src_entry->object.share_map; - new_src_start = src_entry->offset; - } - else { - new_src_map = src_map; - new_src_start = src_entry->start; - vm_map_set_recursive(&src_map->lock); - } - - if (dst_entry->is_a_map) { - vm_offset_t new_dst_end; - - new_dst_map = dst_entry->object.share_map; - new_dst_start = dst_entry->offset; - - /* - * Since the destination sharing entries - * will be merely deallocated, we can - * do that now, and replace the region - * with a null object. [This prevents - * splitting the source map to match - * the form of the destination map.] - * Note that we can only do so if the - * source and destination do not overlap. 
- */ - - new_dst_end = new_dst_start + new_size; - - if (new_dst_map != new_src_map) { - vm_map_lock(new_dst_map); - (void) vm_map_delete(new_dst_map, - new_dst_start, - new_dst_end); - (void) vm_map_insert(new_dst_map, - NULL, - (vm_offset_t) 0, - new_dst_start, - new_dst_end); - vm_map_unlock(new_dst_map); - } - } - else { - new_dst_map = dst_map; - new_dst_start = dst_entry->start; - vm_map_set_recursive(&dst_map->lock); - } - - /* - * Recursively copy the sharing map. - */ - - (void) vm_map_copy(new_dst_map, new_src_map, - new_dst_start, new_size, new_src_start, - FALSE, FALSE); - - if (dst_map == new_dst_map) - vm_map_clear_recursive(&dst_map->lock); - if (src_map == new_src_map) - vm_map_clear_recursive(&src_map->lock); - } - - /* - * Update variables for next pass through the loop. - */ - - src_start = src_entry->end; - src_entry = src_entry->next; - dst_start = dst_entry->end; - dst_entry = dst_entry->next; - - /* - * If the source is to be destroyed, here is the - * place to do it. - */ - - if (src_destroy && src_map->is_main_map && - dst_map->is_main_map) - vm_map_entry_delete(src_map, src_entry->prev); - } - - /* - * Update the physical maps as appropriate - */ - - if (src_map->is_main_map && dst_map->is_main_map) { - if (src_destroy) - pmap_remove(src_map->pmap, src_addr, src_addr + len); - } - - /* - * Unlock the maps - */ - - Return: ; - - if (old_src_destroy) - vm_map_delete(src_map, src_addr, src_addr + len); - - vm_map_unlock(src_map); - if (src_map != dst_map) - vm_map_unlock(dst_map); - - return(result); -} - -/* - * vmspace_fork: - * Create a new process vmspace structure and vm_map - * based on those of an existing process. The new map - * is based on the old map, according to the inheritance - * values on the regions in that map. - * - * The source map must not be locked. - */ -struct vmspace * -vmspace_fork(vm1) - register struct vmspace *vm1; -{ - register struct vmspace *vm2; - vm_map_t old_map = &vm1->vm_map; - vm_map_t new_map; - vm_map_entry_t old_entry; - vm_map_entry_t new_entry; - - vm_map_lock(old_map); - - vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset, - old_map->entries_pageable); - bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy, - (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); - new_map = &vm2->vm_map; /* XXX */ - - old_entry = old_map->header.next; - - while (old_entry != &old_map->header) { - if (old_entry->is_sub_map) - panic("vm_map_fork: encountered a submap"); - - switch (old_entry->inheritance) { - case VM_INHERIT_NONE: - break; - - case VM_INHERIT_SHARE: - /* - * If we don't already have a sharing map: - */ - - if (!old_entry->is_a_map) { - vm_map_t new_share_map; - vm_map_entry_t new_share_entry; - - /* - * Create a new sharing map - */ - - new_share_map = vm_map_create(NULL, - old_entry->start, - old_entry->end, - TRUE); - new_share_map->is_main_map = FALSE; - - /* - * Create the only sharing entry from the - * old task map entry. - */ - - new_share_entry = - vm_map_entry_create(new_share_map); - *new_share_entry = *old_entry; - new_share_entry->wired_count = 0; - - /* - * Insert the entry into the new sharing - * map - */ - - vm_map_entry_link(new_share_map, - new_share_map->header.prev, - new_share_entry); - - /* - * Fix up the task map entry to refer - * to the sharing map now. - */ - - old_entry->is_a_map = TRUE; - old_entry->object.share_map = new_share_map; - old_entry->offset = old_entry->start; - } - - /* - * Clone the entry, referencing the sharing map. 
- */ - - new_entry = vm_map_entry_create(new_map); - *new_entry = *old_entry; - new_entry->wired_count = 0; - vm_map_reference(new_entry->object.share_map); - - /* - * Insert the entry into the new map -- we - * know we're inserting at the end of the new - * map. - */ - - vm_map_entry_link(new_map, new_map->header.prev, - new_entry); - - /* - * Update the physical map - */ - - pmap_copy(new_map->pmap, old_map->pmap, - new_entry->start, - (old_entry->end - old_entry->start), - old_entry->start); - break; - - case VM_INHERIT_COPY: - /* - * Clone the entry and link into the map. - */ - - new_entry = vm_map_entry_create(new_map); - *new_entry = *old_entry; - new_entry->wired_count = 0; - new_entry->object.vm_object = NULL; - new_entry->is_a_map = FALSE; - vm_map_entry_link(new_map, new_map->header.prev, - new_entry); - if (old_entry->is_a_map) { - int check; - - check = vm_map_copy(new_map, - old_entry->object.share_map, - new_entry->start, - (vm_size_t)(new_entry->end - - new_entry->start), - old_entry->offset, - FALSE, FALSE); - if (check != KERN_SUCCESS) - printf("vm_map_fork: copy in share_map region failed\n"); - } - else { - vm_map_copy_entry(old_map, new_map, old_entry, - new_entry); - } - break; - } - old_entry = old_entry->next; - } - - new_map->size = old_map->size; - vm_map_unlock(old_map); - - return(vm2); -} - -/* - * vm_map_lookup: - * - * Finds the VM object, offset, and - * protection for a given virtual address in the - * specified map, assuming a page fault of the - * type specified. - * - * Leaves the map in question locked for read; return - * values are guaranteed until a vm_map_lookup_done - * call is performed. Note that the map argument - * is in/out; the returned map must be used in - * the call to vm_map_lookup_done. - * - * A handle (out_entry) is returned for use in - * vm_map_lookup_done, to make that fast. - * - * If a lookup is requested with "write protection" - * specified, the map may be changed to perform virtual - * copying operations, although the data referenced will - * remain the same. - */ -int -vm_map_lookup(var_map, vaddr, fault_type, out_entry, - object, offset, out_prot, wired, single_use) - vm_map_t *var_map; /* IN/OUT */ - register vm_offset_t vaddr; - register vm_prot_t fault_type; - - vm_map_entry_t *out_entry; /* OUT */ - vm_object_t *object; /* OUT */ - vm_offset_t *offset; /* OUT */ - vm_prot_t *out_prot; /* OUT */ - boolean_t *wired; /* OUT */ - boolean_t *single_use; /* OUT */ -{ - vm_map_t share_map; - vm_offset_t share_offset; - register vm_map_entry_t entry; - register vm_map_t map = *var_map; - register vm_prot_t prot; - register boolean_t su; - - RetryLookup: ; - - /* - * Lookup the faulting address. - */ - - vm_map_lock_read(map); - -#define RETURN(why) \ - { \ - vm_map_unlock_read(map); \ - return(why); \ - } - - /* - * If the map has an interesting hint, try it before calling - * full blown lookup routine. - */ - - simple_lock(&map->hint_lock); - entry = map->hint; - simple_unlock(&map->hint_lock); - - *out_entry = entry; - - if ((entry == &map->header) || - (vaddr < entry->start) || (vaddr >= entry->end)) { - vm_map_entry_t tmp_entry; - - /* - * Entry was either not a valid hint, or the vaddr - * was not contained in the entry, so do a full lookup. - */ - if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) - RETURN(KERN_INVALID_ADDRESS); - - entry = tmp_entry; - *out_entry = entry; - } - - /* - * Handle submaps. 
- */ - - if (entry->is_sub_map) { - vm_map_t old_map = map; - - *var_map = map = entry->object.sub_map; - vm_map_unlock_read(old_map); - goto RetryLookup; - } - - /* - * Check whether this task is allowed to have - * this page. - */ - - prot = entry->protection; - if ((fault_type & (prot)) != fault_type) - RETURN(KERN_PROTECTION_FAILURE); - - /* - * If this page is not pageable, we have to get - * it for all possible accesses. - */ - - if ((*wired = (entry->wired_count != 0)) != 0) - prot = fault_type = entry->protection; - - /* - * If we don't already have a VM object, track - * it down. - */ - - if ((su = !entry->is_a_map) != 0) { - share_map = map; - share_offset = vaddr; - } - else { - vm_map_entry_t share_entry; - - /* - * Compute the sharing map, and offset into it. - */ - - share_map = entry->object.share_map; - share_offset = (vaddr - entry->start) + entry->offset; - - /* - * Look for the backing store object and offset - */ - - vm_map_lock_read(share_map); - - if (!vm_map_lookup_entry(share_map, share_offset, - &share_entry)) { - vm_map_unlock_read(share_map); - RETURN(KERN_INVALID_ADDRESS); - } - entry = share_entry; - } - - /* - * If the entry was copy-on-write, we either ... - */ - - if (entry->needs_copy) { - /* - * If we want to write the page, we may as well - * handle that now since we've got the sharing - * map locked. - * - * If we don't need to write the page, we just - * demote the permissions allowed. - */ - - if (fault_type & VM_PROT_WRITE) { - /* - * Make a new object, and place it in the - * object chain. Note that no new references - * have appeared -- one just moved from the - * share map to the new object. - */ - - if (lockmgr(&share_map->lock, LK_EXCLUPGRADE, - (void *)0, curproc)) { - if (share_map != map) - vm_map_unlock_read(map); - goto RetryLookup; - } - - vm_object_shadow( - &entry->object.vm_object, - &entry->offset, - (vm_size_t) (entry->end - entry->start)); - - entry->needs_copy = FALSE; - - lockmgr(&share_map->lock, LK_DOWNGRADE, - (void *)0, curproc); - } - else { - /* - * We're attempting to read a copy-on-write - * page -- don't allow writes. - */ - - prot &= (~VM_PROT_WRITE); - } - } - - /* - * Create an object if necessary. - */ - if (entry->object.vm_object == NULL) { - - if (lockmgr(&share_map->lock, LK_EXCLUPGRADE, - (void *)0, curproc)) { - if (share_map != map) - vm_map_unlock_read(map); - goto RetryLookup; - } - - entry->object.vm_object = vm_object_allocate( - (vm_size_t)(entry->end - entry->start)); - entry->offset = 0; - lockmgr(&share_map->lock, LK_DOWNGRADE, (void *)0, curproc); - } - - /* - * Return the object/offset from this entry. If the entry - * was copy-on-write or empty, it has been fixed up. - */ - - *offset = (share_offset - entry->start) + entry->offset; - *object = entry->object.vm_object; - - /* - * Return whether this is the only map sharing this data. - */ - - if (!su) { - simple_lock(&share_map->ref_lock); - su = (share_map->ref_count == 1); - simple_unlock(&share_map->ref_lock); - } - - *out_prot = prot; - *single_use = su; - - return(KERN_SUCCESS); - -#undef RETURN -} - -/* - * vm_map_lookup_done: - * - * Releases locks acquired by a vm_map_lookup - * (according to the handle returned by that lookup). - */ - -void -vm_map_lookup_done(map, entry) - register vm_map_t map; - vm_map_entry_t entry; -{ - /* - * If this entry references a map, unlock it first. 
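The needs_copy handling in vm_map_lookup() above defers copying until a write actually happens: a read fault merely demotes the permissions, while a write fault gives the faulting side its own private object. Below is a much-simplified userland model of that copy-on-write idea; all names are illustrative, and the real mechanism shadows VM objects at page granularity rather than copying a buffer eagerly like this.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct backing {
	int refs;
	char data[64];
};

struct mapping {
	struct backing *obj;
	int needs_copy;		/* still sharing another mapping's store */
};

static void
mapping_write(struct mapping *m, const char *s)
{
	if (m->needs_copy && m->obj->refs > 1) {
		struct backing *priv = malloc(sizeof(*priv));

		if (priv == NULL)
			exit(1);
		memcpy(priv, m->obj, sizeof(*priv));	/* take a private copy */
		priv->refs = 1;
		m->obj->refs--;
		m->obj = priv;
	}
	m->needs_copy = 0;
	snprintf(m->obj->data, sizeof(m->obj->data), "%s", s);
}

int
main(void)
{
	struct backing shared = { 2, "original" };
	struct mapping parent = { &shared, 1 };
	struct mapping child = { &shared, 1 };

	mapping_write(&child, "child's private copy");
	printf("parent sees \"%s\", child sees \"%s\"\n",
	    parent.obj->data, child.obj->data);
	if (child.obj != &shared)
		free(child.obj);
	return (0);
}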
- */ - - if (entry->is_a_map) - vm_map_unlock_read(entry->object.share_map); - - /* - * Unlock the main-level map - */ - - vm_map_unlock_read(map); -} - -/* - * Routine: vm_map_simplify - * Purpose: - * Attempt to simplify the map representation in - * the vicinity of the given starting address. - * Note: - * This routine is intended primarily to keep the - * kernel maps more compact -- they generally don't - * benefit from the "expand a map entry" technology - * at allocation time because the adjacent entry - * is often wired down. - */ -void -vm_map_simplify(map, start) - vm_map_t map; - vm_offset_t start; -{ - vm_map_entry_t this_entry; - vm_map_entry_t prev_entry; - - vm_map_lock(map); - if ( - (vm_map_lookup_entry(map, start, &this_entry)) && - ((prev_entry = this_entry->prev) != &map->header) && - - (prev_entry->end == start) && - (map->is_main_map) && - - (prev_entry->is_a_map == FALSE) && - (prev_entry->is_sub_map == FALSE) && - - (this_entry->is_a_map == FALSE) && - (this_entry->is_sub_map == FALSE) && - - (prev_entry->inheritance == this_entry->inheritance) && - (prev_entry->protection == this_entry->protection) && - (prev_entry->max_protection == this_entry->max_protection) && - (prev_entry->wired_count == this_entry->wired_count) && - - (prev_entry->copy_on_write == this_entry->copy_on_write) && - (prev_entry->needs_copy == this_entry->needs_copy) && - - (prev_entry->object.vm_object == this_entry->object.vm_object) && - ((prev_entry->offset + (prev_entry->end - prev_entry->start)) - == this_entry->offset) - ) { - if (map->first_free == this_entry) - map->first_free = prev_entry; - - SAVE_HINT(map, prev_entry); - vm_map_entry_unlink(map, this_entry); - prev_entry->end = this_entry->end; - vm_object_deallocate(this_entry->object.vm_object); - vm_map_entry_dispose(map, this_entry); - } - vm_map_unlock(map); -} - -/* - * vm_map_print: [ debug ] - */ -void -vm_map_print(map, full) - register vm_map_t map; - boolean_t full; -{ - _vm_map_print(map, full, printf); -} - -void -_vm_map_print(map, full, pr) - register vm_map_t map; - boolean_t full; - int (*pr) __P((const char *, ...)); -{ - register vm_map_entry_t entry; - extern int indent; - - iprintf(pr, "%s map %p: pmap=%p, ref=%d, nentries=%d, version=%d\n", - (map->is_main_map ? "Task" : "Share"), - map, (map->pmap), map->ref_count, map->nentries, - map->timestamp); - - if (!full && indent) - return; - - indent += 2; - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - iprintf(pr, "map entry %p: start=%p, end=%p, ", - entry, entry->start, entry->end); - if (map->is_main_map) { - static char *inheritance_name[4] = - { "share", "copy", "none", "donate_copy"}; - (*pr)("prot=%x/%x/%s, ", - entry->protection, - entry->max_protection, - inheritance_name[entry->inheritance]); - if (entry->wired_count != 0) - (*pr)("wired, "); - } - - if (entry->is_a_map || entry->is_sub_map) { - (*pr)("share=%p, offset=%p\n", - entry->object.share_map, - entry->offset); - if ((entry->prev == &map->header) || - (!entry->prev->is_a_map) || - (entry->prev->object.share_map != - entry->object.share_map)) { - indent += 2; - vm_map_print(entry->object.share_map, full); - indent -= 2; - } - - } - else { - (*pr)("object=%p, offset=%p", entry->object.vm_object, - entry->offset); - if (entry->copy_on_write) - (*pr)(", copy (%s)", - entry->needs_copy ? 
"needed" : "done"); - (*pr)("\n"); - - if ((entry->prev == &map->header) || - (entry->prev->is_a_map) || - (entry->prev->object.vm_object != - entry->object.vm_object)) { - indent += 2; - _vm_object_print(entry->object.vm_object, - full, pr); - indent -= 2; - } - } - } - indent -= 2; -} diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c deleted file mode 100644 index 0364c169ea9..00000000000 --- a/sys/vm/vm_meter.c +++ /dev/null @@ -1,236 +0,0 @@ -/* $OpenBSD: vm_meter.c,v 1.8 1998/03/01 00:38:14 niklas Exp $ */ -/* $NetBSD: vm_meter.c,v 1.18 1996/02/05 01:53:59 christos Exp $ */ - -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_meter.c 8.7 (Berkeley) 5/10/95 - */ - -#include <sys/param.h> -#include <sys/proc.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <vm/vm.h> -#include <sys/sysctl.h> -#include <sys/exec.h> - -struct loadavg averunnable; /* load average, of runnable procs */ - -int maxslp = MAXSLP; -#if !defined(MACHINE_NONCONTIG) && !defined(MACHINE_NEW_NONCONTIG) -int saferss = SAFERSS; -#endif /* MACHINE_NONCONTIG */ - -void -vmmeter() -{ - - if (time.tv_sec % 5 == 0) - loadav(&averunnable); - if (proc0.p_slptime > maxslp/2) - wakeup((caddr_t)&proc0); -} - -/* - * Constants for averages over 1, 5, and 15 minutes - * when sampling at 5 second intervals. - */ -fixpt_t cexp[3] = { - 0.9200444146293232 * FSCALE, /* exp(-1/12) */ - 0.9834714538216174 * FSCALE, /* exp(-1/60) */ - 0.9944598480048967 * FSCALE, /* exp(-1/180) */ -}; - -/* - * Compute a tenex style load average of a quantity on - * 1, 5 and 15 minute intervals. 
- */ -void -loadav(avg) - register struct loadavg *avg; -{ - register int i, nrun; - register struct proc *p; - - for (nrun = 0, p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - switch (p->p_stat) { - case SSLEEP: - if (p->p_priority > PZERO || p->p_slptime > 1) - continue; - /* fall through */ - case SRUN: - case SIDL: - nrun++; - } - } - for (i = 0; i < 3; i++) - avg->ldavg[i] = (cexp[i] * avg->ldavg[i] + - nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; -} - -/* - * Attributes associated with virtual memory. - */ -int -vm_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - struct proc *p; -{ - struct vmtotal vmtotals; - struct _ps_strings _ps = { PS_STRINGS }; - - /* all sysctl names at this level are terminal */ - if (namelen != 1) - return (ENOTDIR); /* overloaded */ - - switch (name[0]) { - case VM_LOADAVG: - averunnable.fscale = FSCALE; - return (sysctl_rdstruct(oldp, oldlenp, newp, &averunnable, - sizeof(averunnable))); - case VM_METER: - vmtotal(&vmtotals); - return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals, - sizeof(vmtotals))); - case VM_PSSTRINGS: - return (sysctl_rdstruct(oldp, oldlenp, newp, &_ps, - sizeof _ps)); - default: - return (EOPNOTSUPP); - } - /* NOTREACHED */ -} - -/* - * Calculate the current state of the system. - * Done on demand from getkerninfo(). - */ -void -vmtotal(totalp) - register struct vmtotal *totalp; -{ - register struct proc *p; - register vm_map_entry_t entry; - register vm_object_t object; - register vm_map_t map; - int paging; - - bzero(totalp, sizeof *totalp); - /* - * Mark all objects as inactive. - */ - simple_lock(&vm_object_list_lock); - for (object = vm_object_list.tqh_first; - object != NULL; - object = object->object_list.tqe_next) - object->flags &= ~OBJ_ACTIVE; - simple_unlock(&vm_object_list_lock); - /* - * Calculate process statistics. - */ - for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - if (p->p_flag & P_SYSTEM) - continue; - switch (p->p_stat) { - case 0: - continue; - - case SSLEEP: - case SSTOP: - if (p->p_flag & P_INMEM) { - if (p->p_priority <= PZERO) - totalp->t_dw++; - else if (p->p_slptime < maxslp) - totalp->t_sl++; - } else if (p->p_slptime < maxslp) - totalp->t_sw++; - if (p->p_slptime >= maxslp) - continue; - break; - - case SRUN: - case SIDL: - if (p->p_flag & P_INMEM) - totalp->t_rq++; - else - totalp->t_sw++; - if (p->p_stat == SIDL) - continue; - break; - } - /* - * Note active objects. - */ - paging = 0; - for (map = &p->p_vmspace->vm_map, entry = map->header.next; - entry != &map->header; entry = entry->next) { - if (entry->is_a_map || entry->is_sub_map || - entry->object.vm_object == NULL) - continue; - entry->object.vm_object->flags |= OBJ_ACTIVE; - paging |= vm_object_paging(entry->object.vm_object); - } - if (paging) - totalp->t_pw++; - } - /* - * Calculate object memory usage statistics. 
- */ - simple_lock(&vm_object_list_lock); - for (object = vm_object_list.tqh_first; - object != NULL; - object = object->object_list.tqe_next) { - totalp->t_vm += num_pages(object->size); - totalp->t_rm += object->resident_page_count; - if (object->flags & OBJ_ACTIVE) { - totalp->t_avm += num_pages(object->size); - totalp->t_arm += object->resident_page_count; - } - if (object->ref_count > 1) { - /* shared object */ - simple_unlock(&vm_object_list_lock); - totalp->t_vmshr += num_pages(object->size); - totalp->t_rmshr += object->resident_page_count; - if (object->flags & OBJ_ACTIVE) { - totalp->t_avmshr += num_pages(object->size); - totalp->t_armshr += object->resident_page_count; - } - } - } - totalp->t_free = cnt.v_free_count; -} diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c deleted file mode 100644 index 57aac2da609..00000000000 --- a/sys/vm/vm_mmap.c +++ /dev/null @@ -1,1054 +0,0 @@ -/* $OpenBSD: vm_mmap.c,v 1.17 2001/05/05 21:26:47 art Exp $ */ -/* $NetBSD: vm_mmap.c,v 1.47 1996/03/16 23:15:23 christos Exp $ */ - -/* - * Copyright (c) 1988 University of Utah. - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ - * - * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94 - */ - -/* - * Mapped file (mmap) interface to VM - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/filedesc.h> -#include <sys/resourcevar.h> -#include <sys/proc.h> -#include <sys/vnode.h> -#include <sys/file.h> -#include <sys/mman.h> -#include <sys/conf.h> -#include <sys/stat.h> - -#include <sys/mount.h> -#include <sys/syscallargs.h> - -#include <miscfs/specfs/specdev.h> - -#include <vm/vm.h> -#include <vm/vm_pager.h> -#include <vm/vm_prot.h> - -#ifdef DEBUG -int mmapdebug = 0; -#define MDB_FOLLOW 0x01 -#define MDB_SYNC 0x02 -#define MDB_MAPIT 0x04 -#endif - -/* ARGSUSED */ -int -sys_sbrk(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ -#if 0 - struct sys_sbrk_args /* { - syscallarg(int) incr; - } */ *uap = v; -#endif - - /* Not yet implemented */ - return (EOPNOTSUPP); -} - -/* ARGSUSED */ -int -sys_sstk(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ -#if 0 - struct sys_sstk_args /* { - syscallarg(int) incr; - } */ *uap = v; -#endif - - /* Not yet implemented */ - return (EOPNOTSUPP); -} - - -/* - * Memory Map (mmap) system call. Note that the file offset - * and address are allowed to be NOT page aligned, though if - * the MAP_FIXED flag it set, both must have the same remainder - * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not - * page-aligned, the actual mapping starts at trunc_page(addr) - * and the return value is adjusted up by the page offset. - */ -int -sys_mmap(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ - register struct sys_mmap_args /* { - syscallarg(void *) addr; - syscallarg(size_t) len; - syscallarg(int) prot; - syscallarg(int) flags; - syscallarg(int) fd; - syscallarg(long) pad; - syscallarg(off_t) pos; - } */ *uap = v; - struct vattr va; - register struct filedesc *fdp = p->p_fd; - register struct file *fp; - struct vnode *vp; - vm_offset_t addr, pos; - vm_size_t size, pageoff; - vm_prot_t prot, maxprot; - caddr_t handle; - int fd, flags, error; - vm_offset_t vm_min_address = VM_MIN_ADDRESS; - - addr = (vm_offset_t) SCARG(uap, addr); - size = (vm_size_t) SCARG(uap, len); - prot = SCARG(uap, prot) & VM_PROT_ALL; - flags = SCARG(uap, flags); - fd = SCARG(uap, fd); - pos = (vm_offset_t) SCARG(uap, pos); - -#ifdef DEBUG - if (mmapdebug & MDB_FOLLOW) - printf("mmap(%d): addr %lx len %lx pro %x flg %x fd %d pos %lx\n", - p->p_pid, addr, size, prot, flags, fd, pos); -#endif - - /* - * Align the file position to a page boundary, - * and save its page offset component. - */ - pageoff = (pos & PAGE_MASK); - pos -= pageoff; - - /* Adjust size for rounding (on both ends). */ - size += pageoff; /* low end... */ - size = (vm_size_t) round_page(size); /* hi end */ - - /* Do not allow mappings that cause address wrap... */ - if ((ssize_t)size < 0) - return (EINVAL); - - /* - * Check for illegal addresses. Watch out for address wrap... - * Note that VM_*_ADDRESS are not constants due to casts (argh). - */ - if (flags & MAP_FIXED) { - /* - * The specified address must have the same remainder - * as the file offset taken modulo PAGE_SIZE, so it - * should be aligned after adjustment by pageoff. - */ - addr -= pageoff; - if (addr & PAGE_MASK) - return (EINVAL); - /* Address range must be all in user VM space. 
*/ - if (VM_MAXUSER_ADDRESS > 0 && - addr + size > VM_MAXUSER_ADDRESS) - return (EINVAL); - if (vm_min_address > 0 && addr < vm_min_address) - return (EINVAL); - if (addr > addr + size) - return (EINVAL); - } - /* - * XXX for non-fixed mappings where no hint is provided or - * the hint would fall in the potential heap space, - * place it after the end of the largest possible heap. - * - * There should really be a pmap call to determine a reasonable - * location. (To avoid VA cache alias problems, for example!) - */ - else if (addr < round_page((vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ)) - addr = round_page((vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ); - - if ((flags & MAP_ANON) == 0) { - /* - * Mapping file, get fp for validation. - * Obtain vnode and make sure it is of appropriate type. - */ - if (((unsigned)fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL) - return (EBADF); - if (fp->f_type != DTYPE_VNODE) - return (EINVAL); - vp = (struct vnode *)fp->f_data; - - /* - * XXX hack to handle use of /dev/zero to map anon - * memory (ala SunOS). - */ - if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) { - flags |= MAP_ANON; - goto is_anon; - } - - /* - * Only files and cdevs are mappable, and cdevs does not - * provide private mappings of any kind. - */ - if (vp->v_type != VREG && - (vp->v_type != VCHR || (flags & (MAP_PRIVATE|MAP_COPY)))) - return (EINVAL); - /* - * Ensure that file and memory protections are - * compatible. Note that we only worry about - * writability if mapping is shared; in this case, - * current and max prot are dictated by the open file. - * XXX use the vnode instead? Problem is: what - * credentials do we use for determination? - * What if proc does a setuid? - */ - maxprot = VM_PROT_EXECUTE; /* ??? */ - if (fp->f_flag & FREAD) - maxprot |= VM_PROT_READ; - else if (prot & PROT_READ) - return (EACCES); - - /* - * If we are sharing potential changes (either via MAP_SHARED - * or via the implicit sharing of character device mappings), - * there are security issues with giving out PROT_WRITE - */ - if ((flags & MAP_SHARED) || vp->v_type == VCHR) { - - /* In case we opened the thing readonly... */ - if (!(fp->f_flag & FWRITE)) { - /* - * If we are trying to get write permission - * bail out, otherwise go ahead but don't - * raise maxprot to contain VM_PROT_WRITE, as - * we have not asked for write permission at - * all. - */ - if (prot & PROT_WRITE) - return (EACCES); - - /* - * If the file is writable, only add PROT_WRITE to - * maxprot if the file is not immutable, append-only. - * If it is, and if we are going for PROT_WRITE right - * away, return EPERM. - */ - } else if ((error = - VOP_GETATTR(vp, &va, p->p_ucred, p))) - return (error); - else if (va.va_flags & (IMMUTABLE|APPEND)) { - if (prot & PROT_WRITE) - return (EPERM); - } else - maxprot |= VM_PROT_WRITE; - } else - maxprot |= VM_PROT_WRITE; - handle = (caddr_t)vp; - } else { - /* - * (flags & MAP_ANON) == TRUE - * Mapping blank space is trivial. 
- */ - if (fd != -1) - return (EINVAL); - is_anon: - handle = NULL; - maxprot = VM_PROT_ALL; - pos = 0; - } - error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, - flags, handle, pos); - if (error == 0) - *retval = (register_t)(addr + pageoff); - return (error); -} - -int -sys_msync(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ - struct sys_msync_args /* { - syscallarg(void *) addr; - syscallarg(size_t) len; - syscallarg(int) flags; - } */ *uap = v; - vm_offset_t addr; - vm_size_t size, pageoff; - vm_map_t map; - int rv, flags; - boolean_t syncio, invalidate; - - addr = (vm_offset_t)SCARG(uap, addr); - size = (vm_size_t)SCARG(uap, len); - flags = SCARG(uap, flags); -#ifdef DEBUG - if (mmapdebug & (MDB_FOLLOW|MDB_SYNC)) - printf("msync(%d): addr 0x%lx len %lx\n", p->p_pid, addr, size); -#endif - - /* sanity check flags */ - if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 || - (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 || - (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC)) - return (EINVAL); - if ((flags & (MS_ASYNC | MS_SYNC)) == 0) - flags |= MS_SYNC; - - /* - * Align the address to a page boundary, - * and adjust the size accordingly. - */ - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page(size); - - /* Disallow wrap-around. */ - if (addr + size < addr) - return (ENOMEM); - - map = &p->p_vmspace->vm_map; - /* - * XXX Gak! If size is zero we are supposed to sync "all modified - * pages with the region containing addr". Unfortunately, we - * don't really keep track of individual mmaps so we approximate - * by flushing the range of the map entry containing addr. - * This can be incorrect if the region splits or is coalesced - * with a neighbor. - */ - if (size == 0) { - vm_map_entry_t entry; - - vm_map_lock_read(map); - rv = vm_map_lookup_entry(map, addr, &entry); - vm_map_unlock_read(map); - if (rv == FALSE) - return (ENOMEM); - addr = entry->start; - size = entry->end - entry->start; - } -#ifdef DEBUG - if (mmapdebug & MDB_SYNC) - printf("msync: cleaning/flushing address range [0x%lx-0x%lx)\n", - addr, addr+size); -#endif - -#if 0 - /* - * XXX Asynchronous msync() causes: - * . the process to hang on wchan "vospgw", and - * . a "vm_object_page_clean: pager_put error" message to - * be printed by the kernel. - */ - syncio = (flags & MS_SYNC) ? TRUE : FALSE; -#else - syncio = TRUE; -#endif - invalidate = (flags & MS_INVALIDATE) ? TRUE : FALSE; - - /* - * XXX bummer, gotta flush all cached pages to ensure - * consistency with the file system cache. Otherwise, we could - * pass this in to implement Sun's MS_INVALIDATE. - */ - invalidate = TRUE; - /* - * Clean the pages and interpret the return value. 
- */ - rv = vm_map_clean(map, addr, addr+size, syncio, invalidate); - switch (rv) { - case KERN_SUCCESS: - break; - case KERN_INVALID_ADDRESS: - return (ENOMEM); - case KERN_FAILURE: - return (EIO); - case KERN_PAGES_LOCKED: - return (EBUSY); - default: - return (EINVAL); - } - return (0); -} - -int -sys_munmap(p, v, retval) - register struct proc *p; - void *v; - register_t *retval; -{ - register struct sys_munmap_args /* { - syscallarg(void *) addr; - syscallarg(size_t) len; - } */ *uap = v; - vm_offset_t addr; - vm_size_t size, pageoff; - vm_map_t map; - vm_offset_t vm_min_address = VM_MIN_ADDRESS; - - - addr = (vm_offset_t) SCARG(uap, addr); - size = (vm_size_t) SCARG(uap, len); -#ifdef DEBUG - if (mmapdebug & MDB_FOLLOW) - printf("munmap(%d): addr 0%lx len %lx\n", p->p_pid, addr, size); -#endif - - /* - * Align the address to a page boundary, - * and adjust the size accordingly. - */ - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page(size); - if ((int)size < 0) - return(EINVAL); - if (size == 0) - return(0); - /* - * Check for illegal addresses. Watch out for address wrap... - * Note that VM_*_ADDRESS are not constants due to casts (argh). - */ - if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) - return (EINVAL); - if (vm_min_address > 0 && addr < vm_min_address) - return (EINVAL); - if (addr > addr + size) - return (EINVAL); - map = &p->p_vmspace->vm_map; - /* - * Make sure entire range is allocated. - */ - if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) - return(EINVAL); - /* returns nothing but KERN_SUCCESS anyway */ - (void) vm_map_remove(map, addr, addr+size); - return(0); -} - -void -munmapfd(p, fd) - struct proc *p; - int fd; -{ -#ifdef DEBUG - if (mmapdebug & MDB_FOLLOW) - printf("munmapfd(%d): fd %d\n", p->p_pid, fd); -#endif - - /* - * XXX should vm_deallocate any regions mapped to this file - */ - p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED; -} - -int -sys_mprotect(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ - struct sys_mprotect_args /* { - syscallarg(void *) addr; - syscallarg(int) len; - syscallarg(int) prot; - } */ *uap = v; - vm_offset_t addr; - vm_size_t size, pageoff; - register vm_prot_t prot; - - addr = (vm_offset_t)SCARG(uap, addr); - size = (vm_size_t)SCARG(uap, len); - prot = SCARG(uap, prot) & VM_PROT_ALL; -#ifdef DEBUG - if (mmapdebug & MDB_FOLLOW) - printf("mprotect(%d): addr 0x%lx len %lx prot %d\n", p->p_pid, - addr, size, prot); -#endif - /* - * Align the address to a page boundary, - * and adjust the size accordingly. 
- */ - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page(size); - if ((int)size < 0) - return(EINVAL); - - switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot, - FALSE)) { - case KERN_SUCCESS: - return (0); - case KERN_PROTECTION_FAILURE: - return (EACCES); - } - return (EINVAL); -} - -int -sys_minherit(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ - struct sys_minherit_args /* { - syscallarg(caddr_t) addr; - syscallarg(int) len; - syscallarg(int) inherit; - } */ *uap = v; - vm_offset_t addr; - vm_size_t size, pageoff; - register vm_inherit_t inherit; - - addr = (vm_offset_t)SCARG(uap, addr); - size = (vm_size_t)SCARG(uap, len); - inherit = SCARG(uap, inherit); -#ifdef DEBUG - if (mmapdebug & MDB_FOLLOW) - printf("minherit(%d): addr 0x%lx len %lx inherit %d\n", p->p_pid, - addr, size, inherit); -#endif - /* - * Align the address to a page boundary, - * and adjust the size accordingly. - */ - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page(size); - if ((int)size < 0) - return(EINVAL); - - switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, - inherit)) { - case KERN_SUCCESS: - return (0); - case KERN_PROTECTION_FAILURE: - return (EACCES); - } - return (EINVAL); -} - -/* ARGSUSED */ -int -sys_madvise(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ -#if 0 - struct sys_madvise_args /* { - syscallarg(void *) addr; - syscallarg(size_t) len; - syscallarg(int) behav; - } */ *uap = v; -#endif - - /* Not yet implemented */ - return (EOPNOTSUPP); -} - -/* ARGSUSED */ -int -sys_mincore(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ -#if 0 - struct sys_mincore_args /* { - syscallarg(void *) addr; - syscallarg(size_t) len; - syscallarg(char *) vec; - } */ *uap = v; -#endif - - /* Not yet implemented */ - return (EOPNOTSUPP); -} - -int -sys_mlock(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ - struct sys_mlock_args /* { - syscallarg(const void *) addr; - syscallarg(size_t) len; - } */ *uap = v; - vm_offset_t addr; - vm_size_t size, pageoff; - int error; - extern int vm_page_max_wired; - - addr = (vm_offset_t)SCARG(uap, addr); - size = (vm_size_t)SCARG(uap, len); -#ifdef DEBUG - if (mmapdebug & MDB_FOLLOW) - printf("mlock(%d): addr 0%lx len %lx\n", p->p_pid, addr, size); -#endif - /* - * Align the address to a page boundary, - * and adjust the size accordingly. - */ - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page(size); - - /* Disallow wrap-around. */ - if (addr + (int)size < addr) - return (EINVAL); - - if (atop(size) + cnt.v_wire_count > vm_page_max_wired) - return (EAGAIN); -#ifdef pmap_wired_count - if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) > - p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) - return (EAGAIN); -#else - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) - return (error); -#endif - - error = vslock((caddr_t)addr, size); - return (error == KERN_SUCCESS ? 
0 : ENOMEM); -} - -int -sys_munlock(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ - struct sys_munlock_args /* { - syscallarg(const void *) addr; - syscallarg(size_t) len; - } */ *uap = v; - vm_offset_t addr; - vm_size_t size, pageoff; - int error; - - addr = (vm_offset_t)SCARG(uap, addr); - size = (vm_size_t)SCARG(uap, len); -#ifdef DEBUG - if (mmapdebug & MDB_FOLLOW) - printf("munlock(%d): addr 0x%lx len %lx\n", p->p_pid, addr, size); -#endif - /* - * Align the address to a page boundary, - * and adjust the size accordingly. - */ - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page(size); - - /* Disallow wrap-around. */ - if (addr + (int)size < addr) - return (EINVAL); - -#ifndef pmap_wired_count - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) - return (error); -#endif - - error = vsunlock((caddr_t)addr, size); - return (error == KERN_SUCCESS ? 0 : ENOMEM); -} - -/* - * Internal version of mmap. - * Currently used by mmap, exec, and sys5 shared memory. - * Handle is either a vnode pointer or NULL for MAP_ANON. - * This (internal) interface requires the file offset to be - * page-aligned by the caller. (Also addr, if MAP_FIXED). - */ -int -vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) - register vm_map_t map; - register vm_offset_t *addr; - register vm_size_t size; - vm_prot_t prot, maxprot; - register int flags; - caddr_t handle; /* XXX should be vp */ - vm_offset_t foff; -{ - register vm_pager_t pager; - boolean_t fitit; - vm_object_t object; - struct vnode *vp = NULL; - int type; - int rv = KERN_SUCCESS; - - if (size == 0) - return (0); - - /* The file offset must be page aligned. */ - if (foff & PAGE_MASK) - return (EINVAL); - - if ((flags & MAP_FIXED) == 0) { - /* The address is just a hint */ - fitit = TRUE; - *addr = round_page(*addr); - } else { - /* - * Use the specified address exactly - * (but check alignment first). - */ - fitit = FALSE; - if (*addr & PAGE_MASK) - return (EINVAL); - (void)vm_deallocate(map, *addr, size); - } - - /* - * Lookup/allocate pager. All except an unnamed anonymous lookup - * gain a reference to ensure continued existance of the object. - * (XXX the exception is to appease the pageout daemon) - */ - if (flags & MAP_ANON) { - type = PG_DFLT; - foff = 0; - } else { - vp = (struct vnode *)handle; - if (vp->v_type == VCHR) { - type = PG_DEVICE; - handle = (caddr_t)(long)vp->v_rdev; - } else - type = PG_VNODE; - } - pager = vm_pager_allocate(type, handle, size, prot, foff); - if (pager == NULL) - return (type == PG_DEVICE ? EINVAL : ENOMEM); - /* - * Find object and release extra reference gained by lookup - */ - object = vm_object_lookup(pager); - vm_object_deallocate(object); - - /* - * Anonymous memory. - */ - if (flags & MAP_ANON) { - rv = vm_allocate_with_pager(map, addr, size, fitit, - pager, foff, TRUE); - if (rv != KERN_SUCCESS) { - if (handle == NULL) - vm_pager_deallocate(pager); - else - vm_object_deallocate(object); - goto out; - } - /* - * Don't cache anonymous objects. - * Loses the reference gained by vm_pager_allocate. - * Note that object will be NULL when handle == NULL, - * this is ok since vm_allocate_with_pager has made - * sure that these objects are uncached. - */ - (void) pager_cache(object, FALSE); -#ifdef DEBUG - if (mmapdebug & MDB_MAPIT) - printf("vm_mmap(%d): ANON *addr %lx size %lx pager %p\n", - curproc->p_pid, *addr, size, pager); -#endif - } - /* - * Must be a mapped file. - * Distinguish between character special and regular files. 
- */ - else if (vp->v_type == VCHR) { - rv = vm_allocate_with_pager(map, addr, size, fitit, - pager, foff, FALSE); - /* - * Uncache the object and lose the reference gained - * by vm_pager_allocate(). If the call to - * vm_allocate_with_pager() was sucessful, then we - * gained an additional reference ensuring the object - * will continue to exist. If the call failed then - * the deallocate call below will terminate the - * object which is fine. - */ - (void) pager_cache(object, FALSE); - if (rv != KERN_SUCCESS) - goto out; - } - /* - * A regular file - */ - else { -#ifdef DEBUG - if (object == NULL) - printf("vm_mmap: no object: vp %p, pager %p\n", - vp, pager); -#endif - /* - * Map it directly. - * Allows modifications to go out to the vnode. - */ - if (flags & MAP_SHARED) { - rv = vm_allocate_with_pager(map, addr, size, - fitit, pager, - foff, FALSE); - if (rv != KERN_SUCCESS) { - vm_object_deallocate(object); - goto out; - } - /* - * Don't cache the object. This is the easiest way - * of ensuring that data gets back to the filesystem - * because vnode_pager_deallocate() will fsync the - * vnode. pager_cache() will lose the extra ref. - */ - if (prot & VM_PROT_WRITE) - pager_cache(object, FALSE); - else - vm_object_deallocate(object); - } - /* - * Copy-on-write of file. Two flavors. - * MAP_COPY is true COW, you essentially get a snapshot of - * the region at the time of mapping. MAP_PRIVATE means only - * that your changes are not reflected back to the object. - * Changes made by others will be seen. - */ - else { - vm_map_t tmap; - vm_offset_t off; - - /* locate and allocate the target address space */ - vm_map_lock(map); - if (fitit) { - /* - * Find space in the map at a location - * that is compatible with the object/offset - * we're going to attach there. - */ - again: - if (vm_map_findspace(map, *addr, size, - addr) == 1) { - rv = KERN_NO_SPACE; - } else { -#ifdef PMAP_PREFER - PMAP_PREFER(foff, addr); -#endif - rv = vm_map_insert(map, NULL, - (vm_offset_t)0, - *addr, *addr+size); - /* - * vm_map_insert() may fail if - * PMAP_PREFER() has altered - * the initial address. - * If so, we start again. - */ - if (rv == KERN_NO_SPACE) - goto again; - } - } else { - rv = vm_map_insert(map, NULL, (vm_offset_t)0, - *addr, *addr + size); - -#ifdef DEBUG - /* - * Check against PMAP preferred address. If - * there's a mismatch, these pages should not - * be shared with others. <howto?> - */ - if (rv == KERN_SUCCESS && - (mmapdebug & MDB_MAPIT)) { - vm_offset_t paddr = *addr; -#ifdef PMAP_PREFER - PMAP_PREFER(foff, &paddr); -#endif - if (paddr != *addr) - printf( - "vm_mmap: pmap botch! " - "[foff %lx, addr %lx, paddr %lx]\n", - foff, *addr, paddr); - } -#endif - } - vm_map_unlock(map); - - if (rv != KERN_SUCCESS) { - vm_object_deallocate(object); - goto out; - } - tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS, - VM_MIN_ADDRESS+size, TRUE); - off = VM_MIN_ADDRESS; - rv = vm_allocate_with_pager(tmap, &off, size, - FALSE, pager, - foff, FALSE); - if (rv != KERN_SUCCESS) { - vm_object_deallocate(object); - vm_map_deallocate(tmap); - goto out; - } - /* - * (XXX) - * MAP_PRIVATE implies that we see changes made by - * others. To ensure that we need to guarentee that - * no copy object is created (otherwise original - * pages would be pushed to the copy object and we - * would never see changes made by others). We - * totally sleeze it right now by marking the object - * internal temporarily. 
- */ - if ((flags & MAP_COPY) == 0) - object->flags |= OBJ_INTERNAL; - rv = vm_map_copy(map, tmap, *addr, size, off, - FALSE, FALSE); - object->flags &= ~OBJ_INTERNAL; - /* - * (XXX) - * My oh my, this only gets worse... - * Force creation of a shadow object so that - * vm_map_fork will do the right thing. - */ - if ((flags & MAP_COPY) == 0) { - vm_map_t tmap; - vm_map_entry_t tentry; - vm_object_t tobject; - vm_offset_t toffset; - vm_prot_t tprot; - boolean_t twired, tsu; - - tmap = map; - vm_map_lookup(&tmap, *addr, VM_PROT_WRITE, - &tentry, &tobject, &toffset, - &tprot, &twired, &tsu); - vm_map_lookup_done(tmap, tentry); - } - /* - * (XXX) - * Map copy code cannot detect sharing unless a - * sharing map is involved. So we cheat and write - * protect everything ourselves. - */ - vm_object_pmap_copy(object, foff, foff + size); - vm_object_deallocate(object); - vm_map_deallocate(tmap); - if (rv != KERN_SUCCESS) - goto out; - } -#ifdef DEBUG - if (mmapdebug & MDB_MAPIT) - printf("vm_mmap(%d): FILE *addr %lx size %lx pager %p\n", - curproc->p_pid, *addr, size, pager); -#endif - } - /* - * Correct protection (default is VM_PROT_ALL). - * If maxprot is different than prot, we must set both explicitly. - */ - rv = KERN_SUCCESS; - if (maxprot != VM_PROT_ALL) - rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE); - if (rv == KERN_SUCCESS && prot != maxprot) - rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE); - if (rv != KERN_SUCCESS) { - (void) vm_deallocate(map, *addr, size); - goto out; - } - /* - * Shared memory is also shared with children. - */ - if (flags & MAP_SHARED) { - rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE); - if (rv != KERN_SUCCESS) { - (void) vm_deallocate(map, *addr, size); - goto out; - } - } -out: -#ifdef DEBUG - if (mmapdebug & MDB_MAPIT) - printf("vm_mmap: rv %d\n", rv); -#endif - switch (rv) { - case KERN_SUCCESS: - return (0); - case KERN_INVALID_ADDRESS: - case KERN_NO_SPACE: - return (ENOMEM); - case KERN_PROTECTION_FAILURE: - return (EACCES); - default: - return (EINVAL); - } -} - -int -sys_mlockall(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ -#if 0 - struct sys_mlockall_args /* { - syscallarg(int) flags; - } */ *uap = v; -#endif - - return (EOPNOTSUPP); -} - -int -sys_munlockall(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ - - return (EOPNOTSUPP); -} - - -
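The deleted mmap/msync/munmap/mprotect/mlock handlers above all repeat the same alignment step: save the sub-page offset of the requested address, truncate the address to a page boundary, grow the length by that offset, and round it up to whole pages, rejecting any range that wraps. A minimal userland sketch of that arithmetic follows; it is illustrative only, not the deleted kernel code, and it assumes a 4 KB page size and defines trunc_page()/round_page() locally (the kernel derives these from its machine parameters).

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Assumed for illustration; the kernel takes these from machine parameters. */
#define PAGE_SIZE	4096UL
#define PAGE_MASK	(PAGE_SIZE - 1)
#define trunc_page(x)	((x) & ~PAGE_MASK)
#define round_page(x)	(((x) + PAGE_MASK) & ~PAGE_MASK)

int
main(void)
{
	uintptr_t addr = 0x1000f00UL;	/* hypothetical request address */
	size_t len = 0x2345;		/* hypothetical request length */

	/* Save the sub-page offset, then back the address up to a boundary. */
	uintptr_t pageoff = addr & PAGE_MASK;
	uintptr_t start = trunc_page(addr);	/* == addr - pageoff */

	/* Grow the length by that offset and round it up to whole pages. */
	size_t size = round_page(len + pageoff);

	/* Reject address wrap, as the syscalls above do. */
	if (start + size < start) {
		printf("address range wraps\n");
		return (1);
	}

	printf("requested [%#lx, +%#zx)\n", (unsigned long)addr, len);
	printf("mapped    [%#lx, +%#zx)\n", (unsigned long)start, size);
	printf("caller gets back %#lx (start + pageoff)\n",
	    (unsigned long)(start + pageoff));
	return (0);
}

For MAP_FIXED mappings the deleted sys_mmap() additionally requires the address and the file offset to share the same remainder modulo the page size, which is why it subtracts pageoff from addr first and then rejects any address that is still not page aligned.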
\ No newline at end of file diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c deleted file mode 100644 index 8336d2acb79..00000000000 --- a/sys/vm/vm_object.c +++ /dev/null @@ -1,1887 +0,0 @@ -/* $OpenBSD: vm_object.c,v 1.22 1998/04/25 07:17:21 niklas Exp $ */ -/* $NetBSD: vm_object.c,v 1.46 1997/03/30 20:56:12 mycroft Exp $ */ - -/*- - * Copyright (c) 1997 Charles M. Hannum. All rights reserved. - * Copyright (c) 1997 Niklas Hallqvist. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Charles M. Hannum. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_object.c 8.7 (Berkeley) 5/11/95 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Virtual memory object module. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/proc.h> - -#include <vm/vm.h> -#include <vm/vm_page.h> -#include <vm/vm_pageout.h> - -/* - * Virtual memory objects maintain the actual data - * associated with allocated virtual memory. A given - * page of memory exists within exactly one object. - * - * An object is only deallocated when all "references" - * are given up. Only one "reference" to a given - * region of an object should be writeable. - * - * Associated with each object is a list of all resident - * memory pages belonging to that object; this list is - * maintained by the "vm_page" module, and locked by the object's - * lock. - * - * Each object also records a "pager" routine which is - * used to retrieve (and store) pages to the proper backing - * storage. In addition, objects may be backed by other - * objects from which they were virtual-copied. 
- * - * The only items within the object structure which are - * modified after time of creation are: - * reference count locked by object's lock - * pager routine locked by object's lock - * - */ - -struct vm_object kernel_object_store; -struct vm_object kmem_object_store; - -#define VM_OBJECT_HASH_COUNT 157 - -extern int vm_cache_max; /* now in param.c */ -struct vm_object_hash_head vm_object_hashtable[VM_OBJECT_HASH_COUNT]; - -long object_collapses = 0; -long object_bypasses = 0; -boolean_t vm_object_collapse_allowed = TRUE; - -#ifndef VMDEBUG -#define VMDEBUG 0 -#endif - -#ifdef DEBUG -#define VMDEBUG_SHADOW 0x1 -#define VMDEBUG_SHADOW_VERBOSE 0x2 -#define VMDEBUG_COLLAPSE 0x4 -#define VMDEBUG_COLLAPSE_PAGEIN 0x8 -int vmdebug = VMDEBUG; -#endif - -void _vm_object_allocate __P((vm_size_t, vm_object_t)); -int vm_object_bypass __P((vm_object_t)); -void vm_object_collapse_internal __P((vm_object_t, vm_object_t *)); -int vm_object_overlay __P((vm_object_t)); -int vm_object_remove_from_pager - __P((vm_object_t, vm_offset_t, vm_offset_t)); -void vm_object_set_shadow __P((vm_object_t, vm_object_t)); - -/* - * vm_object_init: - * - * Initialize the VM objects module. - */ -void -vm_object_init(size) - vm_size_t size; -{ - register int i; - - TAILQ_INIT(&vm_object_cached_list); - TAILQ_INIT(&vm_object_list); - vm_object_count = 0; - simple_lock_init(&vm_cache_lock); - simple_lock_init(&vm_object_list_lock); - - for (i = 0; i < VM_OBJECT_HASH_COUNT; i++) - TAILQ_INIT(&vm_object_hashtable[i]); - - kernel_object = &kernel_object_store; - _vm_object_allocate(size, kernel_object); - - kmem_object = &kmem_object_store; - _vm_object_allocate(VM_KMEM_SIZE + VM_MBUF_SIZE, kmem_object); -} - -/* - * vm_object_allocate: - * - * Returns a new object with the given size. - */ -vm_object_t -vm_object_allocate(size) - vm_size_t size; -{ - register vm_object_t result; - - result = (vm_object_t)malloc((u_long)sizeof *result, M_VMOBJ, - M_WAITOK); - - _vm_object_allocate(size, result); - - return(result); -} - -void -_vm_object_allocate(size, object) - vm_size_t size; - register vm_object_t object; -{ - TAILQ_INIT(&object->memq); - vm_object_lock_init(object); - object->ref_count = 1; - object->resident_page_count = 0; - object->size = size; - object->flags = OBJ_INTERNAL; /* vm_allocate_with_pager will reset */ - object->paging_in_progress = 0; - object->copy = NULL; - - /* - * Object starts out read-write, with no pager. - */ - - object->pager = NULL; - object->paging_offset = 0; - object->shadow = NULL; - object->shadow_offset = (vm_offset_t) 0; - LIST_INIT(&object->shadowers); - - simple_lock(&vm_object_list_lock); - TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); - vm_object_count++; - cnt.v_nzfod += atop(size); - simple_unlock(&vm_object_list_lock); -} - -/* - * vm_object_reference: - * - * Gets another reference to the given object. - */ -void -vm_object_reference(object) - register vm_object_t object; -{ - if (object == NULL) - return; - - vm_object_lock(object); - object->ref_count++; - vm_object_unlock(object); -} - -/* - * vm_object_deallocate: - * - * Release a reference to the specified object, - * gained either through a vm_object_allocate - * or a vm_object_reference call. When all references - * are gone, storage associated with this object - * may be relinquished. - * - * No object may be locked. 
- */ -void -vm_object_deallocate(object) - vm_object_t object; -{ - /* - * While "temp" is used for other things as well, we - * initialize it to NULL here for being able to check - * if we are in the first revolution of the loop. - */ - vm_object_t temp = NULL; - - while (object != NULL) { - - /* - * The cache holds a reference (uncounted) to the object; we - * must lock it before removing the object. - */ - - vm_object_cache_lock(); - - /* - * Lose the reference - */ - vm_object_lock(object); - if (--(object->ref_count) != 0) { - vm_object_unlock(object); - vm_object_cache_unlock(); - - /* - * If this is a deallocation of a shadow reference - * (which it is unless it's the first time round) and - * this operation made us singly-shadowed, try to - * collapse us with our shadower. Otherwise we're - * ready. - */ - if (temp != NULL && - (temp = object->shadowers.lh_first) != NULL && - temp->shadowers_list.le_next == NULL) { - vm_object_lock(temp); - - /* - * This is a bit tricky: the temp object can - * go away while collapsing, check the - * vm_object_collapse_internal comments for - * details. In this case we get an object - * back to deallocate (it's done like this - * to prevent potential recursion and hence - * kernel stack overflow). In the normal case - * we won't get an object back, if so, we are - * ready and may return. - */ - vm_object_collapse_internal(temp, &object); - if (object != NULL) { - vm_object_lock(object); - vm_object_cache_lock(); - } else { - vm_object_unlock(temp); - return; - } - } else - return; - } - - /* - * See if this object can persist. If so, enter it in the - * cache, then deactivate all of its pages. - */ - if (object->flags & OBJ_CANPERSIST) { - - TAILQ_INSERT_TAIL(&vm_object_cached_list, object, - cached_list); - vm_object_cached++; - vm_object_cache_unlock(); - - vm_object_deactivate_pages(object); - vm_object_unlock(object); - - vm_object_cache_trim(); - return; - } - - /* - * Make sure no one can look us up now. - */ - vm_object_remove(object->pager); - vm_object_cache_unlock(); - - /* - * Deallocate the object, and move on to the backing object. - */ - temp = object->shadow; - vm_object_reference(temp); - vm_object_terminate(object); - object = temp; - } -} - - -/* - * vm_object_terminate actually destroys the specified object, freeing - * up all previously used resources. - * - * The object must be locked. - */ -void -vm_object_terminate(object) - register vm_object_t object; -{ - register vm_page_t p; - vm_object_t shadow_object; - - /* - * Protect against simultaneous collapses. - */ - object->flags |= OBJ_FADING; - - /* - * Wait until the pageout daemon is through with the object or a - * potential collapse operation is finished. - */ - vm_object_paging_wait(object,"vmterm"); - - /* - * Detach the object from its shadow if we are the shadow's - * copy. - */ - if ((shadow_object = object->shadow) != NULL) { - vm_object_lock(shadow_object); - vm_object_set_shadow(object, NULL); - if (shadow_object->copy == object) - shadow_object->copy = NULL; -#if 0 - else if (shadow_object->copy != NULL) - panic("vm_object_terminate: " - "copy/shadow inconsistency"); -#endif - vm_object_unlock(shadow_object); - } - - /* - * If not an internal object clean all the pages, removing them - * from paging queues as we go. - * - * XXX need to do something in the event of a cleaning error. - */ - if ((object->flags & OBJ_INTERNAL) == 0) - (void) vm_object_page_clean(object, 0, 0, TRUE, TRUE); - - /* - * Now free the pages. 
- * For internal objects, this also removes them from paging queues. - */ - while ((p = object->memq.tqh_first) != NULL) { - VM_PAGE_CHECK(p); - vm_page_lock_queues(); - vm_page_free(p); - cnt.v_pfree++; - vm_page_unlock_queues(); - } - if ((object->flags & OBJ_INTERNAL) != 0) - vm_object_unlock(object); - - /* - * Let the pager know object is dead. - */ - if (object->pager != NULL) - vm_pager_deallocate(object->pager); - - simple_lock(&vm_object_list_lock); - TAILQ_REMOVE(&vm_object_list, object, object_list); - vm_object_count--; - simple_unlock(&vm_object_list_lock); - - /* - * Free the space for the object. - */ - free((caddr_t)object, M_VMOBJ); -} - -/* - * vm_object_page_clean - * - * Clean all dirty pages in the specified range of object. - * If syncio is TRUE, page cleaning is done synchronously. - * If de_queue is TRUE, pages are removed from any paging queue - * they were on, otherwise they are left on whatever queue they - * were on before the cleaning operation began. - * - * Odd semantics: if start == end, we clean everything. - * - * The object must be locked. - * - * Returns TRUE if all was well, FALSE if there was a pager error - * somewhere. We attempt to clean (and dequeue) all pages regardless - * of where an error occurs. - */ -boolean_t -vm_object_page_clean(object, start, end, syncio, de_queue) - register vm_object_t object; - register vm_offset_t start; - register vm_offset_t end; - boolean_t syncio; - boolean_t de_queue; -{ - register vm_page_t p; - int onqueue = 0; - boolean_t noerror = TRUE; - - if (object == NULL) - return (TRUE); - - /* - * If it is an internal object and there is no pager, attempt to - * allocate one. Note that vm_object_collapse may relocate one - * from a collapsed object so we must recheck afterward. - */ - if ((object->flags & OBJ_INTERNAL) && object->pager == NULL) { - vm_object_collapse(object); - if (object->pager == NULL) { - vm_pager_t pager; - - vm_object_unlock(object); - pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, - object->size, VM_PROT_ALL, (vm_offset_t)0); - if (pager) - vm_object_setpager(object, pager, 0, FALSE); - vm_object_lock(object); - } - } - if (object->pager == NULL) - return (FALSE); - -again: - /* - * Wait until the pageout daemon is through with the object. - */ - vm_object_paging_wait(object,"vclean"); - - /* - * Loop through the object page list cleaning as necessary. - */ - for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { - if ((start == end || (p->offset >= start && p->offset < end)) && - !(p->flags & PG_FICTITIOUS)) { - if ((p->flags & PG_CLEAN) && - pmap_is_modified(VM_PAGE_TO_PHYS(p))) - p->flags &= ~PG_CLEAN; - /* - * Remove the page from any paging queue. - * This needs to be done if either we have been - * explicitly asked to do so or it is about to - * be cleaned (see comment below). - */ - if (de_queue || !(p->flags & PG_CLEAN)) { - vm_page_lock_queues(); - if (p->flags & PG_ACTIVE) { - TAILQ_REMOVE(&vm_page_queue_active, - p, pageq); - p->flags &= ~PG_ACTIVE; - cnt.v_active_count--; - onqueue = 1; - } else if (p->flags & PG_INACTIVE) { - TAILQ_REMOVE(&vm_page_queue_inactive, - p, pageq); - p->flags &= ~PG_INACTIVE; - cnt.v_inactive_count--; - onqueue = -1; - } else - onqueue = 0; - vm_page_unlock_queues(); - } - /* - * To ensure the state of the page doesn't change - * during the clean operation we do two things. - * First we set the busy bit and write-protect all - * mappings to ensure that write accesses to the - * page block (in vm_fault). 
Second, we remove - * the page from any paging queue to foil the - * pageout daemon (vm_pageout_scan). - */ - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ); - if (!(p->flags & PG_CLEAN)) { - p->flags |= PG_BUSY; - vm_object_paging_begin(object); - vm_object_unlock(object); - /* - * XXX if put fails we mark the page as - * clean to avoid an infinite loop. - * Will loose changes to the page. - */ - if (vm_pager_put(object->pager, p, syncio)) { - printf("%s: pager_put error\n", - "vm_object_page_clean"); - p->flags |= PG_CLEAN; - noerror = FALSE; - } - vm_object_lock(object); - vm_object_paging_end(object); - if (!de_queue && onqueue) { - vm_page_lock_queues(); - if (onqueue > 0) - vm_page_activate(p); - else - vm_page_deactivate(p); - vm_page_unlock_queues(); - } - p->flags &= ~PG_BUSY; - PAGE_WAKEUP(p); - goto again; - } - } - } - return (noerror); -} - -/* - * vm_object_deactivate_pages - * - * Deactivate all pages in the specified object. (Keep its pages - * in memory even though it is no longer referenced.) - * - * The object must be locked. - */ -void -vm_object_deactivate_pages(object) - register vm_object_t object; -{ - register vm_page_t p, next; - - for (p = object->memq.tqh_first; p != NULL; p = next) { - next = p->listq.tqe_next; - vm_page_lock_queues(); - if (p->flags & PG_ACTIVE) - vm_page_deactivate(p); - vm_page_unlock_queues(); - } -} - -/* - * Trim the object cache to size. - */ -void -vm_object_cache_trim() -{ - register vm_object_t object; - - vm_object_cache_lock(); - while (vm_object_cached > vm_cache_max) { - object = vm_object_cached_list.tqh_first; - vm_object_cache_unlock(); - - if (object != vm_object_lookup(object->pager)) - panic("vm_object_cache_trim: I'm sooo confused."); - - pager_cache(object, FALSE); - - vm_object_cache_lock(); - } - vm_object_cache_unlock(); -} - -/* - * vm_object_pmap_copy: - * - * Makes all physical pages in the specified - * object range copy-on-write. No writeable - * references to these pages should remain. - * - * The object must *not* be locked. - */ -void -vm_object_pmap_copy(object, start, end) - register vm_object_t object; - register vm_offset_t start; - register vm_offset_t end; -{ - register vm_page_t p; - - if (object == NULL) - return; - - vm_object_lock(object); - for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { - if ((start <= p->offset) && (p->offset < end)) { - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ); - p->flags |= PG_COPYONWRITE; - } - } - vm_object_unlock(object); -} - -/* - * vm_object_pmap_remove: - * - * Removes all physical pages in the specified - * object range from all physical maps. - * - * The object must *not* be locked. - */ -void -vm_object_pmap_remove(object, start, end) - register vm_object_t object; - register vm_offset_t start; - register vm_offset_t end; -{ - register vm_page_t p; - - if (object == NULL) - return; - - vm_object_lock(object); - for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) - if ((start <= p->offset) && (p->offset < end)) - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); - vm_object_unlock(object); -} - -/* - * vm_object_copy: - * - * Create a new object which is a copy of an existing - * object, and mark all of the pages in the existing - * object 'copy-on-write'. The new object has one reference. - * Returns the new object. - * - * May defer the copy until later if the object is not backed - * up by a non-default pager. 
- */ -void -vm_object_copy(src_object, src_offset, size, - dst_object, dst_offset, src_needs_copy) - register vm_object_t src_object; - vm_offset_t src_offset; - vm_size_t size; - vm_object_t *dst_object; /* OUT */ - vm_offset_t *dst_offset; /* OUT */ - boolean_t *src_needs_copy; /* OUT */ -{ - register vm_object_t new_copy; - register vm_object_t old_copy; - vm_offset_t new_start, new_end; - - register vm_page_t p; - - if (src_object == NULL) { - /* - * Nothing to copy - */ - *dst_object = NULL; - *dst_offset = 0; - *src_needs_copy = FALSE; - return; - } - - /* - * If the object's pager is null_pager or the - * default pager, we don't have to make a copy - * of it. Instead, we set the needs copy flag and - * make a shadow later. - */ - - vm_object_lock(src_object); - if (src_object->pager == NULL || - (src_object->flags & OBJ_INTERNAL)) { - - /* - * Make another reference to the object. - */ - src_object->ref_count++; - - /* - * Mark all of the pages copy-on-write. - */ - for (p = src_object->memq.tqh_first; p; p = p->listq.tqe_next) - if (src_offset <= p->offset && - p->offset < src_offset + size) - p->flags |= PG_COPYONWRITE; - vm_object_unlock(src_object); - - *dst_object = src_object; - *dst_offset = src_offset; - - /* - * Must make a shadow when write is desired - */ - *src_needs_copy = TRUE; - return; - } - - /* - * Try to collapse the object before copying it. - */ - vm_object_collapse(src_object); - - /* - * If the object has a pager, the pager wants to - * see all of the changes. We need a copy-object - * for the changed pages. - * - * If there is a copy-object, and it is empty, - * no changes have been made to the object since the - * copy-object was made. We can use the same copy- - * object. - */ - -Retry1: - old_copy = src_object->copy; - if (old_copy != NULL) { - /* - * Try to get the locks (out of order) - */ - if (!vm_object_lock_try(old_copy)) { - vm_object_unlock(src_object); - - /* XXX should spin a bit here... */ - vm_object_lock(src_object); - goto Retry1; - } - - if (old_copy->resident_page_count == 0 && - old_copy->pager == NULL) { - /* - * Return another reference to - * the existing copy-object. - */ - old_copy->ref_count++; - vm_object_unlock(old_copy); - vm_object_unlock(src_object); - *dst_object = old_copy; - *dst_offset = src_offset; - *src_needs_copy = FALSE; - return; - } - vm_object_unlock(old_copy); - } - vm_object_unlock(src_object); - - /* - * If the object has a pager, the pager wants - * to see all of the changes. We must make - * a copy-object and put the changed pages there. - * - * The copy-object is always made large enough to - * completely shadow the original object, since - * it may have several users who want to shadow - * the original object at different points. - */ - - new_copy = vm_object_allocate(src_object->size); - -Retry2: - vm_object_lock(src_object); - /* - * Copy object may have changed while we were unlocked - */ - old_copy = src_object->copy; - if (old_copy != NULL) { - /* - * Try to get the locks (out of order) - */ - if (!vm_object_lock_try(old_copy)) { - vm_object_unlock(src_object); - goto Retry2; - } - - /* - * Consistency check - */ - if (old_copy->shadow != src_object || - old_copy->shadow_offset != (vm_offset_t) 0) - panic("vm_object_copy: copy/shadow inconsistency"); - - /* - * Make the old copy-object shadow the new one. - * It will receive no more pages from the original - * object. Locking of new_copy not needed. We - * have the only pointer. 
- */ - vm_object_set_shadow(old_copy, new_copy); - vm_object_unlock(old_copy); - } - - /* Always shadow original at 0 for the whole object */ - new_start = (vm_offset_t)0; - new_end = (vm_offset_t)new_copy->size; - - /* - * Point the new copy at the existing object. - */ - - vm_object_set_shadow(new_copy, src_object); - new_copy->shadow_offset = new_start; - src_object->copy = new_copy; - - /* - * Mark all the affected pages of the existing object - * copy-on-write. - */ - for (p = src_object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) - if ((new_start <= p->offset) && (p->offset < new_end)) - p->flags |= PG_COPYONWRITE; - - vm_object_unlock(src_object); - - *dst_object = new_copy; - *dst_offset = src_offset - new_start; - *src_needs_copy = FALSE; -} - -/* - * vm_object_shadow: - * - * Create a new object which is backed by the - * specified existing object range. The source - * object reference is deallocated. - * - * The new object and offset into that object - * are returned in the source parameters. - * - * The old object should not be locked. - */ -void -vm_object_shadow(object, offset, length) - vm_object_t *object; /* IN/OUT */ - vm_offset_t *offset; /* IN/OUT */ - vm_size_t length; -{ - register vm_object_t source; - register vm_object_t result; - - source = *object; - -#ifdef DIAGNOSTIC - if (source == NULL) - panic("vm_object_shadow: attempt to shadow null object"); -#endif - - /* - * Allocate a new object with the given length - */ - if ((result = vm_object_allocate(length)) == NULL) - panic("vm_object_shadow: no object for shadowing"); - - /* - * The new object shadows the source object. Our caller changes his - * reference to point to the new object, removing a reference to the - * source object. - */ - vm_object_lock(source); - vm_object_set_shadow(result, source); - source->ref_count--; - vm_object_unlock(source); - - /* - * Store the offset into the source object, - * and fix up the offset into the new object. - */ - result->shadow_offset = *offset; - - /* - * Return the new things - */ - *offset = 0; - *object = result; -} - -/* - * Set the specified object's pager to the specified pager. - */ -void -vm_object_setpager(object, pager, paging_offset, read_only) - vm_object_t object; - vm_pager_t pager; - vm_offset_t paging_offset; - boolean_t read_only; -{ -#ifdef lint - read_only++; /* No longer used */ -#endif - - vm_object_lock(object); /* XXX ? */ - object->pager = pager; - object->paging_offset = paging_offset; - vm_object_unlock(object); /* XXX ? */ -} - -/* - * vm_object_hash hashes the pager/id pair. - */ - -#define vm_object_hash(pager) \ - (((unsigned long)pager)%VM_OBJECT_HASH_COUNT) - -/* - * vm_object_lookup looks in the object cache for an object with the - * specified pager and paging id. - */ -vm_object_t -vm_object_lookup(pager) - vm_pager_t pager; -{ - register vm_object_hash_entry_t entry; - vm_object_t object; - - vm_object_cache_lock(); - - for (entry = vm_object_hashtable[vm_object_hash(pager)].tqh_first; - entry != NULL; - entry = entry->hash_links.tqe_next) { - object = entry->object; - if (object->pager == pager) { - vm_object_lock(object); - if (object->ref_count == 0) { - TAILQ_REMOVE(&vm_object_cached_list, object, - cached_list); - vm_object_cached--; - } - object->ref_count++; - vm_object_unlock(object); - vm_object_cache_unlock(); - return(object); - } - } - - vm_object_cache_unlock(); - return(NULL); -} - -/* - * vm_object_enter enters the specified object/pager/id into - * the hash table. 
- */ - -void -vm_object_enter(object, pager) - vm_object_t object; - vm_pager_t pager; -{ - struct vm_object_hash_head *bucket; - register vm_object_hash_entry_t entry; - - /* - * We don't cache null objects, and we can't cache - * objects with the null pager. - */ - - if (object == NULL) - return; - if (pager == NULL) - return; - - bucket = &vm_object_hashtable[vm_object_hash(pager)]; - entry = (vm_object_hash_entry_t) - malloc((u_long)sizeof *entry, M_VMOBJHASH, M_WAITOK); - entry->object = object; - object->flags |= OBJ_CANPERSIST; - - vm_object_cache_lock(); - TAILQ_INSERT_TAIL(bucket, entry, hash_links); - vm_object_cache_unlock(); -} - -/* - * vm_object_remove: - * - * Remove the pager from the hash table. - * Note: This assumes that the object cache - * is locked. XXX this should be fixed - * by reorganizing vm_object_deallocate. - */ -void -vm_object_remove(pager) - register vm_pager_t pager; -{ - struct vm_object_hash_head *bucket; - register vm_object_hash_entry_t entry; - register vm_object_t object; - - bucket = &vm_object_hashtable[vm_object_hash(pager)]; - - for (entry = bucket->tqh_first; - entry != NULL; - entry = entry->hash_links.tqe_next) { - object = entry->object; - if (object->pager == pager) { - TAILQ_REMOVE(bucket, entry, hash_links); - free((caddr_t)entry, M_VMOBJHASH); - break; - } - } -} - -/* - * vm_object_cache_clear removes all objects from the cache. - */ -void -vm_object_cache_clear() -{ - register vm_object_t object; - - /* - * Remove each object in the cache by scanning down the - * list of cached objects. - */ - vm_object_cache_lock(); - while ((object = vm_object_cached_list.tqh_first) != NULL) { - vm_object_cache_unlock(); - - /* - * Note: it is important that we use vm_object_lookup - * to gain a reference, and not vm_object_reference, because - * the logic for removing an object from the cache lies in - * lookup. - */ - if (object != vm_object_lookup(object->pager)) - panic("vm_object_cache_clear: I'm sooo confused."); - pager_cache(object, FALSE); - - vm_object_cache_lock(); - } - vm_object_cache_unlock(); -} - -/* - * vm_object_remove_from_pager: - * - * Tell object's pager that it needn't back the page - * anymore. If the pager ends up empty, deallocate it. - */ -int -vm_object_remove_from_pager(object, from, to) - vm_object_t object; - vm_offset_t from, to; -{ - vm_pager_t pager = object->pager; - int cnt = 0; - - if (pager == NULL) - return 0; - - cnt = vm_pager_remove(pager, from, to); - - /* If pager became empty, remove it. */ - if (cnt > 0 && vm_pager_count(pager) == 0) { - vm_pager_deallocate(pager); - object->pager = NULL; - } - return(cnt); -} - -#define FREE_PAGE(m) do { \ - PAGE_WAKEUP(m); \ - vm_page_lock_queues(); \ - vm_page_free(m); \ - vm_page_unlock_queues(); \ -} while(0) - -/* - * vm_object_overlay: - * - * Internal function to vm_object_collapse called when - * it has been shown that a collapse operation is likely - * to succeed. We know that the backing object is only - * referenced by me and that paging is not in progress. - */ -int -vm_object_overlay(object) - vm_object_t object; -{ - vm_object_t backing_object = object->shadow; - vm_offset_t backing_offset = object->shadow_offset; - vm_size_t size = object->size; - vm_offset_t offset, paged_offset; - vm_page_t backing_page, page = NULL; - int rv; - -#ifdef DEBUG - if (vmdebug & VMDEBUG_COLLAPSE) - printf("vm_object_overlay(0x%p)\n", object); -#endif - - /* - * Protect against multiple collapses. 
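A compilable sketch of the enter/remove pattern above, using the same <sys/queue.h> TAILQ macros but with stripped-down placeholder types: entries are appended to their bucket on enter, and removal searches the bucket, unlinks, and frees.

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	void *pager;			/* key: the pager pointer */
	void *object;			/* value: the cached object */
	TAILQ_ENTRY(entry) hash_links;
};
TAILQ_HEAD(bucket, entry);

/* Append a new cache entry to its bucket, as vm_object_enter does. */
static void
cache_enter(struct bucket *b, void *object, void *pager)
{
	struct entry *e = malloc(sizeof(*e));

	if (e == NULL)
		return;
	e->object = object;
	e->pager = pager;
	TAILQ_INSERT_TAIL(b, e, hash_links);
}

/* Unlink and free the entry for a pager, as vm_object_remove does. */
static void
cache_remove(struct bucket *b, void *pager)
{
	struct entry *e;

	TAILQ_FOREACH(e, b, hash_links)
		if (e->pager == pager) {
			TAILQ_REMOVE(b, e, hash_links);
			free(e);
			break;
		}
}

int
main(void)
{
	struct bucket b;
	int pager, object;

	TAILQ_INIT(&b);
	cache_enter(&b, &object, &pager);
	cache_remove(&b, &pager);
	printf("bucket empty: %d\n", TAILQ_EMPTY(&b));
	return 0;
}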
- */ - backing_object->flags |= OBJ_FADING; - - /* - * The algorithm used is roughly like this: - * (1) Trim a potential pager in the backing object so it'll only hold - * pages in reach. - * (2) Loop over all the resident pages in the shadow object and - * either remove them if they are shadowed or move them into the - * shadowing object. - * (3) Loop over the paged out pages in the shadow object. Start - * pageins on those that aren't shadowed, and just deallocate - * the others. In each iteration check if other users of these - * objects have caused pageins resulting in new resident pages. - * This can happen while we are waiting for a page or a pagein of - * ours. If such resident pages turn up, restart from (2). - */ - - /* - * As a first measure we know we can discard everything that the - * shadowing object doesn't shadow. - */ - if (backing_object->pager != NULL) { - if (backing_offset > 0) - vm_object_remove_from_pager(backing_object, 0, - backing_offset); - if (backing_offset + size < backing_object->size) - vm_object_remove_from_pager(backing_object, - backing_offset + size, backing_object->size); - } - - /* - * At this point, there may still be asynchronous paging in the parent - * object. Any pages being paged in will be represented by fake pages. - * There are three cases: - * 1) The page is being paged in from the parent object's own pager. - * In this case, we just delete our copy, since it's not needed. - * 2) The page is being paged in from the backing object. We prevent - * this case by waiting for paging to complete on the backing object - * before continuing. - * 3) The page is being paged in from a backing object behind the one - * we're deleting. We'll never notice this case, because the - * backing object we're deleting won't have the page. - */ - - vm_object_unlock(object); -retry: - vm_object_paging_wait(backing_object,"vpagew"); - - /* - * While we were asleep, the parent object might have been deleted. If - * so, the backing object will now have only one reference (the one we - * hold). If this happened, just deallocate the backing object and - * return failure status so vm_object_collapse() will stop. This will - * continue vm_object_deallocate() where it stopped due to our - * reference. - */ - if (backing_object->ref_count == 1) - goto fail; - vm_object_lock(object); - - /* - * Next, get rid of resident pages in the backing object. We can - * guarantee to remove every page thus we can write the while-test like - * this. - */ - while ((backing_page = backing_object->memq.tqh_first) != NULL) { - offset = backing_page->offset - backing_offset; - -#ifdef DIAGNOSTIC - if (backing_page->flags & (PG_BUSY | PG_FAKE)) - panic("vm_object_overlay: " - "busy or fake page in backing_object"); -#endif - - /* - * If the page is outside the shadowing object's range or if - * the page is shadowed (either by a resident page or a paged - * out one) we can discard it right away. Otherwise we need to - * move the page to the shadowing object. - */ - if (backing_page->offset < backing_offset || offset >= size || - ((page = vm_page_lookup(object, offset)) != NULL) || - (object->pager != NULL && - vm_pager_has_page(object->pager, offset))) { - /* - * Just discard the page, noone needs it. This - * includes removing the possible backing store too. 
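The per-page decision in the resident-page loop above boils down to one predicate; here is a sketch with simplified parameters (the booleans stand in for the vm_page_lookup and vm_pager_has_page checks).

#include <stdbool.h>
#include <stdio.h>

/*
 * Decide what to do with one resident page of the backing object
 * during a collapse.  Returns true when the page can simply be
 * discarded: it lies outside the range the front object shadows, or
 * the front object already has its own copy (resident or paged out).
 * Otherwise the page must be renamed into the front object.
 */
static bool
discard_backing_page(unsigned long page_offset,
    unsigned long backing_offset, unsigned long size,
    bool front_has_resident_copy, bool front_pager_has_copy)
{
	unsigned long offset = page_offset - backing_offset;

	if (page_offset < backing_offset || offset >= size)
		return true;		/* out of the front object's reach */
	return front_has_resident_copy || front_pager_has_copy;
}

int
main(void)
{
	/* front object shadows [0x1000, 0x1000 + 0x2000) of the backing one */
	printf("%d\n", discard_backing_page(0x3000, 0x1000, 0x2000,
	    false, false));		/* 1: out of range */
	printf("%d\n", discard_backing_page(0x1000, 0x1000, 0x2000,
	    true, false));		/* 1: already shadowed */
	printf("%d\n", discard_backing_page(0x2000, 0x1000, 0x2000,
	    false, false));		/* 0: must be moved up front */
	return 0;
}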
- */ - if (backing_object->pager != NULL) - vm_object_remove_from_pager(backing_object, - backing_page->offset, - backing_page->offset + PAGE_SIZE); - vm_page_lock_queues(); - vm_page_free(backing_page); - vm_page_unlock_queues(); - } else { - /* - * If the backing page was ever paged out, it was due - * to it being dirty at one point. Unless we have no - * pager allocated to the front object (thus will move - * forward the shadow's one), mark it dirty again so it - * won't be thrown away without being paged out to the - * front pager. - * - * XXX - * Should be able to move a page from one pager to - * another. - */ - if (object->pager != NULL && - vm_object_remove_from_pager(backing_object, - backing_page->offset, - backing_page->offset + PAGE_SIZE)) - backing_page->flags &= ~PG_CLEAN; - - /* Move the page up front. */ - vm_page_rename(backing_page, object, offset); - } - } - - /* - * If the shadowing object doesn't have a pager the easiest - * thing to do now is to just move the backing pager up front - * and everything is done. - */ - if (object->pager == NULL && backing_object->pager != NULL) { - object->pager = backing_object->pager; - object->paging_offset = backing_object->paging_offset + - backing_offset; - backing_object->pager = NULL; - goto done; - } - - /* - * What's left to do is to find all paged out pages in the - * backing pager and either discard or move it to the front - * object. We need to recheck the resident page set as a - * pagein might have given other threads the chance to, via - * readfaults, page in another page into the resident set. In - * this case we need to retry getting rid of pages from core. - */ - paged_offset = 0; - while (backing_object->pager != NULL && - (paged_offset = vm_pager_next(backing_object->pager, - paged_offset)) < backing_object->size) { - offset = paged_offset - backing_offset; - - /* - * If the parent object already has this page, delete it. - * Otherwise, start a pagein. - */ - if (((page = vm_page_lookup(object, offset)) == NULL) && - (object->pager == NULL || - !vm_pager_has_page(object->pager, offset))) { - vm_object_unlock(object); - - /* - * First allocate a page and mark it busy so another - * thread won't try to start another pagein. - */ - backing_page = vm_page_alloc(backing_object, - paged_offset); - if (backing_page == NULL) { - vm_object_unlock(backing_object); - vm_wait("fVmcollapse"); - vm_object_lock(backing_object); - goto retry; - } - backing_page->flags |= PG_BUSY; - -#ifdef DEBUG - if (vmdebug & VMDEBUG_COLLAPSE_PAGEIN) - printf("vm_object_overlay: pagein needed\n"); -#endif - - /* - * Second, start paging it in. If this fails, - * what can we do but punt? - */ - vm_object_paging_begin(backing_object); - vm_object_unlock(backing_object); - cnt.v_pageins++; - rv = vm_pager_get_pages(backing_object->pager, - &backing_page, 1, TRUE); - vm_object_lock(backing_object); - vm_object_paging_end(backing_object); - - /* - * IO error or page outside the range of the pager: - * cleanup and return an error. - */ - if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { - FREE_PAGE(backing_page); - goto fail; - } - - /* Handle the remaining failures. */ - if (rv != VM_PAGER_OK) { -#ifdef DIAGNOSTIC - panic("vm_object_overlay: pager returned %d", - rv); -#else - FREE_PAGE(backing_page); - goto fail; -#endif - } - cnt.v_pgpgin++; - - /* - * Third, relookup in case pager changed page. Pager - * is responsible for disposition of old page if moved. 
- */ - backing_page = vm_page_lookup(backing_object, - paged_offset); - - /* - * This page was once dirty, otherwise it - * hadn't been paged out in this shadow object. - * As we now remove the persistant store of the - * page, make sure it will be paged out in the - * front pager by dirtying it. - */ - backing_page->flags &= ~(PG_FAKE | PG_CLEAN); - - /* - * Fourth, restart the process as we have slept, - * thereby letting other threads change object's - * internal structure. Don't be tempted to move it up - * front here, the parent may be gone already. - */ - PAGE_WAKEUP(backing_page); - goto retry; - } - vm_object_remove_from_pager(backing_object, paged_offset, - paged_offset + PAGE_SIZE); - paged_offset += PAGE_SIZE; - } - -done: - /* - * I've seen this condition once in an out of VM situation. For the - * moment I don't know why it occurred, although I suspect - * vm_object_page_clean can create a pager even if it won't use it. - */ - if (backing_object->pager != NULL && - vm_pager_count(backing_object->pager) == 0) { - vm_pager_deallocate(backing_object->pager); - backing_object->pager = NULL; - } - -#ifdef DIAGNOSTIC - if (backing_object->pager) - panic("vm_object_overlay: backing_object->pager remains"); -#endif - - /* - * Object now shadows whatever backing_object did. - */ - if (backing_object->shadow) - vm_object_lock(backing_object->shadow); - vm_object_set_shadow(object, backing_object->shadow); - if (backing_object->shadow) - vm_object_unlock(backing_object->shadow); - object->shadow_offset += backing_object->shadow_offset; - if (object->shadow != NULL && object->shadow->copy != NULL) - panic("vm_object_overlay: we collapsed a copy-object!"); - -#ifdef DIAGNOSTIC - if (backing_object->ref_count != 1) - panic("vm_object_overlay: backing_object still referenced"); -#endif - - object_collapses++; - return KERN_SUCCESS; - -fail: - backing_object->flags &= ~OBJ_FADING; - return KERN_FAILURE; -} - -/* - * vm_object_bypass: - * - * Internal function to vm_object_collapse called when collapsing - * the object with its backing one is not allowed but there may - * be an opportunity to bypass the backing object and shadow the - * next object in the chain instead. - * - * If all of the pages in the backing object are shadowed by the parent - * object, the parent object no longer has to shadow the backing - * object; it can shadow the next one in the chain. - */ -int -vm_object_bypass(object) - vm_object_t object; -{ - vm_object_t backing_object = object->shadow; - vm_offset_t backing_offset = object->shadow_offset; - vm_offset_t offset, new_offset; - vm_page_t p, pp; - - /* - * XXX Punt if paging is going on. The issues in this case need to be - * looked into more closely. For now play it safe and return. There's - * no need to wait for it to end, as the expense will be much higher - * than the gain. - */ - if (vm_object_paging(backing_object)) - return KERN_FAILURE; - - /* - * Should have a check for a 'small' number of pages here. - */ - for (p = backing_object->memq.tqh_first; p != NULL; - p = p->listq.tqe_next) { - new_offset = p->offset - backing_offset; - - /* - * If the parent has a page here, or if this page falls outside - * the parent, keep going. - * - * Otherwise, the backing_object must be left in the chain. - */ - if (p->offset >= backing_offset && new_offset < object->size && - ((pp = vm_page_lookup(object, new_offset)) == NULL || - (pp->flags & PG_FAKE)) && - (object->pager == NULL || - !vm_pager_has_page(object->pager, new_offset))) - /* - * Page still needed. 
Can't go any further. - */ - return KERN_FAILURE; - } - - if (backing_object->pager) { - /* - * Should have a check for a 'small' number of pages here. - */ - for (offset = vm_pager_next(backing_object->pager, 0); - offset < backing_object->size; - offset = vm_pager_next(backing_object->pager, - offset + PAGE_SIZE)) { - new_offset = offset - backing_offset; - - /* - * If the parent has a page here, or if this page falls - * outside the parent, keep going. - * - * Otherwise, the backing_object must be left in the - * chain. - */ - if (offset >= backing_offset && - new_offset < object->size && - ((pp = vm_page_lookup(object, new_offset)) == - NULL || (pp->flags & PG_FAKE)) && - (object->pager == NULL || - !vm_pager_has_page(object->pager, new_offset))) - /* - * Page still needed. Can't go any further. - */ - return KERN_FAILURE; - } - } - - /* - * Object now shadows whatever backing_object did. - */ - if (backing_object->shadow) - vm_object_lock(backing_object->shadow); - vm_object_set_shadow(object, backing_object->shadow); - if (backing_object->shadow) - vm_object_unlock(backing_object->shadow); - object->shadow_offset += backing_object->shadow_offset; - - /* - * Backing object might have had a copy pointer to us. If it did, - * clear it. - */ - if (backing_object->copy == object) - backing_object->copy = NULL; - - object_bypasses++; - return KERN_SUCCESS; -} - -/* - * vm_object_collapse: - * - * Collapse an object with the object backing it. Pages in the backing object - * are moved into the parent, and the backing object is deallocated. - * - * Requires that the object be locked and the page queues be unlocked. - */ -void -vm_object_collapse(object) - vm_object_t object; - -{ - vm_object_collapse_internal(object, NULL); -} - -/* - * An internal to vm_object.c entry point to the collapsing logic, used by - * vm_object_deallocate to get rid of a potential recursion case. In that case - * an object to be deallocated is fed back via the retry_object pointer. - * External users will have that parameter wired to NULL, and then we are - * allowed to do vm_object_deallocate calls that may mutually recursive call us - * again. In that case it will only get one level deep and thus not be a real - * recursion. - */ -void -vm_object_collapse_internal(object, retry_object) - vm_object_t object, *retry_object; -{ - register vm_object_t backing_object; - int rv; - - /* We'd better initialize this one if the pointer is given. */ - if (retry_object) - *retry_object = NULL; - - if (!vm_object_collapse_allowed || object == NULL) - return; - - do { - /* - * Verify that the conditions are right for collapse: - * - * There is a backing object, and - */ - if ((backing_object = object->shadow) == NULL) - return; - - vm_object_lock(backing_object); - - /* - * ... the backing object is not read_only, is internal and is - * not already being collapsed, ... - */ - if ((backing_object->flags & (OBJ_INTERNAL | OBJ_FADING)) != - OBJ_INTERNAL) { - vm_object_unlock(backing_object); - return; - } - - /* - * The backing object can't be a copy-object: the shadow_offset - * for the copy-object must stay as 0. Furthermore (for the - * we have all the pages' case), if we bypass backing_object - * and just shadow the next object in the chain, old pages from - * that object would then have to be copied BOTH into the - *(former) backing_object and into the parent object. 
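A sketch of the bypass test above over an array of backing-object page offsets; the callback and its parameters are simplified stand-ins for the resident-page and pager lookups.

#include <stdbool.h>
#include <stdio.h>

/*
 * The parent may bypass its backing object only if no backing page
 * is still needed, i.e. every page either falls outside the range
 * the parent maps or is already shadowed by the parent itself.
 */
static bool
can_bypass(const unsigned long *backing_pages, int npages,
    unsigned long backing_offset, unsigned long parent_size,
    bool (*parent_has)(unsigned long parent_offset))
{
	int i;

	for (i = 0; i < npages; i++) {
		unsigned long off = backing_pages[i];
		unsigned long new_off = off - backing_offset;

		if (off >= backing_offset && new_off < parent_size &&
		    !parent_has(new_off))
			return false;	/* page still needed */
	}
	return true;
}

static bool
parent_has_everything(unsigned long off)
{
	(void)off;
	return true;
}

int
main(void)
{
	unsigned long pages[] = { 0x0000, 0x1000, 0x5000 };

	printf("%d\n", can_bypass(pages, 3, 0x1000, 0x2000,
	    parent_has_everything));	/* 1: bypass is safe */
	return 0;
}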
- */ - if (backing_object->shadow != NULL && - backing_object->shadow->copy != NULL) { - vm_object_unlock(backing_object); - return; - } - - /* - * Grab a reference to the backing object so that it - * can't be deallocated behind our back. - */ - backing_object->ref_count++; - -#ifdef DIAGNOSTIC - if (backing_object->ref_count == 1) - panic("vm_object_collapse: " - "collapsing unreferenced object"); -#endif - - /* - * If there is exactly one reference to the backing object, we - * can collapse it into the parent, otherwise we might be able - * to bypass it completely. - */ - rv = backing_object->ref_count == 2 ? - vm_object_overlay(object) : vm_object_bypass(object); - - /* - * Unlock and note we're ready with the backing object. If - * we are now the last referrer this will also deallocate the - * object itself. If the backing object has been orphaned - * and still have a shadow (it is possible in case of - * KERN_FAILURE from vm_object_overlay) this might lead to a - * recursion. However, if we are called from - * vm_object_deallocate, retry_object is not NULL and we are - * allowed to feedback the current backing object via that - * pointer. That way the recursion case turns into an - * iteration in vm_object_deallcate instead. - */ - if (retry_object != NULL && backing_object->ref_count == 1 && - backing_object->shadow != NULL) { - *retry_object = backing_object; - vm_object_unlock(backing_object); - return; - } - vm_object_unlock(backing_object); - vm_object_deallocate(backing_object); - - /* - * Try again with this object's new backing object. - */ - } while (rv == KERN_SUCCESS); -} - -/* - * vm_object_page_remove: [internal] - * - * Removes all physical pages in the specified - * object range from the object's list of pages. - * - * The object must be locked. - */ -void -vm_object_page_remove(object, start, end) - register vm_object_t object; - register vm_offset_t start; - register vm_offset_t end; -{ - register vm_page_t p, next; - - if (object == NULL) - return; - - for (p = object->memq.tqh_first; p != NULL; p = next) { - next = p->listq.tqe_next; - if ((start <= p->offset) && (p->offset < end)) { - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); - vm_page_lock_queues(); - vm_page_free(p); - vm_page_unlock_queues(); - } - } -} - -/* - * Routine: vm_object_coalesce - * Function: Coalesces two objects backing up adjoining - * regions of memory into a single object. - * - * returns TRUE if objects were combined. - * - * NOTE: Only works at the moment if the second object is NULL - - * if it's not, which object do we lock first? - * - * Parameters: - * prev_object First object to coalesce - * prev_offset Offset into prev_object - * next_object Second object into coalesce - * next_offset Offset into next_object - * - * prev_size Size of reference to prev_object - * next_size Size of reference to next_object - * - * Conditions: - * The object must *not* be locked. - */ -boolean_t -vm_object_coalesce(prev_object, next_object, prev_offset, next_offset, - prev_size, next_size) - register vm_object_t prev_object; - vm_object_t next_object; - vm_offset_t prev_offset, next_offset; - vm_size_t prev_size, next_size; -{ - vm_size_t newsize; - -#ifdef lint - next_offset++; -#endif - - if (next_object != NULL) { - return(FALSE); - } - - if (prev_object == NULL) { - return(TRUE); - } - - vm_object_lock(prev_object); - - /* - * Try to collapse the object first - */ - vm_object_collapse(prev_object); - - /* - * Can't coalesce if: - * . more than one reference - * . paged out - * . 
shadows another object - * . has a copy elsewhere - * (any of which mean that the pages not mapped to - * prev_entry may be in use anyway) - */ - - if (prev_object->ref_count > 1 || prev_object->pager != NULL || - prev_object->shadow != NULL || prev_object->copy != NULL) { - vm_object_unlock(prev_object); - return(FALSE); - } - - /* - * Remove any pages that may still be in the object from - * a previous deallocation. - */ - vm_object_page_remove(prev_object, prev_offset + prev_size, - prev_offset + prev_size + next_size); - - /* - * Extend the object if necessary. - */ - newsize = prev_offset + prev_size + next_size; - if (newsize > prev_object->size) - prev_object->size = newsize; - - vm_object_unlock(prev_object); - return(TRUE); -} - -/* - * vm_object_print: [ debug ] - */ -void -vm_object_print(object, full) - vm_object_t object; - boolean_t full; -{ - _vm_object_print(object, full, printf); -} - -void -_vm_object_print(object, full, pr) - vm_object_t object; - boolean_t full; - int (*pr) __P((const char *, ...)); -{ - register vm_page_t p; - char *delim; - vm_object_t o; - register int count; - extern int indent; - - if (object == NULL) - return; - - iprintf(pr, "Object 0x%p: size=0x%lx, res=%d, ref=%d, ", object, - (long)object->size, object->resident_page_count, - object->ref_count); - (*pr)("pager=%p+0x%lx, shadow=(%p)+0x%lx\n", object->pager, - (long)object->paging_offset, object->shadow, - (long)object->shadow_offset); - (*pr)("shadowers=("); - delim = ""; - for (o = object->shadowers.lh_first; o; - o = o->shadowers_list.le_next) { - (*pr)("%s0x%p", delim, o); - delim = ", "; - }; - (*pr)(")\n"); - (*pr)("cache: next=0x%p, prev=0x%p\n", object->cached_list.tqe_next, - object->cached_list.tqe_prev); - - if (!full) - return; - - indent += 2; - count = 0; - for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { - if (count == 0) - iprintf(pr, "memory:="); - else if (count == 6) { - (*pr)("\n"); - iprintf(pr, " ..."); - count = 0; - } else - (*pr)(","); - count++; - - (*pr)("(off=0x%lx,page=0x%lx)", (long)p->offset, - (long)VM_PAGE_TO_PHYS(p)); - } - if (count != 0) - (*pr)("\n"); - indent -= 2; -} - -/* - * vm_object_set_shadow: - * - * Maintain the shadow graph so that back-link consistency is always kept. - * - * Assumes both objects as well as the old shadow to be locked (unless NULL - * of course). 
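A small sketch of the coalesce bookkeeping above: reject the merge when the previous object is shared, paged, shadowing, or copied, otherwise grow it to cover the adjoining range. Fields are simplified and the whole thing is user-space only.

#include <stdbool.h>
#include <stdio.h>

struct object {
	int ref_count;
	bool has_pager, has_shadow, has_copy;
	unsigned long size;
};

static bool
coalesce(struct object *prev, unsigned long prev_offset,
    unsigned long prev_size, unsigned long next_size)
{
	unsigned long newsize;

	/* Any of these means other users may see the extra pages. */
	if (prev->ref_count > 1 || prev->has_pager ||
	    prev->has_shadow || prev->has_copy)
		return false;

	/* Extend the object so it also covers the next region. */
	newsize = prev_offset + prev_size + next_size;
	if (newsize > prev->size)
		prev->size = newsize;
	return true;
}

int
main(void)
{
	struct object o = { 1, false, false, false, 0x4000 };

	if (coalesce(&o, 0x2000, 0x2000, 0x1000))
		printf("coalesced, new size 0x%lx\n", o.size);
	return 0;
}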
- */ -void -vm_object_set_shadow(object, shadow) - vm_object_t object, shadow; -{ - vm_object_t old_shadow = object->shadow; - -#ifdef DEBUG - if (vmdebug & VMDEBUG_SHADOW) - printf("vm_object_set_shadow(object=0x%p, shadow=0x%p) " - "old_shadow=0x%p\n", object, shadow, old_shadow); - if (vmdebug & VMDEBUG_SHADOW_VERBOSE) { - vm_object_print(object, 0); - vm_object_print(old_shadow, 0); - vm_object_print(shadow, 0); - } -#endif - if (old_shadow == shadow) - return; - if (old_shadow) { - old_shadow->ref_count--; - LIST_REMOVE(object, shadowers_list); - } - if (shadow) { - shadow->ref_count++; - LIST_INSERT_HEAD(&shadow->shadowers, object, shadowers_list); - } - object->shadow = shadow; -#ifdef DEBUG - if (vmdebug & VMDEBUG_SHADOW_VERBOSE) { - vm_object_print(object, 0); - vm_object_print(old_shadow, 0); - vm_object_print(shadow, 0); - } -#endif -} diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c deleted file mode 100644 index 6f38260b6e1..00000000000 --- a/sys/vm/vm_page.c +++ /dev/null @@ -1,1881 +0,0 @@ -/* $OpenBSD: vm_page.c,v 1.18 2000/05/27 18:31:35 art Exp $ */ -/* $NetBSD: vm_page.c,v 1.41 1998/02/08 18:24:52 thorpej Exp $ */ - -#define VM_PAGE_ALLOC_MEMORY_STATS - -/*- - * Copyright (c) 1997 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, - * NASA Ames Research Center. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Resident memory management module. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/malloc.h> - -#include <vm/vm.h> -#include <vm/vm_page.h> -#include <vm/vm_kern.h> -#include <vm/vm_map.h> -#include <vm/vm_pageout.h> - -#include <machine/cpu.h> - -#define VERY_LOW_MEM() (cnt.v_free_count <= vm_page_free_reserved) -#define KERN_OBJ(object) ((object) == kernel_object || (object) == kmem_object) - -int vm_page_free_reserved = 10; - -#if defined(MACHINE_NEW_NONCONTIG) - -/* - * physical memory config is stored in vm_physmem. 
- */ - -struct vm_physseg vm_physmem[VM_PHYSSEG_MAX]; -int vm_nphysseg = 0; -static int vm_page_lost_count = 0; /* XXXCDC: DEBUG DEBUG */ - -#endif - -#if defined(MACHINE_NONCONTIG) || defined(MACHINE_NEW_NONCONTIG) -/* - * These variables record the values returned by vm_page_bootstrap, - * for debugging purposes. - * - * The implementation of vm_bootstrap_steal_memory here also uses - * them internally. - */ -static vm_offset_t virtual_space_start; -static vm_offset_t virtual_space_end; - -vm_offset_t vm_bootstrap_steal_memory __P((vm_size_t)); -#endif - -/* - * Associated with page of user-allocatable memory is a - * page structure. - */ - -struct pglist *vm_page_buckets; /* Array of buckets */ -int vm_page_bucket_count = 0; /* How big is array? */ -int vm_page_hash_mask; /* Mask for hash function */ -simple_lock_data_t bucket_lock; /* lock for all buckets XXX */ -#if defined(MACHINE_NEW_NONCONTIG) -struct pglist vm_page_bootbucket; /* bootstrap bucket */ -#endif - -struct pglist vm_page_queue_free; -struct pglist vm_page_queue_active; -struct pglist vm_page_queue_inactive; -simple_lock_data_t vm_page_queue_lock; -simple_lock_data_t vm_page_queue_free_lock; - -/* has physical page allocation been initialized? */ -boolean_t vm_page_startup_initialized; - -vm_page_t vm_page_array; -#if defined(MACHINE_NEW_NONCONTIG) - /* NOTHING NEEDED HERE */ -#elif defined(MACHINE_NONCONTIG) -/* OLD NONCONTIG CODE: NUKE NUKE NUKE ONCE CONVERTED */ -u_long first_page; -int vm_page_count; -#else -/* OLD NCONTIG CODE: NUKE NUKE NUKE ONCE CONVERTED */ -long first_page; -long last_page; -vm_offset_t first_phys_addr; -vm_offset_t last_phys_addr; -int vm_page_count; -#endif -vm_size_t page_mask; -int page_shift; - -#if defined(MACHINE_NEW_NONCONTIG) -/* - * local prototypes - */ - -#if !defined(PMAP_STEAL_MEMORY) -static boolean_t vm_page_physget __P((vm_offset_t *)); -#endif -#endif - -/* - * macros - */ - -/* - * vm_page_hash: - * - * Distributes the object/offset key pair among hash buckets. - * - * NOTE: This macro depends on vm_page_bucket_count being a power of 2. - */ -#define vm_page_hash(object, offset) \ - (((unsigned long)object+(unsigned long)atop(offset))&vm_page_hash_mask) - -/* - * vm_set_page_size: - * - * Sets the page size, perhaps based upon the memory - * size. Must be called before any use of page-size - * dependent functions. - * - * Sets page_shift and page_mask from cnt.v_page_size. - */ -void -vm_set_page_size() -{ - - if (cnt.v_page_size == 0) - cnt.v_page_size = DEFAULT_PAGE_SIZE; - page_mask = cnt.v_page_size - 1; - if ((page_mask & cnt.v_page_size) != 0) - panic("vm_set_page_size: page size not a power of two"); - for (page_shift = 0; ; page_shift++) - if ((1 << page_shift) == cnt.v_page_size) - break; -} - -#if defined(MACHINE_NEW_NONCONTIG) -/* - * vm_page_bootstrap: initialize the resident memory module (called - * from vm_mem_init()). - * - * - startp and endp are out params which return the boundaries of the - * free part of the kernel's virtual address space. - */ -void -vm_page_bootstrap(startp, endp) - vm_offset_t *startp, *endp; /* OUT, OUT */ -{ - vm_offset_t paddr; - vm_page_t pagearray; - int lcv, freepages, pagecount, n, i; - - /* - * first init all the locks and queues. - */ - simple_lock_init(&vm_page_queue_free_lock); - simple_lock_init(&vm_page_queue_lock); - TAILQ_INIT(&vm_page_queue_free); - TAILQ_INIT(&vm_page_queue_active); - TAILQ_INIT(&vm_page_queue_inactive); - - /* - * init the <OBJ,OFFSET> => <PAGE> hash table buckets. 
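The page-size setup above derives a mask and a shift and insists on a power of two; a user-space sketch of the same arithmetic, with an assumed 4 KB page size.

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	unsigned long page_size = 4096;		/* stands in for cnt.v_page_size */
	unsigned long page_mask = page_size - 1;
	int page_shift;

	/* A power of two has no bits in common with (itself - 1). */
	if ((page_mask & page_size) != 0) {
		fprintf(stderr, "page size not a power of two\n");
		return EXIT_FAILURE;
	}

	for (page_shift = 0; (1UL << page_shift) != page_size; page_shift++)
		;

	printf("mask=0x%lx shift=%d\n", page_mask, page_shift);
	return 0;
}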
for now - * we just have one bucket (the bootstrap bucket). later on we - * will malloc() new buckets as we dynamically resize the hash table. - */ - vm_page_bucket_count = 1; - vm_page_hash_mask = 0; - vm_page_buckets = &vm_page_bootbucket; - TAILQ_INIT(vm_page_buckets); - simple_lock_init(&bucket_lock); - - /* - * before calling this function the MD code is expected to register - * some free RAM with the vm_page_physload() function. our job - * now is to allocate vm_page structures for this preloaded memory. - */ - if (vm_nphysseg == 0) - panic("vm_page_bootstrap: no memory pre-allocated"); - - /* - * first calculate the number of free pages... note that start/end - * are inclusive so you have to add one to get the number of pages. - * - * note that we use start/end rather than avail_start/avail_end. - * this allows us to allocate extra vm_page structures in case we - * want to return some memory to the pool after booting. - */ - freepages = 0; - for (lcv = 0; lcv < vm_nphysseg; lcv++) - freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start); - - /* - * we now know we have (PAGE_SIZE * freepages) bytes of memory we can - * use. for each page of memory we use we need a vm_page structure. - * thus, the total number of pages we can use is the total size of - * the memory divided by the PAGE_SIZE plus the size of the vm_page - * structure. we add one to freepages as a fudge factor to avoid - * truncation errors (since we can only allocate in terms of whole - * pages). - */ - pagecount = (PAGE_SIZE * (freepages + 1)) / - (PAGE_SIZE + sizeof(struct vm_page)); - pagearray = (vm_page_t) - vm_bootstrap_steal_memory(pagecount * sizeof(struct vm_page)); - bzero(pagearray, pagecount * sizeof(struct vm_page)); - - /* - * now init the page frames - */ - for (lcv = 0; lcv < vm_nphysseg; lcv++) { - - n = vm_physmem[lcv].end - vm_physmem[lcv].start; - if (n > pagecount) { - printf("vm_page_bootstrap: lost %d page(s) in init\n", - n - pagecount); - vm_page_lost_count += (n - pagecount); - n = pagecount; - } - - /* set up page array pointers */ - vm_physmem[lcv].pgs = pagearray; - pagearray += n; - pagecount -= n; - vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1); - - /* init and free vm_pages (we've already bzero'd them) */ - paddr = ptoa(vm_physmem[lcv].start); - for (i = 0; i < n; i++, paddr += PAGE_SIZE) { - vm_physmem[lcv].pgs[i].phys_addr = paddr; - if (atop(paddr) >= vm_physmem[lcv].avail_start && - atop(paddr) <= vm_physmem[lcv].avail_end) - vm_page_free(&vm_physmem[lcv].pgs[i]); - } - } - - /* - * pass up the values of virtual_space_start and virtual_space_end - * (obtained by vm_bootstrap_steal_memory) to the upper layers of - * the VM. - */ - *startp = round_page(virtual_space_start); - *endp = trunc_page(virtual_space_end); - - /* - * init pagedaemon lock - */ - simple_lock_init(&vm_pages_needed_lock); -} - -/* - * vm_bootstrap_steal_memory: steal memory from physmem for bootstrapping - */ -vm_offset_t -vm_bootstrap_steal_memory(size) - vm_size_t size; -{ -#if defined(PMAP_STEAL_MEMORY) - vm_offset_t addr; - - /* - * Defer this to machine-dependent code; we may need to allocate - * from a direct-mapped segment. - */ - addr = pmap_steal_memory(size, &virtual_space_start, - &virtual_space_end); - - /* round it the way we like it */ - virtual_space_start = round_page(virtual_space_start); - virtual_space_end = trunc_page(virtual_space_end); - - return (addr); -#else /* ! 
PMAP_STEAL_MEMORY */ - vm_offset_t addr, vaddr, paddr; - - /* round to page size */ - size = round_page(size); - - /* - * on first call to this function init ourselves. we detect this - * by checking virtual_space_start/end which are in the zero'd BSS - * area. - */ - if (virtual_space_start == virtual_space_end) { - pmap_virtual_space(&virtual_space_start, &virtual_space_end); - - /* round it the way we like it */ - virtual_space_start = round_page(virtual_space_start); - virtual_space_end = trunc_page(virtual_space_end); - } - - /* - * allocate virtual memory for this request - */ - addr = virtual_space_start; - virtual_space_start += size; - - /* - * allocate and mapin physical pages to back new virtual pages - */ - for (vaddr = round_page(addr); vaddr < addr + size; - vaddr += PAGE_SIZE) { - if (!vm_page_physget(&paddr)) - panic("vm_bootstrap_steal_memory: out of memory"); - - /* XXX: should be wired, but some pmaps don't like that ... */ - pmap_enter(pmap_kernel(), vaddr, paddr, - VM_PROT_READ|VM_PROT_WRITE, FALSE, 0); - } - return(addr); -#endif /* PMAP_STEAL_MEMORY */ -} - -#if !defined(PMAP_STEAL_MEMORY) -/* - * vm_page_physget: "steal" one page from the vm_physmem structure. - * - * - attempt to allocate it off the end of a segment in which the "avail" - * values match the start/end values. if we can't do that, then we - * will advance both values (making them equal, and removing some - * vm_page structures from the non-avail area). - * - return false if out of memory. - */ -static boolean_t -vm_page_physget(paddrp) - vm_offset_t *paddrp; - -{ - int lcv, x; - - /* pass 1: try allocating from a matching end */ -#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) - for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--) -#else - for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) -#endif - { - if (vm_physmem[lcv].pgs) - panic("vm_page_physget: called _after_ bootstrap"); - - /* try from front */ - if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start && - vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) { - *paddrp = ptoa(vm_physmem[lcv].avail_start); - vm_physmem[lcv].avail_start++; - vm_physmem[lcv].start++; - - /* nothing left? nuke it */ - if (vm_physmem[lcv].avail_start == - vm_physmem[lcv].end) { - if (vm_nphysseg == 1) - panic("vm_page_physget: out of memory!"); - vm_nphysseg--; - for (x = lcv; x < vm_nphysseg; x++) - /* structure copy */ - vm_physmem[x] = vm_physmem[x+1]; - } - return(TRUE); - } - - /* try from rear */ - if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end && - vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) { - *paddrp = ptoa(vm_physmem[lcv].avail_end - 1); - vm_physmem[lcv].avail_end--; - vm_physmem[lcv].end--; - - /* nothing left? nuke it */ - if (vm_physmem[lcv].avail_end == - vm_physmem[lcv].start) { - if (vm_nphysseg == 1) - panic("vm_page_physget: out of memory!"); - vm_nphysseg--; - for (x = lcv; x < vm_nphysseg; x++) - /* structure copy */ - vm_physmem[x] = vm_physmem[x+1]; - } - return(TRUE); - } - } - - /* pass2: forget about matching ends, just allocate something */ -#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) - for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--) -#else - for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) -#endif - { - /* any room in this bank? */ - if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end) - continue; /* nope */ - - *paddrp = ptoa(vm_physmem[lcv].avail_start); - vm_physmem[lcv].avail_start++; - vm_physmem[lcv].start = vm_physmem[lcv].avail_start; /* truncate! */ - - /* nothing left? 
nuke it */ - if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) { - if (vm_nphysseg == 1) - panic("vm_page_physget: out of memory!"); - vm_nphysseg--; - for (x = lcv; x < vm_nphysseg; x++) - vm_physmem[x] = vm_physmem[x+1]; /* structure copy */ - } - return(TRUE); - } - - return(FALSE); /* whoops! */ -} -#endif /* ! PMAP_STEAL_MEMORY */ - -/* - * vm_page_physload: load physical memory into VM system - * - * - all args are PFs - * - all pages in start/end get vm_page structures - * - areas marked by avail_start/avail_end get added to the free page pool - * - we are limited to VM_PHYSSEG_MAX physical memory segments - */ -void -vm_page_physload(start, end, avail_start, avail_end) - vm_offset_t start, end, avail_start, avail_end; -{ - struct vm_page *pgs; - struct vm_physseg *ps; - int preload, lcv, npages; -#if (VM_PHYSSEG_STRAT != VM_PSTRAT_RANDOM) - int x; -#endif - - if (page_shift == 0) - panic("vm_page_physload: page size not set!"); - - /* - * do we have room? - */ - if (vm_nphysseg == VM_PHYSSEG_MAX) { - printf("vm_page_physload: unable to load physical memory segment\n"); - printf("\t%d segments allocated, ignoring 0x%lx -> 0x%lx\n", - VM_PHYSSEG_MAX, start, end); - return; - } - - /* - * check to see if this is a "preload" (i.e. vm_mem_init hasn't been - * called yet, so malloc is not available). - */ - for (lcv = 0; lcv < vm_nphysseg; lcv++) { - if (vm_physmem[lcv].pgs) - break; - } - preload = (lcv == vm_nphysseg); - - /* - * if VM is already running, attempt to malloc() vm_page structures - */ - if (!preload) { -#if defined(VM_PHYSSEG_NOADD) - panic("vm_page_physload: tried to add RAM after vm_mem_init"); -#else -/* XXXCDC: need some sort of lockout for this case */ - vm_offset_t paddr; - - /* # of pages */ - npages = end - start; - MALLOC(pgs, struct vm_page *, sizeof(struct vm_page) * npages, - M_VMPGDATA, M_NOWAIT); - if (pgs == NULL) { - printf("vm_page_physload: " - "can not malloc vm_page structs for segment\n" - "\tignoring 0x%lx -> 0x%lx\n", start, end); - return; - } - /* zero data, init phys_addr, and free pages */ - bzero(pgs, sizeof(struct vm_page) * npages); - for (lcv = 0, paddr = ptoa(start); lcv < npages; - lcv++, paddr += PAGE_SIZE) { - pgs[lcv].phys_addr = paddr; - if (atop(paddr) >= avail_start && - atop(paddr) <= avail_end) - vm_page_free(&pgs[lcv]); - } -/* XXXCDC: incomplete: need to update v_free_count, what else? - v_free_count is updated in vm_page_free, actualy */ -/* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */ -#endif - } else { - /* XXX/gcc complains if these don't get init'd */ - pgs = NULL; - npages = 0; - } - - /* - * now insert us in the proper place in vm_physmem[] - */ -#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM) - /* random: put it at the end (easy!) */ - ps = &vm_physmem[vm_nphysseg]; - -#else -#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) - - /* sort by address for binary search */ - for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) - if (start < vm_physmem[lcv].start) - break; - ps = &vm_physmem[lcv]; - - /* move back other entries, if necessary ... */ - for (x = vm_nphysseg ; x > lcv ; x--) - /* structure copy */ - vm_physmem[x] = vm_physmem[x - 1]; - -#else -#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) - - /* sort by largest segment first */ - for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) - if ((end - start) > - (vm_physmem[lcv].end - vm_physmem[lcv].start)) - break; - ps = &vm_physmem[lcv]; - - /* move back other entries, if necessary ... 
*/ - for (x = vm_nphysseg ; x > lcv ; x--) - /* structure copy */ - vm_physmem[x] = vm_physmem[x - 1]; - -#else - - panic("vm_page_physload: unknown physseg strategy selected!"); - -#endif -#endif -#endif - - ps->start = start; - ps->end = end; - ps->avail_start = avail_start; - ps->avail_end = avail_end; - if (preload) { - ps->pgs = NULL; - } else { - ps->pgs = pgs; - ps->lastpg = pgs + npages - 1; - } - vm_nphysseg++; - - /* - * done! - */ - return; -} - -/* - * vm_page_physrehash: reallocate hash table based on number of - * free pages. - */ -void -vm_page_physrehash() -{ - struct pglist *newbuckets, *oldbuckets; - struct vm_page *pg; - int freepages, lcv, bucketcount, s, oldcount; - - /* - * compute number of pages that can go in the free pool - */ - freepages = 0; - for (lcv = 0; lcv < vm_nphysseg; lcv++) - freepages = freepages + (vm_physmem[lcv].avail_end - - vm_physmem[lcv].avail_start); - - /* - * compute number of buckets needed for this number of pages - */ - bucketcount = 1; - while (bucketcount < freepages) - bucketcount = bucketcount * 2; - - /* - * malloc new buckets - */ - MALLOC(newbuckets, struct pglist*, sizeof(struct pglist) * bucketcount, - M_VMPBUCKET, M_NOWAIT); - if (newbuckets == NULL) { - printf("vm_page_physrehash: " - "WARNING: could not grow page hash table\n"); - return; - } - for (lcv = 0; lcv < bucketcount; lcv++) - TAILQ_INIT(&newbuckets[lcv]); - - /* - * now replace the old buckets with the new ones and rehash everything - */ - s = splimp(); - simple_lock(&bucket_lock); - /* swap old for new ... */ - oldbuckets = vm_page_buckets; - oldcount = vm_page_bucket_count; - vm_page_buckets = newbuckets; - vm_page_bucket_count = bucketcount; - vm_page_hash_mask = bucketcount - 1; /* power of 2 */ - - /* ... and rehash */ - for (lcv = 0 ; lcv < oldcount ; lcv++) { - while ((pg = oldbuckets[lcv].tqh_first) != NULL) { - TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq); - TAILQ_INSERT_TAIL(&vm_page_buckets[ - vm_page_hash(pg->object, pg->offset)], pg, hashq); - } - } - simple_unlock(&bucket_lock); - splx(s); - - /* - * free old bucket array if we malloc'd it previously - */ - if (oldbuckets != &vm_page_bootbucket) - FREE(oldbuckets, M_VMPBUCKET); - - /* - * done - */ - return; -} - -#if 1 /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */ - -void vm_page_physdump __P((void)); /* SHUT UP GCC */ - -/* call from DDB */ -void -vm_page_physdump() -{ - int lcv; - - printf("rehash: physical memory config [segs=%d of %d]:\n", - vm_nphysseg, VM_PHYSSEG_MAX); - for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) - printf("0x%lx->0x%lx [0x%lx->0x%lx]\n", vm_physmem[lcv].start, - vm_physmem[lcv].end, vm_physmem[lcv].avail_start, - vm_physmem[lcv].avail_end); - printf("STRATEGY = "); - - switch (VM_PHYSSEG_STRAT) { - case VM_PSTRAT_RANDOM: - printf("RANDOM\n"); - break; - - case VM_PSTRAT_BSEARCH: - printf("BSEARCH\n"); - break; - - case VM_PSTRAT_BIGFIRST: - printf("BIGFIRST\n"); - break; - - default: - printf("<<UNKNOWN>>!!!!\n"); - } - printf("number of buckets = %d\n", vm_page_bucket_count); - printf("number of lost pages = %d\n", vm_page_lost_count); -} -#endif - -#elif defined(MACHINE_NONCONTIG) -/* OLD NONCONTIG CODE: NUKE NUKE NUKE ONCE CONVERTED */ - -/* - * We implement vm_page_bootstrap and vm_bootstrap_steal_memory with - * the help of two simpler functions: - * - * pmap_virtual_space and pmap_next_page - */ - -/* - * vm_page_bootstrap: - * - * Initializes the resident memory module. - * - * Allocates memory for the page cells, and - * for the object/offset-to-page hash table headers. 
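The insertion step above keeps vm_physmem[] ordered according to the configured strategy by shifting later entries back to open a slot; here is a user-space sketch of the address-sorted (BSEARCH) variant with simplified segments and an arbitrary table size.

#include <stdio.h>

#define SEG_MAX 8			/* illustrative stand-in for VM_PHYSSEG_MAX */

struct seg {
	unsigned long start, end;
};

static struct seg segs[SEG_MAX];
static int nsegs;

/* Keep the array sorted by start address (the BSEARCH strategy). */
static void
seg_insert(unsigned long start, unsigned long end)
{
	int lcv, x;

	if (nsegs == SEG_MAX)
		return;			/* table full: ignore the segment */

	for (lcv = 0; lcv < nsegs; lcv++)
		if (start < segs[lcv].start)
			break;

	/* move back other entries to make room (structure copies) */
	for (x = nsegs; x > lcv; x--)
		segs[x] = segs[x - 1];

	segs[lcv].start = start;
	segs[lcv].end = end;
	nsegs++;
}

int
main(void)
{
	int i;

	seg_insert(0x4000, 0x8000);
	seg_insert(0x0000, 0x2000);
	for (i = 0; i < nsegs; i++)
		printf("seg %d: 0x%lx-0x%lx\n", i, segs[i].start, segs[i].end);
	return 0;
}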
- * Each page cell is initialized and placed on the free list. - * Returns the range of available kernel virtual memory. - */ -void -vm_page_bootstrap(startp, endp) - vm_offset_t *startp; - vm_offset_t *endp; -{ - unsigned int i, freepages; - register struct pglist *bucket; - vm_offset_t paddr; - - extern vm_offset_t kentry_data; - extern vm_size_t kentry_data_size; - - - /* - * Initialize the locks - */ - simple_lock_init(&vm_page_queue_free_lock); - simple_lock_init(&vm_page_queue_lock); - - /* - * Initialize the queue headers for the free queue, - * the active queue and the inactive queue. - */ - TAILQ_INIT(&vm_page_queue_free); - TAILQ_INIT(&vm_page_queue_active); - TAILQ_INIT(&vm_page_queue_inactive); - - /* - * Pre-allocate maps and map entries that cannot be dynamically - * allocated via malloc(). The maps include the kernel_map and - * kmem_map which must be initialized before malloc() will - * work (obviously). Also could include pager maps which would - * be allocated before kmeminit. - * - * Allow some kernel map entries... this should be plenty - * since people shouldn't be cluttering up the kernel - * map (they should use their own maps). - */ - - kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) + - MAX_KMAPENT*sizeof(struct vm_map_entry)); - kentry_data = vm_bootstrap_steal_memory(kentry_data_size); - - /* - * Validate these zone addresses. - */ - bzero((caddr_t) kentry_data, kentry_data_size); - - /* - * Allocate (and initialize) the virtual-to-physical - * table hash buckets. - * - * The number of buckets MUST BE a power of 2, and - * the actual value is the next power of 2 greater - * than the number of physical pages in the system. - * - * Note: - * This computation can be tweaked if desired. - */ - if (vm_page_bucket_count == 0) { - unsigned int npages = pmap_free_pages(); - - vm_page_bucket_count = 1; - while (vm_page_bucket_count < npages) - vm_page_bucket_count <<= 1; - } - - vm_page_hash_mask = vm_page_bucket_count - 1; - - vm_page_buckets = (struct pglist *) - vm_bootstrap_steal_memory(vm_page_bucket_count * - sizeof(*vm_page_buckets)); - bucket = vm_page_buckets; - - for (i = vm_page_bucket_count; i--;) { - TAILQ_INIT(bucket); - bucket++; - } - - simple_lock_init(&bucket_lock); - - /* - * We calculate how many page frames we will have and - * then allocate the page structures in one chunk. - * The calculation is non-trivial. We want: - * - * vmpages > (freepages - (vmpages / sizeof(vm_page_t))) - * - * ...which, with some algebra, becomes: - * - * vmpages > (freepages * sizeof(...) / (1 + sizeof(...))) - * - * The value of vm_page_count need not be exact, but must - * be large enough so vm_page_array handles the index range. - */ - - freepages = pmap_free_pages(); - /* Fudge slightly to deal with truncation error. */ - freepages += 1; /* fudge */ - - vm_page_count = (PAGE_SIZE * freepages) / - (PAGE_SIZE + sizeof(*vm_page_array)); - - vm_page_array = (vm_page_t) - vm_bootstrap_steal_memory(vm_page_count * sizeof(*vm_page_array)); - bzero(vm_page_array, vm_page_count * sizeof(*vm_page_array)); - -#ifdef DIAGNOSTIC - /* - * Initialize everything in case the holes are stepped in, - * and set PA to something that will cause a panic... - */ - for (i = 0; i < vm_page_count; i++) - vm_page_array[i].phys_addr = 0xdeadbeef; -#endif - - /* - * Initialize the page frames. Note that some page - * indices may not be usable when pmap_free_pages() - * counts pages in a hole. 
- */ - - if (!pmap_next_page(&paddr)) - panic("vm_page_bootstrap: can't get first page"); - - first_page = pmap_page_index(paddr); - for (i = 0;;) { - /* - * Initialize a page array element. - */ - - VM_PAGE_INIT(&vm_page_array[i], NULL, NULL); - vm_page_array[i].phys_addr = paddr; - vm_page_free(&vm_page_array[i]); - - /* - * Are there any more physical pages? - */ - - if (!pmap_next_page(&paddr)) - break; - i = pmap_page_index(paddr) - first_page; - - /* - * Don't trust pmap_page_index()... - */ - - if ( -#if 0 - i < 0 || /* can't happen, i is unsigned */ -#endif - i >= vm_page_count) - panic("vm_page_bootstrap: bad i = 0x%x", i); - } - - /* - * Make sure we have nice, round values. - */ - - virtual_space_start = round_page(virtual_space_start); - virtual_space_end = trunc_page(virtual_space_end); - - *startp = virtual_space_start; - *endp = virtual_space_end; - - simple_lock_init(&vm_pages_needed_lock); -} - -vm_offset_t -vm_bootstrap_steal_memory(size) - vm_size_t size; -{ - vm_offset_t addr, vaddr, paddr; - - /* - * We round to page size. - */ - - size = round_page(size); - - /* - * If this is the first call to vm_bootstrap_steal_memory, - * we have to initialize ourself. - */ - - if (virtual_space_start == virtual_space_end) { - pmap_virtual_space(&virtual_space_start, &virtual_space_end); - - /* - * The initial values must be aligned properly, and - * we don't trust the pmap module to do it right. - */ - - virtual_space_start = round_page(virtual_space_start); - virtual_space_end = trunc_page(virtual_space_end); - } - - /* - * Allocate virtual memory for this request. - */ - - addr = virtual_space_start; - virtual_space_start += size; - - /* - * Allocate and map physical pages to back new virtual pages. - */ - - for (vaddr = round_page(addr); - vaddr < addr + size; - vaddr += PAGE_SIZE) { - if (!pmap_next_page(&paddr)) - panic("vm_bootstrap_steal_memory"); - - /* - * XXX Logically, these mappings should be wired, - * but some pmap modules barf if they are. - */ - - pmap_enter(pmap_kernel(), vaddr, paddr, - VM_PROT_READ|VM_PROT_WRITE, FALSE, 0); - } - - return addr; -} - -#else /* MACHINE_NONCONTIG */ - -/* OLD CONTIG CODE: NUKE NUKE NUKE ONCE CONVERTED */ -/* - * vm_page_startup: - * - * Initializes the resident memory module. - * - * Allocates memory for the page cells, and - * for the object/offset-to-page hash table headers. - * Each page cell is initialized and placed on the free list. - */ -void -vm_page_startup(start, end) - vm_offset_t *start; - vm_offset_t *end; -{ - register vm_page_t m; - register struct pglist *bucket; - int npages; - int i; - vm_offset_t pa; - extern vm_offset_t kentry_data; - extern vm_size_t kentry_data_size; - - - /* - * Initialize the locks - */ - simple_lock_init(&vm_page_queue_free_lock); - simple_lock_init(&vm_page_queue_lock); - - /* - * Initialize the queue headers for the free queue, - * the active queue and the inactive queue. - */ - TAILQ_INIT(&vm_page_queue_free); - TAILQ_INIT(&vm_page_queue_active); - TAILQ_INIT(&vm_page_queue_inactive); - - /* - * Calculate the number of hash table buckets. - * - * The number of buckets MUST BE a power of 2, and - * the actual value is the next power of 2 greater - * than the number of physical pages in the system. - * - * Note: - * This computation can be tweaked if desired. 
- */ - if (vm_page_bucket_count == 0) { - vm_page_bucket_count = 1; - while (vm_page_bucket_count < atop(*end - *start)) - vm_page_bucket_count <<= 1; - } - - vm_page_hash_mask = vm_page_bucket_count - 1; - - /* - * Allocate (and initialize) the hash table buckets. - */ - vm_page_buckets = (struct pglist *) - pmap_bootstrap_alloc(vm_page_bucket_count * sizeof(struct pglist)); - bucket = vm_page_buckets; - - for (i = vm_page_bucket_count; i--;) { - TAILQ_INIT(bucket); - bucket++; - } - - simple_lock_init(&bucket_lock); - - /* - * Truncate the remainder of physical memory to our page size. - */ - *end = trunc_page(*end); - - /* - * Pre-allocate maps and map entries that cannot be dynamically - * allocated via malloc(). The maps include the kernel_map and - * kmem_map which must be initialized before malloc() will - * work (obviously). Also could include pager maps which would - * be allocated before kmeminit. - * - * Allow some kernel map entries... this should be plenty - * since people shouldn't be cluttering up the kernel - * map (they should use their own maps). - */ - kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) + - MAX_KMAPENT*sizeof(struct vm_map_entry)); - kentry_data = (vm_offset_t) pmap_bootstrap_alloc(kentry_data_size); - - /* - * Compute the number of pages of memory that will be - * available for use (taking into account the overhead - * of a page structure per page). - */ - cnt.v_free_count = vm_page_count = - (*end - *start + sizeof(struct vm_page)) / - (PAGE_SIZE + sizeof(struct vm_page)); - - /* - * Record the extent of physical memory that the - * virtual memory system manages. - */ - first_page = *start; - first_page += vm_page_count * sizeof(struct vm_page); - first_page = atop(round_page(first_page)); - last_page = first_page + vm_page_count - 1; - - first_phys_addr = ptoa(first_page); - last_phys_addr = ptoa(last_page) + PAGE_MASK; - - /* - * Allocate and clear the mem entry structures. - */ - m = vm_page_array = (vm_page_t) - pmap_bootstrap_alloc(vm_page_count * sizeof(struct vm_page)); - bzero(vm_page_array, vm_page_count * sizeof(struct vm_page)); - - /* - * Initialize the mem entry structures now, and - * put them in the free queue. - */ - pa = first_phys_addr; - npages = vm_page_count; - while (npages--) { - m->flags = PG_FREE; - m->object = NULL; - m->phys_addr = pa; - TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq); - m++; - pa += PAGE_SIZE; - } - - /* - * Initialize vm_pages_needed lock here - don't wait for pageout - * daemon XXX - */ - simple_lock_init(&vm_pages_needed_lock); - - /* from now on, pmap_bootstrap_alloc can't be used */ - vm_page_startup_initialized = TRUE; -} -#endif /* MACHINE_NONCONTIG */ - -/* - * vm_page_insert: [ internal use only ] - * - * Inserts the given mem entry into the object/object-page - * table and object list. - * - * The object and page must be locked. 
- */ -void -vm_page_insert(mem, object, offset) - register vm_page_t mem; - register vm_object_t object; - register vm_offset_t offset; -{ - register struct pglist *bucket; - int spl; - - VM_PAGE_CHECK(mem); - - if (mem->flags & PG_TABLED) - panic("vm_page_insert: already inserted"); - - /* - * Record the object/offset pair in this page - */ - - mem->object = object; - mem->offset = offset; - - /* - * Insert it into the object_object/offset hash table - */ - - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; - spl = splimp(); - simple_lock(&bucket_lock); - TAILQ_INSERT_TAIL(bucket, mem, hashq); - simple_unlock(&bucket_lock); - (void) splx(spl); - - /* - * Now link into the object's list of backed pages. - */ - - TAILQ_INSERT_TAIL(&object->memq, mem, listq); - mem->flags |= PG_TABLED; - - /* - * And show that the object has one more resident - * page. - */ - - object->resident_page_count++; -} - -/* - * vm_page_remove: [ internal use only ] - * XXX: used by device pager as well - * - * Removes the given mem entry from the object/offset-page - * table and the object page list. - * - * The object and page must be locked. - */ -void -vm_page_remove(mem) - register vm_page_t mem; -{ - register struct pglist *bucket; - int spl; - - VM_PAGE_CHECK(mem); - -#ifdef DIAGNOSTIC - if (mem->flags & PG_FAULTING) - panic("vm_page_remove: page is faulting"); -#endif - - if (!(mem->flags & PG_TABLED)) - return; - - /* - * Remove from the object_object/offset hash table - */ - - bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)]; - spl = splimp(); - simple_lock(&bucket_lock); - TAILQ_REMOVE(bucket, mem, hashq); - simple_unlock(&bucket_lock); - (void) splx(spl); - - /* - * Now remove from the object's list of backed pages. - */ - - TAILQ_REMOVE(&mem->object->memq, mem, listq); - - /* - * And show that the object has one fewer resident - * page. - */ - - mem->object->resident_page_count--; - - mem->flags &= ~PG_TABLED; -} - -/* - * vm_page_lookup: - * - * Returns the page associated with the object/offset - * pair specified; if none is found, NULL is returned. - * - * The object must be locked. No side effects. - */ -vm_page_t -vm_page_lookup(object, offset) - register vm_object_t object; - register vm_offset_t offset; -{ - register vm_page_t mem; - register struct pglist *bucket; - int spl; - - /* - * Search the hash table for this object/offset pair - */ - - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; - - spl = splimp(); - simple_lock(&bucket_lock); - for (mem = bucket->tqh_first; mem != NULL; mem = mem->hashq.tqe_next) { - VM_PAGE_CHECK(mem); - if ((mem->object == object) && (mem->offset == offset)) { - simple_unlock(&bucket_lock); - splx(spl); - return(mem); - } - } - - simple_unlock(&bucket_lock); - splx(spl); - return(NULL); -} - -/* - * vm_page_rename: - * - * Move the given memory entry from its - * current object to the specified target object/offset. - * - * The object must be locked. - */ -void -vm_page_rename(mem, new_object, new_offset) - register vm_page_t mem; - register vm_object_t new_object; - vm_offset_t new_offset; -{ - - if (mem->object == new_object) - return; - - vm_page_lock_queues(); /* keep page from moving out from - under pageout daemon */ - vm_page_remove(mem); - vm_page_insert(mem, new_object, new_offset); - vm_page_unlock_queues(); -} - -/* - * vm_page_alloc: - * - * Allocate and return a memory cell associated - * with this VM object/offset pair. - * - * Object must be locked. 
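vm_page_hash(), used by the insert/remove/lookup routines above, is a macro defined in a header that is not part of this diff. A plausible shape for it, assuming a power-of-two bucket count and 4 KB pages, is sketched below; the exact definition in vm_page.h may differ.

/*
 * Illustrative only: a bucket index for an (object, offset) pair when the
 * bucket count is a power of two.  The shift of 12 assumes 4 KB pages.
 */
unsigned long
page_hash(void *object, unsigned long offset, unsigned long hash_mask)
{
	return (((unsigned long)object + (offset >> 12)) & hash_mask);
}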
- */ - -vm_page_t -vm_page_alloc(object, offset) - vm_object_t object; - vm_offset_t offset; -{ - register vm_page_t mem; - int spl; - - spl = splimp(); /* XXX */ - simple_lock(&vm_page_queue_free_lock); - mem = vm_page_queue_free.tqh_first; - - if (VERY_LOW_MEM()) { - if ((!KERN_OBJ(object) && curproc != pageout_daemon) - || mem == NULL) { - simple_unlock(&vm_page_queue_free_lock); - splx(spl); - return(NULL); - } - } -#ifdef DIAGNOSTIC - if (mem == NULL) /* because we now depend on VERY_LOW_MEM() */ - panic("vm_page_alloc"); -#endif - TAILQ_REMOVE(&vm_page_queue_free, mem, pageq); - - cnt.v_free_count--; - simple_unlock(&vm_page_queue_free_lock); - splx(spl); - - VM_PAGE_INIT(mem, object, offset); - - /* - * Decide if we should poke the pageout daemon. - * We do this if the free count is less than the low - * water mark, or if the free count is less than the high - * water mark (but above the low water mark) and the inactive - * count is less than its target. - * - * We don't have the counts locked ... if they change a little, - * it doesn't really matter. - */ - - if (cnt.v_free_count < cnt.v_free_min || - (cnt.v_free_count < cnt.v_free_target && - cnt.v_inactive_count < cnt.v_inactive_target)) - thread_wakeup(&vm_pages_needed); - return (mem); -} - -/* - * vm_page_free: - * - * Returns the given page to the free list, - * disassociating it with any VM object. - * - * Object and page must be locked prior to entry. - */ -void -vm_page_free(mem) - register vm_page_t mem; -{ - - vm_page_remove(mem); - if (mem->flags & PG_ACTIVE) { - TAILQ_REMOVE(&vm_page_queue_active, mem, pageq); - mem->flags &= ~PG_ACTIVE; - cnt.v_active_count--; - } - - if (mem->flags & PG_INACTIVE) { - TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq); - mem->flags &= ~PG_INACTIVE; - cnt.v_inactive_count--; - } - - if (!(mem->flags & PG_FICTITIOUS)) { - int spl; - - spl = splimp(); - simple_lock(&vm_page_queue_free_lock); - mem->flags |= PG_FREE; - TAILQ_INSERT_TAIL(&vm_page_queue_free, mem, pageq); - - cnt.v_free_count++; - simple_unlock(&vm_page_queue_free_lock); - splx(spl); - } -} - -/* - * vm_page_wire: - * - * Mark this page as wired down by yet - * another map, removing it from paging queues - * as necessary. - * - * The page queues must be locked. - */ -void -vm_page_wire(mem) - register vm_page_t mem; -{ - - VM_PAGE_CHECK(mem); - - if (mem->wire_count == 0) { - if (mem->flags & PG_ACTIVE) { - TAILQ_REMOVE(&vm_page_queue_active, mem, pageq); - cnt.v_active_count--; - mem->flags &= ~PG_ACTIVE; - } - if (mem->flags & PG_INACTIVE) { - TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq); - cnt.v_inactive_count--; - mem->flags &= ~PG_INACTIVE; - } - cnt.v_wire_count++; - } - mem->wire_count++; -} - -/* - * vm_page_unwire: - * - * Release one wiring of this page, potentially - * enabling it to be paged again. - * - * The page queues must be locked. - */ -void -vm_page_unwire(mem) - register vm_page_t mem; -{ - - VM_PAGE_CHECK(mem); - - mem->wire_count--; - if (mem->wire_count == 0) { - TAILQ_INSERT_TAIL(&vm_page_queue_active, mem, pageq); - cnt.v_active_count++; - mem->flags |= PG_ACTIVE; - cnt.v_wire_count--; - } -} - -/* - * vm_page_deactivate: - * - * Returns the given page to the inactive list, - * indicating that no physical maps have access - * to this page. [Used by the physical mapping system.] - * - * The page queues must be locked. - */ -void -vm_page_deactivate(m) - register vm_page_t m; -{ - - VM_PAGE_CHECK(m); - - /* - * Only move active pages -- ignore locked or already - * inactive ones. 
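The "poke the pageout daemon" rule described in vm_page_alloc() earlier in this hunk combines two thresholds. Restating it as a standalone predicate with made-up water marks makes the cases explicit; the numbers are illustrative only.

#include <assert.h>

/* The wakeup test from vm_page_alloc(), with illustrative water marks:
 * free_min 64, free_target 85, inactive_target 1000. */
static int
should_wake_pageout(int free, int inactive)
{
	int free_min = 64, free_target = 85, inactive_target = 1000;

	return (free < free_min ||
	    (free < free_target && inactive < inactive_target));
}

int
main(void)
{
	assert(should_wake_pageout(50, 2000));	/* below the low water mark */
	assert(should_wake_pageout(70, 900));	/* low-ish free, inactive list short */
	assert(!should_wake_pageout(70, 1200));	/* inactive target already met */
	return (0);
}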
- */ - - if (m->flags & PG_ACTIVE) { - TAILQ_REMOVE(&vm_page_queue_active, m, pageq); - m->flags &= ~PG_ACTIVE; - cnt.v_active_count--; - goto deact; - } - if ((m->flags & PG_INACTIVE) == 0) { - deact: - TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); - m->flags |= PG_INACTIVE; - cnt.v_inactive_count++; - pmap_clear_reference(VM_PAGE_TO_PHYS(m)); - if (pmap_is_modified(VM_PAGE_TO_PHYS(m))) - m->flags &= ~PG_CLEAN; - if (m->flags & PG_CLEAN) - m->flags &= ~PG_LAUNDRY; - else - m->flags |= PG_LAUNDRY; - } -} - -/* - * vm_page_activate: - * - * Put the specified page on the active list (if appropriate). - * - * The page queues must be locked. - */ -void -vm_page_activate(m) - register vm_page_t m; -{ - - VM_PAGE_CHECK(m); - - if (m->flags & PG_INACTIVE) { - TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); - m->flags &= ~PG_INACTIVE; - cnt.v_inactive_count--; - } - if (m->wire_count == 0) { - if (m->flags & PG_ACTIVE) - panic("vm_page_activate: already active"); - - TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); - m->flags |= PG_ACTIVE; - cnt.v_active_count++; - } -} - -/* - * vm_page_zero_fill: - * - * Zero-fill the specified page. - * Written as a standard pagein routine, to - * be used by the zero-fill object. - */ -boolean_t -vm_page_zero_fill(m) - vm_page_t m; -{ - - VM_PAGE_CHECK(m); - - m->flags &= ~PG_CLEAN; - pmap_zero_page(VM_PAGE_TO_PHYS(m)); - return(TRUE); -} - -/* - * vm_page_copy: - * - * Copy one page to another - */ -void -vm_page_copy(src_m, dest_m) - vm_page_t src_m; - vm_page_t dest_m; -{ - - VM_PAGE_CHECK(src_m); - VM_PAGE_CHECK(dest_m); - - dest_m->flags &= ~PG_CLEAN; - pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m)); -} - -#ifdef VM_PAGE_ALLOC_MEMORY_STATS -#define STAT_INCR(v) (v)++ -#define STAT_DECR(v) do { \ - if ((v) == 0) \ - printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \ - else \ - (v)--; \ - } while (0) -u_long vm_page_alloc_memory_npages; -#else -#define STAT_INCR(v) -#define STAT_DECR(v) -#endif - -/* - * vm_page_alloc_memory: - * - * Allocate physical pages conforming to the restrictions - * provided: - * - * size The size of the allocation, - * rounded to page size. - * - * low The low address of the allowed - * allocation range. - * - * high The high address of the allowed - * allocation range. - * - * alignment Allocation must be aligned to this - * power-of-two boundary. - * - * boundary No segment in the allocation may - * cross this power-of-two boundary - * (relative to zero). - * - * The allocated pages are placed at the tail of `rlist'; `rlist' - * is assumed to be properly initialized by the caller. The - * number of memory segments that the allocated memory may - * occupy is specified in the `nsegs' arguement. - * - * Returns 0 on success or an errno value to indicate mode - * of failure. - * - * XXX This implementation could be improved. It only - * XXX allocates a single segment. 
- */ -int -vm_page_alloc_memory(size, low, high, alignment, boundary, - rlist, nsegs, waitok) - vm_size_t size; - vm_offset_t low, high, alignment, boundary; - struct pglist *rlist; - int nsegs, waitok; -{ - vm_offset_t try, idxpa, lastidxpa; -#if defined(MACHINE_NEW_NONCONTIG) - int psi; - struct vm_page *vm_page_array; -#endif - int s, tryidx, idx, end, error; - vm_page_t m; - u_long pagemask; -#ifdef DEBUG - vm_page_t tp; -#endif - -#ifdef DIAGNOSTIC - if ((alignment & (alignment - 1)) != 0) - panic("vm_page_alloc_memory: alignment must be power of 2"); - - if ((boundary & (boundary - 1)) != 0) - panic("vm_page_alloc_memory: boundary must be power of 2"); -#endif - - /* - * Our allocations are always page granularity, so our alignment - * must be, too. - */ - if (alignment < PAGE_SIZE) - alignment = PAGE_SIZE; - - size = round_page(size); - try = roundup(low, alignment); - - if (boundary != 0 && boundary < size) - return (EINVAL); - - pagemask = ~(boundary - 1); - - /* Default to "lose". */ - error = ENOMEM; - - /* - * Block all memory allocation and lock the free list. - */ - s = splimp(); - simple_lock(&vm_page_queue_free_lock); - - /* Are there even any free pages? */ - if (vm_page_queue_free.tqh_first == NULL) - goto out; - - for (;; try += alignment) { - if (try + size > high) { - /* - * We've run past the allowable range. - */ - goto out; - } - - /* - * Make sure this is a managed physical page. - */ -#if defined(MACHINE_NEW_NONCONTIG) - - if ((psi = vm_physseg_find(atop(try), &idx)) == -1) - continue; /* managed? */ - if (vm_physseg_find(atop(try + size), NULL) != psi) - continue; /* end must be in this segment */ - - tryidx = idx; - end = idx + (size / PAGE_SIZE); - vm_page_array = vm_physmem[psi].pgs; - /* XXX: emulates old global vm_page_array */ - -#else - if (IS_VM_PHYSADDR(try) == 0) - continue; - - tryidx = idx = VM_PAGE_INDEX(try); - end = idx + (size / PAGE_SIZE); - if (end > vm_page_count) { - /* - * No more physical memory. - */ - goto out; - } -#endif - - /* - * Found a suitable starting page. See of the range - * is free. - */ - for (; idx < end; idx++) { - if (VM_PAGE_IS_FREE(&vm_page_array[idx]) == 0) { - /* - * Page not available. - */ - break; - } - - idxpa = VM_PAGE_TO_PHYS(&vm_page_array[idx]); - -#if !defined(MACHINE_NEW_NONCONTIG) - /* - * Make sure this is a managed physical page. - * XXX Necessary? I guess only if there - * XXX are holes in the vm_page_array[]. - */ - if (IS_VM_PHYSADDR(idxpa) == 0) - break; -#endif - - if (idx > tryidx) { - lastidxpa = - VM_PAGE_TO_PHYS(&vm_page_array[idx - 1]); - - if ((lastidxpa + PAGE_SIZE) != idxpa) { - /* - * Region not contiguous. - */ - break; - } - if (boundary != 0 && - ((lastidxpa ^ idxpa) & pagemask) != 0) { - /* - * Region crosses boundary. - */ - break; - } - } - } - - if (idx == end) { - /* - * Woo hoo! Found one. - */ - break; - } - } - - /* - * Okay, we have a chunk of memory that conforms to - * the requested constraints. 
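The boundary test in the scan above relies on the pagemask trick: two physical addresses lie in the same boundary-sized window exactly when their XOR has no bits at or above the boundary. A standalone sketch with an illustrative 16 MB boundary:

#include <stdio.h>

/* Same test as above: do two physical addresses sit in different
 * boundary-sized windows?  boundary must be a power of two. */
static int
crosses_boundary(unsigned long pa1, unsigned long pa2, unsigned long boundary)
{
	unsigned long pagemask = ~(boundary - 1);

	return (((pa1 ^ pa2) & pagemask) != 0);
}

int
main(void)
{
	/* 0x00fff000 and 0x01000000 straddle a 16 MB boundary; prints 1 then 0. */
	printf("%d\n", crosses_boundary(0x00fff000UL, 0x01000000UL, 0x01000000UL));
	printf("%d\n", crosses_boundary(0x00ffe000UL, 0x00fff000UL, 0x01000000UL));
	return (0);
}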
- */ - idx = tryidx; - while (idx < end) { - m = &vm_page_array[idx]; -#ifdef DEBUG - for (tp = vm_page_queue_free.tqh_first; tp != NULL; - tp = tp->pageq.tqe_next) { - if (tp == m) - break; - } - if (tp == NULL) - panic("vm_page_alloc_memory: page not on freelist"); -#endif - TAILQ_REMOVE(&vm_page_queue_free, m, pageq); - cnt.v_free_count--; - m->flags = PG_CLEAN; - m->object = NULL; - m->wire_count = 0; - TAILQ_INSERT_TAIL(rlist, m, pageq); - idx++; - STAT_INCR(vm_page_alloc_memory_npages); - } - error = 0; - - out: - simple_unlock(&vm_page_queue_free_lock); - splx(s); - return (error); -} - -vm_offset_t -vm_page_alloc_contig(size, low, high, alignment) - vm_offset_t size; - vm_offset_t low; - vm_offset_t high; - vm_offset_t alignment; -{ - struct pglist mlist; - struct vm_page *m; - vm_offset_t addr, tmp_addr; - - TAILQ_INIT(&mlist); - if (vm_page_alloc_memory(size, low, high, alignment, 0, - &mlist, 1, FALSE)) - return 0; - addr = tmp_addr = kmem_alloc_pageable(kernel_map, size); - for (m = TAILQ_FIRST(&mlist); m != NULL; m = TAILQ_NEXT(m, pageq)) { - vm_page_insert(m, kernel_object, - tmp_addr - VM_MIN_KERNEL_ADDRESS); - vm_page_wire(m); - pmap_enter(pmap_kernel(), tmp_addr, VM_PAGE_TO_PHYS(m), - VM_PROT_READ|VM_PROT_WRITE, TRUE, 0); - tmp_addr += PAGE_SIZE; - } - return addr; -} - -/* - * vm_page_free_memory: - * - * Free a list of pages previously allocated by vm_page_alloc_memory(). - * The pages are assumed to have no mappings. - */ -void -vm_page_free_memory(list) - struct pglist *list; -{ - vm_page_t m; - int s; - - /* - * Block all memory allocation and lock the free list. - */ - s = splimp(); - simple_lock(&vm_page_queue_free_lock); - - while ((m = list->tqh_first) != NULL) { - TAILQ_REMOVE(list, m, pageq); - m->flags = PG_FREE; - TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq); - cnt.v_free_count++; - STAT_DECR(vm_page_alloc_memory_npages); - } - - simple_unlock(&vm_page_queue_free_lock); - splx(s); -} diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c deleted file mode 100644 index 8903bd57c6a..00000000000 --- a/sys/vm/vm_pageout.c +++ /dev/null @@ -1,620 +0,0 @@ -/* $OpenBSD: vm_pageout.c,v 1.11 2001/03/21 23:24:51 art Exp $ */ -/* $NetBSD: vm_pageout.c,v 1.23 1996/02/05 01:54:07 christos Exp $ */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_pageout.c 8.7 (Berkeley) 6/19/95 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * The proverbial page-out daemon. - */ - -#include <sys/param.h> -#include <sys/proc.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/pool.h> - -#include <vm/vm.h> -#include <vm/vm_page.h> -#include <vm/vm_pageout.h> - -#ifndef VM_PAGE_FREE_MIN -#define VM_PAGE_FREE_MIN (cnt.v_free_count / 20) -#endif - -#ifndef VM_PAGE_FREE_TARGET -#define VM_PAGE_FREE_TARGET ((cnt.v_free_min * 4) / 3) -#endif - -int vm_page_free_min_min = 16 * 1024; -int vm_page_free_min_max = 256 * 1024; - -int vm_pages_needed; /* Event on which pageout daemon sleeps */ - -int vm_page_max_wired = 0; /* XXX max # of wired pages system-wide */ - -#ifdef CLUSTERED_PAGEOUT -#define MAXPOCLUSTER (MAXPHYS/NBPG) /* XXX */ -int doclustered_pageout = 1; -#endif - -/* - * Activate the pageout daemon and sleep awaiting more free memory - */ -void -vm_wait(msg) - char *msg; -{ - int timo = 0; - - if(curproc == pageout_daemon) { - /* - * We might be toast here, but IF some paging operations - * are pending then pages will magically appear. We - * usually can't return an error because callers of - * malloc who can wait generally don't check for - * failure. - * - * Only the pageout_daemon wakes up this channel! - */ - printf("pageout daemon has stalled\n"); - timo = hz >> 3; - } - simple_lock(&vm_pages_needed_lock); - thread_wakeup(&vm_pages_needed); - thread_sleep_msg(&cnt.v_free_count, &vm_pages_needed_lock, FALSE, msg, - timo); -} - -/* - * vm_pageout_scan does the dirty work for the pageout daemon. 
- */ -void -vm_pageout_scan() -{ - register vm_page_t m, next; - register int page_shortage; - register int s; - register int pages_freed; - int free; - vm_object_t object; - - /* - * Only continue when we want more pages to be "free" - */ - - cnt.v_rev++; - - s = splimp(); - simple_lock(&vm_page_queue_free_lock); - free = cnt.v_free_count; - simple_unlock(&vm_page_queue_free_lock); - splx(s); - -#ifndef __SWAP_BROKEN /* XXX */ - if (free < cnt.v_free_target) { - swapout_threads(); - - /* - * Be sure the pmap system is updated so - * we can scan the inactive queue. - */ - - pmap_update(); - } -#endif /* XXX */ - - /* - * Acquire the resident page system lock, - * as we may be changing what's resident quite a bit. - */ - vm_page_lock_queues(); - - /* - * Start scanning the inactive queue for pages we can free. - * We keep scanning until we have enough free pages or - * we have scanned through the entire queue. If we - * encounter dirty pages, we start cleaning them. - */ - - pages_freed = 0; - for (m = vm_page_queue_inactive.tqh_first; m != NULL; m = next) { - s = splimp(); - simple_lock(&vm_page_queue_free_lock); - free = cnt.v_free_count; - simple_unlock(&vm_page_queue_free_lock); - splx(s); - if (free >= cnt.v_free_target) - break; - - cnt.v_scan++; - next = m->pageq.tqe_next; - - /* - * If the page has been referenced, move it back to the - * active queue. - */ - if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { - vm_page_activate(m); - cnt.v_reactivated++; - continue; - } - - /* - * If the page is clean, free it up. - */ - if (m->flags & PG_CLEAN) { - object = m->object; - if (vm_object_lock_try(object)) { - pmap_page_protect(VM_PAGE_TO_PHYS(m), - VM_PROT_NONE); - vm_page_free(m); - pages_freed++; - cnt.v_dfree++; - vm_object_unlock(object); - } - continue; - } - - /* - * If the page is dirty but already being washed, skip it. - */ - if ((m->flags & PG_LAUNDRY) == 0) - continue; - - /* - * Otherwise the page is dirty and still in the laundry, - * so we start the cleaning operation and remove it from - * the laundry. - */ - object = m->object; - if (!vm_object_lock_try(object)) - continue; -#ifdef CLUSTERED_PAGEOUT - if (object->pager && - vm_pager_cancluster(object->pager, PG_CLUSTERPUT)) - vm_pageout_cluster(m, object); - else -#endif - vm_pageout_page(m, object); - thread_wakeup(object); - vm_object_unlock(object); - /* - * Former next page may no longer even be on the inactive - * queue (due to potential blocking in the pager with the - * queues unlocked). If it isn't, we just start over. - */ - if (next && (next->flags & PG_INACTIVE) == 0) - next = vm_page_queue_inactive.tqh_first; - } - - /* - * Compute the page shortage. If we are still very low on memory - * be sure that we will move a minimal amount of pages from active - * to inactive. - */ - - page_shortage = cnt.v_inactive_target - cnt.v_inactive_count; - if (page_shortage <= 0 && pages_freed == 0) - page_shortage = 1; - - while (page_shortage > 0) { - /* - * Move some more pages from active to inactive. - */ - - if ((m = vm_page_queue_active.tqh_first) == NULL) - break; - vm_page_deactivate(m); - page_shortage--; - } - - vm_page_unlock_queues(); -} - -/* - * Called with object and page queues locked. - * If reactivate is TRUE, a pager error causes the page to be - * put back on the active queue, ow it is left on the inactive queue. 
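The page-shortage rule at the end of vm_pageout_scan() above can be read in isolation: refill the inactive list up to its target, and when a scan freed nothing, still deactivate at least one page so the daemon keeps making progress. A minimal restatement:

/* The shortfall rule from vm_pageout_scan() above, as a standalone helper. */
int
deactivation_quota(int inactive_target, int inactive_count, int pages_freed)
{
	int page_shortage = inactive_target - inactive_count;

	if (page_shortage <= 0 && pages_freed == 0)
		page_shortage = 1;
	return (page_shortage > 0 ? page_shortage : 0);
}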
- */ -void -vm_pageout_page(m, object) - vm_page_t m; - vm_object_t object; -{ - vm_pager_t pager; - int pageout_status; - - /* - * We set the busy bit to cause potential page faults on - * this page to block. - * - * We also set pageout-in-progress to keep the object from - * disappearing during pageout. This guarantees that the - * page won't move from the inactive queue. (However, any - * other page on the inactive queue may move!) - */ - pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); - m->flags |= PG_BUSY; - - /* - * Try to collapse the object before making a pager for it. - * We must unlock the page queues first. - */ - vm_page_unlock_queues(); - -#if 0 - /* - * vm_object_collapse might want to sleep waiting for pages which - * is not allowed to do in this thread. Anyway, we now aggressively - * collapse object-chains as early as possible so this call ought - * to not be very useful anyhow. This is just an educated guess. - * Not doing a collapse operation is never fatal though, so we skip - * it for the time being. Later we might add some NOWAIT option for - * the collapse code to look at, if it's deemed necessary. - */ - if (object->pager == NULL) - vm_object_collapse(object); -#endif - - vm_object_paging_begin(object); - vm_object_unlock(object); - - /* - * We _used_ to wakeup page consumers here, "in case the following - * operations block". That leads to livelock if the pageout fails, - * which is actually quite a common thing for NFS paging. - */ - - /* - * If there is no pager for the page, use the default pager. - * If there is no place to put the page at the moment, - * leave it in the laundry and hope that there will be - * paging space later. - */ - if ((pager = object->pager) == NULL) { - pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, object->size, - VM_PROT_ALL, (vm_offset_t)0); - if (pager != NULL) - vm_object_setpager(object, pager, 0, FALSE); - } - pageout_status = pager ? vm_pager_put(pager, m, FALSE) : VM_PAGER_FAIL; - vm_object_lock(object); - vm_page_lock_queues(); - - switch (pageout_status) { - case VM_PAGER_OK: - case VM_PAGER_PEND: - /* hmm, don't wakeup if memory is _very_ low? */ - thread_wakeup(&cnt.v_free_count); - cnt.v_pageouts++; - cnt.v_pgpgout++; - m->flags &= ~PG_LAUNDRY; - break; - case VM_PAGER_BAD: - /* - * Page outside of range of object. Right now we - * essentially lose the changes by pretending it - * worked. - * - * XXX dubious, what should we do? - */ - m->flags &= ~PG_LAUNDRY; - m->flags |= PG_CLEAN; - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); - break; - case VM_PAGER_AGAIN: - { - /* - * FAIL on a write is interpreted to mean a resource - * shortage, so we put pause for awhile and try again. - * XXX could get stuck here. - */ - (void)tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH, - "pageout", hz>>3); - break; - } - case VM_PAGER_FAIL: - case VM_PAGER_ERROR: - /* - * If page couldn't be paged out, then reactivate - * the page so it doesn't clog the inactive list. - * (We will try paging out it again later). - */ - vm_page_activate(m); - cnt.v_reactivated++; - break; - } - - pmap_clear_reference(VM_PAGE_TO_PHYS(m)); - - /* - * If the operation is still going, leave the page busy - * to block all other accesses. Also, leave the paging - * in progress indicator set so that we don't attempt an - * object collapse. 
- */ - if (pageout_status != VM_PAGER_PEND) { - m->flags &= ~PG_BUSY; - PAGE_WAKEUP(m); - vm_object_paging_end(object); - } -} - -#ifdef CLUSTERED_PAGEOUT -#define PAGEOUTABLE(p) \ - ((((p)->flags & (PG_INACTIVE|PG_CLEAN|PG_LAUNDRY)) == \ - (PG_INACTIVE|PG_LAUNDRY)) && !pmap_is_referenced(VM_PAGE_TO_PHYS(p))) - -/* - * Attempt to pageout as many contiguous (to ``m'') dirty pages as possible - * from ``object''. Using information returned from the pager, we assemble - * a sorted list of contiguous dirty pages and feed them to the pager in one - * chunk. Called with paging queues and object locked. Also, object must - * already have a pager. - */ -void -vm_pageout_cluster(m, object) - vm_page_t m; - vm_object_t object; -{ - vm_offset_t offset, loff, hoff; - vm_page_t plist[MAXPOCLUSTER], *plistp, p; - int postatus, ix, count; - - cnt.v_pageouts++; - /* - * Determine the range of pages that can be part of a cluster - * for this object/offset. If it is only our single page, just - * do it normally. - */ - vm_pager_cluster(object->pager, m->offset, &loff, &hoff); - if (hoff - loff == PAGE_SIZE) { - vm_pageout_page(m, object); - return; - } - - plistp = plist; - - /* - * Target page is always part of the cluster. - */ - pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); - m->flags |= PG_BUSY; - plistp[atop(m->offset - loff)] = m; - count = 1; - - /* - * Backup from the given page til we find one not fulfilling - * the pageout criteria or we hit the lower bound for the - * cluster. For each page determined to be part of the - * cluster, unmap it and busy it out so it won't change. - */ - ix = atop(m->offset - loff); - offset = m->offset; - while (offset > loff && count < MAXPOCLUSTER-1) { - p = vm_page_lookup(object, offset - PAGE_SIZE); - if (p == NULL || !PAGEOUTABLE(p)) - break; - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); - p->flags |= PG_BUSY; - plistp[--ix] = p; - offset -= PAGE_SIZE; - count++; - } - plistp += atop(offset - loff); - loff = offset; - - /* - * Now do the same moving forward from the target. - */ - ix = atop(m->offset - loff) + 1; - offset = m->offset + PAGE_SIZE; - while (offset < hoff && count < MAXPOCLUSTER) { - p = vm_page_lookup(object, offset); - if (p == NULL || !PAGEOUTABLE(p)) - break; - pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE); - p->flags |= PG_BUSY; - plistp[ix++] = p; - offset += PAGE_SIZE; - count++; - } - hoff = offset; - - /* - * Pageout the page. - * Unlock everything and do a wakeup prior to the pager call - * in case it blocks. - */ - vm_page_unlock_queues(); - vm_object_paging_begin(object); - vm_object_unlock(object); -again: - thread_wakeup(&cnt.v_free_count); - postatus = vm_pager_put_pages(object->pager, plistp, count, FALSE); - /* - * XXX rethink this - */ - if (postatus == VM_PAGER_AGAIN) { - (void)tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH, - "pageout", 0); - goto again; - } else if (postatus == VM_PAGER_BAD) - panic("vm_pageout_cluster: VM_PAGER_BAD"); - vm_object_lock(object); - vm_page_lock_queues(); - - /* - * Loop through the affected pages, reflecting the outcome of - * the operation. - */ - for (ix = 0; ix < count; ix++) { - p = *plistp++; - switch (postatus) { - case VM_PAGER_OK: - case VM_PAGER_PEND: - cnt.v_pgpgout++; - p->flags &= ~PG_LAUNDRY; - break; - case VM_PAGER_FAIL: - case VM_PAGER_ERROR: - /* - * Pageout failed, reactivate the target page so it - * doesn't clog the inactive list. Other pages are - * left as they are. 
- */ - if (p == m) { - vm_page_activate(p); - cnt.v_reactivated++; - } - break; - } - pmap_clear_reference(VM_PAGE_TO_PHYS(p)); - /* - * If the operation is still going, leave the page busy - * to block all other accesses. - */ - if (postatus != VM_PAGER_PEND) { - p->flags &= ~PG_BUSY; - PAGE_WAKEUP(p); - } - } - /* - * If the operation is still going, leave the paging in progress - * indicator set so that we don't attempt an object collapse. - */ - if (postatus != VM_PAGER_PEND) - vm_object_paging_end(object); -} -#endif - -/* - * vm_pageout is the high level pageout daemon. - */ - -void -vm_pageout() -{ - pageout_daemon = curproc; - (void) spl0(); - - /* - * Initialize some paging parameters. - */ - - if (cnt.v_free_min == 0) { - cnt.v_free_min = VM_PAGE_FREE_MIN; - vm_page_free_min_min /= cnt.v_page_size; - vm_page_free_min_max /= cnt.v_page_size; - if (cnt.v_free_min < vm_page_free_min_min) - cnt.v_free_min = vm_page_free_min_min; - if (cnt.v_free_min > vm_page_free_min_max) - cnt.v_free_min = vm_page_free_min_max; - } - - if (cnt.v_free_target == 0) - cnt.v_free_target = VM_PAGE_FREE_TARGET; - - if (cnt.v_free_target <= cnt.v_free_min) - cnt.v_free_target = cnt.v_free_min + 1; - - /* XXX does not really belong here */ - if (vm_page_max_wired == 0) - vm_page_max_wired = cnt.v_free_count / 3; - - /* - * The pageout daemon is never done, so loop - * forever. - */ - - simple_lock(&vm_pages_needed_lock); - while (TRUE) { - thread_sleep_msg(&vm_pages_needed, &vm_pages_needed_lock, - FALSE, "paged", 0); - /* - * Compute the inactive target for this scan. - * We need to keep a reasonable amount of memory in the - * inactive list to better simulate LRU behavior. - */ - cnt.v_inactive_target = - (cnt.v_active_count + cnt.v_inactive_count) / 3; - if (cnt.v_inactive_target <= cnt.v_free_target) - cnt.v_inactive_target = cnt.v_free_target + 1; - - /* - * Only make a scan if we are likely to do something. - * Otherwise we might have been awakened by a pager - * to clean up async pageouts. - */ - if (cnt.v_free_count < cnt.v_free_target || - cnt.v_inactive_count < cnt.v_inactive_target) { - pool_drain(0); - vm_pageout_scan(); - } - vm_pager_sync(); - simple_lock(&vm_pages_needed_lock); - thread_wakeup(&cnt.v_free_count); - } -} diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c deleted file mode 100644 index 34e0fc31c2b..00000000000 --- a/sys/vm/vm_pager.c +++ /dev/null @@ -1,426 +0,0 @@ -/* $OpenBSD: vm_pager.c,v 1.10 2001/06/08 08:09:44 art Exp $ */ -/* $NetBSD: vm_pager.c,v 1.21 1996/03/16 23:15:25 christos Exp $ */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. 
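The threshold setup in vm_pageout() in the preceding hunk works out to small numbers in practice. A worked instance, assuming 4 KB pages and 32768 initially free pages:

#include <stdio.h>

/* Worked instance of the vm_pageout() initialization above. */
int
main(void)
{
	int page_size = 4096, free_count = 32768;
	int min_min = 16 * 1024 / page_size;	/* vm_page_free_min_min: 4 pages */
	int min_max = 256 * 1024 / page_size;	/* vm_page_free_min_max: 64 pages */
	int free_min = free_count / 20;		/* VM_PAGE_FREE_MIN: 1638 pages */

	if (free_min < min_min)
		free_min = min_min;
	if (free_min > min_max)
		free_min = min_max;
	/* Prints "free_min 64, free_target 85". */
	printf("free_min %d, free_target %d\n", free_min, free_min * 4 / 3);
	return (0);
}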
- * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_pager.c 8.7 (Berkeley) 7/7/94 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Paging space routine stubs. Emulates a matchmaker-like interface - * for builtin pagers. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/proc.h> - -#include <vm/vm.h> -#include <vm/vm_page.h> -#include <vm/vm_kern.h> - -#ifdef SWAPPAGER -extern struct pagerops swappagerops; -#endif - -extern struct pagerops vnodepagerops; - -#ifdef DEVPAGER -extern struct pagerops devicepagerops; -#endif - -struct pagerops *pagertab[] = { -#ifdef SWAPPAGER - &swappagerops, /* PG_SWAP */ -#else - NULL, -#endif - &vnodepagerops, /* PG_VNODE */ -#ifdef DEVPAGER - &devicepagerops, /* PG_DEV */ -#else - NULL, -#endif -}; -int npagers = sizeof (pagertab) / sizeof (pagertab[0]); - -struct pagerops *dfltpagerops = NULL; /* default pager */ - -/* - * Kernel address space for mapping pages. - * Used by pagers where KVAs are needed for IO. - * - * XXX needs to be large enough to support the number of pending async - * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size - * (MAXPHYS == 64k) if you want to get the most efficiency. 
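The sizing comment above works out directly: 64 pending asynchronous cleanings times a 64 KB maximum cluster is 64 x 65536 = 4194304 bytes, i.e. the 4 MB that PAGER_MAP_SIZE is defined to just below.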
- */ -#define PAGER_MAP_SIZE (4 * 1024 * 1024) - -vm_map_t pager_map; -boolean_t pager_map_wanted; -vm_offset_t pager_sva, pager_eva; - -void -vm_pager_init() -{ - struct pagerops **pgops; - - /* - * Allocate a kernel submap for tracking get/put page mappings - */ - pager_map = kmem_suballoc(kernel_map, &pager_sva, &pager_eva, - PAGER_MAP_SIZE, FALSE); - /* - * Initialize known pagers - */ - for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++) - if (*pgops) - (*(*pgops)->pgo_init)(); - if (dfltpagerops == NULL) - panic("no default pager"); -} - -/* - * Allocate an instance of a pager of the given type. - * Size, protection and offset parameters are passed in for pagers that - * need to perform page-level validation (e.g. the device pager). - */ -vm_pager_t -vm_pager_allocate(type, handle, size, prot, off) - int type; - caddr_t handle; - vm_size_t size; - vm_prot_t prot; - vm_offset_t off; -{ - struct pagerops *ops; - - ops = (type == PG_DFLT) ? dfltpagerops : pagertab[type]; - if (ops) - return ((*ops->pgo_alloc)(handle, size, prot, off)); - return (NULL); -} - -void -vm_pager_deallocate(pager) - vm_pager_t pager; -{ - if (pager == NULL) - panic("vm_pager_deallocate: null pager"); - (*pager->pg_ops->pgo_dealloc)(pager); -} - -int -vm_pager_remove(pager, from, to) - vm_pager_t pager; - vm_offset_t from, to; -{ - if (pager == NULL) - panic("vm_pager_remove: null pager"); - return (*pager->pg_ops->pgo_remove)(pager, from, to); -} - -vm_offset_t -vm_pager_next(pager, offset) - vm_pager_t pager; - vm_offset_t offset; -{ - if (pager == NULL) - panic("vm_pager_next: null pager"); - return (*pager->pg_ops->pgo_next)(pager, offset); -} - -int -vm_pager_count(pager) - vm_pager_t pager; -{ - if (pager == NULL) - panic("vm_pager_count: null pager"); - return (*pager->pg_ops->pgo_count)(pager); -} - -int -vm_pager_get_pages(pager, mlist, npages, sync) - vm_pager_t pager; - vm_page_t *mlist; - int npages; - boolean_t sync; -{ - int rv; - - if (pager == NULL) { - rv = VM_PAGER_OK; - while (npages--) - if (!vm_page_zero_fill(*mlist)) { - rv = VM_PAGER_FAIL; - break; - } else - mlist++; - return (rv); - } - return ((*pager->pg_ops->pgo_getpages)(pager, mlist, npages, sync)); -} - -int -vm_pager_put_pages(pager, mlist, npages, sync) - vm_pager_t pager; - vm_page_t *mlist; - int npages; - boolean_t sync; -{ - if (pager == NULL) - panic("vm_pager_put_pages: null pager"); - return ((*pager->pg_ops->pgo_putpages)(pager, mlist, npages, sync)); -} - -/* XXX compatibility*/ -int -vm_pager_get(pager, m, sync) - vm_pager_t pager; - vm_page_t m; - boolean_t sync; -{ - return vm_pager_get_pages(pager, &m, 1, sync); -} - -/* XXX compatibility*/ -int -vm_pager_put(pager, m, sync) - vm_pager_t pager; - vm_page_t m; - boolean_t sync; -{ - return vm_pager_put_pages(pager, &m, 1, sync); -} - -boolean_t -vm_pager_has_page(pager, offset) - vm_pager_t pager; - vm_offset_t offset; -{ - if (pager == NULL) - panic("vm_pager_has_page: null pager"); - return ((*pager->pg_ops->pgo_haspage)(pager, offset)); -} - -/* - * Called by pageout daemon before going back to sleep. - * Gives pagers a chance to clean up any completed async pageing operations. 
- */ -void -vm_pager_sync() -{ - struct pagerops **pgops; - - for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++) - if (*pgops) - (*(*pgops)->pgo_putpages)(NULL, NULL, 0, FALSE); -} - -void -vm_pager_cluster(pager, offset, loff, hoff) - vm_pager_t pager; - vm_offset_t offset; - vm_offset_t *loff; - vm_offset_t *hoff; -{ - if (pager == NULL) - panic("vm_pager_cluster: null pager"); - ((*pager->pg_ops->pgo_cluster)(pager, offset, loff, hoff)); -} - -void -vm_pager_clusternull(pager, offset, loff, hoff) - vm_pager_t pager; - vm_offset_t offset; - vm_offset_t *loff; - vm_offset_t *hoff; -{ - panic("vm_pager_nullcluster called"); -} - -vm_offset_t -vm_pager_map_pages(mlist, npages, canwait) - vm_page_t *mlist; - int npages; - boolean_t canwait; -{ - vm_offset_t kva, va; - vm_size_t size; - vm_page_t m; - - /* - * Allocate space in the pager map, if none available return 0. - * This is basically an expansion of kmem_alloc_wait with optional - * blocking on no space. - */ - size = npages * PAGE_SIZE; - vm_map_lock(pager_map); - while (vm_map_findspace(pager_map, 0, size, &kva)) { - if (!canwait) { - vm_map_unlock(pager_map); - return (0); - } - pager_map_wanted = TRUE; - vm_map_unlock(pager_map); - (void) tsleep(pager_map, PVM, "pager_map", 0); - vm_map_lock(pager_map); - } - vm_map_insert(pager_map, NULL, 0, kva, kva + size); - vm_map_unlock(pager_map); - - for (va = kva; npages--; va += PAGE_SIZE) { - m = *mlist++; -#ifdef DEBUG - if ((m->flags & PG_BUSY) == 0) - panic("vm_pager_map_pages: page not busy"); - if (m->flags & PG_PAGEROWNED) - panic("vm_pager_map_pages: page already in pager"); -#endif -#ifdef DEBUG - m->flags |= PG_PAGEROWNED; -#endif - pmap_enter(vm_map_pmap(pager_map), va, VM_PAGE_TO_PHYS(m), - VM_PROT_DEFAULT, TRUE, 0); - } - return (kva); -} - -void -vm_pager_unmap_pages(kva, npages) - vm_offset_t kva; - int npages; -{ - vm_size_t size = npages * PAGE_SIZE; - -#ifdef DEBUG - vm_offset_t va; - vm_page_t m; - int np = npages; - - for (va = kva; np--; va += PAGE_SIZE) { - m = vm_pager_atop(va); - if (m->flags & PG_PAGEROWNED) - m->flags &= ~PG_PAGEROWNED; - else - printf("vm_pager_unmap_pages: %p(%lx/%lx) not owned\n", - m, va, VM_PAGE_TO_PHYS(m)); - } -#endif - pmap_remove(vm_map_pmap(pager_map), kva, kva + size); - vm_map_lock(pager_map); - (void) vm_map_delete(pager_map, kva, kva + size); - if (pager_map_wanted) - wakeup(pager_map); - vm_map_unlock(pager_map); -} - -vm_page_t -vm_pager_atop(kva) - vm_offset_t kva; -{ - vm_offset_t pa; - - if (pmap_extract(vm_map_pmap(pager_map), kva, &pa) == FALSE) - panic("vm_pager_atop"); - return (PHYS_TO_VM_PAGE(pa)); -} - -vm_pager_t -vm_pager_lookup(pglist, handle) - register struct pagerlst *pglist; - caddr_t handle; -{ - register vm_pager_t pager; - - for (pager = pglist->tqh_first; pager; pager = pager->pg_list.tqe_next) - if (pager->pg_handle == handle) - return (pager); - return (NULL); -} - -/* - * This routine gains a reference to the object. - * Explicit deallocation is necessary. 
- */ -int -pager_cache(object, should_cache) - vm_object_t object; - boolean_t should_cache; -{ - if (object == NULL) - return (KERN_INVALID_ARGUMENT); - - vm_object_cache_lock(); - vm_object_lock(object); - if (should_cache) - object->flags |= OBJ_CANPERSIST; - else - object->flags &= ~OBJ_CANPERSIST; - vm_object_unlock(object); - vm_object_cache_unlock(); - - vm_object_deallocate(object); - - return (KERN_SUCCESS); -} diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c deleted file mode 100644 index 56b9b0525d7..00000000000 --- a/sys/vm/vm_swap.c +++ /dev/null @@ -1,1248 +0,0 @@ -/* $OpenBSD: vm_swap.c,v 1.16 2001/05/05 20:57:04 art Exp $ */ -/* $NetBSD: vm_swap.c,v 1.64 1998/11/08 19:45:17 mycroft Exp $ */ - -/* - * Copyright (c) 1995, 1996, 1997 Matthew R. Green, Tobias Weingartner - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/buf.h> -#include <sys/proc.h> -#include <sys/namei.h> -#include <sys/disklabel.h> -#include <sys/dmap.h> -#include <sys/errno.h> -#include <sys/kernel.h> -#include <sys/malloc.h> -#include <sys/lock.h> -#include <sys/vnode.h> -#include <sys/map.h> -#include <sys/file.h> -#include <sys/stat.h> -#include <sys/extent.h> -#include <sys/swap.h> -#include <sys/mount.h> -#include <sys/syscallargs.h> - -#include <machine/vmparam.h> - -#include <vm/vm_conf.h> - -#include <miscfs/specfs/specdev.h> - -/* - * The idea here is to provide a single interface for multiple swap devices, - * of any kind and priority in a simple and fast way. - * - * Each swap device has these properties: - * * swap in use. - * * swap enabled. - * * map information in `/dev/drum'. - * * vnode pointer. - * Files have these additional properties: - * * block size. - * * maximum byte count in buffer. - * * buffer. - * * credentials. - * - * The arguments to swapctl(2) are: - * int cmd; - * void *arg; - * int misc; - * The cmd can be one of: - * SWAP_NSWAP - swapctl(2) returns the number of swap devices currently in - * use. 
- * SWAP_STATS - swapctl(2) takes a struct ent * in (void *arg) and writes - * misc or fewer (to zero) entries of configured swap devices, - * and returns the number of entries written or -1 on error. - * SWAP_ON - swapctl(2) takes a (char *) in arg to be the pathname of a - * device or file to begin swapping on, with it's priority in - * misc, returning 0 on success and -1 on error. - * SWAP_OFF - swapctl(2) takes a (char *) n arg to be the pathname of a - * device or file to stop swapping on. returning 0 or -1. - * XXX unwritten. - * SWAP_CTL - swapctl(2) changes the priority of a swap device, using the - * misc value. - */ - -#ifdef SWAPDEBUG -#define STATIC -#define VMSDB_SWON 0x0001 -#define VMSDB_SWOFF 0x0002 -#define VMSDB_SWINIT 0x0004 -#define VMSDB_SWALLOC 0x0008 -#define VMSDB_SWFLOW 0x0010 -#define VMSDB_INFO 0x0020 -int vmswapdebug = 0; -int vmswap_domount = 1; - -#define DPRINTF(f, m) do { \ - if (vmswapdebug & (f)) \ - printf m; \ -} while(0) -#else -#define STATIC static -#define DPRINTF(f, m) -#endif - -#define SWAP_TO_FILES - -struct swapdev { - struct swapent swd_se; -#define swd_dev swd_se.se_dev -#define swd_flags swd_se.se_flags -#define swd_nblks swd_se.se_nblks -#define swd_inuse swd_se.se_inuse -#define swd_priority swd_se.se_priority -#define swd_path swd_se.se_path - daddr_t swd_mapoffset; - int swd_mapsize; - struct extent *swd_ex; - struct vnode *swd_vp; - CIRCLEQ_ENTRY(swapdev) swd_next; - -#ifdef SWAP_TO_FILES - int swd_bsize; - int swd_maxactive; - struct buf swd_tab; - struct ucred *swd_cred; -#endif -}; - -/* - * Swap device priority entry; the list is kept sorted on `spi_priority'. - */ -struct swappri { - int spi_priority; - CIRCLEQ_HEAD(spi_swapdev, swapdev) spi_swapdev; - LIST_ENTRY(swappri) spi_swappri; -}; - - - - -/* - * The following two structures are used to keep track of data transfers - * on swap devices associated with regular files. - * NOTE: this code is more or less a copy of vnd.c; we use the same - * structure names here to ease porting.. 
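The swapctl(2) interface documented at the top of this file can be exercised from userland. The sketch below uses only the SWAP_NSWAP and SWAP_STATS commands described above, with the swapent field names taken from the definitions later in this file; header names and error handling are kept minimal and may vary slightly between BSDs.

#include <sys/param.h>
#include <sys/swap.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>

/* Count the configured swap devices, then fetch and print their stats. */
int
main(void)
{
	struct swapent *sep;
	int i, n;

	if ((n = swapctl(SWAP_NSWAP, NULL, 0)) == -1)
		err(1, "SWAP_NSWAP");
	if (n == 0)
		return (0);
	if ((sep = calloc(n, sizeof(*sep))) == NULL)
		err(1, "calloc");
	if ((n = swapctl(SWAP_STATS, sep, n)) == -1)
		err(1, "SWAP_STATS");
	for (i = 0; i < n; i++)
		printf("%s: %d/%d blocks in use, priority %d\n", sep[i].se_path,
		    sep[i].se_inuse, sep[i].se_nblks, sep[i].se_priority);
	return (0);
}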
- */ - - -struct vndxfer { - struct buf *vx_bp; /* Pointer to parent buffer */ - struct swapdev *vx_sdp; - int vx_error; - int vx_pending; /* # of pending aux buffers */ - int vx_flags; -#define VX_BUSY 1 -#define VX_DEAD 2 -}; - - -struct vndbuf { - struct buf vb_buf; - struct vndxfer *vb_xfer; -}; - -/* To get from a buffer to the encapsulating vndbuf */ -#define BUF_TO_VNDBUF(bp) \ - ((struct vndbuf *)((long)bp - ((long)&((struct vndbuf *)0)->vb_buf))) - -/* vnd macro stuff, rewritten to use malloc()/free() */ -#define getvndxfer() \ - (struct vndxfer *)malloc(sizeof(struct vndxfer), M_VMSWAP, M_WAITOK); - -#define putvndxfer(vnx) \ - free(vnx, M_VMSWAP) - -#define getvndbuf() \ - (struct vndbuf *)malloc(sizeof(struct vndbuf), M_VMSWAP, M_WAITOK); - -#define putvndbuf(vbp) \ - free(vbp, M_VMSWAP) - - -int nswapdev; -int swflags; -struct extent *swapmap; -LIST_HEAD(swap_priority, swappri) swap_priority; - -STATIC int swap_on __P((struct proc *, struct swapdev *)); -#ifdef SWAP_OFF_WORKS -STATIC int swap_off __P((struct proc *, struct swapdev *)); -#endif -STATIC struct swapdev *swap_getsdpfromaddr __P((daddr_t)); -STATIC void swap_addmap __P((struct swapdev *, int)); - -#ifdef SWAP_TO_FILES -STATIC void sw_reg_strategy __P((struct swapdev *, struct buf *, int)); -STATIC void sw_reg_iodone __P((struct buf *)); -STATIC void sw_reg_start __P((struct swapdev *)); -#endif - -STATIC void insert_swapdev __P((struct swapdev *, int)); -STATIC struct swapdev *find_swapdev __P((struct vnode *, int)); -STATIC void swaplist_trim __P((void)); - -STATIC void swapmount __P((void)); - -/* - * We use two locks to protect the swap device lists. - * The long-term lock is used only used to prevent races in - * concurrently executing swapctl(2) system calls. - */ -struct simplelock swaplist_lock; -struct lock swaplist_change_lock; - -/* - * Insert a swap device on the priority list. - */ -void -insert_swapdev(sdp, priority) - struct swapdev *sdp; - int priority; -{ - struct swappri *spp, *pspp; - -again: - simple_lock(&swaplist_lock); - - /* - * Find entry at or after which to insert the new device. - */ - for (pspp = NULL, spp = swap_priority.lh_first; spp != NULL; - spp = spp->spi_swappri.le_next) { - if (priority <= spp->spi_priority) - break; - pspp = spp; - } - - if (spp == NULL || spp->spi_priority != priority) { - spp = (struct swappri *) - malloc(sizeof *spp, M_VMSWAP, M_NOWAIT); - - if (spp == NULL) { - simple_unlock(&swaplist_lock); - tsleep((caddr_t)&lbolt, PSWP, "memory", 0); - goto again; - } - DPRINTF(VMSDB_SWFLOW, - ("sw: had to create a new swappri = %d\n", priority)); - - spp->spi_priority = priority; - CIRCLEQ_INIT(&spp->spi_swapdev); - - if (pspp) - LIST_INSERT_AFTER(pspp, spp, spi_swappri); - else - LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri); - - } - /* Onto priority list */ - CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); - sdp->swd_priority = priority; - simple_unlock(&swaplist_lock); -} - -/* - * Find and optionally remove a swap device from the priority list. 
- */ -struct swapdev * -find_swapdev(vp, remove) - struct vnode *vp; - int remove; -{ - struct swapdev *sdp; - struct swappri *spp; - - simple_lock(&swaplist_lock); - for (spp = swap_priority.lh_first; spp != NULL; - spp = spp->spi_swappri.le_next) { - for (sdp = spp->spi_swapdev.cqh_first; - sdp != (void *)&spp->spi_swapdev; - sdp = sdp->swd_next.cqe_next) - if (sdp->swd_vp == vp) { - if (remove) - CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, - swd_next); - simple_unlock(&swaplist_lock); - return (sdp); - } - } - simple_unlock(&swaplist_lock); - return (NULL); -} - -/* - * Scan priority list for empty priority entries. - */ -void -swaplist_trim() -{ - struct swappri *spp; - - simple_lock(&swaplist_lock); -restart: - for (spp = swap_priority.lh_first; spp != NULL; - spp = spp->spi_swappri.le_next) { - if (spp->spi_swapdev.cqh_first != (void *)&spp->spi_swapdev) - continue; - LIST_REMOVE(spp, spi_swappri); - free((caddr_t)spp, M_VMSWAP); - goto restart; - } - simple_unlock(&swaplist_lock); -} - -int -sys_swapctl(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ - struct sys_swapctl_args /* { - syscallarg(int) cmd; - syscallarg(const void *) arg; - syscallarg(int) misc; - } */ *uap = (struct sys_swapctl_args *)v; - struct vnode *vp; - struct nameidata nd; - struct swappri *spp; - struct swapdev *sdp; - struct swapent *sep; - char userpath[MAXPATHLEN]; - int count, error, misc; - size_t len; - int priority; - - misc = SCARG(uap, misc); - - DPRINTF(VMSDB_SWFLOW, ("entering sys_swapctl\n")); - - /* how many swap devices */ - if (SCARG(uap, cmd) == SWAP_NSWAP) { - DPRINTF(VMSDB_SWFLOW,("did SWAP_NSWAP: leaving sys_swapctl\n")); - *retval = nswapdev; - return (0); - } - - /* stats on the swap devices. */ - if (SCARG(uap, cmd) == SWAP_STATS) { - sep = (struct swapent *)SCARG(uap, arg); - count = 0; - - error = lockmgr(&swaplist_change_lock, LK_SHARED, (void *)0, p); - if (error) - return (error); - for (spp = swap_priority.lh_first; spp != NULL; - spp = spp->spi_swappri.le_next) { - for (sdp = spp->spi_swapdev.cqh_first; - sdp != (void *)&spp->spi_swapdev && misc-- > 0; - sdp = sdp->swd_next.cqe_next, sep++, count++) { - /* - * We do not do NetBSD 1.3 compat call. 
- */ - error = copyout((caddr_t)&sdp->swd_se, - (caddr_t)sep, sizeof(struct swapent)); - - if (error) - goto out; - } - } -out: - (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p); - if (error) - return (error); - - DPRINTF(VMSDB_SWFLOW,("did SWAP_STATS: leaving sys_swapctl\n")); - - *retval = count; - return (0); - } - if ((error = suser(p->p_ucred, &p->p_acflag))) - return (error); - - if (SCARG(uap, arg) == NULL) { - /* XXX - interface - arg==NULL: miniroot */ - vp = rootvp; - if (vget(vp, LK_EXCLUSIVE, p)) - return (EBUSY); - if (SCARG(uap, cmd) == SWAP_ON && - copystr("miniroot", userpath, sizeof userpath, &len)) - panic("swapctl: miniroot copy failed"); - } else { - int space; - char *where; - - if (SCARG(uap, cmd) == SWAP_ON) { - if ((error = copyinstr(SCARG(uap, arg), userpath, - sizeof userpath, &len))) - return (error); - space = UIO_SYSSPACE; - where = userpath; - } else { - space = UIO_USERSPACE; - where = (char *)SCARG(uap, arg); - } - NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, space, where, p); - if ((error = namei(&nd))) - return (error); - - vp = nd.ni_vp; - } - - error = lockmgr(&swaplist_change_lock, LK_EXCLUSIVE, (void *)0, p); - if (error) - goto bad2; - - switch(SCARG(uap, cmd)) { - case SWAP_CTL: - priority = SCARG(uap, misc); - if ((sdp = find_swapdev(vp, 1)) == NULL) { - error = ENOENT; - break; - } - insert_swapdev(sdp, priority); - swaplist_trim(); - break; - - case SWAP_ON: - priority = SCARG(uap, misc); - - /* Check for duplicates */ - if ((sdp = find_swapdev(vp, 0)) != NULL) { - if (!bcmp(sdp->swd_path, "swap_device", 12)) { - copystr(userpath, sdp->swd_path, len, 0); - error = 0; - } else - error = EBUSY; - goto bad; - } - - sdp = (struct swapdev *) - malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); - bzero(sdp, sizeof(*sdp)); - - sdp->swd_vp = vp; - sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV; - - if ((error = swap_on(p, sdp)) != 0) { - free((caddr_t)sdp, M_VMSWAP); - break; - } -#ifdef SWAP_TO_FILES - /* - * XXX Is NFS elaboration necessary? - */ - if (vp->v_type == VREG) - sdp->swd_cred = crdup(p->p_ucred); -#endif - if (copystr(userpath, sdp->swd_path, len, 0) != 0) - panic("swapctl: copystr"); - insert_swapdev(sdp, priority); - - /* Keep reference to vnode */ - vref(vp); - break; - - case SWAP_OFF: - DPRINTF(VMSDB_SWFLOW, ("doing SWAP_OFF...\n")); -#ifdef SWAP_OFF_WORKS - if ((sdp = find_swapdev(vp, 0)) == NULL) { - error = ENXIO; - break; - } - /* - * If a device isn't in use or enabled, we - * can't stop swapping from it (again). - */ - if ((sdp->swd_flags & - (SWF_INUSE|SWF_ENABLE)) == 0) { - error = EBUSY; - goto bad; - } - if ((error = swap_off(p, sdp)) != 0) - goto bad; - - /* Find again and remove this time */ - if ((sdp = find_swapdev(vp, 1)) == NULL) { - error = ENXIO; - break; - } - free((caddr_t)sdp, M_VMSWAP); -#else - error = ENODEV; -#endif - break; - - default: - DPRINTF(VMSDB_SWFLOW, - ("unhandled command: %x\n", SCARG(uap, cmd))); - error = EINVAL; - } - -bad: - (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p); -bad2: - vput(vp); - - DPRINTF(VMSDB_SWFLOW, ("leaving sys_swapctl: error %d\n", error)); - return (error); -} - -/* - * swap_on() attempts to begin swapping on a swapdev. we check that this - * device is OK to swap from, miss the start of any disk (to avoid any - * disk labels that may exist). 
- */ -STATIC int -swap_on(p, sdp) - struct proc *p; - struct swapdev *sdp; -{ - static int count = 0; - struct vnode *vp = sdp->swd_vp; - int error, nblks, size; - long addr; - char *storage; - int storagesize; -#ifdef SWAP_TO_FILES - struct vattr va; -#endif -#ifdef NFSCLIENT - extern int (**nfsv2_vnodeop_p) __P((void *)); -#endif /* NFSCLIENT */ - dev_t dev = sdp->swd_dev; - char *name; - - - /* If root on swap, then the skip open/close operations. */ - if (vp != rootvp) { - if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p))) - return (error); - vp->v_writecount++; - } - - DPRINTF(VMSDB_INFO, - ("swap_on: dev = %d, major(dev) = %d\n", dev, major(dev))); - - switch (vp->v_type) { - case VBLK: - if (bdevsw[major(dev)].d_psize == 0 || - (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) { - error = ENXIO; - goto bad; - } - break; - -#ifdef SWAP_TO_FILES - case VREG: - if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p))) - goto bad; - nblks = (int)btodb(va.va_size); - if ((error = - VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0) - goto bad; - - sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize; -#ifdef NFSCLIENT - if (vp->v_op == nfsv2_vnodeop_p) - sdp->swd_maxactive = 2; /* XXX */ - else -#endif /* NFSCLIENT */ - sdp->swd_maxactive = 8; /* XXX */ - break; -#endif - - default: - error = ENXIO; - goto bad; - } - if (nblks == 0) { - DPRINTF(VMSDB_SWFLOW, ("swap_on: nblks == 0\n")); - error = EINVAL; - goto bad; - } - - sdp->swd_flags |= SWF_INUSE; - sdp->swd_nblks = nblks; - - /* - * skip over first cluster of a device in case of labels or - * boot blocks. - */ - if (vp->v_type == VBLK) { - size = (int)(nblks - ctod(1)); - addr = (long)ctod(1); - } else { - size = (int)nblks; - addr = (long)0; - } - - DPRINTF(VMSDB_SWON, - ("swap_on: dev %x: size %d, addr %ld\n", dev, size, addr)); - - name = malloc(12, M_VMSWAP, M_WAITOK); - sprintf(name, "swap0x%04x", count++); - /* XXX make this based on ram as well. */ - storagesize = EXTENT_FIXED_STORAGE_SIZE(maxproc * 2); - storage = malloc(storagesize, M_VMSWAP, M_WAITOK); - sdp->swd_ex = extent_create(name, 0, nblks, M_VMSWAP, - storage, storagesize, EX_WAITOK); - if (addr) { - if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK)) - panic("disklabel region"); - sdp->swd_inuse += addr; - } - - - if (vp == rootvp) { - struct mount *mp; - struct statfs *sp; - int rootblks; - - /* Get size from root FS (mountroot did statfs) */ - mp = rootvnode->v_mount; - sp = &mp->mnt_stat; - rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE); - if (rootblks > nblks) - panic("miniroot size"); - - if (extent_alloc_region(sdp->swd_ex, addr, rootblks, EX_WAITOK)) - panic("miniroot region"); - - printf("Preserved %d blocks, leaving %d pages of swap\n", - rootblks, dtoc(size - rootblks)); - } - - swap_addmap(sdp, size); - nswapdev++; - sdp->swd_flags |= SWF_ENABLE; - return (0); - -bad: - if (vp != rootvp) { - vp->v_writecount--; - (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); - } - return (error); -} - -#ifdef SWAP_OFF_WORKS -STATIC int -swap_off(p, sdp) - struct proc *p; - struct swapdev *sdp; -{ - char *name; - - /* turn off the enable flag */ - sdp->swd_flags &= ~SWF_ENABLE; - - DPRINTF(VMSDB_SWOFF, ("swap_off: %x\n", sdp->swd_dev)); - - /* - * XXX write me - * - * the idea is to find out which processes are using this swap - * device, and page them all in. - * - * eventually, we should try to move them out to other swap areas - * if available. - * - * The alternative is to create a redirection map for this swap - * device. 
This should work by moving all the pages of data from - * the ex-swap device to another one, and making an entry in the - * redirection map for it. locking is going to be important for - * this! - * - * There might be an easier way to do a "soft" swapoff. First - * we mark the particular swap partition as not desirable anymore. - * Then we use the pager to page a couple of pages in, each time - * it has the memory, and the chance to do so. Thereby moving pages - * back into memory. Once they are in memory, when they get paged - * out again, they do not go back onto the "undesirable" device - * anymore, but to good devices. This might take longer, but it - * can certainly work. If need be, the user process can sleep on - * the particular sdp entry, and the swapper can then wake him up - * when everything is done. - */ - - /* until the above code is written, we must ENODEV */ - return ENODEV; - - extent_free(swapmap, sdp->swd_mapoffset, sdp->swd_mapsize, EX_WAITOK); - nswapdev--; - name = sdp->swd_ex->ex_name; - extent_destroy(sdp->swd_ex); - free(name, M_VMSWAP); - free((caddr_t)sdp->swd_ex, M_VMSWAP); - if (sdp->swp_vp != rootvp) { - vp->v_writecount--; - (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p); - } - if (sdp->swd_vp) - vrele(sdp->swd_vp); - free((caddr_t)sdp, M_VMSWAP); - return (0); -} -#endif - -/* - * To decide where to allocate what part of swap, we must "round robin" - * the swap devices in swap_priority of the same priority until they are - * full. we do this with a list of swap priorities that have circle - * queues of swapdevs. - * - * The following functions control allocation and freeing of part of the - * swap area. you call swap_alloc() with a size and it returns an address. - * later you call swap_free() and it frees the use of that swap area. - * - * daddr_t swap_alloc(int size); - * void swap_free(int size, daddr_t addr); - */ - -daddr_t -swap_alloc(size) - int size; -{ - struct swapdev *sdp; - struct swappri *spp; - u_long result; - - if (nswapdev < 1) - return 0; - - simple_lock(&swaplist_lock); - for (spp = swap_priority.lh_first; spp != NULL; - spp = spp->spi_swappri.le_next) { - for (sdp = spp->spi_swapdev.cqh_first; - sdp != (void *)&spp->spi_swapdev; - sdp = sdp->swd_next.cqe_next) { - /* if it's not enabled, then we can't swap from it */ - if ((sdp->swd_flags & SWF_ENABLE) == 0 || - /* XXX IS THIS CORRECT ? */ -#if 1 - (sdp->swd_inuse + size > sdp->swd_nblks) || -#endif - extent_alloc(sdp->swd_ex, size, EX_NOALIGN, - EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT, - &result) != 0) { - continue; - } - CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next); - CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); - sdp->swd_inuse += size; - simple_unlock(&swaplist_lock); - return (daddr_t)(result + sdp->swd_mapoffset); - } - } - simple_unlock(&swaplist_lock); - return 0; -} - -void -swap_free(size, addr) - int size; - daddr_t addr; -{ - struct swapdev *sdp = swap_getsdpfromaddr(addr); - -#ifdef DIAGNOSTIC - if (sdp == NULL) - panic("swap_free: unmapped address\n"); - if (nswapdev < 1) - panic("swap_free: nswapdev < 1\n"); -#endif - extent_free(sdp->swd_ex, addr - sdp->swd_mapoffset, size, - EX_MALLOCOK|EX_NOWAIT); - sdp->swd_inuse -= size; -#ifdef DIAGNOSTIC - if (sdp->swd_inuse < 0) - panic("swap_free: inuse < 0"); -#endif -} - -/* - * We have a physical -> virtual mapping to address here. There are several - * different physical address spaces (one for each swap partition) that are - * to be mapped onto a single virtual address space. 
- */ -#define ADDR_IN_MAP(addr, sdp) \ - (((addr) >= (sdp)->swd_mapoffset) && \ - ((addr) < ((sdp)->swd_mapoffset + (sdp)->swd_mapsize))) - -struct swapdev * -swap_getsdpfromaddr(addr) - daddr_t addr; -{ - struct swapdev *sdp; - struct swappri *spp; - - simple_lock(&swaplist_lock); - for (spp = swap_priority.lh_first; spp != NULL; - spp = spp->spi_swappri.le_next) - for (sdp = spp->spi_swapdev.cqh_first; - sdp != (void *)&spp->spi_swapdev; - sdp = sdp->swd_next.cqe_next) - if (ADDR_IN_MAP(addr, sdp)) { - simple_unlock(&swaplist_lock); - return sdp; - } - simple_unlock(&swaplist_lock); - return NULL; -} - -void -swap_addmap(sdp, size) - struct swapdev *sdp; - int size; -{ - u_long result; - - if (extent_alloc(swapmap, size, EX_NOALIGN, EX_NOBOUNDARY, - EX_WAITOK, &result)) - panic("swap_addmap"); - - sdp->swd_mapoffset = result; - sdp->swd_mapsize = size; -} - -/*ARGSUSED*/ -int -swread(dev, uio, ioflag) - dev_t dev; - struct uio *uio; - int ioflag; -{ - - return (physio(swstrategy, NULL, dev, B_READ, minphys, uio)); -} - -/*ARGSUSED*/ -int -swwrite(dev, uio, ioflag) - dev_t dev; - struct uio *uio; - int ioflag; -{ - - return (physio(swstrategy, NULL, dev, B_WRITE, minphys, uio)); -} - -void -swstrategy(bp) - struct buf *bp; -{ - struct swapdev *sdp; - daddr_t bn; - int s; - - bn = bp->b_blkno; - sdp = swap_getsdpfromaddr(bn); - if (sdp == NULL) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; - } - - bn -= sdp->swd_mapoffset; - - DPRINTF(VMSDB_SWFLOW, - ("swstrategy(%s): mapoff %x, bn %x, bcount %ld\n", - ((bp->b_flags & B_READ) == 0) ? "write" : "read", - sdp->swd_mapoffset, bn, bp->b_bcount)); - - switch (sdp->swd_vp->v_type) { - default: - panic("swstrategy: vnode type %x", sdp->swd_vp->v_type); - case VBLK: - s = splbio(); - buf_replacevnode(bp, sdp->swd_vp); - bp->b_blkno = bn + ctod(1); - splx(s); - VOP_STRATEGY(bp); - return; -#ifdef SWAP_TO_FILES - case VREG: - sw_reg_strategy(sdp, bp, bn); - return; -#endif - } - /* NOTREACHED */ -} - -#ifdef SWAP_TO_FILES - -STATIC void -sw_reg_strategy(sdp, bp, bn) - struct swapdev *sdp; - struct buf *bp; - int bn; -{ - struct vnode *vp; - struct vndxfer *vnx; - daddr_t nbn; - caddr_t addr; - int s, off, nra, error, sz, resid; - - /* - * Translate the device logical block numbers into physical - * block numbers of the underlying filesystem device. - */ - bp->b_resid = bp->b_bcount; - addr = bp->b_data; - bn = dbtob(bn); - - /* Allocate a header for this transfer and link it to the buffer */ - vnx = getvndxfer(); - vnx->vx_flags = VX_BUSY; - vnx->vx_error = 0; - vnx->vx_pending = 0; - vnx->vx_bp = bp; - vnx->vx_sdp = sdp; - - error = 0; - for (resid = bp->b_resid; resid; resid -= sz) { - struct vndbuf *nbp; - - nra = 0; - error = VOP_BMAP(sdp->swd_vp, bn / sdp->swd_bsize, - &vp, &nbn, &nra); - - if (error == 0 && (long)nbn == -1) - error = EIO; - - /* - * If there was an error or a hole in the file...punt. - * Note that we may have to wait for any operations - * that we have already fired off before releasing - * the buffer. - * - * XXX we could deal with holes here but it would be - * a hassle (in the write case). 
- */ - if (error) { - s = splbio(); - vnx->vx_error = error; - goto out; - } - - if ((off = bn % sdp->swd_bsize) != 0) - sz = sdp->swd_bsize - off; - else - sz = (1 + nra) * sdp->swd_bsize; - - if (resid < sz) - sz = resid; - - DPRINTF(VMSDB_SWFLOW, - ("sw_reg_strategy: vp %p/%p bn 0x%x/0x%x" - " sz 0x%x\n", sdp->swd_vp, vp, bn, nbn, sz)); - - nbp = getvndbuf(); - nbp->vb_buf.b_flags = bp->b_flags | B_NOCACHE | B_CALL; - nbp->vb_buf.b_bcount = sz; - nbp->vb_buf.b_bufsize = bp->b_bufsize; - nbp->vb_buf.b_error = 0; - nbp->vb_buf.b_data = addr; - nbp->vb_buf.b_blkno = nbn + btodb(off); - nbp->vb_buf.b_proc = bp->b_proc; - nbp->vb_buf.b_iodone = sw_reg_iodone; - nbp->vb_buf.b_vp = NULLVP; - nbp->vb_buf.b_rcred = sdp->swd_cred; - nbp->vb_buf.b_wcred = sdp->swd_cred; - if (bp->b_dirtyend == 0) { - nbp->vb_buf.b_dirtyoff = 0; - nbp->vb_buf.b_dirtyend = sz; - } else { - nbp->vb_buf.b_dirtyoff = - max(0, bp->b_dirtyoff - (bp->b_bcount-resid)); - nbp->vb_buf.b_dirtyend = - min(sz, - max(0, bp->b_dirtyend - (bp->b_bcount-resid))); - } - if (bp->b_validend == 0) { - nbp->vb_buf.b_validoff = 0; - nbp->vb_buf.b_validend = sz; - } else { - nbp->vb_buf.b_validoff = - max(0, bp->b_validoff - (bp->b_bcount-resid)); - nbp->vb_buf.b_validend = - min(sz, - max(0, bp->b_validend - (bp->b_bcount-resid))); - } - - nbp->vb_xfer = vnx; - - /* - * Just sort by block number - */ - nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno; - s = splbio(); - if (vnx->vx_error != 0) { - putvndbuf(nbp); - goto out; - } - vnx->vx_pending++; - bgetvp(vp, &nbp->vb_buf); - disksort(&sdp->swd_tab, &nbp->vb_buf); - sw_reg_start(sdp); - splx(s); - - bn += sz; - addr += sz; - } - - s = splbio(); - -out: /* Arrive here at splbio */ - vnx->vx_flags &= ~VX_BUSY; - if (vnx->vx_pending == 0) { - if (vnx->vx_error != 0) { - bp->b_error = vnx->vx_error; - bp->b_flags |= B_ERROR; - } - putvndxfer(vnx); - biodone(bp); - } - splx(s); -} - -/* - * Feed requests sequentially. - * We do it this way to keep from flooding NFS servers if we are connected - * to an NFS file. This places the burden on the client rather than the - * server. 
- */ -STATIC void -sw_reg_start(sdp) - struct swapdev *sdp; -{ - struct buf *bp; - - if ((sdp->swd_flags & SWF_BUSY) != 0) - /* Recursion control */ - return; - - sdp->swd_flags |= SWF_BUSY; - - while (sdp->swd_tab.b_active < sdp->swd_maxactive) { - bp = sdp->swd_tab.b_actf; - if (bp == NULL) - break; - sdp->swd_tab.b_actf = bp->b_actf; - sdp->swd_tab.b_active++; - - DPRINTF(VMSDB_SWFLOW, - ("sw_reg_start: bp %p vp %p blkno %x addr %p cnt %lx\n", - bp, bp->b_vp, bp->b_blkno,bp->b_data, bp->b_bcount)); - - if ((bp->b_flags & B_READ) == 0) - bp->b_vp->v_numoutput++; - VOP_STRATEGY(bp); - } - sdp->swd_flags &= ~SWF_BUSY; -} - -STATIC void -sw_reg_iodone(bp) - struct buf *bp; -{ - register struct vndbuf *vbp = BUF_TO_VNDBUF(bp); - register struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer; - register struct buf *pbp = vnx->vx_bp; - struct swapdev *sdp = vnx->vx_sdp; - int s, resid; - - DPRINTF(VMSDB_SWFLOW, - ("sw_reg_iodone: vbp %p vp %p blkno %x addr %p " - "cnt %lx(%lx)\n", - vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, - vbp->vb_buf.b_data, vbp->vb_buf.b_bcount, - vbp->vb_buf.b_resid)); - - s = splbio(); - resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; - pbp->b_resid -= resid; - vnx->vx_pending--; - - if (vbp->vb_buf.b_error) { - DPRINTF(VMSDB_INFO, ("sw_reg_iodone: vbp %p error %d\n", vbp, - vbp->vb_buf.b_error)); - - vnx->vx_error = vbp->vb_buf.b_error; - } - - if (vbp->vb_buf.b_vp != NULLVP) - brelvp(&vbp->vb_buf); - - putvndbuf(vbp); - - /* - * Wrap up this transaction if it has run to completion or, in - * case of an error, when all auxiliary buffers have returned. - */ - if (vnx->vx_error != 0) { - pbp->b_flags |= B_ERROR; - pbp->b_error = vnx->vx_error; - if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { - - DPRINTF(VMSDB_SWFLOW, - ("swiodone: pbp %p iodone: error %d\n", - pbp, vnx->vx_error)); - putvndxfer(vnx); - biodone(pbp); - } - } else if (pbp->b_resid == 0) { - -#ifdef DIAGNOSTIC - if (vnx->vx_pending != 0) - panic("swiodone: vnx pending: %d", vnx->vx_pending); -#endif - - if ((vnx->vx_flags & VX_BUSY) == 0) { - DPRINTF(VMSDB_SWFLOW, - ("swiodone: pbp %p iodone\n", pbp)); - putvndxfer(vnx); - biodone(pbp); - } - } - - sdp->swd_tab.b_active--; - sw_reg_start(sdp); - - splx(s); -} -#endif /* SWAP_TO_FILES */ - -void -swapinit() -{ - struct buf *sp = swbuf; - struct proc *p = &proc0; /* XXX */ - int i; - - DPRINTF(VMSDB_SWINIT, ("swapinit\n")); - - nswapdev = 0; - if (bdevvp(swapdev, &swapdev_vp)) - panic("swapinit: can not setup swapdev_vp"); - - simple_lock_init(&swaplist_lock); - lockinit(&swaplist_change_lock, PSWP, "swap change", 0, 0); - LIST_INIT(&swap_priority); - - /* - * Create swap block resource map. The range [1..INT_MAX] allows - * for a grand total of 2 gigablocks of swap resource. - * (start at 1 because "block #0" will be interpreted as - * an allocation failure). - */ - swapmap = extent_create("swapmap", 1, INT_MAX, - M_VMSWAP, 0, 0, EX_WAITOK); - if (swapmap == 0) - panic("swapinit: extent_create failed"); - - /* - * Now set up swap buffer headers. - */ - bswlist.b_actf = sp; - for (i = 0; i < nswbuf - 1; i++, sp++) { - sp->b_actf = sp + 1; - sp->b_rcred = sp->b_wcred = p->p_ucred; - sp->b_vnbufs.le_next = NOLIST; - } - sp->b_rcred = sp->b_wcred = p->p_ucred; - sp->b_vnbufs.le_next = NOLIST; - sp->b_actf = NULL; - - /* Mount primary swap if available */ -#ifdef SWAPDEBUG - if(vmswap_domount) -#endif - swapmount(); - - DPRINTF(VMSDB_SWINIT, ("leaving swapinit\n")); -} - -/* - * Mount the primary swap device pointed to by 'swdevt[0]'. 
- */ -STATIC void -swapmount() -{ - extern int getdevvp(dev_t, struct vnode **, enum vtype); - struct swapdev *sdp; - struct vnode *vp = NULL; - struct proc *p = curproc; - dev_t swap_dev = swdevt[0].sw_dev; - - /* Make sure we have a device */ - if (swap_dev == NODEV) { - printf("swapmount: No swap device!\n"); - return; - } - - /* Malloc needed things */ - sdp = (struct swapdev *)malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); - bzero(sdp, sizeof(*sdp)); - - /* Do swap_on() stuff */ - if(bdevvp(swap_dev, &vp)){ - printf("swapmount: bdevvp() failed\n"); - return; - } - -#ifdef SWAPDEBUG - vprint("swapmount", vp); -#endif - - sdp->swd_vp = vp; - sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV; - if(copystr("swap_device", sdp->swd_path, sizeof sdp->swd_path, 0) != 0){ - printf("swapmount: copystr() failed\n"); - return; - } - - /* Look for a swap device */ - if (swap_on(p, sdp) != 0) { - free((caddr_t)sdp, M_VMSWAP); - return; - } - -#ifdef SWAP_TO_FILES - /* - * XXX Is NFS elaboration necessary? - */ - if (vp->v_type == VREG) - sdp->swd_cred = crdup(p->p_ucred); -#endif - insert_swapdev(sdp, 0); -} diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c deleted file mode 100644 index 4a35e4e3482..00000000000 --- a/sys/vm/vm_unix.c +++ /dev/null @@ -1,254 +0,0 @@ -/* $OpenBSD: vm_unix.c,v 1.10 2001/05/05 20:57:04 art Exp $ */ -/* $NetBSD: vm_unix.c,v 1.19 1996/02/10 00:08:14 christos Exp $ */ - -/* - * Copyright (c) 1988 University of Utah. - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$ - * - * @(#)vm_unix.c 8.2 (Berkeley) 1/9/95 - */ - -/* - * Traditional sbrk/grow interface to VM - */ -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/resourcevar.h> -#include <sys/vnode.h> -#include <sys/core.h> - -#include <sys/mount.h> -#include <sys/syscallargs.h> - -#include <vm/vm.h> - -/* ARGSUSED */ -int -sys_obreak(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ - struct sys_obreak_args /* { - syscallarg(char *) nsize; - } */ *uap = v; - register struct vmspace *vm = p->p_vmspace; - vm_offset_t new, old; - int rv; - register int diff; - - old = (vm_offset_t)vm->vm_daddr; - new = (vm_offset_t)SCARG(uap, nsize); - - /* Check for overflow, round to page */ - if(round_page(new) < new) - return(ENOMEM); - new = round_page(new); - - /* Check limit */ - if ((new > old) && ((new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur)) - return(ENOMEM); - - /* Turn the trick */ - old = round_page(old + ctob(vm->vm_dsize)); - diff = new - old; - if (diff > 0) { - rv = vm_allocate(&vm->vm_map, &old, diff, FALSE); - if (rv != KERN_SUCCESS) { - uprintf("sbrk: grow failed, return = %d\n", rv); - return(ENOMEM); - } - vm->vm_dsize += btoc(diff); - } else if (diff < 0) { - diff = -diff; - rv = vm_deallocate(&vm->vm_map, new, diff); - if (rv != KERN_SUCCESS) { - uprintf("sbrk: shrink failed, return = %d\n", rv); - return(ENOMEM); - } - vm->vm_dsize -= btoc(diff); - } - return(0); -} - -/* - * Enlarge the "stack segment" to include the specified - * stack pointer for the process. - */ -int -grow(p, sp) - struct proc *p; - vm_offset_t sp; -{ - register struct vmspace *vm = p->p_vmspace; - register int si; - - /* - * For user defined stacks (from sendsig). - */ - if (sp < (vm_offset_t)vm->vm_maxsaddr) - return (0); - /* - * For common case of already allocated (from trap). - */ - if (sp >= USRSTACK - ctob(vm->vm_ssize)) - return (1); - /* - * Really need to check vs limit and increment stack size if ok. - */ - si = btoc(USRSTACK-sp) - vm->vm_ssize; - if (vm->vm_ssize + si > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) - return (0); - vm->vm_ssize += si; - return (1); -} - -/* ARGSUSED */ -int -sys_ovadvise(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ -#if 0 - struct sys_ovadvise_args /* { - syscallarg(int) anom; - } */ *uap = v; -#endif - - return (EINVAL); -} - -int -vm_coredump(p, vp, cred, chdr) - struct proc *p; - struct vnode *vp; - struct ucred *cred; - struct core *chdr; -{ - register struct vmspace *vm = p->p_vmspace; - register vm_map_t map = &vm->vm_map; - register vm_map_entry_t entry; - vm_offset_t start, end; - struct coreseg cseg; - off_t offset; - int flag, error = 0; - - if (!map->is_main_map) { -#ifdef DEBUG - uprintf( - "vm_coredump: %s map %p: pmap=%p, ref=%d, nentries=%d, version=%d\n", - (map->is_main_map ? 
"Task" : "Share"), - map, (map->pmap), map->ref_count, map->nentries, - map->timestamp); -#endif - return EIO; - } - - offset = chdr->c_hdrsize + chdr->c_seghdrsize + chdr->c_cpusize; - - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - - if (entry->is_a_map || entry->is_sub_map) { -#ifdef DEBUG - uprintf("vm_coredump: entry: share=%p, offset=%p\n", - entry->object.share_map, entry->offset); -#endif - continue; - } - - if (entry->object.vm_object && - entry->object.vm_object->pager && - entry->object.vm_object->pager->pg_type == PG_DEVICE) { -#ifdef DEBUG - printf("vm_coredump: skipping dev @ 0x%lx\n", - entry->start); -#endif - continue; - } - - if (!(entry->protection & VM_PROT_WRITE)) - continue; - - start = entry->start; - end = entry->end; - - if (start >= VM_MAXUSER_ADDRESS) - continue; - - if (end > VM_MAXUSER_ADDRESS) - end = VM_MAXUSER_ADDRESS; - - if (start >= (vm_offset_t)vm->vm_maxsaddr) { - flag = CORE_STACK; - start = trunc_page(USRSTACK - ctob(vm->vm_ssize)); - if (start >= end) - continue; - } else - flag = CORE_DATA; - - /* - * Set up a new core file segment. - */ - CORE_SETMAGIC(cseg, CORESEGMAGIC, CORE_GETMID(*chdr), flag); - cseg.c_addr = start; - cseg.c_size = end - start; - - error = vn_rdwr(UIO_WRITE, vp, - (caddr_t)&cseg, chdr->c_seghdrsize, - offset, UIO_SYSSPACE, - IO_NODELOCKED|IO_UNIT, cred, NULL, p); - if (error) - break; - - offset += chdr->c_seghdrsize; - error = vn_rdwr(UIO_WRITE, vp, - (caddr_t)cseg.c_addr, (int)cseg.c_size, - offset, UIO_USERSPACE, - IO_NODELOCKED|IO_UNIT, cred, NULL, p); - if (error) - break; - - offset += cseg.c_size; - chdr->c_nseg++; - } - - return error; -} diff --git a/sys/vm/vm_user.c b/sys/vm/vm_user.c deleted file mode 100644 index d717f6204c8..00000000000 --- a/sys/vm/vm_user.c +++ /dev/null @@ -1,340 +0,0 @@ -/* $OpenBSD: vm_user.c,v 1.3 1996/04/19 16:10:52 niklas Exp $ */ -/* $NetBSD: vm_user.c,v 1.13 1996/02/28 22:39:16 gwr Exp $ */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vm_user.c 8.2 (Berkeley) 1/12/94 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * User-exported virtual memory functions. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> - -#include <vm/vm.h> - -simple_lock_data_t vm_alloc_lock; /* XXX */ - -#ifdef MACHVMCOMPAT -/* - * BSD style syscall interfaces to MACH calls - * All return MACH return values. 
- */ -struct svm_allocate_args { - vm_map_t map; - vm_offset_t *addr; - vm_size_t size; - boolean_t anywhere; -}; -/* ARGSUSED */ -int -svm_allocate(p, uap, retval) - struct proc *p; - struct svm_allocate_args *uap; - register_t *retval; -{ - vm_offset_t addr; - int rv; - - SCARG(uap, map) = p->p_map; /* XXX */ - - if (copyin((caddr_t)SCARG(uap, addr), (caddr_t)&addr, sizeof (addr))) - rv = KERN_INVALID_ARGUMENT; - else - rv = vm_allocate(SCARG(uap, map), &addr, SCARG(uap, size), - SCARG(uap, anywhere)); - if (rv == KERN_SUCCESS) { - if (copyout((caddr_t)&addr, (caddr_t)SCARG(uap, addr), - sizeof(addr))) - rv = KERN_INVALID_ARGUMENT; - } - return((int)rv); -} - -struct svm_deallocate_args { - vm_map_t map; - vm_offset_t addr; - vm_size_t size; -}; -/* ARGSUSED */ -int -svm_deallocate(p, uap, retval) - struct proc *p; - struct svm_deallocate_args *uap; - register_t *retval; -{ - int rv; - - SCARG(uap, map) = p->p_map; /* XXX */ - rv = vm_deallocate(SCARG(uap, map), SCARG(uap, addr), SCARG(uap, size)); - return((int)rv); -} - -struct svm_inherit_args { - vm_map_t map; - vm_offset_t addr; - vm_size_t size; - vm_inherit_t inherit; -}; -/* ARGSUSED */ -int -svm_inherit(p, uap, retval) - struct proc *p; - struct svm_inherit_args *uap; - register_t *retval; -{ - int rv; - - SCARG(uap, map) = p->p_map; /* XXX */ - rv = vm_inherit(SCARG(uap, map), SCARG(uap, addr), SCARG(uap, size), - SCARG(uap, inherit)); - return((int)rv); -} - -struct svm_protect_args { - vm_map_t map; - vm_offset_t addr; - vm_size_t size; - boolean_t setmax; - vm_prot_t prot; -}; -/* ARGSUSED */ -int -svm_protect(p, uap, retval) - struct proc *p; - struct svm_protect_args *uap; - register_t *retval; -{ - int rv; - - SCARG(uap, map) = p->p_map; /* XXX */ - rv = vm_protect(SCARG(uap, map), SCARG(uap, addr), SCARG(uap, size), - SCARG(uap, setmax), SCARG(uap, prot)); - return((int)rv); -} - -/* - * vm_inherit sets the inheritence of the specified range in the - * specified map. - */ -int -vm_inherit(map, start, size, new_inheritance) - register vm_map_t map; - vm_offset_t start; - vm_size_t size; - vm_inherit_t new_inheritance; -{ - if (map == NULL) - return(KERN_INVALID_ARGUMENT); - - return(vm_map_inherit(map, trunc_page(start), round_page(start+size), new_inheritance)); -} - -/* - * vm_protect sets the protection of the specified range in the - * specified map. - */ - -int -vm_protect(map, start, size, set_maximum, new_protection) - register vm_map_t map; - vm_offset_t start; - vm_size_t size; - boolean_t set_maximum; - vm_prot_t new_protection; -{ - if (map == NULL) - return(KERN_INVALID_ARGUMENT); - - return(vm_map_protect(map, trunc_page(start), round_page(start+size), new_protection, set_maximum)); -} -#endif - -/* - * vm_allocate allocates "zero fill" memory in the specfied - * map. - */ -int -vm_allocate(map, addr, size, anywhere) - register vm_map_t map; - register vm_offset_t *addr; - register vm_size_t size; - boolean_t anywhere; -{ - int result; - - if (map == NULL) - return(KERN_INVALID_ARGUMENT); - if (size == 0) { - *addr = 0; - return(KERN_SUCCESS); - } - - if (anywhere) - *addr = vm_map_min(map); - else - *addr = trunc_page(*addr); - size = round_page(size); - - result = vm_map_find(map, NULL, (vm_offset_t) 0, addr, size, anywhere); - - return(result); -} - -/* - * vm_deallocate deallocates the specified range of addresses in the - * specified address map. 
- */ -int -vm_deallocate(map, start, size) - register vm_map_t map; - vm_offset_t start; - vm_size_t size; -{ - if (map == NULL) - return(KERN_INVALID_ARGUMENT); - - if (size == (vm_offset_t) 0) - return(KERN_SUCCESS); - - return(vm_map_remove(map, trunc_page(start), round_page(start+size))); -} - -/* - * Similar to vm_allocate but assigns an explicit pager. - */ -int -vm_allocate_with_pager(map, addr, size, anywhere, pager, poffset, internal) - register vm_map_t map; - register vm_offset_t *addr; - register vm_size_t size; - boolean_t anywhere; - vm_pager_t pager; - vm_offset_t poffset; - boolean_t internal; -{ - register vm_object_t object; - register int result; - vm_offset_t start; - - if (map == NULL) - return(KERN_INVALID_ARGUMENT); - - *addr = trunc_page(*addr); - size = round_page(size); - - /* - * Lookup the pager/paging-space in the object cache. - * If it's not there, then create a new object and cache - * it. - */ - object = vm_object_lookup(pager); - cnt.v_lookups++; - if (object == NULL) { - object = vm_object_allocate(size); - /* - * From Mike Hibler: "unnamed anonymous objects should never - * be on the hash list ... For now you can just change - * vm_allocate_with_pager to not do vm_object_enter if this - * is an internal object ..." - */ - if (!internal) - vm_object_enter(object, pager); - } else - cnt.v_hits++; - if (internal) - object->flags |= OBJ_INTERNAL; - else { - object->flags &= ~OBJ_INTERNAL; - cnt.v_nzfod -= atop(size); - } - - start = *addr; - vm_map_lock(map); - if (anywhere) { - again: - if (vm_map_findspace(map, start, size, addr)) - result = KERN_NO_SPACE; - else { -#ifdef PMAP_PREFER - PMAP_PREFER(poffset, addr); -#endif - start = *addr; - result = vm_map_insert(map, object, poffset, - start, start + size); - if (result == KERN_NO_SPACE) - goto again; - } - } else - result = vm_map_insert(map, object, poffset, - start, start + size); - vm_map_unlock(map); - - if (result != KERN_SUCCESS) - vm_object_deallocate(object); - else if (pager != NULL) - vm_object_setpager(object, pager, (vm_offset_t) 0, TRUE); - return(result); -} diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c deleted file mode 100644 index d1c885fb65c..00000000000 --- a/sys/vm/vnode_pager.c +++ /dev/null @@ -1,591 +0,0 @@ -/* $OpenBSD: vnode_pager.c,v 1.8 2001/05/16 12:54:34 ho Exp $ */ -/* $NetBSD: vnode_pager.c,v 1.19 1996/03/16 23:15:27 christos Exp $ */ - -/* - * Copyright (c) 1990 University of Utah. - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vnode_pager.c 8.10 (Berkeley) 5/14/95 - */ - -/* - * Page to/from files (vnodes). - * - * TODO: - * pageouts - * fix credential use (uses current process credentials now) - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/malloc.h> -#include <sys/vnode.h> -#include <sys/uio.h> -#include <sys/mount.h> - -#include <vm/vm.h> -#include <vm/vm_page.h> -#include <vm/vnode_pager.h> - -struct pagerlst vnode_pager_list; /* list of managed vnodes */ - -#ifdef DEBUG -int vpagerdebug = 0x00; -#define VDB_FOLLOW 0x01 -#define VDB_INIT 0x02 -#define VDB_IO 0x04 -#define VDB_FAIL 0x08 -#define VDB_ALLOC 0x10 -#define VDB_SIZE 0x20 -#endif - -static vm_pager_t vnode_pager_alloc - __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); -static void vnode_pager_cluster - __P((vm_pager_t, vm_offset_t, - vm_offset_t *, vm_offset_t *)); -static void vnode_pager_dealloc __P((vm_pager_t)); -static int vnode_pager_getpage - __P((vm_pager_t, vm_page_t *, int, boolean_t)); -static boolean_t vnode_pager_haspage __P((vm_pager_t, vm_offset_t)); -static void vnode_pager_init __P((void)); -static int vnode_pager_io - __P((vn_pager_t, vm_page_t *, int, - boolean_t, enum uio_rw)); -static boolean_t vnode_pager_putpage - __P((vm_pager_t, vm_page_t *, int, boolean_t)); - -struct pagerops vnodepagerops = { - vnode_pager_init, - vnode_pager_alloc, - vnode_pager_dealloc, - vnode_pager_getpage, - vnode_pager_putpage, - vnode_pager_haspage, - vnode_pager_cluster -}; - -static void -vnode_pager_init() -{ -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_init()\n"); -#endif - TAILQ_INIT(&vnode_pager_list); -} - -/* - * Allocate (or lookup) pager for a vnode. - * Handle is a vnode pointer. - */ -static vm_pager_t -vnode_pager_alloc(handle, size, prot, foff) - caddr_t handle; - vm_size_t size; - vm_prot_t prot; - vm_offset_t foff; -{ - register vm_pager_t pager; - register vn_pager_t vnp; - vm_object_t object; - struct vattr vattr; - struct vnode *vp; - struct proc *p = curproc; /* XXX */ - -#ifdef DEBUG - if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC)) - printf("vnode_pager_alloc(%p, %lx, %x)\n", handle, size, prot); -#endif - /* - * Pageout to vnode, no can do yet. - */ - if (handle == NULL) - return(NULL); - - /* - * Vnodes keep a pointer to any associated pager so no need to - * lookup with vm_pager_lookup. 
- */ - vp = (struct vnode *)handle; - pager = (vm_pager_t)vp->v_vmdata; - if (pager == NULL) { - /* - * Allocate pager structures - */ - pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK); - vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); - /* - * And an object of the appropriate size - */ - if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) { - object = vm_object_allocate(round_page(vattr.va_size)); - vm_object_enter(object, pager); - vm_object_setpager(object, pager, 0, TRUE); - } else { - free((caddr_t)vnp, M_VMPGDATA); - free((caddr_t)pager, M_VMPAGER); - return(NULL); - } - /* - * Hold a reference to the vnode and initialize pager data. - */ - VREF(vp); - vnp->vnp_flags = 0; - vnp->vnp_vp = vp; - vnp->vnp_size = vattr.va_size; - TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list); - pager->pg_handle = handle; - pager->pg_type = PG_VNODE; - pager->pg_flags = 0; - pager->pg_ops = &vnodepagerops; - pager->pg_data = vnp; - vp->v_vmdata = (caddr_t)pager; - } else { - /* - * vm_object_lookup() will remove the object from the - * cache if found and also gain a reference to the object. - */ - object = vm_object_lookup(pager); -#ifdef DEBUG - vnp = (vn_pager_t)pager->pg_data; -#endif - } -#ifdef DEBUG - if (vpagerdebug & VDB_ALLOC) - printf("vnode_pager_setup: vp %p sz %lx pager %p object %p\n", - vp, vnp->vnp_size, pager, object); -#endif - return(pager); -} - -static void -vnode_pager_dealloc(pager) - vm_pager_t pager; -{ - register vn_pager_t vnp = (vn_pager_t)pager->pg_data; - register struct vnode *vp; -#ifdef NOTDEF - struct proc *p = curproc; /* XXX */ -#endif - -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_dealloc(%p)\n", pager); -#endif - if ((vp = vnp->vnp_vp) != NULL) { - vp->v_vmdata = NULL; - vp->v_flag &= ~VTEXT; -#if NOTDEF - /* can hang if done at reboot on NFS FS */ - (void) VOP_FSYNC(vp, p->p_ucred, p); -#endif - vrele(vp); - } - TAILQ_REMOVE(&vnode_pager_list, pager, pg_list); - free((caddr_t)vnp, M_VMPGDATA); - free((caddr_t)pager, M_VMPAGER); -} - -static int -vnode_pager_getpage(pager, mlist, npages, sync) - vm_pager_t pager; - vm_page_t *mlist; - int npages; - boolean_t sync; -{ - -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_getpage(%p, %p, %x, %x)\n", - pager, mlist, npages, sync); -#endif - return(vnode_pager_io((vn_pager_t)pager->pg_data, - mlist, npages, sync, UIO_READ)); -} - -static boolean_t -vnode_pager_putpage(pager, mlist, npages, sync) - vm_pager_t pager; - vm_page_t *mlist; - int npages; - boolean_t sync; -{ - int err; - -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_putpage(%p, %p, %x, %x)\n", - pager, mlist, npages, sync); -#endif - if (pager == NULL) - return (FALSE); /* ??? */ - err = vnode_pager_io((vn_pager_t)pager->pg_data, - mlist, npages, sync, UIO_WRITE); - /* - * If the operation was successful, mark the pages clean. - */ - if (err == VM_PAGER_OK) { - while (npages--) { - (*mlist)->flags |= PG_CLEAN; - pmap_clear_modify(VM_PAGE_TO_PHYS(*mlist)); - mlist++; - } - } - return(err); -} - -static boolean_t -vnode_pager_haspage(pager, offset) - vm_pager_t pager; - vm_offset_t offset; -{ - struct proc *p = curproc; /* XXX */ - vn_pager_t vnp = (vn_pager_t)pager->pg_data; - daddr_t bn; - int err; - -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_haspage(%p, %lx)\n", pager, offset); -#endif - - /* - * Offset beyond end of file, do not have the page - * Lock the vnode first to make sure we have the most recent - * version of the size. 
- */ - vn_lock(vnp->vnp_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE, p); - if (offset >= vnp->vnp_size) { - VOP_UNLOCK(vnp->vnp_vp, 0, p); -#ifdef DEBUG - if (vpagerdebug & (VDB_FAIL|VDB_SIZE)) - printf("vnode_pager_haspage: pg %p, off %lx, size %lx\n", - pager, offset, vnp->vnp_size); -#endif - return(FALSE); - } - - /* - * Read the index to find the disk block to read - * from. If there is no block, report that we don't - * have this data. - * - * Assumes that the vnode has whole page or nothing. - */ - err = VOP_BMAP(vnp->vnp_vp, - offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize, - (struct vnode **)0, &bn, NULL); - VOP_UNLOCK(vnp->vnp_vp, 0, p); - if (err) { -#ifdef DEBUG - if (vpagerdebug & VDB_FAIL) - printf("vnode_pager_haspage: BMAP err %d, pg %p, off %lx\n", - err, pager, offset); -#endif - return(TRUE); - } - return((long)bn < 0 ? FALSE : TRUE); -} - -static void -vnode_pager_cluster(pager, offset, loffset, hoffset) - vm_pager_t pager; - vm_offset_t offset; - vm_offset_t *loffset; - vm_offset_t *hoffset; -{ - vn_pager_t vnp = (vn_pager_t)pager->pg_data; - vm_offset_t loff, hoff; - -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_cluster(%p, %lx) ", pager, offset); -#endif - loff = offset; - if (loff >= vnp->vnp_size) - panic("vnode_pager_cluster: bad offset"); - /* - * XXX could use VOP_BMAP to get maxcontig value - */ - hoff = loff + MAXBSIZE; - if (hoff > round_page(vnp->vnp_size)) - hoff = round_page(vnp->vnp_size); - - *loffset = loff; - *hoffset = hoff; -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("returns [%lx-%lx]\n", loff, hoff); -#endif -} - -/* - * (XXX) - * Lets the VM system know about a change in size for a file. - * If this vnode is mapped into some address space (i.e. we have a pager - * for it) we adjust our own internal size and flush any cached pages in - * the associated object that are affected by the size change. - * - * Note: this routine may be invoked as a result of a pager put - * operation (possibly at object termination time), so we must be careful. - */ -void -vnode_pager_setsize(vp, nsize) - struct vnode *vp; - u_long nsize; -{ - register vn_pager_t vnp; - register vm_object_t object; - vm_pager_t pager; - - /* - * Not a mapped vnode - */ - if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL) - return; - /* - * Hasn't changed size - */ - pager = (vm_pager_t)vp->v_vmdata; - vnp = (vn_pager_t)pager->pg_data; - if (nsize == vnp->vnp_size) - return; - /* - * No object. - * This can happen during object termination since - * vm_object_page_clean is called after the object - * has been removed from the hash table, and clean - * may cause vnode write operations which can wind - * up back here. - */ - object = vm_object_lookup(pager); - if (object == NULL) - return; - -#ifdef DEBUG - if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE)) - printf("vnode_pager_setsize: vp %p obj %p osz %ld nsz %ld\n", - vp, object, vnp->vnp_size, nsize); -#endif - /* - * File has shrunk. - * Toss any cached pages beyond the new EOF. 
- */ - if (nsize < vnp->vnp_size) { - vm_object_lock(object); - vm_object_page_remove(object, - (vm_offset_t)nsize, vnp->vnp_size); - vm_object_unlock(object); - } - vnp->vnp_size = (vm_offset_t)nsize; - vm_object_deallocate(object); -} - -void -vnode_pager_umount(mp) - register struct mount *mp; -{ - struct proc *p = curproc; /* XXX */ - vm_pager_t pager, npager; - struct vnode *vp; - - for (pager = vnode_pager_list.tqh_first; pager != NULL; pager = npager){ - /* - * Save the next pointer now since uncaching may - * terminate the object and render pager invalid - */ - npager = pager->pg_list.tqe_next; - vp = ((vn_pager_t)pager->pg_data)->vnp_vp; - if (mp == (struct mount *)0 || vp->v_mount == mp) { - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - (void) vnode_pager_uncache(vp); - VOP_UNLOCK(vp, 0, p); - } - } -} - -/* - * Remove vnode associated object from the object cache. - * - * XXX unlock the vnode if it is currently locked. - * We must do this since uncaching the object may result in its - * destruction which may initiate paging activity which may necessitate - * re-locking the vnode. - */ -boolean_t -vnode_pager_uncache(vp) - register struct vnode *vp; -{ - struct proc *p = curproc; /* XXX */ - vm_object_t object; - boolean_t uncached; - vm_pager_t pager; - - /* - * Not a mapped vnode - */ - if (vp->v_type != VREG || (pager = (vm_pager_t)vp->v_vmdata) == NULL) - return (TRUE); -#ifdef DEBUG - if (!VOP_ISLOCKED(vp)) { -#ifdef NFSCLIENT - extern int (**nfsv2_vnodeop_p) __P((void *)); - extern int (**spec_nfsv2nodeop_p) __P((void *)); -#ifdef FIFO - extern int (**fifo_nfsv2nodeop_p) __P((void *)); -#endif - - if (vp->v_op != nfsv2_vnodeop_p - && vp->v_op != spec_nfsv2nodeop_p -#ifdef FIFO - && vp->v_op != fifo_nfsv2nodeop_p -#endif - ) - -#endif - panic("vnode_pager_uncache: vnode not locked!"); - } -#endif - /* - * Must use vm_object_lookup() as it actually removes - * the object from the cache list. - */ - object = vm_object_lookup(pager); - if (object) { - uncached = (object->ref_count <= 1); - VOP_UNLOCK(vp, 0, p); - pager_cache(object, FALSE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - } else - uncached = TRUE; - return(uncached); -} - -static int -vnode_pager_io(vnp, mlist, npages, sync, rw) - register vn_pager_t vnp; - vm_page_t *mlist; - int npages; - boolean_t sync; - enum uio_rw rw; -{ - struct uio auio; - struct iovec aiov; - vm_offset_t kva, foff; - int error, size; - struct proc *p = curproc; /* XXX */ - - /* XXX */ - vm_page_t m; - if (npages != 1) - panic("vnode_pager_io: cannot handle multiple pages"); - m = *mlist; - /* XXX */ - -#ifdef DEBUG - if (vpagerdebug & VDB_FOLLOW) - printf("vnode_pager_io(%p, %p, %c): vnode %p\n", - vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp); -#endif - foff = m->offset + m->object->paging_offset; - /* - * Allocate a kernel virtual address and initialize so that - * we can use VOP_READ/WRITE routines. 
- */ - kva = vm_pager_map_pages(mlist, npages, sync); - if (kva == NULL) - return(VM_PAGER_AGAIN); - /* - * After all of the potentially blocking operations have been - * performed, we can do the size checks: - * read beyond EOF (returns error) - * short read - */ - vn_lock(vnp->vnp_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE, p); - if (foff >= vnp->vnp_size) { - VOP_UNLOCK(vnp->vnp_vp, 0, p); - vm_pager_unmap_pages(kva, npages); -#ifdef DEBUG - if (vpagerdebug & VDB_SIZE) - printf("vnode_pager_io: vp %p, off %ld size %ld\n", - vnp->vnp_vp, foff, vnp->vnp_size); -#endif - return(VM_PAGER_BAD); - } - if (foff + PAGE_SIZE > vnp->vnp_size) - size = vnp->vnp_size - foff; - else - size = PAGE_SIZE; - aiov.iov_base = (caddr_t)kva; - aiov.iov_len = size; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = foff; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = rw; - auio.uio_resid = size; - auio.uio_procp = (struct proc *)0; -#ifdef DEBUG - if (vpagerdebug & VDB_IO) - printf("vnode_pager_io: vp %p kva %lx foff %lx size %x", - vnp->vnp_vp, kva, foff, size); -#endif - if (rw == UIO_READ) - error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred); - else - error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred); - VOP_UNLOCK(vnp->vnp_vp, 0, p); -#ifdef DEBUG - if (vpagerdebug & VDB_IO) { - if (error || auio.uio_resid) - printf(" returns error %x, resid %x", - error, auio.uio_resid); - printf("\n"); - } -#endif - if (!error) { - register int count = size - auio.uio_resid; - - if (count == 0) - error = EINVAL; - else if (count != PAGE_SIZE && rw == UIO_READ) - bzero((void *)(kva + count), PAGE_SIZE - count); - } - vm_pager_unmap_pages(kva, npages); - return (error ? VM_PAGER_ERROR : VM_PAGER_OK); -} |
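
The deleted code above is dense, so a few small userland sketches of the techniques its comments describe follow. First, the comment before swap_alloc() in the vm_swap.c diff explains that same-priority swap devices are "round robined" until full. The sketch below models only that policy; the swdev/swpri structures, the fixed-size arrays and swalloc() are inventions for the example and stand in for the kernel's circle queues and extent maps.

/*
 * Userland model of the round-robin allocation policy described in the
 * deleted swap_alloc() (illustrative only; these are not kernel types).
 */
#include <stdio.h>

#define MAXDEV	4

struct swdev {
	const char *name;
	int nblks;		/* total blocks on this device */
	int inuse;		/* blocks already handed out */
};

struct swpri {
	int prio;
	int ndev;
	struct swdev dev[MAXDEV];	/* same-priority devices, rotated */
};

/*
 * Allocate "size" blocks: scan priorities in order; within a priority
 * take the first device with room, then rotate it to the tail so the
 * next request at this priority lands on another device.
 */
static struct swdev *
swalloc(struct swpri *pri, int npri, int size)
{
	int i, j, k;

	for (i = 0; i < npri; i++) {
		struct swpri *sp = &pri[i];

		for (j = 0; j < sp->ndev; j++) {
			struct swdev *sd = &sp->dev[j];

			if (sd->inuse + size > sd->nblks)
				continue;	/* full: try the next one */
			sd->inuse += size;

			/* rotate: move this device to the tail */
			struct swdev tmp = *sd;
			for (k = j; k < sp->ndev - 1; k++)
				sp->dev[k] = sp->dev[k + 1];
			sp->dev[sp->ndev - 1] = tmp;
			return &sp->dev[sp->ndev - 1];
		}
	}
	return NULL;	/* every device at every priority is full */
}

int
main(void)
{
	struct swpri pri[1] = {
		{ 0, 2, { { "sd0b", 100, 0 }, { "sd1b", 100, 0 } } }
	};
	int i;

	for (i = 0; i < 4; i++) {
		struct swdev *sd = swalloc(pri, 1, 10);
		printf("%d: %s\n", i, sd ? sd->name : "(none)");
	}
	return 0;
}

Running it allocates 10 blocks four times and prints sd0b, sd1b, sd0b, sd1b: each allocation rotates the chosen device to the tail of its list, which is the behaviour the deleted CIRCLEQ_REMOVE/CIRCLEQ_INSERT_TAIL pair implements.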
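
The comment above swap_getsdpfromaddr() explains that several per-device block ranges are mapped into one flat swap address space. Here is a minimal userland model of that scheme; addmap() and getsdpfromaddr() are invented names standing in for swap_addmap() and the swapmap extent, and the simple append allocation replaces the extent allocator.

/*
 * Userland model of the single flat swap address space: each device
 * gets a window [mapoffset, mapoffset + mapsize) in one block-number
 * space, and a flat address is resolved back to its device by a
 * range check, as the deleted ADDR_IN_MAP() macro does.
 */
#include <stdio.h>

struct swapdev {
	const char *name;
	long mapoffset;		/* start of this device's window */
	long mapsize;		/* number of blocks in the window */
};

#define ADDR_IN_MAP(a, sdp) \
	((a) >= (sdp)->mapoffset && (a) < (sdp)->mapoffset + (sdp)->mapsize)

/* append a device: its window starts where the previous one ended */
static void
addmap(struct swapdev *sdp, long size, long *next)
{
	sdp->mapoffset = *next;
	sdp->mapsize = size;
	*next += size;
}

/* flat address -> owning device, or NULL if unmapped */
static struct swapdev *
getsdpfromaddr(struct swapdev *devs, int ndev, long addr)
{
	int i;

	for (i = 0; i < ndev; i++)
		if (ADDR_IN_MAP(addr, &devs[i]))
			return &devs[i];
	return NULL;
}

int
main(void)
{
	struct swapdev devs[2] = { { "sd0b" }, { "vnd0" } };
	long next = 1;	/* block 0 reserved: it means "allocation failed" */

	addmap(&devs[0], 1000, &next);
	addmap(&devs[1], 500, &next);

	long addr = 1200;
	struct swapdev *sdp = getsdpfromaddr(devs, 2, addr);
	if (sdp != NULL)
		printf("block %ld -> %s block %ld\n",
		    addr, sdp->name, addr - sdp->mapoffset);
	return 0;
}

The deleted swstrategy() performs the same subtraction of swd_mapoffset before handing the buffer to the real device or file.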
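
sw_reg_strategy() in the deleted code splits a swap transfer against a file-backed device into pieces that never cross a filesystem block boundary, so each piece can be mapped to a physical block with VOP_BMAP(). The sketch below models only the splitting arithmetic; the byte-offset loop and split_xfer() are assumptions of the example, and the real code additionally handles read-ahead clusters, holes, and error unwinding.

/*
 * Userland model of carving a transfer of "count" bytes starting at
 * byte offset "off" into filesystem-block-sized pieces of size "bsize".
 */
#include <stdio.h>

static void
split_xfer(long off, long count, long bsize)
{
	long resid, sz;

	for (resid = count; resid > 0; resid -= sz, off += sz) {
		long boff = off % bsize;

		if (boff != 0)
			sz = bsize - boff;	/* finish the current block */
		else
			sz = bsize;		/* a whole block */
		if (sz > resid)
			sz = resid;		/* last, short piece */
		printf("  fs block %ld, offset %ld, %ld bytes\n",
		    off / bsize, boff, sz);
	}
}

int
main(void)
{
	/* a 16 KB transfer starting 4 KB into an 8 KB-block file */
	split_xfer(4096, 16384, 8192);
	return 0;
}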
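
The vm_unix.c diff above carries the traditional sbrk interface; sys_obreak() rounds the requested break to a page boundary and grows or shrinks the data segment by the difference. A hedged userland model of just that arithmetic follows; round_page() and the variables are defined locally for the sketch, whereas the kernel works in clicks via ctob()/btoc() and calls vm_allocate()/vm_deallocate().

/*
 * Userland model of the break-adjustment arithmetic in the deleted
 * sys_obreak(): compare the page-rounded requested break against the
 * page-rounded current end of the data segment.
 */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define round_page(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int
main(void)
{
	unsigned long daddr = 0x10000;		/* start of data segment */
	unsigned long dsize = 3 * PAGE_SIZE;	/* current size in bytes */
	unsigned long new = daddr + 5 * PAGE_SIZE + 100;	/* brk(2) arg */

	unsigned long old = round_page(daddr + dsize);
	new = round_page(new);

	if (new > old)
		printf("grow data segment by %lu bytes at 0x%lx\n",
		    new - old, old);
	else if (new < old)
		printf("shrink data segment by %lu bytes back to 0x%lx\n",
		    old - new, new);
	else
		printf("break unchanged\n");
	return 0;
}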
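
Finally, vnode_pager_io() in the deleted vnode_pager.c zero-fills the tail of a page when a read from the backing vnode comes up short at end of file, so stale memory is never mapped into a process. The sketch below models that behaviour with plain POSIX pread(); fill_page() and the tmpfile() harness are inventions for the example, standing in for VOP_READ on a pager-mapped kernel address.

/*
 * Userland model of short-read handling: fill "page" with PAGE_SIZE
 * bytes taken from "fd" at byte offset "foff" and zero whatever the
 * file could not provide.  Returns -1 on a hard error, 0 otherwise.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define PAGE_SIZE 4096

static int
fill_page(int fd, off_t foff, char *page)
{
	ssize_t count = pread(fd, page, PAGE_SIZE, foff);

	if (count < 0)
		return -1;			/* I/O error */
	if (count < PAGE_SIZE)
		memset(page + count, 0, PAGE_SIZE - count);
	return 0;
}

int
main(void)
{
	char page[PAGE_SIZE];
	FILE *fp = tmpfile();

	if (fp == NULL)
		return 1;
	fputs("short file contents\n", fp);
	fflush(fp);

	if (fill_page(fileno(fp), 0, page) == 0)
		printf("read %zu real bytes, rest of the page is zero\n",
		    strlen(page));
	fclose(fp);
	return 0;
}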