author     Artur Grabowski <art@cvs.openbsd.org>   2001-06-27 04:53:33 +0000
committer  Artur Grabowski <art@cvs.openbsd.org>   2001-06-27 04:53:33 +0000
commit     23908c853b9acc0b1320aca6a88554f9fa3e0345 (patch)
tree       b1c414d1c8d188c5f376dabf99916885e274d73d /sys/vm
parent     736c4adc1cd88f788e32174b7575db53904c92c8 (diff)
Die!
Diffstat (limited to 'sys/vm')
-rw-r--r--  sys/vm/device_pager.c   370
-rw-r--r--  sys/vm/swap_pager.c    1267
-rw-r--r--  sys/vm/vm_fault.c      1015
-rw-r--r--  sys/vm/vm_glue.c        532
-rw-r--r--  sys/vm/vm_init.c        127
-rw-r--r--  sys/vm/vm_kern.c        465
-rw-r--r--  sys/vm/vm_map.c        2746
-rw-r--r--  sys/vm/vm_meter.c       236
-rw-r--r--  sys/vm/vm_mmap.c       1054
-rw-r--r--  sys/vm/vm_object.c     1887
-rw-r--r--  sys/vm/vm_page.c       1881
-rw-r--r--  sys/vm/vm_pageout.c     620
-rw-r--r--  sys/vm/vm_pager.c       426
-rw-r--r--  sys/vm/vm_swap.c       1248
-rw-r--r--  sys/vm/vm_unix.c        254
-rw-r--r--  sys/vm/vm_user.c        340
-rw-r--r--  sys/vm/vnode_pager.c    591
17 files changed, 0 insertions, 15059 deletions
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c
deleted file mode 100644
index 34423923334..00000000000
--- a/sys/vm/device_pager.c
+++ /dev/null
@@ -1,370 +0,0 @@
-/* $OpenBSD: device_pager.c,v 1.6 2001/05/16 12:54:34 ho Exp $ */
-/* $NetBSD: device_pager.c,v 1.24 1997/01/03 18:03:14 mrg Exp $ */
-
-/*
- * Copyright (c) 1990 University of Utah.
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)device_pager.c 8.5 (Berkeley) 1/12/94
- */
-
-/*
- * Page to/from special files.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/conf.h>
-#include <sys/mman.h>
-#include <sys/malloc.h>
-#include <sys/proc.h>
-
-#include <vm/vm.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_page.h>
-#include <vm/device_pager.h>
-
-struct pagerlst dev_pager_list; /* list of managed devices */
-struct pglist dev_pager_fakelist; /* list of available vm_page_t's */
-
-#ifdef DEBUG
-int dpagerdebug = 0;
-#define DDB_FOLLOW 0x01
-#define DDB_INIT 0x02
-#define DDB_ALLOC 0x04
-#define DDB_FAIL 0x08
-#endif
-
-static vm_pager_t dev_pager_alloc
- __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
-static void dev_pager_dealloc __P((vm_pager_t));
-static int dev_pager_getpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
-static boolean_t dev_pager_haspage __P((vm_pager_t, vm_offset_t));
-static void dev_pager_init __P((void));
-static int dev_pager_putpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
-static vm_page_t dev_pager_getfake __P((vm_offset_t));
-static void dev_pager_putfake __P((vm_page_t));
-
-struct pagerops devicepagerops = {
- dev_pager_init,
- dev_pager_alloc,
- dev_pager_dealloc,
- dev_pager_getpage,
- dev_pager_putpage,
- dev_pager_haspage,
- vm_pager_clusternull
-};
-
-static void
-dev_pager_init()
-{
-#ifdef DEBUG
- if (dpagerdebug & DDB_FOLLOW)
- printf("dev_pager_init()\n");
-#endif
- TAILQ_INIT(&dev_pager_list);
- TAILQ_INIT(&dev_pager_fakelist);
-}
-
-static vm_pager_t
-dev_pager_alloc(handle, size, prot, foff)
- caddr_t handle;
- vm_size_t size;
- vm_prot_t prot;
- vm_offset_t foff;
-{
- dev_t dev;
- vm_pager_t pager;
- int (*mapfunc) __P((dev_t, int, int));
- vm_object_t object;
- dev_pager_t devp;
- int npages, off;
-
-#ifdef DEBUG
- if (dpagerdebug & DDB_FOLLOW)
- printf("dev_pager_alloc(%p, %lx, %x, %lx)\n",
- handle, size, prot, foff);
-#endif
-#ifdef DIAGNOSTIC
- /*
- * Pageout to device, should never happen.
- */
- if (handle == NULL)
- panic("dev_pager_alloc called");
-#endif
-
- /*
- * Make sure this device can be mapped.
- */
- dev = (dev_t)(long)handle;
- mapfunc = cdevsw[major(dev)].d_mmap;
- if (mapfunc == NULL ||
- mapfunc == (int (*) __P((dev_t, int, int))) enodev ||
- mapfunc == (int (*) __P((dev_t, int, int))) nullop)
- return(NULL);
-
- /*
- * Offset should be page aligned.
- */
- if (foff & PAGE_MASK)
- return(NULL);
-
- /*
- * Check that the specified range of the device allows the
- * desired protection.
- *
- * XXX assumes VM_PROT_* == PROT_*
- */
- npages = atop(round_page(size));
- for (off = foff; npages--; off += PAGE_SIZE)
- if ((*mapfunc)(dev, off, (int)prot) == -1)
- return(NULL);
-
- /*
- * Look up pager, creating as necessary.
- */
-top:
- pager = vm_pager_lookup(&dev_pager_list, handle);
- if (pager == NULL) {
- /*
- * Allocate and initialize pager structs
- */
- pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
- devp = (dev_pager_t)malloc(sizeof *devp, M_VMPGDATA, M_WAITOK);
- pager->pg_handle = handle;
- pager->pg_ops = &devicepagerops;
- pager->pg_type = PG_DEVICE;
- pager->pg_flags = 0;
- pager->pg_data = devp;
- TAILQ_INIT(&devp->devp_pglist);
- /*
- * Allocate object and associate it with the pager.
- */
- object = devp->devp_object = vm_object_allocate(0);
- vm_object_enter(object, pager);
- vm_object_setpager(object, pager, (vm_offset_t)0, FALSE);
- /*
- * Finally, put it on the managed list so others can find it.
- * First we re-lookup in case someone else beat us to this
- * point (due to blocking in the various mallocs). If so,
- * we free everything and start over.
- */
- if (vm_pager_lookup(&dev_pager_list, handle)) {
- free((caddr_t)devp, M_VMPGDATA);
- free((caddr_t)pager, M_VMPAGER);
- goto top;
- }
- TAILQ_INSERT_TAIL(&dev_pager_list, pager, pg_list);
-#ifdef DEBUG
- if (dpagerdebug & DDB_ALLOC) {
- printf("dev_pager_alloc: pager %p devp %p object %p\n",
- pager, devp, object);
- vm_object_print(object, FALSE);
- }
-#endif
- } else {
- /*
- * vm_object_lookup() gains a reference and also
- * removes the object from the cache.
- */
- object = vm_object_lookup(pager);
-#ifdef DIAGNOSTIC
- devp = (dev_pager_t)pager->pg_data;
- if (object != devp->devp_object)
- panic("dev_pager_setup: bad object");
-#endif
- }
- return(pager);
-}
-
-static void
-dev_pager_dealloc(pager)
- vm_pager_t pager;
-{
- dev_pager_t devp;
- vm_object_t object;
- vm_page_t m;
-
-#ifdef DEBUG
- if (dpagerdebug & DDB_FOLLOW)
- printf("dev_pager_dealloc(%p)\n", pager);
-#endif
- TAILQ_REMOVE(&dev_pager_list, pager, pg_list);
- /*
- * Get the object.
- * Note: cannot use vm_object_lookup since object has already
- * been removed from the hash chain.
- */
- devp = (dev_pager_t)pager->pg_data;
- object = devp->devp_object;
-#ifdef DEBUG
- if (dpagerdebug & DDB_ALLOC)
- printf("dev_pager_dealloc: devp %p object %p\n", devp, object);
-#endif
- /*
- * Free up our fake pages.
- */
- while ((m = devp->devp_pglist.tqh_first) != NULL) {
- TAILQ_REMOVE(&devp->devp_pglist, m, pageq);
- dev_pager_putfake(m);
- }
- free((caddr_t)devp, M_VMPGDATA);
- free((caddr_t)pager, M_VMPAGER);
-}
-
-static int
-dev_pager_getpage(pager, mlist, npages, sync)
- vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
- boolean_t sync;
-{
- register vm_object_t object;
- vm_offset_t offset, paddr;
- vm_page_t page;
- dev_t dev;
- int (*mapfunc) __P((dev_t, int, int)), prot;
- vm_page_t m;
-
-#ifdef DEBUG
- if (dpagerdebug & DDB_FOLLOW)
- printf("dev_pager_getpage(%p, %p, %x, %x)\n",
- pager, mlist, npages, sync);
-#endif
-
- if (npages != 1)
- panic("dev_pager_getpage: cannot handle multiple pages");
- m = *mlist;
-
- object = m->object;
- dev = (dev_t)(long)pager->pg_handle;
- offset = m->offset + object->paging_offset;
- prot = PROT_READ; /* XXX should pass in? */
- mapfunc = cdevsw[major(dev)].d_mmap;
-#ifdef DIAGNOSTIC
- if (mapfunc == NULL ||
- mapfunc == (int (*) __P((dev_t, int, int))) enodev ||
- mapfunc == (int (*) __P((dev_t, int, int))) nullop)
- panic("dev_pager_getpage: no map function");
-#endif
- paddr = pmap_phys_address((*mapfunc)(dev, (int)offset, prot));
-#ifdef DIAGNOSTIC
- if (paddr == -1)
- panic("dev_pager_getpage: map function returns error");
-#endif
- /*
- * Replace the passed in page with our own fake page and free
- * up the original.
- */
- page = dev_pager_getfake(paddr);
- TAILQ_INSERT_TAIL(&((dev_pager_t)pager->pg_data)->devp_pglist, page,
- pageq);
- vm_object_lock(object);
- vm_page_lock_queues();
- vm_page_free(m);
- vm_page_insert(page, object, offset);
- vm_page_unlock_queues();
- PAGE_WAKEUP(m);
- if (offset + PAGE_SIZE > object->size)
- object->size = offset + PAGE_SIZE; /* XXX anal */
- vm_object_unlock(object);
-
- return(VM_PAGER_OK);
-}
-
-static int
-dev_pager_putpage(pager, mlist, npages, sync)
- vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
- boolean_t sync;
-{
-#ifdef DEBUG
- if (dpagerdebug & DDB_FOLLOW)
- printf("dev_pager_putpage(%p, %p, %x, %x)\n",
- pager, mlist, npages, sync);
-#endif
- if (pager == NULL)
- return (FALSE);
- panic("dev_pager_putpage called");
-}
-
-static boolean_t
-dev_pager_haspage(pager, offset)
- vm_pager_t pager;
- vm_offset_t offset;
-{
-#ifdef DEBUG
- if (dpagerdebug & DDB_FOLLOW)
- printf("dev_pager_haspage(%p, %lx)\n", pager, offset);
-#endif
- return(TRUE);
-}
-
-static vm_page_t
-dev_pager_getfake(paddr)
- vm_offset_t paddr;
-{
- vm_page_t m;
- int i;
-
- if (dev_pager_fakelist.tqh_first == NULL) {
- m = (vm_page_t)malloc(PAGE_SIZE, M_VMPGDATA, M_WAITOK);
- for (i = PAGE_SIZE / sizeof(*m); i > 0; i--) {
- TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq);
- m++;
- }
- }
- m = dev_pager_fakelist.tqh_first;
- TAILQ_REMOVE(&dev_pager_fakelist, m, pageq);
- m->flags = PG_BUSY | PG_CLEAN | PG_FAKE | PG_FICTITIOUS;
- m->phys_addr = paddr;
- m->wire_count = 1;
- return(m);
-}
-
-static void
-dev_pager_putfake(m)
- vm_page_t m;
-{
-#ifdef DIAGNOSTIC
- if (!(m->flags & PG_FICTITIOUS))
- panic("dev_pager_putfake: bad page");
-#endif
- TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq);
-}
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
deleted file mode 100644
index f7ee9045449..00000000000
--- a/sys/vm/swap_pager.c
+++ /dev/null
@@ -1,1267 +0,0 @@
-/* $OpenBSD: swap_pager.c,v 1.17 2001/02/28 20:32:40 csapuntz Exp $ */
-/* $NetBSD: swap_pager.c,v 1.27 1996/03/16 23:15:20 christos Exp $ */
-
-/*
- * Copyright (c) 1990 University of Utah.
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
- *
- * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94
- */
-
-/*
- * Quick hack to page to dedicated partition(s).
- * TODO:
- * Add multiprocessor locks
- * Deal with async writes in a better fashion
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/buf.h>
-#include <sys/map.h>
-#include <sys/simplelock.h>
-#include <sys/vnode.h>
-#include <sys/malloc.h>
-#include <sys/swap.h>
-
-#include <miscfs/specfs/specdev.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pageout.h>
-#include <vm/swap_pager.h>
-
-/* XXX this makes the max swap devices 16 */
-#define NSWSIZES 16 /* size of swtab */
-#define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */
-#ifndef NPENDINGIO
-#define NPENDINGIO 64 /* max # of pending cleans */
-#endif
-
-#ifdef DEBUG
-int swpagerdebug = 0x100;
-#define SDB_FOLLOW 0x001
-#define SDB_INIT 0x002
-#define SDB_ALLOC 0x004
-#define SDB_IO 0x008
-#define SDB_WRITE 0x010
-#define SDB_FAIL 0x020
-#define SDB_ALLOCBLK 0x040
-#define SDB_FULL 0x080
-#define SDB_ANOM 0x100
-#define SDB_ANOMPANIC 0x200
-#define SDB_CLUSTER 0x400
-#define SDB_PARANOIA 0x800
-#endif
-
-TAILQ_HEAD(swpclean, swpagerclean);
-
-struct swpagerclean {
- TAILQ_ENTRY(swpagerclean) spc_list;
- int spc_flags;
- struct buf *spc_bp;
- sw_pager_t spc_swp;
- vm_offset_t spc_kva;
- vm_page_t spc_m;
- int spc_npages;
-} swcleanlist[NPENDINGIO];
-typedef struct swpagerclean *swp_clean_t;
-
-/* spc_flags values */
-#define SPC_FREE 0x00
-#define SPC_BUSY 0x01
-#define SPC_DONE 0x02
-#define SPC_ERROR 0x04
-
-struct swtab {
- vm_size_t st_osize; /* size of object (bytes) */
- int st_bsize; /* vs. size of swap block (DEV_BSIZE units) */
-#ifdef DEBUG
- u_long st_inuse; /* number in this range in use */
- u_long st_usecnt; /* total used of this size */
-#endif
-} swtab[NSWSIZES+1];
-
-#ifdef DEBUG
-int swap_pager_poip; /* pageouts in progress */
-int swap_pager_piip; /* pageins in progress */
-#endif
-
-int swap_pager_maxcluster; /* maximum cluster size */
-int swap_pager_npendingio; /* number of pager clean structs */
-
-struct swpclean swap_pager_inuse; /* list of pending page cleans */
-struct swpclean swap_pager_free; /* list of free pager clean structs */
-struct pagerlst swap_pager_list; /* list of "named" anon regions */
-
-extern struct buf bswlist; /* import from vm_swap.c */
-
-static void swap_pager_init __P((void));
-static vm_pager_t swap_pager_alloc
- __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
-static void swap_pager_clean __P((int));
-#ifdef DEBUG
-static void swap_pager_clean_check __P((vm_page_t *, int, int));
-#endif
-static void swap_pager_cluster
- __P((vm_pager_t, vm_offset_t,
- vm_offset_t *, vm_offset_t *));
-static void swap_pager_dealloc __P((vm_pager_t));
-static int swap_pager_remove
- __P((vm_pager_t, vm_offset_t, vm_offset_t));
-static vm_offset_t swap_pager_next __P((vm_pager_t, vm_offset_t));
-static int swap_pager_count __P((vm_pager_t));
-static int swap_pager_getpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
-static boolean_t swap_pager_haspage __P((vm_pager_t, vm_offset_t));
-static int swap_pager_io __P((sw_pager_t, vm_page_t *, int, int));
-static void swap_pager_iodone __P((struct buf *));
-static int swap_pager_putpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
-static int count_bits __P((u_int));
-
-struct pagerops swappagerops = {
- swap_pager_init,
- swap_pager_alloc,
- swap_pager_dealloc,
- swap_pager_getpage,
- swap_pager_putpage,
- swap_pager_haspage,
- swap_pager_cluster,
- swap_pager_remove,
- swap_pager_next,
- swap_pager_count
-};
-
-static void
-swap_pager_init()
-{
- swp_clean_t spc;
- int i, maxbsize, bsize;
-
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
- printf("swpg_init()\n");
-#endif
- dfltpagerops = &swappagerops;
- TAILQ_INIT(&swap_pager_list);
-
- /*
- * Allocate async IO structures.
- *
- * XXX it would be nice if we could do this dynamically based on
- * the value of nswbuf (since we are ultimately limited by that)
- * but neither nswbuf or malloc has been initialized yet. So the
- * structs are statically allocated above.
- */
- swap_pager_npendingio = NPENDINGIO;
-
- /*
- * Initialize clean lists
- */
- TAILQ_INIT(&swap_pager_inuse);
- TAILQ_INIT(&swap_pager_free);
- for (i = 0, spc = swcleanlist; i < swap_pager_npendingio; i++, spc++) {
- TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
- spc->spc_flags = SPC_FREE;
- }
-
-/* this needs to be at least ctod(1) for all ports for vtod() to work */
-#define DMMIN 32
- /*
- * Fill in our table of object size vs. allocation size. bsize needs
- * to be at least ctod(1) for all ports for vtod() to work, with a
- * bare minimum of 32.
- */
-#define max(a, b) ((a) > (b) ? (a) : (b))
- bsize = max(32, max(ctod(1), btodb(PAGE_SIZE)));
- maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
- if (maxbsize > NBPG)
- maxbsize = NBPG;
- for (i = 0; i < NSWSIZES; i++) {
- if (bsize <= btodb(MAXPHYS))
- swap_pager_maxcluster = dbtob(bsize);
- swtab[i].st_bsize = bsize;
- if (bsize >= maxbsize) {
- swtab[i].st_osize = 0;
- break;
- }
- swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
-#ifdef DEBUG
- if (swpagerdebug & SDB_INIT)
- printf("swpg_init: ix %d, size %lx, bsize %x\n",
- i, swtab[i].st_osize, swtab[i].st_bsize);
-#endif
- bsize *= 2;
- }
-}
-
-/*
- * Allocate a pager structure and associated resources.
- * Note that if we are called from the pageout daemon (handle == NULL)
- * we should not wait for memory as it could result in deadlock.
- */
-static vm_pager_t
-swap_pager_alloc(handle, size, prot, foff)
- caddr_t handle;
- register vm_size_t size;
- vm_prot_t prot;
- vm_offset_t foff;
-{
- register vm_pager_t pager;
- register sw_pager_t swp;
- struct swtab *swt;
- int waitok;
-
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
- printf("swpg_alloc(%p, %lx, %x)\n", handle, size, prot);
-#endif
- /*
- * If this is a "named" anonymous region, look it up and
- * return the appropriate pager if it exists.
- */
- if (handle) {
- pager = vm_pager_lookup(&swap_pager_list, handle);
- if (pager != NULL) {
- /*
- * Use vm_object_lookup to gain a reference
- * to the object and also to remove from the
- * object cache.
- */
- if (vm_object_lookup(pager) == NULL)
- panic("swap_pager_alloc: bad object");
- return (pager);
- }
- }
- /*
- * Pager doesn't exist, allocate swap management resources
- * and initialize.
- */
- waitok = handle ? M_WAITOK : M_NOWAIT;
- pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
- if (pager == NULL)
- return (NULL);
- swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
- if (swp == NULL) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("swpg_alloc: swpager malloc failed\n");
-#endif
- free((caddr_t)pager, M_VMPAGER);
- return (NULL);
- }
- size = round_page(size);
- for (swt = swtab; swt->st_osize; swt++)
- if (size <= swt->st_osize)
- break;
-#ifdef DEBUG
- swt->st_inuse++;
- swt->st_usecnt++;
-#endif
- swp->sw_osize = size;
- swp->sw_bsize = swt->st_bsize;
- swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
- swp->sw_blocks = (sw_blk_t)malloc(swp->sw_nblocks *
- sizeof(*swp->sw_blocks), M_VMPGDATA, M_NOWAIT);
- if (swp->sw_blocks == NULL) {
- free((caddr_t)swp, M_VMPGDATA);
- free((caddr_t)pager, M_VMPAGER);
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("swpg_alloc: sw_blocks malloc failed\n");
- swt->st_inuse--;
- swt->st_usecnt--;
-#endif
- return (FALSE);
- }
- bzero((caddr_t)swp->sw_blocks,
- swp->sw_nblocks * sizeof(*swp->sw_blocks));
- swp->sw_poip = swp->sw_cnt = 0;
- if (handle) {
- vm_object_t object;
-
- swp->sw_flags = SW_NAMED;
- TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
- /*
- * Consistent with other pagers: return with object
- * referenced. Can't do this with handle == NULL
- * since it might be the pageout daemon calling.
- */
- object = vm_object_allocate(size);
- vm_object_enter(object, pager);
- vm_object_setpager(object, pager, 0, FALSE);
- } else {
- swp->sw_flags = 0;
- pager->pg_list.tqe_next = NULL;
- pager->pg_list.tqe_prev = NULL;
- }
- pager->pg_handle = handle;
- pager->pg_ops = &swappagerops;
- pager->pg_type = PG_SWAP;
- pager->pg_flags = PG_CLUSTERPUT;
- pager->pg_data = swp;
-
-#ifdef DEBUG
- if (swpagerdebug & SDB_ALLOC)
- printf("swpg_alloc: pg_data %p, %x of %x at %p\n",
- swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
-#endif
- return (pager);
-}
-
-static void
-swap_pager_dealloc(pager)
- vm_pager_t pager;
-{
- register int i;
- register sw_blk_t bp;
- register sw_pager_t swp;
- int s;
-#ifdef DEBUG
- struct swtab *swt;
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return;
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
- printf("swpg_dealloc(%p)\n", pager);
-#endif
- /*
- * Remove from list right away so lookups will fail if we
- * block for pageout completion.
- */
- swp = (sw_pager_t) pager->pg_data;
- if (swp->sw_flags & SW_NAMED) {
- TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
- swp->sw_flags &= ~SW_NAMED;
- }
-#ifdef DEBUG
- for (swt = swtab; swt->st_osize; swt++)
- if (swp->sw_osize <= swt->st_osize)
- break;
- swt->st_inuse--;
-#endif
-
- /*
- * Wait for all pageouts to finish and remove
- * all entries from cleaning list.
- */
- s = splbio();
- while (swp->sw_poip) {
- swp->sw_flags |= SW_WANTED;
- (void) tsleep(swp, PVM, "swpgdealloc", 0);
- }
- splx(s);
- swap_pager_clean(B_WRITE);
-
- /*
- * Free left over swap blocks
- */
- for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
- if (bp->swb_block) {
-#ifdef DEBUG
- if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
- printf("swpg_dealloc: blk %x\n",
- bp->swb_block);
-#endif
- swap_free(swp->sw_bsize, bp->swb_block);
- }
- /*
- * Free swap management resources
- */
- free((caddr_t)swp->sw_blocks, M_VMPGDATA);
- free((caddr_t)swp, M_VMPGDATA);
- free((caddr_t)pager, M_VMPAGER);
-}
-
-static int
-swap_pager_getpage(pager, mlist, npages, sync)
- vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
- boolean_t sync;
-{
- register int rv;
-#ifdef DIAGNOSTIC
- vm_page_t m;
- int i;
-#endif
-
-#ifdef DEBUG
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_getpage(%p, %p, %x, %x)\n",
- pager, mlist, npages, sync);
-#endif
-#ifdef DIAGNOSTIC
- for (i = 0; i < npages; i++) {
- m = mlist[i];
-
- if (m->flags & PG_FAULTING)
- panic("swap_pager_getpage: page is already faulting");
- m->flags |= PG_FAULTING;
- }
-#endif
- rv = swap_pager_io((sw_pager_t)pager->pg_data, mlist, npages, B_READ);
-#ifdef DIAGNOSTIC
- for (i = 0; i < npages; i++) {
- m = mlist[i];
-
- m->flags &= ~PG_FAULTING;
- }
-#endif
- return (rv);
-}
-
-static int
-swap_pager_putpage(pager, mlist, npages, sync)
- vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
- boolean_t sync;
-{
-
-#ifdef DEBUG
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_putpage(%p, %p, %x, %x)\n",
- pager, mlist, npages, sync);
-#endif
- if (pager == NULL) {
- swap_pager_clean(B_WRITE);
- return (VM_PAGER_OK); /* ??? */
- }
- return (swap_pager_io((sw_pager_t)pager->pg_data, mlist, npages,
- B_WRITE | (sync ? 0 : B_ASYNC)));
-}
-
-static boolean_t
-swap_pager_haspage(pager, offset)
- vm_pager_t pager;
- vm_offset_t offset;
-{
- register sw_pager_t swp;
- register sw_blk_t swb;
- int ix;
-
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
- printf("swpg_haspage(%p, %lx) ", pager, offset);
-#endif
- swp = (sw_pager_t) pager->pg_data;
- ix = offset / dbtob(swp->sw_bsize);
- if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
- printf("swpg_haspage: %p bad offset %lx, ix %x\n",
- swp->sw_blocks, offset, ix);
-#endif
- return (FALSE);
- }
- swb = &swp->sw_blocks[ix];
- if (swb->swb_block)
- ix = atop(offset % dbtob(swp->sw_bsize));
-#ifdef DEBUG
- if (swpagerdebug & SDB_ALLOCBLK)
- printf("%p blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
- printf("-> %c\n",
- "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
-#endif
- if (swb->swb_block && (swb->swb_mask & (1 << ix)))
- return (TRUE);
- return (FALSE);
-}
-
-static void
-swap_pager_cluster(pager, offset, loffset, hoffset)
- vm_pager_t pager;
- vm_offset_t offset;
- vm_offset_t *loffset;
- vm_offset_t *hoffset;
-{
- sw_pager_t swp;
- register int bsize;
- vm_offset_t loff, hoff;
-
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
- printf("swpg_cluster(%p, %lx) ", pager, offset);
-#endif
- swp = (sw_pager_t) pager->pg_data;
- bsize = dbtob(swp->sw_bsize);
- if (bsize > swap_pager_maxcluster)
- bsize = swap_pager_maxcluster;
-
- loff = offset - (offset % bsize);
-#ifdef DIAGNOSTIC
- if (loff >= swp->sw_osize)
- panic("swap_pager_cluster: bad offset");
-#endif
-
- hoff = loff + bsize;
- if (hoff > swp->sw_osize)
- hoff = swp->sw_osize;
-
- *loffset = loff;
- *hoffset = hoff;
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
- printf("returns [%lx-%lx]\n", loff, hoff);
-#endif
-}
-
-/*
- * Scaled down version of swap().
- * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
- * BOGUS: lower level IO routines expect a KVA so we have to map our
- * provided physical page into the KVA to keep them happy.
- */
-static int
-swap_pager_io(swp, mlist, npages, flags)
- register sw_pager_t swp;
- vm_page_t *mlist;
- int npages;
- int flags;
-{
- register struct buf *bp;
- register sw_blk_t swb;
- register int s;
- int ix;
- u_int mask;
- boolean_t rv;
- vm_offset_t kva, off;
- swp_clean_t spc;
- vm_page_t m;
-
-#ifdef DEBUG
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return (VM_PAGER_FAIL); /* XXX: correct return? */
- if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
- printf("swpg_io(%p, %p, %x, %x)\n", swp, mlist, npages, flags);
- if (flags & B_READ) {
- if (flags & B_ASYNC)
- panic("swap_pager_io: cannot do ASYNC reads");
- if (npages != 1)
- panic("swap_pager_io: cannot do clustered reads");
- }
-#endif
-
- /*
- * First determine if the page exists in the pager if this is
- * a sync read. This quickly handles cases where we are
- * following shadow chains looking for the top level object
- * with the page.
- */
- m = *mlist;
- off = m->offset + m->object->paging_offset;
- ix = off / dbtob(swp->sw_bsize);
- if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
-#ifdef DEBUG
- if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) {
- printf("swap_pager_io: no swap block on write\n");
- return (VM_PAGER_BAD);
- }
-#endif
- return (VM_PAGER_FAIL);
- }
- swb = &swp->sw_blocks[ix];
- off = off % dbtob(swp->sw_bsize);
- if ((flags & B_READ) &&
- (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0))
- return (VM_PAGER_FAIL);
-
- /*
- * For reads (pageins) and synchronous writes, we clean up
- * all completed async pageouts.
- */
- if ((flags & B_ASYNC) == 0) {
- s = splbio();
- swap_pager_clean(flags&B_READ);
-#ifdef DEBUG
- if (swpagerdebug & SDB_PARANOIA)
- swap_pager_clean_check(mlist, npages, flags&B_READ);
-#endif
- splx(s);
- }
- /*
- * For async writes (pageouts), we clean up completed pageouts so
- * that all available resources are freed. Also tells us if this
- * page is already being cleaned. If it is, or no resources
- * are available, we try again later.
- */
- else {
- swap_pager_clean(B_WRITE);
-#ifdef DEBUG
- if (swpagerdebug & SDB_PARANOIA)
- swap_pager_clean_check(mlist, npages, B_WRITE);
-#endif
- if (swap_pager_free.tqh_first == NULL) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("%s: no available io headers\n",
- "swap_pager_io");
-#endif
- return (VM_PAGER_AGAIN);
- }
- }
-
- /*
- * Allocate a swap block if necessary.
- */
- if (swb->swb_block == 0) {
- swb->swb_block = swap_alloc(swp->sw_bsize);
- if (swb->swb_block == 0) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("swpg_io: rmalloc of %x failed\n",
- swp->sw_bsize);
-#endif
- /*
- * XXX this is technically a resource shortage that
- * should return AGAIN, but the situation isn't likely
- * to be remedied just by delaying a little while and
- * trying again (the pageout daemon's current response
- * to AGAIN) so we just return FAIL.
- */
- return (VM_PAGER_FAIL);
- }
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
- printf("swpg_io: %p alloc blk %x at ix %x\n",
- swp->sw_blocks, swb->swb_block, ix);
-#endif
- }
-
- /*
- * Allocate a kernel virtual address and initialize so that PTE
- * is available for lower level IO drivers.
- */
- kva = vm_pager_map_pages(mlist, npages, !(flags & B_ASYNC));
- if (kva == NULL) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("%s: no KVA space to map pages\n",
- "swap_pager_io");
-#endif
- return (VM_PAGER_AGAIN);
- }
-
- /*
- * Get a swap buffer header and initialize it.
- */
- s = splbio();
- while (bswlist.b_actf == NULL) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_IO) /* XXX what should this be? */
- printf("swap_pager_io: wait on swbuf for %p (%d)\n",
- m, flags);
-#endif
- bswlist.b_flags |= B_WANTED;
- tsleep((caddr_t)&bswlist, PSWP+1, "swpgiobuf", 0);
- }
- bp = bswlist.b_actf;
- bswlist.b_actf = bp->b_actf;
- splx(s);
- bp->b_flags = B_BUSY | (flags & B_READ);
- bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
- bp->b_data = (caddr_t)kva;
- bp->b_blkno = swb->swb_block + btodb(off);
- bp->b_vp = 0;
- buf_replacevnode(bp, swapdev_vp);
- bp->b_bcount = npages * PAGE_SIZE;
-
- /*
- * For writes we set up additional buffer fields, record a pageout
- * in progress and mark that these swap blocks are now allocated.
- */
- if ((bp->b_flags & B_READ) == 0) {
- bp->b_dirtyoff = 0;
- bp->b_dirtyend = npages * PAGE_SIZE;
- s = splbio();
- swp->sw_poip++;
- splx(s);
- mask = (~(~0 << npages)) << atop(off);
-#ifdef DEBUG
- swap_pager_poip++;
- if (swpagerdebug & SDB_WRITE)
- printf("swpg_io: write: bp=%p swp=%p poip=%d\n",
- bp, swp, swp->sw_poip);
- if ((swpagerdebug & SDB_ALLOCBLK) &&
- (swb->swb_mask & mask) != mask)
- printf("swpg_io: %p write %d pages at %x+%lx\n",
- swp->sw_blocks, npages, swb->swb_block, atop(off));
- if (swpagerdebug & SDB_CLUSTER)
- printf("swpg_io: off=%lx, npg=%x, mask=%x, bmask=%x\n",
- off, npages, mask, swb->swb_mask);
-#endif
- swp->sw_cnt += count_bits(mask & ~swb->swb_mask);
- swb->swb_mask |= mask;
- }
- /*
- * If this is an async write we set up still more buffer fields
- * and place a "cleaning" entry on the inuse queue.
- */
- if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
-#ifdef DIAGNOSTIC
- if (swap_pager_free.tqh_first == NULL)
- panic("swpg_io: lost spc");
-#endif
- spc = swap_pager_free.tqh_first;
- TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
-#ifdef DIAGNOSTIC
- if (spc->spc_flags != SPC_FREE)
- panic("swpg_io: bad free spc");
-#endif
- spc->spc_flags = SPC_BUSY;
- spc->spc_bp = bp;
- spc->spc_swp = swp;
- spc->spc_kva = kva;
- /*
- * Record the first page. This allows swap_pager_clean
- * to efficiently handle the common case of a single page.
- * For clusters, it allows us to locate the object easily
- * and we then reconstruct the rest of the mlist from spc_kva.
- */
- spc->spc_m = m;
- spc->spc_npages = npages;
- bp->b_flags |= B_CALL;
- bp->b_iodone = swap_pager_iodone;
- s = splbio();
- TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
- splx(s);
- }
-
- /*
- * Finally, start the IO operation.
- * If it is async we are all done, otherwise we must wait for
- * completion and cleanup afterwards.
- */
-#ifdef DEBUG
- if (swpagerdebug & SDB_IO)
- printf("swpg_io: IO start: bp %p, db %lx, va %lx, pa %lx\n",
- bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
-#endif
- VOP_STRATEGY(bp);
- if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_IO)
- printf("swpg_io: IO started: bp %p\n", bp);
-#endif
- return (VM_PAGER_PEND);
- }
- s = splbio();
-#ifdef DEBUG
- if (flags & B_READ)
- swap_pager_piip++;
- else
- swap_pager_poip++;
-#endif
- while ((bp->b_flags & B_DONE) == 0)
- (void) tsleep(bp, PVM, "swpgio", 0);
- if ((flags & B_READ) == 0)
- --swp->sw_poip;
-#ifdef DEBUG
- if (flags & B_READ)
- --swap_pager_piip;
- else
- --swap_pager_poip;
-#endif
- rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
- bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
- bp->b_actf = bswlist.b_actf;
- bswlist.b_actf = bp;
- if (bp->b_vp)
- brelvp(bp);
- if (bswlist.b_flags & B_WANTED) {
- bswlist.b_flags &= ~B_WANTED;
- wakeup(&bswlist);
- }
- if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
- m->flags |= PG_CLEAN;
- pmap_clear_modify(VM_PAGE_TO_PHYS(m));
- }
- splx(s);
-#ifdef DEBUG
- if (swpagerdebug & SDB_IO)
- printf("swpg_io: IO done: bp %p, rv %d\n", bp, rv);
- if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
- printf("swpg_io: IO error\n");
-#endif
- vm_pager_unmap_pages(kva, npages);
- return (rv);
-}
-
-static void
-swap_pager_clean(rw)
- int rw;
-{
- register swp_clean_t spc;
- register int s, i;
- vm_object_t object;
- vm_page_t m;
-
-#ifdef DEBUG
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return;
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_clean(%x)\n", rw);
-#endif
-
- for (;;) {
- /*
- * Look up and removal from inuse list must be done
- * at splbio() to avoid conflicts with swap_pager_iodone.
- */
- s = splbio();
- for (spc = swap_pager_inuse.tqh_first;
- spc != NULL;
- spc = spc->spc_list.tqe_next) {
- /*
- * If the operation is done, remove it from the
- * list and process it.
- *
- * XXX if we can't get the object lock we also
- * leave it on the list and try again later.
- * Is there something better we could do?
- */
- if ((spc->spc_flags & SPC_DONE) &&
- vm_object_lock_try(spc->spc_m->object)) {
- TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
- break;
- }
- }
- splx(s);
-
- /*
- * No operations done, that's all we can do for now.
- */
- if (spc == NULL)
- break;
-
- /*
- * Found a completed operation so finish it off.
- * Note: no longer at splbio since entry is off the list.
- */
- m = spc->spc_m;
- object = m->object;
-
- /*
- * Process each page in the cluster.
- * The first page is explicitly kept in the cleaning
- * entry, others must be reconstructed from the KVA.
- */
- for (i = 0; i < spc->spc_npages; i++) {
- if (i)
- m = vm_pager_atop(spc->spc_kva + ptoa(i));
- /*
- * If no error mark as clean and inform the pmap
- * system. If there was an error, mark as dirty
- * so we will try again.
- *
- * XXX could get stuck doing this, should give up
- * after awhile.
- */
- if (spc->spc_flags & SPC_ERROR) {
- printf("%s: clean of page %lx failed\n",
- "swap_pager_clean", VM_PAGE_TO_PHYS(m));
- m->flags |= PG_LAUNDRY;
- } else {
- m->flags |= PG_CLEAN;
- pmap_clear_modify(VM_PAGE_TO_PHYS(m));
- }
- m->flags &= ~PG_BUSY;
- PAGE_WAKEUP(m);
- }
-
- /*
- * Done with the object, decrement the paging count
- * and unlock it.
- */
- vm_object_paging_end(object);
- vm_object_unlock(object);
-
- /*
- * Free up KVM used and put the entry back on the list.
- */
- vm_pager_unmap_pages(spc->spc_kva, spc->spc_npages);
- spc->spc_flags = SPC_FREE;
- TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
-#ifdef DEBUG
- if (swpagerdebug & SDB_WRITE)
- printf("swpg_clean: free spc %p\n", spc);
-#endif
- }
-}
-
-#ifdef DEBUG
-static void
-swap_pager_clean_check(mlist, npages, rw)
- vm_page_t *mlist;
- int npages;
- int rw;
-{
- register swp_clean_t spc;
- boolean_t bad;
- int i, j, s;
- vm_page_t m;
-
- if (panicstr)
- return;
-
- bad = FALSE;
- s = splbio();
- for (spc = swap_pager_inuse.tqh_first;
- spc != NULL;
- spc = spc->spc_list.tqe_next) {
- for (j = 0; j < spc->spc_npages; j++) {
- m = vm_pager_atop(spc->spc_kva + ptoa(j));
- for (i = 0; i < npages; i++)
- if (m == mlist[i]) {
- if (swpagerdebug & SDB_ANOM)
- printf(
- "swpg_clean_check: %s: page %p on list, flags %x\n",
- rw == B_WRITE ? "write" : "read", mlist[i], spc->spc_flags);
- bad = TRUE;
- }
- }
- }
- splx(s);
- if (bad)
- panic("swpg_clean_check");
-}
-#endif
-
-static void
-swap_pager_iodone(bp)
- register struct buf *bp;
-{
- register swp_clean_t spc;
- daddr_t blk;
- int s;
-
-#ifdef DEBUG
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return;
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_iodone(%p)\n", bp);
-#endif
- s = splbio();
- for (spc = swap_pager_inuse.tqh_first;
- spc != NULL;
- spc = spc->spc_list.tqe_next)
- if (spc->spc_bp == bp)
- break;
-#ifdef DIAGNOSTIC
- if (spc == NULL)
- panic("swap_pager_iodone: bp not found");
-#endif
-
- spc->spc_flags &= ~SPC_BUSY;
- spc->spc_flags |= SPC_DONE;
- if (bp->b_flags & B_ERROR)
- spc->spc_flags |= SPC_ERROR;
- spc->spc_bp = NULL;
- blk = bp->b_blkno;
-
-#ifdef DEBUG
- --swap_pager_poip;
- if (swpagerdebug & SDB_WRITE)
- printf("swpg_iodone: bp=%p swp=%p flags=%x spc=%p poip=%x\n",
- bp, spc->spc_swp, spc->spc_swp->sw_flags,
- spc, spc->spc_swp->sw_poip);
-#endif
-
- spc->spc_swp->sw_poip--;
- if (spc->spc_swp->sw_flags & SW_WANTED) {
- spc->spc_swp->sw_flags &= ~SW_WANTED;
- wakeup(spc->spc_swp);
- }
-
- bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
- bp->b_actf = bswlist.b_actf;
- bswlist.b_actf = bp;
- if (bp->b_vp)
- brelvp(bp);
- if (bswlist.b_flags & B_WANTED) {
- bswlist.b_flags &= ~B_WANTED;
- wakeup(&bswlist);
- }
- wakeup(&vm_pages_needed);
- splx(s);
-}
-
-/*
- * swap_pager_remove:
- *
- * This is called via the vm_pager_remove path and
- * will remove any pages inside the range [from, to)
- * backed by us. It is assumed that both addresses
- * are multiples of PAGE_SIZE. The special case
- * where TO is zero means: remove to end of object.
- */
-static int
-swap_pager_remove(pager, from, to)
- vm_pager_t pager;
- vm_offset_t from, to;
-{
- sw_pager_t swp;
- sw_blk_t swb;
- int bsize, blk, bit, to_blk, to_bit, mask, cnt = 0;
-
-#ifdef DEBUG
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_remove()\n");
-#endif
-
- /* Special case stupid ranges. */
- if (to > 0 && from >= to)
- return (0);
-
- swp = (sw_pager_t)pager->pg_data;
-
- /*
- * If we back no pages, just return. XXX Can this
- * ever be the case? At least all remove calls should
- * be through vm_object_remove_from_pager which also
- * deallocates the pager when it no longer backs any
- * pages. Left is the initial case: can a swap-pager
- * be created without any pages put into it?
- */
- if (swp->sw_cnt == 0)
- return (0);
-
- bsize = dbtob(swp->sw_bsize);
- blk = from / bsize;
-
- /* Another fast one.. no blocks in range. */
- if (blk >= swp->sw_nblocks)
- return (0);
- bit = atop(from % bsize);
-
- /*
- * Deal with the special case with TO == 0.
- * XXX Perhaps the code might be improved if we
- * made to_blk & to_bit signify the inclusive end
- * of range instead (i.e. to - 1).
- */
- if (to) {
- to_blk = to / bsize;
- if (to_blk >= swp->sw_nblocks) {
- to_blk = swp->sw_nblocks;
- to_bit = 0;
- } else
- to_bit = atop(to % bsize);
- } else {
- to_blk = swp->sw_nblocks;
- to_bit = 0;
- }
-
- /*
- * Loop over the range, remove pages as we find them.
- * If all pages in a block get freed, deallocate the
- * swap block as well.
- */
- for (swb = &swp->sw_blocks[blk], mask = (1 << bit) - 1;
- blk < to_blk || (blk == to_blk && to_bit);
- blk++, swb++, mask = 0) {
-
- /* Don't bother if the block is already cleared. */
- if (swb->swb_block == 0)
- continue;
-
- /*
- * When coming to the end-block we need to
- * adjust the mask in the other end, as well as
- * ensuring this will be the last iteration.
- */
- if (blk == to_blk) {
- mask |= ~((1 << to_bit) - 1);
- to_bit = 0;
- }
-
- /* Count pages that will be removed. */
- cnt += count_bits(swb->swb_mask & ~mask);
-
- /*
- * Remove pages by applying our mask, and if this
- * means no pages are left in the block, free it.
- */
- if ((swb->swb_mask &= mask) == 0) {
- swap_free(swp->sw_bsize, swb->swb_block);
- swb->swb_block = 0;
- }
- }
-
- /* Adjust the page count and return the removed count. */
- swp->sw_cnt -= cnt;
-#ifdef DIAGNOSTIC
- if (swp->sw_cnt < 0)
- panic("swap_pager_remove: sw_cnt < 0");
-#endif
- return (cnt);
-}
-
-/*
- * swap_pager_next:
- *
- * This is called via the vm_pager_next path and
- * will return the offset of the next page (addresswise)
- * which this pager is backing. If there are no more
- * pages we will return the size of the pager's managed
- * space (which by definition is larger than any page's
- * offset).
- */
-static vm_offset_t
-swap_pager_next(pager, offset)
- vm_pager_t pager;
- vm_offset_t offset;
-{
- sw_pager_t swp;
- sw_blk_t swb;
- int bsize, blk, bit, to_blk, to_bit, mask;
-
-#ifdef DEBUG
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_next()\n");
-#endif
-
- swp = (sw_pager_t)pager->pg_data;
-
- /*
- * If we back no pages, just return our size. XXX Can
- * this ever be the case? At least all remove calls
- * should be through vm_object_remove_from_pager which
- * also deallocates the pager when it no longer backs any
- * pages. Left is the initial case: can a swap-pager
- * be created without any pages put into it?
- */
- if (swp->sw_cnt == 0)
- return (swp->sw_osize);
-
- bsize = dbtob(swp->sw_bsize);
- blk = offset / bsize;
-
- /* Another fast one.. no blocks in range. */
- if (blk >= swp->sw_nblocks)
- return (swp->sw_osize);
- bit = atop(offset % bsize);
- to_blk = swp->sw_osize / bsize;
- to_bit = atop(swp->sw_osize % bsize);
-
- /*
- * Loop over the remaining blocks, returning as soon
- * as we find a page.
- */
- swb = &swp->sw_blocks[blk];
- mask = ~((1 << bit) - 1);
- for (;;) {
- if (blk == to_blk) {
- /* Nothing to be done in this end-block? */
- if (to_bit == 0)
- break;
- mask &= (1 << to_bit) - 1;
- }
-
- /*
- * Check this block for a backed page and return
- * its offset if there.
- */
- mask &= swb->swb_mask;
- if (mask)
- return (blk * bsize + (ffs (mask) - 1) * PAGE_SIZE);
-
- /*
- * If we handled the end of range now, this
- * means we are ready.
- */
- if (blk == to_blk)
- break;
-
- /* Get on with the next block. */
- blk++;
- swb++;
- mask = ~0;
- }
- return (swp->sw_osize);
-}
-
-/*
- * swap_pager_count:
- *
- * Just returns the count of pages backed by this pager.
- */
-int
-swap_pager_count(pager)
- vm_pager_t pager;
-{
-#ifndef notyet
- return ((sw_pager_t)pager->pg_data)->sw_cnt;
-#else
- sw_pager_t swp;
- sw_blk_t swb;
- int i, cnt = 0;
-
- swp = (sw_pager_t)pager->pg_data;
- if (swp->sw_blocks == NULL)
- return (0);
- for (i = 0; i < swp->sw_nblocks; i++)
- cnt += count_bits(swp->sw_blocks[i].swb_mask);
- return (cnt);
-#endif
-}
-
-/*
- * count_bits:
- *
- * Counts the number of set bits in a word.
- */
-static int
-count_bits(x)
- u_int x;
-{
- int cnt = 0;
-
- while (x) {
- cnt += x & 1;
- x >>= 1;
- }
- return (cnt);
-}
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
deleted file mode 100644
index 40cfd7821f4..00000000000
--- a/sys/vm/vm_fault.c
+++ /dev/null
@@ -1,1015 +0,0 @@
-/* $OpenBSD: vm_fault.c,v 1.19 2001/06/08 08:09:43 art Exp $ */
-/* $NetBSD: vm_fault.c,v 1.21 1998/01/31 04:02:39 ross Exp $ */
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_fault.c 8.5 (Berkeley) 1/9/95
- *
- *
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * Page fault handling module.
- */
-
-#include <sys/param.h>
-#include <sys/proc.h>
-#include <sys/systm.h>
-#include <sys/user.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pageout.h>
-
-/*
- * vm_fault:
- *
- * Handle a page fault occurring at the given address,
- * requiring the given permissions, in the map specified.
- * If successful, the page is inserted into the
- * associated physical map.
- *
- * NOTE: the given address should be truncated to the
- * proper page address.
- *
- * KERN_SUCCESS is returned if the page fault is handled; otherwise,
- * a standard error specifying why the fault is fatal is returned.
- *
- *
- * The map in question must be referenced, and remains so.
- * Caller may hold no locks.
- */
-int
-vm_fault(map, vaddr, fault_type, change_wiring)
- vm_map_t map;
- vm_offset_t vaddr;
- vm_prot_t fault_type;
- boolean_t change_wiring;
-{
- vm_object_t first_object;
- vm_offset_t first_offset;
- vm_map_entry_t entry;
- register vm_object_t object;
- register vm_offset_t offset;
- register vm_page_t m;
- vm_page_t first_m;
- vm_prot_t prot;
- int result;
- boolean_t wired;
- boolean_t su;
- boolean_t lookup_still_valid;
- boolean_t page_exists;
- vm_page_t old_m;
- vm_object_t next_object;
-
- cnt.v_faults++; /* needs lock XXX */
-
-/*
- * Recovery actions
- */
-#define FREE_PAGE(m) { \
- PAGE_WAKEUP(m); \
- vm_page_lock_queues(); \
- vm_page_free(m); \
- vm_page_unlock_queues(); \
-}
-
-#define RELEASE_PAGE(m) { \
- PAGE_WAKEUP(m); \
- vm_page_lock_queues(); \
- vm_page_activate(m); \
- vm_page_unlock_queues(); \
-}
-
-#define UNLOCK_MAP { \
- if (lookup_still_valid) { \
- vm_map_lookup_done(map, entry); \
- lookup_still_valid = FALSE; \
- } \
-}
-
-#define UNLOCK_THINGS { \
- vm_object_paging_end(object); \
- vm_object_unlock(object); \
- if (object != first_object) { \
- vm_object_lock(first_object); \
- FREE_PAGE(first_m); \
- vm_object_paging_end(first_object); \
- vm_object_unlock(first_object); \
- } \
- UNLOCK_MAP; \
-}
-
-#define UNLOCK_AND_DEALLOCATE { \
- UNLOCK_THINGS; \
- vm_object_deallocate(first_object); \
-}
-
- RetryFault: ;
-
- /*
- * Find the backing store object and offset into
- * it to begin the search.
- */
-
- if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry,
- &first_object, &first_offset, &prot, &wired, &su)) !=
- KERN_SUCCESS) {
- return (result);
- }
- lookup_still_valid = TRUE;
-
- if (wired)
- fault_type = prot;
-
- first_m = NULL;
-
- /*
- * Make a reference to this object to
- * prevent its disposal while we are messing with
- * it. Once we have the reference, the map is free
- * to be diddled. Since objects reference their
- * shadows (and copies), they will stay around as well.
- */
-
- vm_object_lock(first_object);
-
- first_object->ref_count++;
- vm_object_paging_begin(first_object);
-
- /*
- * INVARIANTS (through entire routine):
- *
- * 1) At all times, we must either have the object lock or a busy
- * page in some object to prevent some other thread from trying
- * to bring in the same page.
- *
- * Note that we cannot hold any locks during the pager access or
- * when waiting for memory, so we use a busy page then.
- *
- * Note also that we aren't as concerned about more than one thread
- * attempting to pager_data_unlock the same page at once, so we
- * don't hold the page as busy then, but do record the highest
- * unlock value so far. [Unlock requests may also be delivered
- * out of order.]
- *
- * 2) Once we have a busy page, we must remove it from the pageout
- * queues, so that the pageout daemon will not grab it away.
- *
- * 3) To prevent another thread from racing us down the shadow chain
- * and entering a new page in the top object before we do, we must
- * keep a busy page in the top object while following the shadow
- * chain.
- *
- * 4) We must increment paging_in_progress on any object for which we
- * have a busy page, to prevent vm_object_collapse from removing
- * the busy page without our noticing.
- */
-
- /*
- * Search for the page at object/offset.
- */
- object = first_object;
- offset = first_offset;
-
- /*
- * See whether this page is resident
- */
- while (TRUE) {
- m = vm_page_lookup(object, offset);
- if (m != NULL) {
- /*
- * If the page is being brought in,
- * wait for it and then retry.
- */
- if (m->flags & PG_BUSY) {
-#ifdef DOTHREADS
- int wait_result;
-
- PAGE_ASSERT_WAIT(m, !change_wiring);
- UNLOCK_THINGS;
- thread_block("mFltbsy");
- wait_result = current_thread()->wait_result;
- vm_object_deallocate(first_object);
- if (wait_result != THREAD_AWAKENED)
- return (KERN_SUCCESS);
- goto RetryFault;
-#else
- PAGE_ASSERT_WAIT(m, !change_wiring);
- UNLOCK_THINGS;
- cnt.v_intrans++;
- thread_block("mFltbsy2");
- vm_object_deallocate(first_object);
- goto RetryFault;
-#endif
- }
-
- /*
- * Remove the page from the pageout daemon's
- * reach while we play with it.
- */
-
- vm_page_lock_queues();
- if (m->flags & PG_INACTIVE) {
- TAILQ_REMOVE(&vm_page_queue_inactive, m,
- pageq);
- m->flags &= ~PG_INACTIVE;
- cnt.v_inactive_count--;
- cnt.v_reactivated++;
- }
-
- if (m->flags & PG_ACTIVE) {
- TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
- m->flags &= ~PG_ACTIVE;
- cnt.v_active_count--;
- }
- vm_page_unlock_queues();
-
- /*
- * Mark page busy for other threads.
- */
- m->flags |= PG_BUSY;
- if (curproc != &proc0)
- curproc->p_addr->u_stats.p_ru.ru_minflt++;
- break;
- }
-
- if (((object->pager != NULL) && (!change_wiring || wired))
- || (object == first_object)) {
-
- /*
- * Allocate a new page for this object/offset
- * pair.
- */
- m = vm_page_alloc(object, offset);
-
- if (m == NULL) {
- UNLOCK_AND_DEALLOCATE;
- vm_wait("fVfault1");
- goto RetryFault;
- }
- }
-
- if (object->pager != NULL && (!change_wiring || wired)) {
- int rv;
-
- /*
- * Now that we have a busy page, we can
- * release the object lock.
- */
- vm_object_unlock(object);
-
- /*
- * Call the pager to retrieve the data, if any,
- * after releasing the lock on the map.
- */
- UNLOCK_MAP;
- cnt.v_pageins++;
- rv = vm_pager_get(object->pager, m, TRUE);
-
- /*
- * Reacquire the object lock to preserve our
- * invariant.
- */
- vm_object_lock(object);
-
- /*
- * Found the page.
- * Leave it busy while we play with it.
- */
- if (rv == VM_PAGER_OK) {
- /*
- * Relookup in case pager changed page.
- * Pager is responsible for disposition
- * of old page if moved.
- */
- m = vm_page_lookup(object, offset);
-
- cnt.v_pgpgin++;
- m->flags &= ~PG_FAKE;
- m->flags |= PG_CLEAN;
- pmap_clear_modify(VM_PAGE_TO_PHYS(m));
- if (curproc != &proc0)
- curproc->p_addr->
- u_stats.p_ru.ru_majflt++;
- break;
- }
-
- /*
- * IO error or page outside the range of the pager:
- * cleanup and return an error.
- */
- if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) {
- FREE_PAGE(m);
- UNLOCK_AND_DEALLOCATE;
- return (KERN_PROTECTION_FAILURE); /* XXX */
- }
- /*
- * rv == VM_PAGER_FAIL:
- *
- * Page does not exist at this object/offset.
- * Free the bogus page (waking up anyone waiting
- * for it) and continue on to the next object.
- *
- * If this is the top-level object, we must
- * leave the busy page to prevent another
- * thread from rushing past us, and inserting
- * the page in that object at the same time
- * that we are.
- */
- if (object != first_object) {
- FREE_PAGE(m);
- /* note that `m' is not used after this */
- }
- }
-
- /*
- * We get here if the object has no pager (or unwiring)
- * or the pager doesn't have the page.
- */
- if (object == first_object)
- first_m = m;
-
- /*
- * Move on to the next object. Lock the next
- * object before unlocking the current one.
- */
-
- offset += object->shadow_offset;
- next_object = object->shadow;
- if (next_object == NULL) {
- /*
- * If there's no object left, fill the page
- * in the top object with zeros.
- */
- if (object != first_object) {
- vm_object_paging_end(object);
- vm_object_unlock(object);
-
- object = first_object;
- offset = first_offset;
- m = first_m;
- vm_object_lock(object);
- }
- first_m = NULL;
-
- vm_page_zero_fill(m);
- cnt.v_zfod++;
- m->flags &= ~PG_FAKE;
- if (curproc != &proc0)
- curproc->p_addr->u_stats.p_ru.ru_minflt++;
- break;
- }
- else {
- vm_object_lock(next_object);
- if (object != first_object)
- vm_object_paging_end(object);
- vm_object_unlock(object);
- object = next_object;
- vm_object_paging_begin(object);
- }
- }
-
- if ((m->flags & (PG_ACTIVE | PG_INACTIVE | PG_BUSY)) != PG_BUSY)
- panic("vm_fault: active, inactive or !busy after main loop");
-
- /*
- * PAGE HAS BEEN FOUND.
- * [Loop invariant still holds -- the object lock is held.]
- */
- old_m = m; /* save page that would be copied */
-
- /*
- * If the page is being written, but isn't already owned by the
- * top-level object, we have to copy it into a new page owned
- * by the top-level object.
- */
- if (object != first_object) {
- /*
- * We only really need to copy if we want to write it.
- */
- if (fault_type & VM_PROT_WRITE) {
-
- /*
- * If we try to collapse first_object at this
- * point, we may deadlock when we try to get
- * the lock on an intermediate object (since we
- * have the bottom object locked). We can't
- * unlock the bottom object, because the page
- * we found may move (by collapse) if we do.
- *
- * Instead, we first copy the page. Then, when
- * we have no more use for the bottom object,
- * we unlock it and try to collapse.
- *
- * Note that we copy the page even if we didn't
- * need to... that's the breaks.
- */
-
- /*
- * We already have an empty page in
- * first_object - use it.
- */
- vm_page_copy(m, first_m);
- first_m->flags &= ~PG_FAKE;
-
- /*
- * If another map is truly sharing this
- * page with us, we have to flush all
- * uses of the original page, since we
- * can't distinguish those which want the
- * original from those which need the
- * new copy.
- *
- * XXX If we know that only one map has
- * access to this page, then we could
- * avoid the pmap_page_protect() call.
- */
- vm_page_lock_queues();
- vm_page_deactivate(m);
- pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
- vm_page_unlock_queues();
-
- /*
- * We no longer need the old page or object.
- */
- PAGE_WAKEUP(m);
- vm_object_paging_end(object);
- vm_object_unlock(object);
-
- /*
- * Only use the new page below...
- */
- cnt.v_cow_faults++;
- m = first_m;
- object = first_object;
- offset = first_offset;
-
- /*
- * Now that we've gotten the copy out of the
- * way, let's try to collapse the top object.
- */
- vm_object_lock(object);
- /*
- * But we have to play ugly games with
- * paging_in_progress to do that...
- */
- vm_object_paging_end(object);
- vm_object_collapse(object);
- vm_object_paging_begin(object);
- } else {
- prot &= ~VM_PROT_WRITE;
- m->flags |= PG_COPYONWRITE;
- }
- }
-
- if (m->flags & (PG_ACTIVE|PG_INACTIVE))
- panic("%s: active or inactive before copy object handling",
- "vm_fault");
-
- /*
- * If the page is being written, but hasn't been
- * copied to the copy-object, we have to copy it there.
- */
- RetryCopy:
- if (first_object->copy != NULL) {
- vm_object_t copy_object = first_object->copy;
- vm_offset_t copy_offset;
- vm_page_t copy_m;
-
- /*
- * We only need to copy if we want to write it.
- */
- if ((fault_type & VM_PROT_WRITE) == 0) {
- prot &= ~VM_PROT_WRITE;
- m->flags |= PG_COPYONWRITE;
- }
- else {
- /*
- * Try to get the lock on the copy_object.
- */
- if (!vm_object_lock_try(copy_object)) {
- vm_object_unlock(object);
- /* should spin a bit here... */
- vm_object_lock(object);
- goto RetryCopy;
- }
-
- /*
- * Make another reference to the copy-object,
- * to keep it from disappearing during the
- * copy.
- */
- copy_object->ref_count++;
-
- /*
- * Does the page exist in the copy?
- */
- copy_offset = first_offset -
- copy_object->shadow_offset;
- copy_m = vm_page_lookup(copy_object, copy_offset);
- if ((page_exists = (copy_m != NULL)) != 0) {
- if (copy_m->flags & PG_BUSY) {
-#ifdef DOTHREADS
- int wait_result;
-
- /*
- * If the page is being brought
- * in, wait for it and then retry.
- */
- PAGE_ASSERT_WAIT(copy_m,
- !change_wiring);
- RELEASE_PAGE(m);
- copy_object->ref_count--;
- vm_object_unlock(copy_object);
- UNLOCK_THINGS;
- thread_block("mCpybsy");
- wait_result =
- current_thread()->wait_result;
- vm_object_deallocate(first_object);
- if (wait_result != THREAD_AWAKENED)
- return (KERN_SUCCESS);
- goto RetryFault;
-#else
- /*
- * If the page is being brought
- * in, wait for it and then retry.
- */
- PAGE_ASSERT_WAIT(copy_m,
- !change_wiring);
- RELEASE_PAGE(m);
- copy_object->ref_count--;
- vm_object_unlock(copy_object);
- UNLOCK_THINGS;
- thread_block("mCpybsy2");
- vm_object_deallocate(first_object);
- goto RetryFault;
-#endif
- }
- }
-
- /*
- * If the page is not in memory (in the object)
- * and the object has a pager, we have to check
- * if the pager has the data in secondary
- * storage.
- */
- if (!page_exists) {
-
- /*
- * If we don't allocate a (blank) page
- * here... another thread could try
- * to page it in, allocate a page, and
- * then block on the busy page in its
- * shadow (first_object). Then we'd
- * trip over the busy page after we
- * found that the copy_object's pager
- * doesn't have the page...
- */
- copy_m =
- vm_page_alloc(copy_object, copy_offset);
- if (copy_m == NULL) {
- /*
- * Wait for a page, then retry.
- */
- RELEASE_PAGE(m);
- copy_object->ref_count--;
- vm_object_unlock(copy_object);
- UNLOCK_AND_DEALLOCATE;
- vm_wait("fCopy");
- goto RetryFault;
- }
-
- if (copy_object->pager != NULL) {
- vm_object_unlock(object);
- vm_object_unlock(copy_object);
- UNLOCK_MAP;
-
- page_exists = vm_pager_has_page(
- copy_object->pager,
- (copy_offset +
- copy_object->paging_offset));
-
- vm_object_lock(copy_object);
-
- /*
- * Since the map is unlocked, someone
- * else could have copied this object
- * and put a different copy_object
- * between the two. Or, the last
- * reference to the copy-object (other
- * than the one we have) may have
- * disappeared - if that has happened,
- * we don't need to make the copy.
- */
- if (copy_object->shadow != object ||
- copy_object->ref_count == 1) {
- /*
- * Gaah... start over!
- */
- FREE_PAGE(copy_m);
- vm_object_unlock(copy_object);
- /* may block */
- vm_object_deallocate(
- copy_object);
- vm_object_lock(object);
- goto RetryCopy;
- }
- vm_object_lock(object);
-
- if (page_exists) {
- /*
- * We didn't need the page
- */
- FREE_PAGE(copy_m);
- }
- }
- }
- if (!page_exists) {
- /*
- * Must copy page into copy-object.
- */
- vm_page_copy(m, copy_m);
- copy_m->flags &= ~PG_FAKE;
-
- /*
- * Things to remember:
- * 1. The copied page must be marked 'dirty'
- * so it will be paged out to the copy
- * object.
- * 2. If the old page was in use by any users
- * of the copy-object, it must be removed
- * from all pmaps. (We can't know which
- * pmaps use it.)
- */
- vm_page_lock_queues();
- pmap_page_protect(VM_PAGE_TO_PHYS(old_m),
- VM_PROT_NONE);
- copy_m->flags &= ~PG_CLEAN;
- vm_page_activate(copy_m); /* XXX */
- vm_page_unlock_queues();
-
- PAGE_WAKEUP(copy_m);
- }
- /*
- * The reference count on copy_object must be
- * at least 2: one for our extra reference,
- * and at least one from the outside world
- * (we checked that when we last locked
- * copy_object).
- */
- copy_object->ref_count--;
- vm_object_unlock(copy_object);
- m->flags &= ~PG_COPYONWRITE;
- }
- }
-
- if (m->flags & (PG_ACTIVE | PG_INACTIVE))
- panic("vm_fault: active or inactive before retrying lookup");
-
- /*
- * We must verify that the maps have not changed
- * since our last lookup.
- */
- if (!lookup_still_valid) {
- vm_object_t retry_object;
- vm_offset_t retry_offset;
- vm_prot_t retry_prot;
-
- /*
- * Since map entries may be pageable, make sure we can
- * take a page fault on them.
- */
- vm_object_unlock(object);
-
- /*
- * To avoid trying to write_lock the map while another
- * thread has it read_locked (in vm_map_pageable), we
- * do not try for write permission. If the page is
- * still writable, we will get write permission. If it
- * is not, or has been marked needs_copy, we enter the
- * mapping without write permission, and will merely
- * take another fault.
- */
- result = vm_map_lookup(&map, vaddr,
- fault_type & ~VM_PROT_WRITE, &entry, &retry_object,
- &retry_offset, &retry_prot, &wired, &su);
-
- vm_object_lock(object);
-
- /*
- * If we don't need the page any longer, put it on the
- * active list (the easiest thing to do here). If no
- * one needs it, pageout will grab it eventually.
- */
-
- if (result != KERN_SUCCESS) {
- RELEASE_PAGE(m);
- UNLOCK_AND_DEALLOCATE;
- return (result);
- }
-
- lookup_still_valid = TRUE;
-
- if ((retry_object != first_object) ||
- (retry_offset != first_offset)) {
- RELEASE_PAGE(m);
- UNLOCK_AND_DEALLOCATE;
- goto RetryFault;
- }
-
- /*
- * Check whether the protection has changed or the object
- * has been copied while we left the map unlocked.
- * Changing from read to write permission is OK - we leave
- * the page write-protected, and catch the write fault.
- * Changing from write to read permission means that we
- * can't mark the page write-enabled after all.
- */
- prot &= retry_prot;
- if (m->flags & PG_COPYONWRITE)
- prot &= ~VM_PROT_WRITE;
- }
-
- /*
- * (the various bits we're fiddling with here are locked by
- * the object's lock)
- */
-
- /* XXX This distorts the meaning of the copy_on_write bit */
-
- if (prot & VM_PROT_WRITE)
- m->flags &= ~PG_COPYONWRITE;
-
- /*
- * It's critically important that a wired-down page be faulted
- * only once in each map for which it is wired.
- */
-
- if (m->flags & (PG_ACTIVE | PG_INACTIVE))
- panic("vm_fault: active or inactive before pmap_enter");
-
- vm_object_unlock(object);
-
- /*
- * Put this page into the physical map.
- * We had to do the unlock above because pmap_enter
- * may cause other faults. We don't put the
- * page back on the active queue until later so
- * that the page-out daemon won't find us (yet).
- */
-
- pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired, 0);
-
- /*
- * If the page is not wired down, then put it where the
- * pageout daemon can find it.
- */
- vm_object_lock(object);
- vm_page_lock_queues();
- if (change_wiring) {
- if (wired)
- vm_page_wire(m);
- else
- vm_page_unwire(m);
- }
- else
- vm_page_activate(m);
- vm_page_unlock_queues();
-
- /*
- * Unlock everything, and return
- */
-
- PAGE_WAKEUP(m);
- UNLOCK_AND_DEALLOCATE;
-
- return (KERN_SUCCESS);
-}
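
[Editor's sketch, not part of the original file] The copy-on-write handling above boils down to one decision. The fragment below restates it with the locking, paging_in_progress bookkeeping and the copy-object pass stripped out, reusing the names from vm_fault() itself (object, first_object, fault_type, PG_COPYONWRITE); it is an illustration, not a drop-in replacement.

	/* Sketch only: the COW decision from vm_fault(), unlocked and
	 * simplified. */
	if (object != first_object) {
		if (fault_type & VM_PROT_WRITE) {
			/* Write fault on a backing-object page: copy it into
			 * the page already reserved in the top-level object
			 * and continue with that copy. */
			vm_page_copy(m, first_m);
			first_m->flags &= ~PG_FAKE;
			m = first_m;
			object = first_object;
			offset = first_offset;
		} else {
			/* Read fault: share the backing page read-only and
			 * remember that a later write must copy it. */
			prot &= ~VM_PROT_WRITE;
			m->flags |= PG_COPYONWRITE;
		}
	}
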
-
-/*
- * vm_fault_wire:
- *
- * Wire down a range of virtual addresses in a map.
- */
-int
-vm_fault_wire(map, start, end)
- vm_map_t map;
- vm_offset_t start, end;
-{
- register vm_offset_t va;
- register pmap_t pmap;
- int rv;
-
- pmap = vm_map_pmap(map);
-
- /*
- * We simulate a fault to get the page and enter it
- * in the physical map.
- */
-
- for (va = start; va < end; va += PAGE_SIZE) {
- rv = vm_fault(map, va, VM_PROT_NONE, TRUE);
- if (rv) {
- if (va != start)
- vm_fault_unwire(map, start, va);
- return (rv);
- }
- }
- return (KERN_SUCCESS);
-}
-
-
-/*
- * vm_fault_unwire:
- *
- * Unwire a range of virtual addresses in a map.
- */
-void
-vm_fault_unwire(map, start, end)
- vm_map_t map;
- vm_offset_t start, end;
-{
-
- register vm_offset_t va;
- vm_offset_t pa;
- register pmap_t pmap;
-
- pmap = vm_map_pmap(map);
-
- /*
- * Since the pages are wired down, we must be able to
- * get their mappings from the physical map system.
- */
- vm_page_lock_queues();
-
- for (va = start; va < end; va += PAGE_SIZE) {
- if (pmap_extract(pmap, va, &pa) == FALSE) {
- panic("unwire: page not in pmap");
- }
- pmap_unwire(pmap, va);
- vm_page_unwire(PHYS_TO_VM_PAGE(pa));
- }
- vm_page_unlock_queues();
-}
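
[Editor's sketch] A hedged usage example of the pair above; the caller name is invented, only vm_fault_wire(), vm_fault_unwire() and KERN_SUCCESS come from this file. Note that vm_fault_wire() already unwires the partially wired prefix on failure, so the caller only cleans up on the success path.

	int
	example_wire_for_io(map, start, end)	/* hypothetical caller */
		vm_map_t map;
		vm_offset_t start, end;
	{
		int rv;

		rv = vm_fault_wire(map, start, end);
		if (rv != KERN_SUCCESS)
			return (rv);		/* nothing is left wired */

		/* ... operate on pages that are now guaranteed resident ... */

		vm_fault_unwire(map, start, end);
		return (KERN_SUCCESS);
	}
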
-
-/*
- * Routine:
- * vm_fault_copy_entry
- * Function:
- * Copy all of the pages from a wired-down map entry to another.
- *
- * In/out conditions:
- * The source and destination maps must be locked for write.
- * The source map entry must be wired down (or be a sharing map
- * entry corresponding to a main map entry that is wired down).
- */
-void
-vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
- vm_map_t dst_map;
- vm_map_t src_map;
- vm_map_entry_t dst_entry;
- vm_map_entry_t src_entry;
-{
-
- vm_object_t dst_object;
- vm_object_t src_object;
- vm_offset_t dst_offset;
- vm_offset_t src_offset;
- vm_prot_t prot;
- vm_offset_t vaddr;
- vm_page_t dst_m;
- vm_page_t src_m;
-
-#ifdef lint
- src_map++;
-#endif
-
- src_object = src_entry->object.vm_object;
- src_offset = src_entry->offset;
-
- /*
- * Create the top-level object for the destination entry.
- * (Doesn't actually shadow anything - we copy the pages
- * directly.)
- */
- dst_object =
- vm_object_allocate((vm_size_t)(dst_entry->end - dst_entry->start));
-
- dst_entry->object.vm_object = dst_object;
- dst_entry->offset = 0;
-
- prot = dst_entry->max_protection;
-
- /*
- * Loop through all of the pages in the entry's range, copying
- * each one from the source object (it should be there) to the
- * destination object.
- */
- for (vaddr = dst_entry->start, dst_offset = 0;
- vaddr < dst_entry->end;
- vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {
-
- /*
- * Allocate a page in the destination object
- */
- vm_object_lock(dst_object);
- do {
- dst_m = vm_page_alloc(dst_object, dst_offset);
- if (dst_m == NULL) {
- vm_object_unlock(dst_object);
- vm_wait("fVm_copy");
- vm_object_lock(dst_object);
- }
- } while (dst_m == NULL);
-
- /*
- * Find the page in the source object, and copy it in.
- * (Because the source is wired down, the page will be
- * in memory.)
- */
- vm_object_lock(src_object);
- src_m = vm_page_lookup(src_object, dst_offset + src_offset);
- if (src_m == NULL)
- panic("vm_fault_copy_wired: page missing");
-
- vm_page_copy(src_m, dst_m);
-
- /*
- * Enter it in the pmap...
- */
- vm_object_unlock(src_object);
- vm_object_unlock(dst_object);
-
- pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), prot,
- FALSE, 0);
-
- /*
- * Mark it no longer busy, and put it on the active list.
- */
- vm_object_lock(dst_object);
- vm_page_lock_queues();
- vm_page_activate(dst_m);
- vm_page_unlock_queues();
- PAGE_WAKEUP(dst_m);
- vm_object_unlock(dst_object);
- }
-
-}
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
deleted file mode 100644
index 9b86e5c7b37..00000000000
--- a/sys/vm/vm_glue.c
+++ /dev/null
@@ -1,532 +0,0 @@
-/* $OpenBSD: vm_glue.c,v 1.38 2001/06/08 08:09:43 art Exp $ */
-/* $NetBSD: vm_glue.c,v 1.55.4.1 1996/06/13 17:25:45 cgd Exp $ */
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_glue.c 8.9 (Berkeley) 3/4/95
- *
- *
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/resourcevar.h>
-#include <sys/buf.h>
-#include <sys/user.h>
-#ifdef SYSVSHM
-#include <sys/shm.h>
-#endif
-
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_page.h>
-#include <vm/vm_kern.h>
-
-#include <machine/cpu.h>
-
-int avefree = 0; /* XXX */
-unsigned maxdmap = MAXDSIZ; /* XXX */
-unsigned maxsmap = MAXSSIZ; /* XXX */
-int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool */
-
-int
-kernacc(addr, len, rw)
- caddr_t addr;
- int len, rw;
-{
- boolean_t rv;
- vm_offset_t saddr, eaddr;
- vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
-
- saddr = trunc_page((vaddr_t)addr);
- eaddr = round_page((vaddr_t)addr+len);
- rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
- /*
- * XXX there are still some things (e.g. the buffer cache) that
- * are managed behind the VM system's back so even though an
- * address is accessible in the mind of the VM system, there may
- * not be physical pages where the VM thinks there is. This can
- * lead to bogus allocation of pages in the kernel address space
- * or worse, inconsistencies at the pmap level. We only worry
- * about the buffer cache for now.
- */
- if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers &&
- saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf))
- rv = FALSE;
- return (rv == TRUE);
-}
-
-int
-useracc(addr, len, rw)
- caddr_t addr;
- int len, rw;
-{
- boolean_t rv;
- vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
-
-#if defined(i386) || defined(pc532)
- /*
- * XXX - specially disallow access to user page tables - they are
- * in the map. This is here until i386 & pc532 pmaps are fixed...
- */
- if ((vm_offset_t) addr >= VM_MAXUSER_ADDRESS
- || (vm_offset_t) addr + len > VM_MAXUSER_ADDRESS
- || (vm_offset_t) addr + len <= (vm_offset_t) addr)
- return (FALSE);
-#endif
-
- rv = vm_map_check_protection(&curproc->p_vmspace->vm_map,
- trunc_page((vaddr_t)addr), round_page((vaddr_t)addr+len), prot);
- return (rv == TRUE);
-}
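
[Editor's sketch] A hedged usage example of useracc(); the driver function and EFAULT return are illustrative, and B_WRITE is assumed to be the usual sys/buf.h counterpart of the B_READ flag used above. It validates that a user buffer is writable before the kernel stores data into it.

	int
	exampledrv_read(addr, len)	/* hypothetical driver entry point */
		caddr_t addr;
		int len;
	{
		/* The kernel will write into the user buffer, so ask for
		 * write access (anything but B_READ maps to VM_PROT_WRITE
		 * above). */
		if (!useracc(addr, len, B_WRITE))
			return (EFAULT);
		/* ... copy device data out to addr ... */
		return (0);
	}
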
-
-#ifdef KGDB
-/*
- * Change protections on kernel pages from addr to addr+len
- * (presumably so debugger can plant a breakpoint).
- *
- * We force the protection change at the pmap level. If we were
- * to use vm_map_protect, a change to allow writing would be lazily
- * applied, meaning we would still take a protection fault, something
- * we really don't want to do. It would also fragment the kernel
- * map unnecessarily. We cannot use pmap_protect since it also won't
- * enforce a write-enable request. Using pmap_enter is the only way
- * we can ensure the change takes place properly.
- */
-void
-chgkprot(addr, len, rw)
- register caddr_t addr;
- int len, rw;
-{
- vm_prot_t prot;
- vm_offset_t pa, sva, eva;
-
- prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
- eva = round_page((vaddr_t)addr + len);
- for (sva = trunc_page((vaddr_t)addr); sva < eva; sva += PAGE_SIZE) {
- /*
- * Extract physical address for the page.
- */
- if (pmap_extract(pmap_kernel(), sva, &pa) == FALSE)
- panic("chgkprot: invalid page");
- pmap_enter(pmap_kernel(), sva, pa, prot, TRUE, 0);
- }
-}
-#endif
-
-int
-vslock(addr, len)
- caddr_t addr;
- u_int len;
-{
-#ifdef __i386__
- pmap_prefault(&curproc->p_vmspace->vm_map, (vm_offset_t)addr, len);
-#endif
- return (vm_map_pageable(&curproc->p_vmspace->vm_map,
- trunc_page((vaddr_t)addr),
- round_page((vaddr_t)addr+len), FALSE));
-}
-
-int
-vsunlock(addr, len)
- caddr_t addr;
- u_int len;
-{
- return (vm_map_pageable(&curproc->p_vmspace->vm_map,
- trunc_page((vaddr_t)addr),
- round_page((vaddr_t)addr+len), TRUE));
-}
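
[Editor's sketch] A hedged usage example of the vslock()/vsunlock() bracket; the function name and surrounding error handling are invented. It keeps a user buffer resident while a transfer targets it, then lets it page again.

	int
	example_rawio(addr, len)	/* hypothetical raw-I/O path */
		caddr_t addr;
		u_int len;
	{
		int error;

		error = vslock(addr, len);	/* wire the user pages */
		if (error != KERN_SUCCESS)
			return (error);
		/* ... start the transfer and wait for it to finish ... */
		vsunlock(addr, len);		/* unwire them again */
		return (0);
	}
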
-
-/*
- * Implement fork's actions on an address space.
- * Here we arrange for the address space to be copied or referenced,
- * allocate a user struct (pcb and kernel stack), then call the
- * machine-dependent layer to fill those in and make the new process
- * ready to run.
- * NOTE: the kernel stack may be at a different location in the child
- * process, and thus addresses of automatic variables may be invalid
- * after cpu_fork returns in the child process. We do nothing here
- * after cpu_fork returns.
- */
-#ifdef __FORK_BRAINDAMAGE
-int
-#else
-void
-#endif
-vm_fork(p1, p2, stack, stacksize)
- register struct proc *p1, *p2;
- void *stack;
- size_t stacksize;
-{
- register struct user *up = p2->p_addr;
-
-#if defined(i386) || defined(pc532)
- /*
- * avoid copying any of the parent's pagetables or other per-process
- * objects that reside in the map by marking all of them non-inheritable
- */
- (void)vm_map_inherit(&p1->p_vmspace->vm_map,
- VM_MAXUSER_ADDRESS, VM_MAX_ADDRESS, VM_INHERIT_NONE);
-#endif
- p2->p_vmspace = vmspace_fork(p1->p_vmspace);
-
-#ifdef SYSVSHM
- if (p1->p_vmspace->vm_shm)
- shmfork(p1->p_vmspace, p2->p_vmspace);
-#endif
-
- vm_map_pageable(kernel_map, (vm_offset_t)up,
- (vm_offset_t)up + USPACE, FALSE);
-
- /*
- * p_stats currently point at fields in the user struct. Copy
- * parts of p_stats, and zero out the rest.
- */
- p2->p_stats = &up->u_stats;
- bzero(&up->u_stats.pstat_startzero,
- (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
- (caddr_t)&up->u_stats.pstat_startzero));
- bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
- ((caddr_t)&up->u_stats.pstat_endcopy -
- (caddr_t)&up->u_stats.pstat_startcopy));
-
-#if defined(i386) || defined(pc532)
- {
- vm_offset_t addr = VM_MAXUSER_ADDRESS; struct vm_map *vp;
-
- /* ream out old pagetables and kernel stack */
- vp = &p2->p_vmspace->vm_map;
- (void)vm_deallocate(vp, addr, VM_MAX_ADDRESS - addr);
- (void)vm_allocate(vp, &addr, VM_MAX_ADDRESS - addr, FALSE);
- (void)vm_map_inherit(vp, addr, VM_MAX_ADDRESS,
- VM_INHERIT_NONE);
- }
-#endif
-
-#ifdef __FORK_BRAINDAMAGE
- /*
- * cpu_fork will copy and update the kernel stack and pcb,
- * and make the child ready to run. It marks the child
- * so that it can return differently than the parent.
- * It returns twice, once in the parent process and
- * once in the child.
- */
- return (cpu_fork(p1, p2, stack, stacksize));
-#else
- /*
- * cpu_fork will copy and update the kernel stack and pcb,
- * and make the child ready to run. The child will exit
- * directly to user mode on its first time slice, and will
- * not return here.
- */
- cpu_fork(p1, p2, stack, stacksize);
-#endif
-}
-
-/*
- * Set default limits for VM system.
- * Called for proc 0, and then inherited by all others.
- */
-void
-vm_init_limits(p)
- register struct proc *p;
-{
-
- /*
- * Set up the initial limits on process VM.
- * Set the maximum resident set size to be all
- * of (reasonably) available memory. This causes
- * any single, large process to start random page
- * replacement once it fills memory.
- */
- p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
- p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
- p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
- p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
- p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(cnt.v_free_count);
-}
-
-#include <vm/vm_pageout.h>
-
-#ifdef DEBUG
-int enableswap = 1;
-int swapdebug = 0;
-#define SDB_FOLLOW 1
-#define SDB_SWAPIN 2
-#define SDB_SWAPOUT 4
-#endif
-
-/*
- * Swap in a process's u-area.
- */
-void
-swapin(p)
- struct proc *p;
-{
- vm_offset_t addr;
- int s;
-
- addr = (vm_offset_t)p->p_addr;
- vm_map_pageable(kernel_map, addr, addr + USPACE, FALSE);
- /*
- * Some architectures need to be notified when the
- * user area has moved to new physical page(s) (e.g.
- * see pmax/pmax/vm_machdep.c).
- */
- cpu_swapin(p);
- s = splstatclock();
- if (p->p_stat == SRUN)
- setrunqueue(p);
- p->p_flag |= P_INMEM;
- splx(s);
- p->p_swtime = 0;
- ++cnt.v_swpin;
-}
-
-/*
- * Brutally simple:
- * 1. Attempt to swap in every swapped-out, runnable process in
- * order of priority.
- * 2. If not enough memory, wake the pageout daemon and let it
- * clear some space.
- */
-void
-scheduler()
-{
- register struct proc *p;
- register int pri;
- struct proc *pp;
- int ppri;
-
-loop:
-#ifdef DEBUG
- while (!enableswap) {
- panic ("swap disabled??");
- tsleep((caddr_t)&proc0, PVM, "noswap", 0);
- }
-#endif
- pp = NULL;
- ppri = INT_MIN;
- for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
- if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) {
- pri = p->p_swtime + p->p_slptime - p->p_nice * 8;
- if (pri > ppri) {
- pp = p;
- ppri = pri;
- }
- }
- }
-#ifdef DEBUG
- if (swapdebug & SDB_FOLLOW)
- printf("scheduler: running, procp %p pri %d\n", pp, ppri);
-#endif
- /*
- * Nothing to do, back to sleep
- */
- if ((p = pp) == NULL) {
- tsleep((caddr_t)&proc0, PVM, "scheduler", 0);
- goto loop;
- }
-
- /*
- * We would like to bring someone in.
- * This part is really bogus because we could deadlock on memory
- * despite our feeble check.
- * XXX should require at least vm_swrss / 2
- */
- if (cnt.v_free_count > atop(USPACE)) {
-#ifdef DEBUG
- if (swapdebug & SDB_SWAPIN)
- printf("swapin: pid %d(%s)@%p, pri %d free %d\n",
- p->p_pid, p->p_comm, p->p_addr, ppri,
- cnt.v_free_count);
-#endif
-#if defined(arc) || defined(pica)
- vm_map_pageable(kernel_map, (vm_offset_t)p->p_addr,
- (vm_offset_t)p->p_addr + atop(USPACE), FALSE);
-#endif
- swapin(p);
- goto loop;
- }
- /*
- * Not enough memory, jab the pageout daemon and wait until the
- * coast is clear.
- */
-#ifdef DEBUG
- if (swapdebug & SDB_FOLLOW)
- printf("scheduler: no room for pid %d(%s), free %d\n",
- p->p_pid, p->p_comm, cnt.v_free_count);
-#endif
- (void)splhigh();
- vm_wait("fLowmem");
- (void)spl0();
-#ifdef DEBUG
- if (swapdebug & SDB_FOLLOW)
- printf("scheduler: room again, free %d\n", cnt.v_free_count);
-#endif
- goto loop;
-}
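
[Editor's note] The selection loop above reduces to one arithmetic score per swapped-out runnable process. A short worked example, with values invented purely for illustration:

	/*
	 * Worked example (values invented for illustration):
	 *
	 *	pri = p_swtime + p_slptime - p_nice * 8
	 *
	 *	process A: swapped out 30s, slept 10s, nice +10
	 *	           pri = 30 + 10 - 10 * 8 = -40
	 *	process B: swapped out  5s, slept  2s, nice 0
	 *	           pri =  5 +  2 -  0 * 8 =   7
	 *
	 * B scores higher, so the loop selects B to swap in first.
	 */
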
-
-#define swappable(p) \
- (((p)->p_flag & (P_SYSTEM | P_INMEM | P_WEXIT)) == P_INMEM && \
- (p)->p_holdcnt == 0)
-
-/*
- * Swapout is driven by the pageout daemon. Very simple, we find eligible
- * procs and unwire their u-areas. We try to always "swap" at least one
- * process in case we need the room for a swapin.
- * If any procs have been sleeping/stopped for at least maxslp seconds,
- * they are swapped. Else, we swap the longest-sleeping or stopped process,
- * if any, otherwise the longest-resident process.
- */
-void
-swapout_threads()
-{
- register struct proc *p;
- struct proc *outp, *outp2;
- int outpri, outpri2;
- int didswap = 0;
- extern int maxslp;
-
-#ifdef DEBUG
- if (!enableswap)
- return;
-#endif
- outp = outp2 = NULL;
- outpri = outpri2 = 0;
- for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
- if (!swappable(p))
- continue;
- switch (p->p_stat) {
- case SRUN:
- if (p->p_swtime > outpri2) {
- outp2 = p;
- outpri2 = p->p_swtime;
- }
- continue;
-
- case SSLEEP:
- case SSTOP:
- if (p->p_slptime >= maxslp) {
- swapout(p);
- didswap++;
- } else if (p->p_slptime > outpri) {
- outp = p;
- outpri = p->p_slptime;
- }
- continue;
- }
- }
- /*
- * If we didn't get rid of any real duds, toss out the next most
- * likely sleeping/stopped or running candidate. We only do this
- * if we are really low on memory since we don't gain much by doing
- * it (USPACE bytes).
- */
- if (didswap == 0 &&
- cnt.v_free_count <= atop(round_page(USPACE))) {
- if ((p = outp) == 0)
- p = outp2;
-#ifdef DEBUG
- if (swapdebug & SDB_SWAPOUT)
- printf("swapout_threads: no duds, try procp %p\n", p);
-#endif
- if (p)
- swapout(p);
- }
-}
-
-void
-swapout(p)
- register struct proc *p;
-{
- vm_offset_t addr;
- int s;
-
-#ifdef DEBUG
- if (swapdebug & SDB_SWAPOUT)
- printf("swapout: pid %d(%s)@%p, stat %x pri %d free %d\n",
- p->p_pid, p->p_comm, p->p_addr, p->p_stat, p->p_slptime,
- cnt.v_free_count);
-#endif
-
- /*
- * Do any machine-specific actions necessary before swapout.
- * This can include saving floating point state, etc.
- */
- cpu_swapout(p);
-
- /*
- * Unwire the to-be-swapped process's user struct and kernel stack.
- */
- addr = (vm_offset_t)p->p_addr;
- vm_map_pageable(kernel_map, addr, addr + USPACE, TRUE);
- pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
-
- /*
- * Mark it as (potentially) swapped out.
- */
- s = splstatclock();
- p->p_flag &= ~P_INMEM;
- if (p->p_stat == SRUN)
- remrunqueue(p);
- splx(s);
- p->p_swtime = 0;
- ++cnt.v_swpout;
-}
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
deleted file mode 100644
index 6b46acbf01d..00000000000
--- a/sys/vm/vm_init.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/* $OpenBSD: vm_init.c,v 1.3 1998/03/01 00:38:06 niklas Exp $ */
-/* $NetBSD: vm_init.c,v 1.11 1998/01/09 06:00:50 thorpej Exp $ */
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_init.c 8.1 (Berkeley) 6/11/93
- *
- *
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * Initialize the Virtual Memory subsystem.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_kern.h>
-
-/*
- * vm_init initializes the virtual memory system.
- * This is done only by the first cpu up.
- *
- * The start and end address of physical memory is passed in.
- */
-
-void vm_mem_init()
-{
-#if !defined(MACHINE_NONCONTIG) && !defined(MACHINE_NEW_NONCONTIG)
- extern vm_offset_t avail_start, avail_end;
- extern vm_offset_t virtual_avail, virtual_end;
-#else
- vm_offset_t start, end;
-#endif
-
- /*
- * Initializes resident memory structures.
- * From here on, all physical memory is accounted for,
- * and we use only virtual addresses.
- */
- if (page_shift == 0) {
- printf("vm_mem_init: WARN: MD code did not set page size\n");
- vm_set_page_size();
- }
-#if !defined(MACHINE_NONCONTIG) && !defined(MACHINE_NEW_NONCONTIG)
- vm_page_startup(&avail_start, &avail_end);
-#else
- vm_page_bootstrap(&start, &end);
-#endif
-
- /*
- * Initialize other VM packages
- */
-#if !defined(MACHINE_NONCONTIG) && !defined(MACHINE_NEW_NONCONTIG)
- vm_object_init(virtual_end - VM_MIN_KERNEL_ADDRESS);
-#else
- vm_object_init(end - VM_MIN_KERNEL_ADDRESS);
-#endif
- vm_map_startup();
-#if !defined(MACHINE_NONCONTIG) && !defined(MACHINE_NEW_NONCONTIG)
- kmem_init(virtual_avail, virtual_end);
- pmap_init(avail_start, avail_end);
-#else
- kmem_init(start, end);
- pmap_init();
-#endif
- vm_pager_init();
-}
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
deleted file mode 100644
index c2da9722860..00000000000
--- a/sys/vm/vm_kern.c
+++ /dev/null
@@ -1,465 +0,0 @@
-/* $OpenBSD: vm_kern.c,v 1.11 1999/09/03 18:02:27 art Exp $ */
-/* $NetBSD: vm_kern.c,v 1.17.6.1 1996/06/13 17:21:28 cgd Exp $ */
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_kern.c 8.4 (Berkeley) 1/9/95
- *
- *
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * Kernel memory management.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pageout.h>
-#include <vm/vm_kern.h>
-
-/*
- * kmem_alloc_pageable:
- *
- * Allocate pageable memory to the kernel's address map.
- * map must be "kernel_map" below.
- */
-vm_offset_t
-kmem_alloc_pageable(map, size)
- vm_map_t map;
- register vm_size_t size;
-{
- vm_offset_t addr;
- register int result;
-
-#if 0
- if (map != kernel_map)
- panic("kmem_alloc_pageable: not called with kernel_map");
-#endif
-
- size = round_page(size);
-
- addr = vm_map_min(map);
- result = vm_map_find(map, NULL, (vm_offset_t)0, &addr, size, TRUE);
- if (result != KERN_SUCCESS) {
- return (0);
- }
-
- return (addr);
-}
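
[Editor's sketch] A hedged usage example of kmem_alloc_pageable(); the context and size are invented. It reserves pageable kernel virtual address space and handles the zero return that signals a full map.

	/* Somewhere in an initialization path (illustrative fragment): */
	vm_offset_t va;

	va = kmem_alloc_pageable(kernel_map, 16 * PAGE_SIZE);
	if (va == 0)
		panic("example: kernel_map exhausted");
	/* va now names 16 pages of pageable kernel address space. */
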
-
-/*
- * Allocate wired-down memory in the kernel's address map
- * or a submap.
- */
-vm_offset_t
-kmem_alloc(map, size)
- register vm_map_t map;
- register vm_size_t size;
-{
- vm_offset_t addr;
- register vm_offset_t offset;
- extern vm_object_t kernel_object;
- vm_offset_t i;
-
- size = round_page(size);
-
- /*
- * Use the kernel object for wired-down kernel pages.
- * Assume that no region of the kernel object is
- * referenced more than once.
- */
-
- /*
- * Locate sufficient space in the map. This will give us the
- * final virtual address for the new memory, and thus will tell
- * us the offset within the kernel map.
- */
- vm_map_lock(map);
- if (vm_map_findspace(map, 0, size, &addr)) {
- vm_map_unlock(map);
- return (0);
- }
- offset = addr - VM_MIN_KERNEL_ADDRESS;
- vm_object_reference(kernel_object);
- vm_map_insert(map, kernel_object, offset, addr, addr + size);
- vm_map_unlock(map);
-
- /*
- * Guarantee that there are pages already in this object
- * before calling vm_map_pageable. This is to prevent the
- * following scenario:
- *
- * 1) Threads have swapped out, so that there is a
- * pager for the kernel_object.
- * 2) The kmsg zone is empty, and so we are kmem_allocing
- * a new page for it.
- * 3) vm_map_pageable calls vm_fault; there is no page,
- * but there is a pager, so we call
- * pager_data_request. But the kmsg zone is empty,
- * so we must kmem_alloc.
- * 4) goto 1
- * 5) Even if the kmsg zone is not empty: when we get
- * the data back from the pager, it will be (very
- * stale) non-zero data. kmem_alloc is defined to
- * return zero-filled memory.
- *
- * We're intentionally not activating the pages we allocate
- * to prevent a race with page-out. vm_map_pageable will wire
- * the pages.
- */
-
- vm_object_lock(kernel_object);
- for (i = 0; i < size; i += PAGE_SIZE) {
- vm_page_t mem;
-
- while ((mem = vm_page_alloc(kernel_object, offset + i)) ==
- NULL) {
- vm_object_unlock(kernel_object);
- vm_wait("fKmwire");
- vm_object_lock(kernel_object);
- }
- vm_page_zero_fill(mem);
- mem->flags &= ~PG_BUSY;
- }
- vm_object_unlock(kernel_object);
-
- /*
- * And finally, mark the data as non-pageable.
- */
-
- (void)vm_map_pageable(map, (vm_offset_t)addr, addr + size, FALSE);
-
- /*
- * Try to coalesce the map
- */
-
- vm_map_simplify(map, addr);
-
- return (addr);
-}
-
-/*
- * kmem_free:
- *
- * Release a region of kernel virtual memory allocated
- * with kmem_alloc, and return the physical pages
- * associated with that region.
- */
-void
-kmem_free(map, addr, size)
- vm_map_t map;
- register vm_offset_t addr;
- vm_size_t size;
-{
- (void)vm_map_remove(map, trunc_page(addr), round_page(addr + size));
-}
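
[Editor's sketch] A hedged usage example of the kmem_alloc()/kmem_free() pair; the size and surrounding code are invented. kmem_alloc() hands back wired, zero-filled memory, and kmem_free() returns both the virtual range and the backing pages.

	/* Illustrative fragment: */
	vm_size_t sz = 4 * PAGE_SIZE;
	vm_offset_t buf;

	buf = kmem_alloc(kernel_map, sz);
	if (buf == 0)
		return (0);	/* no space in the map, or no memory */
	/* ... use the wired, zero-filled buffer at buf ... */
	kmem_free(kernel_map, buf, sz);
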
-
-/*
- * kmem_suballoc:
- *
- * Allocates a map to manage a subrange
- * of the kernel virtual address space.
- *
- * Arguments are as follows:
- *
- * parent Map to take range from
- * size Size of range to find
- * min, max Returned endpoints of map
- * pageable Can the region be paged
- */
-vm_map_t
-kmem_suballoc(parent, min, max, size, pageable)
- register vm_map_t parent;
- vm_offset_t *min, *max;
- register vm_size_t size;
- boolean_t pageable;
-{
- register int ret;
- vm_map_t result;
-
- size = round_page(size);
-
- *min = (vm_offset_t)vm_map_min(parent);
- ret = vm_map_find(parent, NULL, (vm_offset_t)0, min, size, TRUE);
- if (ret != KERN_SUCCESS) {
- printf("kmem_suballoc: bad status return of %d.\n", ret);
- panic("kmem_suballoc");
- }
- *max = *min + size;
- pmap_reference(vm_map_pmap(parent));
- result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
- if (result == NULL)
- panic("kmem_suballoc: cannot create submap");
- if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
- panic("kmem_suballoc: unable to change range to submap");
- return (result);
-}
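
[Editor's sketch] A hedged usage example of kmem_suballoc(); the variable names and size are invented, in the style of the boot-time creation of the kernel submaps.

	vm_offset_t submin, submax;
	vm_map_t example_map;

	/* Carve a 64-page, non-pageable submap out of kernel_map. */
	example_map = kmem_suballoc(kernel_map, &submin, &submax,
	    64 * PAGE_SIZE, FALSE);
	/* example_map now manages [submin, submax); kmem_suballoc()
	 * panics rather than returning on failure, so no error check
	 * is needed here. */
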
-
-/*
- * Allocate wired-down memory in the kernel's address map for the higher
- * level kernel memory allocator (kern/kern_malloc.c). We cannot use
- * kmem_alloc() because we may need to allocate memory at interrupt
- * level where we cannot block (canwait == FALSE).
- *
- * This routine has its own private kernel submap (kmem_map) and object
- * (kmem_object). This, combined with the fact that only malloc uses
- * this routine, ensures that we will never block in map or object waits.
- *
- * Note that this still only works in a uni-processor environment and
- * when called at splimp().
- *
- * We don't worry about expanding the map (adding entries) since entries
- * for wired maps are statically allocated.
- */
-vm_offset_t
-kmem_malloc(map, size, canwait)
- register vm_map_t map;
- register vm_size_t size;
- boolean_t canwait;
-{
- register vm_offset_t offset, i;
- vm_map_entry_t entry;
- vm_offset_t addr;
- vm_page_t m;
- extern vm_object_t kmem_object;
-
- if (map != kmem_map && map != mb_map)
- panic("kern_malloc_alloc: map != {kmem,mb}_map");
-
- size = round_page(size);
- addr = vm_map_min(map);
-
- /*
- * Locate sufficient space in the map. This will give us the
- * final virtual address for the new memory, and thus will tell
- * us the offset within the kernel map.
- */
- vm_map_lock(map);
- if (vm_map_findspace(map, 0, size, &addr)) {
- vm_map_unlock(map);
- /*
- * Should wait, but that makes no sense since we will
- * likely never wake up unless action to free resources
- * is taken by the calling subsystem.
- *
- * We return NULL, and if the caller was able to wait
- * then they should take corrective action and retry.
- */
- return (0);
- }
- offset = addr - vm_map_min(kmem_map);
- vm_object_reference(kmem_object);
- vm_map_insert(map, kmem_object, offset, addr, addr + size);
-
- /*
- * If we can wait, just mark the range as wired
- * (will fault pages as necessary).
- */
- if (canwait) {
- vm_map_unlock(map);
- (void)vm_map_pageable(map, (vm_offset_t)addr, addr + size,
- FALSE);
- vm_map_simplify(map, addr);
- return (addr);
- }
-
- /*
- * If we cannot wait then we must allocate all memory up front,
- * pulling it off the active queue to prevent pageout.
- */
- vm_object_lock(kmem_object);
- for (i = 0; i < size; i += PAGE_SIZE) {
- m = vm_page_alloc(kmem_object, offset + i);
-
- /*
- * Ran out of space, free everything up and return.
- * Don't need to lock page queues here as we know
- * that the pages we got aren't on any queues.
- */
- if (m == NULL) {
- while (i != 0) {
- i -= PAGE_SIZE;
- m = vm_page_lookup(kmem_object, offset + i);
- vm_page_free(m);
- }
- vm_object_unlock(kmem_object);
- vm_map_delete(map, addr, addr + size);
- vm_map_unlock(map);
- return (0);
- }
-#if 0
- vm_page_zero_fill(m);
-#endif
- m->flags &= ~PG_BUSY;
- }
- vm_object_unlock(kmem_object);
-
- /*
- * Mark map entry as non-pageable.
- * Assert: vm_map_insert() will never be able to extend the previous
- * entry so there will be a new entry exactly corresponding to this
- * address range and it will have wired_count == 0.
- */
- if (!vm_map_lookup_entry(map, addr, &entry) ||
- entry->start != addr || entry->end != addr + size ||
- entry->wired_count)
- panic("kmem_malloc: entry not found or misaligned");
- entry->wired_count++;
-
- /*
- * Loop through pages, entering them in the pmap.
- * (We cannot add them to the wired count without
- * wrapping the vm_page_queue_lock in splimp...)
- */
- for (i = 0; i < size; i += PAGE_SIZE) {
- vm_object_lock(kmem_object);
- m = vm_page_lookup(kmem_object, offset + i);
- vm_object_unlock(kmem_object);
- pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m),
- VM_PROT_DEFAULT, TRUE, 0);
- }
- vm_map_unlock(map);
-
- vm_map_simplify(map, addr);
- return (addr);
-}
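
[Editor's sketch] A hedged usage example of kmem_malloc() at interrupt level; the surrounding code and size are invented, while kmem_map, splimp() and the canwait flag come from this file and the comment above.

	vm_size_t sz = PAGE_SIZE;	/* illustrative size */
	vm_offset_t va;
	int s;

	s = splimp();			/* required per the comment above */
	va = kmem_malloc(kmem_map, sz, FALSE);	/* FALSE: may not block */
	splx(s);
	if (va == 0) {
		/* No space: the caller must tolerate failure, since
		 * blocking here is not an option. */
	}
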
-
-/*
- * kmem_alloc_wait
- *
- * Allocates pageable memory from a sub-map of the kernel. If the submap
- * has no room, the caller sleeps waiting for more memory in the submap.
- *
- */
-vm_offset_t
-kmem_alloc_wait(map, size)
- vm_map_t map;
- vm_size_t size;
-{
- vm_offset_t addr;
-
- size = round_page(size);
-
- for (;;) {
- /*
- * To make this work for more than one map,
- * use the map's lock to lock out sleepers/wakers.
- */
- vm_map_lock(map);
- if (vm_map_findspace(map, 0, size, &addr) == 0)
- break;
- /* no space now; see if we can ever get space */
- if (vm_map_max(map) - vm_map_min(map) < size) {
- vm_map_unlock(map);
- return (0);
- }
- assert_wait(map, TRUE);
- vm_map_unlock(map);
- thread_block("mKmwait");
- }
- vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size);
- vm_map_unlock(map);
- return (addr);
-}
-
-/*
- * kmem_free_wakeup
- *
- * Returns memory to a submap of the kernel, and wakes up any threads
- * waiting for memory in that map.
- */
-void
-kmem_free_wakeup(map, addr, size)
- vm_map_t map;
- vm_offset_t addr;
- vm_size_t size;
-{
- vm_map_lock(map);
- (void)vm_map_delete(map, trunc_page(addr), round_page(addr + size));
- thread_wakeup(map);
- vm_map_unlock(map);
-}
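
[Editor's sketch] A hedged usage example of the kmem_alloc_wait()/kmem_free_wakeup() pair; the submap name, size and ENOMEM return are invented. The caller sleeps rather than fails when the submap is momentarily full.

	vm_offset_t kva;

	kva = kmem_alloc_wait(example_submap, 8 * PAGE_SIZE);
	if (kva == 0)
		return (ENOMEM);	/* request larger than the whole submap */
	/* ... stage data through kva ... */
	kmem_free_wakeup(example_submap, kva, 8 * PAGE_SIZE);
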
-
-/*
- * Create the kernel map; insert a mapping covering kernel text, data, bss,
- * and all space allocated thus far (`bootstrap' data). The new map will thus
- * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
- * the range between `start' and `end' as free.
- */
-void
-kmem_init(start, end)
- vm_offset_t start, end;
-{
- register vm_map_t m;
-
- m = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_ADDRESS, end, FALSE);
- vm_map_lock(m);
- /* N.B.: cannot use kgdb to debug, starting with this assignment ... */
- kernel_map = m;
- (void)vm_map_insert(m, NULL, (vm_offset_t)0, VM_MIN_KERNEL_ADDRESS,
- start);
- /* ... and ending with the completion of the above `insert' */
- vm_map_unlock(m);
-}
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
deleted file mode 100644
index d3dac7a832a..00000000000
--- a/sys/vm/vm_map.c
+++ /dev/null
@@ -1,2746 +0,0 @@
-/* $OpenBSD: vm_map.c,v 1.20 2001/05/16 12:54:34 ho Exp $ */
-/* $NetBSD: vm_map.c,v 1.23 1996/02/10 00:08:08 christos Exp $ */
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_map.c 8.9 (Berkeley) 5/17/95
- *
- *
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * Virtual memory mapping module.
- */
-
-#include <sys/param.h>
-#include <sys/proc.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-
-#include <vm/vm.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_page.h>
-
-/*
- * Virtual memory maps provide for the mapping, protection,
- * and sharing of virtual memory objects. In addition,
- * this module provides for an efficient virtual copy of
- * memory from one map to another.
- *
- * Synchronization is required prior to most operations.
- *
- * Maps consist of an ordered doubly-linked list of simple
- * entries; a single hint is used to speed up lookups.
- *
- * In order to properly represent the sharing of virtual
- * memory regions among maps, the map structure is bi-level.
- * Top-level ("address") maps refer to regions of sharable
- * virtual memory. These regions are implemented as
- * ("sharing") maps, which then refer to the actual virtual
- * memory objects. When two address maps "share" memory,
- * their top-level maps both have references to the same
- * sharing map. When memory is virtual-copied from one
- * address map to another, the references in the sharing
- * maps are actually copied -- no copying occurs at the
- * virtual memory object level.
- *
- * Since portions of maps are specified by start/end addresses,
- * which may not align with existing map entries, all
- * routines merely "clip" entries to these start/end values.
- * [That is, an entry is split into two, bordering at a
- * start or end value.] Note that these clippings may not
- * always be necessary (as the two resulting entries are then
- * not changed); however, the clipping is done for convenience.
- * No attempt is currently made to "glue back together" two
- * abutting entries.
- *
- * As mentioned above, virtual copy operations are performed
- * by copying VM object references from one sharing map to
- * another, and then marking both regions as copy-on-write.
- * It is important to note that only one writeable reference
- * to a VM object region exists in any map -- this means that
- * shadow object creation can be delayed until a write operation
- * occurs.
- */
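
[Editor's illustration] What the "clipping" described above looks like in practice; the addresses are invented, and the real work is done by _vm_map_clip_start() and _vm_map_clip_end() further down in this file.

	/*
	 * Clipping splits one entry into two abutting entries at the
	 * requested boundary; nothing about the mapped object changes.
	 *
	 *	before:            [ 0x1000 ................. 0x5000 )
	 *	clip at 0x3000:    [ 0x1000 .. 0x3000 )[ 0x3000 .. 0x5000 )
	 *
	 * Both halves keep referring to the same VM object, with the
	 * second half's offset advanced by 0x2000.
	 */
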
-
-/*
- * vm_map_startup:
- *
- * Initialize the vm_map module. Must be called before
- * any other vm_map routines.
- *
- * Map and entry structures are allocated from the general
- * purpose memory pool with some exceptions:
- *
- * - The kernel map and kmem submap are allocated statically.
- * - Kernel map entries are allocated out of a static pool.
- *
- * These restrictions are necessary since malloc() uses the
- * maps and requires map entries.
- */
-
-#if defined(MACHINE_NEW_NONCONTIG)
-u_int8_t kentry_data_store[MAX_KMAP*sizeof(struct vm_map) +
- MAX_KMAPENT*sizeof(struct vm_map_entry)];
-vm_offset_t kentry_data = (vm_offset_t) kentry_data_store;
-vm_size_t kentry_data_size = sizeof(kentry_data_store);
-#else
-/* NUKE NUKE NUKE */
-vm_offset_t kentry_data;
-vm_size_t kentry_data_size;
-#endif
-vm_map_entry_t kentry_free;
-vm_map_t kmap_free;
-
-static int kentry_count;
-static vm_offset_t mapvm_start, mapvm, mapvmmax;
-static int mapvmpgcnt;
-
-static struct vm_map_entry *mappool;
-static int mappoolcnt;
-#define KENTRY_LOW_WATER 128
-
-static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
-static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
-
-void
-vm_map_startup()
-{
- register int i;
- register vm_map_entry_t mep;
- vm_map_t mp;
-
- /*
- * zero kentry area
- * XXX necessary?
- */
- bzero((caddr_t)kentry_data, kentry_data_size);
-
- /*
- * Static map structures for allocation before initialization of
- * kernel map or kmem map. vm_map_create knows how to deal with them.
- */
- kmap_free = mp = (vm_map_t) kentry_data;
- i = MAX_KMAP;
- while (--i > 0) {
- mp->header.next = (vm_map_entry_t) (mp + 1);
- mp++;
- }
- mp++->header.next = NULL;
-
- /*
- * Form a free list of statically allocated kernel map entries
- * with the rest.
- */
- kentry_free = mep = (vm_map_entry_t) mp;
- kentry_count = i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep;
- while (--i > 0) {
- mep->next = mep + 1;
- mep++;
- }
- mep->next = NULL;
-}
-
-/*
- * Allocate a vmspace structure, including a vm_map and pmap,
- * and initialize those structures. The refcnt is set to 1.
- * The remaining fields must be initialized by the caller.
- */
-struct vmspace *
-vmspace_alloc(min, max, pageable)
- vm_offset_t min, max;
- int pageable;
-{
- register struct vmspace *vm;
-
- if (mapvmpgcnt == 0 && mapvm == 0) {
-#if defined(MACHINE_NEW_NONCONTIG)
- int vm_page_count = 0;
- int lcv;
-
- for (lcv = 0; lcv < vm_nphysseg; lcv++)
- vm_page_count += (vm_physmem[lcv].end -
- vm_physmem[lcv].start);
-
- mapvmpgcnt = (vm_page_count *
- sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE;
-
-#elif defined(MACHINE_NONCONTIG)
- mapvmpgcnt = (vm_page_count *
- sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE;
-#else /* must be contig */
- mapvmpgcnt = ((last_page-first_page) *
- sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE;
-#endif /* contig */
- mapvm_start = mapvm = kmem_alloc_pageable(kernel_map,
- mapvmpgcnt * PAGE_SIZE);
- mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE;
- if (!mapvm)
- mapvmpgcnt = 0;
- }
- MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK);
- bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm);
- vm_map_init(&vm->vm_map, min, max, pageable);
- vm->vm_map.pmap = pmap_create(0);
- vm->vm_refcnt = 1;
- return (vm);
-}
-
-void
-vmspace_free(vm)
- register struct vmspace *vm;
-{
-
- if (--vm->vm_refcnt == 0) {
- /*
- * Lock the map, to wait out all other references to it.
- * Delete all of the mappings and pages they hold,
- * then call the pmap module to reclaim anything left.
- */
- vm_map_lock(&vm->vm_map);
- (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
- vm->vm_map.max_offset);
- pmap_destroy(vm->vm_map.pmap);
- FREE(vm, M_VMMAP);
- }
-}
-
-/*
- * vm_map_create:
- *
- * Creates and returns a new empty VM map with
- * the given physical map structure, and having
- * the given lower and upper address bounds.
- */
-vm_map_t
-vm_map_create(pmap, min, max, pageable)
- pmap_t pmap;
- vm_offset_t min, max;
- boolean_t pageable;
-{
- register vm_map_t result;
- extern vm_map_t kmem_map;
-
- if (kmem_map == NULL) {
- result = kmap_free;
- if (result == NULL)
- panic("vm_map_create: out of maps");
- kmap_free = (vm_map_t) result->header.next;
- } else
- MALLOC(result, vm_map_t, sizeof(struct vm_map),
- M_VMMAP, M_WAITOK);
-
- vm_map_init(result, min, max, pageable);
- result->pmap = pmap;
- return(result);
-}
-
-/*
- * Initialize an existing vm_map structure
- * such as that in the vmspace structure.
- * The pmap is set elsewhere.
- */
-void
-vm_map_init(map, min, max, pageable)
- register struct vm_map *map;
- vm_offset_t min, max;
- boolean_t pageable;
-{
- map->header.next = map->header.prev = &map->header;
- map->nentries = 0;
- map->size = 0;
- map->ref_count = 1;
- map->is_main_map = TRUE;
- map->min_offset = min;
- map->max_offset = max;
- map->entries_pageable = pageable;
- map->first_free = &map->header;
- map->hint = &map->header;
- map->timestamp = 0;
- lockinit(&map->lock, PVM, "thrd_sleep", 0, 0);
- simple_lock_init(&map->ref_lock);
- simple_lock_init(&map->hint_lock);
-}
-
-/*
- * vm_map_entry_create: [ internal use only ]
- *
- * Allocates a VM map entry for insertion.
- * No entry fields are filled in. This routine is
- */
-vm_map_entry_t
-vm_map_entry_create(map)
- vm_map_t map;
-{
- vm_map_entry_t entry;
- int i, s;
-
- /*
- * This is a *very* nasty (and sort of incomplete) hack!!!!
- */
- if (kentry_count < KENTRY_LOW_WATER) {
- s = splimp();
- if (mapvmpgcnt && mapvm) {
- vm_page_t m;
-
- m = vm_page_alloc(kernel_object,
- mapvm - VM_MIN_KERNEL_ADDRESS);
-
- if (m) {
- int newentries;
-
- newentries = (PAGE_SIZE / sizeof(struct vm_map_entry));
-#ifdef DIAGNOSTIC
- printf("vm_map_entry_create: allocated %d new entries.\n", newentries);
-#endif
-
- /* XXX */
- vm_page_wire(m);
- PAGE_WAKEUP(m);
- pmap_enter(pmap_kernel(), mapvm,
- VM_PAGE_TO_PHYS(m),
- VM_PROT_READ|VM_PROT_WRITE, FALSE, 0);
-
- entry = (vm_map_entry_t) mapvm;
- mapvm += PAGE_SIZE;
- --mapvmpgcnt;
-
- for (i = 0; i < newentries; i++) {
- vm_map_entry_dispose(kernel_map, entry);
- entry++;
- }
- }
- }
- splx(s);
- }
-
- if (map->entries_pageable) {
- if ((entry = mappool) != NULL) {
- mappool = mappool->next;
- --mappoolcnt;
- } else {
- MALLOC(entry, vm_map_entry_t,
- sizeof(struct vm_map_entry), M_VMMAPENT, M_WAITOK);
- }
- } else {
- s = splimp();
- if ((entry = kentry_free) != NULL) {
- kentry_free = kentry_free->next;
- --kentry_count;
- }
- if (entry == NULL)
- panic("vm_map_entry_create: out of map entries for kernel");
- splx(s);
- }
-
- return(entry);
-}
-
-/*
- * vm_map_entry_dispose: [ internal use only ]
- *
- * Inverse of vm_map_entry_create.
- */
-void
-vm_map_entry_dispose(map, entry)
- vm_map_t map;
- vm_map_entry_t entry;
-{
- int s;
-
- if (map->entries_pageable) {
- entry->next = mappool;
- mappool = entry;
- ++mappoolcnt;
- } else {
- s = splimp();
- entry->next = kentry_free;
- kentry_free = entry;
- ++kentry_count;
- splx(s);
- }
-}
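Editor's note: vm_map_entry_create()/vm_map_entry_dispose() above cache entries on simple singly linked free lists (mappool for pageable maps, kentry_free for the kernel) and fall back to MALLOC only when the cache is empty. A minimal userland sketch of that pattern, with illustrative names rather than the kernel's:

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for the mappool/kentry_free caches. */
struct entry { struct entry *next; };

static struct entry *pool;	/* head of the free list */
static int poolcnt;		/* entries currently cached */

static struct entry *
entry_get(void)
{
	struct entry *e;

	if ((e = pool) != NULL) {	/* reuse a cached entry if possible */
		pool = e->next;
		poolcnt--;
		return e;
	}
	return malloc(sizeof(*e));	/* otherwise fall back to the allocator */
}

static void
entry_put(struct entry *e)
{
	e->next = pool;			/* push back onto the free list */
	pool = e;
	poolcnt++;
}

int
main(void)
{
	struct entry *e = entry_get();	/* cache empty: comes from malloc */

	entry_put(e);
	printf("%d entry cached\n", poolcnt);	/* 1 */
	return 0;
}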
-
-/*
- * vm_map_entry_{un,}link:
- *
- * Insert/remove entries from maps.
- */
-#define vm_map_entry_link(map, after_where, entry) \
- { \
- (map)->nentries++; \
- (entry)->prev = (after_where); \
- (entry)->next = (after_where)->next; \
- (entry)->prev->next = (entry); \
- (entry)->next->prev = (entry); \
- }
-#define vm_map_entry_unlink(map, entry) \
- { \
- (map)->nentries--; \
- (entry)->next->prev = (entry)->prev; \
- (entry)->prev->next = (entry)->next; \
- }
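Editor's note: the vm_map_entry_link()/vm_map_entry_unlink() macros keep map entries on a circular doubly linked list whose sentinel is &map->header, so insertion and removal need no special cases at the ends. A small self-contained sketch of the same idiom (names are illustrative, not the kernel types):

#include <assert.h>

struct entry { struct entry *prev, *next; };
struct map   { struct entry header; int nentries; };	/* header is a sentinel */

static void
map_init(struct map *m)
{
	m->header.next = m->header.prev = &m->header;
	m->nentries = 0;
}

/* Same steps as the vm_map_entry_link() macro. */
static void
entry_link(struct map *m, struct entry *after, struct entry *e)
{
	m->nentries++;
	e->prev = after;
	e->next = after->next;
	e->prev->next = e;
	e->next->prev = e;
}

/* Same steps as the vm_map_entry_unlink() macro. */
static void
entry_unlink(struct map *m, struct entry *e)
{
	m->nentries--;
	e->next->prev = e->prev;
	e->prev->next = e->next;
}

int
main(void)
{
	struct map m;
	struct entry a, b;

	map_init(&m);
	entry_link(&m, &m.header, &a);	/* a becomes the first entry */
	entry_link(&m, &a, &b);		/* b follows a */
	entry_unlink(&m, &a);
	assert(m.header.next == &b && b.next == &m.header && m.nentries == 1);
	return 0;
}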
-
-/*
- * vm_map_reference:
- *
- * Creates another valid reference to the given map.
- *
- */
-void
-vm_map_reference(map)
- register vm_map_t map;
-{
- if (map == NULL)
- return;
-
- simple_lock(&map->ref_lock);
- map->ref_count++;
- simple_unlock(&map->ref_lock);
-}
-
-/*
- * vm_map_deallocate:
- *
- * Removes a reference from the specified map,
- * destroying it if no references remain.
- * The map should not be locked.
- */
-void
-vm_map_deallocate(map)
- register vm_map_t map;
-{
- register int c;
-
- if (map == NULL)
- return;
-
- simple_lock(&map->ref_lock);
- c = --map->ref_count;
-
- if (c > 0) {
- simple_unlock(&map->ref_lock);
- return;
- }
-
- /*
- * Lock the map, to wait out all other references
- * to it.
- */
-
- vm_map_lock_drain_interlock(map);
-
- (void) vm_map_delete(map, map->min_offset, map->max_offset);
-
- pmap_destroy(map->pmap);
-
- vm_map_unlock(map);
-
- FREE(map, M_VMMAP);
-}
-
-/*
- * vm_map_insert:
- *
- * Inserts the given whole VM object into the target
- * map at the specified address range. The object's
- * size should match that of the address range.
- *
- * Requires that the map be locked, and leaves it so.
- */
-int
-vm_map_insert(map, object, offset, start, end)
- vm_map_t map;
- vm_object_t object;
- vm_offset_t offset;
- vm_offset_t start;
- vm_offset_t end;
-{
- register vm_map_entry_t new_entry;
- register vm_map_entry_t prev_entry;
- vm_map_entry_t temp_entry;
-
- /*
- * Check that the start and end points are not bogus.
- */
-
- if ((start < map->min_offset) || (end > map->max_offset) ||
- (start >= end))
- return(KERN_INVALID_ADDRESS);
-
- /*
- * Find the entry prior to the proposed
- * starting address; if it's part of an
- * existing entry, this range is bogus.
- */
-
- if (vm_map_lookup_entry(map, start, &temp_entry))
- return(KERN_NO_SPACE);
-
- prev_entry = temp_entry;
-
- /*
- * Assert that the next entry doesn't overlap the
- * end point.
- */
-
- if ((prev_entry->next != &map->header) &&
- (prev_entry->next->start < end))
- return(KERN_NO_SPACE);
-
- /*
- * See if we can avoid creating a new entry by
- * extending one of our neighbors.
- */
-
- if (object == NULL) {
- if ((prev_entry != &map->header) &&
- (prev_entry->end == start) &&
- (map->is_main_map) &&
- (prev_entry->is_a_map == FALSE) &&
- (prev_entry->is_sub_map == FALSE) &&
- (prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
- (prev_entry->protection == VM_PROT_DEFAULT) &&
- (prev_entry->max_protection == VM_PROT_DEFAULT) &&
- (prev_entry->wired_count == 0)) {
-
- if (vm_object_coalesce(prev_entry->object.vm_object,
- NULL,
- prev_entry->offset,
- (vm_offset_t) 0,
- (vm_size_t)(prev_entry->end
- - prev_entry->start),
- (vm_size_t)(end - prev_entry->end))) {
- /*
- * Coalesced the two objects - can extend
- * the previous map entry to include the
- * new range.
- */
- map->size += (end - prev_entry->end);
- prev_entry->end = end;
- return(KERN_SUCCESS);
- }
- }
- }
-
- /*
- * Create a new entry
- */
-
- new_entry = vm_map_entry_create(map);
- new_entry->start = start;
- new_entry->end = end;
-
- new_entry->is_a_map = FALSE;
- new_entry->is_sub_map = FALSE;
- new_entry->object.vm_object = object;
- new_entry->offset = offset;
-
- new_entry->copy_on_write = FALSE;
- new_entry->needs_copy = FALSE;
-
- if (map->is_main_map) {
- new_entry->inheritance = VM_INHERIT_DEFAULT;
- new_entry->protection = VM_PROT_DEFAULT;
- new_entry->max_protection = VM_PROT_DEFAULT;
- new_entry->wired_count = 0;
- }
-
- /*
- * Insert the new entry into the list
- */
-
- vm_map_entry_link(map, prev_entry, new_entry);
- map->size += new_entry->end - new_entry->start;
-
- /*
- * Update the free space hint
- */
-
- if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start))
- map->first_free = new_entry;
-
- return(KERN_SUCCESS);
-}
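Editor's note: the interesting part of vm_map_insert() is the coalescing test just above: an anonymous (object == NULL) region that abuts a compatible previous entry simply extends that entry instead of allocating a new one. The following standalone sketch shows the same idea on a sorted array of disjoint ranges; it is illustrative only, not the kernel's data structure:

#include <stdio.h>

struct range { unsigned start, end; };	/* half-open [start, end) */

/*
 * Insert [start, end), coalescing with the preceding range when the two
 * touch.  Returns the new count, or -1 if the range overlaps something
 * already present.
 */
static int
range_insert(struct range *r, int n, unsigned start, unsigned end)
{
	int i, j;

	for (i = 0; i < n && r[i].end <= start; i++)
		continue;
	if (i < n && r[i].start < end)		/* overlap: reject */
		return -1;
	if (i > 0 && r[i - 1].end == start) {	/* abuts predecessor: extend it */
		r[i - 1].end = end;
		return n;
	}
	for (j = n; j > i; j--)			/* otherwise make room */
		r[j] = r[j - 1];
	r[i].start = start;
	r[i].end = end;
	return n + 1;
}

int
main(void)
{
	struct range r[8] = { { 0, 4096 } };
	int n = 1;

	n = range_insert(r, n, 4096, 8192);	/* coalesces with [0,4096) */
	printf("%d range(s), first is [%u,%u)\n", n, r[0].start, r[0].end);
	return 0;
}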
-
-/*
- * SAVE_HINT:
- *
- * Saves the specified entry as the hint for
- * future lookups. Performs necessary interlocks.
- */
-#define SAVE_HINT(map,value) \
- simple_lock(&(map)->hint_lock); \
- (map)->hint = (value); \
- simple_unlock(&(map)->hint_lock);
-
-/*
- * vm_map_lookup_entry: [ internal use only ]
- *
- * Finds the map entry containing (or
- * immediately preceding) the specified address
- * in the given map; the entry is returned
- * in the "entry" parameter. The boolean
- * result indicates whether the address is
- * actually contained in the map.
- */
-boolean_t
-vm_map_lookup_entry(map, address, entry)
- register vm_map_t map;
- register vm_offset_t address;
- vm_map_entry_t *entry; /* OUT */
-{
- register vm_map_entry_t cur;
- register vm_map_entry_t last;
-
- /*
- * Start looking either from the head of the
- * list, or from the hint.
- */
-
- simple_lock(&map->hint_lock);
- cur = map->hint;
- simple_unlock(&map->hint_lock);
-
- if (cur == &map->header)
- cur = cur->next;
-
- if (address >= cur->start) {
- /*
- * Go from hint to end of list.
- *
- * But first, make a quick check to see if
- * we are already looking at the entry we
- * want (which is usually the case).
- * Note also that we don't need to save the hint
- * here... it is the same hint (unless we are
- * at the header, in which case the hint didn't
- * buy us anything anyway).
- */
- last = &map->header;
- if ((cur != last) && (cur->end > address)) {
- *entry = cur;
- return(TRUE);
- }
- }
- else {
- /*
- * Go from start to hint, *inclusively*
- */
- last = cur->next;
- cur = map->header.next;
- }
-
- /*
- * Search linearly
- */
-
- while (cur != last) {
- if (cur->end > address) {
- if (address >= cur->start) {
- /*
- * Save this lookup for future
- * hints, and return
- */
-
- *entry = cur;
- SAVE_HINT(map, cur);
- return(TRUE);
- }
- break;
- }
- cur = cur->next;
- }
- *entry = cur->prev;
- SAVE_HINT(map, *entry);
- return(FALSE);
-}
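Editor's note: vm_map_lookup_entry() first tries the per-map hint and only falls back to a linear walk when the hint misses, saving the successful result as the next hint. A compact sketch of that hint-then-scan lookup over sorted half-open ranges (illustrative and array-based, unlike the kernel's list):

#include <stdio.h>

struct range { unsigned start, end; };	/* sorted by start, [start, end) */

static int hint = 0;			/* index of the last successful lookup */

/* Return the index of the range containing addr, or -1; start from the hint. */
static int
lookup(const struct range *r, int n, unsigned addr)
{
	int i = (addr >= r[hint].start) ? hint : 0;	/* hint only helps forward */

	for (; i < n && r[i].start <= addr; i++)
		if (addr < r[i].end)
			return (hint = i);
	return -1;
}

int
main(void)
{
	struct range r[] = { { 0, 4096 }, { 8192, 12288 }, { 16384, 20480 } };

	printf("%d %d %d\n",
	    lookup(r, 3, 100),		/* 0 */
	    lookup(r, 3, 9000),		/* 1 */
	    lookup(r, 3, 5000));	/* -1: falls in a hole */
	return 0;
}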
-
-/*
- * Find sufficient space for `length' bytes in the given map, starting at
- * `start'. The map must be locked. Returns 0 on success, 1 on no space.
- */
-int
-vm_map_findspace(map, start, length, addr)
- register vm_map_t map;
- register vm_offset_t start;
- vm_size_t length;
- vm_offset_t *addr;
-{
- register vm_map_entry_t entry, next;
- register vm_offset_t end;
-
- if (start < map->min_offset)
- start = map->min_offset;
- if (start > map->max_offset)
- return (1);
-
- /*
- * Look for the first possible address; if there's already
- * something at this address, we have to start after it.
- */
- if (start == map->min_offset) {
- if ((entry = map->first_free) != &map->header)
- start = entry->end;
- } else {
- vm_map_entry_t tmp;
- if (vm_map_lookup_entry(map, start, &tmp))
- start = tmp->end;
- entry = tmp;
- }
-
- /*
- * Look through the rest of the map, trying to fit a new region in
- * the gap between existing regions, or after the very last region.
- */
- for (;; start = (entry = next)->end) {
- /*
- * Find the end of the proposed new region. Be sure we didn't
- * go beyond the end of the map, or wrap around the address;
- * if so, we lose. Otherwise, if this is the last entry, or
- * if the proposed new region fits before the next entry, we
- * win.
- */
- end = start + length;
- if (end > map->max_offset || end < start)
- return (1);
- next = entry->next;
- if (next == &map->header || next->start >= end)
- break;
- }
- SAVE_HINT(map, entry);
- *addr = start;
- return (0);
-}
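Editor's note: vm_map_findspace() is a first-fit search over the gaps between entries; note the `end < start' test, which rejects a candidate whose end wraps around the address space. A standalone sketch of the same search over a sorted array of allocated ranges (names and types are illustrative):

#include <stdio.h>

struct range { unsigned start, end; };	/* allocated, sorted, disjoint */

/*
 * First-fit: find `length' bytes of free space at or above `start' and
 * below `max'.  Returns 0 and sets *addr on success, 1 on failure.
 */
static int
findspace(const struct range *r, int n, unsigned start, unsigned max,
    unsigned length, unsigned *addr)
{
	int i;

	for (i = 0; ; i++) {
		unsigned end = start + length;

		if (end > max || end < start)	/* past the map, or wrapped */
			return 1;
		if (i == n || r[i].start >= end)/* fits before the next range */
			break;
		if (r[i].end > start)		/* overlaps: retry after it */
			start = r[i].end;
	}
	*addr = start;
	return 0;
}

int
main(void)
{
	struct range r[] = { { 0, 4096 }, { 8192, 12288 } };
	unsigned addr;

	if (findspace(r, 2, 0, 65536, 4096, &addr) == 0)
		printf("placed 4K region at %u\n", addr);	/* 4096 */
	return 0;
}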
-
-/*
- * vm_map_find finds an unallocated region in the target address
- * map with the given length. The search is defined to be
- * first-fit from the specified address; the region found is
- * returned in the same parameter.
- *
- */
-int
-vm_map_find(map, object, offset, addr, length, find_space)
- vm_map_t map;
- vm_object_t object;
- vm_offset_t offset;
- vm_offset_t *addr; /* IN/OUT */
- vm_size_t length;
- boolean_t find_space;
-{
- register vm_offset_t start;
- int result;
-
- start = *addr;
- vm_map_lock(map);
- if (find_space) {
- if (vm_map_findspace(map, start, length, addr)) {
- vm_map_unlock(map);
- return (KERN_NO_SPACE);
- }
- start = *addr;
- }
- result = vm_map_insert(map, object, offset, start, start + length);
- vm_map_unlock(map);
- return (result);
-}
-
-/*
- * vm_map_simplify_entry: [ internal use only ]
- *
- * Simplify the given map entry by:
- * removing extra sharing maps
- * [XXX maybe later] merging with a neighbor
- */
-void
-vm_map_simplify_entry(map, entry)
- vm_map_t map;
- vm_map_entry_t entry;
-{
-#ifdef lint
- map++;
-#endif
-
- /*
- * If this entry corresponds to a sharing map, then
- * see if we can remove the level of indirection.
- * If it's not a sharing map, then it points to
- * a VM object, so see if we can merge with either
- * of our neighbors.
- */
-
- if (entry->is_sub_map)
- return;
- if (entry->is_a_map) {
-#if 0
- vm_map_t my_share_map;
- int count;
-
- my_share_map = entry->object.share_map;
- simple_lock(&my_share_map->ref_lock);
- count = my_share_map->ref_count;
- simple_unlock(&my_share_map->ref_lock);
-
- if (count == 1) {
- /* Can move the region from
- * entry->start to entry->end (+ entry->offset)
- * in my_share_map into place of entry.
- * Later.
- */
- }
-#endif
- }
- else {
- /*
- * Try to merge with our neighbors.
- *
- * Conditions for merge are:
- *
- * 1. entries are adjacent.
- * 2. both entries point to objects
- * with null pagers.
- *
- * If a merge is possible, we replace the two
- * entries with a single entry, then merge
- * the two objects into a single object.
- *
- * Now, all that is left to do is write the
- * code!
- */
- }
-}
-
-/*
- * vm_map_clip_start: [ internal use only ]
- *
- * Asserts that the given entry begins at or after
- * the specified address; if necessary,
- * it splits the entry into two.
- */
-#define vm_map_clip_start(map, entry, startaddr) \
-{ \
- if (startaddr > entry->start) \
- _vm_map_clip_start(map, entry, startaddr); \
-}
-
-/*
- * This routine is called only when it is known that
- * the entry must be split.
- */
-static void
-_vm_map_clip_start(map, entry, start)
- register vm_map_t map;
- register vm_map_entry_t entry;
- register vm_offset_t start;
-{
- register vm_map_entry_t new_entry;
-
- /*
- * See if we can simplify this entry first
- */
-
- vm_map_simplify_entry(map, entry);
-
- /*
- * Split off the front portion --
- * note that we must insert the new
- * entry BEFORE this one, so that
- * this entry has the specified starting
- * address.
- */
-
- new_entry = vm_map_entry_create(map);
- *new_entry = *entry;
-
- new_entry->end = start;
- entry->offset += (start - entry->start);
- entry->start = start;
-
- vm_map_entry_link(map, entry->prev, new_entry);
-
- if (entry->is_a_map || entry->is_sub_map)
- vm_map_reference(new_entry->object.share_map);
- else
- vm_object_reference(new_entry->object.vm_object);
-}
-
-/*
- * vm_map_clip_end: [ internal use only ]
- *
- * Asserts that the given entry ends at or before
- * the specified address; if necessary,
- * it splits the entry into two.
- */
-
-#define vm_map_clip_end(map, entry, endaddr) \
-{ \
- if (endaddr < entry->end) \
- _vm_map_clip_end(map, entry, endaddr); \
-}
-
-/*
- * This routine is called only when it is known that
- * the entry must be split.
- */
-static void
-_vm_map_clip_end(map, entry, end)
- register vm_map_t map;
- register vm_map_entry_t entry;
- register vm_offset_t end;
-{
- register vm_map_entry_t new_entry;
-
- /*
- * Create a new entry and insert it
- * AFTER the specified entry
- */
-
- new_entry = vm_map_entry_create(map);
- *new_entry = *entry;
-
- new_entry->start = entry->end = end;
- new_entry->offset += (end - entry->start);
-
- vm_map_entry_link(map, entry, new_entry);
-
- if (entry->is_a_map || entry->is_sub_map)
- vm_map_reference(new_entry->object.share_map);
- else
- vm_object_reference(new_entry->object.vm_object);
-}
-
-/*
- * VM_MAP_RANGE_CHECK: [ internal use only ]
- *
- * Asserts that the starting and ending region
- * addresses fall within the valid range of the map.
- */
-#define VM_MAP_RANGE_CHECK(map, start, end) \
- { \
- if (start < vm_map_min(map)) \
- start = vm_map_min(map); \
- if (end > vm_map_max(map)) \
- end = vm_map_max(map); \
- if (start > end) \
- start = end; \
- }
-
-/*
- * vm_map_submap: [ kernel use only ]
- *
- * Mark the given range as handled by a subordinate map.
- *
- * This range must have been created with vm_map_find,
- * and no other operations may have been performed on this
- * range prior to calling vm_map_submap.
- *
- * Only a limited number of operations can be performed
- * within this range after calling vm_map_submap:
- * vm_fault
- * [Don't try vm_map_copy!]
- *
- * To remove a submapping, one must first remove the
- * range from the superior map, and then destroy the
- * submap (if desired). [Better yet, don't try it.]
- */
-int
-vm_map_submap(map, start, end, submap)
- register vm_map_t map;
- register vm_offset_t start;
- register vm_offset_t end;
- vm_map_t submap;
-{
- vm_map_entry_t entry;
- register int result = KERN_INVALID_ARGUMENT;
-
- vm_map_lock(map);
-
- VM_MAP_RANGE_CHECK(map, start, end);
-
- if (vm_map_lookup_entry(map, start, &entry)) {
- vm_map_clip_start(map, entry, start);
- }
- else
- entry = entry->next;
-
- vm_map_clip_end(map, entry, end);
-
- if ((entry->start == start) && (entry->end == end) &&
- (!entry->is_a_map) &&
- (entry->object.vm_object == NULL) &&
- (!entry->copy_on_write)) {
- entry->is_a_map = FALSE;
- entry->is_sub_map = TRUE;
- vm_map_reference(entry->object.sub_map = submap);
- result = KERN_SUCCESS;
- }
- vm_map_unlock(map);
-
- return(result);
-}
-
-/*
- * vm_map_protect:
- *
- * Sets the protection of the specified address
- * region in the target map. If "set_max" is
- * specified, the maximum protection is to be set;
- * otherwise, only the current protection is affected.
- */
-int
-vm_map_protect(map, start, end, new_prot, set_max)
- register vm_map_t map;
- register vm_offset_t start;
- register vm_offset_t end;
- register vm_prot_t new_prot;
- register boolean_t set_max;
-{
- register vm_map_entry_t current;
- vm_map_entry_t entry;
-
- vm_map_lock(map);
-
- VM_MAP_RANGE_CHECK(map, start, end);
-
- if (vm_map_lookup_entry(map, start, &entry)) {
- vm_map_clip_start(map, entry, start);
- }
- else
- entry = entry->next;
-
- /*
- * Make a first pass to check for protection
- * violations.
- */
-
- current = entry;
- while ((current != &map->header) && (current->start < end)) {
-		if (current->is_sub_map) {
-			vm_map_unlock(map);
-			return(KERN_INVALID_ARGUMENT);
-		}
- if ((new_prot & current->max_protection) != new_prot) {
- vm_map_unlock(map);
- return(KERN_PROTECTION_FAILURE);
- }
-
- current = current->next;
- }
-
- /*
- * Go back and fix up protections.
- * [Note that clipping is not necessary the second time.]
- */
-
- current = entry;
-
- while ((current != &map->header) && (current->start < end)) {
- vm_prot_t old_prot;
-
- vm_map_clip_end(map, current, end);
-
- old_prot = current->protection;
- if (set_max)
- current->protection =
- (current->max_protection = new_prot) &
- old_prot;
- else
- current->protection = new_prot;
-
- /*
- * Update physical map if necessary.
- * Worry about copy-on-write here -- CHECK THIS XXX
- */
-
- if (current->protection != old_prot) {
-
-#define MASK(entry) ((entry)->copy_on_write ? ~VM_PROT_WRITE : \
- VM_PROT_ALL)
-#define max(a,b) ((a) > (b) ? (a) : (b))
-
- if (current->is_a_map) {
- vm_map_entry_t share_entry;
- vm_offset_t share_end;
-
- vm_map_lock(current->object.share_map);
- (void) vm_map_lookup_entry(
- current->object.share_map,
- current->offset,
- &share_entry);
- share_end = current->offset +
- (current->end - current->start);
- while ((share_entry !=
- &current->object.share_map->header) &&
- (share_entry->start < share_end)) {
-
- pmap_protect(map->pmap,
- (max(share_entry->start,
- current->offset) -
- current->offset +
- current->start),
- min(share_entry->end,
- share_end) -
- current->offset +
- current->start,
- current->protection &
- MASK(share_entry));
-
- share_entry = share_entry->next;
- }
- vm_map_unlock(current->object.share_map);
- }
- else
- pmap_protect(map->pmap, current->start,
- current->end,
-			    current->protection & MASK(current));
-#undef max
-#undef MASK
- }
- current = current->next;
- }
-
- vm_map_unlock(map);
- return(KERN_SUCCESS);
-}
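Editor's note: the MASK() macro above is what keeps copy-on-write intact across vm_map_protect(): a copy-on-write entry is never handed hardware write permission, so the first write still faults and gets a private copy. A tiny sketch of that masking rule (the constants are illustrative, not the kernel's VM_PROT_* values):

#include <stdio.h>

#define PROT_READ	0x1
#define PROT_WRITE	0x2
#define PROT_EXEC	0x4
#define PROT_ALL	(PROT_READ | PROT_WRITE | PROT_EXEC)

/* Effective hardware protection for an entry. */
static int
hw_prot(int map_prot, int copy_on_write)
{
	return map_prot & (copy_on_write ? ~PROT_WRITE : PROT_ALL);
}

int
main(void)
{
	printf("%d\n", hw_prot(PROT_READ | PROT_WRITE, 1));	/* 1: read only */
	printf("%d\n", hw_prot(PROT_READ | PROT_WRITE, 0));	/* 3: read/write */
	return 0;
}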
-
-/*
- * vm_map_inherit:
- *
- * Sets the inheritance of the specified address
- * range in the target map. Inheritance
- * affects how the map will be shared with
- * child maps at the time of vm_map_fork.
- */
-int
-vm_map_inherit(map, start, end, new_inheritance)
- register vm_map_t map;
- register vm_offset_t start;
- register vm_offset_t end;
- register vm_inherit_t new_inheritance;
-{
- register vm_map_entry_t entry;
- vm_map_entry_t temp_entry;
-
- switch (new_inheritance) {
- case VM_INHERIT_NONE:
- case VM_INHERIT_COPY:
- case VM_INHERIT_SHARE:
- break;
- default:
- return(KERN_INVALID_ARGUMENT);
- }
-
- vm_map_lock(map);
-
- VM_MAP_RANGE_CHECK(map, start, end);
-
- if (vm_map_lookup_entry(map, start, &temp_entry)) {
- entry = temp_entry;
- vm_map_clip_start(map, entry, start);
- }
- else
- entry = temp_entry->next;
-
- while ((entry != &map->header) && (entry->start < end)) {
- vm_map_clip_end(map, entry, end);
-
- entry->inheritance = new_inheritance;
-
- entry = entry->next;
- }
-
- vm_map_unlock(map);
- return(KERN_SUCCESS);
-}
-
-/*
- * vm_map_pageable:
- *
- * Sets the pageability of the specified address
- * range in the target map. Regions specified
- * as not pageable require locked-down physical
- * memory and physical page maps.
- *
- * The map must not be locked, but a reference
- * must remain to the map throughout the call.
- */
-int
-vm_map_pageable(map, start, end, new_pageable)
- register vm_map_t map;
- register vm_offset_t start;
- register vm_offset_t end;
- register boolean_t new_pageable;
-{
- register vm_map_entry_t entry;
- vm_map_entry_t start_entry;
- register vm_offset_t failed = 0;
- int rv;
-
- vm_map_lock(map);
-
- VM_MAP_RANGE_CHECK(map, start, end);
-
- /*
- * Only one pageability change may take place at one
- * time, since vm_fault assumes it will be called
- * only once for each wiring/unwiring. Therefore, we
- * have to make sure we're actually changing the pageability
- * for the entire region. We do so before making any changes.
- */
-
- if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
- vm_map_unlock(map);
- return(KERN_INVALID_ADDRESS);
- }
- entry = start_entry;
-
- /*
- * Actions are rather different for wiring and unwiring,
- * so we have two separate cases.
- */
-
- if (new_pageable) {
-
- vm_map_clip_start(map, entry, start);
-
- /*
- * Unwiring. First ensure that the range to be
- * unwired is really wired down and that there
- * are no holes.
- */
- while ((entry != &map->header) && (entry->start < end)) {
-
- if (entry->wired_count == 0 ||
- (entry->end < end &&
- (entry->next == &map->header ||
- entry->next->start > entry->end))) {
- vm_map_unlock(map);
- return(KERN_INVALID_ARGUMENT);
- }
- entry = entry->next;
- }
-
- /*
- * Now decrement the wiring count for each region.
- * If a region becomes completely unwired,
- * unwire its physical pages and mappings.
- */
- vm_map_set_recursive(&map->lock);
-
- entry = start_entry;
- while ((entry != &map->header) && (entry->start < end)) {
- vm_map_clip_end(map, entry, end);
-
- entry->wired_count--;
- if (entry->wired_count == 0)
- vm_fault_unwire(map, entry->start, entry->end);
-
- entry = entry->next;
- }
- vm_map_clear_recursive(&map->lock);
- }
-
- else {
- /*
- * Wiring. We must do this in two passes:
- *
- * 1. Holding the write lock, we create any shadow
- * or zero-fill objects that need to be created.
- * Then we clip each map entry to the region to be
- * wired and increment its wiring count. We
- * create objects before clipping the map entries
- * to avoid object proliferation.
- *
- * 2. We downgrade to a read lock, and call
- * vm_fault_wire to fault in the pages for any
- * newly wired area (wired_count is 1).
- *
- * Downgrading to a read lock for vm_fault_wire avoids
- * a possible deadlock with another thread that may have
- * faulted on one of the pages to be wired (it would mark
- * the page busy, blocking us, then in turn block on the
- * map lock that we hold). Because of problems in the
- * recursive lock package, we cannot upgrade to a write
- * lock in vm_map_lookup. Thus, any actions that require
- * the write lock must be done beforehand. Because we
- * keep the read lock on the map, the copy-on-write status
- * of the entries we modify here cannot change.
- */
-
- /*
- * Pass 1.
- */
- while ((entry != &map->header) && (entry->start < end)) {
- if (entry->wired_count == 0) {
-
- /*
- * Perform actions of vm_map_lookup that need
- * the write lock on the map: create a shadow
- * object for a copy-on-write region, or an
- * object for a zero-fill region.
- *
- * We don't have to do this for entries that
- * point to sharing maps, because we won't hold
- * the lock on the sharing map.
- */
- if (!entry->is_a_map) {
- if (entry->needs_copy &&
- ((entry->protection & VM_PROT_WRITE) != 0)) {
-
- vm_object_shadow(&entry->object.vm_object,
- &entry->offset,
- (vm_size_t)(entry->end
- - entry->start));
- entry->needs_copy = FALSE;
- }
- else if (entry->object.vm_object == NULL) {
- entry->object.vm_object =
- vm_object_allocate((vm_size_t)(entry->end
- - entry->start));
- entry->offset = (vm_offset_t)0;
- }
- }
- }
- vm_map_clip_start(map, entry, start);
- vm_map_clip_end(map, entry, end);
- entry->wired_count++;
-
- /*
- * Check for holes
- */
- if (entry->end < end &&
- (entry->next == &map->header ||
- entry->next->start > entry->end)) {
- /*
- * Found one. Object creation actions
- * do not need to be undone, but the
- * wired counts need to be restored.
- */
- while (entry != &map->header && entry->end > start) {
- entry->wired_count--;
- entry = entry->prev;
- }
- vm_map_unlock(map);
- return(KERN_INVALID_ARGUMENT);
- }
- entry = entry->next;
- }
-
- /*
- * Pass 2.
- */
-
- /*
- * HACK HACK HACK HACK
- *
- * If we are wiring in the kernel map or a submap of it,
- * unlock the map to avoid deadlocks. We trust that the
- * kernel threads are well-behaved, and therefore will
- * not do anything destructive to this region of the map
- * while we have it unlocked. We cannot trust user threads
- * to do the same.
- *
- * HACK HACK HACK HACK
- */
- if (vm_map_pmap(map) == pmap_kernel()) {
- vm_map_unlock(map); /* trust me ... */
- }
- else {
- vm_map_set_recursive(&map->lock);
- lockmgr(&map->lock, LK_DOWNGRADE, (void *)0, curproc);
- }
-
- rv = 0;
- entry = start_entry;
- while (entry != &map->header && entry->start < end) {
- /*
- * If vm_fault_wire fails for any page we need to
- * undo what has been done. We decrement the wiring
- * count for those pages which have not yet been
- * wired (now) and unwire those that have (later).
- *
- * XXX this violates the locking protocol on the map,
- * needs to be fixed.
- */
- if (rv)
- entry->wired_count--;
- else if (entry->wired_count == 1) {
- rv = vm_fault_wire(map, entry->start, entry->end);
- if (rv) {
- failed = entry->start;
- entry->wired_count--;
- }
- }
- entry = entry->next;
- }
-
- if (vm_map_pmap(map) == pmap_kernel()) {
- vm_map_lock(map);
- }
- else {
- vm_map_clear_recursive(&map->lock);
- }
- if (rv) {
- vm_map_unlock(map);
- (void) vm_map_pageable(map, start, failed, TRUE);
- return(rv);
- }
- }
-
- vm_map_unlock(map);
-
- return(KERN_SUCCESS);
-}
-
-/*
- * vm_map_clean
- *
- * Push any dirty cached pages in the address range to their pager.
- * If syncio is TRUE, dirty pages are written synchronously.
- * If invalidate is TRUE, any cached pages are freed as well.
- *
- * Returns an error if any part of the specified range is not mapped.
- */
-int
-vm_map_clean(map, start, end, syncio, invalidate)
- vm_map_t map;
- vm_offset_t start;
- vm_offset_t end;
- boolean_t syncio;
- boolean_t invalidate;
-{
- register vm_map_entry_t current;
- vm_map_entry_t entry;
- vm_size_t size;
- vm_object_t object;
- vm_offset_t offset;
-
- vm_map_lock_read(map);
- VM_MAP_RANGE_CHECK(map, start, end);
- if (!vm_map_lookup_entry(map, start, &entry)) {
- vm_map_unlock_read(map);
- return(KERN_INVALID_ADDRESS);
- }
-
- /*
- * Make a first pass to check for holes, and (if invalidating)
- * wired pages.
- */
- for (current = entry; current->start < end; current = current->next) {
- if (current->is_sub_map) {
- vm_map_unlock_read(map);
- return(KERN_INVALID_ARGUMENT);
- }
- if (end > current->end &&
- (current->next == &map->header ||
- current->end != current->next->start)) {
- vm_map_unlock_read(map);
- return(KERN_INVALID_ADDRESS);
- }
- if (current->wired_count) {
- vm_map_unlock_read(map);
- return(KERN_PAGES_LOCKED);
- }
- }
-
- /*
- * Make a second pass, cleaning/uncaching pages from the indicated
- * objects as we go.
- */
- for (current = entry; current->start < end; current = current->next) {
- offset = current->offset + (start - current->start);
- size = (end <= current->end ? end : current->end) - start;
- if (current->is_a_map) {
- register vm_map_t smap;
- vm_map_entry_t tentry;
- vm_size_t tsize;
-
- smap = current->object.share_map;
- vm_map_lock_read(smap);
- (void) vm_map_lookup_entry(smap, offset, &tentry);
- tsize = tentry->end - offset;
- if (tsize < size)
- size = tsize;
- object = tentry->object.vm_object;
- offset = tentry->offset + (offset - tentry->start);
- vm_object_lock(object);
- vm_map_unlock_read(smap);
- } else {
- object = current->object.vm_object;
- vm_object_lock(object);
- }
- /*
- * XXX should we continue on an error?
- */
- if (!vm_object_page_clean(object, offset, offset+size, syncio,
- FALSE)) {
- vm_object_unlock(object);
- vm_map_unlock_read(map);
- return(KERN_FAILURE);
- }
- if (invalidate)
- vm_object_page_remove(object, offset, offset+size);
- vm_object_unlock(object);
- start += size;
- }
-
- vm_map_unlock_read(map);
- return(KERN_SUCCESS);
-}
-
-/*
- * vm_map_entry_unwire: [ internal use only ]
- *
- * Make the region specified by this entry pageable.
- *
- * The map in question should be locked.
- * [This is the reason for this routine's existence.]
- */
-void
-vm_map_entry_unwire(map, entry)
- vm_map_t map;
- register vm_map_entry_t entry;
-{
- vm_fault_unwire(map, entry->start, entry->end);
- entry->wired_count = 0;
-}
-
-/*
- * vm_map_entry_delete: [ internal use only ]
- *
- * Deallocate the given entry from the target map.
- */
-void
-vm_map_entry_delete(map, entry)
- register vm_map_t map;
- register vm_map_entry_t entry;
-{
- if (entry->wired_count != 0)
- vm_map_entry_unwire(map, entry);
-
- vm_map_entry_unlink(map, entry);
- map->size -= entry->end - entry->start;
-
- if (entry->is_a_map || entry->is_sub_map)
- vm_map_deallocate(entry->object.share_map);
- else
- vm_object_deallocate(entry->object.vm_object);
-
- vm_map_entry_dispose(map, entry);
-}
-
-/*
- * vm_map_delete: [ internal use only ]
- *
- * Deallocates the given address range from the target
- * map.
- *
- * When called with a sharing map, removes pages from
- * that region from all physical maps.
- */
-int
-vm_map_delete(map, start, end)
- register vm_map_t map;
- vm_offset_t start;
- register vm_offset_t end;
-{
- register vm_map_entry_t entry;
- vm_map_entry_t first_entry;
-
- /*
- * Find the start of the region, and clip it
- */
-
- if (!vm_map_lookup_entry(map, start, &first_entry))
- entry = first_entry->next;
- else {
- entry = first_entry;
- vm_map_clip_start(map, entry, start);
-
- /*
- * Fix the lookup hint now, rather than each
-		 * time through the loop.
- */
-
- SAVE_HINT(map, entry->prev);
- }
-
- /*
- * Save the free space hint
- */
-
- if (map->first_free->start >= start)
- map->first_free = entry->prev;
-
- /*
- * Step through all entries in this region
- */
-
- while ((entry != &map->header) && (entry->start < end)) {
- vm_map_entry_t next;
- register vm_offset_t s, e;
- register vm_object_t object;
-
- vm_map_clip_end(map, entry, end);
-
- next = entry->next;
- s = entry->start;
- e = entry->end;
-
- /*
- * Unwire before removing addresses from the pmap;
- * otherwise, unwiring will put the entries back in
- * the pmap.
- */
-
- object = entry->object.vm_object;
- if (entry->wired_count != 0)
- vm_map_entry_unwire(map, entry);
-
- /*
- * If this is a sharing map, we must remove
- * *all* references to this data, since we can't
- * find all of the physical maps which are sharing
- * it.
- */
-
- if (object == kernel_object || object == kmem_object)
- vm_object_page_remove(object, entry->offset,
- entry->offset + (e - s));
- else if (!map->is_main_map)
- vm_object_pmap_remove(object,
- entry->offset,
- entry->offset + (e - s));
- else
- pmap_remove(map->pmap, s, e);
-
- /*
- * Delete the entry (which may delete the object)
- * only after removing all pmap entries pointing
- * to its pages. (Otherwise, its page frames may
- * be reallocated, and any modify bits will be
- * set in the wrong object!)
- */
-
- vm_map_entry_delete(map, entry);
- entry = next;
- }
- return(KERN_SUCCESS);
-}
-
-/*
- * vm_map_remove:
- *
- * Remove the given address range from the target map.
- * This is the exported form of vm_map_delete.
- */
-int
-vm_map_remove(map, start, end)
- register vm_map_t map;
- register vm_offset_t start;
- register vm_offset_t end;
-{
- register int result;
-
- vm_map_lock(map);
- VM_MAP_RANGE_CHECK(map, start, end);
- result = vm_map_delete(map, start, end);
- vm_map_unlock(map);
-
- return(result);
-}
-
-/*
- * vm_map_check_protection:
- *
- * Assert that the target map allows the specified
- * privilege on the entire address region given.
- * The entire region must be allocated.
- */
-boolean_t
-vm_map_check_protection(map, start, end, protection)
- register vm_map_t map;
- register vm_offset_t start;
- register vm_offset_t end;
- register vm_prot_t protection;
-{
- register vm_map_entry_t entry;
- vm_map_entry_t tmp_entry;
-
- if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
- return(FALSE);
- }
-
- entry = tmp_entry;
-
- while (start < end) {
- if (entry == &map->header) {
- return(FALSE);
- }
-
- /*
- * No holes allowed!
- */
-
- if (start < entry->start) {
- return(FALSE);
- }
-
- /*
- * Check protection associated with entry.
- */
-
- if ((entry->protection & protection) != protection) {
- return(FALSE);
- }
-
- /* go to next entry */
-
- start = entry->end;
- entry = entry->next;
- }
- return(TRUE);
-}
-
-/*
- * vm_map_copy_entry:
- *
- * Copies the contents of the source entry to the destination
- * entry. The entries *must* be aligned properly.
- */
-void
-vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
- vm_map_t src_map, dst_map;
- register vm_map_entry_t src_entry, dst_entry;
-{
- vm_object_t temp_object;
-
- if (src_entry->is_sub_map || dst_entry->is_sub_map)
- return;
-
- if (dst_entry->object.vm_object != NULL &&
- (dst_entry->object.vm_object->flags & OBJ_INTERNAL) == 0)
- printf("vm_map_copy_entry: copying over permanent data!\n");
-
- /*
- * If our destination map was wired down,
- * unwire it now.
- */
-
- if (dst_entry->wired_count != 0)
- vm_map_entry_unwire(dst_map, dst_entry);
-
- /*
- * If we're dealing with a sharing map, we
- * must remove the destination pages from
- * all maps (since we cannot know which maps
- * this sharing map belongs in).
- */
-
- if (dst_map->is_main_map)
- pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end);
- else
- vm_object_pmap_remove(dst_entry->object.vm_object,
- dst_entry->offset,
- dst_entry->offset +
- (dst_entry->end - dst_entry->start));
-
- if (src_entry->wired_count == 0) {
-
- boolean_t src_needs_copy;
-
- /*
- * If the source entry is marked needs_copy,
- * it is already write-protected.
- */
- if (!src_entry->needs_copy) {
-
- boolean_t su;
-
- /*
- * If the source entry has only one mapping,
- * we can just protect the virtual address
- * range.
- */
- if (!(su = src_map->is_main_map)) {
- simple_lock(&src_map->ref_lock);
- su = (src_map->ref_count == 1);
- simple_unlock(&src_map->ref_lock);
- }
-
- if (su) {
- pmap_protect(src_map->pmap,
- src_entry->start,
- src_entry->end,
- src_entry->protection & ~VM_PROT_WRITE);
- }
- else {
- vm_object_pmap_copy(src_entry->object.vm_object,
- src_entry->offset,
- src_entry->offset + (src_entry->end
- -src_entry->start));
- }
- }
-
- /*
- * Make a copy of the object.
- */
- temp_object = dst_entry->object.vm_object;
- vm_object_copy(src_entry->object.vm_object, src_entry->offset,
- (vm_size_t)(src_entry->end - src_entry->start),
- &dst_entry->object.vm_object, &dst_entry->offset,
- &src_needs_copy);
-
- /*
- * If we didn't get a copy-object now, mark the
- * source map entry so that a shadow will be created
- * to hold its changed pages.
- */
- if (src_needs_copy)
- src_entry->needs_copy = TRUE;
-
- /*
- * The destination always needs to have a shadow
- * created, unless it's a zero-fill entry.
- */
- if (dst_entry->object.vm_object != NULL)
- dst_entry->needs_copy = TRUE;
- else
- dst_entry->needs_copy = FALSE;
-
- /*
- * Mark the entries copy-on-write, so that write-enabling
- * the entry won't make copy-on-write pages writable.
- */
- src_entry->copy_on_write = TRUE;
- dst_entry->copy_on_write = TRUE;
- /*
- * Get rid of the old object.
- */
- vm_object_deallocate(temp_object);
-
- pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
- dst_entry->end - dst_entry->start, src_entry->start);
- }
- else {
- /*
- * Of course, wired down pages can't be set copy-on-write.
- * Cause wired pages to be copied into the new
- * map by simulating faults (the new pages are
- * pageable)
- */
- vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
- }
-}
-
-/*
- * vm_map_copy:
- *
- * Perform a virtual memory copy from the source
- * address map/range to the destination map/range.
- *
- * If src_destroy or dst_alloc is requested,
- * the source and destination regions should be
- * disjoint, not only in the top-level map, but
- * in the sharing maps as well. [The best way
- * to guarantee this is to use a new intermediate
- * map to make copies. This also reduces map
- * fragmentation.]
- */
-int
-vm_map_copy(dst_map, src_map,
- dst_addr, len, src_addr,
- dst_alloc, src_destroy)
- vm_map_t dst_map;
- vm_map_t src_map;
- vm_offset_t dst_addr;
- vm_size_t len;
- vm_offset_t src_addr;
- boolean_t dst_alloc;
- boolean_t src_destroy;
-{
- register
- vm_map_entry_t src_entry;
- register
- vm_map_entry_t dst_entry;
- vm_map_entry_t tmp_entry;
- vm_offset_t src_start;
- vm_offset_t src_end;
- vm_offset_t dst_start;
- vm_offset_t dst_end;
- vm_offset_t src_clip;
- vm_offset_t dst_clip;
- int result;
- boolean_t old_src_destroy;
-
- /*
- * XXX While we figure out why src_destroy screws up,
- * we'll do it by explicitly vm_map_delete'ing at the end.
- */
-
- old_src_destroy = src_destroy;
- src_destroy = FALSE;
-
- /*
- * Compute start and end of region in both maps
- */
-
- src_start = src_addr;
- src_end = src_start + len;
- dst_start = dst_addr;
- dst_end = dst_start + len;
-
- /*
- * Check that the region can exist in both source
- * and destination.
- */
-
- if ((dst_end < dst_start) || (src_end < src_start))
- return(KERN_NO_SPACE);
-
- /*
- * Lock the maps in question -- we avoid deadlock
- * by ordering lock acquisition by map value
- */
-
- if (src_map == dst_map) {
- vm_map_lock(src_map);
- }
- else if ((long) src_map < (long) dst_map) {
- vm_map_lock(src_map);
- vm_map_lock(dst_map);
- } else {
- vm_map_lock(dst_map);
- vm_map_lock(src_map);
- }
-
- result = KERN_SUCCESS;
-
- /*
- * Check protections... source must be completely readable and
- * destination must be completely writable. [Note that if we're
- * allocating the destination region, we don't have to worry
- * about protection, but instead about whether the region
- * exists.]
- */
-
- if (src_map->is_main_map && dst_map->is_main_map) {
- if (!vm_map_check_protection(src_map, src_start, src_end,
- VM_PROT_READ)) {
- result = KERN_PROTECTION_FAILURE;
- goto Return;
- }
-
- if (dst_alloc) {
- /* XXX Consider making this a vm_map_find instead */
- if ((result = vm_map_insert(dst_map, NULL,
- (vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS)
- goto Return;
- }
- else if (!vm_map_check_protection(dst_map, dst_start, dst_end,
- VM_PROT_WRITE)) {
- result = KERN_PROTECTION_FAILURE;
- goto Return;
- }
- }
-
- /*
- * Find the start entries and clip.
- *
- * Note that checking protection asserts that the
- * lookup cannot fail.
- *
- * Also note that we wait to do the second lookup
- * until we have done the first clip, as the clip
- * may affect which entry we get!
- */
-
- (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
- src_entry = tmp_entry;
- vm_map_clip_start(src_map, src_entry, src_start);
-
- (void) vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry);
- dst_entry = tmp_entry;
- vm_map_clip_start(dst_map, dst_entry, dst_start);
-
- /*
- * If both source and destination entries are the same,
- * retry the first lookup, as it may have changed.
- */
-
- if (src_entry == dst_entry) {
- (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
- src_entry = tmp_entry;
- }
-
- /*
- * If source and destination entries are still the same,
- * a null copy is being performed.
- */
-
- if (src_entry == dst_entry)
- goto Return;
-
- /*
- * Go through entries until we get to the end of the
- * region.
- */
-
- while (src_start < src_end) {
- /*
- * Clip the entries to the endpoint of the entire region.
- */
-
- vm_map_clip_end(src_map, src_entry, src_end);
- vm_map_clip_end(dst_map, dst_entry, dst_end);
-
- /*
- * Clip each entry to the endpoint of the other entry.
- */
-
- src_clip = src_entry->start + (dst_entry->end - dst_entry->start);
- vm_map_clip_end(src_map, src_entry, src_clip);
-
- dst_clip = dst_entry->start + (src_entry->end - src_entry->start);
- vm_map_clip_end(dst_map, dst_entry, dst_clip);
-
- /*
- * Both entries now match in size and relative endpoints.
- *
- * If both entries refer to a VM object, we can
- * deal with them now.
- */
-
- if (!src_entry->is_a_map && !dst_entry->is_a_map) {
- vm_map_copy_entry(src_map, dst_map, src_entry,
- dst_entry);
- }
- else {
- register vm_map_t new_dst_map;
- vm_offset_t new_dst_start;
- vm_size_t new_size;
- vm_map_t new_src_map;
- vm_offset_t new_src_start;
-
- /*
- * We have to follow at least one sharing map.
- */
-
- new_size = (dst_entry->end - dst_entry->start);
-
- if (src_entry->is_a_map) {
- new_src_map = src_entry->object.share_map;
- new_src_start = src_entry->offset;
- }
- else {
- new_src_map = src_map;
- new_src_start = src_entry->start;
- vm_map_set_recursive(&src_map->lock);
- }
-
- if (dst_entry->is_a_map) {
- vm_offset_t new_dst_end;
-
- new_dst_map = dst_entry->object.share_map;
- new_dst_start = dst_entry->offset;
-
- /*
- * Since the destination sharing entries
- * will be merely deallocated, we can
- * do that now, and replace the region
- * with a null object. [This prevents
- * splitting the source map to match
- * the form of the destination map.]
- * Note that we can only do so if the
- * source and destination do not overlap.
- */
-
- new_dst_end = new_dst_start + new_size;
-
- if (new_dst_map != new_src_map) {
- vm_map_lock(new_dst_map);
- (void) vm_map_delete(new_dst_map,
- new_dst_start,
- new_dst_end);
- (void) vm_map_insert(new_dst_map,
- NULL,
- (vm_offset_t) 0,
- new_dst_start,
- new_dst_end);
- vm_map_unlock(new_dst_map);
- }
- }
- else {
- new_dst_map = dst_map;
- new_dst_start = dst_entry->start;
- vm_map_set_recursive(&dst_map->lock);
- }
-
- /*
- * Recursively copy the sharing map.
- */
-
- (void) vm_map_copy(new_dst_map, new_src_map,
- new_dst_start, new_size, new_src_start,
- FALSE, FALSE);
-
- if (dst_map == new_dst_map)
- vm_map_clear_recursive(&dst_map->lock);
- if (src_map == new_src_map)
- vm_map_clear_recursive(&src_map->lock);
- }
-
- /*
- * Update variables for next pass through the loop.
- */
-
- src_start = src_entry->end;
- src_entry = src_entry->next;
- dst_start = dst_entry->end;
- dst_entry = dst_entry->next;
-
- /*
- * If the source is to be destroyed, here is the
- * place to do it.
- */
-
- if (src_destroy && src_map->is_main_map &&
- dst_map->is_main_map)
- vm_map_entry_delete(src_map, src_entry->prev);
- }
-
- /*
- * Update the physical maps as appropriate
- */
-
- if (src_map->is_main_map && dst_map->is_main_map) {
- if (src_destroy)
- pmap_remove(src_map->pmap, src_addr, src_addr + len);
- }
-
- /*
- * Unlock the maps
- */
-
- Return: ;
-
- if (old_src_destroy)
- vm_map_delete(src_map, src_addr, src_addr + len);
-
- vm_map_unlock(src_map);
- if (src_map != dst_map)
- vm_map_unlock(dst_map);
-
- return(result);
-}
-
-/*
- * vmspace_fork:
- * Create a new process vmspace structure and vm_map
- * based on those of an existing process. The new map
- * is based on the old map, according to the inheritance
- * values on the regions in that map.
- *
- * The source map must not be locked.
- */
-struct vmspace *
-vmspace_fork(vm1)
- register struct vmspace *vm1;
-{
- register struct vmspace *vm2;
- vm_map_t old_map = &vm1->vm_map;
- vm_map_t new_map;
- vm_map_entry_t old_entry;
- vm_map_entry_t new_entry;
-
- vm_map_lock(old_map);
-
- vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
- old_map->entries_pageable);
- bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
- (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
- new_map = &vm2->vm_map; /* XXX */
-
- old_entry = old_map->header.next;
-
- while (old_entry != &old_map->header) {
- if (old_entry->is_sub_map)
- panic("vm_map_fork: encountered a submap");
-
- switch (old_entry->inheritance) {
- case VM_INHERIT_NONE:
- break;
-
- case VM_INHERIT_SHARE:
- /*
- * If we don't already have a sharing map:
- */
-
- if (!old_entry->is_a_map) {
- vm_map_t new_share_map;
- vm_map_entry_t new_share_entry;
-
- /*
- * Create a new sharing map
- */
-
- new_share_map = vm_map_create(NULL,
- old_entry->start,
- old_entry->end,
- TRUE);
- new_share_map->is_main_map = FALSE;
-
- /*
- * Create the only sharing entry from the
- * old task map entry.
- */
-
- new_share_entry =
- vm_map_entry_create(new_share_map);
- *new_share_entry = *old_entry;
- new_share_entry->wired_count = 0;
-
- /*
- * Insert the entry into the new sharing
- * map
- */
-
- vm_map_entry_link(new_share_map,
- new_share_map->header.prev,
- new_share_entry);
-
- /*
- * Fix up the task map entry to refer
- * to the sharing map now.
- */
-
- old_entry->is_a_map = TRUE;
- old_entry->object.share_map = new_share_map;
- old_entry->offset = old_entry->start;
- }
-
- /*
- * Clone the entry, referencing the sharing map.
- */
-
- new_entry = vm_map_entry_create(new_map);
- *new_entry = *old_entry;
- new_entry->wired_count = 0;
- vm_map_reference(new_entry->object.share_map);
-
- /*
- * Insert the entry into the new map -- we
- * know we're inserting at the end of the new
- * map.
- */
-
- vm_map_entry_link(new_map, new_map->header.prev,
- new_entry);
-
- /*
- * Update the physical map
- */
-
- pmap_copy(new_map->pmap, old_map->pmap,
- new_entry->start,
- (old_entry->end - old_entry->start),
- old_entry->start);
- break;
-
- case VM_INHERIT_COPY:
- /*
- * Clone the entry and link into the map.
- */
-
- new_entry = vm_map_entry_create(new_map);
- *new_entry = *old_entry;
- new_entry->wired_count = 0;
- new_entry->object.vm_object = NULL;
- new_entry->is_a_map = FALSE;
- vm_map_entry_link(new_map, new_map->header.prev,
- new_entry);
- if (old_entry->is_a_map) {
- int check;
-
- check = vm_map_copy(new_map,
- old_entry->object.share_map,
- new_entry->start,
- (vm_size_t)(new_entry->end -
- new_entry->start),
- old_entry->offset,
- FALSE, FALSE);
- if (check != KERN_SUCCESS)
- printf("vm_map_fork: copy in share_map region failed\n");
- }
- else {
- vm_map_copy_entry(old_map, new_map, old_entry,
- new_entry);
- }
- break;
- }
- old_entry = old_entry->next;
- }
-
- new_map->size = old_map->size;
- vm_map_unlock(old_map);
-
- return(vm2);
-}
-
-/*
- * vm_map_lookup:
- *
- * Finds the VM object, offset, and
- * protection for a given virtual address in the
- * specified map, assuming a page fault of the
- * type specified.
- *
- * Leaves the map in question locked for read; return
- * values are guaranteed until a vm_map_lookup_done
- * call is performed. Note that the map argument
- * is in/out; the returned map must be used in
- * the call to vm_map_lookup_done.
- *
- * A handle (out_entry) is returned for use in
- * vm_map_lookup_done, to make that fast.
- *
- * If a lookup is requested with "write protection"
- * specified, the map may be changed to perform virtual
- * copying operations, although the data referenced will
- * remain the same.
- */
-int
-vm_map_lookup(var_map, vaddr, fault_type, out_entry,
- object, offset, out_prot, wired, single_use)
- vm_map_t *var_map; /* IN/OUT */
- register vm_offset_t vaddr;
- register vm_prot_t fault_type;
-
- vm_map_entry_t *out_entry; /* OUT */
- vm_object_t *object; /* OUT */
- vm_offset_t *offset; /* OUT */
- vm_prot_t *out_prot; /* OUT */
- boolean_t *wired; /* OUT */
- boolean_t *single_use; /* OUT */
-{
- vm_map_t share_map;
- vm_offset_t share_offset;
- register vm_map_entry_t entry;
- register vm_map_t map = *var_map;
- register vm_prot_t prot;
- register boolean_t su;
-
- RetryLookup: ;
-
- /*
- * Lookup the faulting address.
- */
-
- vm_map_lock_read(map);
-
-#define RETURN(why) \
- { \
- vm_map_unlock_read(map); \
- return(why); \
- }
-
- /*
- * If the map has an interesting hint, try it before calling
- * full blown lookup routine.
- */
-
- simple_lock(&map->hint_lock);
- entry = map->hint;
- simple_unlock(&map->hint_lock);
-
- *out_entry = entry;
-
- if ((entry == &map->header) ||
- (vaddr < entry->start) || (vaddr >= entry->end)) {
- vm_map_entry_t tmp_entry;
-
- /*
- * Entry was either not a valid hint, or the vaddr
- * was not contained in the entry, so do a full lookup.
- */
- if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
- RETURN(KERN_INVALID_ADDRESS);
-
- entry = tmp_entry;
- *out_entry = entry;
- }
-
- /*
- * Handle submaps.
- */
-
- if (entry->is_sub_map) {
- vm_map_t old_map = map;
-
- *var_map = map = entry->object.sub_map;
- vm_map_unlock_read(old_map);
- goto RetryLookup;
- }
-
- /*
- * Check whether this task is allowed to have
- * this page.
- */
-
- prot = entry->protection;
- if ((fault_type & (prot)) != fault_type)
- RETURN(KERN_PROTECTION_FAILURE);
-
- /*
- * If this page is not pageable, we have to get
- * it for all possible accesses.
- */
-
- if ((*wired = (entry->wired_count != 0)) != 0)
- prot = fault_type = entry->protection;
-
- /*
- * If we don't already have a VM object, track
- * it down.
- */
-
- if ((su = !entry->is_a_map) != 0) {
- share_map = map;
- share_offset = vaddr;
- }
- else {
- vm_map_entry_t share_entry;
-
- /*
- * Compute the sharing map, and offset into it.
- */
-
- share_map = entry->object.share_map;
- share_offset = (vaddr - entry->start) + entry->offset;
-
- /*
- * Look for the backing store object and offset
- */
-
- vm_map_lock_read(share_map);
-
- if (!vm_map_lookup_entry(share_map, share_offset,
- &share_entry)) {
- vm_map_unlock_read(share_map);
- RETURN(KERN_INVALID_ADDRESS);
- }
- entry = share_entry;
- }
-
- /*
- * If the entry was copy-on-write, we either ...
- */
-
- if (entry->needs_copy) {
- /*
- * If we want to write the page, we may as well
- * handle that now since we've got the sharing
- * map locked.
- *
- * If we don't need to write the page, we just
- * demote the permissions allowed.
- */
-
- if (fault_type & VM_PROT_WRITE) {
- /*
- * Make a new object, and place it in the
- * object chain. Note that no new references
- * have appeared -- one just moved from the
- * share map to the new object.
- */
-
- if (lockmgr(&share_map->lock, LK_EXCLUPGRADE,
- (void *)0, curproc)) {
- if (share_map != map)
- vm_map_unlock_read(map);
- goto RetryLookup;
- }
-
- vm_object_shadow(
- &entry->object.vm_object,
- &entry->offset,
- (vm_size_t) (entry->end - entry->start));
-
- entry->needs_copy = FALSE;
-
- lockmgr(&share_map->lock, LK_DOWNGRADE,
- (void *)0, curproc);
- }
- else {
- /*
- * We're attempting to read a copy-on-write
- * page -- don't allow writes.
- */
-
- prot &= (~VM_PROT_WRITE);
- }
- }
-
- /*
- * Create an object if necessary.
- */
- if (entry->object.vm_object == NULL) {
-
- if (lockmgr(&share_map->lock, LK_EXCLUPGRADE,
- (void *)0, curproc)) {
- if (share_map != map)
- vm_map_unlock_read(map);
- goto RetryLookup;
- }
-
- entry->object.vm_object = vm_object_allocate(
- (vm_size_t)(entry->end - entry->start));
- entry->offset = 0;
- lockmgr(&share_map->lock, LK_DOWNGRADE, (void *)0, curproc);
- }
-
- /*
- * Return the object/offset from this entry. If the entry
- * was copy-on-write or empty, it has been fixed up.
- */
-
- *offset = (share_offset - entry->start) + entry->offset;
- *object = entry->object.vm_object;
-
- /*
- * Return whether this is the only map sharing this data.
- */
-
- if (!su) {
- simple_lock(&share_map->ref_lock);
- su = (share_map->ref_count == 1);
- simple_unlock(&share_map->ref_lock);
- }
-
- *out_prot = prot;
- *single_use = su;
-
- return(KERN_SUCCESS);
-
-#undef RETURN
-}
-
-/*
- * vm_map_lookup_done:
- *
- * Releases locks acquired by a vm_map_lookup
- * (according to the handle returned by that lookup).
- */
-
-void
-vm_map_lookup_done(map, entry)
- register vm_map_t map;
- vm_map_entry_t entry;
-{
- /*
- * If this entry references a map, unlock it first.
- */
-
- if (entry->is_a_map)
- vm_map_unlock_read(entry->object.share_map);
-
- /*
- * Unlock the main-level map
- */
-
- vm_map_unlock_read(map);
-}
-
-/*
- * Routine: vm_map_simplify
- * Purpose:
- * Attempt to simplify the map representation in
- * the vicinity of the given starting address.
- * Note:
- * This routine is intended primarily to keep the
- * kernel maps more compact -- they generally don't
- * benefit from the "expand a map entry" technology
- * at allocation time because the adjacent entry
- * is often wired down.
- */
-void
-vm_map_simplify(map, start)
- vm_map_t map;
- vm_offset_t start;
-{
- vm_map_entry_t this_entry;
- vm_map_entry_t prev_entry;
-
- vm_map_lock(map);
- if (
- (vm_map_lookup_entry(map, start, &this_entry)) &&
- ((prev_entry = this_entry->prev) != &map->header) &&
-
- (prev_entry->end == start) &&
- (map->is_main_map) &&
-
- (prev_entry->is_a_map == FALSE) &&
- (prev_entry->is_sub_map == FALSE) &&
-
- (this_entry->is_a_map == FALSE) &&
- (this_entry->is_sub_map == FALSE) &&
-
- (prev_entry->inheritance == this_entry->inheritance) &&
- (prev_entry->protection == this_entry->protection) &&
- (prev_entry->max_protection == this_entry->max_protection) &&
- (prev_entry->wired_count == this_entry->wired_count) &&
-
- (prev_entry->copy_on_write == this_entry->copy_on_write) &&
- (prev_entry->needs_copy == this_entry->needs_copy) &&
-
- (prev_entry->object.vm_object == this_entry->object.vm_object) &&
- ((prev_entry->offset + (prev_entry->end - prev_entry->start))
- == this_entry->offset)
- ) {
- if (map->first_free == this_entry)
- map->first_free = prev_entry;
-
- SAVE_HINT(map, prev_entry);
- vm_map_entry_unlink(map, this_entry);
- prev_entry->end = this_entry->end;
- vm_object_deallocate(this_entry->object.vm_object);
- vm_map_entry_dispose(map, this_entry);
- }
- vm_map_unlock(map);
-}
-
-/*
- * vm_map_print: [ debug ]
- */
-void
-vm_map_print(map, full)
- register vm_map_t map;
- boolean_t full;
-{
- _vm_map_print(map, full, printf);
-}
-
-void
-_vm_map_print(map, full, pr)
- register vm_map_t map;
- boolean_t full;
- int (*pr) __P((const char *, ...));
-{
- register vm_map_entry_t entry;
- extern int indent;
-
- iprintf(pr, "%s map %p: pmap=%p, ref=%d, nentries=%d, version=%d\n",
- (map->is_main_map ? "Task" : "Share"),
- map, (map->pmap), map->ref_count, map->nentries,
- map->timestamp);
-
- if (!full && indent)
- return;
-
- indent += 2;
- for (entry = map->header.next; entry != &map->header;
- entry = entry->next) {
- iprintf(pr, "map entry %p: start=%p, end=%p, ",
- entry, entry->start, entry->end);
- if (map->is_main_map) {
- static char *inheritance_name[4] =
- { "share", "copy", "none", "donate_copy"};
- (*pr)("prot=%x/%x/%s, ",
- entry->protection,
- entry->max_protection,
- inheritance_name[entry->inheritance]);
- if (entry->wired_count != 0)
- (*pr)("wired, ");
- }
-
- if (entry->is_a_map || entry->is_sub_map) {
- (*pr)("share=%p, offset=%p\n",
- entry->object.share_map,
- entry->offset);
- if ((entry->prev == &map->header) ||
- (!entry->prev->is_a_map) ||
- (entry->prev->object.share_map !=
- entry->object.share_map)) {
- indent += 2;
- vm_map_print(entry->object.share_map, full);
- indent -= 2;
- }
-
- }
- else {
- (*pr)("object=%p, offset=%p", entry->object.vm_object,
- entry->offset);
- if (entry->copy_on_write)
- (*pr)(", copy (%s)",
- entry->needs_copy ? "needed" : "done");
- (*pr)("\n");
-
- if ((entry->prev == &map->header) ||
- (entry->prev->is_a_map) ||
- (entry->prev->object.vm_object !=
- entry->object.vm_object)) {
- indent += 2;
- _vm_object_print(entry->object.vm_object,
- full, pr);
- indent -= 2;
- }
- }
- }
- indent -= 2;
-}
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
deleted file mode 100644
index 0364c169ea9..00000000000
--- a/sys/vm/vm_meter.c
+++ /dev/null
@@ -1,236 +0,0 @@
-/* $OpenBSD: vm_meter.c,v 1.8 1998/03/01 00:38:14 niklas Exp $ */
-/* $NetBSD: vm_meter.c,v 1.18 1996/02/05 01:53:59 christos Exp $ */
-
-/*
- * Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_meter.c 8.7 (Berkeley) 5/10/95
- */
-
-#include <sys/param.h>
-#include <sys/proc.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <vm/vm.h>
-#include <sys/sysctl.h>
-#include <sys/exec.h>
-
-struct loadavg averunnable; /* load average, of runnable procs */
-
-int maxslp = MAXSLP;
-#if !defined(MACHINE_NONCONTIG) && !defined(MACHINE_NEW_NONCONTIG)
-int saferss = SAFERSS;
-#endif /* MACHINE_NONCONTIG */
-
-void
-vmmeter()
-{
-
- if (time.tv_sec % 5 == 0)
- loadav(&averunnable);
- if (proc0.p_slptime > maxslp/2)
- wakeup((caddr_t)&proc0);
-}
-
-/*
- * Constants for averages over 1, 5, and 15 minutes
- * when sampling at 5 second intervals.
- */
-fixpt_t cexp[3] = {
- 0.9200444146293232 * FSCALE, /* exp(-1/12) */
- 0.9834714538216174 * FSCALE, /* exp(-1/60) */
- 0.9944598480048967 * FSCALE, /* exp(-1/180) */
-};
-
-/*
- * Compute a tenex style load average of a quantity on
- * 1, 5 and 15 minute intervals.
- */
-void
-loadav(avg)
- register struct loadavg *avg;
-{
- register int i, nrun;
- register struct proc *p;
-
- for (nrun = 0, p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
- switch (p->p_stat) {
- case SSLEEP:
- if (p->p_priority > PZERO || p->p_slptime > 1)
- continue;
- /* fall through */
- case SRUN:
- case SIDL:
- nrun++;
- }
- }
- for (i = 0; i < 3; i++)
- avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
- nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
-}
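
The constants above are exp(-t/T) for the 5-second sample interval t and averaging windows T of 1, 5 and 15 minutes; each pass decays the old average and mixes in the current run-queue length. A minimal, hypothetical userland sketch of the same update in floating point (not part of this tree; link with -lm):

#include <math.h>
#include <stdio.h>

int
main(void)
{
	double avg[3] = { 0.0, 0.0, 0.0 };
	double T[3] = { 60.0, 300.0, 900.0 };	/* 1, 5, 15 minutes */
	int nrun = 2;				/* assume 2 runnable procs */
	int i, step;

	/* One hour of 5-second samples: avg = c*avg + nrun*(1-c). */
	for (step = 0; step < 720; step++)
		for (i = 0; i < 3; i++) {
			double c = exp(-5.0 / T[i]);
			avg[i] = c * avg[i] + nrun * (1.0 - c);
		}
	printf("%.2f %.2f %.2f\n", avg[0], avg[1], avg[2]);
	return (0);
}

With a constant load of two runnable processes all three averages converge toward 2.0, the 1-minute one fastest; the kernel version does the same arithmetic in fixed point, scaled by FSCALE.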
-
-/*
- * Attributes associated with virtual memory.
- */
-int
-vm_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
- int *name;
- u_int namelen;
- void *oldp;
- size_t *oldlenp;
- void *newp;
- size_t newlen;
- struct proc *p;
-{
- struct vmtotal vmtotals;
- struct _ps_strings _ps = { PS_STRINGS };
-
- /* all sysctl names at this level are terminal */
- if (namelen != 1)
- return (ENOTDIR); /* overloaded */
-
- switch (name[0]) {
- case VM_LOADAVG:
- averunnable.fscale = FSCALE;
- return (sysctl_rdstruct(oldp, oldlenp, newp, &averunnable,
- sizeof(averunnable)));
- case VM_METER:
- vmtotal(&vmtotals);
- return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals,
- sizeof(vmtotals)));
- case VM_PSSTRINGS:
- return (sysctl_rdstruct(oldp, oldlenp, newp, &_ps,
- sizeof _ps));
- default:
- return (EOPNOTSUPP);
- }
- /* NOTREACHED */
-}
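
Userland normally reaches the VM_LOADAVG case through getloadavg(3), which on BSD-derived systems is typically a thin wrapper over this sysctl and already divides out fscale. A small, hypothetical example (not from this tree):

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	double la[3];

	if (getloadavg(la, 3) == -1) {
		fprintf(stderr, "getloadavg failed\n");
		return (1);
	}
	printf("load averages: %.2f %.2f %.2f\n", la[0], la[1], la[2]);
	return (0);
}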
-
-/*
- * Calculate the current state of the system.
- * Done on demand from getkerninfo().
- */
-void
-vmtotal(totalp)
- register struct vmtotal *totalp;
-{
- register struct proc *p;
- register vm_map_entry_t entry;
- register vm_object_t object;
- register vm_map_t map;
- int paging;
-
- bzero(totalp, sizeof *totalp);
- /*
- * Mark all objects as inactive.
- */
- simple_lock(&vm_object_list_lock);
- for (object = vm_object_list.tqh_first;
- object != NULL;
- object = object->object_list.tqe_next)
- object->flags &= ~OBJ_ACTIVE;
- simple_unlock(&vm_object_list_lock);
- /*
- * Calculate process statistics.
- */
- for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
- if (p->p_flag & P_SYSTEM)
- continue;
- switch (p->p_stat) {
- case 0:
- continue;
-
- case SSLEEP:
- case SSTOP:
- if (p->p_flag & P_INMEM) {
- if (p->p_priority <= PZERO)
- totalp->t_dw++;
- else if (p->p_slptime < maxslp)
- totalp->t_sl++;
- } else if (p->p_slptime < maxslp)
- totalp->t_sw++;
- if (p->p_slptime >= maxslp)
- continue;
- break;
-
- case SRUN:
- case SIDL:
- if (p->p_flag & P_INMEM)
- totalp->t_rq++;
- else
- totalp->t_sw++;
- if (p->p_stat == SIDL)
- continue;
- break;
- }
- /*
- * Note active objects.
- */
- paging = 0;
- for (map = &p->p_vmspace->vm_map, entry = map->header.next;
- entry != &map->header; entry = entry->next) {
- if (entry->is_a_map || entry->is_sub_map ||
- entry->object.vm_object == NULL)
- continue;
- entry->object.vm_object->flags |= OBJ_ACTIVE;
- paging |= vm_object_paging(entry->object.vm_object);
- }
- if (paging)
- totalp->t_pw++;
- }
- /*
- * Calculate object memory usage statistics.
- */
- simple_lock(&vm_object_list_lock);
- for (object = vm_object_list.tqh_first;
- object != NULL;
- object = object->object_list.tqe_next) {
- totalp->t_vm += num_pages(object->size);
- totalp->t_rm += object->resident_page_count;
- if (object->flags & OBJ_ACTIVE) {
- totalp->t_avm += num_pages(object->size);
- totalp->t_arm += object->resident_page_count;
- }
- if (object->ref_count > 1) {
- /* shared object */
- simple_unlock(&vm_object_list_lock);
- totalp->t_vmshr += num_pages(object->size);
- totalp->t_rmshr += object->resident_page_count;
- if (object->flags & OBJ_ACTIVE) {
- totalp->t_avmshr += num_pages(object->size);
- totalp->t_armshr += object->resident_page_count;
- }
- }
- }
- totalp->t_free = cnt.v_free_count;
-}
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
deleted file mode 100644
index 57aac2da609..00000000000
--- a/sys/vm/vm_mmap.c
+++ /dev/null
@@ -1,1054 +0,0 @@
-/* $OpenBSD: vm_mmap.c,v 1.17 2001/05/05 21:26:47 art Exp $ */
-/* $NetBSD: vm_mmap.c,v 1.47 1996/03/16 23:15:23 christos Exp $ */
-
-/*
- * Copyright (c) 1988 University of Utah.
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
- *
- * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94
- */
-
-/*
- * Mapped file (mmap) interface to VM
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/filedesc.h>
-#include <sys/resourcevar.h>
-#include <sys/proc.h>
-#include <sys/vnode.h>
-#include <sys/file.h>
-#include <sys/mman.h>
-#include <sys/conf.h>
-#include <sys/stat.h>
-
-#include <sys/mount.h>
-#include <sys/syscallargs.h>
-
-#include <miscfs/specfs/specdev.h>
-
-#include <vm/vm.h>
-#include <vm/vm_pager.h>
-#include <vm/vm_prot.h>
-
-#ifdef DEBUG
-int mmapdebug = 0;
-#define MDB_FOLLOW 0x01
-#define MDB_SYNC 0x02
-#define MDB_MAPIT 0x04
-#endif
-
-/* ARGSUSED */
-int
-sys_sbrk(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
-#if 0
- struct sys_sbrk_args /* {
- syscallarg(int) incr;
- } */ *uap = v;
-#endif
-
- /* Not yet implemented */
- return (EOPNOTSUPP);
-}
-
-/* ARGSUSED */
-int
-sys_sstk(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
-#if 0
- struct sys_sstk_args /* {
- syscallarg(int) incr;
- } */ *uap = v;
-#endif
-
- /* Not yet implemented */
- return (EOPNOTSUPP);
-}
-
-
-/*
- * Memory Map (mmap) system call. Note that the file offset
- * and address are allowed to be NOT page aligned, though if
- * the MAP_FIXED flag is set, both must have the same remainder
- * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not
- * page-aligned, the actual mapping starts at trunc_page(addr)
- * and the return value is adjusted up by the page offset.
- */
-int
-sys_mmap(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
- register struct sys_mmap_args /* {
- syscallarg(void *) addr;
- syscallarg(size_t) len;
- syscallarg(int) prot;
- syscallarg(int) flags;
- syscallarg(int) fd;
- syscallarg(long) pad;
- syscallarg(off_t) pos;
- } */ *uap = v;
- struct vattr va;
- register struct filedesc *fdp = p->p_fd;
- register struct file *fp;
- struct vnode *vp;
- vm_offset_t addr, pos;
- vm_size_t size, pageoff;
- vm_prot_t prot, maxprot;
- caddr_t handle;
- int fd, flags, error;
- vm_offset_t vm_min_address = VM_MIN_ADDRESS;
-
- addr = (vm_offset_t) SCARG(uap, addr);
- size = (vm_size_t) SCARG(uap, len);
- prot = SCARG(uap, prot) & VM_PROT_ALL;
- flags = SCARG(uap, flags);
- fd = SCARG(uap, fd);
- pos = (vm_offset_t) SCARG(uap, pos);
-
-#ifdef DEBUG
- if (mmapdebug & MDB_FOLLOW)
- printf("mmap(%d): addr %lx len %lx pro %x flg %x fd %d pos %lx\n",
- p->p_pid, addr, size, prot, flags, fd, pos);
-#endif
-
- /*
- * Align the file position to a page boundary,
- * and save its page offset component.
- */
- pageoff = (pos & PAGE_MASK);
- pos -= pageoff;
-
- /* Adjust size for rounding (on both ends). */
- size += pageoff; /* low end... */
- size = (vm_size_t) round_page(size); /* hi end */
-
- /* Do not allow mappings that cause address wrap... */
- if ((ssize_t)size < 0)
- return (EINVAL);
-
- /*
- * Check for illegal addresses. Watch out for address wrap...
- * Note that VM_*_ADDRESS are not constants due to casts (argh).
- */
- if (flags & MAP_FIXED) {
- /*
- * The specified address must have the same remainder
- * as the file offset taken modulo PAGE_SIZE, so it
- * should be aligned after adjustment by pageoff.
- */
- addr -= pageoff;
- if (addr & PAGE_MASK)
- return (EINVAL);
- /* Address range must be all in user VM space. */
- if (VM_MAXUSER_ADDRESS > 0 &&
- addr + size > VM_MAXUSER_ADDRESS)
- return (EINVAL);
- if (vm_min_address > 0 && addr < vm_min_address)
- return (EINVAL);
- if (addr > addr + size)
- return (EINVAL);
- }
- /*
- * XXX for non-fixed mappings where no hint is provided or
- * the hint would fall in the potential heap space,
- * place it after the end of the largest possible heap.
- *
- * There should really be a pmap call to determine a reasonable
- * location. (To avoid VA cache alias problems, for example!)
- */
- else if (addr < round_page((vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ))
- addr = round_page((vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ);
-
- if ((flags & MAP_ANON) == 0) {
- /*
- * Mapping file, get fp for validation.
- * Obtain vnode and make sure it is of appropriate type.
- */
- if (((unsigned)fd) >= fdp->fd_nfiles ||
- (fp = fdp->fd_ofiles[fd]) == NULL)
- return (EBADF);
- if (fp->f_type != DTYPE_VNODE)
- return (EINVAL);
- vp = (struct vnode *)fp->f_data;
-
- /*
- * XXX hack to handle use of /dev/zero to map anon
- * memory (ala SunOS).
- */
- if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
- flags |= MAP_ANON;
- goto is_anon;
- }
-
- /*
- * Only files and cdevs are mappable, and cdevs do not
- * provide private mappings of any kind.
- */
- if (vp->v_type != VREG &&
- (vp->v_type != VCHR || (flags & (MAP_PRIVATE|MAP_COPY))))
- return (EINVAL);
- /*
- * Ensure that file and memory protections are
- * compatible. Note that we only worry about
- * writability if mapping is shared; in this case,
- * current and max prot are dictated by the open file.
- * XXX use the vnode instead? Problem is: what
- * credentials do we use for determination?
- * What if proc does a setuid?
- */
- maxprot = VM_PROT_EXECUTE; /* ??? */
- if (fp->f_flag & FREAD)
- maxprot |= VM_PROT_READ;
- else if (prot & PROT_READ)
- return (EACCES);
-
- /*
- * If we are sharing potential changes (either via MAP_SHARED
- * or via the implicit sharing of character device mappings),
- * there are security issues with giving out PROT_WRITE
- */
- if ((flags & MAP_SHARED) || vp->v_type == VCHR) {
-
- /* In case we opened the thing readonly... */
- if (!(fp->f_flag & FWRITE)) {
- /*
- * If we are trying to get write permission
- * bail out, otherwise go ahead but don't
- * raise maxprot to contain VM_PROT_WRITE, as
- * we have not asked for write permission at
- * all.
- */
- if (prot & PROT_WRITE)
- return (EACCES);
-
- /*
- * If the file is writable, only add PROT_WRITE to
- * maxprot if the file is not immutable, append-only.
- * If it is, and if we are going for PROT_WRITE right
- * away, return EPERM.
- */
- } else if ((error =
- VOP_GETATTR(vp, &va, p->p_ucred, p)))
- return (error);
- else if (va.va_flags & (IMMUTABLE|APPEND)) {
- if (prot & PROT_WRITE)
- return (EPERM);
- } else
- maxprot |= VM_PROT_WRITE;
- } else
- maxprot |= VM_PROT_WRITE;
- handle = (caddr_t)vp;
- } else {
- /*
- * (flags & MAP_ANON) == TRUE
- * Mapping blank space is trivial.
- */
- if (fd != -1)
- return (EINVAL);
- is_anon:
- handle = NULL;
- maxprot = VM_PROT_ALL;
- pos = 0;
- }
- error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
- flags, handle, pos);
- if (error == 0)
- *retval = (register_t)(addr + pageoff);
- return (error);
-}
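
The comment before sys_mmap describes how a non-page-aligned file offset is handled: the kernel maps from the preceding page boundary and bumps the returned address by the remainder. A portable userland caller usually does the equivalent rounding itself; a hypothetical sketch (the offset and file name are made up):

#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	off_t off = 12345;			/* deliberately unaligned */
	size_t len = 4096;
	long pagesz = sysconf(_SC_PAGESIZE);
	off_t pageoff = off % pagesz;
	char *p;
	int fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) == -1)
		return (1);
	/* Map from the page boundary, then step past the remainder. */
	p = mmap(NULL, len + pageoff, PROT_READ, MAP_PRIVATE, fd,
	    off - pageoff);
	if (p == MAP_FAILED)
		return (1);
	printf("byte at offset %lld: 0x%02x\n", (long long)off,
	    (unsigned char)p[pageoff]);
	munmap(p, len + pageoff);
	close(fd);
	return (0);
}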
-
-int
-sys_msync(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
- struct sys_msync_args /* {
- syscallarg(void *) addr;
- syscallarg(size_t) len;
- syscallarg(int) flags;
- } */ *uap = v;
- vm_offset_t addr;
- vm_size_t size, pageoff;
- vm_map_t map;
- int rv, flags;
- boolean_t syncio, invalidate;
-
- addr = (vm_offset_t)SCARG(uap, addr);
- size = (vm_size_t)SCARG(uap, len);
- flags = SCARG(uap, flags);
-#ifdef DEBUG
- if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
- printf("msync(%d): addr 0x%lx len %lx\n", p->p_pid, addr, size);
-#endif
-
- /* sanity check flags */
- if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
- (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
- (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
- return (EINVAL);
- if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
- flags |= MS_SYNC;
-
- /*
- * Align the address to a page boundary,
- * and adjust the size accordingly.
- */
- pageoff = (addr & PAGE_MASK);
- addr -= pageoff;
- size += pageoff;
- size = (vm_size_t) round_page(size);
-
- /* Disallow wrap-around. */
- if (addr + size < addr)
- return (ENOMEM);
-
- map = &p->p_vmspace->vm_map;
- /*
- * XXX Gak! If size is zero we are supposed to sync "all modified
- * pages within the region containing addr". Unfortunately, we
- * don't really keep track of individual mmaps so we approximate
- * by flushing the range of the map entry containing addr.
- * This can be incorrect if the region splits or is coalesced
- * with a neighbor.
- */
- if (size == 0) {
- vm_map_entry_t entry;
-
- vm_map_lock_read(map);
- rv = vm_map_lookup_entry(map, addr, &entry);
- vm_map_unlock_read(map);
- if (rv == FALSE)
- return (ENOMEM);
- addr = entry->start;
- size = entry->end - entry->start;
- }
-#ifdef DEBUG
- if (mmapdebug & MDB_SYNC)
- printf("msync: cleaning/flushing address range [0x%lx-0x%lx)\n",
- addr, addr+size);
-#endif
-
-#if 0
- /*
- * XXX Asynchronous msync() causes:
- * . the process to hang on wchan "vospgw", and
- * . a "vm_object_page_clean: pager_put error" message to
- * be printed by the kernel.
- */
- syncio = (flags & MS_SYNC) ? TRUE : FALSE;
-#else
- syncio = TRUE;
-#endif
- invalidate = (flags & MS_INVALIDATE) ? TRUE : FALSE;
-
- /*
- * XXX bummer, gotta flush all cached pages to ensure
- * consistency with the file system cache. Otherwise, we could
- * pass this in to implement Sun's MS_INVALIDATE.
- */
- invalidate = TRUE;
- /*
- * Clean the pages and interpret the return value.
- */
- rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
- switch (rv) {
- case KERN_SUCCESS:
- break;
- case KERN_INVALID_ADDRESS:
- return (ENOMEM);
- case KERN_FAILURE:
- return (EIO);
- case KERN_PAGES_LOCKED:
- return (EBUSY);
- default:
- return (EINVAL);
- }
- return (0);
-}
-
-int
-sys_munmap(p, v, retval)
- register struct proc *p;
- void *v;
- register_t *retval;
-{
- register struct sys_munmap_args /* {
- syscallarg(void *) addr;
- syscallarg(size_t) len;
- } */ *uap = v;
- vm_offset_t addr;
- vm_size_t size, pageoff;
- vm_map_t map;
- vm_offset_t vm_min_address = VM_MIN_ADDRESS;
-
-
- addr = (vm_offset_t) SCARG(uap, addr);
- size = (vm_size_t) SCARG(uap, len);
-#ifdef DEBUG
- if (mmapdebug & MDB_FOLLOW)
- printf("munmap(%d): addr 0%lx len %lx\n", p->p_pid, addr, size);
-#endif
-
- /*
- * Align the address to a page boundary,
- * and adjust the size accordingly.
- */
- pageoff = (addr & PAGE_MASK);
- addr -= pageoff;
- size += pageoff;
- size = (vm_size_t) round_page(size);
- if ((int)size < 0)
- return(EINVAL);
- if (size == 0)
- return(0);
- /*
- * Check for illegal addresses. Watch out for address wrap...
- * Note that VM_*_ADDRESS are not constants due to casts (argh).
- */
- if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
- return (EINVAL);
- if (vm_min_address > 0 && addr < vm_min_address)
- return (EINVAL);
- if (addr > addr + size)
- return (EINVAL);
- map = &p->p_vmspace->vm_map;
- /*
- * Make sure entire range is allocated.
- */
- if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
- return(EINVAL);
- /* returns nothing but KERN_SUCCESS anyway */
- (void) vm_map_remove(map, addr, addr+size);
- return(0);
-}
-
-void
-munmapfd(p, fd)
- struct proc *p;
- int fd;
-{
-#ifdef DEBUG
- if (mmapdebug & MDB_FOLLOW)
- printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
-#endif
-
- /*
- * XXX should vm_deallocate any regions mapped to this file
- */
- p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
-}
-
-int
-sys_mprotect(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
- struct sys_mprotect_args /* {
- syscallarg(void *) addr;
- syscallarg(int) len;
- syscallarg(int) prot;
- } */ *uap = v;
- vm_offset_t addr;
- vm_size_t size, pageoff;
- register vm_prot_t prot;
-
- addr = (vm_offset_t)SCARG(uap, addr);
- size = (vm_size_t)SCARG(uap, len);
- prot = SCARG(uap, prot) & VM_PROT_ALL;
-#ifdef DEBUG
- if (mmapdebug & MDB_FOLLOW)
- printf("mprotect(%d): addr 0x%lx len %lx prot %d\n", p->p_pid,
- addr, size, prot);
-#endif
- /*
- * Align the address to a page boundary,
- * and adjust the size accordingly.
- */
- pageoff = (addr & PAGE_MASK);
- addr -= pageoff;
- size += pageoff;
- size = (vm_size_t) round_page(size);
- if ((int)size < 0)
- return(EINVAL);
-
- switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
- FALSE)) {
- case KERN_SUCCESS:
- return (0);
- case KERN_PROTECTION_FAILURE:
- return (EACCES);
- }
- return (EINVAL);
-}
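
From userland this path is exercised by mprotect(2) on an existing mapping; KERN_PROTECTION_FAILURE comes back as EACCES when the request exceeds the mapping's maximum protection. A hypothetical usage sketch:

#include <sys/mman.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	char *p;

	/* One anonymous, writable page. */
	p = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (p == MAP_FAILED)
		return (1);
	strcpy(p, "hello");
	/* Drop write permission; later stores would fault. */
	if (mprotect(p, pagesz, PROT_READ) == -1) {
		perror("mprotect");
		return (1);
	}
	printf("%s\n", p);
	munmap(p, pagesz);
	return (0);
}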
-
-int
-sys_minherit(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
- struct sys_minherit_args /* {
- syscallarg(caddr_t) addr;
- syscallarg(int) len;
- syscallarg(int) inherit;
- } */ *uap = v;
- vm_offset_t addr;
- vm_size_t size, pageoff;
- register vm_inherit_t inherit;
-
- addr = (vm_offset_t)SCARG(uap, addr);
- size = (vm_size_t)SCARG(uap, len);
- inherit = SCARG(uap, inherit);
-#ifdef DEBUG
- if (mmapdebug & MDB_FOLLOW)
- printf("minherit(%d): addr 0x%lx len %lx inherit %d\n", p->p_pid,
- addr, size, inherit);
-#endif
- /*
- * Align the address to a page boundary,
- * and adjust the size accordingly.
- */
- pageoff = (addr & PAGE_MASK);
- addr -= pageoff;
- size += pageoff;
- size = (vm_size_t) round_page(size);
- if ((int)size < 0)
- return(EINVAL);
-
- switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
- inherit)) {
- case KERN_SUCCESS:
- return (0);
- case KERN_PROTECTION_FAILURE:
- return (EACCES);
- }
- return (EINVAL);
-}
-
-/* ARGSUSED */
-int
-sys_madvise(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
-#if 0
- struct sys_madvise_args /* {
- syscallarg(void *) addr;
- syscallarg(size_t) len;
- syscallarg(int) behav;
- } */ *uap = v;
-#endif
-
- /* Not yet implemented */
- return (EOPNOTSUPP);
-}
-
-/* ARGSUSED */
-int
-sys_mincore(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
-#if 0
- struct sys_mincore_args /* {
- syscallarg(void *) addr;
- syscallarg(size_t) len;
- syscallarg(char *) vec;
- } */ *uap = v;
-#endif
-
- /* Not yet implemented */
- return (EOPNOTSUPP);
-}
-
-int
-sys_mlock(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
- struct sys_mlock_args /* {
- syscallarg(const void *) addr;
- syscallarg(size_t) len;
- } */ *uap = v;
- vm_offset_t addr;
- vm_size_t size, pageoff;
- int error;
- extern int vm_page_max_wired;
-
- addr = (vm_offset_t)SCARG(uap, addr);
- size = (vm_size_t)SCARG(uap, len);
-#ifdef DEBUG
- if (mmapdebug & MDB_FOLLOW)
- printf("mlock(%d): addr 0%lx len %lx\n", p->p_pid, addr, size);
-#endif
- /*
- * Align the address to a page boundary,
- * and adjust the size accordingly.
- */
- pageoff = (addr & PAGE_MASK);
- addr -= pageoff;
- size += pageoff;
- size = (vm_size_t) round_page(size);
-
- /* Disallow wrap-around. */
- if (addr + (int)size < addr)
- return (EINVAL);
-
- if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
- return (EAGAIN);
-#ifdef pmap_wired_count
- if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
- p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
- return (EAGAIN);
-#else
- if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
- return (error);
-#endif
-
- error = vslock((caddr_t)addr, size);
- return (error == KERN_SUCCESS ? 0 : ENOMEM);
-}
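
The checks above mean mlock(2) can fail with EAGAIN (global wired-page or RLIMIT_MEMLOCK limit), EPERM (insufficient privilege when pmap_wired_count is unavailable) or ENOMEM (vslock failure). A hypothetical userland sketch:

#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	char *buf;

	if ((buf = malloc(pagesz)) == NULL)
		return (1);
	/* Wire the page so it cannot be paged out. */
	if (mlock(buf, pagesz) == -1) {
		perror("mlock");
		return (1);
	}
	memset(buf, 1, pagesz);
	munlock(buf, pagesz);
	free(buf);
	return (0);
}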
-
-int
-sys_munlock(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
- struct sys_munlock_args /* {
- syscallarg(const void *) addr;
- syscallarg(size_t) len;
- } */ *uap = v;
- vm_offset_t addr;
- vm_size_t size, pageoff;
- int error;
-
- addr = (vm_offset_t)SCARG(uap, addr);
- size = (vm_size_t)SCARG(uap, len);
-#ifdef DEBUG
- if (mmapdebug & MDB_FOLLOW)
- printf("munlock(%d): addr 0x%lx len %lx\n", p->p_pid, addr, size);
-#endif
- /*
- * Align the address to a page boundary,
- * and adjust the size accordingly.
- */
- pageoff = (addr & PAGE_MASK);
- addr -= pageoff;
- size += pageoff;
- size = (vm_size_t) round_page(size);
-
- /* Disallow wrap-around. */
- if (addr + (int)size < addr)
- return (EINVAL);
-
-#ifndef pmap_wired_count
- if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
- return (error);
-#endif
-
- error = vsunlock((caddr_t)addr, size);
- return (error == KERN_SUCCESS ? 0 : ENOMEM);
-}
-
-/*
- * Internal version of mmap.
- * Currently used by mmap, exec, and sys5 shared memory.
- * Handle is either a vnode pointer or NULL for MAP_ANON.
- * This (internal) interface requires the file offset to be
- * page-aligned by the caller. (Also addr, if MAP_FIXED).
- */
-int
-vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
- register vm_map_t map;
- register vm_offset_t *addr;
- register vm_size_t size;
- vm_prot_t prot, maxprot;
- register int flags;
- caddr_t handle; /* XXX should be vp */
- vm_offset_t foff;
-{
- register vm_pager_t pager;
- boolean_t fitit;
- vm_object_t object;
- struct vnode *vp = NULL;
- int type;
- int rv = KERN_SUCCESS;
-
- if (size == 0)
- return (0);
-
- /* The file offset must be page aligned. */
- if (foff & PAGE_MASK)
- return (EINVAL);
-
- if ((flags & MAP_FIXED) == 0) {
- /* The address is just a hint */
- fitit = TRUE;
- *addr = round_page(*addr);
- } else {
- /*
- * Use the specified address exactly
- * (but check alignment first).
- */
- fitit = FALSE;
- if (*addr & PAGE_MASK)
- return (EINVAL);
- (void)vm_deallocate(map, *addr, size);
- }
-
- /*
- * Lookup/allocate pager. All except an unnamed anonymous lookup
- * gain a reference to ensure continued existence of the object.
- * (XXX the exception is to appease the pageout daemon)
- */
- if (flags & MAP_ANON) {
- type = PG_DFLT;
- foff = 0;
- } else {
- vp = (struct vnode *)handle;
- if (vp->v_type == VCHR) {
- type = PG_DEVICE;
- handle = (caddr_t)(long)vp->v_rdev;
- } else
- type = PG_VNODE;
- }
- pager = vm_pager_allocate(type, handle, size, prot, foff);
- if (pager == NULL)
- return (type == PG_DEVICE ? EINVAL : ENOMEM);
- /*
- * Find object and release extra reference gained by lookup
- */
- object = vm_object_lookup(pager);
- vm_object_deallocate(object);
-
- /*
- * Anonymous memory.
- */
- if (flags & MAP_ANON) {
- rv = vm_allocate_with_pager(map, addr, size, fitit,
- pager, foff, TRUE);
- if (rv != KERN_SUCCESS) {
- if (handle == NULL)
- vm_pager_deallocate(pager);
- else
- vm_object_deallocate(object);
- goto out;
- }
- /*
- * Don't cache anonymous objects.
- * Loses the reference gained by vm_pager_allocate.
- * Note that object will be NULL when handle == NULL,
- * this is ok since vm_allocate_with_pager has made
- * sure that these objects are uncached.
- */
- (void) pager_cache(object, FALSE);
-#ifdef DEBUG
- if (mmapdebug & MDB_MAPIT)
- printf("vm_mmap(%d): ANON *addr %lx size %lx pager %p\n",
- curproc->p_pid, *addr, size, pager);
-#endif
- }
- /*
- * Must be a mapped file.
- * Distinguish between character special and regular files.
- */
- else if (vp->v_type == VCHR) {
- rv = vm_allocate_with_pager(map, addr, size, fitit,
- pager, foff, FALSE);
- /*
- * Uncache the object and lose the reference gained
- * by vm_pager_allocate(). If the call to
- * vm_allocate_with_pager() was successful, then we
- * gained an additional reference ensuring the object
- * will continue to exist. If the call failed then
- * the deallocate call below will terminate the
- * object which is fine.
- */
- (void) pager_cache(object, FALSE);
- if (rv != KERN_SUCCESS)
- goto out;
- }
- /*
- * A regular file
- */
- else {
-#ifdef DEBUG
- if (object == NULL)
- printf("vm_mmap: no object: vp %p, pager %p\n",
- vp, pager);
-#endif
- /*
- * Map it directly.
- * Allows modifications to go out to the vnode.
- */
- if (flags & MAP_SHARED) {
- rv = vm_allocate_with_pager(map, addr, size,
- fitit, pager,
- foff, FALSE);
- if (rv != KERN_SUCCESS) {
- vm_object_deallocate(object);
- goto out;
- }
- /*
- * Don't cache the object. This is the easiest way
- * of ensuring that data gets back to the filesystem
- * because vnode_pager_deallocate() will fsync the
- * vnode. pager_cache() will lose the extra ref.
- */
- if (prot & VM_PROT_WRITE)
- pager_cache(object, FALSE);
- else
- vm_object_deallocate(object);
- }
- /*
- * Copy-on-write of file. Two flavors.
- * MAP_COPY is true COW, you essentially get a snapshot of
- * the region at the time of mapping. MAP_PRIVATE means only
- * that your changes are not reflected back to the object.
- * Changes made by others will be seen.
- */
- else {
- vm_map_t tmap;
- vm_offset_t off;
-
- /* locate and allocate the target address space */
- vm_map_lock(map);
- if (fitit) {
- /*
- * Find space in the map at a location
- * that is compatible with the object/offset
- * we're going to attach there.
- */
- again:
- if (vm_map_findspace(map, *addr, size,
- addr) == 1) {
- rv = KERN_NO_SPACE;
- } else {
-#ifdef PMAP_PREFER
- PMAP_PREFER(foff, addr);
-#endif
- rv = vm_map_insert(map, NULL,
- (vm_offset_t)0,
- *addr, *addr+size);
- /*
- * vm_map_insert() may fail if
- * PMAP_PREFER() has altered
- * the initial address.
- * If so, we start again.
- */
- if (rv == KERN_NO_SPACE)
- goto again;
- }
- } else {
- rv = vm_map_insert(map, NULL, (vm_offset_t)0,
- *addr, *addr + size);
-
-#ifdef DEBUG
- /*
- * Check against PMAP preferred address. If
- * there's a mismatch, these pages should not
- * be shared with others. <howto?>
- */
- if (rv == KERN_SUCCESS &&
- (mmapdebug & MDB_MAPIT)) {
- vm_offset_t paddr = *addr;
-#ifdef PMAP_PREFER
- PMAP_PREFER(foff, &paddr);
-#endif
- if (paddr != *addr)
- printf(
- "vm_mmap: pmap botch! "
- "[foff %lx, addr %lx, paddr %lx]\n",
- foff, *addr, paddr);
- }
-#endif
- }
- vm_map_unlock(map);
-
- if (rv != KERN_SUCCESS) {
- vm_object_deallocate(object);
- goto out;
- }
- tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
- VM_MIN_ADDRESS+size, TRUE);
- off = VM_MIN_ADDRESS;
- rv = vm_allocate_with_pager(tmap, &off, size,
- FALSE, pager,
- foff, FALSE);
- if (rv != KERN_SUCCESS) {
- vm_object_deallocate(object);
- vm_map_deallocate(tmap);
- goto out;
- }
- /*
- * (XXX)
- * MAP_PRIVATE implies that we see changes made by
- * others. To ensure that, we need to guarantee that
- * no copy object is created (otherwise original
- * pages would be pushed to the copy object and we
- * would never see changes made by others). We
- * totally sleaze it right now by marking the object
- * internal temporarily.
- */
- if ((flags & MAP_COPY) == 0)
- object->flags |= OBJ_INTERNAL;
- rv = vm_map_copy(map, tmap, *addr, size, off,
- FALSE, FALSE);
- object->flags &= ~OBJ_INTERNAL;
- /*
- * (XXX)
- * My oh my, this only gets worse...
- * Force creation of a shadow object so that
- * vm_map_fork will do the right thing.
- */
- if ((flags & MAP_COPY) == 0) {
- vm_map_t tmap;
- vm_map_entry_t tentry;
- vm_object_t tobject;
- vm_offset_t toffset;
- vm_prot_t tprot;
- boolean_t twired, tsu;
-
- tmap = map;
- vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
- &tentry, &tobject, &toffset,
- &tprot, &twired, &tsu);
- vm_map_lookup_done(tmap, tentry);
- }
- /*
- * (XXX)
- * Map copy code cannot detect sharing unless a
- * sharing map is involved. So we cheat and write
- * protect everything ourselves.
- */
- vm_object_pmap_copy(object, foff, foff + size);
- vm_object_deallocate(object);
- vm_map_deallocate(tmap);
- if (rv != KERN_SUCCESS)
- goto out;
- }
-#ifdef DEBUG
- if (mmapdebug & MDB_MAPIT)
- printf("vm_mmap(%d): FILE *addr %lx size %lx pager %p\n",
- curproc->p_pid, *addr, size, pager);
-#endif
- }
- /*
- * Correct protection (default is VM_PROT_ALL).
- * If maxprot is different than prot, we must set both explicitly.
- */
- rv = KERN_SUCCESS;
- if (maxprot != VM_PROT_ALL)
- rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
- if (rv == KERN_SUCCESS && prot != maxprot)
- rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
- if (rv != KERN_SUCCESS) {
- (void) vm_deallocate(map, *addr, size);
- goto out;
- }
- /*
- * Shared memory is also shared with children.
- */
- if (flags & MAP_SHARED) {
- rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
- if (rv != KERN_SUCCESS) {
- (void) vm_deallocate(map, *addr, size);
- goto out;
- }
- }
-out:
-#ifdef DEBUG
- if (mmapdebug & MDB_MAPIT)
- printf("vm_mmap: rv %d\n", rv);
-#endif
- switch (rv) {
- case KERN_SUCCESS:
- return (0);
- case KERN_INVALID_ADDRESS:
- case KERN_NO_SPACE:
- return (ENOMEM);
- case KERN_PROTECTION_FAILURE:
- return (EACCES);
- default:
- return (EINVAL);
- }
-}
-
-int
-sys_mlockall(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
-#if 0
- struct sys_mlockall_args /* {
- syscallarg(int) flags;
- } */ *uap = v;
-#endif
-
- return (EOPNOTSUPP);
-}
-
-int
-sys_munlockall(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
-
- return (EOPNOTSUPP);
-}
-
-
- 
\ No newline at end of file
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
deleted file mode 100644
index 8336d2acb79..00000000000
--- a/sys/vm/vm_object.c
+++ /dev/null
@@ -1,1887 +0,0 @@
-/* $OpenBSD: vm_object.c,v 1.22 1998/04/25 07:17:21 niklas Exp $ */
-/* $NetBSD: vm_object.c,v 1.46 1997/03/30 20:56:12 mycroft Exp $ */
-
-/*-
- * Copyright (c) 1997 Charles M. Hannum. All rights reserved.
- * Copyright (c) 1997 Niklas Hallqvist. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Charles M. Hannum.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_object.c 8.7 (Berkeley) 5/11/95
- *
- *
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * Virtual memory object module.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/proc.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pageout.h>
-
-/*
- * Virtual memory objects maintain the actual data
- * associated with allocated virtual memory. A given
- * page of memory exists within exactly one object.
- *
- * An object is only deallocated when all "references"
- * are given up. Only one "reference" to a given
- * region of an object should be writeable.
- *
- * Associated with each object is a list of all resident
- * memory pages belonging to that object; this list is
- * maintained by the "vm_page" module, and locked by the object's
- * lock.
- *
- * Each object also records a "pager" routine which is
- * used to retrieve (and store) pages to the proper backing
- * storage. In addition, objects may be backed by other
- * objects from which they were virtual-copied.
- *
- * The only items within the object structure which are
- * modified after time of creation are:
- * reference count locked by object's lock
- * pager routine locked by object's lock
- *
- */
-
-struct vm_object kernel_object_store;
-struct vm_object kmem_object_store;
-
-#define VM_OBJECT_HASH_COUNT 157
-
-extern int vm_cache_max; /* now in param.c */
-struct vm_object_hash_head vm_object_hashtable[VM_OBJECT_HASH_COUNT];
-
-long object_collapses = 0;
-long object_bypasses = 0;
-boolean_t vm_object_collapse_allowed = TRUE;
-
-#ifndef VMDEBUG
-#define VMDEBUG 0
-#endif
-
-#ifdef DEBUG
-#define VMDEBUG_SHADOW 0x1
-#define VMDEBUG_SHADOW_VERBOSE 0x2
-#define VMDEBUG_COLLAPSE 0x4
-#define VMDEBUG_COLLAPSE_PAGEIN 0x8
-int vmdebug = VMDEBUG;
-#endif
-
-void _vm_object_allocate __P((vm_size_t, vm_object_t));
-int vm_object_bypass __P((vm_object_t));
-void vm_object_collapse_internal __P((vm_object_t, vm_object_t *));
-int vm_object_overlay __P((vm_object_t));
-int vm_object_remove_from_pager
- __P((vm_object_t, vm_offset_t, vm_offset_t));
-void vm_object_set_shadow __P((vm_object_t, vm_object_t));
-
-/*
- * vm_object_init:
- *
- * Initialize the VM objects module.
- */
-void
-vm_object_init(size)
- vm_size_t size;
-{
- register int i;
-
- TAILQ_INIT(&vm_object_cached_list);
- TAILQ_INIT(&vm_object_list);
- vm_object_count = 0;
- simple_lock_init(&vm_cache_lock);
- simple_lock_init(&vm_object_list_lock);
-
- for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
- TAILQ_INIT(&vm_object_hashtable[i]);
-
- kernel_object = &kernel_object_store;
- _vm_object_allocate(size, kernel_object);
-
- kmem_object = &kmem_object_store;
- _vm_object_allocate(VM_KMEM_SIZE + VM_MBUF_SIZE, kmem_object);
-}
-
-/*
- * vm_object_allocate:
- *
- * Returns a new object with the given size.
- */
-vm_object_t
-vm_object_allocate(size)
- vm_size_t size;
-{
- register vm_object_t result;
-
- result = (vm_object_t)malloc((u_long)sizeof *result, M_VMOBJ,
- M_WAITOK);
-
- _vm_object_allocate(size, result);
-
- return(result);
-}
-
-void
-_vm_object_allocate(size, object)
- vm_size_t size;
- register vm_object_t object;
-{
- TAILQ_INIT(&object->memq);
- vm_object_lock_init(object);
- object->ref_count = 1;
- object->resident_page_count = 0;
- object->size = size;
- object->flags = OBJ_INTERNAL; /* vm_allocate_with_pager will reset */
- object->paging_in_progress = 0;
- object->copy = NULL;
-
- /*
- * Object starts out read-write, with no pager.
- */
-
- object->pager = NULL;
- object->paging_offset = 0;
- object->shadow = NULL;
- object->shadow_offset = (vm_offset_t) 0;
- LIST_INIT(&object->shadowers);
-
- simple_lock(&vm_object_list_lock);
- TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
- vm_object_count++;
- cnt.v_nzfod += atop(size);
- simple_unlock(&vm_object_list_lock);
-}
-
-/*
- * vm_object_reference:
- *
- * Gets another reference to the given object.
- */
-void
-vm_object_reference(object)
- register vm_object_t object;
-{
- if (object == NULL)
- return;
-
- vm_object_lock(object);
- object->ref_count++;
- vm_object_unlock(object);
-}
-
-/*
- * vm_object_deallocate:
- *
- * Release a reference to the specified object,
- * gained either through a vm_object_allocate
- * or a vm_object_reference call. When all references
- * are gone, storage associated with this object
- * may be relinquished.
- *
- * No object may be locked.
- */
-void
-vm_object_deallocate(object)
- vm_object_t object;
-{
- /*
- * While "temp" is used for other things as well, we
- * initialize it to NULL here so that we can check whether
- * we are in the first iteration of the loop.
- */
- vm_object_t temp = NULL;
-
- while (object != NULL) {
-
- /*
- * The cache holds a reference (uncounted) to the object; we
- * must lock it before removing the object.
- */
-
- vm_object_cache_lock();
-
- /*
- * Lose the reference
- */
- vm_object_lock(object);
- if (--(object->ref_count) != 0) {
- vm_object_unlock(object);
- vm_object_cache_unlock();
-
- /*
- * If this is a deallocation of a shadow reference
- * (which it is unless it's the first time round) and
- * this operation made us singly-shadowed, try to
- * collapse us with our shadower. Otherwise we're
- * ready.
- */
- if (temp != NULL &&
- (temp = object->shadowers.lh_first) != NULL &&
- temp->shadowers_list.le_next == NULL) {
- vm_object_lock(temp);
-
- /*
- * This is a bit tricky: the temp object can
- * go away while collapsing, check the
- * vm_object_collapse_internal comments for
- * details. In this case we get an object
- * back to deallocate (it's done like this
- * to prevent potential recursion and hence
- * kernel stack overflow). In the normal case
- * we won't get an object back, if so, we are
- * ready and may return.
- */
- vm_object_collapse_internal(temp, &object);
- if (object != NULL) {
- vm_object_lock(object);
- vm_object_cache_lock();
- } else {
- vm_object_unlock(temp);
- return;
- }
- } else
- return;
- }
-
- /*
- * See if this object can persist. If so, enter it in the
- * cache, then deactivate all of its pages.
- */
- if (object->flags & OBJ_CANPERSIST) {
-
- TAILQ_INSERT_TAIL(&vm_object_cached_list, object,
- cached_list);
- vm_object_cached++;
- vm_object_cache_unlock();
-
- vm_object_deactivate_pages(object);
- vm_object_unlock(object);
-
- vm_object_cache_trim();
- return;
- }
-
- /*
- * Make sure no one can look us up now.
- */
- vm_object_remove(object->pager);
- vm_object_cache_unlock();
-
- /*
- * Deallocate the object, and move on to the backing object.
- */
- temp = object->shadow;
- vm_object_reference(temp);
- vm_object_terminate(object);
- object = temp;
- }
-}
-
-
-/*
- * vm_object_terminate actually destroys the specified object, freeing
- * up all previously used resources.
- *
- * The object must be locked.
- */
-void
-vm_object_terminate(object)
- register vm_object_t object;
-{
- register vm_page_t p;
- vm_object_t shadow_object;
-
- /*
- * Protect against simultaneous collapses.
- */
- object->flags |= OBJ_FADING;
-
- /*
- * Wait until the pageout daemon is through with the object or a
- * potential collapse operation is finished.
- */
- vm_object_paging_wait(object,"vmterm");
-
- /*
- * Detach the object from its shadow if we are the shadow's
- * copy.
- */
- if ((shadow_object = object->shadow) != NULL) {
- vm_object_lock(shadow_object);
- vm_object_set_shadow(object, NULL);
- if (shadow_object->copy == object)
- shadow_object->copy = NULL;
-#if 0
- else if (shadow_object->copy != NULL)
- panic("vm_object_terminate: "
- "copy/shadow inconsistency");
-#endif
- vm_object_unlock(shadow_object);
- }
-
- /*
- * If not an internal object clean all the pages, removing them
- * from paging queues as we go.
- *
- * XXX need to do something in the event of a cleaning error.
- */
- if ((object->flags & OBJ_INTERNAL) == 0)
- (void) vm_object_page_clean(object, 0, 0, TRUE, TRUE);
-
- /*
- * Now free the pages.
- * For internal objects, this also removes them from paging queues.
- */
- while ((p = object->memq.tqh_first) != NULL) {
- VM_PAGE_CHECK(p);
- vm_page_lock_queues();
- vm_page_free(p);
- cnt.v_pfree++;
- vm_page_unlock_queues();
- }
- if ((object->flags & OBJ_INTERNAL) != 0)
- vm_object_unlock(object);
-
- /*
- * Let the pager know object is dead.
- */
- if (object->pager != NULL)
- vm_pager_deallocate(object->pager);
-
- simple_lock(&vm_object_list_lock);
- TAILQ_REMOVE(&vm_object_list, object, object_list);
- vm_object_count--;
- simple_unlock(&vm_object_list_lock);
-
- /*
- * Free the space for the object.
- */
- free((caddr_t)object, M_VMOBJ);
-}
-
-/*
- * vm_object_page_clean
- *
- * Clean all dirty pages in the specified range of object.
- * If syncio is TRUE, page cleaning is done synchronously.
- * If de_queue is TRUE, pages are removed from any paging queue
- * they were on, otherwise they are left on whatever queue they
- * were on before the cleaning operation began.
- *
- * Odd semantics: if start == end, we clean everything.
- *
- * The object must be locked.
- *
- * Returns TRUE if all was well, FALSE if there was a pager error
- * somewhere. We attempt to clean (and dequeue) all pages regardless
- * of where an error occurs.
- */
-boolean_t
-vm_object_page_clean(object, start, end, syncio, de_queue)
- register vm_object_t object;
- register vm_offset_t start;
- register vm_offset_t end;
- boolean_t syncio;
- boolean_t de_queue;
-{
- register vm_page_t p;
- int onqueue = 0;
- boolean_t noerror = TRUE;
-
- if (object == NULL)
- return (TRUE);
-
- /*
- * If it is an internal object and there is no pager, attempt to
- * allocate one. Note that vm_object_collapse may relocate one
- * from a collapsed object so we must recheck afterward.
- */
- if ((object->flags & OBJ_INTERNAL) && object->pager == NULL) {
- vm_object_collapse(object);
- if (object->pager == NULL) {
- vm_pager_t pager;
-
- vm_object_unlock(object);
- pager = vm_pager_allocate(PG_DFLT, (caddr_t)0,
- object->size, VM_PROT_ALL, (vm_offset_t)0);
- if (pager)
- vm_object_setpager(object, pager, 0, FALSE);
- vm_object_lock(object);
- }
- }
- if (object->pager == NULL)
- return (FALSE);
-
-again:
- /*
- * Wait until the pageout daemon is through with the object.
- */
- vm_object_paging_wait(object,"vclean");
-
- /*
- * Loop through the object page list cleaning as necessary.
- */
- for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
- if ((start == end || (p->offset >= start && p->offset < end)) &&
- !(p->flags & PG_FICTITIOUS)) {
- if ((p->flags & PG_CLEAN) &&
- pmap_is_modified(VM_PAGE_TO_PHYS(p)))
- p->flags &= ~PG_CLEAN;
- /*
- * Remove the page from any paging queue.
- * This needs to be done if either we have been
- * explicitly asked to do so or it is about to
- * be cleaned (see comment below).
- */
- if (de_queue || !(p->flags & PG_CLEAN)) {
- vm_page_lock_queues();
- if (p->flags & PG_ACTIVE) {
- TAILQ_REMOVE(&vm_page_queue_active,
- p, pageq);
- p->flags &= ~PG_ACTIVE;
- cnt.v_active_count--;
- onqueue = 1;
- } else if (p->flags & PG_INACTIVE) {
- TAILQ_REMOVE(&vm_page_queue_inactive,
- p, pageq);
- p->flags &= ~PG_INACTIVE;
- cnt.v_inactive_count--;
- onqueue = -1;
- } else
- onqueue = 0;
- vm_page_unlock_queues();
- }
- /*
- * To ensure the state of the page doesn't change
- * during the clean operation we do two things.
- * First we set the busy bit and write-protect all
- * mappings to ensure that write accesses to the
- * page block (in vm_fault). Second, we remove
- * the page from any paging queue to foil the
- * pageout daemon (vm_pageout_scan).
- */
- pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ);
- if (!(p->flags & PG_CLEAN)) {
- p->flags |= PG_BUSY;
- vm_object_paging_begin(object);
- vm_object_unlock(object);
- /*
- * XXX if put fails we mark the page as
- * clean to avoid an infinite loop.
- * Will lose changes to the page.
- */
- if (vm_pager_put(object->pager, p, syncio)) {
- printf("%s: pager_put error\n",
- "vm_object_page_clean");
- p->flags |= PG_CLEAN;
- noerror = FALSE;
- }
- vm_object_lock(object);
- vm_object_paging_end(object);
- if (!de_queue && onqueue) {
- vm_page_lock_queues();
- if (onqueue > 0)
- vm_page_activate(p);
- else
- vm_page_deactivate(p);
- vm_page_unlock_queues();
- }
- p->flags &= ~PG_BUSY;
- PAGE_WAKEUP(p);
- goto again;
- }
- }
- }
- return (noerror);
-}
-
-/*
- * vm_object_deactivate_pages
- *
- * Deactivate all pages in the specified object. (Keep its pages
- * in memory even though it is no longer referenced.)
- *
- * The object must be locked.
- */
-void
-vm_object_deactivate_pages(object)
- register vm_object_t object;
-{
- register vm_page_t p, next;
-
- for (p = object->memq.tqh_first; p != NULL; p = next) {
- next = p->listq.tqe_next;
- vm_page_lock_queues();
- if (p->flags & PG_ACTIVE)
- vm_page_deactivate(p);
- vm_page_unlock_queues();
- }
-}
-
-/*
- * Trim the object cache to size.
- */
-void
-vm_object_cache_trim()
-{
- register vm_object_t object;
-
- vm_object_cache_lock();
- while (vm_object_cached > vm_cache_max) {
- object = vm_object_cached_list.tqh_first;
- vm_object_cache_unlock();
-
- if (object != vm_object_lookup(object->pager))
- panic("vm_object_cache_trim: I'm sooo confused.");
-
- pager_cache(object, FALSE);
-
- vm_object_cache_lock();
- }
- vm_object_cache_unlock();
-}
-
-/*
- * vm_object_pmap_copy:
- *
- * Makes all physical pages in the specified
- * object range copy-on-write. No writeable
- * references to these pages should remain.
- *
- * The object must *not* be locked.
- */
-void
-vm_object_pmap_copy(object, start, end)
- register vm_object_t object;
- register vm_offset_t start;
- register vm_offset_t end;
-{
- register vm_page_t p;
-
- if (object == NULL)
- return;
-
- vm_object_lock(object);
- for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
- if ((start <= p->offset) && (p->offset < end)) {
- pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ);
- p->flags |= PG_COPYONWRITE;
- }
- }
- vm_object_unlock(object);
-}
-
-/*
- * vm_object_pmap_remove:
- *
- * Removes all physical pages in the specified
- * object range from all physical maps.
- *
- * The object must *not* be locked.
- */
-void
-vm_object_pmap_remove(object, start, end)
- register vm_object_t object;
- register vm_offset_t start;
- register vm_offset_t end;
-{
- register vm_page_t p;
-
- if (object == NULL)
- return;
-
- vm_object_lock(object);
- for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next)
- if ((start <= p->offset) && (p->offset < end))
- pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
- vm_object_unlock(object);
-}
-
-/*
- * vm_object_copy:
- *
- * Create a new object which is a copy of an existing
- * object, and mark all of the pages in the existing
- * object 'copy-on-write'. The new object has one reference.
- * Returns the new object.
- *
- * May defer the copy until later if the object is not backed
- * up by a non-default pager.
- */
-void
-vm_object_copy(src_object, src_offset, size,
- dst_object, dst_offset, src_needs_copy)
- register vm_object_t src_object;
- vm_offset_t src_offset;
- vm_size_t size;
- vm_object_t *dst_object; /* OUT */
- vm_offset_t *dst_offset; /* OUT */
- boolean_t *src_needs_copy; /* OUT */
-{
- register vm_object_t new_copy;
- register vm_object_t old_copy;
- vm_offset_t new_start, new_end;
-
- register vm_page_t p;
-
- if (src_object == NULL) {
- /*
- * Nothing to copy
- */
- *dst_object = NULL;
- *dst_offset = 0;
- *src_needs_copy = FALSE;
- return;
- }
-
- /*
- * If the object's pager is null_pager or the
- * default pager, we don't have to make a copy
- * of it. Instead, we set the needs copy flag and
- * make a shadow later.
- */
-
- vm_object_lock(src_object);
- if (src_object->pager == NULL ||
- (src_object->flags & OBJ_INTERNAL)) {
-
- /*
- * Make another reference to the object.
- */
- src_object->ref_count++;
-
- /*
- * Mark all of the pages copy-on-write.
- */
- for (p = src_object->memq.tqh_first; p; p = p->listq.tqe_next)
- if (src_offset <= p->offset &&
- p->offset < src_offset + size)
- p->flags |= PG_COPYONWRITE;
- vm_object_unlock(src_object);
-
- *dst_object = src_object;
- *dst_offset = src_offset;
-
- /*
- * Must make a shadow when write is desired
- */
- *src_needs_copy = TRUE;
- return;
- }
-
- /*
- * Try to collapse the object before copying it.
- */
- vm_object_collapse(src_object);
-
- /*
- * If the object has a pager, the pager wants to
- * see all of the changes. We need a copy-object
- * for the changed pages.
- *
- * If there is a copy-object, and it is empty,
- * no changes have been made to the object since the
- * copy-object was made. We can use the same copy-
- * object.
- */
-
-Retry1:
- old_copy = src_object->copy;
- if (old_copy != NULL) {
- /*
- * Try to get the locks (out of order)
- */
- if (!vm_object_lock_try(old_copy)) {
- vm_object_unlock(src_object);
-
- /* XXX should spin a bit here... */
- vm_object_lock(src_object);
- goto Retry1;
- }
-
- if (old_copy->resident_page_count == 0 &&
- old_copy->pager == NULL) {
- /*
- * Return another reference to
- * the existing copy-object.
- */
- old_copy->ref_count++;
- vm_object_unlock(old_copy);
- vm_object_unlock(src_object);
- *dst_object = old_copy;
- *dst_offset = src_offset;
- *src_needs_copy = FALSE;
- return;
- }
- vm_object_unlock(old_copy);
- }
- vm_object_unlock(src_object);
-
- /*
- * If the object has a pager, the pager wants
- * to see all of the changes. We must make
- * a copy-object and put the changed pages there.
- *
- * The copy-object is always made large enough to
- * completely shadow the original object, since
- * it may have several users who want to shadow
- * the original object at different points.
- */
-
- new_copy = vm_object_allocate(src_object->size);
-
-Retry2:
- vm_object_lock(src_object);
- /*
- * Copy object may have changed while we were unlocked
- */
- old_copy = src_object->copy;
- if (old_copy != NULL) {
- /*
- * Try to get the locks (out of order)
- */
- if (!vm_object_lock_try(old_copy)) {
- vm_object_unlock(src_object);
- goto Retry2;
- }
-
- /*
- * Consistency check
- */
- if (old_copy->shadow != src_object ||
- old_copy->shadow_offset != (vm_offset_t) 0)
- panic("vm_object_copy: copy/shadow inconsistency");
-
- /*
- * Make the old copy-object shadow the new one.
- * It will receive no more pages from the original
- * object. Locking of new_copy not needed. We
- * have the only pointer.
- */
- vm_object_set_shadow(old_copy, new_copy);
- vm_object_unlock(old_copy);
- }
-
- /* Always shadow original at 0 for the whole object */
- new_start = (vm_offset_t)0;
- new_end = (vm_offset_t)new_copy->size;
-
- /*
- * Point the new copy at the existing object.
- */
-
- vm_object_set_shadow(new_copy, src_object);
- new_copy->shadow_offset = new_start;
- src_object->copy = new_copy;
-
- /*
- * Mark all the affected pages of the existing object
- * copy-on-write.
- */
- for (p = src_object->memq.tqh_first; p != NULL; p = p->listq.tqe_next)
- if ((new_start <= p->offset) && (p->offset < new_end))
- p->flags |= PG_COPYONWRITE;
-
- vm_object_unlock(src_object);
-
- *dst_object = new_copy;
- *dst_offset = src_offset - new_start;
- *src_needs_copy = FALSE;
-}
-
-/*
- * vm_object_shadow:
- *
- * Create a new object which is backed by the
- * specified existing object range. The source
- * object reference is deallocated.
- *
- * The new object and offset into that object
- * are returned in the source parameters.
- *
- * The old object should not be locked.
- */
-void
-vm_object_shadow(object, offset, length)
- vm_object_t *object; /* IN/OUT */
- vm_offset_t *offset; /* IN/OUT */
- vm_size_t length;
-{
- register vm_object_t source;
- register vm_object_t result;
-
- source = *object;
-
-#ifdef DIAGNOSTIC
- if (source == NULL)
- panic("vm_object_shadow: attempt to shadow null object");
-#endif
-
- /*
- * Allocate a new object with the given length
- */
- if ((result = vm_object_allocate(length)) == NULL)
- panic("vm_object_shadow: no object for shadowing");
-
- /*
- * The new object shadows the source object. Our caller changes his
- * reference to point to the new object, removing a reference to the
- * source object.
- */
- vm_object_lock(source);
- vm_object_set_shadow(result, source);
- source->ref_count--;
- vm_object_unlock(source);
-
- /*
- * Store the offset into the source object,
- * and fix up the offset into the new object.
- */
- result->shadow_offset = *offset;
-
- /*
-	 * Return the new object and offset.
- */
- *offset = 0;
- *object = result;
-}
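
A rough standalone model of the shadow operation above; obj_t, obj_alloc and obj_shadow are invented stand-ins rather than the kernel's types, and only the IN/OUT parameter convention and the offset bookkeeping are mirrored, not the locking or reference counting:

#include <stdio.h>
#include <stdlib.h>

typedef struct obj {
	struct obj	*shadow;	/* object we are backed by */
	size_t		 shadow_offset;	/* where we start in the shadow */
	size_t		 size;
	int		 ref_count;
} obj_t;

static obj_t *
obj_alloc(size_t size)
{
	obj_t *o = calloc(1, sizeof(*o));

	if (o == NULL)
		abort();
	o->size = size;
	o->ref_count = 1;
	return (o);
}

/* Mirror of the shadow operation: *objp and *offp are IN/OUT parameters. */
static void
obj_shadow(obj_t **objp, size_t *offp, size_t length)
{
	obj_t *source = *objp;
	obj_t *result = obj_alloc(length);

	/*
	 * The new object shadows the source; the caller's reference to the
	 * source is handed over to the shadow link, so the source's overall
	 * reference count stays the same.
	 */
	result->shadow = source;
	result->shadow_offset = *offp;	/* old offset now applies to shadow */

	*offp = 0;
	*objp = result;
}

int
main(void)
{
	size_t off = 0x2000;
	obj_t *o = obj_alloc(0x10000);

	obj_shadow(&o, &off, 0x4000);
	printf("offset into new object: %#zx, into shadow: %#zx\n",
	    off, o->shadow_offset);
	return (0);
}
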
-
-/*
- * Set the specified object's pager to the specified pager.
- */
-void
-vm_object_setpager(object, pager, paging_offset, read_only)
- vm_object_t object;
- vm_pager_t pager;
- vm_offset_t paging_offset;
- boolean_t read_only;
-{
-#ifdef lint
- read_only++; /* No longer used */
-#endif
-
- vm_object_lock(object); /* XXX ? */
- object->pager = pager;
- object->paging_offset = paging_offset;
- vm_object_unlock(object); /* XXX ? */
-}
-
-/*
- * vm_object_hash hashes the pager/id pair.
- */
-
-#define vm_object_hash(pager) \
- (((unsigned long)pager)%VM_OBJECT_HASH_COUNT)
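
The bucket index is just the pager pointer reduced modulo the table size. A minimal illustration of the idea; the table size below is made up (the real value of VM_OBJECT_HASH_COUNT is not shown in this diff), and it assumes, like the original cast, that a pointer fits in an unsigned long:

#include <stdio.h>

#define VM_OBJECT_HASH_COUNT	157	/* arbitrary table size for the demo */

#define vm_object_hash(pager) \
	(((unsigned long)(pager)) % VM_OBJECT_HASH_COUNT)

int
main(void)
{
	int a, b;

	/* Two different addresses usually land in different buckets. */
	printf("bucket for &a: %lu\n", vm_object_hash(&a));
	printf("bucket for &b: %lu\n", vm_object_hash(&b));
	return (0);
}
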
-
-/*
- * vm_object_lookup looks in the object cache for an object with the
- * specified pager and paging id.
- */
-vm_object_t
-vm_object_lookup(pager)
- vm_pager_t pager;
-{
- register vm_object_hash_entry_t entry;
- vm_object_t object;
-
- vm_object_cache_lock();
-
- for (entry = vm_object_hashtable[vm_object_hash(pager)].tqh_first;
- entry != NULL;
- entry = entry->hash_links.tqe_next) {
- object = entry->object;
- if (object->pager == pager) {
- vm_object_lock(object);
- if (object->ref_count == 0) {
- TAILQ_REMOVE(&vm_object_cached_list, object,
- cached_list);
- vm_object_cached--;
- }
- object->ref_count++;
- vm_object_unlock(object);
- vm_object_cache_unlock();
- return(object);
- }
- }
-
- vm_object_cache_unlock();
- return(NULL);
-}
-
-/*
- * vm_object_enter enters the specified object/pager/id into
- * the hash table.
- */
-
-void
-vm_object_enter(object, pager)
- vm_object_t object;
- vm_pager_t pager;
-{
- struct vm_object_hash_head *bucket;
- register vm_object_hash_entry_t entry;
-
- /*
- * We don't cache null objects, and we can't cache
- * objects with the null pager.
- */
-
- if (object == NULL)
- return;
- if (pager == NULL)
- return;
-
- bucket = &vm_object_hashtable[vm_object_hash(pager)];
- entry = (vm_object_hash_entry_t)
- malloc((u_long)sizeof *entry, M_VMOBJHASH, M_WAITOK);
- entry->object = object;
- object->flags |= OBJ_CANPERSIST;
-
- vm_object_cache_lock();
- TAILQ_INSERT_TAIL(bucket, entry, hash_links);
- vm_object_cache_unlock();
-}
-
-/*
- * vm_object_remove:
- *
- * Remove the pager from the hash table.
- * Note: This assumes that the object cache
- * is locked. XXX this should be fixed
- * by reorganizing vm_object_deallocate.
- */
-void
-vm_object_remove(pager)
- register vm_pager_t pager;
-{
- struct vm_object_hash_head *bucket;
- register vm_object_hash_entry_t entry;
- register vm_object_t object;
-
- bucket = &vm_object_hashtable[vm_object_hash(pager)];
-
- for (entry = bucket->tqh_first;
- entry != NULL;
- entry = entry->hash_links.tqe_next) {
- object = entry->object;
- if (object->pager == pager) {
- TAILQ_REMOVE(bucket, entry, hash_links);
- free((caddr_t)entry, M_VMOBJHASH);
- break;
- }
- }
-}
-
-/*
- * vm_object_cache_clear removes all objects from the cache.
- */
-void
-vm_object_cache_clear()
-{
- register vm_object_t object;
-
- /*
- * Remove each object in the cache by scanning down the
- * list of cached objects.
- */
- vm_object_cache_lock();
- while ((object = vm_object_cached_list.tqh_first) != NULL) {
- vm_object_cache_unlock();
-
- /*
- * Note: it is important that we use vm_object_lookup
- * to gain a reference, and not vm_object_reference, because
- * the logic for removing an object from the cache lies in
- * lookup.
- */
- if (object != vm_object_lookup(object->pager))
- panic("vm_object_cache_clear: I'm sooo confused.");
- pager_cache(object, FALSE);
-
- vm_object_cache_lock();
- }
- vm_object_cache_unlock();
-}
-
-/*
- * vm_object_remove_from_pager:
- *
- * Tell object's pager that it needn't back the page
- * anymore. If the pager ends up empty, deallocate it.
- */
-int
-vm_object_remove_from_pager(object, from, to)
- vm_object_t object;
- vm_offset_t from, to;
-{
- vm_pager_t pager = object->pager;
- int cnt = 0;
-
- if (pager == NULL)
- return 0;
-
- cnt = vm_pager_remove(pager, from, to);
-
- /* If pager became empty, remove it. */
- if (cnt > 0 && vm_pager_count(pager) == 0) {
- vm_pager_deallocate(pager);
- object->pager = NULL;
- }
- return(cnt);
-}
-
-#define FREE_PAGE(m) do { \
- PAGE_WAKEUP(m); \
- vm_page_lock_queues(); \
- vm_page_free(m); \
- vm_page_unlock_queues(); \
-} while(0)
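
FREE_PAGE bundles several statements into one macro, and the do { ... } while (0) wrapper is what makes the expansion behave as a single statement. A small standalone demonstration of why that matters; FREE_THING and its stub helpers are invented for the example:

#include <stdio.h>

static void wakeup_thing(int t)  { printf("wakeup %d\n", t); }
static void release_thing(int t) { printf("release %d\n", t); }

/*
 * Wrapped in do { } while (0), the macro plus the caller's trailing
 * semicolon expands to exactly one statement.
 */
#define FREE_THING(t) do {						\
	wakeup_thing(t);						\
	release_thing(t);						\
} while (0)

int
main(void)
{
	int busy = 0;

	if (!busy)
		FREE_THING(42);		/* one statement, so the else below */
	else				/* still parses; a bare { } block    */
		printf("still busy\n");	/* followed by ';' would not          */
	return (0);
}
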
-
-/*
- * vm_object_overlay:
- *
- * Internal function to vm_object_collapse called when
- * it has been shown that a collapse operation is likely
- * to succeed. We know that the backing object is only
- * referenced by me and that paging is not in progress.
- */
-int
-vm_object_overlay(object)
- vm_object_t object;
-{
- vm_object_t backing_object = object->shadow;
- vm_offset_t backing_offset = object->shadow_offset;
- vm_size_t size = object->size;
- vm_offset_t offset, paged_offset;
- vm_page_t backing_page, page = NULL;
- int rv;
-
-#ifdef DEBUG
- if (vmdebug & VMDEBUG_COLLAPSE)
- printf("vm_object_overlay(0x%p)\n", object);
-#endif
-
- /*
- * Protect against multiple collapses.
- */
- backing_object->flags |= OBJ_FADING;
-
- /*
- * The algorithm used is roughly like this:
- * (1) Trim a potential pager in the backing object so it'll only hold
- * pages in reach.
- * (2) Loop over all the resident pages in the shadow object and
- * either remove them if they are shadowed or move them into the
- * shadowing object.
- * (3) Loop over the paged out pages in the shadow object. Start
- * pageins on those that aren't shadowed, and just deallocate
- * the others. In each iteration check if other users of these
- * objects have caused pageins resulting in new resident pages.
- * This can happen while we are waiting for a page or a pagein of
- * ours. If such resident pages turn up, restart from (2).
- */
-
- /*
- * As a first measure we know we can discard everything that the
- * shadowing object doesn't shadow.
- */
- if (backing_object->pager != NULL) {
- if (backing_offset > 0)
- vm_object_remove_from_pager(backing_object, 0,
- backing_offset);
- if (backing_offset + size < backing_object->size)
- vm_object_remove_from_pager(backing_object,
- backing_offset + size, backing_object->size);
- }
-
- /*
- * At this point, there may still be asynchronous paging in the parent
- * object. Any pages being paged in will be represented by fake pages.
- * There are three cases:
- * 1) The page is being paged in from the parent object's own pager.
- * In this case, we just delete our copy, since it's not needed.
- * 2) The page is being paged in from the backing object. We prevent
- * this case by waiting for paging to complete on the backing object
- * before continuing.
- * 3) The page is being paged in from a backing object behind the one
- * we're deleting. We'll never notice this case, because the
- * backing object we're deleting won't have the page.
- */
-
- vm_object_unlock(object);
-retry:
- vm_object_paging_wait(backing_object,"vpagew");
-
- /*
- * While we were asleep, the parent object might have been deleted. If
- * so, the backing object will now have only one reference (the one we
- * hold). If this happened, just deallocate the backing object and
- * return failure status so vm_object_collapse() will stop. This will
- * continue vm_object_deallocate() where it stopped due to our
- * reference.
- */
- if (backing_object->ref_count == 1)
- goto fail;
- vm_object_lock(object);
-
- /*
-	 * Next, get rid of the resident pages in the backing object. We are
-	 * guaranteed to remove every page, so we can write the while-test
-	 * like this.
- */
- while ((backing_page = backing_object->memq.tqh_first) != NULL) {
- offset = backing_page->offset - backing_offset;
-
-#ifdef DIAGNOSTIC
- if (backing_page->flags & (PG_BUSY | PG_FAKE))
- panic("vm_object_overlay: "
- "busy or fake page in backing_object");
-#endif
-
- /*
- * If the page is outside the shadowing object's range or if
- * the page is shadowed (either by a resident page or a paged
- * out one) we can discard it right away. Otherwise we need to
- * move the page to the shadowing object.
- */
- if (backing_page->offset < backing_offset || offset >= size ||
- ((page = vm_page_lookup(object, offset)) != NULL) ||
- (object->pager != NULL &&
- vm_pager_has_page(object->pager, offset))) {
- /*
-			 * Just discard the page, no one needs it. This
- * includes removing the possible backing store too.
- */
- if (backing_object->pager != NULL)
- vm_object_remove_from_pager(backing_object,
- backing_page->offset,
- backing_page->offset + PAGE_SIZE);
- vm_page_lock_queues();
- vm_page_free(backing_page);
- vm_page_unlock_queues();
- } else {
- /*
-			 * If the backing page was ever paged out, it must
-			 * have been dirty at some point. Unless the front
-			 * object has no pager (in which case the shadow's
-			 * pager will be moved forward below), mark the page
-			 * dirty again so it won't be thrown away without
-			 * being paged out to the front pager.
- *
- * XXX
- * Should be able to move a page from one pager to
- * another.
- */
- if (object->pager != NULL &&
- vm_object_remove_from_pager(backing_object,
- backing_page->offset,
- backing_page->offset + PAGE_SIZE))
- backing_page->flags &= ~PG_CLEAN;
-
- /* Move the page up front. */
- vm_page_rename(backing_page, object, offset);
- }
- }
-
- /*
- * If the shadowing object doesn't have a pager the easiest
- * thing to do now is to just move the backing pager up front
- * and everything is done.
- */
- if (object->pager == NULL && backing_object->pager != NULL) {
- object->pager = backing_object->pager;
- object->paging_offset = backing_object->paging_offset +
- backing_offset;
- backing_object->pager = NULL;
- goto done;
- }
-
- /*
- * What's left to do is to find all paged out pages in the
-	 * backing pager and either discard them or move them to the front
- * object. We need to recheck the resident page set as a
- * pagein might have given other threads the chance to, via
- * readfaults, page in another page into the resident set. In
- * this case we need to retry getting rid of pages from core.
- */
- paged_offset = 0;
- while (backing_object->pager != NULL &&
- (paged_offset = vm_pager_next(backing_object->pager,
- paged_offset)) < backing_object->size) {
- offset = paged_offset - backing_offset;
-
- /*
- * If the parent object already has this page, delete it.
- * Otherwise, start a pagein.
- */
- if (((page = vm_page_lookup(object, offset)) == NULL) &&
- (object->pager == NULL ||
- !vm_pager_has_page(object->pager, offset))) {
- vm_object_unlock(object);
-
- /*
- * First allocate a page and mark it busy so another
- * thread won't try to start another pagein.
- */
- backing_page = vm_page_alloc(backing_object,
- paged_offset);
- if (backing_page == NULL) {
- vm_object_unlock(backing_object);
- vm_wait("fVmcollapse");
- vm_object_lock(backing_object);
- goto retry;
- }
- backing_page->flags |= PG_BUSY;
-
-#ifdef DEBUG
- if (vmdebug & VMDEBUG_COLLAPSE_PAGEIN)
- printf("vm_object_overlay: pagein needed\n");
-#endif
-
- /*
- * Second, start paging it in. If this fails,
- * what can we do but punt?
- */
- vm_object_paging_begin(backing_object);
- vm_object_unlock(backing_object);
- cnt.v_pageins++;
- rv = vm_pager_get_pages(backing_object->pager,
- &backing_page, 1, TRUE);
- vm_object_lock(backing_object);
- vm_object_paging_end(backing_object);
-
- /*
- * IO error or page outside the range of the pager:
- * cleanup and return an error.
- */
- if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) {
- FREE_PAGE(backing_page);
- goto fail;
- }
-
- /* Handle the remaining failures. */
- if (rv != VM_PAGER_OK) {
-#ifdef DIAGNOSTIC
- panic("vm_object_overlay: pager returned %d",
- rv);
-#else
- FREE_PAGE(backing_page);
- goto fail;
-#endif
- }
- cnt.v_pgpgin++;
-
- /*
- * Third, relookup in case pager changed page. Pager
- * is responsible for disposition of old page if moved.
- */
- backing_page = vm_page_lookup(backing_object,
- paged_offset);
-
- /*
-			 * This page must once have been dirty, otherwise it
-			 * would never have been paged out by this shadow
-			 * object. As we now remove the persistent store of
-			 * the page, make sure it will be paged out by the
-			 * front pager by dirtying it.
- */
- backing_page->flags &= ~(PG_FAKE | PG_CLEAN);
-
- /*
- * Fourth, restart the process as we have slept,
-			 * thereby letting other threads change the object's
-			 * internal structure. Don't be tempted to move it up
-			 * front here; the parent may be gone already.
- */
- PAGE_WAKEUP(backing_page);
- goto retry;
- }
- vm_object_remove_from_pager(backing_object, paged_offset,
- paged_offset + PAGE_SIZE);
- paged_offset += PAGE_SIZE;
- }
-
-done:
- /*
-	 * I've seen this condition once, in an out-of-VM situation. For the
-	 * moment I don't know why it occurred, although I suspect that
-	 * vm_object_page_clean can create a pager even if it won't use it.
- */
- if (backing_object->pager != NULL &&
- vm_pager_count(backing_object->pager) == 0) {
- vm_pager_deallocate(backing_object->pager);
- backing_object->pager = NULL;
- }
-
-#ifdef DIAGNOSTIC
- if (backing_object->pager)
- panic("vm_object_overlay: backing_object->pager remains");
-#endif
-
- /*
- * Object now shadows whatever backing_object did.
- */
- if (backing_object->shadow)
- vm_object_lock(backing_object->shadow);
- vm_object_set_shadow(object, backing_object->shadow);
- if (backing_object->shadow)
- vm_object_unlock(backing_object->shadow);
- object->shadow_offset += backing_object->shadow_offset;
- if (object->shadow != NULL && object->shadow->copy != NULL)
- panic("vm_object_overlay: we collapsed a copy-object!");
-
-#ifdef DIAGNOSTIC
- if (backing_object->ref_count != 1)
- panic("vm_object_overlay: backing_object still referenced");
-#endif
-
- object_collapses++;
- return KERN_SUCCESS;
-
-fail:
- backing_object->flags &= ~OBJ_FADING;
- return KERN_FAILURE;
-}
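
Step (2) of the algorithm described in vm_object_overlay above walks every resident page of the backing object and either discards it (when the shadowing object already covers that offset, or the offset falls outside its range) or moves it into the shadowing object. The toy program below models only that decision, using plain arrays of offsets instead of vm_page structures and omitting the pager check and all locking; every name in it is invented:

#include <stdbool.h>
#include <stdio.h>

#define NITEMS(a)	(sizeof(a) / sizeof((a)[0]))

/* Does the front object already have something resident at this offset? */
static bool
front_has(const unsigned long *front, size_t n, unsigned long off)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (front[i] == off)
			return (true);
	return (false);
}

int
main(void)
{
	/* offsets resident in the front (shadowing) object */
	unsigned long front[] = { 0x0000, 0x3000 };
	/* offsets resident in the backing object */
	unsigned long backing[] = { 0x1000, 0x3000, 0x6000, 0x9000 };
	unsigned long backing_offset = 0x1000;	/* front shadows backing here */
	unsigned long size = 0x5000;		/* size of the front object */
	size_t i;

	for (i = 0; i < NITEMS(backing); i++) {
		unsigned long off = backing[i] - backing_offset;

		if (backing[i] < backing_offset || off >= size ||
		    front_has(front, NITEMS(front), off))
			printf("backing page %#lx: discard (shadowed or out of range)\n",
			    backing[i]);
		else
			printf("backing page %#lx: move to front at %#lx\n",
			    backing[i], off);
	}
	return (0);
}
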
-
-/*
- * vm_object_bypass:
- *
- * Internal function to vm_object_collapse called when collapsing
- * the object with its backing one is not allowed but there may
- * be an opportunity to bypass the backing object and shadow the
- * next object in the chain instead.
- *
- * If all of the pages in the backing object are shadowed by the parent
- * object, the parent object no longer has to shadow the backing
- * object; it can shadow the next one in the chain.
- */
-int
-vm_object_bypass(object)
- vm_object_t object;
-{
- vm_object_t backing_object = object->shadow;
- vm_offset_t backing_offset = object->shadow_offset;
- vm_offset_t offset, new_offset;
- vm_page_t p, pp;
-
- /*
- * XXX Punt if paging is going on. The issues in this case need to be
- * looked into more closely. For now play it safe and return. There's
- * no need to wait for it to end, as the expense will be much higher
- * than the gain.
- */
- if (vm_object_paging(backing_object))
- return KERN_FAILURE;
-
- /*
- * Should have a check for a 'small' number of pages here.
- */
- for (p = backing_object->memq.tqh_first; p != NULL;
- p = p->listq.tqe_next) {
- new_offset = p->offset - backing_offset;
-
- /*
- * If the parent has a page here, or if this page falls outside
- * the parent, keep going.
- *
- * Otherwise, the backing_object must be left in the chain.
- */
- if (p->offset >= backing_offset && new_offset < object->size &&
- ((pp = vm_page_lookup(object, new_offset)) == NULL ||
- (pp->flags & PG_FAKE)) &&
- (object->pager == NULL ||
- !vm_pager_has_page(object->pager, new_offset)))
- /*
- * Page still needed. Can't go any further.
- */
- return KERN_FAILURE;
- }
-
- if (backing_object->pager) {
- /*
- * Should have a check for a 'small' number of pages here.
- */
- for (offset = vm_pager_next(backing_object->pager, 0);
- offset < backing_object->size;
- offset = vm_pager_next(backing_object->pager,
- offset + PAGE_SIZE)) {
- new_offset = offset - backing_offset;
-
- /*
- * If the parent has a page here, or if this page falls
- * outside the parent, keep going.
- *
- * Otherwise, the backing_object must be left in the
- * chain.
- */
- if (offset >= backing_offset &&
- new_offset < object->size &&
- ((pp = vm_page_lookup(object, new_offset)) ==
- NULL || (pp->flags & PG_FAKE)) &&
- (object->pager == NULL ||
- !vm_pager_has_page(object->pager, new_offset)))
- /*
- * Page still needed. Can't go any further.
- */
- return KERN_FAILURE;
- }
- }
-
- /*
- * Object now shadows whatever backing_object did.
- */
- if (backing_object->shadow)
- vm_object_lock(backing_object->shadow);
- vm_object_set_shadow(object, backing_object->shadow);
- if (backing_object->shadow)
- vm_object_unlock(backing_object->shadow);
- object->shadow_offset += backing_object->shadow_offset;
-
- /*
- * Backing object might have had a copy pointer to us. If it did,
- * clear it.
- */
- if (backing_object->copy == object)
- backing_object->copy = NULL;
-
- object_bypasses++;
- return KERN_SUCCESS;
-}
-
-/*
- * vm_object_collapse:
- *
- * Collapse an object with the object backing it. Pages in the backing object
- * are moved into the parent, and the backing object is deallocated.
- *
- * Requires that the object be locked and the page queues be unlocked.
- */
-void
-vm_object_collapse(object)
- vm_object_t object;
-
-{
- vm_object_collapse_internal(object, NULL);
-}
-
-/*
- * An entry point to the collapsing logic internal to vm_object.c, used by
- * vm_object_deallocate to avoid a potential recursion case. In that case
- * an object to be deallocated is fed back via the retry_object pointer.
- * External users have that parameter wired to NULL, and then we are
- * allowed to make vm_object_deallocate calls that may in turn call us
- * again. In that case the chain only gets one level deep and thus is not
- * a real recursion.
- */
-void
-vm_object_collapse_internal(object, retry_object)
- vm_object_t object, *retry_object;
-{
- register vm_object_t backing_object;
- int rv;
-
- /* We'd better initialize this one if the pointer is given. */
- if (retry_object)
- *retry_object = NULL;
-
- if (!vm_object_collapse_allowed || object == NULL)
- return;
-
- do {
- /*
- * Verify that the conditions are right for collapse:
- *
- * There is a backing object, and
- */
- if ((backing_object = object->shadow) == NULL)
- return;
-
- vm_object_lock(backing_object);
-
- /*
- * ... the backing object is not read_only, is internal and is
- * not already being collapsed, ...
- */
- if ((backing_object->flags & (OBJ_INTERNAL | OBJ_FADING)) !=
- OBJ_INTERNAL) {
- vm_object_unlock(backing_object);
- return;
- }
-
- /*
- * The backing object can't be a copy-object: the shadow_offset
-		 * for the copy-object must stay as 0. Furthermore (for the
-		 * "we have all the pages" case), if we bypass backing_object
-		 * and just shadow the next object in the chain, old pages from
-		 * that object would then have to be copied BOTH into the
-		 * (former) backing_object and into the parent object.
- */
- if (backing_object->shadow != NULL &&
- backing_object->shadow->copy != NULL) {
- vm_object_unlock(backing_object);
- return;
- }
-
- /*
- * Grab a reference to the backing object so that it
- * can't be deallocated behind our back.
- */
- backing_object->ref_count++;
-
-#ifdef DIAGNOSTIC
- if (backing_object->ref_count == 1)
- panic("vm_object_collapse: "
- "collapsing unreferenced object");
-#endif
-
- /*
- * If there is exactly one reference to the backing object, we
- * can collapse it into the parent, otherwise we might be able
- * to bypass it completely.
- */
- rv = backing_object->ref_count == 2 ?
- vm_object_overlay(object) : vm_object_bypass(object);
-
- /*
-		 * Unlock the backing object; we're done with it. If we are
-		 * now the last referrer this will also deallocate the
-		 * object itself. If the backing object has been orphaned
-		 * and still has a shadow (which is possible in case of
-		 * KERN_FAILURE from vm_object_overlay) this might lead to
-		 * recursion. However, if we are called from
-		 * vm_object_deallocate, retry_object is not NULL and we are
-		 * allowed to feed the current backing object back via that
-		 * pointer. That way the recursion case turns into an
-		 * iteration in vm_object_deallocate instead.
- */
- if (retry_object != NULL && backing_object->ref_count == 1 &&
- backing_object->shadow != NULL) {
- *retry_object = backing_object;
- vm_object_unlock(backing_object);
- return;
- }
- vm_object_unlock(backing_object);
- vm_object_deallocate(backing_object);
-
- /*
- * Try again with this object's new backing object.
- */
- } while (rv == KERN_SUCCESS);
-}
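
The retry_object protocol used above is a general way to turn mutual recursion into iteration: the inner routine hands the pending object back through an out parameter and the outer caller loops on it. A compact standalone sketch of that pattern; struct node, collapse_one and deallocate_chain are invented for illustration and are not the kernel routines:

#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node	*next;	/* stands in for the shadow chain */
	int		 id;
};

/*
 * "Collapse" one node: free it and report its successor through the
 * retry pointer instead of deallocating the successor recursively.
 */
static void
collapse_one(struct node *n, struct node **retry)
{
	*retry = n->next;
	printf("freeing node %d\n", n->id);
	free(n);
}

static void
deallocate_chain(struct node *n)
{
	struct node *retry;

	/* The would-be recursion becomes an iteration on the retry pointer. */
	while (n != NULL) {
		collapse_one(n, &retry);
		n = retry;
	}
}

int
main(void)
{
	struct node *head = NULL;
	int i;

	for (i = 3; i >= 1; i--) {
		struct node *n = malloc(sizeof(*n));

		if (n == NULL)
			abort();
		n->id = i;
		n->next = head;
		head = n;
	}
	deallocate_chain(head);	/* frees 1, 2, 3 without recursing */
	return (0);
}
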
-
-/*
- * vm_object_page_remove: [internal]
- *
- * Removes all physical pages in the specified
- * object range from the object's list of pages.
- *
- * The object must be locked.
- */
-void
-vm_object_page_remove(object, start, end)
- register vm_object_t object;
- register vm_offset_t start;
- register vm_offset_t end;
-{
- register vm_page_t p, next;
-
- if (object == NULL)
- return;
-
- for (p = object->memq.tqh_first; p != NULL; p = next) {
- next = p->listq.tqe_next;
- if ((start <= p->offset) && (p->offset < end)) {
- pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
- vm_page_lock_queues();
- vm_page_free(p);
- vm_page_unlock_queues();
- }
- }
-}
-
-/*
- * Routine: vm_object_coalesce
- * Function: Coalesces two objects backing up adjoining
- * regions of memory into a single object.
- *
- * returns TRUE if objects were combined.
- *
- * NOTE: Only works at the moment if the second object is NULL -
- * if it's not, which object do we lock first?
- *
- * Parameters:
- * prev_object First object to coalesce
- * prev_offset Offset into prev_object
- *	next_object	Second object to coalesce
- * next_offset Offset into next_object
- *
- * prev_size Size of reference to prev_object
- * next_size Size of reference to next_object
- *
- * Conditions:
- * The object must *not* be locked.
- */
-boolean_t
-vm_object_coalesce(prev_object, next_object, prev_offset, next_offset,
- prev_size, next_size)
- register vm_object_t prev_object;
- vm_object_t next_object;
- vm_offset_t prev_offset, next_offset;
- vm_size_t prev_size, next_size;
-{
- vm_size_t newsize;
-
-#ifdef lint
- next_offset++;
-#endif
-
- if (next_object != NULL) {
- return(FALSE);
- }
-
- if (prev_object == NULL) {
- return(TRUE);
- }
-
- vm_object_lock(prev_object);
-
- /*
- * Try to collapse the object first
- */
- vm_object_collapse(prev_object);
-
- /*
- * Can't coalesce if:
- * . more than one reference
- * . paged out
- * . shadows another object
- * . has a copy elsewhere
- * (any of which mean that the pages not mapped to
- * prev_entry may be in use anyway)
- */
-
- if (prev_object->ref_count > 1 || prev_object->pager != NULL ||
- prev_object->shadow != NULL || prev_object->copy != NULL) {
- vm_object_unlock(prev_object);
- return(FALSE);
- }
-
- /*
- * Remove any pages that may still be in the object from
- * a previous deallocation.
- */
- vm_object_page_remove(prev_object, prev_offset + prev_size,
- prev_offset + prev_size + next_size);
-
- /*
- * Extend the object if necessary.
- */
- newsize = prev_offset + prev_size + next_size;
- if (newsize > prev_object->size)
- prev_object->size = newsize;
-
- vm_object_unlock(prev_object);
- return(TRUE);
-}
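
The growth arithmetic at the end of vm_object_coalesce simply requires the object to cover prev_offset + prev_size + next_size. A tiny worked example with made-up numbers:

#include <stdio.h>

int
main(void)
{
	unsigned long prev_offset = 0x2000;	/* region starts here in the object */
	unsigned long prev_size   = 0x3000;	/* existing region */
	unsigned long next_size   = 0x1000;	/* region being appended */
	unsigned long object_size = 0x4000;	/* current object size */
	unsigned long newsize = prev_offset + prev_size + next_size;

	if (newsize > object_size)
		object_size = newsize;		/* extends to 0x6000 here */
	printf("object size after coalesce: %#lx\n", object_size);
	return (0);
}
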
-
-/*
- * vm_object_print: [ debug ]
- */
-void
-vm_object_print(object, full)
- vm_object_t object;
- boolean_t full;
-{
- _vm_object_print(object, full, printf);
-}
-
-void
-_vm_object_print(object, full, pr)
- vm_object_t object;
- boolean_t full;
- int (*pr) __P((const char *, ...));
-{
- register vm_page_t p;
- char *delim;
- vm_object_t o;
- register int count;
- extern int indent;
-
- if (object == NULL)
- return;
-
- iprintf(pr, "Object 0x%p: size=0x%lx, res=%d, ref=%d, ", object,
- (long)object->size, object->resident_page_count,
- object->ref_count);
- (*pr)("pager=%p+0x%lx, shadow=(%p)+0x%lx\n", object->pager,
- (long)object->paging_offset, object->shadow,
- (long)object->shadow_offset);
- (*pr)("shadowers=(");
- delim = "";
- for (o = object->shadowers.lh_first; o;
- o = o->shadowers_list.le_next) {
- (*pr)("%s0x%p", delim, o);
- delim = ", ";
- };
- (*pr)(")\n");
- (*pr)("cache: next=0x%p, prev=0x%p\n", object->cached_list.tqe_next,
- object->cached_list.tqe_prev);
-
- if (!full)
- return;
-
- indent += 2;
- count = 0;
- for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
- if (count == 0)
- iprintf(pr, "memory:=");
- else if (count == 6) {
- (*pr)("\n");
- iprintf(pr, " ...");
- count = 0;
- } else
- (*pr)(",");
- count++;
-
- (*pr)("(off=0x%lx,page=0x%lx)", (long)p->offset,
- (long)VM_PAGE_TO_PHYS(p));
- }
- if (count != 0)
- (*pr)("\n");
- indent -= 2;
-}
-
-/*
- * vm_object_set_shadow:
- *
- * Maintain the shadow graph so that back-link consistency is always kept.
- *
- * Assumes both objects as well as the old shadow to be locked (unless NULL
- * of course).
- */
-void
-vm_object_set_shadow(object, shadow)
- vm_object_t object, shadow;
-{
- vm_object_t old_shadow = object->shadow;
-
-#ifdef DEBUG
- if (vmdebug & VMDEBUG_SHADOW)
- printf("vm_object_set_shadow(object=0x%p, shadow=0x%p) "
- "old_shadow=0x%p\n", object, shadow, old_shadow);
- if (vmdebug & VMDEBUG_SHADOW_VERBOSE) {
- vm_object_print(object, 0);
- vm_object_print(old_shadow, 0);
- vm_object_print(shadow, 0);
- }
-#endif
- if (old_shadow == shadow)
- return;
- if (old_shadow) {
- old_shadow->ref_count--;
- LIST_REMOVE(object, shadowers_list);
- }
- if (shadow) {
- shadow->ref_count++;
- LIST_INSERT_HEAD(&shadow->shadowers, object, shadowers_list);
- }
- object->shadow = shadow;
-#ifdef DEBUG
- if (vmdebug & VMDEBUG_SHADOW_VERBOSE) {
- vm_object_print(object, 0);
- vm_object_print(old_shadow, 0);
- vm_object_print(shadow, 0);
- }
-#endif
-}
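
vm_object_set_shadow keeps the back-link list consistent by always pairing the forward-pointer update with a LIST_REMOVE/LIST_INSERT_HEAD. The same idiom can be exercised in userland with the BSD <sys/queue.h> macros, assuming a platform that provides them; the obj structure below is a made-up stand-in and the reference counting of the original is left out:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	struct obj		*shadow;
	LIST_HEAD(, obj)	 shadowers;	/* objects that shadow us */
	LIST_ENTRY(obj)		 shadowers_list;
	int			 id;
};

static struct obj *
obj_new(int id)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (o == NULL)
		abort();
	o->id = id;
	LIST_INIT(&o->shadowers);
	return (o);
}

/* Keep the back-link list in sync with the forward pointer. */
static void
obj_set_shadow(struct obj *o, struct obj *shadow)
{
	if (o->shadow == shadow)
		return;
	if (o->shadow != NULL)
		LIST_REMOVE(o, shadowers_list);
	if (shadow != NULL)
		LIST_INSERT_HEAD(&shadow->shadowers, o, shadowers_list);
	o->shadow = shadow;
}

int
main(void)
{
	struct obj *a = obj_new(1), *b = obj_new(2), *c = obj_new(3);
	struct obj *p;

	obj_set_shadow(a, c);
	obj_set_shadow(b, c);
	LIST_FOREACH(p, &c->shadowers, shadowers_list)
		printf("object %d shadows object 3\n", p->id);
	obj_set_shadow(a, NULL);	/* back-link goes away with the pointer */
	return (0);
}
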
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
deleted file mode 100644
index 6f38260b6e1..00000000000
--- a/sys/vm/vm_page.c
+++ /dev/null
@@ -1,1881 +0,0 @@
-/* $OpenBSD: vm_page.c,v 1.18 2000/05/27 18:31:35 art Exp $ */
-/* $NetBSD: vm_page.c,v 1.41 1998/02/08 18:24:52 thorpej Exp $ */
-
-#define VM_PAGE_ALLOC_MEMORY_STATS
-
-/*-
- * Copyright (c) 1997 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
- * NASA Ames Research Center.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_page.c 8.3 (Berkeley) 3/21/94
- *
- *
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * Resident memory management module.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/malloc.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_map.h>
-#include <vm/vm_pageout.h>
-
-#include <machine/cpu.h>
-
-#define VERY_LOW_MEM() (cnt.v_free_count <= vm_page_free_reserved)
-#define KERN_OBJ(object) ((object) == kernel_object || (object) == kmem_object)
-
-int vm_page_free_reserved = 10;
-
-#if defined(MACHINE_NEW_NONCONTIG)
-
-/*
- * physical memory config is stored in vm_physmem.
- */
-
-struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];
-int vm_nphysseg = 0;
-static int vm_page_lost_count = 0; /* XXXCDC: DEBUG DEBUG */
-
-#endif
-
-#if defined(MACHINE_NONCONTIG) || defined(MACHINE_NEW_NONCONTIG)
-/*
- * These variables record the values returned by vm_page_bootstrap,
- * for debugging purposes.
- *
- * The implementation of vm_bootstrap_steal_memory here also uses
- * them internally.
- */
-static vm_offset_t virtual_space_start;
-static vm_offset_t virtual_space_end;
-
-vm_offset_t vm_bootstrap_steal_memory __P((vm_size_t));
-#endif
-
-/*
- * Associated with page of user-allocatable memory is a
- * page structure.
- */
-
-struct pglist *vm_page_buckets; /* Array of buckets */
-int vm_page_bucket_count = 0; /* How big is array? */
-int vm_page_hash_mask; /* Mask for hash function */
-simple_lock_data_t bucket_lock; /* lock for all buckets XXX */
-#if defined(MACHINE_NEW_NONCONTIG)
-struct pglist vm_page_bootbucket; /* bootstrap bucket */
-#endif
-
-struct pglist vm_page_queue_free;
-struct pglist vm_page_queue_active;
-struct pglist vm_page_queue_inactive;
-simple_lock_data_t vm_page_queue_lock;
-simple_lock_data_t vm_page_queue_free_lock;
-
-/* has physical page allocation been initialized? */
-boolean_t vm_page_startup_initialized;
-
-vm_page_t vm_page_array;
-#if defined(MACHINE_NEW_NONCONTIG)
- /* NOTHING NEEDED HERE */
-#elif defined(MACHINE_NONCONTIG)
-/* OLD NONCONTIG CODE: NUKE NUKE NUKE ONCE CONVERTED */
-u_long first_page;
-int vm_page_count;
-#else
-/* OLD CONTIG CODE: NUKE NUKE NUKE ONCE CONVERTED */
-long first_page;
-long last_page;
-vm_offset_t first_phys_addr;
-vm_offset_t last_phys_addr;
-int vm_page_count;
-#endif
-vm_size_t page_mask;
-int page_shift;
-
-#if defined(MACHINE_NEW_NONCONTIG)
-/*
- * local prototypes
- */
-
-#if !defined(PMAP_STEAL_MEMORY)
-static boolean_t vm_page_physget __P((vm_offset_t *));
-#endif
-#endif
-
-/*
- * macros
- */
-
-/*
- * vm_page_hash:
- *
- * Distributes the object/offset key pair among hash buckets.
- *
- * NOTE: This macro depends on vm_page_bucket_count being a power of 2.
- */
-#define vm_page_hash(object, offset) \
- (((unsigned long)object+(unsigned long)atop(offset))&vm_page_hash_mask)
-
-/*
- * vm_set_page_size:
- *
- * Sets the page size, perhaps based upon the memory
- * size. Must be called before any use of page-size
- * dependent functions.
- *
- * Sets page_shift and page_mask from cnt.v_page_size.
- */
-void
-vm_set_page_size()
-{
-
- if (cnt.v_page_size == 0)
- cnt.v_page_size = DEFAULT_PAGE_SIZE;
- page_mask = cnt.v_page_size - 1;
- if ((page_mask & cnt.v_page_size) != 0)
- panic("vm_set_page_size: page size not a power of two");
- for (page_shift = 0; ; page_shift++)
- if ((1 << page_shift) == cnt.v_page_size)
- break;
-}
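
vm_set_page_size derives page_mask and page_shift from the page size and rejects anything that is not a power of two; for a power of two, mask & size is zero because the size has a single bit set. The same computation as a standalone program, with an arbitrary page size:

#include <stdio.h>

int
main(void)
{
	unsigned long page_size = 4096;		/* must be a power of two */
	unsigned long page_mask = page_size - 1;
	int page_shift;

	/* A power of two has one bit set, so size & (size - 1) == 0. */
	if ((page_mask & page_size) != 0) {
		fprintf(stderr, "page size not a power of two\n");
		return (1);
	}
	for (page_shift = 0; (1UL << page_shift) != page_size; page_shift++)
		;
	printf("page_mask = %#lx, page_shift = %d\n", page_mask, page_shift);
	return (0);
}
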
-
-#if defined(MACHINE_NEW_NONCONTIG)
-/*
- * vm_page_bootstrap: initialize the resident memory module (called
- * from vm_mem_init()).
- *
- * - startp and endp are out params which return the boundaries of the
- * free part of the kernel's virtual address space.
- */
-void
-vm_page_bootstrap(startp, endp)
- vm_offset_t *startp, *endp; /* OUT, OUT */
-{
- vm_offset_t paddr;
- vm_page_t pagearray;
- int lcv, freepages, pagecount, n, i;
-
- /*
- * first init all the locks and queues.
- */
- simple_lock_init(&vm_page_queue_free_lock);
- simple_lock_init(&vm_page_queue_lock);
- TAILQ_INIT(&vm_page_queue_free);
- TAILQ_INIT(&vm_page_queue_active);
- TAILQ_INIT(&vm_page_queue_inactive);
-
- /*
- * init the <OBJ,OFFSET> => <PAGE> hash table buckets. for now
- * we just have one bucket (the bootstrap bucket). later on we
- * will malloc() new buckets as we dynamically resize the hash table.
- */
- vm_page_bucket_count = 1;
- vm_page_hash_mask = 0;
- vm_page_buckets = &vm_page_bootbucket;
- TAILQ_INIT(vm_page_buckets);
- simple_lock_init(&bucket_lock);
-
- /*
- * before calling this function the MD code is expected to register
- * some free RAM with the vm_page_physload() function. our job
- * now is to allocate vm_page structures for this preloaded memory.
- */
- if (vm_nphysseg == 0)
- panic("vm_page_bootstrap: no memory pre-allocated");
-
- /*
- * first calculate the number of free pages... note that start/end
- * are inclusive so you have to add one to get the number of pages.
- *
- * note that we use start/end rather than avail_start/avail_end.
- * this allows us to allocate extra vm_page structures in case we
- * want to return some memory to the pool after booting.
- */
- freepages = 0;
- for (lcv = 0; lcv < vm_nphysseg; lcv++)
- freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
-
- /*
- * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
- * use. for each page of memory we use we need a vm_page structure.
- * thus, the total number of pages we can use is the total size of
- * the memory divided by the PAGE_SIZE plus the size of the vm_page
- * structure. we add one to freepages as a fudge factor to avoid
- * truncation errors (since we can only allocate in terms of whole
- * pages).
- */
- pagecount = (PAGE_SIZE * (freepages + 1)) /
- (PAGE_SIZE + sizeof(struct vm_page));
- pagearray = (vm_page_t)
- vm_bootstrap_steal_memory(pagecount * sizeof(struct vm_page));
- bzero(pagearray, pagecount * sizeof(struct vm_page));
-
- /*
- * now init the page frames
- */
- for (lcv = 0; lcv < vm_nphysseg; lcv++) {
-
- n = vm_physmem[lcv].end - vm_physmem[lcv].start;
- if (n > pagecount) {
- printf("vm_page_bootstrap: lost %d page(s) in init\n",
- n - pagecount);
- vm_page_lost_count += (n - pagecount);
- n = pagecount;
- }
-
- /* set up page array pointers */
- vm_physmem[lcv].pgs = pagearray;
- pagearray += n;
- pagecount -= n;
- vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);
-
- /* init and free vm_pages (we've already bzero'd them) */
- paddr = ptoa(vm_physmem[lcv].start);
- for (i = 0; i < n; i++, paddr += PAGE_SIZE) {
- vm_physmem[lcv].pgs[i].phys_addr = paddr;
- if (atop(paddr) >= vm_physmem[lcv].avail_start &&
- atop(paddr) <= vm_physmem[lcv].avail_end)
- vm_page_free(&vm_physmem[lcv].pgs[i]);
- }
- }
-
- /*
- * pass up the values of virtual_space_start and virtual_space_end
- * (obtained by vm_bootstrap_steal_memory) to the upper layers of
- * the VM.
- */
- *startp = round_page(virtual_space_start);
- *endp = trunc_page(virtual_space_end);
-
- /*
- * init pagedaemon lock
- */
- simple_lock_init(&vm_pages_needed_lock);
-}
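
The pagecount computation in vm_page_bootstrap accounts for the fact that every managed page also costs one struct vm_page out of the same free memory, giving pagecount = PAGE_SIZE * (freepages + 1) / (PAGE_SIZE + sizeof(struct vm_page)). A standalone rendering of that arithmetic with made-up sizes; the real sizeof(struct vm_page) is not shown in this diff:

#include <stdio.h>

int
main(void)
{
	unsigned long page_size = 4096;
	unsigned long vm_page_size = 120;	/* pretend sizeof(struct vm_page) */
	unsigned long freepages = 32768;	/* 128 MB of free physical memory */
	unsigned long pagecount;

	/*
	 * Each managed page needs PAGE_SIZE bytes plus one vm_page
	 * structure; the +1 is the fudge against truncation error.
	 */
	pagecount = (page_size * (freepages + 1)) / (page_size + vm_page_size);
	printf("can manage %lu of %lu pages (about %lu pages go to the array)\n",
	    pagecount, freepages,
	    (pagecount * vm_page_size + page_size - 1) / page_size);
	return (0);
}
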
-
-/*
- * vm_bootstrap_steal_memory: steal memory from physmem for bootstrapping
- */
-vm_offset_t
-vm_bootstrap_steal_memory(size)
- vm_size_t size;
-{
-#if defined(PMAP_STEAL_MEMORY)
- vm_offset_t addr;
-
- /*
- * Defer this to machine-dependent code; we may need to allocate
- * from a direct-mapped segment.
- */
- addr = pmap_steal_memory(size, &virtual_space_start,
- &virtual_space_end);
-
- /* round it the way we like it */
- virtual_space_start = round_page(virtual_space_start);
- virtual_space_end = trunc_page(virtual_space_end);
-
- return (addr);
-#else /* ! PMAP_STEAL_MEMORY */
- vm_offset_t addr, vaddr, paddr;
-
- /* round to page size */
- size = round_page(size);
-
- /*
- * on first call to this function init ourselves. we detect this
- * by checking virtual_space_start/end which are in the zero'd BSS
- * area.
- */
- if (virtual_space_start == virtual_space_end) {
- pmap_virtual_space(&virtual_space_start, &virtual_space_end);
-
- /* round it the way we like it */
- virtual_space_start = round_page(virtual_space_start);
- virtual_space_end = trunc_page(virtual_space_end);
- }
-
- /*
- * allocate virtual memory for this request
- */
- addr = virtual_space_start;
- virtual_space_start += size;
-
- /*
- * allocate and mapin physical pages to back new virtual pages
- */
- for (vaddr = round_page(addr); vaddr < addr + size;
- vaddr += PAGE_SIZE) {
- if (!vm_page_physget(&paddr))
- panic("vm_bootstrap_steal_memory: out of memory");
-
- /* XXX: should be wired, but some pmaps don't like that ... */
- pmap_enter(pmap_kernel(), vaddr, paddr,
- VM_PROT_READ|VM_PROT_WRITE, FALSE, 0);
- }
- return(addr);
-#endif /* PMAP_STEAL_MEMORY */
-}
-
-#if !defined(PMAP_STEAL_MEMORY)
-/*
- * vm_page_physget: "steal" one page from the vm_physmem structure.
- *
- * - attempt to allocate it off the end of a segment in which the "avail"
- * values match the start/end values. if we can't do that, then we
- * will advance both values (making them equal, and removing some
- * vm_page structures from the non-avail area).
- * - return false if out of memory.
- */
-static boolean_t
-vm_page_physget(paddrp)
- vm_offset_t *paddrp;
-
-{
- int lcv, x;
-
- /* pass 1: try allocating from a matching end */
-#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
- for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
-#else
- for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
-#endif
- {
- if (vm_physmem[lcv].pgs)
- panic("vm_page_physget: called _after_ bootstrap");
-
- /* try from front */
- if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
- vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
- *paddrp = ptoa(vm_physmem[lcv].avail_start);
- vm_physmem[lcv].avail_start++;
- vm_physmem[lcv].start++;
-
- /* nothing left? nuke it */
- if (vm_physmem[lcv].avail_start ==
- vm_physmem[lcv].end) {
- if (vm_nphysseg == 1)
- panic("vm_page_physget: out of memory!");
- vm_nphysseg--;
- for (x = lcv; x < vm_nphysseg; x++)
- /* structure copy */
- vm_physmem[x] = vm_physmem[x+1];
- }
- return(TRUE);
- }
-
- /* try from rear */
- if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
- vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
- *paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
- vm_physmem[lcv].avail_end--;
- vm_physmem[lcv].end--;
-
- /* nothing left? nuke it */
- if (vm_physmem[lcv].avail_end ==
- vm_physmem[lcv].start) {
- if (vm_nphysseg == 1)
- panic("vm_page_physget: out of memory!");
- vm_nphysseg--;
- for (x = lcv; x < vm_nphysseg; x++)
- /* structure copy */
- vm_physmem[x] = vm_physmem[x+1];
- }
- return(TRUE);
- }
- }
-
- /* pass2: forget about matching ends, just allocate something */
-#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
- for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
-#else
- for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
-#endif
- {
- /* any room in this bank? */
- if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
- continue; /* nope */
-
- *paddrp = ptoa(vm_physmem[lcv].avail_start);
- vm_physmem[lcv].avail_start++;
- vm_physmem[lcv].start = vm_physmem[lcv].avail_start; /* truncate! */
-
- /* nothing left? nuke it */
- if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
- if (vm_nphysseg == 1)
- panic("vm_page_physget: out of memory!");
- vm_nphysseg--;
- for (x = lcv; x < vm_nphysseg; x++)
- vm_physmem[x] = vm_physmem[x+1]; /* structure copy */
- }
- return(TRUE);
- }
-
- return(FALSE); /* whoops! */
-}
-#endif /* ! PMAP_STEAL_MEMORY */
-
-/*
- * vm_page_physload: load physical memory into VM system
- *
- * - all args are PFs
- * - all pages in start/end get vm_page structures
- * - areas marked by avail_start/avail_end get added to the free page pool
- * - we are limited to VM_PHYSSEG_MAX physical memory segments
- */
-void
-vm_page_physload(start, end, avail_start, avail_end)
- vm_offset_t start, end, avail_start, avail_end;
-{
- struct vm_page *pgs;
- struct vm_physseg *ps;
- int preload, lcv, npages;
-#if (VM_PHYSSEG_STRAT != VM_PSTRAT_RANDOM)
- int x;
-#endif
-
- if (page_shift == 0)
- panic("vm_page_physload: page size not set!");
-
- /*
- * do we have room?
- */
- if (vm_nphysseg == VM_PHYSSEG_MAX) {
- printf("vm_page_physload: unable to load physical memory segment\n");
- printf("\t%d segments allocated, ignoring 0x%lx -> 0x%lx\n",
- VM_PHYSSEG_MAX, start, end);
- return;
- }
-
- /*
- * check to see if this is a "preload" (i.e. vm_mem_init hasn't been
- * called yet, so malloc is not available).
- */
- for (lcv = 0; lcv < vm_nphysseg; lcv++) {
- if (vm_physmem[lcv].pgs)
- break;
- }
- preload = (lcv == vm_nphysseg);
-
- /*
- * if VM is already running, attempt to malloc() vm_page structures
- */
- if (!preload) {
-#if defined(VM_PHYSSEG_NOADD)
- panic("vm_page_physload: tried to add RAM after vm_mem_init");
-#else
-/* XXXCDC: need some sort of lockout for this case */
- vm_offset_t paddr;
-
- /* # of pages */
- npages = end - start;
- MALLOC(pgs, struct vm_page *, sizeof(struct vm_page) * npages,
- M_VMPGDATA, M_NOWAIT);
- if (pgs == NULL) {
- printf("vm_page_physload: "
- "can not malloc vm_page structs for segment\n"
- "\tignoring 0x%lx -> 0x%lx\n", start, end);
- return;
- }
- /* zero data, init phys_addr, and free pages */
- bzero(pgs, sizeof(struct vm_page) * npages);
- for (lcv = 0, paddr = ptoa(start); lcv < npages;
- lcv++, paddr += PAGE_SIZE) {
- pgs[lcv].phys_addr = paddr;
- if (atop(paddr) >= avail_start &&
- atop(paddr) <= avail_end)
- vm_page_free(&pgs[lcv]);
- }
-/* XXXCDC: incomplete: need to update v_free_count, what else?
-   v_free_count is updated in vm_page_free, actually */
-/* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
-#endif
- } else {
- /* XXX/gcc complains if these don't get init'd */
- pgs = NULL;
- npages = 0;
- }
-
- /*
- * now insert us in the proper place in vm_physmem[]
- */
-#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
- /* random: put it at the end (easy!) */
- ps = &vm_physmem[vm_nphysseg];
-
-#else
-#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
-
- /* sort by address for binary search */
- for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
- if (start < vm_physmem[lcv].start)
- break;
- ps = &vm_physmem[lcv];
-
- /* move back other entries, if necessary ... */
- for (x = vm_nphysseg ; x > lcv ; x--)
- /* structure copy */
- vm_physmem[x] = vm_physmem[x - 1];
-
-#else
-#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
-
- /* sort by largest segment first */
- for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
- if ((end - start) >
- (vm_physmem[lcv].end - vm_physmem[lcv].start))
- break;
- ps = &vm_physmem[lcv];
-
- /* move back other entries, if necessary ... */
- for (x = vm_nphysseg ; x > lcv ; x--)
- /* structure copy */
- vm_physmem[x] = vm_physmem[x - 1];
-
-#else
-
- panic("vm_page_physload: unknown physseg strategy selected!");
-
-#endif
-#endif
-#endif
-
- ps->start = start;
- ps->end = end;
- ps->avail_start = avail_start;
- ps->avail_end = avail_end;
- if (preload) {
- ps->pgs = NULL;
- } else {
- ps->pgs = pgs;
- ps->lastpg = pgs + npages - 1;
- }
- vm_nphysseg++;
-
- /*
- * done!
- */
- return;
-}
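
For the BSEARCH and BIGFIRST strategies, vm_page_physload inserts the new segment into a fixed-size array: find the slot, shift the tail back one entry at a time by structure copy, then fill the slot. A generic standalone sketch of that idiom; seg, MAXSEGS and seg_insert are invented for the example:

#include <stdio.h>

#define MAXSEGS	8

struct seg {
	unsigned long start, end;
};

static struct seg segs[MAXSEGS];
static int nsegs;

/* Insert a segment, keeping the array sorted by start address. */
static int
seg_insert(unsigned long start, unsigned long end)
{
	int lcv, x;

	if (nsegs == MAXSEGS)
		return (-1);			/* table full */
	for (lcv = 0; lcv < nsegs; lcv++)
		if (start < segs[lcv].start)
			break;
	for (x = nsegs; x > lcv; x--)
		segs[x] = segs[x - 1];		/* structure copy */
	segs[lcv].start = start;
	segs[lcv].end = end;
	nsegs++;
	return (0);
}

int
main(void)
{
	int i;

	seg_insert(0x400000, 0x800000);
	seg_insert(0x100000, 0x200000);
	seg_insert(0xa00000, 0xc00000);
	for (i = 0; i < nsegs; i++)
		printf("seg %d: %#lx-%#lx\n", i, segs[i].start, segs[i].end);
	return (0);
}
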
-
-/*
- * vm_page_physrehash: reallocate hash table based on number of
- * free pages.
- */
-void
-vm_page_physrehash()
-{
- struct pglist *newbuckets, *oldbuckets;
- struct vm_page *pg;
- int freepages, lcv, bucketcount, s, oldcount;
-
- /*
- * compute number of pages that can go in the free pool
- */
- freepages = 0;
- for (lcv = 0; lcv < vm_nphysseg; lcv++)
- freepages = freepages + (vm_physmem[lcv].avail_end -
- vm_physmem[lcv].avail_start);
-
- /*
- * compute number of buckets needed for this number of pages
- */
- bucketcount = 1;
- while (bucketcount < freepages)
- bucketcount = bucketcount * 2;
-
- /*
- * malloc new buckets
- */
- MALLOC(newbuckets, struct pglist*, sizeof(struct pglist) * bucketcount,
- M_VMPBUCKET, M_NOWAIT);
- if (newbuckets == NULL) {
- printf("vm_page_physrehash: "
- "WARNING: could not grow page hash table\n");
- return;
- }
- for (lcv = 0; lcv < bucketcount; lcv++)
- TAILQ_INIT(&newbuckets[lcv]);
-
- /*
- * now replace the old buckets with the new ones and rehash everything
- */
- s = splimp();
- simple_lock(&bucket_lock);
- /* swap old for new ... */
- oldbuckets = vm_page_buckets;
- oldcount = vm_page_bucket_count;
- vm_page_buckets = newbuckets;
- vm_page_bucket_count = bucketcount;
- vm_page_hash_mask = bucketcount - 1; /* power of 2 */
-
- /* ... and rehash */
- for (lcv = 0 ; lcv < oldcount ; lcv++) {
- while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
- TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
- TAILQ_INSERT_TAIL(&vm_page_buckets[
- vm_page_hash(pg->object, pg->offset)], pg, hashq);
- }
- }
- simple_unlock(&bucket_lock);
- splx(s);
-
- /*
- * free old bucket array if we malloc'd it previously
- */
- if (oldbuckets != &vm_page_bootbucket)
- FREE(oldbuckets, M_VMPBUCKET);
-
- /*
- * done
- */
- return;
-}
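
vm_page_physrehash grows the bucket array to a power of two at least as large as the number of free pages and then re-queues every page into the new buckets. The sketch below reproduces just that rehash step in userland with <sys/queue.h> TAILQs, assuming the platform provides them; item, key, HASH and rehash are invented names, and the splimp/simple_lock protection of the original is omitted:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	TAILQ_ENTRY(item)	hashq;
	unsigned long		key;
};
TAILQ_HEAD(bucket, item);

static struct bucket	*buckets;
static unsigned long	 nbuckets = 1, hashmask;
static struct bucket	 bootbucket = TAILQ_HEAD_INITIALIZER(bootbucket);

#define HASH(key)	((key) & hashmask)	/* needs a power-of-two table */

static void
rehash(unsigned long want)
{
	struct bucket *nb, *ob = buckets;
	unsigned long newcount = 1, oldcount = nbuckets, i;
	struct item *it;

	while (newcount < want)
		newcount *= 2;			/* keep it a power of two */
	nb = calloc(newcount, sizeof(*nb));
	if (nb == NULL)
		return;				/* keep the old table */
	for (i = 0; i < newcount; i++)
		TAILQ_INIT(&nb[i]);

	/* Swap old for new, then move every item to its new bucket. */
	buckets = nb;
	nbuckets = newcount;
	hashmask = newcount - 1;
	for (i = 0; i < oldcount; i++)
		while ((it = TAILQ_FIRST(&ob[i])) != NULL) {
			TAILQ_REMOVE(&ob[i], it, hashq);
			TAILQ_INSERT_TAIL(&buckets[HASH(it->key)], it, hashq);
		}
	if (ob != &bootbucket)
		free(ob);
}

int
main(void)
{
	struct item a = { .key = 0x1234 }, b = { .key = 0xabcd };

	buckets = &bootbucket;
	TAILQ_INSERT_TAIL(&buckets[HASH(a.key)], &a, hashq);
	TAILQ_INSERT_TAIL(&buckets[HASH(b.key)], &b, hashq);
	rehash(1000);			/* grows to 1024 buckets */
	printf("item a now in bucket %lu of %lu\n", HASH(a.key), nbuckets);
	return (0);
}
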
-
-#if 1 /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */
-
-void vm_page_physdump __P((void)); /* SHUT UP GCC */
-
-/* call from DDB */
-void
-vm_page_physdump()
-{
- int lcv;
-
- printf("rehash: physical memory config [segs=%d of %d]:\n",
- vm_nphysseg, VM_PHYSSEG_MAX);
- for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
- printf("0x%lx->0x%lx [0x%lx->0x%lx]\n", vm_physmem[lcv].start,
- vm_physmem[lcv].end, vm_physmem[lcv].avail_start,
- vm_physmem[lcv].avail_end);
- printf("STRATEGY = ");
-
- switch (VM_PHYSSEG_STRAT) {
- case VM_PSTRAT_RANDOM:
- printf("RANDOM\n");
- break;
-
- case VM_PSTRAT_BSEARCH:
- printf("BSEARCH\n");
- break;
-
- case VM_PSTRAT_BIGFIRST:
- printf("BIGFIRST\n");
- break;
-
- default:
- printf("<<UNKNOWN>>!!!!\n");
- }
- printf("number of buckets = %d\n", vm_page_bucket_count);
- printf("number of lost pages = %d\n", vm_page_lost_count);
-}
-#endif
-
-#elif defined(MACHINE_NONCONTIG)
-/* OLD NONCONTIG CODE: NUKE NUKE NUKE ONCE CONVERTED */
-
-/*
- * We implement vm_page_bootstrap and vm_bootstrap_steal_memory with
- * the help of two simpler functions:
- *
- * pmap_virtual_space and pmap_next_page
- */
-
-/*
- * vm_page_bootstrap:
- *
- * Initializes the resident memory module.
- *
- * Allocates memory for the page cells, and
- * for the object/offset-to-page hash table headers.
- * Each page cell is initialized and placed on the free list.
- * Returns the range of available kernel virtual memory.
- */
-void
-vm_page_bootstrap(startp, endp)
- vm_offset_t *startp;
- vm_offset_t *endp;
-{
- unsigned int i, freepages;
- register struct pglist *bucket;
- vm_offset_t paddr;
-
- extern vm_offset_t kentry_data;
- extern vm_size_t kentry_data_size;
-
-
- /*
- * Initialize the locks
- */
- simple_lock_init(&vm_page_queue_free_lock);
- simple_lock_init(&vm_page_queue_lock);
-
- /*
- * Initialize the queue headers for the free queue,
- * the active queue and the inactive queue.
- */
- TAILQ_INIT(&vm_page_queue_free);
- TAILQ_INIT(&vm_page_queue_active);
- TAILQ_INIT(&vm_page_queue_inactive);
-
- /*
- * Pre-allocate maps and map entries that cannot be dynamically
- * allocated via malloc(). The maps include the kernel_map and
- * kmem_map which must be initialized before malloc() will
- * work (obviously). Also could include pager maps which would
- * be allocated before kmeminit.
- *
- * Allow some kernel map entries... this should be plenty
- * since people shouldn't be cluttering up the kernel
- * map (they should use their own maps).
- */
-
- kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) +
- MAX_KMAPENT*sizeof(struct vm_map_entry));
- kentry_data = vm_bootstrap_steal_memory(kentry_data_size);
-
- /*
- * Validate these zone addresses.
- */
- bzero((caddr_t) kentry_data, kentry_data_size);
-
- /*
- * Allocate (and initialize) the virtual-to-physical
- * table hash buckets.
- *
- * The number of buckets MUST BE a power of 2, and
- * the actual value is the next power of 2 greater
- * than the number of physical pages in the system.
- *
- * Note:
- * This computation can be tweaked if desired.
- */
- if (vm_page_bucket_count == 0) {
- unsigned int npages = pmap_free_pages();
-
- vm_page_bucket_count = 1;
- while (vm_page_bucket_count < npages)
- vm_page_bucket_count <<= 1;
- }
-
- vm_page_hash_mask = vm_page_bucket_count - 1;
-
- vm_page_buckets = (struct pglist *)
- vm_bootstrap_steal_memory(vm_page_bucket_count *
- sizeof(*vm_page_buckets));
- bucket = vm_page_buckets;
-
- for (i = vm_page_bucket_count; i--;) {
- TAILQ_INIT(bucket);
- bucket++;
- }
-
- simple_lock_init(&bucket_lock);
-
- /*
- * We calculate how many page frames we will have and
- * then allocate the page structures in one chunk.
- * The calculation is non-trivial. We want:
- *
-	 *	vmpages > freepages - (vmpages * sizeof(struct vm_page)) / PAGE_SIZE
-	 *
-	 * ...which, with some algebra, becomes:
-	 *
-	 *	vmpages > (freepages * PAGE_SIZE) / (PAGE_SIZE + sizeof(struct vm_page))
- *
- * The value of vm_page_count need not be exact, but must
- * be large enough so vm_page_array handles the index range.
- */
-
- freepages = pmap_free_pages();
- /* Fudge slightly to deal with truncation error. */
- freepages += 1; /* fudge */
-
- vm_page_count = (PAGE_SIZE * freepages) /
- (PAGE_SIZE + sizeof(*vm_page_array));
-
- vm_page_array = (vm_page_t)
- vm_bootstrap_steal_memory(vm_page_count * sizeof(*vm_page_array));
- bzero(vm_page_array, vm_page_count * sizeof(*vm_page_array));
-
-#ifdef DIAGNOSTIC
- /*
-	 * Initialize everything in case code steps into the holes,
-	 * and set the physical address to something that will cause a panic...
- */
- for (i = 0; i < vm_page_count; i++)
- vm_page_array[i].phys_addr = 0xdeadbeef;
-#endif
-
- /*
- * Initialize the page frames. Note that some page
- * indices may not be usable when pmap_free_pages()
- * counts pages in a hole.
- */
-
- if (!pmap_next_page(&paddr))
- panic("vm_page_bootstrap: can't get first page");
-
- first_page = pmap_page_index(paddr);
- for (i = 0;;) {
- /*
- * Initialize a page array element.
- */
-
- VM_PAGE_INIT(&vm_page_array[i], NULL, NULL);
- vm_page_array[i].phys_addr = paddr;
- vm_page_free(&vm_page_array[i]);
-
- /*
- * Are there any more physical pages?
- */
-
- if (!pmap_next_page(&paddr))
- break;
- i = pmap_page_index(paddr) - first_page;
-
- /*
- * Don't trust pmap_page_index()...
- */
-
- if (
-#if 0
- i < 0 || /* can't happen, i is unsigned */
-#endif
- i >= vm_page_count)
- panic("vm_page_bootstrap: bad i = 0x%x", i);
- }
-
- /*
- * Make sure we have nice, round values.
- */
-
- virtual_space_start = round_page(virtual_space_start);
- virtual_space_end = trunc_page(virtual_space_end);
-
- *startp = virtual_space_start;
- *endp = virtual_space_end;
-
- simple_lock_init(&vm_pages_needed_lock);
-}
-
-vm_offset_t
-vm_bootstrap_steal_memory(size)
- vm_size_t size;
-{
- vm_offset_t addr, vaddr, paddr;
-
- /*
- * We round to page size.
- */
-
- size = round_page(size);
-
- /*
- * If this is the first call to vm_bootstrap_steal_memory,
- * we have to initialize ourself.
- */
-
- if (virtual_space_start == virtual_space_end) {
- pmap_virtual_space(&virtual_space_start, &virtual_space_end);
-
- /*
- * The initial values must be aligned properly, and
- * we don't trust the pmap module to do it right.
- */
-
- virtual_space_start = round_page(virtual_space_start);
- virtual_space_end = trunc_page(virtual_space_end);
- }
-
- /*
- * Allocate virtual memory for this request.
- */
-
- addr = virtual_space_start;
- virtual_space_start += size;
-
- /*
- * Allocate and map physical pages to back new virtual pages.
- */
-
- for (vaddr = round_page(addr);
- vaddr < addr + size;
- vaddr += PAGE_SIZE) {
- if (!pmap_next_page(&paddr))
- panic("vm_bootstrap_steal_memory");
-
- /*
- * XXX Logically, these mappings should be wired,
- * but some pmap modules barf if they are.
- */
-
- pmap_enter(pmap_kernel(), vaddr, paddr,
- VM_PROT_READ|VM_PROT_WRITE, FALSE, 0);
- }
-
- return addr;
-}
-
-#else /* MACHINE_NONCONTIG */
-
-/* OLD CONTIG CODE: NUKE NUKE NUKE ONCE CONVERTED */
-/*
- * vm_page_startup:
- *
- * Initializes the resident memory module.
- *
- * Allocates memory for the page cells, and
- * for the object/offset-to-page hash table headers.
- * Each page cell is initialized and placed on the free list.
- */
-void
-vm_page_startup(start, end)
- vm_offset_t *start;
- vm_offset_t *end;
-{
- register vm_page_t m;
- register struct pglist *bucket;
- int npages;
- int i;
- vm_offset_t pa;
- extern vm_offset_t kentry_data;
- extern vm_size_t kentry_data_size;
-
-
- /*
- * Initialize the locks
- */
- simple_lock_init(&vm_page_queue_free_lock);
- simple_lock_init(&vm_page_queue_lock);
-
- /*
- * Initialize the queue headers for the free queue,
- * the active queue and the inactive queue.
- */
- TAILQ_INIT(&vm_page_queue_free);
- TAILQ_INIT(&vm_page_queue_active);
- TAILQ_INIT(&vm_page_queue_inactive);
-
- /*
- * Calculate the number of hash table buckets.
- *
- * The number of buckets MUST BE a power of 2, and
- * the actual value is the next power of 2 greater
- * than the number of physical pages in the system.
- *
- * Note:
- * This computation can be tweaked if desired.
- */
- if (vm_page_bucket_count == 0) {
- vm_page_bucket_count = 1;
- while (vm_page_bucket_count < atop(*end - *start))
- vm_page_bucket_count <<= 1;
- }
-
- vm_page_hash_mask = vm_page_bucket_count - 1;
-
- /*
- * Allocate (and initialize) the hash table buckets.
- */
- vm_page_buckets = (struct pglist *)
- pmap_bootstrap_alloc(vm_page_bucket_count * sizeof(struct pglist));
- bucket = vm_page_buckets;
-
- for (i = vm_page_bucket_count; i--;) {
- TAILQ_INIT(bucket);
- bucket++;
- }
-
- simple_lock_init(&bucket_lock);
-
- /*
- * Truncate the remainder of physical memory to our page size.
- */
- *end = trunc_page(*end);
-
- /*
- * Pre-allocate maps and map entries that cannot be dynamically
- * allocated via malloc(). The maps include the kernel_map and
- * kmem_map which must be initialized before malloc() will
- * work (obviously). Also could include pager maps which would
- * be allocated before kmeminit.
- *
- * Allow some kernel map entries... this should be plenty
- * since people shouldn't be cluttering up the kernel
- * map (they should use their own maps).
- */
- kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) +
- MAX_KMAPENT*sizeof(struct vm_map_entry));
- kentry_data = (vm_offset_t) pmap_bootstrap_alloc(kentry_data_size);
-
- /*
- * Compute the number of pages of memory that will be
- * available for use (taking into account the overhead
- * of a page structure per page).
- */
- cnt.v_free_count = vm_page_count =
- (*end - *start + sizeof(struct vm_page)) /
- (PAGE_SIZE + sizeof(struct vm_page));
-
- /*
- * Record the extent of physical memory that the
- * virtual memory system manages.
- */
- first_page = *start;
- first_page += vm_page_count * sizeof(struct vm_page);
- first_page = atop(round_page(first_page));
- last_page = first_page + vm_page_count - 1;
-
- first_phys_addr = ptoa(first_page);
- last_phys_addr = ptoa(last_page) + PAGE_MASK;
-
- /*
- * Allocate and clear the mem entry structures.
- */
- m = vm_page_array = (vm_page_t)
- pmap_bootstrap_alloc(vm_page_count * sizeof(struct vm_page));
- bzero(vm_page_array, vm_page_count * sizeof(struct vm_page));
-
- /*
- * Initialize the mem entry structures now, and
- * put them in the free queue.
- */
- pa = first_phys_addr;
- npages = vm_page_count;
- while (npages--) {
- m->flags = PG_FREE;
- m->object = NULL;
- m->phys_addr = pa;
- TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
- m++;
- pa += PAGE_SIZE;
- }
-
- /*
- * Initialize vm_pages_needed lock here - don't wait for pageout
- * daemon XXX
- */
- simple_lock_init(&vm_pages_needed_lock);
-
- /* from now on, pmap_bootstrap_alloc can't be used */
- vm_page_startup_initialized = TRUE;
-}
-#endif /* MACHINE_NONCONTIG */
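The bucket-count loop in vm_page_startup() above rounds the number of managed pages up to a power of two so that the hash mask can stand in for a modulo. A minimal stand-alone restatement of that step (next_pow2() is a hypothetical helper, not from this tree):

/* Round n up to the smallest power of two >= n, mirroring the
 * vm_page_bucket_count loop: start at 1 and double until n is covered. */
static unsigned long
next_pow2(unsigned long n)
{
	unsigned long p = 1;

	while (p < n)
		p <<= 1;
	return p;
}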
-
-/*
- * vm_page_insert: [ internal use only ]
- *
- * Inserts the given mem entry into the object/object-page
- * table and object list.
- *
- * The object and page must be locked.
- */
-void
-vm_page_insert(mem, object, offset)
- register vm_page_t mem;
- register vm_object_t object;
- register vm_offset_t offset;
-{
- register struct pglist *bucket;
- int spl;
-
- VM_PAGE_CHECK(mem);
-
- if (mem->flags & PG_TABLED)
- panic("vm_page_insert: already inserted");
-
- /*
- * Record the object/offset pair in this page
- */
-
- mem->object = object;
- mem->offset = offset;
-
- /*
- * Insert it into the object_object/offset hash table
- */
-
- bucket = &vm_page_buckets[vm_page_hash(object, offset)];
- spl = splimp();
- simple_lock(&bucket_lock);
- TAILQ_INSERT_TAIL(bucket, mem, hashq);
- simple_unlock(&bucket_lock);
- (void) splx(spl);
-
- /*
- * Now link into the object's list of backed pages.
- */
-
- TAILQ_INSERT_TAIL(&object->memq, mem, listq);
- mem->flags |= PG_TABLED;
-
- /*
- * And show that the object has one more resident
- * page.
- */
-
- object->resident_page_count++;
-}
-
-/*
- * vm_page_remove: [ internal use only ]
- * XXX: used by device pager as well
- *
- * Removes the given mem entry from the object/offset-page
- * table and the object page list.
- *
- * The object and page must be locked.
- */
-void
-vm_page_remove(mem)
- register vm_page_t mem;
-{
- register struct pglist *bucket;
- int spl;
-
- VM_PAGE_CHECK(mem);
-
-#ifdef DIAGNOSTIC
- if (mem->flags & PG_FAULTING)
- panic("vm_page_remove: page is faulting");
-#endif
-
- if (!(mem->flags & PG_TABLED))
- return;
-
- /*
- * Remove from the object_object/offset hash table
- */
-
- bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
- spl = splimp();
- simple_lock(&bucket_lock);
- TAILQ_REMOVE(bucket, mem, hashq);
- simple_unlock(&bucket_lock);
- (void) splx(spl);
-
- /*
- * Now remove from the object's list of backed pages.
- */
-
- TAILQ_REMOVE(&mem->object->memq, mem, listq);
-
- /*
- * And show that the object has one fewer resident
- * page.
- */
-
- mem->object->resident_page_count--;
-
- mem->flags &= ~PG_TABLED;
-}
-
-/*
- * vm_page_lookup:
- *
- * Returns the page associated with the object/offset
- * pair specified; if none is found, NULL is returned.
- *
- * The object must be locked. No side effects.
- */
-vm_page_t
-vm_page_lookup(object, offset)
- register vm_object_t object;
- register vm_offset_t offset;
-{
- register vm_page_t mem;
- register struct pglist *bucket;
- int spl;
-
- /*
- * Search the hash table for this object/offset pair
- */
-
- bucket = &vm_page_buckets[vm_page_hash(object, offset)];
-
- spl = splimp();
- simple_lock(&bucket_lock);
- for (mem = bucket->tqh_first; mem != NULL; mem = mem->hashq.tqe_next) {
- VM_PAGE_CHECK(mem);
- if ((mem->object == object) && (mem->offset == offset)) {
- simple_unlock(&bucket_lock);
- splx(spl);
- return(mem);
- }
- }
-
- simple_unlock(&bucket_lock);
- splx(spl);
- return(NULL);
-}
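The three routines above (vm_page_insert(), vm_page_remove(), vm_page_lookup()) all key the object/offset hash table the same way: hash the pair, mask with vm_page_hash_mask, and walk that bucket's TAILQ. The snippet below is a minimal sketch of the bucket-selection step only; hash_bucket() and the constant mask are invented for illustration and are not the kernel's vm_page_hash().

#include <stdio.h>

/* Hypothetical stand-in: the kernel keeps the bucket count a power of two,
 * so masking with (count - 1) is a cheap modulo. */
static unsigned long vm_page_hash_mask = 255;	/* bucket count - 1 */

/* Mix the object pointer with the offset, then mask down to a bucket. */
static unsigned long
hash_bucket(const void *object, unsigned long offset)
{
	return (((unsigned long)object + offset) & vm_page_hash_mask);
}

int
main(void)
{
	int obj;

	printf("bucket %lu\n", hash_bucket(&obj, 0x2000));
	return 0;
}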
-
-/*
- * vm_page_rename:
- *
- * Move the given memory entry from its
- * current object to the specified target object/offset.
- *
- * The object must be locked.
- */
-void
-vm_page_rename(mem, new_object, new_offset)
- register vm_page_t mem;
- register vm_object_t new_object;
- vm_offset_t new_offset;
-{
-
- if (mem->object == new_object)
- return;
-
- vm_page_lock_queues(); /* keep page from moving out from
- under pageout daemon */
- vm_page_remove(mem);
- vm_page_insert(mem, new_object, new_offset);
- vm_page_unlock_queues();
-}
-
-/*
- * vm_page_alloc:
- *
- * Allocate and return a memory cell associated
- * with this VM object/offset pair.
- *
- * Object must be locked.
- */
-
-vm_page_t
-vm_page_alloc(object, offset)
- vm_object_t object;
- vm_offset_t offset;
-{
- register vm_page_t mem;
- int spl;
-
- spl = splimp(); /* XXX */
- simple_lock(&vm_page_queue_free_lock);
- mem = vm_page_queue_free.tqh_first;
-
- if (VERY_LOW_MEM()) {
- if ((!KERN_OBJ(object) && curproc != pageout_daemon)
- || mem == NULL) {
- simple_unlock(&vm_page_queue_free_lock);
- splx(spl);
- return(NULL);
- }
- }
-#ifdef DIAGNOSTIC
- if (mem == NULL) /* because we now depend on VERY_LOW_MEM() */
- panic("vm_page_alloc");
-#endif
- TAILQ_REMOVE(&vm_page_queue_free, mem, pageq);
-
- cnt.v_free_count--;
- simple_unlock(&vm_page_queue_free_lock);
- splx(spl);
-
- VM_PAGE_INIT(mem, object, offset);
-
- /*
- * Decide if we should poke the pageout daemon.
- * We do this if the free count is less than the low
- * water mark, or if the free count is less than the high
- * water mark (but above the low water mark) and the inactive
- * count is less than its target.
- *
- * We don't have the counts locked ... if they change a little,
- * it doesn't really matter.
- */
-
- if (cnt.v_free_count < cnt.v_free_min ||
- (cnt.v_free_count < cnt.v_free_target &&
- cnt.v_inactive_count < cnt.v_inactive_target))
- thread_wakeup(&vm_pages_needed);
- return (mem);
-}
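The wakeup policy in the comment above reduces to a two-clause test on the free and inactive counts. A minimal restatement as a sketch, with plain ints standing in for the cnt fields (the helper name is invented):

#include <stdbool.h>

/* Poke the pageout daemon when free memory is below the low water mark, or
 * below the high water mark while the inactive list is under its target. */
static bool
should_wake_pageout(int free_count, int free_min, int free_target,
    int inactive_count, int inactive_target)
{
	return (free_count < free_min ||
	    (free_count < free_target && inactive_count < inactive_target));
}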
-
-/*
- * vm_page_free:
- *
- * Returns the given page to the free list,
- * disassociating it with any VM object.
- *
- * Object and page must be locked prior to entry.
- */
-void
-vm_page_free(mem)
- register vm_page_t mem;
-{
-
- vm_page_remove(mem);
- if (mem->flags & PG_ACTIVE) {
- TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
- mem->flags &= ~PG_ACTIVE;
- cnt.v_active_count--;
- }
-
- if (mem->flags & PG_INACTIVE) {
- TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
- mem->flags &= ~PG_INACTIVE;
- cnt.v_inactive_count--;
- }
-
- if (!(mem->flags & PG_FICTITIOUS)) {
- int spl;
-
- spl = splimp();
- simple_lock(&vm_page_queue_free_lock);
- mem->flags |= PG_FREE;
- TAILQ_INSERT_TAIL(&vm_page_queue_free, mem, pageq);
-
- cnt.v_free_count++;
- simple_unlock(&vm_page_queue_free_lock);
- splx(spl);
- }
-}
-
-/*
- * vm_page_wire:
- *
- * Mark this page as wired down by yet
- * another map, removing it from paging queues
- * as necessary.
- *
- * The page queues must be locked.
- */
-void
-vm_page_wire(mem)
- register vm_page_t mem;
-{
-
- VM_PAGE_CHECK(mem);
-
- if (mem->wire_count == 0) {
- if (mem->flags & PG_ACTIVE) {
- TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
- cnt.v_active_count--;
- mem->flags &= ~PG_ACTIVE;
- }
- if (mem->flags & PG_INACTIVE) {
- TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
- cnt.v_inactive_count--;
- mem->flags &= ~PG_INACTIVE;
- }
- cnt.v_wire_count++;
- }
- mem->wire_count++;
-}
-
-/*
- * vm_page_unwire:
- *
- * Release one wiring of this page, potentially
- * enabling it to be paged again.
- *
- * The page queues must be locked.
- */
-void
-vm_page_unwire(mem)
- register vm_page_t mem;
-{
-
- VM_PAGE_CHECK(mem);
-
- mem->wire_count--;
- if (mem->wire_count == 0) {
- TAILQ_INSERT_TAIL(&vm_page_queue_active, mem, pageq);
- cnt.v_active_count++;
- mem->flags |= PG_ACTIVE;
- cnt.v_wire_count--;
- }
-}
-
-/*
- * vm_page_deactivate:
- *
- * Returns the given page to the inactive list,
- * indicating that no physical maps have access
- * to this page. [Used by the physical mapping system.]
- *
- * The page queues must be locked.
- */
-void
-vm_page_deactivate(m)
- register vm_page_t m;
-{
-
- VM_PAGE_CHECK(m);
-
- /*
- * Only move active pages -- ignore locked or already
- * inactive ones.
- */
-
- if (m->flags & PG_ACTIVE) {
- TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
- m->flags &= ~PG_ACTIVE;
- cnt.v_active_count--;
- goto deact;
- }
- if ((m->flags & PG_INACTIVE) == 0) {
- deact:
- TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
- m->flags |= PG_INACTIVE;
- cnt.v_inactive_count++;
- pmap_clear_reference(VM_PAGE_TO_PHYS(m));
- if (pmap_is_modified(VM_PAGE_TO_PHYS(m)))
- m->flags &= ~PG_CLEAN;
- if (m->flags & PG_CLEAN)
- m->flags &= ~PG_LAUNDRY;
- else
- m->flags |= PG_LAUNDRY;
- }
-}
-
-/*
- * vm_page_activate:
- *
- * Put the specified page on the active list (if appropriate).
- *
- * The page queues must be locked.
- */
-void
-vm_page_activate(m)
- register vm_page_t m;
-{
-
- VM_PAGE_CHECK(m);
-
- if (m->flags & PG_INACTIVE) {
- TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
- m->flags &= ~PG_INACTIVE;
- cnt.v_inactive_count--;
- }
- if (m->wire_count == 0) {
- if (m->flags & PG_ACTIVE)
- panic("vm_page_activate: already active");
-
- TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
- m->flags |= PG_ACTIVE;
- cnt.v_active_count++;
- }
-}
-
-/*
- * vm_page_zero_fill:
- *
- * Zero-fill the specified page.
- * Written as a standard pagein routine, to
- * be used by the zero-fill object.
- */
-boolean_t
-vm_page_zero_fill(m)
- vm_page_t m;
-{
-
- VM_PAGE_CHECK(m);
-
- m->flags &= ~PG_CLEAN;
- pmap_zero_page(VM_PAGE_TO_PHYS(m));
- return(TRUE);
-}
-
-/*
- * vm_page_copy:
- *
- * Copy one page to another
- */
-void
-vm_page_copy(src_m, dest_m)
- vm_page_t src_m;
- vm_page_t dest_m;
-{
-
- VM_PAGE_CHECK(src_m);
- VM_PAGE_CHECK(dest_m);
-
- dest_m->flags &= ~PG_CLEAN;
- pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
-}
-
-#ifdef VM_PAGE_ALLOC_MEMORY_STATS
-#define STAT_INCR(v) (v)++
-#define STAT_DECR(v) do { \
- if ((v) == 0) \
- printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
- else \
- (v)--; \
- } while (0)
-u_long vm_page_alloc_memory_npages;
-#else
-#define STAT_INCR(v)
-#define STAT_DECR(v)
-#endif
-
-/*
- * vm_page_alloc_memory:
- *
- * Allocate physical pages conforming to the restrictions
- * provided:
- *
- * size The size of the allocation,
- * rounded to page size.
- *
- * low The low address of the allowed
- * allocation range.
- *
- * high The high address of the allowed
- * allocation range.
- *
- * alignment Allocation must be aligned to this
- * power-of-two boundary.
- *
- * boundary No segment in the allocation may
- * cross this power-of-two boundary
- * (relative to zero).
- *
- * The allocated pages are placed at the tail of `rlist'; `rlist'
- * is assumed to be properly initialized by the caller. The
- * number of memory segments that the allocated memory may
- * occupy is specified in the `nsegs' argument.
- *
- * Returns 0 on success or an errno value to indicate mode
- * of failure.
- *
- * XXX This implementation could be improved. It only
- * XXX allocates a single segment.
- */
-int
-vm_page_alloc_memory(size, low, high, alignment, boundary,
- rlist, nsegs, waitok)
- vm_size_t size;
- vm_offset_t low, high, alignment, boundary;
- struct pglist *rlist;
- int nsegs, waitok;
-{
- vm_offset_t try, idxpa, lastidxpa;
-#if defined(MACHINE_NEW_NONCONTIG)
- int psi;
- struct vm_page *vm_page_array;
-#endif
- int s, tryidx, idx, end, error;
- vm_page_t m;
- u_long pagemask;
-#ifdef DEBUG
- vm_page_t tp;
-#endif
-
-#ifdef DIAGNOSTIC
- if ((alignment & (alignment - 1)) != 0)
- panic("vm_page_alloc_memory: alignment must be power of 2");
-
- if ((boundary & (boundary - 1)) != 0)
- panic("vm_page_alloc_memory: boundary must be power of 2");
-#endif
-
- /*
- * Our allocations are always page granularity, so our alignment
- * must be, too.
- */
- if (alignment < PAGE_SIZE)
- alignment = PAGE_SIZE;
-
- size = round_page(size);
- try = roundup(low, alignment);
-
- if (boundary != 0 && boundary < size)
- return (EINVAL);
-
- pagemask = ~(boundary - 1);
-
- /* Default to "lose". */
- error = ENOMEM;
-
- /*
- * Block all memory allocation and lock the free list.
- */
- s = splimp();
- simple_lock(&vm_page_queue_free_lock);
-
- /* Are there even any free pages? */
- if (vm_page_queue_free.tqh_first == NULL)
- goto out;
-
- for (;; try += alignment) {
- if (try + size > high) {
- /*
- * We've run past the allowable range.
- */
- goto out;
- }
-
- /*
- * Make sure this is a managed physical page.
- */
-#if defined(MACHINE_NEW_NONCONTIG)
-
- if ((psi = vm_physseg_find(atop(try), &idx)) == -1)
- continue; /* managed? */
- if (vm_physseg_find(atop(try + size), NULL) != psi)
- continue; /* end must be in this segment */
-
- tryidx = idx;
- end = idx + (size / PAGE_SIZE);
- vm_page_array = vm_physmem[psi].pgs;
- /* XXX: emulates old global vm_page_array */
-
-#else
- if (IS_VM_PHYSADDR(try) == 0)
- continue;
-
- tryidx = idx = VM_PAGE_INDEX(try);
- end = idx + (size / PAGE_SIZE);
- if (end > vm_page_count) {
- /*
- * No more physical memory.
- */
- goto out;
- }
-#endif
-
- /*
- * Found a suitable starting page. See if the range
- * is free.
- */
- for (; idx < end; idx++) {
- if (VM_PAGE_IS_FREE(&vm_page_array[idx]) == 0) {
- /*
- * Page not available.
- */
- break;
- }
-
- idxpa = VM_PAGE_TO_PHYS(&vm_page_array[idx]);
-
-#if !defined(MACHINE_NEW_NONCONTIG)
- /*
- * Make sure this is a managed physical page.
- * XXX Necessary? I guess only if there
- * XXX are holes in the vm_page_array[].
- */
- if (IS_VM_PHYSADDR(idxpa) == 0)
- break;
-#endif
-
- if (idx > tryidx) {
- lastidxpa =
- VM_PAGE_TO_PHYS(&vm_page_array[idx - 1]);
-
- if ((lastidxpa + PAGE_SIZE) != idxpa) {
- /*
- * Region not contiguous.
- */
- break;
- }
- if (boundary != 0 &&
- ((lastidxpa ^ idxpa) & pagemask) != 0) {
- /*
- * Region crosses boundary.
- */
- break;
- }
- }
- }
-
- if (idx == end) {
- /*
- * Woo hoo! Found one.
- */
- break;
- }
- }
-
- /*
- * Okay, we have a chunk of memory that conforms to
- * the requested constraints.
- */
- idx = tryidx;
- while (idx < end) {
- m = &vm_page_array[idx];
-#ifdef DEBUG
- for (tp = vm_page_queue_free.tqh_first; tp != NULL;
- tp = tp->pageq.tqe_next) {
- if (tp == m)
- break;
- }
- if (tp == NULL)
- panic("vm_page_alloc_memory: page not on freelist");
-#endif
- TAILQ_REMOVE(&vm_page_queue_free, m, pageq);
- cnt.v_free_count--;
- m->flags = PG_CLEAN;
- m->object = NULL;
- m->wire_count = 0;
- TAILQ_INSERT_TAIL(rlist, m, pageq);
- idx++;
- STAT_INCR(vm_page_alloc_memory_npages);
- }
- error = 0;
-
- out:
- simple_unlock(&vm_page_queue_free_lock);
- splx(s);
- return (error);
-}
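Two checks inside the scan loop of vm_page_alloc_memory() are worth restating on their own: candidate pages must be physically adjacent, and a non-zero boundary means the XOR-and-mask test must show that both pages fall in the same power-of-two window. A minimal sketch under those assumptions (extends_segment() and the fixed PAGE_SIZE are invented for illustration):

#include <stdbool.h>

#define PAGE_SIZE	4096UL		/* example page size */

/* A page at `pa' extends a segment ending at `last_pa' only if it is
 * physically contiguous and does not cross the power-of-two boundary
 * (boundary == 0 disables the boundary test, as in the original). */
static bool
extends_segment(unsigned long last_pa, unsigned long pa, unsigned long boundary)
{
	unsigned long pagemask = ~(boundary - 1);

	if (last_pa + PAGE_SIZE != pa)
		return false;			/* not contiguous */
	if (boundary != 0 && ((last_pa ^ pa) & pagemask) != 0)
		return false;			/* crosses the boundary */
	return true;
}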
-
-vm_offset_t
-vm_page_alloc_contig(size, low, high, alignment)
- vm_offset_t size;
- vm_offset_t low;
- vm_offset_t high;
- vm_offset_t alignment;
-{
- struct pglist mlist;
- struct vm_page *m;
- vm_offset_t addr, tmp_addr;
-
- TAILQ_INIT(&mlist);
- if (vm_page_alloc_memory(size, low, high, alignment, 0,
- &mlist, 1, FALSE))
- return 0;
- addr = tmp_addr = kmem_alloc_pageable(kernel_map, size);
- for (m = TAILQ_FIRST(&mlist); m != NULL; m = TAILQ_NEXT(m, pageq)) {
- vm_page_insert(m, kernel_object,
- tmp_addr - VM_MIN_KERNEL_ADDRESS);
- vm_page_wire(m);
- pmap_enter(pmap_kernel(), tmp_addr, VM_PAGE_TO_PHYS(m),
- VM_PROT_READ|VM_PROT_WRITE, TRUE, 0);
- tmp_addr += PAGE_SIZE;
- }
- return addr;
-}
-
-/*
- * vm_page_free_memory:
- *
- * Free a list of pages previously allocated by vm_page_alloc_memory().
- * The pages are assumed to have no mappings.
- */
-void
-vm_page_free_memory(list)
- struct pglist *list;
-{
- vm_page_t m;
- int s;
-
- /*
- * Block all memory allocation and lock the free list.
- */
- s = splimp();
- simple_lock(&vm_page_queue_free_lock);
-
- while ((m = list->tqh_first) != NULL) {
- TAILQ_REMOVE(list, m, pageq);
- m->flags = PG_FREE;
- TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
- cnt.v_free_count++;
- STAT_DECR(vm_page_alloc_memory_npages);
- }
-
- simple_unlock(&vm_page_queue_free_lock);
- splx(s);
-}
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
deleted file mode 100644
index 8903bd57c6a..00000000000
--- a/sys/vm/vm_pageout.c
+++ /dev/null
@@ -1,620 +0,0 @@
-/* $OpenBSD: vm_pageout.c,v 1.11 2001/03/21 23:24:51 art Exp $ */
-/* $NetBSD: vm_pageout.c,v 1.23 1996/02/05 01:54:07 christos Exp $ */
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_pageout.c 8.7 (Berkeley) 6/19/95
- *
- *
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * The proverbial page-out daemon.
- */
-
-#include <sys/param.h>
-#include <sys/proc.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/pool.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pageout.h>
-
-#ifndef VM_PAGE_FREE_MIN
-#define VM_PAGE_FREE_MIN (cnt.v_free_count / 20)
-#endif
-
-#ifndef VM_PAGE_FREE_TARGET
-#define VM_PAGE_FREE_TARGET ((cnt.v_free_min * 4) / 3)
-#endif
-
-int vm_page_free_min_min = 16 * 1024;
-int vm_page_free_min_max = 256 * 1024;
-
-int vm_pages_needed; /* Event on which pageout daemon sleeps */
-
-int vm_page_max_wired = 0; /* XXX max # of wired pages system-wide */
-
-#ifdef CLUSTERED_PAGEOUT
-#define MAXPOCLUSTER (MAXPHYS/NBPG) /* XXX */
-int doclustered_pageout = 1;
-#endif
-
-/*
- * Activate the pageout daemon and sleep awaiting more free memory
- */
-void
-vm_wait(msg)
- char *msg;
-{
- int timo = 0;
-
- if (curproc == pageout_daemon) {
- /*
- * We might be toast here, but IF some paging operations
- * are pending then pages will magically appear. We
- * usually can't return an error because callers of
- * malloc who can wait generally don't check for
- * failure.
- *
- * Only the pageout_daemon wakes up this channel!
- */
- printf("pageout daemon has stalled\n");
- timo = hz >> 3;
- }
- simple_lock(&vm_pages_needed_lock);
- thread_wakeup(&vm_pages_needed);
- thread_sleep_msg(&cnt.v_free_count, &vm_pages_needed_lock, FALSE, msg,
- timo);
-}
-
-/*
- * vm_pageout_scan does the dirty work for the pageout daemon.
- */
-void
-vm_pageout_scan()
-{
- register vm_page_t m, next;
- register int page_shortage;
- register int s;
- register int pages_freed;
- int free;
- vm_object_t object;
-
- /*
- * Only continue when we want more pages to be "free"
- */
-
- cnt.v_rev++;
-
- s = splimp();
- simple_lock(&vm_page_queue_free_lock);
- free = cnt.v_free_count;
- simple_unlock(&vm_page_queue_free_lock);
- splx(s);
-
-#ifndef __SWAP_BROKEN /* XXX */
- if (free < cnt.v_free_target) {
- swapout_threads();
-
- /*
- * Be sure the pmap system is updated so
- * we can scan the inactive queue.
- */
-
- pmap_update();
- }
-#endif /* XXX */
-
- /*
- * Acquire the resident page system lock,
- * as we may be changing what's resident quite a bit.
- */
- vm_page_lock_queues();
-
- /*
- * Start scanning the inactive queue for pages we can free.
- * We keep scanning until we have enough free pages or
- * we have scanned through the entire queue. If we
- * encounter dirty pages, we start cleaning them.
- */
-
- pages_freed = 0;
- for (m = vm_page_queue_inactive.tqh_first; m != NULL; m = next) {
- s = splimp();
- simple_lock(&vm_page_queue_free_lock);
- free = cnt.v_free_count;
- simple_unlock(&vm_page_queue_free_lock);
- splx(s);
- if (free >= cnt.v_free_target)
- break;
-
- cnt.v_scan++;
- next = m->pageq.tqe_next;
-
- /*
- * If the page has been referenced, move it back to the
- * active queue.
- */
- if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
- vm_page_activate(m);
- cnt.v_reactivated++;
- continue;
- }
-
- /*
- * If the page is clean, free it up.
- */
- if (m->flags & PG_CLEAN) {
- object = m->object;
- if (vm_object_lock_try(object)) {
- pmap_page_protect(VM_PAGE_TO_PHYS(m),
- VM_PROT_NONE);
- vm_page_free(m);
- pages_freed++;
- cnt.v_dfree++;
- vm_object_unlock(object);
- }
- continue;
- }
-
- /*
- * If the page is dirty but already being washed, skip it.
- */
- if ((m->flags & PG_LAUNDRY) == 0)
- continue;
-
- /*
- * Otherwise the page is dirty and still in the laundry,
- * so we start the cleaning operation and remove it from
- * the laundry.
- */
- object = m->object;
- if (!vm_object_lock_try(object))
- continue;
-#ifdef CLUSTERED_PAGEOUT
- if (object->pager &&
- vm_pager_cancluster(object->pager, PG_CLUSTERPUT))
- vm_pageout_cluster(m, object);
- else
-#endif
- vm_pageout_page(m, object);
- thread_wakeup(object);
- vm_object_unlock(object);
- /*
- * Former next page may no longer even be on the inactive
- * queue (due to potential blocking in the pager with the
- * queues unlocked). If it isn't, we just start over.
- */
- if (next && (next->flags & PG_INACTIVE) == 0)
- next = vm_page_queue_inactive.tqh_first;
- }
-
- /*
- * Compute the page shortage. If we are still very low on memory
- * be sure that we will move a minimal amount of pages from active
- * to inactive.
- */
-
- page_shortage = cnt.v_inactive_target - cnt.v_inactive_count;
- if (page_shortage <= 0 && pages_freed == 0)
- page_shortage = 1;
-
- while (page_shortage > 0) {
- /*
- * Move some more pages from active to inactive.
- */
-
- if ((m = vm_page_queue_active.tqh_first) == NULL)
- break;
- vm_page_deactivate(m);
- page_shortage--;
- }
-
- vm_page_unlock_queues();
-}
-
-/*
- * Called with object and page queues locked.
- * If reactivate is TRUE, a pager error causes the page to be
- * put back on the active queue, otherwise it is left on the inactive queue.
- */
-void
-vm_pageout_page(m, object)
- vm_page_t m;
- vm_object_t object;
-{
- vm_pager_t pager;
- int pageout_status;
-
- /*
- * We set the busy bit to cause potential page faults on
- * this page to block.
- *
- * We also set pageout-in-progress to keep the object from
- * disappearing during pageout. This guarantees that the
- * page won't move from the inactive queue. (However, any
- * other page on the inactive queue may move!)
- */
- pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
- m->flags |= PG_BUSY;
-
- /*
- * Try to collapse the object before making a pager for it.
- * We must unlock the page queues first.
- */
- vm_page_unlock_queues();
-
-#if 0
- /*
- * vm_object_collapse might want to sleep waiting for pages which
- * is not allowed to do in this thread. Anyway, we now aggressively
- * collapse object-chains as early as possible so this call ought
- * to not be very useful anyhow. This is just an educated guess.
- * Not doing a collapse operation is never fatal though, so we skip
- * it for the time being. Later we might add some NOWAIT option for
- * the collapse code to look at, if it's deemed necessary.
- */
- if (object->pager == NULL)
- vm_object_collapse(object);
-#endif
-
- vm_object_paging_begin(object);
- vm_object_unlock(object);
-
- /*
- * We _used_ to wake up page consumers here, "in case the following
- * operations block". That leads to livelock if the pageout fails,
- * which is actually quite a common thing for NFS paging.
- */
-
- /*
- * If there is no pager for the page, use the default pager.
- * If there is no place to put the page at the moment,
- * leave it in the laundry and hope that there will be
- * paging space later.
- */
- if ((pager = object->pager) == NULL) {
- pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, object->size,
- VM_PROT_ALL, (vm_offset_t)0);
- if (pager != NULL)
- vm_object_setpager(object, pager, 0, FALSE);
- }
- pageout_status = pager ? vm_pager_put(pager, m, FALSE) : VM_PAGER_FAIL;
- vm_object_lock(object);
- vm_page_lock_queues();
-
- switch (pageout_status) {
- case VM_PAGER_OK:
- case VM_PAGER_PEND:
- /* hmm, don't wakeup if memory is _very_ low? */
- thread_wakeup(&cnt.v_free_count);
- cnt.v_pageouts++;
- cnt.v_pgpgout++;
- m->flags &= ~PG_LAUNDRY;
- break;
- case VM_PAGER_BAD:
- /*
- * Page outside of range of object. Right now we
- * essentially lose the changes by pretending it
- * worked.
- *
- * XXX dubious, what should we do?
- */
- m->flags &= ~PG_LAUNDRY;
- m->flags |= PG_CLEAN;
- pmap_clear_modify(VM_PAGE_TO_PHYS(m));
- break;
- case VM_PAGER_AGAIN:
- {
- /*
- * FAIL on a write is interpreted to mean a resource
- * shortage, so we pause for a while and try again.
- * XXX could get stuck here.
- */
- (void)tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH,
- "pageout", hz>>3);
- break;
- }
- case VM_PAGER_FAIL:
- case VM_PAGER_ERROR:
- /*
- * If page couldn't be paged out, then reactivate
- * the page so it doesn't clog the inactive list.
- * (We will try paging it out again later).
- */
- vm_page_activate(m);
- cnt.v_reactivated++;
- break;
- }
-
- pmap_clear_reference(VM_PAGE_TO_PHYS(m));
-
- /*
- * If the operation is still going, leave the page busy
- * to block all other accesses. Also, leave the paging
- * in progress indicator set so that we don't attempt an
- * object collapse.
- */
- if (pageout_status != VM_PAGER_PEND) {
- m->flags &= ~PG_BUSY;
- PAGE_WAKEUP(m);
- vm_object_paging_end(object);
- }
-}
-
-#ifdef CLUSTERED_PAGEOUT
-#define PAGEOUTABLE(p) \
- ((((p)->flags & (PG_INACTIVE|PG_CLEAN|PG_LAUNDRY)) == \
- (PG_INACTIVE|PG_LAUNDRY)) && !pmap_is_referenced(VM_PAGE_TO_PHYS(p)))
-
-/*
- * Attempt to pageout as many contiguous (to ``m'') dirty pages as possible
- * from ``object''. Using information returned from the pager, we assemble
- * a sorted list of contiguous dirty pages and feed them to the pager in one
- * chunk. Called with paging queues and object locked. Also, object must
- * already have a pager.
- */
-void
-vm_pageout_cluster(m, object)
- vm_page_t m;
- vm_object_t object;
-{
- vm_offset_t offset, loff, hoff;
- vm_page_t plist[MAXPOCLUSTER], *plistp, p;
- int postatus, ix, count;
-
- cnt.v_pageouts++;
- /*
- * Determine the range of pages that can be part of a cluster
- * for this object/offset. If it is only our single page, just
- * do it normally.
- */
- vm_pager_cluster(object->pager, m->offset, &loff, &hoff);
- if (hoff - loff == PAGE_SIZE) {
- vm_pageout_page(m, object);
- return;
- }
-
- plistp = plist;
-
- /*
- * Target page is always part of the cluster.
- */
- pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
- m->flags |= PG_BUSY;
- plistp[atop(m->offset - loff)] = m;
- count = 1;
-
- /*
- * Back up from the given page until we find one not fulfilling
- * the pageout criteria or we hit the lower bound for the
- * cluster. For each page determined to be part of the
- * cluster, unmap it and busy it out so it won't change.
- */
- ix = atop(m->offset - loff);
- offset = m->offset;
- while (offset > loff && count < MAXPOCLUSTER-1) {
- p = vm_page_lookup(object, offset - PAGE_SIZE);
- if (p == NULL || !PAGEOUTABLE(p))
- break;
- pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
- p->flags |= PG_BUSY;
- plistp[--ix] = p;
- offset -= PAGE_SIZE;
- count++;
- }
- plistp += atop(offset - loff);
- loff = offset;
-
- /*
- * Now do the same moving forward from the target.
- */
- ix = atop(m->offset - loff) + 1;
- offset = m->offset + PAGE_SIZE;
- while (offset < hoff && count < MAXPOCLUSTER) {
- p = vm_page_lookup(object, offset);
- if (p == NULL || !PAGEOUTABLE(p))
- break;
- pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
- p->flags |= PG_BUSY;
- plistp[ix++] = p;
- offset += PAGE_SIZE;
- count++;
- }
- hoff = offset;
-
- /*
- * Pageout the page.
- * Unlock everything and do a wakeup prior to the pager call
- * in case it blocks.
- */
- vm_page_unlock_queues();
- vm_object_paging_begin(object);
- vm_object_unlock(object);
-again:
- thread_wakeup(&cnt.v_free_count);
- postatus = vm_pager_put_pages(object->pager, plistp, count, FALSE);
- /*
- * XXX rethink this
- */
- if (postatus == VM_PAGER_AGAIN) {
- (void)tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH,
- "pageout", 0);
- goto again;
- } else if (postatus == VM_PAGER_BAD)
- panic("vm_pageout_cluster: VM_PAGER_BAD");
- vm_object_lock(object);
- vm_page_lock_queues();
-
- /*
- * Loop through the affected pages, reflecting the outcome of
- * the operation.
- */
- for (ix = 0; ix < count; ix++) {
- p = *plistp++;
- switch (postatus) {
- case VM_PAGER_OK:
- case VM_PAGER_PEND:
- cnt.v_pgpgout++;
- p->flags &= ~PG_LAUNDRY;
- break;
- case VM_PAGER_FAIL:
- case VM_PAGER_ERROR:
- /*
- * Pageout failed, reactivate the target page so it
- * doesn't clog the inactive list. Other pages are
- * left as they are.
- */
- if (p == m) {
- vm_page_activate(p);
- cnt.v_reactivated++;
- }
- break;
- }
- pmap_clear_reference(VM_PAGE_TO_PHYS(p));
- /*
- * If the operation is still going, leave the page busy
- * to block all other accesses.
- */
- if (postatus != VM_PAGER_PEND) {
- p->flags &= ~PG_BUSY;
- PAGE_WAKEUP(p);
- }
- }
- /*
- * If the operation is still going, leave the paging in progress
- * indicator set so that we don't attempt an object collapse.
- */
- if (postatus != VM_PAGER_PEND)
- vm_object_paging_end(object);
-}
-#endif
-
-/*
- * vm_pageout is the high level pageout daemon.
- */
-
-void
-vm_pageout()
-{
- pageout_daemon = curproc;
- (void) spl0();
-
- /*
- * Initialize some paging parameters.
- */
-
- if (cnt.v_free_min == 0) {
- cnt.v_free_min = VM_PAGE_FREE_MIN;
- vm_page_free_min_min /= cnt.v_page_size;
- vm_page_free_min_max /= cnt.v_page_size;
- if (cnt.v_free_min < vm_page_free_min_min)
- cnt.v_free_min = vm_page_free_min_min;
- if (cnt.v_free_min > vm_page_free_min_max)
- cnt.v_free_min = vm_page_free_min_max;
- }
-
- if (cnt.v_free_target == 0)
- cnt.v_free_target = VM_PAGE_FREE_TARGET;
-
- if (cnt.v_free_target <= cnt.v_free_min)
- cnt.v_free_target = cnt.v_free_min + 1;
-
- /* XXX does not really belong here */
- if (vm_page_max_wired == 0)
- vm_page_max_wired = cnt.v_free_count / 3;
-
- /*
- * The pageout daemon is never done, so loop
- * forever.
- */
-
- simple_lock(&vm_pages_needed_lock);
- while (TRUE) {
- thread_sleep_msg(&vm_pages_needed, &vm_pages_needed_lock,
- FALSE, "paged", 0);
- /*
- * Compute the inactive target for this scan.
- * We need to keep a reasonable amount of memory in the
- * inactive list to better simulate LRU behavior.
- */
- cnt.v_inactive_target =
- (cnt.v_active_count + cnt.v_inactive_count) / 3;
- if (cnt.v_inactive_target <= cnt.v_free_target)
- cnt.v_inactive_target = cnt.v_free_target + 1;
-
- /*
- * Only make a scan if we are likely to do something.
- * Otherwise we might have been awakened by a pager
- * to clean up async pageouts.
- */
- if (cnt.v_free_count < cnt.v_free_target ||
- cnt.v_inactive_count < cnt.v_inactive_target) {
- pool_drain(0);
- vm_pageout_scan();
- }
- vm_pager_sync();
- simple_lock(&vm_pages_needed_lock);
- thread_wakeup(&cnt.v_free_count);
- }
-}
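The target computation at the top of the vm_pageout() loop keeps roughly one third of the pages on the paging queues inactive, and always at least one page above the free target so a scan can make progress. Restated as a stand-alone sketch (plain ints instead of the cnt fields; the helper name is invented):

/* Inactive-target rule from vm_pageout(): a third of the paging-queue
 * pages, but never at or below the free target. */
static int
inactive_target(int active_count, int inactive_count, int free_target)
{
	int target = (active_count + inactive_count) / 3;

	if (target <= free_target)
		target = free_target + 1;
	return target;
}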
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
deleted file mode 100644
index 34e0fc31c2b..00000000000
--- a/sys/vm/vm_pager.c
+++ /dev/null
@@ -1,426 +0,0 @@
-/* $OpenBSD: vm_pager.c,v 1.10 2001/06/08 08:09:44 art Exp $ */
-/* $NetBSD: vm_pager.c,v 1.21 1996/03/16 23:15:25 christos Exp $ */
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_pager.c 8.7 (Berkeley) 7/7/94
- *
- *
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * Paging space routine stubs. Emulates a matchmaker-like interface
- * for builtin pagers.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/proc.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_kern.h>
-
-#ifdef SWAPPAGER
-extern struct pagerops swappagerops;
-#endif
-
-extern struct pagerops vnodepagerops;
-
-#ifdef DEVPAGER
-extern struct pagerops devicepagerops;
-#endif
-
-struct pagerops *pagertab[] = {
-#ifdef SWAPPAGER
- &swappagerops, /* PG_SWAP */
-#else
- NULL,
-#endif
- &vnodepagerops, /* PG_VNODE */
-#ifdef DEVPAGER
- &devicepagerops, /* PG_DEV */
-#else
- NULL,
-#endif
-};
-int npagers = sizeof (pagertab) / sizeof (pagertab[0]);
-
-struct pagerops *dfltpagerops = NULL; /* default pager */
-
-/*
- * Kernel address space for mapping pages.
- * Used by pagers where KVAs are needed for IO.
- *
- * XXX needs to be large enough to support the number of pending async
- * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
- * (MAXPHYS == 64k) if you want to get the most efficiency.
- */
-#define PAGER_MAP_SIZE (4 * 1024 * 1024)
-
-vm_map_t pager_map;
-boolean_t pager_map_wanted;
-vm_offset_t pager_sva, pager_eva;
-
-void
-vm_pager_init()
-{
- struct pagerops **pgops;
-
- /*
- * Allocate a kernel submap for tracking get/put page mappings
- */
- pager_map = kmem_suballoc(kernel_map, &pager_sva, &pager_eva,
- PAGER_MAP_SIZE, FALSE);
- /*
- * Initialize known pagers
- */
- for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
- if (*pgops)
- (*(*pgops)->pgo_init)();
- if (dfltpagerops == NULL)
- panic("no default pager");
-}
-
-/*
- * Allocate an instance of a pager of the given type.
- * Size, protection and offset parameters are passed in for pagers that
- * need to perform page-level validation (e.g. the device pager).
- */
-vm_pager_t
-vm_pager_allocate(type, handle, size, prot, off)
- int type;
- caddr_t handle;
- vm_size_t size;
- vm_prot_t prot;
- vm_offset_t off;
-{
- struct pagerops *ops;
-
- ops = (type == PG_DFLT) ? dfltpagerops : pagertab[type];
- if (ops)
- return ((*ops->pgo_alloc)(handle, size, prot, off));
- return (NULL);
-}
-
-void
-vm_pager_deallocate(pager)
- vm_pager_t pager;
-{
- if (pager == NULL)
- panic("vm_pager_deallocate: null pager");
- (*pager->pg_ops->pgo_dealloc)(pager);
-}
-
-int
-vm_pager_remove(pager, from, to)
- vm_pager_t pager;
- vm_offset_t from, to;
-{
- if (pager == NULL)
- panic("vm_pager_remove: null pager");
- return (*pager->pg_ops->pgo_remove)(pager, from, to);
-}
-
-vm_offset_t
-vm_pager_next(pager, offset)
- vm_pager_t pager;
- vm_offset_t offset;
-{
- if (pager == NULL)
- panic("vm_pager_next: null pager");
- return (*pager->pg_ops->pgo_next)(pager, offset);
-}
-
-int
-vm_pager_count(pager)
- vm_pager_t pager;
-{
- if (pager == NULL)
- panic("vm_pager_count: null pager");
- return (*pager->pg_ops->pgo_count)(pager);
-}
-
-int
-vm_pager_get_pages(pager, mlist, npages, sync)
- vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
- boolean_t sync;
-{
- int rv;
-
- if (pager == NULL) {
- rv = VM_PAGER_OK;
- while (npages--)
- if (!vm_page_zero_fill(*mlist)) {
- rv = VM_PAGER_FAIL;
- break;
- } else
- mlist++;
- return (rv);
- }
- return ((*pager->pg_ops->pgo_getpages)(pager, mlist, npages, sync));
-}
-
-int
-vm_pager_put_pages(pager, mlist, npages, sync)
- vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
- boolean_t sync;
-{
- if (pager == NULL)
- panic("vm_pager_put_pages: null pager");
- return ((*pager->pg_ops->pgo_putpages)(pager, mlist, npages, sync));
-}
-
-/* XXX compatibility */
-int
-vm_pager_get(pager, m, sync)
- vm_pager_t pager;
- vm_page_t m;
- boolean_t sync;
-{
- return vm_pager_get_pages(pager, &m, 1, sync);
-}
-
-/* XXX compatibility */
-int
-vm_pager_put(pager, m, sync)
- vm_pager_t pager;
- vm_page_t m;
- boolean_t sync;
-{
- return vm_pager_put_pages(pager, &m, 1, sync);
-}
-
-boolean_t
-vm_pager_has_page(pager, offset)
- vm_pager_t pager;
- vm_offset_t offset;
-{
- if (pager == NULL)
- panic("vm_pager_has_page: null pager");
- return ((*pager->pg_ops->pgo_haspage)(pager, offset));
-}
-
-/*
- * Called by pageout daemon before going back to sleep.
- * Gives pagers a chance to clean up any completed async paging operations.
- */
-void
-vm_pager_sync()
-{
- struct pagerops **pgops;
-
- for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
- if (*pgops)
- (*(*pgops)->pgo_putpages)(NULL, NULL, 0, FALSE);
-}
-
-void
-vm_pager_cluster(pager, offset, loff, hoff)
- vm_pager_t pager;
- vm_offset_t offset;
- vm_offset_t *loff;
- vm_offset_t *hoff;
-{
- if (pager == NULL)
- panic("vm_pager_cluster: null pager");
- ((*pager->pg_ops->pgo_cluster)(pager, offset, loff, hoff));
-}
-
-void
-vm_pager_clusternull(pager, offset, loff, hoff)
- vm_pager_t pager;
- vm_offset_t offset;
- vm_offset_t *loff;
- vm_offset_t *hoff;
-{
- panic("vm_pager_nullcluster called");
-}
-
-vm_offset_t
-vm_pager_map_pages(mlist, npages, canwait)
- vm_page_t *mlist;
- int npages;
- boolean_t canwait;
-{
- vm_offset_t kva, va;
- vm_size_t size;
- vm_page_t m;
-
- /*
- * Allocate space in the pager map; if none is available, return 0.
- * This is basically an expansion of kmem_alloc_wait with optional
- * blocking on no space.
- */
- size = npages * PAGE_SIZE;
- vm_map_lock(pager_map);
- while (vm_map_findspace(pager_map, 0, size, &kva)) {
- if (!canwait) {
- vm_map_unlock(pager_map);
- return (0);
- }
- pager_map_wanted = TRUE;
- vm_map_unlock(pager_map);
- (void) tsleep(pager_map, PVM, "pager_map", 0);
- vm_map_lock(pager_map);
- }
- vm_map_insert(pager_map, NULL, 0, kva, kva + size);
- vm_map_unlock(pager_map);
-
- for (va = kva; npages--; va += PAGE_SIZE) {
- m = *mlist++;
-#ifdef DEBUG
- if ((m->flags & PG_BUSY) == 0)
- panic("vm_pager_map_pages: page not busy");
- if (m->flags & PG_PAGEROWNED)
- panic("vm_pager_map_pages: page already in pager");
-#endif
-#ifdef DEBUG
- m->flags |= PG_PAGEROWNED;
-#endif
- pmap_enter(vm_map_pmap(pager_map), va, VM_PAGE_TO_PHYS(m),
- VM_PROT_DEFAULT, TRUE, 0);
- }
- return (kva);
-}
-
-void
-vm_pager_unmap_pages(kva, npages)
- vm_offset_t kva;
- int npages;
-{
- vm_size_t size = npages * PAGE_SIZE;
-
-#ifdef DEBUG
- vm_offset_t va;
- vm_page_t m;
- int np = npages;
-
- for (va = kva; np--; va += PAGE_SIZE) {
- m = vm_pager_atop(va);
- if (m->flags & PG_PAGEROWNED)
- m->flags &= ~PG_PAGEROWNED;
- else
- printf("vm_pager_unmap_pages: %p(%lx/%lx) not owned\n",
- m, va, VM_PAGE_TO_PHYS(m));
- }
-#endif
- pmap_remove(vm_map_pmap(pager_map), kva, kva + size);
- vm_map_lock(pager_map);
- (void) vm_map_delete(pager_map, kva, kva + size);
- if (pager_map_wanted)
- wakeup(pager_map);
- vm_map_unlock(pager_map);
-}
-
-vm_page_t
-vm_pager_atop(kva)
- vm_offset_t kva;
-{
- vm_offset_t pa;
-
- if (pmap_extract(vm_map_pmap(pager_map), kva, &pa) == FALSE)
- panic("vm_pager_atop");
- return (PHYS_TO_VM_PAGE(pa));
-}
-
-vm_pager_t
-vm_pager_lookup(pglist, handle)
- register struct pagerlst *pglist;
- caddr_t handle;
-{
- register vm_pager_t pager;
-
- for (pager = pglist->tqh_first; pager; pager = pager->pg_list.tqe_next)
- if (pager->pg_handle == handle)
- return (pager);
- return (NULL);
-}
-
-/*
- * This routine gains a reference to the object.
- * Explicit deallocation is necessary.
- */
-int
-pager_cache(object, should_cache)
- vm_object_t object;
- boolean_t should_cache;
-{
- if (object == NULL)
- return (KERN_INVALID_ARGUMENT);
-
- vm_object_cache_lock();
- vm_object_lock(object);
- if (should_cache)
- object->flags |= OBJ_CANPERSIST;
- else
- object->flags &= ~OBJ_CANPERSIST;
- vm_object_unlock(object);
- vm_object_cache_unlock();
-
- vm_object_deallocate(object);
-
- return (KERN_SUCCESS);
-}
diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c
deleted file mode 100644
index 56b9b0525d7..00000000000
--- a/sys/vm/vm_swap.c
+++ /dev/null
@@ -1,1248 +0,0 @@
-/* $OpenBSD: vm_swap.c,v 1.16 2001/05/05 20:57:04 art Exp $ */
-/* $NetBSD: vm_swap.c,v 1.64 1998/11/08 19:45:17 mycroft Exp $ */
-
-/*
- * Copyright (c) 1995, 1996, 1997 Matthew R. Green, Tobias Weingartner
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/buf.h>
-#include <sys/proc.h>
-#include <sys/namei.h>
-#include <sys/disklabel.h>
-#include <sys/dmap.h>
-#include <sys/errno.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/lock.h>
-#include <sys/vnode.h>
-#include <sys/map.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <sys/extent.h>
-#include <sys/swap.h>
-#include <sys/mount.h>
-#include <sys/syscallargs.h>
-
-#include <machine/vmparam.h>
-
-#include <vm/vm_conf.h>
-
-#include <miscfs/specfs/specdev.h>
-
-/*
- * The idea here is to provide a single interface for multiple swap devices,
- * of any kind and priority in a simple and fast way.
- *
- * Each swap device has these properties:
- * * swap in use.
- * * swap enabled.
- * * map information in `/dev/drum'.
- * * vnode pointer.
- * Files have these additional properties:
- * * block size.
- * * maximum byte count in buffer.
- * * buffer.
- * * credentials.
- *
- * The arguments to swapctl(2) are:
- * int cmd;
- * void *arg;
- * int misc;
- * The cmd can be one of:
- * SWAP_NSWAP - swapctl(2) returns the number of swap devices currently in
- * use.
- * SWAP_STATS - swapctl(2) takes a struct swapent * in (void *arg) and writes
- * misc or fewer (down to zero) entries for the configured swap devices,
- * and returns the number of entries written or -1 on error.
- * SWAP_ON - swapctl(2) takes a (char *) in arg to be the pathname of a
- * device or file to begin swapping on, with its priority in
- * misc, returning 0 on success and -1 on error.
- * SWAP_OFF - swapctl(2) takes a (char *) in arg to be the pathname of a
- * device or file to stop swapping on, returning 0 or -1.
- * XXX unwritten.
- * SWAP_CTL - swapctl(2) changes the priority of a swap device, using the
- * misc value.
- */
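For orientation, the interface described above is driven from userland through swapctl(2). The program below is a hedged sketch of the SWAP_NSWAP/SWAP_STATS path only; it assumes the swapent member names used elsewhere in this file (se_path, se_nblks, se_inuse, se_priority) and the headers of the era, so treat it as illustrative rather than authoritative.

#include <sys/param.h>
#include <sys/swap.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	struct swapent *sep;
	int i, n;

	/* SWAP_NSWAP: how many swap devices are configured? */
	if ((n = swapctl(SWAP_NSWAP, NULL, 0)) == -1)
		err(1, "SWAP_NSWAP");
	if (n == 0)
		return (0);

	/* SWAP_STATS: fill in up to n swapent entries. */
	if ((sep = calloc(n, sizeof(*sep))) == NULL)
		err(1, "calloc");
	if ((n = swapctl(SWAP_STATS, sep, n)) == -1)
		err(1, "SWAP_STATS");

	for (i = 0; i < n; i++)
		printf("%s: %d blocks, %d in use, priority %d\n",
		    sep[i].se_path, sep[i].se_nblks, sep[i].se_inuse,
		    sep[i].se_priority);
	free(sep);
	return (0);
}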
-
-#ifdef SWAPDEBUG
-#define STATIC
-#define VMSDB_SWON 0x0001
-#define VMSDB_SWOFF 0x0002
-#define VMSDB_SWINIT 0x0004
-#define VMSDB_SWALLOC 0x0008
-#define VMSDB_SWFLOW 0x0010
-#define VMSDB_INFO 0x0020
-int vmswapdebug = 0;
-int vmswap_domount = 1;
-
-#define DPRINTF(f, m) do { \
- if (vmswapdebug & (f)) \
- printf m; \
-} while(0)
-#else
-#define STATIC static
-#define DPRINTF(f, m)
-#endif
-
-#define SWAP_TO_FILES
-
-struct swapdev {
- struct swapent swd_se;
-#define swd_dev swd_se.se_dev
-#define swd_flags swd_se.se_flags
-#define swd_nblks swd_se.se_nblks
-#define swd_inuse swd_se.se_inuse
-#define swd_priority swd_se.se_priority
-#define swd_path swd_se.se_path
- daddr_t swd_mapoffset;
- int swd_mapsize;
- struct extent *swd_ex;
- struct vnode *swd_vp;
- CIRCLEQ_ENTRY(swapdev) swd_next;
-
-#ifdef SWAP_TO_FILES
- int swd_bsize;
- int swd_maxactive;
- struct buf swd_tab;
- struct ucred *swd_cred;
-#endif
-};
-
-/*
- * Swap device priority entry; the list is kept sorted on `spi_priority'.
- */
-struct swappri {
- int spi_priority;
- CIRCLEQ_HEAD(spi_swapdev, swapdev) spi_swapdev;
- LIST_ENTRY(swappri) spi_swappri;
-};
-
-
-
-
-/*
- * The following two structures are used to keep track of data transfers
- * on swap devices associated with regular files.
- * NOTE: this code is more or less a copy of vnd.c; we use the same
- * structure names here to ease porting.
- */
-
-
-struct vndxfer {
- struct buf *vx_bp; /* Pointer to parent buffer */
- struct swapdev *vx_sdp;
- int vx_error;
- int vx_pending; /* # of pending aux buffers */
- int vx_flags;
-#define VX_BUSY 1
-#define VX_DEAD 2
-};
-
-
-struct vndbuf {
- struct buf vb_buf;
- struct vndxfer *vb_xfer;
-};
-
-/* To get from a buffer to the encapsulating vndbuf */
-#define BUF_TO_VNDBUF(bp) \
- ((struct vndbuf *)((long)bp - ((long)&((struct vndbuf *)0)->vb_buf)))
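BUF_TO_VNDBUF() above is the classic container-of idiom: subtract the embedded member's offset from the member's address to recover the enclosing structure. A modern restatement using offsetof() is sketched below; the stand-in struct definitions are only there to make the fragment self-contained.

#include <stddef.h>

struct buf { int b_flags; };		/* stand-in for the real struct buf */

struct vndbuf {
	struct buf	vb_buf;		/* embedded member */
	void		*vb_xfer;
};

/* Recover the enclosing vndbuf from a pointer to its embedded buf. */
#define BUF_TO_VNDBUF(bp) \
	((struct vndbuf *)((char *)(bp) - offsetof(struct vndbuf, vb_buf)))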
-
-/* vnd macro stuff, rewritten to use malloc()/free() */
-#define getvndxfer() \
- (struct vndxfer *)malloc(sizeof(struct vndxfer), M_VMSWAP, M_WAITOK);
-
-#define putvndxfer(vnx) \
- free(vnx, M_VMSWAP)
-
-#define getvndbuf() \
- (struct vndbuf *)malloc(sizeof(struct vndbuf), M_VMSWAP, M_WAITOK);
-
-#define putvndbuf(vbp) \
- free(vbp, M_VMSWAP)
-
-
-int nswapdev;
-int swflags;
-struct extent *swapmap;
-LIST_HEAD(swap_priority, swappri) swap_priority;
-
-STATIC int swap_on __P((struct proc *, struct swapdev *));
-#ifdef SWAP_OFF_WORKS
-STATIC int swap_off __P((struct proc *, struct swapdev *));
-#endif
-STATIC struct swapdev *swap_getsdpfromaddr __P((daddr_t));
-STATIC void swap_addmap __P((struct swapdev *, int));
-
-#ifdef SWAP_TO_FILES
-STATIC void sw_reg_strategy __P((struct swapdev *, struct buf *, int));
-STATIC void sw_reg_iodone __P((struct buf *));
-STATIC void sw_reg_start __P((struct swapdev *));
-#endif
-
-STATIC void insert_swapdev __P((struct swapdev *, int));
-STATIC struct swapdev *find_swapdev __P((struct vnode *, int));
-STATIC void swaplist_trim __P((void));
-
-STATIC void swapmount __P((void));
-
-/*
- * We use two locks to protect the swap device lists.
- * The long-term lock is only used to prevent races in
- * concurrently executing swapctl(2) system calls.
- */
-struct simplelock swaplist_lock;
-struct lock swaplist_change_lock;
-
-/*
- * Insert a swap device on the priority list.
- */
-void
-insert_swapdev(sdp, priority)
- struct swapdev *sdp;
- int priority;
-{
- struct swappri *spp, *pspp;
-
-again:
- simple_lock(&swaplist_lock);
-
- /*
- * Find entry at or after which to insert the new device.
- */
- for (pspp = NULL, spp = swap_priority.lh_first; spp != NULL;
- spp = spp->spi_swappri.le_next) {
- if (priority <= spp->spi_priority)
- break;
- pspp = spp;
- }
-
- if (spp == NULL || spp->spi_priority != priority) {
- spp = (struct swappri *)
- malloc(sizeof *spp, M_VMSWAP, M_NOWAIT);
-
- if (spp == NULL) {
- simple_unlock(&swaplist_lock);
- tsleep((caddr_t)&lbolt, PSWP, "memory", 0);
- goto again;
- }
- DPRINTF(VMSDB_SWFLOW,
- ("sw: had to create a new swappri = %d\n", priority));
-
- spp->spi_priority = priority;
- CIRCLEQ_INIT(&spp->spi_swapdev);
-
- if (pspp)
- LIST_INSERT_AFTER(pspp, spp, spi_swappri);
- else
- LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri);
-
- }
- /* Onto priority list */
- CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
- sdp->swd_priority = priority;
- simple_unlock(&swaplist_lock);
-}
-
-/*
- * Find and optionally remove a swap device from the priority list.
- */
-struct swapdev *
-find_swapdev(vp, remove)
- struct vnode *vp;
- int remove;
-{
- struct swapdev *sdp;
- struct swappri *spp;
-
- simple_lock(&swaplist_lock);
- for (spp = swap_priority.lh_first; spp != NULL;
- spp = spp->spi_swappri.le_next) {
- for (sdp = spp->spi_swapdev.cqh_first;
- sdp != (void *)&spp->spi_swapdev;
- sdp = sdp->swd_next.cqe_next)
- if (sdp->swd_vp == vp) {
- if (remove)
- CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp,
- swd_next);
- simple_unlock(&swaplist_lock);
- return (sdp);
- }
- }
- simple_unlock(&swaplist_lock);
- return (NULL);
-}
-
-/*
- * Scan priority list for empty priority entries.
- */
-void
-swaplist_trim()
-{
- struct swappri *spp;
-
- simple_lock(&swaplist_lock);
-restart:
- for (spp = swap_priority.lh_first; spp != NULL;
- spp = spp->spi_swappri.le_next) {
- if (spp->spi_swapdev.cqh_first != (void *)&spp->spi_swapdev)
- continue;
- LIST_REMOVE(spp, spi_swappri);
- free((caddr_t)spp, M_VMSWAP);
- goto restart;
- }
- simple_unlock(&swaplist_lock);
-}
-
-int
-sys_swapctl(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
- struct sys_swapctl_args /* {
- syscallarg(int) cmd;
- syscallarg(const void *) arg;
- syscallarg(int) misc;
- } */ *uap = (struct sys_swapctl_args *)v;
- struct vnode *vp;
- struct nameidata nd;
- struct swappri *spp;
- struct swapdev *sdp;
- struct swapent *sep;
- char userpath[MAXPATHLEN];
- int count, error, misc;
- size_t len;
- int priority;
-
- misc = SCARG(uap, misc);
-
- DPRINTF(VMSDB_SWFLOW, ("entering sys_swapctl\n"));
-
- /* how many swap devices */
- if (SCARG(uap, cmd) == SWAP_NSWAP) {
- DPRINTF(VMSDB_SWFLOW,("did SWAP_NSWAP: leaving sys_swapctl\n"));
- *retval = nswapdev;
- return (0);
- }
-
- /* stats on the swap devices. */
- if (SCARG(uap, cmd) == SWAP_STATS) {
- sep = (struct swapent *)SCARG(uap, arg);
- count = 0;
-
- error = lockmgr(&swaplist_change_lock, LK_SHARED, (void *)0, p);
- if (error)
- return (error);
- for (spp = swap_priority.lh_first; spp != NULL;
- spp = spp->spi_swappri.le_next) {
- for (sdp = spp->spi_swapdev.cqh_first;
- sdp != (void *)&spp->spi_swapdev && misc-- > 0;
- sdp = sdp->swd_next.cqe_next, sep++, count++) {
- /*
- * We do not do NetBSD 1.3 compat call.
- */
- error = copyout((caddr_t)&sdp->swd_se,
- (caddr_t)sep, sizeof(struct swapent));
-
- if (error)
- goto out;
- }
- }
-out:
- (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p);
- if (error)
- return (error);
-
- DPRINTF(VMSDB_SWFLOW,("did SWAP_STATS: leaving sys_swapctl\n"));
-
- *retval = count;
- return (0);
- }
- if ((error = suser(p->p_ucred, &p->p_acflag)))
- return (error);
-
- if (SCARG(uap, arg) == NULL) {
- /* XXX - interface - arg==NULL: miniroot */
- vp = rootvp;
- if (vget(vp, LK_EXCLUSIVE, p))
- return (EBUSY);
- if (SCARG(uap, cmd) == SWAP_ON &&
- copystr("miniroot", userpath, sizeof userpath, &len))
- panic("swapctl: miniroot copy failed");
- } else {
- int space;
- char *where;
-
- if (SCARG(uap, cmd) == SWAP_ON) {
- if ((error = copyinstr(SCARG(uap, arg), userpath,
- sizeof userpath, &len)))
- return (error);
- space = UIO_SYSSPACE;
- where = userpath;
- } else {
- space = UIO_USERSPACE;
- where = (char *)SCARG(uap, arg);
- }
- NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, space, where, p);
- if ((error = namei(&nd)))
- return (error);
-
- vp = nd.ni_vp;
- }
-
- error = lockmgr(&swaplist_change_lock, LK_EXCLUSIVE, (void *)0, p);
- if (error)
- goto bad2;
-
- switch(SCARG(uap, cmd)) {
- case SWAP_CTL:
- priority = SCARG(uap, misc);
- if ((sdp = find_swapdev(vp, 1)) == NULL) {
- error = ENOENT;
- break;
- }
- insert_swapdev(sdp, priority);
- swaplist_trim();
- break;
-
- case SWAP_ON:
- priority = SCARG(uap, misc);
-
- /* Check for duplicates */
- if ((sdp = find_swapdev(vp, 0)) != NULL) {
- if (!bcmp(sdp->swd_path, "swap_device", 12)) {
- copystr(userpath, sdp->swd_path, len, 0);
- error = 0;
- } else
- error = EBUSY;
- goto bad;
- }
-
- sdp = (struct swapdev *)
- malloc(sizeof *sdp, M_VMSWAP, M_WAITOK);
- bzero(sdp, sizeof(*sdp));
-
- sdp->swd_vp = vp;
- sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
-
- if ((error = swap_on(p, sdp)) != 0) {
- free((caddr_t)sdp, M_VMSWAP);
- break;
- }
-#ifdef SWAP_TO_FILES
- /*
- * XXX Is NFS elaboration necessary?
- */
- if (vp->v_type == VREG)
- sdp->swd_cred = crdup(p->p_ucred);
-#endif
- if (copystr(userpath, sdp->swd_path, len, 0) != 0)
- panic("swapctl: copystr");
- insert_swapdev(sdp, priority);
-
- /* Keep reference to vnode */
- vref(vp);
- break;
-
- case SWAP_OFF:
- DPRINTF(VMSDB_SWFLOW, ("doing SWAP_OFF...\n"));
-#ifdef SWAP_OFF_WORKS
- if ((sdp = find_swapdev(vp, 0)) == NULL) {
- error = ENXIO;
- break;
- }
- /*
- * If a device isn't in use or enabled, we
- * can't stop swapping from it (again).
- */
- if ((sdp->swd_flags &
- (SWF_INUSE|SWF_ENABLE)) == 0) {
- error = EBUSY;
- goto bad;
- }
- if ((error = swap_off(p, sdp)) != 0)
- goto bad;
-
- /* Find again and remove this time */
- if ((sdp = find_swapdev(vp, 1)) == NULL) {
- error = ENXIO;
- break;
- }
- free((caddr_t)sdp, M_VMSWAP);
-#else
- error = ENODEV;
-#endif
- break;
-
- default:
- DPRINTF(VMSDB_SWFLOW,
- ("unhandled command: %x\n", SCARG(uap, cmd)));
- error = EINVAL;
- }
-
-bad:
- (void)lockmgr(&swaplist_change_lock, LK_RELEASE, (void *)0, p);
-bad2:
- vput(vp);
-
- DPRINTF(VMSDB_SWFLOW, ("leaving sys_swapctl: error %d\n", error));
- return (error);
-}
-
-/*
- * swap_on() attempts to begin swapping on a swapdev.  We check that this
- * device is OK to swap from and skip the start of any disk (to avoid any
- * disk labels that may exist).
- */
-STATIC int
-swap_on(p, sdp)
- struct proc *p;
- struct swapdev *sdp;
-{
- static int count = 0;
- struct vnode *vp = sdp->swd_vp;
- int error, nblks, size;
- long addr;
- char *storage;
- int storagesize;
-#ifdef SWAP_TO_FILES
- struct vattr va;
-#endif
-#ifdef NFSCLIENT
- extern int (**nfsv2_vnodeop_p) __P((void *));
-#endif /* NFSCLIENT */
- dev_t dev = sdp->swd_dev;
- char *name;
-
-
-	/* If root is on swap, then skip the open/close operations. */
- if (vp != rootvp) {
- if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)))
- return (error);
- vp->v_writecount++;
- }
-
- DPRINTF(VMSDB_INFO,
- ("swap_on: dev = %d, major(dev) = %d\n", dev, major(dev)));
-
- switch (vp->v_type) {
- case VBLK:
- if (bdevsw[major(dev)].d_psize == 0 ||
- (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
- error = ENXIO;
- goto bad;
- }
- break;
-
-#ifdef SWAP_TO_FILES
- case VREG:
- if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
- goto bad;
- nblks = (int)btodb(va.va_size);
- if ((error =
- VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0)
- goto bad;
-
- sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize;
-#ifdef NFSCLIENT
- if (vp->v_op == nfsv2_vnodeop_p)
- sdp->swd_maxactive = 2; /* XXX */
- else
-#endif /* NFSCLIENT */
- sdp->swd_maxactive = 8; /* XXX */
- break;
-#endif
-
- default:
- error = ENXIO;
- goto bad;
- }
- if (nblks == 0) {
- DPRINTF(VMSDB_SWFLOW, ("swap_on: nblks == 0\n"));
- error = EINVAL;
- goto bad;
- }
-
- sdp->swd_flags |= SWF_INUSE;
- sdp->swd_nblks = nblks;
-
- /*
- * skip over first cluster of a device in case of labels or
- * boot blocks.
- */
- if (vp->v_type == VBLK) {
- size = (int)(nblks - ctod(1));
- addr = (long)ctod(1);
- } else {
- size = (int)nblks;
- addr = (long)0;
- }
-
- DPRINTF(VMSDB_SWON,
- ("swap_on: dev %x: size %d, addr %ld\n", dev, size, addr));
-
- name = malloc(12, M_VMSWAP, M_WAITOK);
- sprintf(name, "swap0x%04x", count++);
- /* XXX make this based on ram as well. */
- storagesize = EXTENT_FIXED_STORAGE_SIZE(maxproc * 2);
- storage = malloc(storagesize, M_VMSWAP, M_WAITOK);
- sdp->swd_ex = extent_create(name, 0, nblks, M_VMSWAP,
- storage, storagesize, EX_WAITOK);
- if (addr) {
- if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK))
- panic("disklabel region");
- sdp->swd_inuse += addr;
- }
-
-
- if (vp == rootvp) {
- struct mount *mp;
- struct statfs *sp;
- int rootblks;
-
- /* Get size from root FS (mountroot did statfs) */
- mp = rootvnode->v_mount;
- sp = &mp->mnt_stat;
- rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE);
- if (rootblks > nblks)
- panic("miniroot size");
-
- if (extent_alloc_region(sdp->swd_ex, addr, rootblks, EX_WAITOK))
- panic("miniroot region");
-
- printf("Preserved %d blocks, leaving %d pages of swap\n",
- rootblks, dtoc(size - rootblks));
- }
-
- swap_addmap(sdp, size);
- nswapdev++;
- sdp->swd_flags |= SWF_ENABLE;
- return (0);
-
-bad:
- if (vp != rootvp) {
- vp->v_writecount--;
- (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
- }
- return (error);
-}
-
-#ifdef SWAP_OFF_WORKS
-STATIC int
-swap_off(p, sdp)
- struct proc *p;
- struct swapdev *sdp;
-{
- char *name;
-
- /* turn off the enable flag */
- sdp->swd_flags &= ~SWF_ENABLE;
-
- DPRINTF(VMSDB_SWOFF, ("swap_off: %x\n", sdp->swd_dev));
-
- /*
- * XXX write me
- *
- * the idea is to find out which processes are using this swap
- * device, and page them all in.
- *
- * eventually, we should try to move them out to other swap areas
- * if available.
- *
- * The alternative is to create a redirection map for this swap
- * device. This should work by moving all the pages of data from
- * the ex-swap device to another one, and making an entry in the
- * redirection map for it. locking is going to be important for
- * this!
- *
- * There might be an easier way to do a "soft" swapoff. First
- * we mark the particular swap partition as not desirable anymore.
- * Then we use the pager to page a couple of pages in, each time
- * it has the memory, and the chance to do so. Thereby moving pages
- * back into memory. Once they are in memory, when they get paged
- * out again, they do not go back onto the "undesirable" device
- * anymore, but to good devices. This might take longer, but it
- * can certainly work. If need be, the user process can sleep on
- * the particular sdp entry, and the swapper can then wake him up
- * when everything is done.
- */
-
-	/* until the above code is written, we must return ENODEV */
- return ENODEV;
-
- extent_free(swapmap, sdp->swd_mapoffset, sdp->swd_mapsize, EX_WAITOK);
- nswapdev--;
- name = sdp->swd_ex->ex_name;
- extent_destroy(sdp->swd_ex);
- free(name, M_VMSWAP);
- free((caddr_t)sdp->swd_ex, M_VMSWAP);
-	if (sdp->swd_vp != rootvp) {
-		sdp->swd_vp->v_writecount--;
- (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
- }
- if (sdp->swd_vp)
- vrele(sdp->swd_vp);
- free((caddr_t)sdp, M_VMSWAP);
- return (0);
-}
-#endif
-
-/*
- * To decide where to allocate what part of swap, we "round robin" the
- * swap devices of the same priority in swap_priority until they are
- * full.  We do this with a list of swap priorities that have circular
- * queues of swapdevs.
- *
- * The following functions control allocation and freeing of parts of
- * the swap area.  You call swap_alloc() with a size and it returns an
- * address; later you call swap_free() and it frees the use of that
- * swap area.
- *
- * daddr_t swap_alloc(int size);
- * void swap_free(int size, daddr_t addr);
- */
-
-daddr_t
-swap_alloc(size)
- int size;
-{
- struct swapdev *sdp;
- struct swappri *spp;
- u_long result;
-
- if (nswapdev < 1)
- return 0;
-
- simple_lock(&swaplist_lock);
- for (spp = swap_priority.lh_first; spp != NULL;
- spp = spp->spi_swappri.le_next) {
- for (sdp = spp->spi_swapdev.cqh_first;
- sdp != (void *)&spp->spi_swapdev;
- sdp = sdp->swd_next.cqe_next) {
- /* if it's not enabled, then we can't swap from it */
- if ((sdp->swd_flags & SWF_ENABLE) == 0 ||
- /* XXX IS THIS CORRECT ? */
-#if 1
- (sdp->swd_inuse + size > sdp->swd_nblks) ||
-#endif
- extent_alloc(sdp->swd_ex, size, EX_NOALIGN,
- EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT,
- &result) != 0) {
- continue;
- }
- CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
- CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
- sdp->swd_inuse += size;
- simple_unlock(&swaplist_lock);
- return (daddr_t)(result + sdp->swd_mapoffset);
- }
- }
- simple_unlock(&swaplist_lock);
- return 0;
-}
-
-void
-swap_free(size, addr)
- int size;
- daddr_t addr;
-{
- struct swapdev *sdp = swap_getsdpfromaddr(addr);
-
-#ifdef DIAGNOSTIC
- if (sdp == NULL)
- panic("swap_free: unmapped address\n");
- if (nswapdev < 1)
- panic("swap_free: nswapdev < 1\n");
-#endif
- extent_free(sdp->swd_ex, addr - sdp->swd_mapoffset, size,
- EX_MALLOCOK|EX_NOWAIT);
- sdp->swd_inuse -= size;
-#ifdef DIAGNOSTIC
- if (sdp->swd_inuse < 0)
- panic("swap_free: inuse < 0");
-#endif
-}
-
-/*
- * We have a physical -> virtual mapping to address here. There are several
- * different physical address spaces (one for each swap partition) that are
- * to be mapped onto a single virtual address space.
- */
-#define ADDR_IN_MAP(addr, sdp) \
- (((addr) >= (sdp)->swd_mapoffset) && \
- ((addr) < ((sdp)->swd_mapoffset + (sdp)->swd_mapsize)))
-
-struct swapdev *
-swap_getsdpfromaddr(addr)
- daddr_t addr;
-{
- struct swapdev *sdp;
- struct swappri *spp;
-
- simple_lock(&swaplist_lock);
- for (spp = swap_priority.lh_first; spp != NULL;
- spp = spp->spi_swappri.le_next)
- for (sdp = spp->spi_swapdev.cqh_first;
- sdp != (void *)&spp->spi_swapdev;
- sdp = sdp->swd_next.cqe_next)
- if (ADDR_IN_MAP(addr, sdp)) {
- simple_unlock(&swaplist_lock);
- return sdp;
- }
- simple_unlock(&swaplist_lock);
- return NULL;
-}
-
-void
-swap_addmap(sdp, size)
- struct swapdev *sdp;
- int size;
-{
- u_long result;
-
- if (extent_alloc(swapmap, size, EX_NOALIGN, EX_NOBOUNDARY,
- EX_WAITOK, &result))
- panic("swap_addmap");
-
- sdp->swd_mapoffset = result;
- sdp->swd_mapsize = size;
-}
-
-/*ARGSUSED*/
-int
-swread(dev, uio, ioflag)
- dev_t dev;
- struct uio *uio;
- int ioflag;
-{
-
- return (physio(swstrategy, NULL, dev, B_READ, minphys, uio));
-}
-
-/*ARGSUSED*/
-int
-swwrite(dev, uio, ioflag)
- dev_t dev;
- struct uio *uio;
- int ioflag;
-{
-
- return (physio(swstrategy, NULL, dev, B_WRITE, minphys, uio));
-}
-
-void
-swstrategy(bp)
- struct buf *bp;
-{
- struct swapdev *sdp;
- daddr_t bn;
- int s;
-
- bn = bp->b_blkno;
- sdp = swap_getsdpfromaddr(bn);
- if (sdp == NULL) {
- bp->b_error = EINVAL;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return;
- }
-
- bn -= sdp->swd_mapoffset;
-
- DPRINTF(VMSDB_SWFLOW,
- ("swstrategy(%s): mapoff %x, bn %x, bcount %ld\n",
- ((bp->b_flags & B_READ) == 0) ? "write" : "read",
- sdp->swd_mapoffset, bn, bp->b_bcount));
-
- switch (sdp->swd_vp->v_type) {
- default:
- panic("swstrategy: vnode type %x", sdp->swd_vp->v_type);
- case VBLK:
- s = splbio();
- buf_replacevnode(bp, sdp->swd_vp);
- bp->b_blkno = bn + ctod(1);
- splx(s);
- VOP_STRATEGY(bp);
- return;
-#ifdef SWAP_TO_FILES
- case VREG:
- sw_reg_strategy(sdp, bp, bn);
- return;
-#endif
- }
- /* NOTREACHED */
-}
-
-#ifdef SWAP_TO_FILES
-
-STATIC void
-sw_reg_strategy(sdp, bp, bn)
- struct swapdev *sdp;
- struct buf *bp;
- int bn;
-{
- struct vnode *vp;
- struct vndxfer *vnx;
- daddr_t nbn;
- caddr_t addr;
- int s, off, nra, error, sz, resid;
-
- /*
- * Translate the device logical block numbers into physical
- * block numbers of the underlying filesystem device.
- */
- bp->b_resid = bp->b_bcount;
- addr = bp->b_data;
- bn = dbtob(bn);
-
- /* Allocate a header for this transfer and link it to the buffer */
- vnx = getvndxfer();
- vnx->vx_flags = VX_BUSY;
- vnx->vx_error = 0;
- vnx->vx_pending = 0;
- vnx->vx_bp = bp;
- vnx->vx_sdp = sdp;
-
- error = 0;
- for (resid = bp->b_resid; resid; resid -= sz) {
- struct vndbuf *nbp;
-
- nra = 0;
- error = VOP_BMAP(sdp->swd_vp, bn / sdp->swd_bsize,
- &vp, &nbn, &nra);
-
- if (error == 0 && (long)nbn == -1)
- error = EIO;
-
- /*
- * If there was an error or a hole in the file...punt.
- * Note that we may have to wait for any operations
- * that we have already fired off before releasing
- * the buffer.
- *
- * XXX we could deal with holes here but it would be
- * a hassle (in the write case).
- */
- if (error) {
- s = splbio();
- vnx->vx_error = error;
- goto out;
- }
-
- if ((off = bn % sdp->swd_bsize) != 0)
- sz = sdp->swd_bsize - off;
- else
- sz = (1 + nra) * sdp->swd_bsize;
-
- if (resid < sz)
- sz = resid;
-
- DPRINTF(VMSDB_SWFLOW,
- ("sw_reg_strategy: vp %p/%p bn 0x%x/0x%x"
- " sz 0x%x\n", sdp->swd_vp, vp, bn, nbn, sz));
-
- nbp = getvndbuf();
- nbp->vb_buf.b_flags = bp->b_flags | B_NOCACHE | B_CALL;
- nbp->vb_buf.b_bcount = sz;
- nbp->vb_buf.b_bufsize = bp->b_bufsize;
- nbp->vb_buf.b_error = 0;
- nbp->vb_buf.b_data = addr;
- nbp->vb_buf.b_blkno = nbn + btodb(off);
- nbp->vb_buf.b_proc = bp->b_proc;
- nbp->vb_buf.b_iodone = sw_reg_iodone;
- nbp->vb_buf.b_vp = NULLVP;
- nbp->vb_buf.b_rcred = sdp->swd_cred;
- nbp->vb_buf.b_wcred = sdp->swd_cred;
- if (bp->b_dirtyend == 0) {
- nbp->vb_buf.b_dirtyoff = 0;
- nbp->vb_buf.b_dirtyend = sz;
- } else {
- nbp->vb_buf.b_dirtyoff =
- max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
- nbp->vb_buf.b_dirtyend =
- min(sz,
- max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
- }
- if (bp->b_validend == 0) {
- nbp->vb_buf.b_validoff = 0;
- nbp->vb_buf.b_validend = sz;
- } else {
- nbp->vb_buf.b_validoff =
- max(0, bp->b_validoff - (bp->b_bcount-resid));
- nbp->vb_buf.b_validend =
- min(sz,
- max(0, bp->b_validend - (bp->b_bcount-resid)));
- }
-
- nbp->vb_xfer = vnx;
-
- /*
- * Just sort by block number
- */
- nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
- s = splbio();
- if (vnx->vx_error != 0) {
- putvndbuf(nbp);
- goto out;
- }
- vnx->vx_pending++;
- bgetvp(vp, &nbp->vb_buf);
- disksort(&sdp->swd_tab, &nbp->vb_buf);
- sw_reg_start(sdp);
- splx(s);
-
- bn += sz;
- addr += sz;
- }
-
- s = splbio();
-
-out: /* Arrive here at splbio */
- vnx->vx_flags &= ~VX_BUSY;
- if (vnx->vx_pending == 0) {
- if (vnx->vx_error != 0) {
- bp->b_error = vnx->vx_error;
- bp->b_flags |= B_ERROR;
- }
- putvndxfer(vnx);
- biodone(bp);
- }
- splx(s);
-}
-
-/*
- * Feed requests sequentially.
- * We do it this way to keep from flooding NFS servers if we are connected
- * to an NFS file. This places the burden on the client rather than the
- * server.
- */
-STATIC void
-sw_reg_start(sdp)
- struct swapdev *sdp;
-{
- struct buf *bp;
-
- if ((sdp->swd_flags & SWF_BUSY) != 0)
- /* Recursion control */
- return;
-
- sdp->swd_flags |= SWF_BUSY;
-
- while (sdp->swd_tab.b_active < sdp->swd_maxactive) {
- bp = sdp->swd_tab.b_actf;
- if (bp == NULL)
- break;
- sdp->swd_tab.b_actf = bp->b_actf;
- sdp->swd_tab.b_active++;
-
- DPRINTF(VMSDB_SWFLOW,
- ("sw_reg_start: bp %p vp %p blkno %x addr %p cnt %lx\n",
- bp, bp->b_vp, bp->b_blkno,bp->b_data, bp->b_bcount));
-
- if ((bp->b_flags & B_READ) == 0)
- bp->b_vp->v_numoutput++;
- VOP_STRATEGY(bp);
- }
- sdp->swd_flags &= ~SWF_BUSY;
-}
-
-STATIC void
-sw_reg_iodone(bp)
- struct buf *bp;
-{
- register struct vndbuf *vbp = BUF_TO_VNDBUF(bp);
- register struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
- register struct buf *pbp = vnx->vx_bp;
- struct swapdev *sdp = vnx->vx_sdp;
- int s, resid;
-
- DPRINTF(VMSDB_SWFLOW,
- ("sw_reg_iodone: vbp %p vp %p blkno %x addr %p "
- "cnt %lx(%lx)\n",
- vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno,
- vbp->vb_buf.b_data, vbp->vb_buf.b_bcount,
- vbp->vb_buf.b_resid));
-
- s = splbio();
- resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
- pbp->b_resid -= resid;
- vnx->vx_pending--;
-
- if (vbp->vb_buf.b_error) {
- DPRINTF(VMSDB_INFO, ("sw_reg_iodone: vbp %p error %d\n", vbp,
- vbp->vb_buf.b_error));
-
- vnx->vx_error = vbp->vb_buf.b_error;
- }
-
- if (vbp->vb_buf.b_vp != NULLVP)
- brelvp(&vbp->vb_buf);
-
- putvndbuf(vbp);
-
- /*
- * Wrap up this transaction if it has run to completion or, in
- * case of an error, when all auxiliary buffers have returned.
- */
- if (vnx->vx_error != 0) {
- pbp->b_flags |= B_ERROR;
- pbp->b_error = vnx->vx_error;
- if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {
-
- DPRINTF(VMSDB_SWFLOW,
- ("swiodone: pbp %p iodone: error %d\n",
- pbp, vnx->vx_error));
- putvndxfer(vnx);
- biodone(pbp);
- }
- } else if (pbp->b_resid == 0) {
-
-#ifdef DIAGNOSTIC
- if (vnx->vx_pending != 0)
- panic("swiodone: vnx pending: %d", vnx->vx_pending);
-#endif
-
- if ((vnx->vx_flags & VX_BUSY) == 0) {
- DPRINTF(VMSDB_SWFLOW,
- ("swiodone: pbp %p iodone\n", pbp));
- putvndxfer(vnx);
- biodone(pbp);
- }
- }
-
- sdp->swd_tab.b_active--;
- sw_reg_start(sdp);
-
- splx(s);
-}
-#endif /* SWAP_TO_FILES */
-
-void
-swapinit()
-{
- struct buf *sp = swbuf;
- struct proc *p = &proc0; /* XXX */
- int i;
-
- DPRINTF(VMSDB_SWINIT, ("swapinit\n"));
-
- nswapdev = 0;
- if (bdevvp(swapdev, &swapdev_vp))
- panic("swapinit: can not setup swapdev_vp");
-
- simple_lock_init(&swaplist_lock);
- lockinit(&swaplist_change_lock, PSWP, "swap change", 0, 0);
- LIST_INIT(&swap_priority);
-
- /*
- * Create swap block resource map. The range [1..INT_MAX] allows
- * for a grand total of 2 gigablocks of swap resource.
- * (start at 1 because "block #0" will be interpreted as
- * an allocation failure).
- */
- swapmap = extent_create("swapmap", 1, INT_MAX,
- M_VMSWAP, 0, 0, EX_WAITOK);
- if (swapmap == 0)
- panic("swapinit: extent_create failed");
-
- /*
- * Now set up swap buffer headers.
- */
- bswlist.b_actf = sp;
- for (i = 0; i < nswbuf - 1; i++, sp++) {
- sp->b_actf = sp + 1;
- sp->b_rcred = sp->b_wcred = p->p_ucred;
- sp->b_vnbufs.le_next = NOLIST;
- }
- sp->b_rcred = sp->b_wcred = p->p_ucred;
- sp->b_vnbufs.le_next = NOLIST;
- sp->b_actf = NULL;
-
- /* Mount primary swap if available */
-#ifdef SWAPDEBUG
- if(vmswap_domount)
-#endif
- swapmount();
-
- DPRINTF(VMSDB_SWINIT, ("leaving swapinit\n"));
-}
-
-/*
- * Mount the primary swap device pointed to by 'swdevt[0]'.
- */
-STATIC void
-swapmount()
-{
- extern int getdevvp(dev_t, struct vnode **, enum vtype);
- struct swapdev *sdp;
- struct vnode *vp = NULL;
- struct proc *p = curproc;
- dev_t swap_dev = swdevt[0].sw_dev;
-
- /* Make sure we have a device */
- if (swap_dev == NODEV) {
- printf("swapmount: No swap device!\n");
- return;
- }
-
- /* Malloc needed things */
- sdp = (struct swapdev *)malloc(sizeof *sdp, M_VMSWAP, M_WAITOK);
- bzero(sdp, sizeof(*sdp));
-
- /* Do swap_on() stuff */
- if(bdevvp(swap_dev, &vp)){
- printf("swapmount: bdevvp() failed\n");
- return;
- }
-
-#ifdef SWAPDEBUG
- vprint("swapmount", vp);
-#endif
-
- sdp->swd_vp = vp;
- sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
- if(copystr("swap_device", sdp->swd_path, sizeof sdp->swd_path, 0) != 0){
- printf("swapmount: copystr() failed\n");
- return;
- }
-
- /* Look for a swap device */
- if (swap_on(p, sdp) != 0) {
- free((caddr_t)sdp, M_VMSWAP);
- return;
- }
-
-#ifdef SWAP_TO_FILES
- /*
- * XXX Is NFS elaboration necessary?
- */
- if (vp->v_type == VREG)
- sdp->swd_cred = crdup(p->p_ucred);
-#endif
- insert_swapdev(sdp, 0);
-}
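
The block comment above swap_alloc() describes the allocation policy: a list of priority buckets, each holding a circular queue of swap devices, with same-priority devices used round-robin until they fill. The sketch below is a minimal userland model of that policy only, not the deleted kernel code; the names (model_dev, model_pri, model_alloc) and the two-device setup are invented, and a cursor stands in for the CIRCLEQ remove/insert-at-tail rotation the real code performs.

#include <stdio.h>

struct model_dev {
	const char	*name;
	long		 nblks;		/* total blocks on the device */
	long		 inuse;		/* blocks handed out so far */
};

struct model_pri {
	int		 priority;	/* lower index = preferred bucket */
	struct model_dev *devs;
	int		 ndevs;
	int		 next;		/* round-robin cursor */
};

/*
 * Walk the priority buckets in order; within a bucket, start at the
 * round-robin cursor and take the first device with enough room.
 * Advancing the cursor past the chosen device spreads load across
 * same-priority devices, which is the effect the kernel gets by
 * moving the chosen swapdev to the tail of its circular queue.
 */
static struct model_dev *
model_alloc(struct model_pri *pris, int npris, long size)
{
	int i, j;

	for (i = 0; i < npris; i++) {
		struct model_pri *pp = &pris[i];

		for (j = 0; j < pp->ndevs; j++) {
			struct model_dev *dp =
			    &pp->devs[(pp->next + j) % pp->ndevs];

			if (dp->inuse + size <= dp->nblks) {
				dp->inuse += size;
				pp->next = (pp->next + j + 1) % pp->ndevs;
				return (dp);
			}
		}
	}
	return (NULL);			/* every device is full */
}

int
main(void)
{
	struct model_dev devs[] = {
		{ "sd0b", 1000, 0 },
		{ "sd1b", 1000, 0 },
	};
	struct model_pri pri = { 0, devs, 2, 0 };
	int i;

	for (i = 0; i < 4; i++) {
		struct model_dev *dp = model_alloc(&pri, 1, 100);

		printf("allocation %d -> %s\n", i, dp ? dp->name : "none");
	}
	return (0);
}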
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
deleted file mode 100644
index 4a35e4e3482..00000000000
--- a/sys/vm/vm_unix.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/* $OpenBSD: vm_unix.c,v 1.10 2001/05/05 20:57:04 art Exp $ */
-/* $NetBSD: vm_unix.c,v 1.19 1996/02/10 00:08:14 christos Exp $ */
-
-/*
- * Copyright (c) 1988 University of Utah.
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$
- *
- * @(#)vm_unix.c 8.2 (Berkeley) 1/9/95
- */
-
-/*
- * Traditional sbrk/grow interface to VM
- */
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/resourcevar.h>
-#include <sys/vnode.h>
-#include <sys/core.h>
-
-#include <sys/mount.h>
-#include <sys/syscallargs.h>
-
-#include <vm/vm.h>
-
-/* ARGSUSED */
-int
-sys_obreak(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
- struct sys_obreak_args /* {
- syscallarg(char *) nsize;
- } */ *uap = v;
- register struct vmspace *vm = p->p_vmspace;
- vm_offset_t new, old;
- int rv;
- register int diff;
-
- old = (vm_offset_t)vm->vm_daddr;
- new = (vm_offset_t)SCARG(uap, nsize);
-
- /* Check for overflow, round to page */
- if(round_page(new) < new)
- return(ENOMEM);
- new = round_page(new);
-
- /* Check limit */
- if ((new > old) && ((new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur))
- return(ENOMEM);
-
- /* Turn the trick */
- old = round_page(old + ctob(vm->vm_dsize));
- diff = new - old;
- if (diff > 0) {
- rv = vm_allocate(&vm->vm_map, &old, diff, FALSE);
- if (rv != KERN_SUCCESS) {
- uprintf("sbrk: grow failed, return = %d\n", rv);
- return(ENOMEM);
- }
- vm->vm_dsize += btoc(diff);
- } else if (diff < 0) {
- diff = -diff;
- rv = vm_deallocate(&vm->vm_map, new, diff);
- if (rv != KERN_SUCCESS) {
- uprintf("sbrk: shrink failed, return = %d\n", rv);
- return(ENOMEM);
- }
- vm->vm_dsize -= btoc(diff);
- }
- return(0);
-}
-
-/*
- * Enlarge the "stack segment" to include the specified
- * stack pointer for the process.
- */
-int
-grow(p, sp)
- struct proc *p;
- vm_offset_t sp;
-{
- register struct vmspace *vm = p->p_vmspace;
- register int si;
-
- /*
- * For user defined stacks (from sendsig).
- */
- if (sp < (vm_offset_t)vm->vm_maxsaddr)
- return (0);
- /*
- * For common case of already allocated (from trap).
- */
- if (sp >= USRSTACK - ctob(vm->vm_ssize))
- return (1);
- /*
- * Really need to check vs limit and increment stack size if ok.
- */
- si = btoc(USRSTACK-sp) - vm->vm_ssize;
- if (vm->vm_ssize + si > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur))
- return (0);
- vm->vm_ssize += si;
- return (1);
-}
-
-/* ARGSUSED */
-int
-sys_ovadvise(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
-#if 0
- struct sys_ovadvise_args /* {
- syscallarg(int) anom;
- } */ *uap = v;
-#endif
-
- return (EINVAL);
-}
-
-int
-vm_coredump(p, vp, cred, chdr)
- struct proc *p;
- struct vnode *vp;
- struct ucred *cred;
- struct core *chdr;
-{
- register struct vmspace *vm = p->p_vmspace;
- register vm_map_t map = &vm->vm_map;
- register vm_map_entry_t entry;
- vm_offset_t start, end;
- struct coreseg cseg;
- off_t offset;
- int flag, error = 0;
-
- if (!map->is_main_map) {
-#ifdef DEBUG
- uprintf(
- "vm_coredump: %s map %p: pmap=%p, ref=%d, nentries=%d, version=%d\n",
- (map->is_main_map ? "Task" : "Share"),
- map, (map->pmap), map->ref_count, map->nentries,
- map->timestamp);
-#endif
- return EIO;
- }
-
- offset = chdr->c_hdrsize + chdr->c_seghdrsize + chdr->c_cpusize;
-
- for (entry = map->header.next; entry != &map->header;
- entry = entry->next) {
-
- if (entry->is_a_map || entry->is_sub_map) {
-#ifdef DEBUG
- uprintf("vm_coredump: entry: share=%p, offset=%p\n",
- entry->object.share_map, entry->offset);
-#endif
- continue;
- }
-
- if (entry->object.vm_object &&
- entry->object.vm_object->pager &&
- entry->object.vm_object->pager->pg_type == PG_DEVICE) {
-#ifdef DEBUG
- printf("vm_coredump: skipping dev @ 0x%lx\n",
- entry->start);
-#endif
- continue;
- }
-
- if (!(entry->protection & VM_PROT_WRITE))
- continue;
-
- start = entry->start;
- end = entry->end;
-
- if (start >= VM_MAXUSER_ADDRESS)
- continue;
-
- if (end > VM_MAXUSER_ADDRESS)
- end = VM_MAXUSER_ADDRESS;
-
- if (start >= (vm_offset_t)vm->vm_maxsaddr) {
- flag = CORE_STACK;
- start = trunc_page(USRSTACK - ctob(vm->vm_ssize));
- if (start >= end)
- continue;
- } else
- flag = CORE_DATA;
-
- /*
- * Set up a new core file segment.
- */
- CORE_SETMAGIC(cseg, CORESEGMAGIC, CORE_GETMID(*chdr), flag);
- cseg.c_addr = start;
- cseg.c_size = end - start;
-
- error = vn_rdwr(UIO_WRITE, vp,
- (caddr_t)&cseg, chdr->c_seghdrsize,
- offset, UIO_SYSSPACE,
- IO_NODELOCKED|IO_UNIT, cred, NULL, p);
- if (error)
- break;
-
- offset += chdr->c_seghdrsize;
- error = vn_rdwr(UIO_WRITE, vp,
- (caddr_t)cseg.c_addr, (int)cseg.c_size,
- offset, UIO_USERSPACE,
- IO_NODELOCKED|IO_UNIT, cred, NULL, p);
- if (error)
- break;
-
- offset += cseg.c_size;
- chdr->c_nseg++;
- }
-
- return error;
-}
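
The sys_obreak() handler above reduces to page-rounding arithmetic: round the requested break up to a page, reject rounding overflow and growth beyond RLIMIT_DATA, then allocate or deallocate the difference from the current rounded end of the data segment. Below is a small standalone sketch of just that arithmetic; PAGE_SZ, DATA_LIMIT and brk_delta() are invented stand-ins, not kernel interfaces.

#include <stdio.h>

#define PAGE_SZ		4096UL
#define DATA_LIMIT	(8UL * 1024 * 1024)	/* stand-in for RLIMIT_DATA */

static unsigned long
round_pg(unsigned long v)
{
	return ((v + PAGE_SZ - 1) & ~(PAGE_SZ - 1));
}

/*
 * daddr/dsize describe the current data segment; newbrk is the
 * requested break.  Returns the signed byte delta to apply (positive
 * to allocate, negative to deallocate).  *errp is set when rounding
 * overflows or the new size exceeds the limit, mirroring the ENOMEM
 * cases in the original.
 */
static long
brk_delta(unsigned long daddr, unsigned long dsize, unsigned long newbrk,
    int *errp)
{
	unsigned long old = round_pg(daddr + dsize);
	unsigned long new = round_pg(newbrk);

	*errp = 0;
	if (new < newbrk || (new > daddr && new - daddr > DATA_LIMIT)) {
		*errp = 1;
		return (0);
	}
	return ((long)new - (long)old);
}

int
main(void)
{
	unsigned long daddr = 0x10000000UL, dsize = 64 * 1024;
	int err;

	printf("grow: %ld\n", brk_delta(daddr, dsize, daddr + 256 * 1024, &err));
	printf("shrink: %ld\n", brk_delta(daddr, dsize, daddr + 8 * 1024, &err));
	return (0);
}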
diff --git a/sys/vm/vm_user.c b/sys/vm/vm_user.c
deleted file mode 100644
index d717f6204c8..00000000000
--- a/sys/vm/vm_user.c
+++ /dev/null
@@ -1,340 +0,0 @@
-/* $OpenBSD: vm_user.c,v 1.3 1996/04/19 16:10:52 niklas Exp $ */
-/* $NetBSD: vm_user.c,v 1.13 1996/02/28 22:39:16 gwr Exp $ */
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vm_user.c 8.2 (Berkeley) 1/12/94
- *
- *
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-/*
- * User-exported virtual memory functions.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-
-#include <vm/vm.h>
-
-simple_lock_data_t vm_alloc_lock; /* XXX */
-
-#ifdef MACHVMCOMPAT
-/*
- * BSD style syscall interfaces to MACH calls
- * All return MACH return values.
- */
-struct svm_allocate_args {
- vm_map_t map;
- vm_offset_t *addr;
- vm_size_t size;
- boolean_t anywhere;
-};
-/* ARGSUSED */
-int
-svm_allocate(p, uap, retval)
- struct proc *p;
- struct svm_allocate_args *uap;
- register_t *retval;
-{
- vm_offset_t addr;
- int rv;
-
- SCARG(uap, map) = p->p_map; /* XXX */
-
- if (copyin((caddr_t)SCARG(uap, addr), (caddr_t)&addr, sizeof (addr)))
- rv = KERN_INVALID_ARGUMENT;
- else
- rv = vm_allocate(SCARG(uap, map), &addr, SCARG(uap, size),
- SCARG(uap, anywhere));
- if (rv == KERN_SUCCESS) {
- if (copyout((caddr_t)&addr, (caddr_t)SCARG(uap, addr),
- sizeof(addr)))
- rv = KERN_INVALID_ARGUMENT;
- }
- return((int)rv);
-}
-
-struct svm_deallocate_args {
- vm_map_t map;
- vm_offset_t addr;
- vm_size_t size;
-};
-/* ARGSUSED */
-int
-svm_deallocate(p, uap, retval)
- struct proc *p;
- struct svm_deallocate_args *uap;
- register_t *retval;
-{
- int rv;
-
- SCARG(uap, map) = p->p_map; /* XXX */
- rv = vm_deallocate(SCARG(uap, map), SCARG(uap, addr), SCARG(uap, size));
- return((int)rv);
-}
-
-struct svm_inherit_args {
- vm_map_t map;
- vm_offset_t addr;
- vm_size_t size;
- vm_inherit_t inherit;
-};
-/* ARGSUSED */
-int
-svm_inherit(p, uap, retval)
- struct proc *p;
- struct svm_inherit_args *uap;
- register_t *retval;
-{
- int rv;
-
- SCARG(uap, map) = p->p_map; /* XXX */
- rv = vm_inherit(SCARG(uap, map), SCARG(uap, addr), SCARG(uap, size),
- SCARG(uap, inherit));
- return((int)rv);
-}
-
-struct svm_protect_args {
- vm_map_t map;
- vm_offset_t addr;
- vm_size_t size;
- boolean_t setmax;
- vm_prot_t prot;
-};
-/* ARGSUSED */
-int
-svm_protect(p, uap, retval)
- struct proc *p;
- struct svm_protect_args *uap;
- register_t *retval;
-{
- int rv;
-
- SCARG(uap, map) = p->p_map; /* XXX */
- rv = vm_protect(SCARG(uap, map), SCARG(uap, addr), SCARG(uap, size),
- SCARG(uap, setmax), SCARG(uap, prot));
- return((int)rv);
-}
-
-/*
- * vm_inherit sets the inheritance of the specified range in the
- * specified map.
- */
-int
-vm_inherit(map, start, size, new_inheritance)
- register vm_map_t map;
- vm_offset_t start;
- vm_size_t size;
- vm_inherit_t new_inheritance;
-{
- if (map == NULL)
- return(KERN_INVALID_ARGUMENT);
-
- return(vm_map_inherit(map, trunc_page(start), round_page(start+size), new_inheritance));
-}
-
-/*
- * vm_protect sets the protection of the specified range in the
- * specified map.
- */
-
-int
-vm_protect(map, start, size, set_maximum, new_protection)
- register vm_map_t map;
- vm_offset_t start;
- vm_size_t size;
- boolean_t set_maximum;
- vm_prot_t new_protection;
-{
- if (map == NULL)
- return(KERN_INVALID_ARGUMENT);
-
- return(vm_map_protect(map, trunc_page(start), round_page(start+size), new_protection, set_maximum));
-}
-#endif
-
-/*
- * vm_allocate allocates "zero fill" memory in the specified
- * map.
- */
-int
-vm_allocate(map, addr, size, anywhere)
- register vm_map_t map;
- register vm_offset_t *addr;
- register vm_size_t size;
- boolean_t anywhere;
-{
- int result;
-
- if (map == NULL)
- return(KERN_INVALID_ARGUMENT);
- if (size == 0) {
- *addr = 0;
- return(KERN_SUCCESS);
- }
-
- if (anywhere)
- *addr = vm_map_min(map);
- else
- *addr = trunc_page(*addr);
- size = round_page(size);
-
- result = vm_map_find(map, NULL, (vm_offset_t) 0, addr, size, anywhere);
-
- return(result);
-}
-
-/*
- * vm_deallocate deallocates the specified range of addresses in the
- * specified address map.
- */
-int
-vm_deallocate(map, start, size)
- register vm_map_t map;
- vm_offset_t start;
- vm_size_t size;
-{
- if (map == NULL)
- return(KERN_INVALID_ARGUMENT);
-
- if (size == (vm_offset_t) 0)
- return(KERN_SUCCESS);
-
- return(vm_map_remove(map, trunc_page(start), round_page(start+size)));
-}
-
-/*
- * Similar to vm_allocate but assigns an explicit pager.
- */
-int
-vm_allocate_with_pager(map, addr, size, anywhere, pager, poffset, internal)
- register vm_map_t map;
- register vm_offset_t *addr;
- register vm_size_t size;
- boolean_t anywhere;
- vm_pager_t pager;
- vm_offset_t poffset;
- boolean_t internal;
-{
- register vm_object_t object;
- register int result;
- vm_offset_t start;
-
- if (map == NULL)
- return(KERN_INVALID_ARGUMENT);
-
- *addr = trunc_page(*addr);
- size = round_page(size);
-
- /*
- * Lookup the pager/paging-space in the object cache.
- * If it's not there, then create a new object and cache
- * it.
- */
- object = vm_object_lookup(pager);
- cnt.v_lookups++;
- if (object == NULL) {
- object = vm_object_allocate(size);
- /*
- * From Mike Hibler: "unnamed anonymous objects should never
- * be on the hash list ... For now you can just change
- * vm_allocate_with_pager to not do vm_object_enter if this
- * is an internal object ..."
- */
- if (!internal)
- vm_object_enter(object, pager);
- } else
- cnt.v_hits++;
- if (internal)
- object->flags |= OBJ_INTERNAL;
- else {
- object->flags &= ~OBJ_INTERNAL;
- cnt.v_nzfod -= atop(size);
- }
-
- start = *addr;
- vm_map_lock(map);
- if (anywhere) {
- again:
- if (vm_map_findspace(map, start, size, addr))
- result = KERN_NO_SPACE;
- else {
-#ifdef PMAP_PREFER
- PMAP_PREFER(poffset, addr);
-#endif
- start = *addr;
- result = vm_map_insert(map, object, poffset,
- start, start + size);
- if (result == KERN_NO_SPACE)
- goto again;
- }
- } else
- result = vm_map_insert(map, object, poffset,
- start, start + size);
- vm_map_unlock(map);
-
- if (result != KERN_SUCCESS)
- vm_object_deallocate(object);
- else if (pager != NULL)
- vm_object_setpager(object, pager, (vm_offset_t) 0, TRUE);
- return(result);
-}
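
vm_inherit(), vm_protect() and vm_deallocate() above all normalize their byte range the same way before calling into the map layer: trunc_page() on the start address and round_page() on the end. A tiny sketch of that convention, with an invented PG page size standing in for the real macros:

#include <stdio.h>

#define PG	4096UL

static void
norm_range(unsigned long start, unsigned long size,
    unsigned long *lo, unsigned long *hi)
{
	*lo = start & ~(PG - 1);			/* trunc_page(start) */
	*hi = (start + size + PG - 1) & ~(PG - 1);	/* round_page(start+size) */
}

int
main(void)
{
	unsigned long lo, hi;

	norm_range(0x1234, 100, &lo, &hi);
	printf("[%#lx, %#lx)\n", lo, hi);		/* [0x1000, 0x2000) */
	return (0);
}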
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
deleted file mode 100644
index d1c885fb65c..00000000000
--- a/sys/vm/vnode_pager.c
+++ /dev/null
@@ -1,591 +0,0 @@
-/* $OpenBSD: vnode_pager.c,v 1.8 2001/05/16 12:54:34 ho Exp $ */
-/* $NetBSD: vnode_pager.c,v 1.19 1996/03/16 23:15:27 christos Exp $ */
-
-/*
- * Copyright (c) 1990 University of Utah.
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vnode_pager.c 8.10 (Berkeley) 5/14/95
- */
-
-/*
- * Page to/from files (vnodes).
- *
- * TODO:
- * pageouts
- * fix credential use (uses current process credentials now)
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/malloc.h>
-#include <sys/vnode.h>
-#include <sys/uio.h>
-#include <sys/mount.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vnode_pager.h>
-
-struct pagerlst vnode_pager_list; /* list of managed vnodes */
-
-#ifdef DEBUG
-int vpagerdebug = 0x00;
-#define VDB_FOLLOW 0x01
-#define VDB_INIT 0x02
-#define VDB_IO 0x04
-#define VDB_FAIL 0x08
-#define VDB_ALLOC 0x10
-#define VDB_SIZE 0x20
-#endif
-
-static vm_pager_t vnode_pager_alloc
- __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
-static void vnode_pager_cluster
- __P((vm_pager_t, vm_offset_t,
- vm_offset_t *, vm_offset_t *));
-static void vnode_pager_dealloc __P((vm_pager_t));
-static int vnode_pager_getpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
-static boolean_t vnode_pager_haspage __P((vm_pager_t, vm_offset_t));
-static void vnode_pager_init __P((void));
-static int vnode_pager_io
- __P((vn_pager_t, vm_page_t *, int,
- boolean_t, enum uio_rw));
-static boolean_t vnode_pager_putpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
-
-struct pagerops vnodepagerops = {
- vnode_pager_init,
- vnode_pager_alloc,
- vnode_pager_dealloc,
- vnode_pager_getpage,
- vnode_pager_putpage,
- vnode_pager_haspage,
- vnode_pager_cluster
-};
-
-static void
-vnode_pager_init()
-{
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_init()\n");
-#endif
- TAILQ_INIT(&vnode_pager_list);
-}
-
-/*
- * Allocate (or lookup) pager for a vnode.
- * Handle is a vnode pointer.
- */
-static vm_pager_t
-vnode_pager_alloc(handle, size, prot, foff)
- caddr_t handle;
- vm_size_t size;
- vm_prot_t prot;
- vm_offset_t foff;
-{
- register vm_pager_t pager;
- register vn_pager_t vnp;
- vm_object_t object;
- struct vattr vattr;
- struct vnode *vp;
- struct proc *p = curproc; /* XXX */
-
-#ifdef DEBUG
- if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
- printf("vnode_pager_alloc(%p, %lx, %x)\n", handle, size, prot);
-#endif
- /*
- * Pageout to vnode, no can do yet.
- */
- if (handle == NULL)
- return(NULL);
-
- /*
- * Vnodes keep a pointer to any associated pager so no need to
- * lookup with vm_pager_lookup.
- */
- vp = (struct vnode *)handle;
- pager = (vm_pager_t)vp->v_vmdata;
- if (pager == NULL) {
- /*
- * Allocate pager structures
- */
- pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
- vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
- /*
- * And an object of the appropriate size
- */
- if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) {
- object = vm_object_allocate(round_page(vattr.va_size));
- vm_object_enter(object, pager);
- vm_object_setpager(object, pager, 0, TRUE);
- } else {
- free((caddr_t)vnp, M_VMPGDATA);
- free((caddr_t)pager, M_VMPAGER);
- return(NULL);
- }
- /*
- * Hold a reference to the vnode and initialize pager data.
- */
- VREF(vp);
- vnp->vnp_flags = 0;
- vnp->vnp_vp = vp;
- vnp->vnp_size = vattr.va_size;
- TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
- pager->pg_handle = handle;
- pager->pg_type = PG_VNODE;
- pager->pg_flags = 0;
- pager->pg_ops = &vnodepagerops;
- pager->pg_data = vnp;
- vp->v_vmdata = (caddr_t)pager;
- } else {
- /*
- * vm_object_lookup() will remove the object from the
- * cache if found and also gain a reference to the object.
- */
- object = vm_object_lookup(pager);
-#ifdef DEBUG
- vnp = (vn_pager_t)pager->pg_data;
-#endif
- }
-#ifdef DEBUG
- if (vpagerdebug & VDB_ALLOC)
- printf("vnode_pager_setup: vp %p sz %lx pager %p object %p\n",
- vp, vnp->vnp_size, pager, object);
-#endif
- return(pager);
-}
-
-static void
-vnode_pager_dealloc(pager)
- vm_pager_t pager;
-{
- register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
- register struct vnode *vp;
-#ifdef NOTDEF
- struct proc *p = curproc; /* XXX */
-#endif
-
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_dealloc(%p)\n", pager);
-#endif
- if ((vp = vnp->vnp_vp) != NULL) {
- vp->v_vmdata = NULL;
- vp->v_flag &= ~VTEXT;
-#if NOTDEF
- /* can hang if done at reboot on NFS FS */
- (void) VOP_FSYNC(vp, p->p_ucred, p);
-#endif
- vrele(vp);
- }
- TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
- free((caddr_t)vnp, M_VMPGDATA);
- free((caddr_t)pager, M_VMPAGER);
-}
-
-static int
-vnode_pager_getpage(pager, mlist, npages, sync)
- vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
- boolean_t sync;
-{
-
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_getpage(%p, %p, %x, %x)\n",
- pager, mlist, npages, sync);
-#endif
- return(vnode_pager_io((vn_pager_t)pager->pg_data,
- mlist, npages, sync, UIO_READ));
-}
-
-static boolean_t
-vnode_pager_putpage(pager, mlist, npages, sync)
- vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
- boolean_t sync;
-{
- int err;
-
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_putpage(%p, %p, %x, %x)\n",
- pager, mlist, npages, sync);
-#endif
- if (pager == NULL)
- return (FALSE); /* ??? */
- err = vnode_pager_io((vn_pager_t)pager->pg_data,
- mlist, npages, sync, UIO_WRITE);
- /*
- * If the operation was successful, mark the pages clean.
- */
- if (err == VM_PAGER_OK) {
- while (npages--) {
- (*mlist)->flags |= PG_CLEAN;
- pmap_clear_modify(VM_PAGE_TO_PHYS(*mlist));
- mlist++;
- }
- }
- return(err);
-}
-
-static boolean_t
-vnode_pager_haspage(pager, offset)
- vm_pager_t pager;
- vm_offset_t offset;
-{
- struct proc *p = curproc; /* XXX */
- vn_pager_t vnp = (vn_pager_t)pager->pg_data;
- daddr_t bn;
- int err;
-
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_haspage(%p, %lx)\n", pager, offset);
-#endif
-
- /*
-	 * Offset beyond end of file: we do not have the page.
- * Lock the vnode first to make sure we have the most recent
- * version of the size.
- */
- vn_lock(vnp->vnp_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE, p);
- if (offset >= vnp->vnp_size) {
- VOP_UNLOCK(vnp->vnp_vp, 0, p);
-#ifdef DEBUG
- if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
- printf("vnode_pager_haspage: pg %p, off %lx, size %lx\n",
- pager, offset, vnp->vnp_size);
-#endif
- return(FALSE);
- }
-
- /*
- * Read the index to find the disk block to read
- * from. If there is no block, report that we don't
- * have this data.
- *
-	 * Assumes that the vnode has the whole page or nothing.
- */
- err = VOP_BMAP(vnp->vnp_vp,
- offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize,
- (struct vnode **)0, &bn, NULL);
- VOP_UNLOCK(vnp->vnp_vp, 0, p);
- if (err) {
-#ifdef DEBUG
- if (vpagerdebug & VDB_FAIL)
- printf("vnode_pager_haspage: BMAP err %d, pg %p, off %lx\n",
- err, pager, offset);
-#endif
- return(TRUE);
- }
- return((long)bn < 0 ? FALSE : TRUE);
-}
-
-static void
-vnode_pager_cluster(pager, offset, loffset, hoffset)
- vm_pager_t pager;
- vm_offset_t offset;
- vm_offset_t *loffset;
- vm_offset_t *hoffset;
-{
- vn_pager_t vnp = (vn_pager_t)pager->pg_data;
- vm_offset_t loff, hoff;
-
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_cluster(%p, %lx) ", pager, offset);
-#endif
- loff = offset;
- if (loff >= vnp->vnp_size)
- panic("vnode_pager_cluster: bad offset");
- /*
- * XXX could use VOP_BMAP to get maxcontig value
- */
- hoff = loff + MAXBSIZE;
- if (hoff > round_page(vnp->vnp_size))
- hoff = round_page(vnp->vnp_size);
-
- *loffset = loff;
- *hoffset = hoff;
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("returns [%lx-%lx]\n", loff, hoff);
-#endif
-}
-
-/*
- * (XXX)
- * Lets the VM system know about a change in size for a file.
- * If this vnode is mapped into some address space (i.e. we have a pager
- * for it) we adjust our own internal size and flush any cached pages in
- * the associated object that are affected by the size change.
- *
- * Note: this routine may be invoked as a result of a pager put
- * operation (possibly at object termination time), so we must be careful.
- */
-void
-vnode_pager_setsize(vp, nsize)
- struct vnode *vp;
- u_long nsize;
-{
- register vn_pager_t vnp;
- register vm_object_t object;
- vm_pager_t pager;
-
- /*
- * Not a mapped vnode
- */
- if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
- return;
- /*
- * Hasn't changed size
- */
- pager = (vm_pager_t)vp->v_vmdata;
- vnp = (vn_pager_t)pager->pg_data;
- if (nsize == vnp->vnp_size)
- return;
- /*
- * No object.
- * This can happen during object termination since
- * vm_object_page_clean is called after the object
- * has been removed from the hash table, and clean
- * may cause vnode write operations which can wind
- * up back here.
- */
- object = vm_object_lookup(pager);
- if (object == NULL)
- return;
-
-#ifdef DEBUG
- if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
- printf("vnode_pager_setsize: vp %p obj %p osz %ld nsz %ld\n",
- vp, object, vnp->vnp_size, nsize);
-#endif
- /*
- * File has shrunk.
- * Toss any cached pages beyond the new EOF.
- */
- if (nsize < vnp->vnp_size) {
- vm_object_lock(object);
- vm_object_page_remove(object,
- (vm_offset_t)nsize, vnp->vnp_size);
- vm_object_unlock(object);
- }
- vnp->vnp_size = (vm_offset_t)nsize;
- vm_object_deallocate(object);
-}
-
-void
-vnode_pager_umount(mp)
- register struct mount *mp;
-{
- struct proc *p = curproc; /* XXX */
- vm_pager_t pager, npager;
- struct vnode *vp;
-
- for (pager = vnode_pager_list.tqh_first; pager != NULL; pager = npager){
- /*
- * Save the next pointer now since uncaching may
- * terminate the object and render pager invalid
- */
- npager = pager->pg_list.tqe_next;
- vp = ((vn_pager_t)pager->pg_data)->vnp_vp;
- if (mp == (struct mount *)0 || vp->v_mount == mp) {
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- (void) vnode_pager_uncache(vp);
- VOP_UNLOCK(vp, 0, p);
- }
- }
-}
-
-/*
- * Remove vnode associated object from the object cache.
- *
- * XXX unlock the vnode if it is currently locked.
- * We must do this since uncaching the object may result in its
- * destruction which may initiate paging activity which may necessitate
- * re-locking the vnode.
- */
-boolean_t
-vnode_pager_uncache(vp)
- register struct vnode *vp;
-{
- struct proc *p = curproc; /* XXX */
- vm_object_t object;
- boolean_t uncached;
- vm_pager_t pager;
-
- /*
- * Not a mapped vnode
- */
- if (vp->v_type != VREG || (pager = (vm_pager_t)vp->v_vmdata) == NULL)
- return (TRUE);
-#ifdef DEBUG
- if (!VOP_ISLOCKED(vp)) {
-#ifdef NFSCLIENT
- extern int (**nfsv2_vnodeop_p) __P((void *));
- extern int (**spec_nfsv2nodeop_p) __P((void *));
-#ifdef FIFO
- extern int (**fifo_nfsv2nodeop_p) __P((void *));
-#endif
-
- if (vp->v_op != nfsv2_vnodeop_p
- && vp->v_op != spec_nfsv2nodeop_p
-#ifdef FIFO
- && vp->v_op != fifo_nfsv2nodeop_p
-#endif
- )
-
-#endif
- panic("vnode_pager_uncache: vnode not locked!");
- }
-#endif
- /*
- * Must use vm_object_lookup() as it actually removes
- * the object from the cache list.
- */
- object = vm_object_lookup(pager);
- if (object) {
- uncached = (object->ref_count <= 1);
- VOP_UNLOCK(vp, 0, p);
- pager_cache(object, FALSE);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- } else
- uncached = TRUE;
- return(uncached);
-}
-
-static int
-vnode_pager_io(vnp, mlist, npages, sync, rw)
- register vn_pager_t vnp;
- vm_page_t *mlist;
- int npages;
- boolean_t sync;
- enum uio_rw rw;
-{
- struct uio auio;
- struct iovec aiov;
- vm_offset_t kva, foff;
- int error, size;
- struct proc *p = curproc; /* XXX */
-
- /* XXX */
- vm_page_t m;
- if (npages != 1)
- panic("vnode_pager_io: cannot handle multiple pages");
- m = *mlist;
- /* XXX */
-
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_io(%p, %p, %c): vnode %p\n",
- vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp);
-#endif
- foff = m->offset + m->object->paging_offset;
- /*
- * Allocate a kernel virtual address and initialize so that
- * we can use VOP_READ/WRITE routines.
- */
- kva = vm_pager_map_pages(mlist, npages, sync);
- if (kva == NULL)
- return(VM_PAGER_AGAIN);
- /*
- * After all of the potentially blocking operations have been
- * performed, we can do the size checks:
- * read beyond EOF (returns error)
- * short read
- */
- vn_lock(vnp->vnp_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE, p);
- if (foff >= vnp->vnp_size) {
- VOP_UNLOCK(vnp->vnp_vp, 0, p);
- vm_pager_unmap_pages(kva, npages);
-#ifdef DEBUG
- if (vpagerdebug & VDB_SIZE)
- printf("vnode_pager_io: vp %p, off %ld size %ld\n",
- vnp->vnp_vp, foff, vnp->vnp_size);
-#endif
- return(VM_PAGER_BAD);
- }
- if (foff + PAGE_SIZE > vnp->vnp_size)
- size = vnp->vnp_size - foff;
- else
- size = PAGE_SIZE;
- aiov.iov_base = (caddr_t)kva;
- aiov.iov_len = size;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = foff;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_rw = rw;
- auio.uio_resid = size;
- auio.uio_procp = (struct proc *)0;
-#ifdef DEBUG
- if (vpagerdebug & VDB_IO)
- printf("vnode_pager_io: vp %p kva %lx foff %lx size %x",
- vnp->vnp_vp, kva, foff, size);
-#endif
- if (rw == UIO_READ)
- error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred);
- else
- error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred);
- VOP_UNLOCK(vnp->vnp_vp, 0, p);
-#ifdef DEBUG
- if (vpagerdebug & VDB_IO) {
- if (error || auio.uio_resid)
- printf(" returns error %x, resid %x",
- error, auio.uio_resid);
- printf("\n");
- }
-#endif
- if (!error) {
- register int count = size - auio.uio_resid;
-
- if (count == 0)
- error = EINVAL;
- else if (count != PAGE_SIZE && rw == UIO_READ)
- bzero((void *)(kva + count), PAGE_SIZE - count);
- }
- vm_pager_unmap_pages(kva, npages);
- return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
-}
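
The tail of vnode_pager_io() above handles the last page of a file: requests entirely past EOF fail, a zero-length read is an error, and after a short read the remainder of the page is zero-filled. The sketch below models only that post-read fixup in userland; fixup_read() and PAGE_SZ are invented names, not the pager's interface.

#include <stdio.h>
#include <string.h>

#define PAGE_SZ	4096

/*
 * 'foff' is the file offset backing the page, 'fsize' the file size
 * and 'got' how many bytes the read returned.  Returns -1 for the
 * error cases (offset past EOF, nothing read), 0 otherwise; after a
 * short read the tail of the page is zero-filled.
 */
static int
fixup_read(char page[PAGE_SZ], long foff, long fsize, long got)
{
	if (foff >= fsize)
		return (-1);		/* the VM_PAGER_BAD case */
	if (got <= 0)
		return (-1);		/* read nothing: EINVAL case */
	if (got < PAGE_SZ)
		memset(page + got, 0, PAGE_SZ - got);
	return (0);
}

int
main(void)
{
	char page[PAGE_SZ];

	memset(page, 0x5a, sizeof(page));
	printf("fixup: %d\n", fixup_read(page, 8192, 8192 + 100, 100));
	printf("tail byte: %d\n", page[PAGE_SZ - 1]);	/* now 0 */
	return (0);
}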