133 files changed, 6236 insertions, 7293 deletions
diff --git a/sys/adosfs/adosfs.h b/sys/adosfs/adosfs.h index c608fba701b..5c0b9e3c893 100644 --- a/sys/adosfs/adosfs.h +++ b/sys/adosfs/adosfs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: adosfs.h,v 1.9 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: adosfs.h,v 1.10 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: adosfs.h,v 1.12 1996/10/08 22:18:02 thorpej Exp $ */ /* @@ -32,8 +32,6 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <miscfs/genfs/genfs.h> - /* * Amigados datestamp. (from 1/1/1978 00:00:00 local) */ @@ -53,7 +51,6 @@ enum anode_type { AROOT, ADIR, AFILE, ALDIR, ALFILE, ASLINK }; * table for f/e. it is always ANODETABSZ(ap) bytes in size. */ struct anode { - struct genfs_node gnode; LIST_ENTRY(anode) link; enum anode_type type; char name[31]; /* (r/d/f) name for object */ diff --git a/sys/adosfs/advfsops.c b/sys/adosfs/advfsops.c index 5cd599c4f8e..43a673042b5 100644 --- a/sys/adosfs/advfsops.c +++ b/sys/adosfs/advfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: advfsops.c,v 1.20 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: advfsops.c,v 1.21 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: advfsops.c,v 1.24 1996/12/22 10:10:12 cgd Exp $ */ /* @@ -557,7 +557,6 @@ adosfs_vget(mp, an, vpp) *vpp = vp; /* return vp */ brelse(bp); /* release buffer */ - vp->v_size = ap->fsize; return (0); } diff --git a/sys/adosfs/advnops.c b/sys/adosfs/advnops.c index 4928e2b1b93..27f806f3bce 100644 --- a/sys/adosfs/advnops.c +++ b/sys/adosfs/advnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: advnops.c,v 1.21 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: advnops.c,v 1.22 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: advnops.c,v 1.32 1996/10/13 02:52:09 christos Exp $ */ /* @@ -131,9 +131,7 @@ struct vnodeopv_entry_desc adosfs_vnodeop_entries[] = { { &vop_pathconf_desc, adosfs_pathconf }, /* pathconf */ { &vop_advlock_desc, adosfs_advlock }, /* advlock */ { &vop_bwrite_desc, adosfs_bwrite }, /* bwrite */ - { &vop_getpages_desc, genfs_getpages }, - { &vop_mmap_desc, vop_generic_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc adosfs_vnodeop_opv_desc = @@ -274,28 +272,6 @@ adosfs_read(v) /* * taken from ufs_read() */ - - if (sp->a_vp->v_type == VREG) { - error = 0; - while (uio->uio_resid > 0) { - void *win; - vsize_t bytelen = min(ap->fsize - uio->uio_offset, - uio->uio_resid); - - if (bytelen == 0) { - break; - } - win = ubc_alloc(&sp->a_vp->v_uobj, uio->uio_offset, - &bytelen, UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) { - break; - } - } - goto reterr; - } - do { /* * we are only supporting ADosFFS currently diff --git a/sys/arch/alpha/alpha/pmap.c b/sys/arch/alpha/alpha/pmap.c index 8306df273c4..22fb769b976 100644 --- a/sys/arch/alpha/alpha/pmap.c +++ b/sys/arch/alpha/alpha/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.31 2001/12/08 02:24:05 art Exp $ */ +/* $OpenBSD: pmap.c,v 1.32 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: pmap.c,v 1.154 2000/12/07 22:18:55 thorpej Exp $ */ /*- @@ -809,7 +809,7 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) /* * Figure out how many PTE's are necessary to map the kernel. 
*/ - lev3mapsize = (VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) + + lev3mapsize = (VM_PHYS_SIZE + nbuf * MAXBSIZE + 16 * NCARGS + PAGER_MAP_SIZE) / NBPG + (maxproc * UPAGES) + nkmempages; diff --git a/sys/arch/amiga/dev/clock.c b/sys/arch/amiga/dev/clock.c index 213c3fa01e2..a9665e25289 100644 --- a/sys/arch/amiga/dev/clock.c +++ b/sys/arch/amiga/dev/clock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: clock.c,v 1.12 2001/11/28 13:47:38 art Exp $ */ +/* $OpenBSD: clock.c,v 1.13 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: clock.c,v 1.25 1997/01/02 20:59:42 is Exp $ */ /* @@ -521,8 +521,8 @@ clockunmmap(dev, addr, p) if (addr == 0) return(EINVAL); /* XXX: how do we deal with this? */ - uvm_deallocate(p->p_vmspace->vm_map, (vm_offset_t)addr, PAGE_SIZE); - return (0); + rv = vm_deallocate(p->p_vmspace->vm_map, (vm_offset_t)addr, PAGE_SIZE); + return(rv == KERN_SUCCESS ? 0 : EINVAL); } startclock() diff --git a/sys/arch/hp300/dev/grf.c b/sys/arch/hp300/dev/grf.c index 3b1801f6a28..322cc2e78e9 100644 --- a/sys/arch/hp300/dev/grf.c +++ b/sys/arch/hp300/dev/grf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: grf.c,v 1.17 2001/11/28 13:47:38 art Exp $ */ +/* $OpenBSD: grf.c,v 1.18 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: grf.c,v 1.30 1998/08/20 08:33:41 kleink Exp $ */ /* @@ -654,6 +654,7 @@ grfunmap(dev, addr, p) struct grf_softc *sc = grf_cd.cd_devs[GRFUNIT(dev)]; struct grf_data *gp = sc->sc_data; vsize_t size; + int rv; #ifdef DEBUG if (grfdebug & GDB_MMAP) @@ -663,8 +664,9 @@ grfunmap(dev, addr, p) return(EINVAL); /* XXX: how do we deal with this? */ (void) (*gp->g_sw->gd_mode)(gp, GM_UNMAP, 0); size = round_page(gp->g_display.gd_regsize + gp->g_display.gd_fbsize); - uvm_unmap(&p->p_vmspace->vm_map, (vaddr_t)addr, (vaddr_t)addr + size); - return (0); + rv = uvm_unmap(&p->p_vmspace->vm_map, (vaddr_t)addr, + (vaddr_t)addr + size); + return(rv == KERN_SUCCESS ? 0 : EINVAL); } #ifdef COMPAT_HPUX diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index 587bd8c8f97..d1b677fc309 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,5 +1,5 @@ -/* $OpenBSD: pmap.c,v 1.56 2001/12/11 18:49:25 art Exp $ */ -/* $NetBSD: pmap.c,v 1.120 2001/04/22 23:42:14 thorpej Exp $ */ +/* $OpenBSD: pmap.c,v 1.57 2001/12/19 08:58:05 art Exp $ */ +/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* * @@ -290,15 +290,6 @@ int nkpde = NKPTP; int pmap_pg_g = 0; -#ifdef LARGEPAGES -/* - * pmap_largepages: if our processor supports PG_PS and we are - * using it, this is set to TRUE. - */ - -int pmap_largepages; -#endif - /* * i386 physical memory comes in a big contig chunk with a small * hole toward the front of it... 
the following 4 paddr_t's @@ -307,6 +298,8 @@ int pmap_largepages; */ paddr_t avail_start; /* PA of first available physical page */ paddr_t avail_end; /* PA of last available physical page */ +paddr_t hole_start; /* PA of start of "hole" */ +paddr_t hole_end; /* PA of end of "hole" */ /* * other data structures @@ -346,6 +339,7 @@ static vaddr_t pv_cachedva; /* cached VA for later use */ */ static struct pmap_head pmaps; +static struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */ /* * pool that pmap structures are allocated from @@ -354,15 +348,6 @@ static struct pmap_head pmaps; struct pool pmap_pmap_pool; /* - * pool and cache that PDPs are allocated from - */ - -struct pool pmap_pdp_pool; -struct pool_cache pmap_pdp_cache; - -int pmap_pdp_ctor(void *, void *, int); - -/* * special VAs and the PTEs that map them */ @@ -389,7 +374,7 @@ extern vaddr_t pentium_idt_vaddr; */ static struct pv_entry *pmap_add_pvpage __P((struct pv_page *, boolean_t)); -static struct vm_page *pmap_alloc_ptp __P((struct pmap *, int)); +static struct vm_page *pmap_alloc_ptp __P((struct pmap *, int, boolean_t)); static struct pv_entry *pmap_alloc_pv __P((struct pmap *, int)); /* see codes below */ #define ALLOCPV_NEED 0 /* need PV now */ #define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ @@ -402,27 +387,36 @@ static void pmap_free_pv __P((struct pmap *, struct pv_entry *)); static void pmap_free_pvs __P((struct pmap *, struct pv_entry *)); static void pmap_free_pv_doit __P((struct pv_entry *)); static void pmap_free_pvpage __P((void)); -static struct vm_page *pmap_get_ptp __P((struct pmap *, int)); +static struct vm_page *pmap_get_ptp __P((struct pmap *, int, boolean_t)); static boolean_t pmap_is_curpmap __P((struct pmap *)); static pt_entry_t *pmap_map_ptes __P((struct pmap *)); static struct pv_entry *pmap_remove_pv __P((struct pv_head *, struct pmap *, vaddr_t)); -static void pmap_do_remove __P((struct pmap *, vaddr_t, - vaddr_t, int)); static boolean_t pmap_remove_pte __P((struct pmap *, struct vm_page *, - pt_entry_t *, vaddr_t, int)); + pt_entry_t *, vaddr_t)); static void pmap_remove_ptes __P((struct pmap *, struct pmap_remove_record *, struct vm_page *, vaddr_t, - vaddr_t, vaddr_t, int)); -#define PMAP_REMOVE_ALL 0 /* remove all mappings */ -#define PMAP_REMOVE_SKIPWIRED 1 /* skip wired mappings */ + vaddr_t, vaddr_t)); +static struct vm_page *pmap_steal_ptp __P((struct uvm_object *, + vaddr_t)); static vaddr_t pmap_tmpmap_pa __P((paddr_t)); static pt_entry_t *pmap_tmpmap_pvepte __P((struct pv_entry *)); static void pmap_tmpunmap_pa __P((void)); static void pmap_tmpunmap_pvepte __P((struct pv_entry *)); +static boolean_t pmap_transfer_ptes __P((struct pmap *, + struct pmap_transfer_location *, + struct pmap *, + struct pmap_transfer_location *, + int, boolean_t)); +static boolean_t pmap_try_steal_pv __P((struct pv_head *, + struct pv_entry *, + struct pv_entry *)); static void pmap_unmap_ptes __P((struct pmap *)); +void pmap_pinit __P((pmap_t)); +void pmap_release __P((pmap_t)); + /* * p m a p i n l i n e h e l p e r f u n c t i o n s */ @@ -603,16 +597,8 @@ pmap_kenter_pa(va, pa, prot) { pt_entry_t *pte, opte; - if (va < VM_MIN_KERNEL_ADDRESS) - pte = vtopte(va); - else - pte = kvtopte(va); + pte = vtopte(va); opte = *pte; -#ifdef LARGEPAGES - /* XXX For now... */ - if (opte & PG_PS) - panic("pmap_kenter_pa: PG_PS"); -#endif *pte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V | pmap_pg_g; /* zap! 
*/ if (pmap_valid_entry(opte)) @@ -638,16 +624,8 @@ pmap_kremove(va, len) pt_entry_t *pte; len >>= PAGE_SHIFT; - for ( /* null */ ; len ; len--, va += PAGE_SIZE) { - if (va < VM_MIN_KERNEL_ADDRESS) - pte = vtopte(va); - else - pte = kvtopte(va); -#ifdef LARGEPAGES - /* XXX For now... */ - if (*pte & PG_PS) - panic("pmap_kremove: PG_PS"); -#endif + for ( /* null */ ; len ; len--, va += NBPG) { + pte = vtopte(va); #ifdef DIAGNOSTIC if (*pte & PG_PVLIST) panic("pmap_kremove: PG_PVLIST mapping for 0x%lx\n", @@ -782,44 +760,6 @@ pmap_bootstrap(kva_start) PTE_BASE[i386_btop(kva)] |= PG_G; } -#ifdef LARGEPAGES - /* - * enable large pages of they are supported. - */ - - if (cpu_feature & CPUID_PSE) { - paddr_t pa; - vaddr_t kva_end; - pd_entry_t *pde; - extern char _etext; - - lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */ - pmap_largepages = 1; /* enable software */ - - /* - * the TLB must be flushed after enabling large pages - * on Pentium CPUs, according to section 3.6.2.2 of - * "Intel Architecture Software Developer's Manual, - * Volume 3: System Programming". - */ - tlbflush(); - - /* - * now, remap the kernel text using large pages. we - * assume that the linker has properly aligned the - * .data segment to a 4MB boundary. - */ - kva_end = roundup((vaddr_t)&_etext, NBPD); - for (pa = 0, kva = KERNBASE; kva < kva_end; - kva += NBPD, pa += NBPD) { - pde = &kpm->pm_pdir[pdei(kva)]; - *pde = pa | pmap_pg_g | PG_PS | - PG_KR | PG_V; /* zap! */ - tlbflush(); - } - } -#endif /* LARGEPAGES */ - /* * now we allocate the "special" VAs which are used for tmp mappings * by the pmap (and other modules). we allocate the VAs by advancing @@ -899,14 +839,41 @@ pmap_bootstrap(kva_start) pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", 0, pool_page_alloc_nointr, pool_page_free_nointr, M_VMPMAP); - /* - * initialize the PDE pool and cache. +#ifdef __NetBSD__ + /* + * we must call uvm_page_physload() after we are done playing with + * virtual_avail but before we call pmap_steal_memory. [i.e. here] + * this call tells the VM system how much physical memory it + * controls. If we have 16M of RAM or less, just put it all on + * the default free list. Otherwise, put the first 16M of RAM + * on a lower priority free list (so that all of the ISA DMA'able + * memory won't be eaten up first-off). */ - pool_init(&pmap_pdp_pool, PAGE_SIZE, 0, 0, 0, "pdppl", - 0, pool_page_alloc_nointr, pool_page_free_nointr, M_VMPMAP); - pool_cache_init(&pmap_pdp_cache, &pmap_pdp_pool, - pmap_pdp_ctor, NULL, NULL); + if (avail_end <= (16 * 1024 * 1024)) + first16q = VM_FREELIST_DEFAULT; + else + first16q = VM_FREELIST_FIRST16; + + if (avail_start < hole_start) /* any free memory before the hole? */ + uvm_page_physload(atop(avail_start), atop(hole_start), + atop(avail_start), atop(hole_start), + first16q); + + if (first16q != VM_FREELIST_DEFAULT && + hole_end < 16 * 1024 * 1024) { + uvm_page_physload(atop(hole_end), atop(16 * 1024 * 1024), + atop(hole_end), atop(16 * 1024 * 1024), + first16q); + uvm_page_physload(atop(16 * 1024 * 1024), atop(avail_end), + atop(16 * 1024 * 1024), atop(avail_end), + VM_FREELIST_DEFAULT); + } else { + uvm_page_physload(atop(hole_end), atop(avail_end), + atop(hole_end), atop(avail_end), + VM_FREELIST_DEFAULT); + } +#endif /* * ensure the TLB is sync'd with reality by flushing it... 
@@ -1025,8 +992,8 @@ pmap_alloc_pv(pmap, mode) simple_lock(&pvalloc_lock); - pvpage = TAILQ_FIRST(&pv_freepages); - if (pvpage != NULL) { + if (pv_freepages.tqh_first != NULL) { + pvpage = pv_freepages.tqh_first; pvpage->pvinfo.pvpi_nfree--; if (pvpage->pvinfo.pvpi_nfree == 0) { /* nothing left in this one? */ @@ -1079,17 +1046,17 @@ pmap_alloc_pvpage(pmap, mode) { struct vm_page *pg; struct pv_page *pvpage; - struct pv_entry *pv; - int s; + int lcv, idx, npg, s; + struct pv_entry *pv, *cpv, *prevpv; /* * if we need_entry and we've got unused pv_pages, allocate from there */ - pvpage = TAILQ_FIRST(&pv_unusedpgs); - if (mode != ALLOCPV_NONEED && pvpage != NULL) { + if (mode != ALLOCPV_NONEED && pv_unusedpgs.tqh_first != NULL) { /* move it to pv_freepages list */ + pvpage = pv_unusedpgs.tqh_first; TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list); TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list); @@ -1111,24 +1078,24 @@ pmap_alloc_pvpage(pmap, mode) * if not, try to allocate one. */ - s = splvm(); /* must protect kmem_map/kmem_object with splvm! */ + s = splimp(); /* must protect kmem_map/kmem_object with splimp! */ if (pv_cachedva == 0) { pv_cachedva = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object, - PAGE_SIZE, UVM_KMF_TRYLOCK|UVM_KMF_VALLOC); + NBPG, UVM_KMF_TRYLOCK|UVM_KMF_VALLOC); if (pv_cachedva == 0) { splx(s); - return (NULL); + goto steal_one; } } /* * we have a VA, now let's try and allocate a page in the object - * note: we are still holding splvm to protect kmem_object + * note: we are still holding splimp to protect kmem_object */ if (!simple_lock_try(&uvmexp.kmem_object->vmobjlock)) { splx(s); - return (NULL); + goto steal_one; } pg = uvm_pagealloc(uvmexp.kmem_object, pv_cachedva - @@ -1139,10 +1106,10 @@ pmap_alloc_pvpage(pmap, mode) simple_unlock(&uvmexp.kmem_object->vmobjlock); splx(s); - /* splvm now dropped */ + /* splimp now dropped */ if (pg == NULL) - return (NULL); + goto steal_one; /* * add a mapping for our new pv_page and free its entrys (save one!) @@ -1151,12 +1118,113 @@ pmap_alloc_pvpage(pmap, mode) * pmap is already locked! (...but entering the mapping is safe...) */ - pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ|VM_PROT_WRITE); - pmap_update(pmap_kernel()); - pvpage = (struct pv_page *)pv_cachedva; + pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL); + pvpage = (struct pv_page *) pv_cachedva; pv_cachedva = 0; - return (pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED)); + return(pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED)); + +steal_one: + /* + * if we don't really need a pv_entry right now, we can just return. + */ + + if (mode != ALLOCPV_NEED) + return(NULL); + + /* + * last ditch effort! we couldn't allocate a free page to make + * more pv_entrys so we try and steal one from someone else. + */ + + pv = NULL; + for (lcv = 0 ; pv == NULL && lcv < vm_nphysseg ; lcv++) { + npg = vm_physmem[lcv].end - vm_physmem[lcv].start; + for (idx = 0 ; idx < npg ; idx++) { + struct pv_head *pvhead = vm_physmem[lcv].pmseg.pvhead; + + if (pvhead->pvh_list == NULL) + continue; /* spot check */ + if (!simple_lock_try(&pvhead->pvh_lock)) + continue; + cpv = prevpv = pvhead->pvh_list; + while (cpv) { + if (pmap_try_steal_pv(pvhead, cpv, prevpv)) + break; + prevpv = cpv; + cpv = cpv->pv_next; + } + simple_unlock(&pvhead->pvh_lock); + /* got one? break out of the loop! */ + if (cpv) { + pv = cpv; + break; + } + } + } + + return(pv); +} + +/* + * pmap_try_steal_pv: try and steal a pv_entry from a pmap + * + * => return true if we did it! 
+ */ + +static boolean_t +pmap_try_steal_pv(pvh, cpv, prevpv) + struct pv_head *pvh; + struct pv_entry *cpv, *prevpv; +{ + pt_entry_t *ptep; /* pointer to a PTE */ + + /* + * we never steal kernel mappings or mappings from pmaps we can't lock + */ + + if (cpv->pv_pmap == pmap_kernel() || + !simple_lock_try(&cpv->pv_pmap->pm_obj.vmobjlock)) + return(FALSE); + + /* + * yes, we can try and steal it. first we need to remove the + * mapping from the pmap. + */ + + ptep = pmap_tmpmap_pvepte(cpv); + if (*ptep & PG_W) { + ptep = NULL; /* wired page, avoid stealing this one */ + } else { + *ptep = 0; /* zap! */ + if (pmap_is_curpmap(cpv->pv_pmap)) + pmap_update_pg(cpv->pv_va); + pmap_tmpunmap_pvepte(cpv); + } + if (ptep == NULL) { + simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock); + return(FALSE); /* wired page, abort! */ + } + cpv->pv_pmap->pm_stats.resident_count--; + if (cpv->pv_ptp && cpv->pv_ptp->wire_count) + /* drop PTP's wired count */ + cpv->pv_ptp->wire_count--; + + /* + * XXX: if wire_count goes to one the PTP could be freed, however, + * we'd have to lock the page queues (etc.) to do that and it could + * cause deadlock headaches. besides, the pmap we just stole from + * may want the mapping back anyway, so leave the PTP around. + */ + + /* + * now we need to remove the entry from the pvlist + */ + + if (cpv == pvh->pvh_list) + pvh->pvh_list = cpv->pv_next; + else + prevpv->pv_next = cpv->pv_next; + return(TRUE); } /* @@ -1417,19 +1485,33 @@ pmap_remove_pv(pvh, pmap, va) * => we use the ptp's wire_count to count the number of active mappings * in the PTP (we start it at one to prevent any chance this PTP * will ever leak onto the active/inactive queues) + * => we should not be holding any pv_head locks (in case we are forced + * to call pmap_steal_ptp()) + * => we may need to lock pv_head's if we have to steal a PTP + * => just_try: true if we want a PTP, but not enough to steal one + * from another pmap (e.g. during optional functions like pmap_copy) */ __inline static struct vm_page * -pmap_alloc_ptp(pmap, pde_index) +pmap_alloc_ptp(pmap, pde_index, just_try) struct pmap *pmap; int pde_index; + boolean_t just_try; { struct vm_page *ptp; ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO); - if (ptp == NULL) - return(NULL); + if (ptp == NULL) { + if (just_try) + return(NULL); + ptp = pmap_steal_ptp(&pmap->pm_obj, ptp_i2o(pde_index)); + if (ptp == NULL) { + return (NULL); + } + /* stole one; zero it. */ + pmap_zero_page(VM_PAGE_TO_PHYS(ptp)); + } /* got one! */ ptp->flags &= ~PG_BUSY; /* never busy */ @@ -1442,6 +1524,111 @@ pmap_alloc_ptp(pmap, pde_index) } /* + * pmap_steal_ptp: steal a PTP from any pmap that we can access + * + * => obj is locked by caller. 
+ * => we can throw away mappings at this level (except in the kernel's pmap) + * => stolen PTP is placed in <obj,offset> pmap + * => we lock pv_head's + * => hopefully, this function will be seldom used [much better to have + * enough free pages around for us to allocate off the free page list] + */ + +static struct vm_page * +pmap_steal_ptp(obj, offset) + struct uvm_object *obj; + vaddr_t offset; +{ + struct vm_page *ptp = NULL; + struct pmap *firstpmap; + struct uvm_object *curobj; + pt_entry_t *ptes; + int idx, lcv; + boolean_t caller_locked, we_locked; + + simple_lock(&pmaps_lock); + if (pmaps_hand == NULL) + pmaps_hand = LIST_FIRST(&pmaps); + firstpmap = pmaps_hand; + + do { /* while we haven't looped back around to firstpmap */ + + curobj = &pmaps_hand->pm_obj; + we_locked = FALSE; + caller_locked = (curobj == obj); + if (!caller_locked) { + we_locked = simple_lock_try(&curobj->vmobjlock); + } + if (caller_locked || we_locked) { + ptp = curobj->memq.tqh_first; + for (/*null*/; ptp != NULL; ptp = ptp->listq.tqe_next) { + + /* + * might have found a PTP we can steal + * (unless it has wired pages). + */ + + idx = ptp_o2i(ptp->offset); +#ifdef DIAGNOSTIC + if (VM_PAGE_TO_PHYS(ptp) != + (pmaps_hand->pm_pdir[idx] & PG_FRAME)) + panic("pmap_steal_ptp: PTP mismatch!"); +#endif + + ptes = (pt_entry_t *) + pmap_tmpmap_pa(VM_PAGE_TO_PHYS(ptp)); + for (lcv = 0 ; lcv < PTES_PER_PTP ; lcv++) + if ((ptes[lcv] & (PG_V|PG_W)) == + (PG_V|PG_W)) + break; + if (lcv == PTES_PER_PTP) + pmap_remove_ptes(pmaps_hand, NULL, ptp, + (vaddr_t)ptes, + ptp_i2v(idx), + ptp_i2v(idx+1)); + pmap_tmpunmap_pa(); + + if (lcv != PTES_PER_PTP) + /* wired, try next PTP */ + continue; + + /* + * got it!!! + */ + + pmaps_hand->pm_pdir[idx] = 0; /* zap! */ + pmaps_hand->pm_stats.resident_count--; + if (pmap_is_curpmap(pmaps_hand)) + tlbflush(); + else if (pmap_valid_entry(*APDP_PDE) && + (*APDP_PDE & PG_FRAME) == + pmaps_hand->pm_pdirpa) { + pmap_update_pg(((vaddr_t)APTE_BASE) + + ptp->offset); + } + + /* put it in our pmap! */ + uvm_pagerealloc(ptp, obj, offset); + break; /* break out of "for" loop */ + } + if (we_locked) { + simple_unlock(&curobj->vmobjlock); + } + } + + /* advance the pmaps_hand */ + pmaps_hand = LIST_NEXT(pmaps_hand, pm_list); + if (pmaps_hand == NULL) { + pmaps_hand = LIST_FIRST(&pmaps); + } + + } while (ptp == NULL && pmaps_hand != firstpmap); + + simple_unlock(&pmaps_lock); + return(ptp); +} + +/* * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) * * => pmap should NOT be pmap_kernel() @@ -1449,9 +1636,10 @@ pmap_alloc_ptp(pmap, pde_index) */ static struct vm_page * -pmap_get_ptp(pmap, pde_index) +pmap_get_ptp(pmap, pde_index, just_try) struct pmap *pmap; int pde_index; + boolean_t just_try; { struct vm_page *ptp; @@ -1473,7 +1661,7 @@ pmap_get_ptp(pmap, pde_index) } /* allocate a new PTP (updates ptphint) */ - return(pmap_alloc_ptp(pmap, pde_index)); + return(pmap_alloc_ptp(pmap, pde_index, just_try)); } /* @@ -1481,41 +1669,6 @@ pmap_get_ptp(pmap, pde_index) */ /* - * pmap_pdp_ctor: constructor for the PDP cache. - */ - -int -pmap_pdp_ctor(void *arg, void *object, int flags) -{ - pd_entry_t *pdir = object; - paddr_t pdirpa; - - /* - * NOTE: The `pmap_lock' is held when the PDP is allocated. - * WE MUST NOT BLOCK! - */ - - /* fetch the physical address of the page directory. 
*/ - (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa); - - /* zero init area */ - memset(pdir, 0, PDSLOT_PTE * sizeof(pd_entry_t)); - - /* put in recursibve PDE to map the PTEs */ - pdir[PDSLOT_PTE] = pdirpa | PG_V | PG_KW; - - /* put in kernel VM PDEs */ - memcpy(&pdir[PDSLOT_KERN], &PDP_BASE[PDSLOT_KERN], - nkpde * sizeof(pd_entry_t)); - - /* zero the rest */ - memset(&pdir[PDSLOT_KERN + nkpde], 0, - PAGE_SIZE - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); - - return (0); -} - -/* * pmap_create: create a pmap * * => note: old pmap interface took a "size" args which allowed for @@ -1528,7 +1681,18 @@ pmap_create() struct pmap *pmap; pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); + pmap_pinit(pmap); + return(pmap); +} + +/* + * pmap_pinit: given a zero'd pmap structure, init it. + */ +void +pmap_pinit(pmap) + struct pmap *pmap; +{ /* init uvm_object */ simple_lock_init(&pmap->pm_obj.vmobjlock); pmap->pm_obj.pgops = NULL; /* currently not a mappable object */ @@ -1540,35 +1704,39 @@ pmap_create() pmap->pm_ptphint = NULL; pmap->pm_flags = 0; + /* allocate PDP */ + pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG); + if (pmap->pm_pdir == NULL) + panic("pmap_pinit: kernel_map out of virtual space!"); + (void) pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir, + (paddr_t *)&pmap->pm_pdirpa); + + /* init PDP */ + /* zero init area */ + bzero(pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); + /* put in recursive PDE to map the PTEs */ + pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW; + /* init the LDT */ pmap->pm_ldt = NULL; pmap->pm_ldt_len = 0; pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); - /* allocate PDP */ - /* * we need to lock pmaps_lock to prevent nkpde from changing on - * us. note that there is no need to splvm to protect us from - * malloc since malloc allocates out of a submap and we should - * have already allocated kernel PTPs to cover the range... - * - * NOTE: WE MUST NOT BLOCK WHILE HOLDING THE `pmap_lock'! + * us. note that there is no need to splimp to protect us from + * malloc since malloc allocates out of a submap and we should have + * already allocated kernel PTPs to cover the range... */ simple_lock(&pmaps_lock); - - /* XXX Need a generic "I want memory" wchan */ - while ((pmap->pm_pdir = - pool_cache_get(&pmap_pdp_cache, PR_NOWAIT)) == NULL) - (void) ltsleep(&lbolt, PVM, "pmapcr", hz >> 3, &pmaps_lock); - - pmap->pm_pdirpa = pmap->pm_pdir[PDSLOT_PTE] & PG_FRAME; - + /* put in kernel VM PDEs */ + bcopy(&PDP_BASE[PDSLOT_KERN], &pmap->pm_pdir[PDSLOT_KERN], + nkpde * sizeof(pd_entry_t)); + /* zero the rest */ + bzero(&pmap->pm_pdir[PDSLOT_KERN + nkpde], + NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); LIST_INSERT_HEAD(&pmaps, pmap, pm_list); - simple_unlock(&pmaps_lock); - - return (pmap); } /* @@ -1580,7 +1748,6 @@ void pmap_destroy(pmap) struct pmap *pmap; { - struct vm_page *pg; int refs; /* @@ -1598,11 +1765,32 @@ pmap_destroy(pmap) * reference count is zero, free pmap resources and then free pmap. */ + pmap_release(pmap); + pool_put(&pmap_pmap_pool, pmap); +} + +/* + * pmap_release: release all resources held by a pmap + * + * => if pmap is still referenced it should be locked + * => XXX: we currently don't expect any busy PTPs because we don't + * allow anything to map them (except for the kernel's private + * recursive mapping) or make them busy. 
+ */ + +void +pmap_release(pmap) + struct pmap *pmap; +{ + struct vm_page *pg; + /* * remove it from global list of pmaps */ simple_lock(&pmaps_lock); + if (pmap == pmaps_hand) + pmaps_hand = LIST_NEXT(pmaps_hand, pm_list); LIST_REMOVE(pmap, pm_list); simple_unlock(&pmaps_lock); @@ -1610,7 +1798,8 @@ pmap_destroy(pmap) * free any remaining PTPs */ - while ((pg = TAILQ_FIRST(&pmap->pm_obj.memq)) != NULL) { + while (pmap->pm_obj.memq.tqh_first != NULL) { + pg = pmap->pm_obj.memq.tqh_first; #ifdef DIAGNOSTIC if (pg->flags & PG_BUSY) panic("pmap_release: busy page table page"); @@ -1622,7 +1811,7 @@ pmap_destroy(pmap) } /* XXX: need to flush it out of other processor's APTE space? */ - pool_cache_put(&pmap_pdp_cache, pmap->pm_pdir); + uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG); #ifdef USER_LDT if (pmap->pm_flags & PMF_USER_LDT) { @@ -1635,8 +1824,6 @@ pmap_destroy(pmap) pmap->pm_ldt_len * sizeof(union descriptor)); } #endif - - pool_put(&pmap_pmap_pool, pmap); } /* @@ -1776,49 +1963,20 @@ pmap_extract(pmap, va, pap) vaddr_t va; paddr_t *pap; { - pt_entry_t *ptes, pte; - pd_entry_t pde; - - if (__predict_true((pde = pmap->pm_pdir[pdei(va)]) != 0)) { -#ifdef LARGEPAGES - if (pde & PG_PS) { - if (pap != NULL) - *pap = (pde & PG_LGFRAME) | (va & ~PG_LGFRAME); - return (TRUE); - } -#endif + paddr_t retval; + pt_entry_t *ptes; + if (pmap->pm_pdir[pdei(va)]) { ptes = pmap_map_ptes(pmap); - pte = ptes[i386_btop(va)]; + retval = (paddr_t)(ptes[i386_btop(va)] & PG_FRAME); pmap_unmap_ptes(pmap); - - if (__predict_true((pte & PG_V) != 0)) { - if (pap != NULL) - *pap = (pte & PG_FRAME) | (va & ~PG_FRAME); - return (TRUE); - } + if (pap != NULL) + *pap = retval | (va & ~PG_FRAME); + return (TRUE); } return (FALSE); } -#ifdef LARGEPAGES -/* - * vtophys: virtual address to physical address. For use by - * machine-dependent code only. - */ - -paddr_t -vtophys(va) - vaddr_t va; -{ - paddr_t pa; - - if (pmap_extract(pmap_kernel(), va, &pa) == TRUE) - return (pa); - return (0); -} -#endif - /* * pmap_virtual_space: used during bootup [pmap_steal_memory] to * determine the bounds of the kernel virtual addess space. @@ -1841,10 +1999,15 @@ void pmap_zero_page(pa) paddr_t pa; { - simple_lock(&pmap_zero_page_lock); +#ifdef DIAGNOSTIC + if (*zero_pte) + panic("pmap_zero_page: lock botch"); +#endif + *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */ - bzero(zerop, PAGE_SIZE); /* zero */ + bzero(zerop, NBPG); /* zero */ + *zero_pte = 0; /* zap! */ pmap_update_pg((vaddr_t)zerop); /* flush TLB */ simple_unlock(&pmap_zero_page_lock); } @@ -1857,31 +2020,20 @@ boolean_t pmap_zero_page_uncached(pa) paddr_t pa; { - int i, *ptr; - boolean_t rv = TRUE; - simple_lock(&pmap_zero_page_lock); +#ifdef DIAGNOSTIC + if (*zero_pte) + panic("pmap_zero_page_uncached: lock botch"); +#endif *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW | /* map in */ ((cpu_class != CPUCLASS_386) ? PG_N : 0); - pmap_update_pg((vaddr_t)zerop); - for (i = 0, ptr = (int *) zerop; i < PAGE_SIZE / sizeof(int); i++) { - if (whichqs != 0) { - /* - * A process has become ready. Abort now, - * so we don't keep it waiting while we - * do slow memory access to finish this - * page. - */ - rv = FALSE; - break; - } - *ptr++ = 0; - } - + memset(zerop, 0, NBPG); /* zero */ + *zero_pte = 0; /* zap! 
*/ + pmap_update_pg((vaddr_t)zerop); /* flush TLB */ simple_unlock(&pmap_zero_page_lock); - return (rv); + return (TRUE); } /* @@ -1922,13 +2074,12 @@ pmap_copy_page(srcpa, dstpa) */ static void -pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva, flags) +pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva) struct pmap *pmap; struct pmap_remove_record *pmap_rr; struct vm_page *ptp; vaddr_t ptpva; vaddr_t startva, endva; - int flags; { struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ struct pv_entry *pve; @@ -1946,12 +2097,9 @@ pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva, flags) */ for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1) - ; pte++, startva += PAGE_SIZE) { + ; pte++, startva += NBPG) { if (!pmap_valid_entry(*pte)) continue; /* VA not mapped */ - if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { - continue; - } opte = *pte; /* save the old PTE */ *pte = 0; /* zap! */ @@ -2029,12 +2177,11 @@ pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva, flags) */ static boolean_t -pmap_remove_pte(pmap, ptp, pte, va, flags) +pmap_remove_pte(pmap, ptp, pte, va) struct pmap *pmap; struct vm_page *ptp; pt_entry_t *pte; vaddr_t va; - int flags; { pt_entry_t opte; int bank, off; @@ -2042,9 +2189,6 @@ pmap_remove_pte(pmap, ptp, pte, va, flags) if (!pmap_valid_entry(*pte)) return(FALSE); /* VA not mapped */ - if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { - return(FALSE); - } opte = *pte; /* save the old PTE */ *pte = 0; /* zap! */ @@ -2102,21 +2246,6 @@ pmap_remove(pmap, sva, eva) struct pmap *pmap; vaddr_t sva, eva; { - pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL); -} - -/* - * pmap_do_remove: mapping removal guts - * - * => caller should not be holding any pmap locks - */ - -static void -pmap_do_remove(pmap, sva, eva, flags) - struct pmap *pmap; - vaddr_t sva, eva; - int flags; -{ pt_entry_t *ptes; boolean_t result; paddr_t ptppa; @@ -2164,7 +2293,7 @@ pmap_do_remove(pmap, sva, eva, flags) /* do it! */ result = pmap_remove_pte(pmap, ptp, - &ptes[i386_btop(sva)], sva, flags); + &ptes[i386_btop(sva)], sva); /* * if mapping removed and the PTP is no longer @@ -2259,7 +2388,7 @@ pmap_do_remove(pmap, sva, eva, flags) } } pmap_remove_ptes(pmap, prr, ptp, - (vaddr_t)&ptes[i386_btop(sva)], sva, blkendva, flags); + (vaddr_t)&ptes[i386_btop(sva)], sva, blkendva); /* if PTP is no longer being used, free it! */ if (ptp && ptp->wire_count <= 1) { @@ -2756,7 +2885,449 @@ pmap_collect(pmap) */ pmap_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS); - pmap_update(pmap); +} + +/* + * pmap_transfer: transfer (move or copy) mapping from one pmap + * to another. + * + * => this function is optional, it doesn't have to do anything + * => we assume that the mapping in the src pmap is valid (i.e. that + * it doesn't run off the end of the map's virtual space). + * => we assume saddr, daddr, and len are page aligned/lengthed + */ + +void +pmap_transfer(dstpmap, srcpmap, daddr, len, saddr, move) + struct pmap *dstpmap, *srcpmap; + vaddr_t daddr, saddr; + vsize_t len; + boolean_t move; +{ + /* base address of PTEs, dst could be NULL */ + pt_entry_t *srcptes, *dstptes; + + struct pmap_transfer_location srcl, dstl; + int dstvalid; /* # of PTEs left in dst's current PTP */ + struct pmap *mapped_pmap; /* the pmap we passed to pmap_map_ptes */ + vsize_t blklen; + int blkpgs, toxfer; + boolean_t ok; + +#ifdef DIAGNOSTIC + /* + * sanity check: let's make sure our len doesn't overflow our dst + * space. 
+ */ + + if (daddr < VM_MAXUSER_ADDRESS) { + if (VM_MAXUSER_ADDRESS - daddr < len) { + printf("pmap_transfer: no room in user pmap " + "(addr=0x%lx, len=0x%lx)\n", daddr, len); + return; + } + } else if (daddr < VM_MIN_KERNEL_ADDRESS || + daddr >= VM_MAX_KERNEL_ADDRESS) { + printf("pmap_transfer: invalid transfer address 0x%lx\n", + daddr); + } else { + if (VM_MAX_KERNEL_ADDRESS - daddr < len) { + printf("pmap_transfer: no room in kernel pmap " + "(addr=0x%lx, len=0x%lx)\n", daddr, len); + return; + } + } +#endif + + /* + * ideally we would like to have either src or dst pmap's be the + * current pmap so that we can map the other one in APTE space + * (if needed... one of the maps could be the kernel's pmap). + * + * however, if we can't get this, then we have to use the tmpmap + * (alternately we could punt). + */ + + if (!pmap_is_curpmap(dstpmap) && !pmap_is_curpmap(srcpmap)) { + dstptes = NULL; /* dstptes NOT mapped */ + srcptes = pmap_map_ptes(srcpmap); /* let's map the source */ + mapped_pmap = srcpmap; + } else { + if (!pmap_is_curpmap(srcpmap)) { + srcptes = pmap_map_ptes(srcpmap); /* possible APTE */ + dstptes = PTE_BASE; + mapped_pmap = srcpmap; + } else { + dstptes = pmap_map_ptes(dstpmap); /* possible APTE */ + srcptes = PTE_BASE; + mapped_pmap = dstpmap; + } + } + + /* + * at this point we know that the srcptes are mapped. the dstptes + * are mapped if (dstptes != NULL). if (dstptes == NULL) then we + * will have to map the dst PTPs page at a time using the tmpmap. + * [XXX: is it worth the effort, or should we just punt?] + */ + + srcl.addr = saddr; + srcl.pte = &srcptes[i386_btop(srcl.addr)]; + srcl.ptp = NULL; + dstl.addr = daddr; + if (dstptes) + dstl.pte = &dstptes[i386_btop(dstl.addr)]; + else + dstl.pte = NULL; /* we map page at a time */ + dstl.ptp = NULL; + dstvalid = 0; /* force us to load a new dst PTP to start */ + + while (len) { + + /* + * compute the size of this block. + */ + + /* length in bytes */ + blklen = i386_round_pdr(srcl.addr+1) - srcl.addr; + if (blklen > len) + blklen = len; + blkpgs = i386_btop(blklen); + + /* + * if the block is not valid in the src pmap, + * then we can skip it! + */ + + if (!pmap_valid_entry(srcpmap->pm_pdir[pdei(srcl.addr)])) { + len = len - blklen; + srcl.pte = srcl.pte + blkpgs; + srcl.addr += blklen; + dstl.addr += blklen; + if (blkpgs > dstvalid) { + dstvalid = 0; + dstl.ptp = NULL; + } else { + dstvalid = dstvalid - blkpgs; + } + if (dstptes == NULL && (len == 0 || dstvalid == 0)) { + if (dstl.pte) { + pmap_tmpunmap_pa(); + dstl.pte = NULL; + } + } else { + dstl.pte += blkpgs; + } + continue; + } + + /* + * we have a valid source block of "blkpgs" PTEs to transfer. + * if we don't have any dst PTEs ready, then get some. + */ + + if (dstvalid == 0) { + if (!pmap_valid_entry(dstpmap-> + pm_pdir[pdei(dstl.addr)])) { +#ifdef DIAGNOSTIC + if (dstl.addr >= VM_MIN_KERNEL_ADDRESS) + panic("pmap_transfer: missing kernel " + "PTP at 0x%lx", dstl.addr); +#endif + dstl.ptp = pmap_get_ptp(dstpmap, + pdei(dstl.addr), TRUE); + if (dstl.ptp == NULL) /* out of RAM? punt. */ + break; + } else { + dstl.ptp = NULL; + } + dstvalid = i386_btop(i386_round_pdr(dstl.addr+1) - + dstl.addr); + if (dstptes == NULL) { + dstl.pte = (pt_entry_t *) + pmap_tmpmap_pa(dstpmap-> + pm_pdir[pdei(dstl.addr)] + & PG_FRAME); + dstl.pte = dstl.pte + (PTES_PER_PTP - dstvalid); + } + } + + /* + * we have a valid source block of "blkpgs" PTEs to transfer. + * we have a valid dst block of "dstvalid" PTEs ready. + * thus we can transfer min(blkpgs, dstvalid) PTEs now. 
+ */ + + srcl.ptp = NULL; /* don't know source PTP yet */ + if (dstvalid < blkpgs) + toxfer = dstvalid; + else + toxfer = blkpgs; + + if (toxfer > 0) { + ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl, + toxfer, move); + + if (!ok) /* memory shortage? punt. */ + break; + + dstvalid -= toxfer; + blkpgs -= toxfer; + len -= i386_ptob(toxfer); + if (blkpgs == 0) /* out of src PTEs? restart */ + continue; + } + + /* + * we have a valid source block of "blkpgs" PTEs left + * to transfer. we have just used up our "dstvalid" + * PTEs, and thus must obtain more dst PTEs to finish + * off the src block. since we are now going to + * obtain a brand new dst PTP, we know we can finish + * the src block in one more transfer. + */ + +#ifdef DIAGNOSTIC + if (dstvalid) + panic("pmap_transfer: dstvalid non-zero after drain"); + if ((dstl.addr & (NBPD-1)) != 0) + panic("pmap_transfer: dstaddr not on PD boundary " + "(0x%lx)\n", dstl.addr); +#endif + + if (dstptes == NULL && dstl.pte != NULL) { + /* dispose of old PT mapping */ + pmap_tmpunmap_pa(); + dstl.pte = NULL; + } + + /* + * get new dst PTP + */ + if (!pmap_valid_entry(dstpmap->pm_pdir[pdei(dstl.addr)])) { +#ifdef DIAGNOSTIC + if (dstl.addr >= VM_MIN_KERNEL_ADDRESS) + panic("pmap_transfer: missing kernel PTP at " + "0x%lx", dstl.addr); +#endif + dstl.ptp = pmap_get_ptp(dstpmap, pdei(dstl.addr), TRUE); + if (dstl.ptp == NULL) /* out of free RAM? punt. */ + break; + } else { + dstl.ptp = NULL; + } + + dstvalid = PTES_PER_PTP; /* new PTP */ + + /* + * if the dstptes are un-mapped, then we need to tmpmap in the + * dstl.ptp. + */ + + if (dstptes == NULL) { + dstl.pte = (pt_entry_t *) + pmap_tmpmap_pa(dstpmap->pm_pdir[pdei(dstl.addr)] + & PG_FRAME); + } + + /* + * we have a valid source block of "blkpgs" PTEs left + * to transfer. we just got a brand new dst PTP to + * receive these PTEs. + */ + +#ifdef DIAGNOSTIC + if (dstvalid < blkpgs) + panic("pmap_transfer: too many blkpgs?"); +#endif + toxfer = blkpgs; + ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl, toxfer, + move); + + if (!ok) /* memory shortage? punt. */ + break; + + dstvalid -= toxfer; + blkpgs -= toxfer; + len -= i386_ptob(toxfer); + + /* + * done src pte block + */ + } + if (dstptes == NULL && dstl.pte != NULL) + pmap_tmpunmap_pa(); /* dst PTP still mapped? */ + pmap_unmap_ptes(mapped_pmap); +} + +/* + * pmap_transfer_ptes: transfer PTEs from one pmap to another + * + * => we assume that the needed PTPs are mapped and that we will + * not cross a block boundary. + * => we return TRUE if we transfered all PTEs, FALSE if we were + * unable to allocate a pv_entry + */ + +static boolean_t +pmap_transfer_ptes(srcpmap, srcl, dstpmap, dstl, toxfer, move) + struct pmap *srcpmap, *dstpmap; + struct pmap_transfer_location *srcl, *dstl; + int toxfer; + boolean_t move; +{ + pt_entry_t dstproto, opte; + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve, *lpve; + + /* + * generate "prototype" dst PTE + */ + + if (dstl->addr < VM_MAX_ADDRESS) + dstproto = PG_u; /* "user" page */ + else + dstproto = pmap_pg_g; /* kernel page */ + + /* + * ensure we have dst PTP for user addresses. 
+ */ + + if (dstl->ptp == NULL && dstl->addr < VM_MAXUSER_ADDRESS) + dstl->ptp = PHYS_TO_VM_PAGE(dstpmap->pm_pdir[pdei(dstl->addr)] & + PG_FRAME); + + /* + * main loop over range + */ + + for (/*null*/; toxfer > 0 ; toxfer--, + srcl->addr += NBPG, dstl->addr += NBPG, + srcl->pte++, dstl->pte++) { + + if (!pmap_valid_entry(*srcl->pte)) /* skip invalid entrys */ + continue; + +#ifdef DIAGNOSTIC + if (pmap_valid_entry(*dstl->pte)) + panic("pmap_transfer_ptes: attempt to overwrite " + "active entry"); +#endif + + /* + * let's not worry about non-pvlist mappings (typically device + * pager mappings). + */ + + opte = *srcl->pte; + + if ((opte & PG_PVLIST) == 0) + continue; + + /* + * if we are moving the mapping, then we can just adjust the + * current pv_entry. if we are copying the mapping, then we + * need to allocate a new pv_entry to account for it. + */ + + if (move == FALSE) { + pve = pmap_alloc_pv(dstpmap, ALLOCPV_TRY); + if (pve == NULL) + return(FALSE); /* punt! */ + } else { + pve = NULL; /* XXX: quiet gcc warning */ + } + + /* + * find the pv_head for this mapping. since our mapping is + * on the pvlist (PG_PVLIST), there must be a pv_head. + */ + + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_transfer_ptes: PG_PVLIST PTE and " + "no pv_head!"); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + + /* + * now lock down the pvhead and find the current entry (there + * must be one). + */ + + simple_lock(&pvh->pvh_lock); + for (lpve = pvh->pvh_list ; lpve ; lpve = lpve->pv_next) + if (lpve->pv_pmap == srcpmap && + lpve->pv_va == srcl->addr) + break; +#ifdef DIAGNOSTIC + if (lpve == NULL) + panic("pmap_transfer_ptes: PG_PVLIST PTE, but " + "entry not found"); +#endif + + /* + * update src ptp. if the ptp is null in the pventry, then + * we are not counting valid entrys for this ptp (this is only + * true for kernel PTPs). + */ + + if (srcl->ptp == NULL) + srcl->ptp = lpve->pv_ptp; +#ifdef DIAGNOSTIC + if (srcl->ptp && + (srcpmap->pm_pdir[pdei(srcl->addr)] & PG_FRAME) != + VM_PAGE_TO_PHYS(srcl->ptp)) + panic("pmap_transfer_ptes: pm_pdir - pv_ptp mismatch!"); +#endif + + /* + * for move, update the pve we just found (lpve) to + * point to its new mapping. for copy, init the new + * pve and put it in the list. + */ + + if (move == TRUE) { + pve = lpve; + } + pve->pv_pmap = dstpmap; + pve->pv_va = dstl->addr; + pve->pv_ptp = dstl->ptp; + if (move == FALSE) { /* link in copy */ + pve->pv_next = lpve->pv_next; + lpve->pv_next = pve; + } + + /* + * sync the R/M bits while we are here. + */ + + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + + /* + * now actually update the ptes and unlock the pvlist. + */ + + if (move) { + *srcl->pte = 0; /* zap! 
*/ + if (pmap_is_curpmap(srcpmap)) + pmap_update_pg(srcl->addr); + if (srcl->ptp) + /* don't bother trying to free PTP */ + srcl->ptp->wire_count--; + srcpmap->pm_stats.resident_count--; + if (opte & PG_W) + srcpmap->pm_stats.wired_count--; + } + *dstl->pte = (opte & ~(PG_u|PG_U|PG_M|PG_G|PG_W)) | dstproto; + dstpmap->pm_stats.resident_count++; + if (dstl->ptp) + dstl->ptp->wire_count++; + simple_unlock(&pvh->pvh_lock); + } + return(TRUE); } /* @@ -2817,10 +3388,10 @@ pmap_enter(pmap, va, pa, prot, flags) if (pmap == pmap_kernel()) { ptp = NULL; } else { - ptp = pmap_get_ptp(pmap, pdei(va)); + ptp = pmap_get_ptp(pmap, pdei(va), FALSE); if (ptp == NULL) { if (flags & PMAP_CANFAIL) { - return ENOMEM; + return (KERN_RESOURCE_SHORTAGE); } panic("pmap_enter: get ptp failed"); } @@ -2920,7 +3491,7 @@ pmap_enter(pmap, va, pa, prot, flags) pve = pmap_alloc_pv(pmap, ALLOCPV_NEED); if (pve == NULL) { if (flags & PMAP_CANFAIL) { - error = ENOMEM; + error = KERN_RESOURCE_SHORTAGE; goto out; } panic("pmap_enter: no pv entries available"); @@ -3029,7 +3600,7 @@ pmap_growkernel(maxkvaddr) * INVOKED WHILE pmap_init() IS RUNNING! */ - if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde) == NULL) { + if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE) == NULL) { panic("pmap_growkernel: alloc ptp failed"); } @@ -3038,14 +3609,11 @@ pmap_growkernel(maxkvaddr) /* distribute new kernel PTP to all active pmaps */ simple_lock(&pmaps_lock); - LIST_FOREACH(pm, &pmaps, pm_list) { + for (pm = pmaps.lh_first; pm != NULL; + pm = pm->pm_list.le_next) { pm->pm_pdir[PDSLOT_KERN + nkpde] = kpm->pm_pdir[PDSLOT_KERN + nkpde]; } - - /* Invalidate the PDP cache. */ - pool_cache_invalidate(&pmap_pdp_cache); - simple_unlock(&pmaps_lock); } @@ -3104,7 +3672,7 @@ pmap_dump(pmap, sva, eva) continue; pte = &ptes[i386_btop(sva)]; - for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) { + for (/* null */; sva < blkendva ; sva += NBPG, pte++) { if (!pmap_valid_entry(*pte)) continue; printf("va %#lx -> pa %#x (pte=%#x)\n", diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index b9950057cef..ff7cca16a22 100644 --- a/sys/arch/i386/include/pmap.h +++ b/sys/arch/i386/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.24 2001/12/11 17:24:34 art Exp $ */ +/* $OpenBSD: pmap.h,v 1.25 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */ /* @@ -204,12 +204,8 @@ #define vtopte(VA) (PTE_BASE + i386_btop(VA)) #define kvtopte(VA) vtopte(VA) #define ptetov(PT) (i386_ptob(PT - PTE_BASE)) -#ifdef LARGEPAGES -paddr_t vtophys(vaddr_t); -#else #define vtophys(VA) ((*vtopte(VA) & PG_FRAME) | \ ((unsigned)(VA) & ~PG_FRAME)) -#endif #define avtopte(VA) (APTE_BASE + i386_btop(VA)) #define ptetoav(PT) (i386_ptob(PT - APTE_BASE)) #define avtophys(VA) ((*avtopte(VA) & PG_FRAME) | \ diff --git a/sys/arch/mac68k/dev/grf.c b/sys/arch/mac68k/dev/grf.c index c11cbc5da35..34aeb896c27 100644 --- a/sys/arch/mac68k/dev/grf.c +++ b/sys/arch/mac68k/dev/grf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: grf.c,v 1.19 2001/11/28 13:47:38 art Exp $ */ +/* $OpenBSD: grf.c,v 1.20 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: grf.c,v 1.41 1997/02/24 06:20:04 scottr Exp $ */ /* @@ -391,6 +391,7 @@ grfunmap(dev, addr, p) { struct grf_softc *gp; vm_size_t size; + int rv; gp = grf_cd.cd_devs[GRFUNIT(dev)]; @@ -404,8 +405,8 @@ grfunmap(dev, addr, p) size = round_page(gp->sc_grfmode->fbsize); - uvm_unmap(&p->p_vmspace->vm_map, (vm_offset_t)addr, + rv = uvm_unmap(&p->p_vmspace->vm_map, (vm_offset_t)addr, (vm_offset_t)addr + size); - 
return (0); + return (rv == 0 ? 0 : EINVAL); } diff --git a/sys/arch/sparc/include/pmap.h b/sys/arch/sparc/include/pmap.h index a67cbb80097..ff2f0d93710 100644 --- a/sys/arch/sparc/include/pmap.h +++ b/sys/arch/sparc/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.29 2001/12/07 10:52:25 art Exp $ */ +/* $OpenBSD: pmap.h,v 1.30 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: pmap.h,v 1.30 1997/08/04 20:00:47 pk Exp $ */ /* @@ -295,17 +295,22 @@ void pmap_pinit __P((pmap_t)); void pmap_reference __P((pmap_t)); void pmap_release __P((pmap_t)); void pmap_remove __P((pmap_t, vaddr_t, vaddr_t)); -#define pmap_update(pm) /* nothing */ void pmap_init __P((void)); int pmap_page_index __P((paddr_t)); void pmap_virtual_space __P((vaddr_t *, vaddr_t *)); void pmap_redzone __P((void)); -void kvm_uncache __P((caddr_t, int)); +void kvm_setcache __P((caddr_t, int, int)); +#define kvm_uncache(addr, npages) kvm_setcache(addr, npages, 0) +#define kvm_recache(addr, npages) kvm_setcache(addr, npages, 1) +void pmap_cache_enable __P((void)); struct user; void switchexit __P((struct proc *)); int mmu_pagein __P((struct pmap *pm, vaddr_t, int)); void pmap_writetext __P((unsigned char *, int)); +#define pmap_update(pm) /* nothing */ +#define pmap_copy(DP,SP,D,L,S) /* nothing */ + /* SUN4/SUN4C SPECIFIC DECLARATIONS */ #if defined(SUN4) || defined(SUN4C) diff --git a/sys/arch/sparc/include/psl.h b/sys/arch/sparc/include/psl.h index 7b1139f03c0..8959f9f1072 100644 --- a/sys/arch/sparc/include/psl.h +++ b/sys/arch/sparc/include/psl.h @@ -1,4 +1,4 @@ -/* $OpenBSD: psl.h,v 1.7 2001/12/07 10:38:11 art Exp $ */ +/* $OpenBSD: psl.h,v 1.8 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: psl.h,v 1.12 1997/03/10 21:49:11 pk Exp $ */ /* @@ -203,6 +203,11 @@ SPLHOLD(spltty, PIL_TTY) SPLHOLD(splimp, 7) SPLHOLD(splvm, 7) +/* + * remove. + */ +SPLHOLD(splpmap, 7) + SPLHOLD(splclock, PIL_CLOCK) /* fd hardware interrupts are at level 11 */ diff --git a/sys/arch/sparc/include/vmparam.h b/sys/arch/sparc/include/vmparam.h index 40858973242..15e06e9ad4c 100644 --- a/sys/arch/sparc/include/vmparam.h +++ b/sys/arch/sparc/include/vmparam.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vmparam.h,v 1.21 2001/12/05 16:25:44 art Exp $ */ +/* $OpenBSD: vmparam.h,v 1.22 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: vmparam.h,v 1.13 1997/07/12 16:20:03 perry Exp $ */ /* @@ -109,6 +109,7 @@ /* virtual sizes (bytes) for various kernel submaps */ #define VM_MBUF_SIZE (NMBCLUSTERS*MCLBYTES) +#define VM_KMEM_SIZE (NKMEMCLUSTERS*PAGE_SIZE) #define VM_PHYSSEG_MAX 32 /* we only have one "hole" */ #define VM_PHYSSEG_STRAT VM_PSTRAT_BSEARCH @@ -117,7 +118,6 @@ /* * pmap specific data stored in the vm_physmem[] array */ -#define __HAVE_PMAP_PHYSSEG struct pmap_physseg { struct pvlist *pv_head; }; diff --git a/sys/arch/sparc/sparc/cache.c b/sys/arch/sparc/sparc/cache.c index be40c5ae1c8..4fe55973023 100644 --- a/sys/arch/sparc/sparc/cache.c +++ b/sys/arch/sparc/sparc/cache.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cache.c,v 1.14 2001/12/05 14:40:48 art Exp $ */ +/* $OpenBSD: cache.c,v 1.15 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: cache.c,v 1.34 1997/09/26 22:17:23 pk Exp $ */ /* @@ -768,102 +768,72 @@ viking_cache_flush(base, len) } void -viking_pcache_flush_page(pa, invalidate_only) - paddr_t pa; - int invalidate_only; +viking_pcache_flush_line(va, pa) + int va; + int pa; { - int set, i; + /* + * Flush cache line corresponding to virtual address `va' + * which is mapped at physical address `pa'. 
+ */ + extern char etext[]; + static char *base; + int i; + char *v; /* - * The viking's on-chip data cache is 4-way set associative, - * consisting of 128 sets, each holding 4 lines of 32 bytes. - * Note that one 4096 byte page exactly covers all 128 sets - * in the cache. + * Construct a virtual address that hits the same cache line + * as PA, then read from 2*ASSOCIATIVITY-1 different physical + * locations (all different from PA). */ - if (invalidate_only) { - u_int pa_tag = (pa >> 12); - u_int tagaddr; - u_int64_t tag; - - /* - * Loop over all sets and invalidate all entries tagged - * with the given physical address by resetting the cache - * tag in ASI_DCACHETAG control space. - * - * The address format for accessing a tag is: - * - * 31 30 27 26 11 5 4 3 2 0 - * +------+-----+------+-------//--------+--------+----+-----+ - * | type | xxx | line | xxx | set | xx | 0 | - * +------+-----+------+-------//--------+--------+----+-----+ - * - * set: the cache set tag to be read (0-127) - * line: the line within the set (0-3) - * type: 1: read set tag; 2: read physical tag - * - * The (type 2) tag read from this address is a 64-bit word - * formatted as follows: - * - * 5 4 4 - * 63 6 8 0 23 0 - * +-------+-+-------+-+-------+-+-----------+----------------+ - * | xxx |V| xxx |D| xxx |S| xxx | PA[35-12] | - * +-------+-+-------+-+-------+-+-----------+----------------+ - * - * PA: bits 12-35 of the physical address - * S: line shared bit - * D: line dirty bit - * V: line valid bit - */ - -#define VIKING_DCACHETAG_S 0x0000010000000000UL /* line valid bit */ -#define VIKING_DCACHETAG_D 0x0001000000000000UL /* line dirty bit */ -#define VIKING_DCACHETAG_V 0x0100000000000000UL /* line shared bit */ -#define VIKING_DCACHETAG_PAMASK 0x0000000000ffffffUL /* PA tag field */ - - for (set = 0; set < 128; set++) { - /* Set set number and access type */ - tagaddr = (set << 5) | (2 << 30); - - /* Examine the tag for each line in the set */ - for (i = 0 ; i < 4; i++) { - tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG); - /* - * If this is a valid tag and the PA field - * matches clear the tag. - */ - if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag && - (tag & VIKING_DCACHETAG_V) != 0) - stda(tagaddr | (i << 26), - ASI_DCACHETAG, 0); - } - } - } else { - extern char kernel_text[]; - - /* - * Force the cache to validate its backing memory - * by displacing all cache lines with known read-only - * content from the start of kernel text. - * - * Note that this thrashes the entire cache. However, - * we currently only need to call upon this code - * once at boot time. - */ - for (set = 0; set < 128; set++) { - int *v = (int *)(kernel_text + (set << 5)); - - /* - * We need to read (2*associativity-1) different - * locations to be sure to displace the entire set. 
- */ - i = 2 * 4 - 1; - while (i--) { - (*(volatile int *)v); - v += 4096; - } - } +#if 0 + if (base == 0) { + cshift = CACHEINFO.ic_l2linesize; + csize = CACHEINFO.ic_nlines << cshift; + cmask = csize - 1; + base = (char *)roundup((int)etext, csize); + } + + v = base + (((va & cmask) >> cshift) << cshift); + i = CACHEINFO.dc_associativity * 2 - 1; + + while (i--) { + (*(volatile int *)v); + v += csize; + } +#else +#define cshift 5 /* CACHEINFO.ic_l2linesize */ +#define csize (128 << cshift) /* CACHEINFO.ic_nlines << cshift */ +#define cmask (csize - 1) +#define cass 4 /* CACHEINFO.dc_associativity */ + + if (base == 0) + base = (char *)roundup((unsigned int)etext, csize); + + v = base + (((pa & cmask) >> cshift) << cshift); + i = 2 * cass - 1; + + while (i--) { + (*(volatile int *)v); + v += csize; } +#undef cass +#undef cmask +#undef csize +#undef cshift +#endif +} + +void +srmmu_pcache_flush_line(va, pa) + int va; + int pa; +{ + /* + * Flush cache line corresponding to virtual address `va' + * which is mapped at physical address `pa'. + */ + sta(va, ASI_IDCACHELFP, 0); } #endif /* SUN4M */ diff --git a/sys/arch/sparc/sparc/cache.h b/sys/arch/sparc/sparc/cache.h index 445ec2369f8..e4c58416831 100644 --- a/sys/arch/sparc/sparc/cache.h +++ b/sys/arch/sparc/sparc/cache.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cache.h,v 1.5 2001/12/05 14:40:48 art Exp $ */ +/* $OpenBSD: cache.h,v 1.6 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: cache.h,v 1.16 1997/07/06 21:15:14 pk Exp $ */ /* @@ -178,7 +178,8 @@ void hypersparc_cache_flush_all __P((void)); void ms1_cache_flush __P((caddr_t, u_int)); void viking_cache_flush __P((caddr_t, u_int)); -void viking_pcache_flush_page __P((paddr_t, int)); +void viking_pcache_flush_line __P((int, int)); +void srmmu_pcache_flush_line __P((int, int)); extern void sparc_noop __P((void)); @@ -192,8 +193,8 @@ extern void sparc_noop __P((void)); (void (*)__P((int))) sparc_noop #define noop_cache_flush \ (void (*)__P((caddr_t, u_int))) sparc_noop -#define noop_pcache_flush_page \ - (void (*)__P((paddr_t, int))) sparc_noop +#define noop_pcache_flush_line \ + (void (*)__P((int, int))) sparc_noop #define noop_pure_vcache_flush \ (void (*)__P((void))) sparc_noop #define noop_cache_flush_all \ @@ -203,7 +204,6 @@ extern void sparc_noop __P((void)); #define cache_flush_segment(vr,vs) cpuinfo.vcache_flush_segment(vr,vs) #define cache_flush_region(vr) cpuinfo.vcache_flush_region(vr) #define cache_flush_context() cpuinfo.vcache_flush_context() -#define pcache_flush_page(pa,flag) cpuinfo.pcache_flush_page(pa,flag) /* * Cache control information. 
diff --git a/sys/arch/sparc/sparc/cpu.c b/sys/arch/sparc/sparc/cpu.c index 66425e45b3b..e2662c99a49 100644 --- a/sys/arch/sparc/sparc/cpu.c +++ b/sys/arch/sparc/sparc/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.35 2001/12/07 10:44:52 art Exp $ */ +/* $OpenBSD: cpu.c,v 1.36 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: cpu.c,v 1.56 1997/09/15 20:52:36 pk Exp $ */ /* @@ -252,6 +252,7 @@ cpu_attach(parent, self, aux) */ s = splhigh(); sc->cache_enable(); + pmap_cache_enable(); splx(s); return; } @@ -428,7 +429,7 @@ struct module_info module_sun4 = { sun4_vcache_flush_segment, sun4_vcache_flush_region, sun4_vcache_flush_context, - noop_pcache_flush_page, + noop_pcache_flush_line, noop_pure_vcache_flush, noop_cache_flush_all, 0 @@ -554,7 +555,7 @@ struct module_info module_sun4c = { sun4_vcache_flush_segment, sun4_vcache_flush_region, sun4_vcache_flush_context, - noop_pcache_flush_page, + noop_pcache_flush_line, noop_pure_vcache_flush, noop_cache_flush_all, 0 @@ -752,7 +753,7 @@ struct module_info module_ms1 = { noop_vcache_flush_segment, noop_vcache_flush_region, noop_vcache_flush_context, - noop_pcache_flush_page, + noop_pcache_flush_line, noop_pure_vcache_flush, ms1_cache_flush_all, memerr4m @@ -780,7 +781,7 @@ struct module_info module_ms2 = { srmmu_vcache_flush_segment, srmmu_vcache_flush_region, srmmu_vcache_flush_context, - noop_pcache_flush_page, + noop_pcache_flush_line, noop_pure_vcache_flush, srmmu_cache_flush_all, memerr4m @@ -803,7 +804,7 @@ struct module_info module_swift = { srmmu_vcache_flush_segment, srmmu_vcache_flush_region, srmmu_vcache_flush_context, - noop_pcache_flush_page, + srmmu_pcache_flush_line, noop_pure_vcache_flush, srmmu_cache_flush_all, memerr4m @@ -851,7 +852,7 @@ struct module_info module_viking = { noop_vcache_flush_segment, noop_vcache_flush_region, noop_vcache_flush_context, - viking_pcache_flush_page, + viking_pcache_flush_line, noop_pure_vcache_flush, noop_cache_flush_all, viking_memerr @@ -889,7 +890,7 @@ viking_hotfix(sc) sc->flags |= CPUFLG_CACHEPAGETABLES; } else { sc->cache_flush = viking_cache_flush; - sc->pcache_flush_page = viking_pcache_flush_page; + sc->pcache_flush_line = viking_pcache_flush_line; } /* XXX! 
*/ @@ -934,7 +935,7 @@ struct module_info module_hypersparc = { srmmu_vcache_flush_segment, srmmu_vcache_flush_region, srmmu_vcache_flush_context, - noop_pcache_flush_page, + srmmu_pcache_flush_line, hypersparc_pure_vcache_flush, hypersparc_cache_flush_all, hypersparc_memerr @@ -985,7 +986,7 @@ struct module_info module_cypress = { srmmu_vcache_flush_segment, srmmu_vcache_flush_region, srmmu_vcache_flush_context, - noop_pcache_flush_page, + srmmu_pcache_flush_line, noop_pure_vcache_flush, cypress_cache_flush_all, memerr4m @@ -1008,7 +1009,7 @@ struct module_info module_turbosparc = { /* UNTESTED */ srmmu_vcache_flush_segment, srmmu_vcache_flush_region, srmmu_vcache_flush_context, - noop_pcache_flush_page, + srmmu_pcache_flush_line, noop_pure_vcache_flush, srmmu_cache_flush_all, memerr4m @@ -1046,7 +1047,7 @@ cpumatch_turbosparc(sc, mp, node) sc->vcache_flush_segment = 0; sc->vcache_flush_region = 0; sc->vcache_flush_context = 0; - sc->pcache_flush_page = 0; + sc->pcache_flush_line = 0; replacemul(); } @@ -1233,7 +1234,7 @@ getcpuinfo(sc, node) MPCOPY(vcache_flush_segment); MPCOPY(vcache_flush_region); MPCOPY(vcache_flush_context); - MPCOPY(pcache_flush_page); + MPCOPY(pcache_flush_line); MPCOPY(pure_vcache_flush); MPCOPY(cache_flush_all); MPCOPY(memerr); diff --git a/sys/arch/sparc/sparc/cpuvar.h b/sys/arch/sparc/sparc/cpuvar.h index 45acc703a3e..fc6e4de2778 100644 --- a/sys/arch/sparc/sparc/cpuvar.h +++ b/sys/arch/sparc/sparc/cpuvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpuvar.h,v 1.6 2001/12/07 10:39:47 art Exp $ */ +/* $OpenBSD: cpuvar.h,v 1.7 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: cpuvar.h,v 1.4 1997/07/06 21:14:25 pk Exp $ */ /* @@ -70,7 +70,7 @@ struct module_info { void (*vcache_flush_segment) __P((int, int)); void (*vcache_flush_region) __P((int)); void (*vcache_flush_context) __P((void)); - void (*pcache_flush_page) __P((paddr_t, int)); + void (*pcache_flush_line) __P((int, int)); void (*pure_vcache_flush) __P((void)); void (*cache_flush_all)__P((void)); void (*memerr) __P((unsigned, u_int, u_int, struct trapframe *)); @@ -183,7 +183,7 @@ struct cpu_softc { void (*vcache_flush_segment)__P((int, int)); void (*vcache_flush_region)__P((int)); void (*vcache_flush_context)__P((void)); - void (*pcache_flush_page)__P((paddr_t, int)); + void (*pcache_flush_line)__P((int, int)); void (*pure_vcache_flush) __P((void)); void (*cache_flush_all)__P((void)); @@ -283,6 +283,7 @@ struct cpu_softc { */ void getcpuinfo __P((struct cpu_softc *sc, int node)); void mmu_install_tables __P((struct cpu_softc *)); +void pmap_alloc_cpu __P((struct cpu_softc *)); #define cpuinfo (*(struct cpu_softc *)CPUINFO_VA) #endif /* _SPARC_CPUVAR_H */ diff --git a/sys/arch/sparc/sparc/machdep.c b/sys/arch/sparc/sparc/machdep.c index f240f3218cb..e4383c71bbb 100644 --- a/sys/arch/sparc/sparc/machdep.c +++ b/sys/arch/sparc/sparc/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.74 2001/12/08 02:24:07 art Exp $ */ +/* $OpenBSD: machdep.c,v 1.75 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: machdep.c,v 1.85 1997/09/12 08:55:02 pk Exp $ */ /* @@ -245,7 +245,6 @@ cpu_startup() curbufsize -= PAGE_SIZE; } } - pmap_update(pmap_kernel()); /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. 
@@ -869,11 +868,9 @@ dumpsys() (void) pmap_map(dumpspace, maddr, maddr + n, VM_PROT_READ); - pmap_update(pmap_kernel()); error = (*dump)(dumpdev, blkno, (caddr_t)dumpspace, (int)n); pmap_remove(pmap_kernel(), dumpspace, dumpspace + n); - pmap_update(pmap_kernel()); if (error) break; maddr += n; @@ -978,7 +975,6 @@ mapdev(phys, virt, offset, size) va += PAGE_SIZE; pa += PAGE_SIZE; } while ((size -= PAGE_SIZE) > 0); - pmap_update(pmap_kernel()); return (ret); } diff --git a/sys/arch/sparc/sparc/pmap.c b/sys/arch/sparc/sparc/pmap.c index e23610b67a6..f0ef4375d49 100644 --- a/sys/arch/sparc/sparc/pmap.c +++ b/sys/arch/sparc/sparc/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.116 2001/12/09 04:51:35 art Exp $ */ +/* $OpenBSD: pmap.c,v 1.117 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: pmap.c,v 1.118 1998/05/19 19:00:18 thorpej Exp $ */ /* @@ -162,13 +162,20 @@ int pmapdebug = 0; /* * Internal helpers. */ -static __inline struct pvlist *pvhead(int); +static __inline struct pvlist *pvhead __P((int)); +static __inline struct pvlist *pvalloc __P((void)); +static __inline void pvfree __P((struct pvlist *)); + +#if defined(SUN4M) +static u_int VA2PA __P((caddr_t)); +#endif /* * Given a page number, return the head of its pvlist. */ static __inline struct pvlist * -pvhead(int pnum) +pvhead(pnum) + int pnum; { int bank, off; @@ -181,6 +188,24 @@ pvhead(int pnum) struct pool pvpool; +/* + * Wrappers around some memory allocation. + * XXX - the plan is to make them non-sleeping. + */ + +static __inline struct pvlist * +pvalloc() +{ + return pool_get(&pvpool, PR_WAITOK); +} + +static __inline void +pvfree(pv) + struct pvlist *pv; +{ + pool_put(&pvpool, pv); +} + #if defined(SUN4M) /* * Memory pools and back-end supplier for SRMMU page tables. @@ -189,49 +214,49 @@ struct pool pvpool; */ static struct pool L1_pool; static struct pool L23_pool; -void *pgt_page_alloc(unsigned long, int, int); -void pgt_page_free(void *, unsigned long, int); +void *pgt_page_alloc __P((unsigned long, int, int)); +void pgt_page_free __P((void *, unsigned long, int)); + +void pcache_flush __P((caddr_t, caddr_t, int)); +void +pcache_flush(va, pa, n) + caddr_t va, pa; + int n; +{ + void (*f)__P((int,int)) = cpuinfo.pcache_flush_line; + + while ((n -= 4) >= 0) + (*f)((u_int)va+n, (u_int)pa+n); +} /* * Page table pool back-end. */ void * -pgt_page_alloc(unsigned long sz, int flags, int mtype) +pgt_page_alloc(sz, flags, mtype) + unsigned long sz; + int flags; + int mtype; { - struct vm_page *pg; - int nocache = (cpuinfo.flags & CPUFLG_CACHEPAGETABLES) == 0; - vaddr_t va; - paddr_t pa; - - if ((pg = uvm_pagealloc(NULL, 0, NULL, 0)) == NULL) - return (NULL); - - if ((va = uvm_km_valloc(kernel_map, PAGE_SIZE)) == 0) { - uvm_pagefree(pg); - return (NULL); - } + caddr_t p; - pa = VM_PAGE_TO_PHYS(pg); - if (nocache) - pcache_flush_page(pa, 1); + p = (caddr_t)uvm_km_kmemalloc(kernel_map, uvm.kernel_object, + (vsize_t)sz, UVM_KMF_NOWAIT); - pmap_kenter_pa(va, pa | (nocache ? 
PMAP_NC : 0), - VM_PROT_READ|VM_PROT_WRITE); - pmap_update(pmap_kernel()); - return ((void *)va); + if (p != NULL && ((cpuinfo.flags & CPUFLG_CACHEPAGETABLES) == 0)) { + pcache_flush(p, (caddr_t)VA2PA(p), sz); + kvm_uncache(p, atop(sz)); + } + return (p); } void -pgt_page_free(void *v, unsigned long sz, int mtype) +pgt_page_free(v, sz, mtype) + void *v; + unsigned long sz; + int mtype; { - vaddr_t va = (vaddr_t)v; - paddr_t pa; - - if (pmap_extract(pmap_kernel(), va, &pa) == FALSE) - panic("pgt_page_free"); - uvm_pagefree(PHYS_TO_VM_PAGE(pa)); - pmap_kremove(va, sz); - uvm_km_free(kernel_map, (vaddr_t)v, sz); + uvm_km_free(kernel_map, (vaddr_t)v, sz); } #endif /* SUN4M */ @@ -380,9 +405,9 @@ vaddr_t pagetables_start, pagetables_end; struct memarr pmemarr[MA_SIZE];/* physical memory regions */ int npmemarr; /* number of entries in pmemarr */ -static void pmap_page_upload(paddr_t); -void pmap_pinit(pmap_t); -void pmap_release(pmap_t); +static void pmap_page_upload __P((paddr_t)); +void pmap_pinit __P((pmap_t)); +void pmap_release __P((pmap_t)); int mmu_has_hole; @@ -440,9 +465,9 @@ static u_long segfixmask = 0xffffffff; /* all bits valid to start */ #if defined(SUN4M) #define getpte4m(va) lda((va & 0xFFFFF000) | ASI_SRMMUFP_L3, \ ASI_SRMMUFP) -u_int *getptep4m(struct pmap *, vaddr_t); -static __inline void setpgt4m(int *, int); -void setpte4m(vaddr_t va, int pte); +u_int *getptep4m __P((struct pmap *, vaddr_t)); +static __inline void setpgt4m __P((int *, int)); +void setpte4m __P((vaddr_t va, int pte)); #endif #if defined(SUN4) || defined(SUN4C) @@ -464,35 +489,35 @@ void setpte4m(vaddr_t va, int pte); */ #if defined(SUN4M) -static void mmu_setup4m_L1(int, struct pmap *); -static void mmu_setup4m_L2(int, struct regmap *); -static void mmu_setup4m_L3(int, struct segmap *); -void mmu_reservemon4m(struct pmap *); - -void pmap_rmk4m(struct pmap *, vaddr_t, vaddr_t, int, int); -void pmap_rmu4m(struct pmap *, vaddr_t, vaddr_t, int, int); -int pmap_enk4m(struct pmap *, vaddr_t, vm_prot_t, - int, struct pvlist *, int); -int pmap_enu4m(struct pmap *, vaddr_t, vm_prot_t, - int, struct pvlist *, int); -void pv_changepte4m(struct pvlist *, int, int); -int pv_syncflags4m(struct pvlist *); -int pv_link4m(struct pvlist *, struct pmap *, vaddr_t, int); -void pv_unlink4m(struct pvlist *, struct pmap *, vaddr_t); +static void mmu_setup4m_L1 __P((int, struct pmap *)); +static void mmu_setup4m_L2 __P((int, struct regmap *)); +static void mmu_setup4m_L3 __P((int, struct segmap *)); +void mmu_reservemon4m __P((struct pmap *)); + +void pmap_rmk4m __P((struct pmap *, vaddr_t, vaddr_t, int, int)); +void pmap_rmu4m __P((struct pmap *, vaddr_t, vaddr_t, int, int)); +int pmap_enk4m __P((struct pmap *, vaddr_t, vm_prot_t, + int, struct pvlist *, int)); +int pmap_enu4m __P((struct pmap *, vaddr_t, vm_prot_t, + int, struct pvlist *, int)); +void pv_changepte4m __P((struct pvlist *, int, int)); +int pv_syncflags4m __P((struct pvlist *)); +int pv_link4m __P((struct pvlist *, struct pmap *, vaddr_t, int)); +void pv_unlink4m __P((struct pvlist *, struct pmap *, vaddr_t)); #endif #if defined(SUN4) || defined(SUN4C) -void mmu_reservemon4_4c(int *, int *); -void pmap_rmk4_4c(struct pmap *, vaddr_t, vaddr_t, int, int); -void pmap_rmu4_4c(struct pmap *, vaddr_t, vaddr_t, int, int); -int pmap_enk4_4c(struct pmap *, vaddr_t, vm_prot_t, int, struct pvlist *, - int); -int pmap_enu4_4c(struct pmap *, vaddr_t, vm_prot_t, int, struct pvlist *, - int); -void pv_changepte4_4c(struct pvlist *, int, int); -int pv_syncflags4_4c(struct pvlist 
*); -int pv_link4_4c(struct pvlist *, struct pmap *, vaddr_t, int); -void pv_unlink4_4c(struct pvlist *, struct pmap *, vaddr_t); +void mmu_reservemon4_4c __P((int *, int *)); +void pmap_rmk4_4c __P((struct pmap *, vaddr_t, vaddr_t, int, int)); +void pmap_rmu4_4c __P((struct pmap *, vaddr_t, vaddr_t, int, int)); +int pmap_enk4_4c __P((struct pmap *, vaddr_t, vm_prot_t, + int, struct pvlist *, int)); +int pmap_enu4_4c __P((struct pmap *, vaddr_t, vm_prot_t, + int, struct pvlist *, int)); +void pv_changepte4_4c __P((struct pvlist *, int, int)); +int pv_syncflags4_4c __P((struct pvlist *)); +int pv_link4_4c __P((struct pvlist *, struct pmap *, vaddr_t, int)); +void pv_unlink4_4c __P((struct pvlist *, struct pmap *, vaddr_t)); #endif #if !defined(SUN4M) && (defined(SUN4) || defined(SUN4C)) @@ -507,22 +532,22 @@ void pv_unlink4_4c(struct pvlist *, struct pmap *, vaddr_t); /* function pointer declarations */ /* from pmap.h: */ -boolean_t (*pmap_clear_modify_p)(struct vm_page *); -boolean_t (*pmap_clear_reference_p)(struct vm_page *); -void (*pmap_copy_page_p)(paddr_t, paddr_t); -int (*pmap_enter_p)(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); -boolean_t (*pmap_extract_p)(pmap_t, vaddr_t, paddr_t *); -boolean_t (*pmap_is_modified_p)(struct vm_page *); -boolean_t (*pmap_is_referenced_p)(struct vm_page *); -void (*pmap_kenter_pa_p)(vaddr_t, paddr_t, vm_prot_t); -void (*pmap_kremove_p)(vaddr_t, vsize_t); -void (*pmap_page_protect_p)(struct vm_page *, vm_prot_t); -void (*pmap_protect_p)(pmap_t, vaddr_t, vaddr_t, vm_prot_t); -void (*pmap_zero_page_p)(paddr_t); -void (*pmap_changeprot_p)(pmap_t, vaddr_t, vm_prot_t, int); +boolean_t (*pmap_clear_modify_p) __P((struct vm_page *)); +boolean_t (*pmap_clear_reference_p) __P((struct vm_page *)); +void (*pmap_copy_page_p) __P((paddr_t, paddr_t)); +int (*pmap_enter_p) __P((pmap_t, vaddr_t, paddr_t, vm_prot_t, int)); +boolean_t (*pmap_extract_p) __P((pmap_t, vaddr_t, paddr_t *)); +boolean_t (*pmap_is_modified_p) __P((struct vm_page *)); +boolean_t (*pmap_is_referenced_p) __P((struct vm_page *)); +void (*pmap_kenter_pa_p) __P((vaddr_t, paddr_t, vm_prot_t)); +void (*pmap_kremove_p) __P((vaddr_t, vsize_t)); +void (*pmap_page_protect_p) __P((struct vm_page *, vm_prot_t)); +void (*pmap_protect_p) __P((pmap_t, vaddr_t, vaddr_t, vm_prot_t)); +void (*pmap_zero_page_p) __P((paddr_t)); +void (*pmap_changeprot_p) __P((pmap_t, vaddr_t, vm_prot_t, int)); /* local: */ -void (*pmap_rmk_p)(struct pmap *, vaddr_t, vaddr_t, int, int); -void (*pmap_rmu_p)(struct pmap *, vaddr_t, vaddr_t, int, int); +void (*pmap_rmk_p) __P((struct pmap *, vaddr_t, vaddr_t, int, int)); +void (*pmap_rmu_p) __P((struct pmap *, vaddr_t, vaddr_t, int, int)); #define pmap_rmk (*pmap_rmk_p) #define pmap_rmu (*pmap_rmu_p) @@ -537,7 +562,6 @@ void (*pmap_rmu_p)(struct pmap *, vaddr_t, vaddr_t, int, int); */ #if defined(SUN4M) -static u_int VA2PA(caddr_t); /* * Macros which implement SRMMU TLB flushing/invalidation @@ -560,7 +584,8 @@ static u_int VA2PA(caddr_t); * during bootup to interact with the ROM's initial L1 mapping of the kernel. */ static u_int -VA2PA(caddr_t addr) +VA2PA(addr) + caddr_t addr; { u_int pte; @@ -599,7 +624,9 @@ VA2PA(caddr_t addr) * Assumes level 3 mapping (for now). */ u_int * -getptep4m(struct pmap *pm, vaddr_t va) +getptep4m(pm, va) + struct pmap *pm; + vaddr_t va; { struct regmap *rm; struct segmap *sm; @@ -627,7 +654,9 @@ getptep4m(struct pmap *pm, vaddr_t va) * Set the pte at "ptep" to "pte". 
*/ static __inline void -setpgt4m(int *ptep, int pte) +setpgt4m(ptep, pte) + int *ptep; + int pte; { swap(ptep, pte); } @@ -636,7 +665,9 @@ setpgt4m(int *ptep, int pte) * Set the page table entry for va to pte. Only legal for kernel mappings. */ void -setpte4m(vaddr_t va, int pte) +setpte4m(va, pte) + vaddr_t va; + int pte; { int *ptep; @@ -681,21 +712,24 @@ setpte4m(vaddr_t va, int pte) } while (0) -static void sortm(struct memarr *, int); -void ctx_alloc(struct pmap *); -void ctx_free(struct pmap *); -void pv_flushcache(struct pvlist *); +static void sortm __P((struct memarr *, int)); +void ctx_alloc __P((struct pmap *)); +void ctx_free __P((struct pmap *)); +void pv_flushcache __P((struct pvlist *)); +void kvm_iocache __P((caddr_t, int)); #ifdef DEBUG -void pm_check(char *, struct pmap *); -void pm_check_k(char *, struct pmap *); -void pm_check_u(char *, struct pmap *); +void pm_check __P((char *, struct pmap *)); +void pm_check_k __P((char *, struct pmap *)); +void pm_check_u __P((char *, struct pmap *)); #endif /* * Sort a memory array by address. */ static void -sortm(struct memarr *mp, int n) +sortm(mp, n) + struct memarr *mp; + int n; { struct memarr *mpj; int i, j; @@ -719,11 +753,21 @@ sortm(struct memarr *mp, int n) } /* + * For our convenience, vm_page.c implements: + * vm_bootstrap_steal_memory() + * using the functions: + * pmap_virtual_space(), pmap_free_pages(), pmap_next_page(), + * which are much simpler to implement. + */ + +/* * How much virtual space does this kernel have? * (After mapping kernel text, data, etc.) */ void -pmap_virtual_space(vaddr_t *v_start, vaddr_t *v_end) +pmap_virtual_space(v_start, v_end) + vaddr_t *v_start; + vaddr_t *v_end; { *v_start = virtual_avail; *v_end = virtual_end; @@ -733,7 +777,8 @@ pmap_virtual_space(vaddr_t *v_start, vaddr_t *v_end) * Helper routine that hands off available physical pages to the VM system. */ static void -pmap_page_upload(paddr_t first_pa) +pmap_page_upload(first_pa) + paddr_t first_pa; { int n = 0; paddr_t start, end; @@ -766,7 +811,8 @@ pmap_page_upload(paddr_t first_pa) } int -pmap_pa_exists(paddr_t pa) +pmap_pa_exists(pa) + paddr_t pa; { return (pa < phys_avail || (pvhead(atop(pa)) != NULL)); } @@ -788,7 +834,8 @@ pmap_pa_exists(paddr_t pa) */ #if defined(SUN4) || defined(SUN4C) void -mmu_reservemon4_4c(int *nrp, int *nsp) +mmu_reservemon4_4c(nrp, nsp) + int *nrp, *nsp; { u_int va = 0, eva = 0; int mmuseg, i, nr, ns, vr, lastvr; @@ -886,12 +933,15 @@ mmu_reservemon4_4c(int *nrp, int *nsp) * NOTE: This also revokes all user-mode access to the mapped regions. 
*/ void -mmu_reservemon4m(struct pmap *kpmap) +mmu_reservemon4m(kpmap) + struct pmap *kpmap; { unsigned int rom_ctxtbl; int te; unsigned int mmupcrsave; +/*XXX-GCC!*/mmupcrsave = 0; + /* * XXX: although the Sun4M can handle 36 bits of physical * address space, we assume that all these page tables, etc @@ -942,7 +992,9 @@ mmu_reservemon4m(struct pmap *kpmap) } void -mmu_setup4m_L1(int regtblptd, struct pmap *kpmap) +mmu_setup4m_L1(regtblptd, kpmap) + int regtblptd; /* PTD for region table to be remapped */ + struct pmap *kpmap; { unsigned int regtblrover; int i; @@ -1013,7 +1065,9 @@ mmu_setup4m_L1(int regtblptd, struct pmap *kpmap) } void -mmu_setup4m_L2(int segtblptd, struct regmap *rp) +mmu_setup4m_L2(segtblptd, rp) + int segtblptd; + struct regmap *rp; { unsigned int segtblrover; int i, k; @@ -1536,7 +1590,7 @@ printf("mmu_pagein: kernel wants map at va 0x%x, vr %d, vs %d\n", va, vr, vs); unsigned int tva = VA_ROUNDDOWNTOREG(va); struct segmap *sp = rp->rg_segmap; - s = splvm(); /* paranoid */ + s = splpmap(); /* paranoid */ smeg = region_alloc(®ion_lru, pm, vr)->me_cookie; setregmap(tva, smeg); i = NSEGRG; @@ -1559,7 +1613,7 @@ printf("mmu_pagein: kernel wants map at va 0x%x, vr %d, vs %d\n", va, vr, vs); /* reload segment: write PTEs into a new LRU entry */ va = VA_ROUNDDOWNTOSEG(va); - s = splvm(); /* paranoid */ + s = splpmap(); /* paranoid */ pmeg = me_alloc(&segm_lru, pm, vr, vs)->me_cookie; setsegmap(va, pmeg); i = NPTESG; @@ -1601,7 +1655,7 @@ ctx_alloc(pm) gap_end = pm->pm_gap_end; } - s = splvm(); + s = splpmap(); if ((c = ctx_freelist) != NULL) { ctx_freelist = c->c_nextfree; cnum = c - cpuinfo.ctxinfo; @@ -1831,7 +1885,7 @@ pv_changepte4_4c(pv0, bis, bic) write_user_windows(); /* paranoid? */ - s = splvm(); /* paranoid? */ + s = splpmap(); /* paranoid? */ if (pv0->pv_pmap == NULL) { splx(s); return; @@ -1930,7 +1984,7 @@ pv_syncflags4_4c(pv0) write_user_windows(); /* paranoid? */ - s = splvm(); /* paranoid? */ + s = splpmap(); /* paranoid? */ if (pv0->pv_pmap == NULL) { /* paranoid */ splx(s); return (0); @@ -2016,7 +2070,7 @@ pv_unlink4_4c(pv, pm, va) pv->pv_va = npv->pv_va; pv->pv_flags &= ~PV_NC; pv->pv_flags |= npv->pv_flags & PV_NC; - pool_put(&pvpool, npv); + pvfree(npv); } else { /* * No mappings left; we still need to maintain @@ -2040,7 +2094,7 @@ pv_unlink4_4c(pv, pm, va) break; } prev->pv_next = npv->pv_next; - pool_put(&pvpool, npv); + pvfree(npv); } if (pv->pv_flags & PV_ANC && (pv->pv_flags & PV_NC) == 0) { /* @@ -2113,9 +2167,7 @@ pv_link4_4c(pv, pm, va, nc) } } } - npv = pool_get(&pvpool, PR_NOWAIT); - if (npv == NULL) - panic("pvpool exhausted"); + npv = pvalloc(); npv->pv_next = pv->pv_next; npv->pv_pmap = pm; npv->pv_va = va; @@ -2139,9 +2191,16 @@ pv_link4_4c(pv, pm, va, nc) * as long as the process has a context; this is overly conservative. * It also copies ref and mod bits to the pvlist, on the theory that * this might save work later. (XXX should test this theory) + * + * In addition, if the cacheable bit (SRMMU_PG_C) is updated in the PTE + * the corresponding PV_C4M flag is also updated in each pv entry. This + * is done so kvm_uncache() can use this routine and have the uncached + * status stick. */ void -pv_changepte4m(struct pvlist *pv0, int bis, int bic) +pv_changepte4m(pv0, bis, bic) + struct pvlist *pv0; + int bis, bic; { struct pvlist *pv; struct pmap *pm; @@ -2150,7 +2209,7 @@ pv_changepte4m(struct pvlist *pv0, int bis, int bic) write_user_windows(); /* paranoid? */ - s = splvm(); /* paranoid? */ + s = splpmap(); /* paranoid? 
*/ if (pv0->pv_pmap == NULL) { splx(s); return; @@ -2203,6 +2262,19 @@ pv_changepte4m(struct pvlist *pv0, int bis, int bic) pv0->pv_flags |= MR4M(tpte); tpte = (tpte | bis) & ~bic; setpgt4m(ptep, tpte); + + /* Update PV_C4M flag if required */ + /* + * XXX - this is incorrect. The PV_C4M means that _this_ + * mapping should be kept uncached. This way we + * effectively uncache this pa until all mappings + * to it are gone (see also the XXX in pv_link4m and + * pv_unlink4m). + */ + if (bis & SRMMU_PG_C) + pv->pv_flags |= PV_C4M; + if (bic & SRMMU_PG_C) + pv->pv_flags &= ~PV_C4M; } setcontext4m(ctx); splx(s); @@ -2225,7 +2297,7 @@ pv_syncflags4m(pv0) write_user_windows(); /* paranoid? */ - s = splvm(); /* paranoid? */ + s = splpmap(); /* paranoid? */ if (pv0->pv_pmap == NULL) { /* paranoid */ splx(s); return (0); @@ -2309,7 +2381,7 @@ pv_unlink4m(pv, pm, va) pv->pv_va = npv->pv_va; pv->pv_flags &= ~PV_C4M; pv->pv_flags |= (npv->pv_flags & PV_C4M); - pool_put(&pvpool, npv); + pvfree(npv); } else { /* * No mappings left; we still need to maintain @@ -2333,7 +2405,7 @@ pv_unlink4m(pv, pm, va) break; } prev->pv_next = npv->pv_next; - pool_put(&pvpool, npv); + pvfree(npv); } if ((pv->pv_flags & (PV_C4M|PV_ANC)) == (PV_C4M|PV_ANC)) { /* @@ -2367,11 +2439,12 @@ pv_link4m(pv, pm, va, nc) vaddr_t va; int nc; { - struct pvlist *npv; + struct pvlist *npv, *mpv; int ret; ret = nc ? SRMMU_PG_C : 0; +retry: if (pv->pv_pmap == NULL) { /* no pvlist entries yet */ pmap_stats.ps_enter_firstpv++; @@ -2386,6 +2459,21 @@ pv_link4m(pv, pm, va, nc) } /* + * We do the malloc early so that we catch all changes that happen + * during the (possible) sleep. + */ + mpv = pvalloc(); + if (pv->pv_pmap == NULL) { + /* + * XXX - remove this printf some day when we know that + * can/can't happen. + */ + printf("pv_link4m: pv changed during sleep!\n"); + pvfree(mpv); + goto retry; + } + + /* * Before entering the new mapping, see if * it will cause old mappings to become aliased * and thus need to be `discached'. @@ -2423,14 +2511,11 @@ pv_link4m(pv, pm, va, nc) } } - npv = pool_get(&pvpool, PR_NOWAIT); - if (npv == NULL) - panic("pvpool exhausted"); - npv->pv_next = pv->pv_next; - npv->pv_pmap = pm; - npv->pv_va = va; - npv->pv_flags = nc ? 0 : PV_C4M; - pv->pv_next = npv; + mpv->pv_next = pv->pv_next; + mpv->pv_pmap = pm; + mpv->pv_va = va; + mpv->pv_flags = nc ? 0 : PV_C4M; + pv->pv_next = mpv; return (ret); } #endif @@ -2448,7 +2533,7 @@ pv_flushcache(pv) write_user_windows(); /* paranoia? */ - s = splvm(); /* XXX extreme paranoia */ + s = splpmap(); /* XXX extreme paranoia */ if ((pm = pv->pv_pmap) != NULL) { ctx = getcontext(); for (;;) { @@ -3148,32 +3233,11 @@ pmap_bootstrap4m(void) cpuinfo.ctx_tbl[i] = cpuinfo.ctx_tbl[0]; #endif - if ((cpuinfo.flags & CPUFLG_CACHEPAGETABLES) == 0) { - /* - * The page tables have been setup. Since we're still - * running on the PROM's memory map, the memory we - * allocated for our page tables might still be cached. - * Flush it now, and don't touch it again until we - * switch to our own tables (will be done immediately below). 
- */ - int size = pagetables_end - pagetables_start; - - if (CACHEINFO.c_vactype != VAC_NONE) { - int va = (vaddr_t)pagetables_start; - while (size != 0) { - cache_flush_page(va); - va += NBPG; - size -= NBPG; - } - } else if (cpuinfo.pcache_flush_page != NULL) { - int pa = pagetables_start; - while (size != 0) { - pcache_flush_page(pa, 0); - pa += NBPG; - size -= NBPG; - } - } - } + if ((cpuinfo.flags & CPUFLG_CACHEPAGETABLES) == 0) + /* Flush page tables from cache */ + pcache_flush((caddr_t)pagetables_start, + (caddr_t)VA2PA((caddr_t)pagetables_start), + pagetables_end - pagetables_start); /* * Now switch to kernel pagetables (finally!) @@ -3209,6 +3273,66 @@ mmu_install_tables(sc) #endif } +/* + * Allocate per-CPU page tables. + * Note: this routine is called in the context of the boot CPU + * during autoconfig. + */ +void +pmap_alloc_cpu(sc) + struct cpu_softc *sc; +{ + caddr_t cpustore; + int *ctxtable; + int *regtable; + int *segtable; + int *pagtable; + int vr, vs, vpg; + struct regmap *rp; + struct segmap *sp; + + /* Allocate properly aligned and physically contiguous memory here */ + cpustore = 0; + ctxtable = 0; + regtable = 0; + segtable = 0; + pagtable = 0; + + vr = VA_VREG(CPUINFO_VA); + vs = VA_VSEG(CPUINFO_VA); + vpg = VA_VPG(CPUINFO_VA); + rp = &pmap_kernel()->pm_regmap[vr]; + sp = &rp->rg_segmap[vs]; + + /* + * Copy page tables, then modify entry for CPUINFO_VA so that + * it points at the per-CPU pages. + */ + bcopy(cpuinfo.L1_ptps, regtable, SRMMU_L1SIZE * sizeof(int)); + regtable[vr] = + (VA2PA((caddr_t)segtable) >> SRMMU_PPNPASHIFT) | SRMMU_TEPTD; + + bcopy(rp->rg_seg_ptps, segtable, SRMMU_L2SIZE * sizeof(int)); + segtable[vs] = + (VA2PA((caddr_t)pagtable) >> SRMMU_PPNPASHIFT) | SRMMU_TEPTD; + + bcopy(sp->sg_pte, pagtable, SRMMU_L3SIZE * sizeof(int)); + pagtable[vpg] = + (VA2PA((caddr_t)cpustore) >> SRMMU_PPNPASHIFT) | + (SRMMU_TEPTE | PPROT_RWX_RWX | SRMMU_PG_C); + + /* Install L1 table in context 0 */ + ctxtable[0] = ((u_int)regtable >> SRMMU_PPNPASHIFT) | SRMMU_TEPTD; + + sc->ctx_tbl = ctxtable; + sc->L1_ptps = regtable; + +#if 0 + if ((sc->flags & CPUFLG_CACHEPAGETABLES) == 0) { + kvm_uncache((caddr_t)0, 1); + } +#endif +} #endif /* defined sun4m */ @@ -3265,6 +3389,35 @@ pmap_init() } /* + * Called just after enabling cache (so that CPUFLG_CACHEPAGETABLES is + * set correctly). + */ +void +pmap_cache_enable() +{ +#ifdef SUN4M + if (CPU_ISSUN4M) { + int pte; + + /* + * Deal with changed CPUFLG_CACHEPAGETABLES. + * + * If the tables were uncached during the initial mapping + * and cache_enable set the flag we recache the tables. + */ + + pte = getpte4m(pagetables_start); + + if ((cpuinfo.flags & CPUFLG_CACHEPAGETABLES) != 0 && + (pte & SRMMU_PG_C) == 0) + kvm_recache((caddr_t)pagetables_start, + atop(pagetables_end - pagetables_start)); + } +#endif +} + + +/* * Map physical addresses into kernel VM. 
*/ vaddr_t @@ -3280,7 +3433,6 @@ pmap_map(va, pa, endpa, prot) va += pgsize; pa += pgsize; } - pmap_update(pmap_kernel()); return (va); } @@ -3409,7 +3561,7 @@ pmap_release(pm) struct pmap *pm; { union ctxinfo *c; - int s = splvm(); /* paranoia */ + int s = splpmap(); /* paranoia */ #ifdef DEBUG if (pmapdebug & PDB_DESTROY) @@ -3527,7 +3679,7 @@ pmap_remove(pm, va, endva) } ctx = getcontext(); - s = splvm(); /* XXX conservative */ + s = splpmap(); /* XXX conservative */ simple_lock(&pm->pm_lock); for (; va < endva; va = nva) { /* do one virtual segment at a time */ @@ -4074,7 +4226,7 @@ pmap_page_protect4_4c(pg, prot) * The logic is much like that for pmap_remove, * but we know we are removing exactly one page. */ - s = splvm(); + s = splpmap(); if ((pm = pv->pv_pmap) == NULL) { splx(s); return; @@ -4202,7 +4354,7 @@ pmap_page_protect4_4c(pg, prot) nextpv: npv = pv->pv_next; if (pv != pv0) - pool_put(&pvpool, pv); + pvfree(pv); if ((pv = npv) == NULL) break; } @@ -4243,7 +4395,7 @@ pmap_protect4_4c(pm, sva, eva, prot) write_user_windows(); ctx = getcontext4(); - s = splvm(); + s = splpmap(); simple_lock(&pm->pm_lock); for (va = sva; va < eva;) { @@ -4352,7 +4504,7 @@ pmap_changeprot4_4c(pm, va, prot, wired) newprot = prot & VM_PROT_WRITE ? PG_W : 0; vr = VA_VREG(va); vs = VA_VSEG(va); - s = splvm(); /* conservative */ + s = splpmap(); /* conservative */ rp = &pm->pm_regmap[vr]; if (rp->rg_nsegmap == 0) { printf("pmap_changeprot: no segments in %d\n", vr); @@ -4472,7 +4624,7 @@ pmap_page_protect4m(pg, prot) * The logic is much like that for pmap_remove, * but we know we are removing exactly one page. */ - s = splvm(); + s = splpmap(); if ((pm = pv->pv_pmap) == NULL) { splx(s); return; @@ -4535,7 +4687,7 @@ pmap_page_protect4m(pg, prot) npv = pv->pv_next; if (pv != pv0) - pool_put(&pvpool, pv); + pvfree(pv); pv = npv; } pv0->pv_pmap = NULL; @@ -4575,7 +4727,7 @@ pmap_protect4m(pm, sva, eva, prot) write_user_windows(); ctx = getcontext4m(); - s = splvm(); + s = splpmap(); simple_lock(&pm->pm_lock); for (va = sva; va < eva;) { @@ -4665,7 +4817,7 @@ pmap_changeprot4m(pm, va, prot, wired) pmap_stats.ps_changeprots++; - s = splvm(); /* conservative */ + s = splpmap(); /* conservative */ ptep = getptep4m(pm, va); if (pm->pm_ctx) { ctx = getcontext4m(); @@ -4786,7 +4938,7 @@ pmap_enk4_4c(pm, va, prot, flags, pv, pteproto) vs = VA_VSEG(va); rp = &pm->pm_regmap[vr]; sp = &rp->rg_segmap[vs]; - s = splvm(); /* XXX way too conservative */ + s = splpmap(); /* XXX way too conservative */ #if defined(SUN4_MMU3L) if (HASSUN4_MMU3L && rp->rg_smeg == reginval) { @@ -4803,7 +4955,7 @@ pmap_enk4_4c(pm, va, prot, flags, pv, pteproto) for (i = 0; i < NSEGRG; i++) { setsegmap(tva, rp->rg_segmap[i].sg_pmeg); tva += NBPSG; - } + }; } #endif if (sp->sg_pmeg != seginval && (tpte = getpte4(va)) & PG_V) { @@ -4909,7 +5061,7 @@ pmap_enu4_4c(pm, va, prot, flags, pv, pteproto) vr = VA_VREG(va); vs = VA_VSEG(va); rp = &pm->pm_regmap[vr]; - s = splvm(); /* XXX conservative */ + s = splpmap(); /* XXX conservative */ /* * If there is no space in which the PTEs can be written @@ -5048,200 +5200,26 @@ pmap_enu4_4c(pm, va, prot, flags, pv, pteproto) splx(s); - return (0); + return (KERN_SUCCESS); } void -pmap_kenter_pa4_4c(vaddr_t va, paddr_t pa, vm_prot_t prot) +pmap_kenter_pa4_4c(va, pa, prot) + vaddr_t va; + paddr_t pa; + vm_prot_t prot; { - struct pmap *pm = pmap_kernel(); - struct regmap *rp; - struct segmap *sp; - int vr, vs, i, s; - int pteproto, ctx; - - pteproto = PG_V | PMAP_T2PTE_4(pa); - pa &= ~PMAP_TNC_4; - pteproto 
|= atop(pa) & PG_PFNUM; - if (prot & VM_PROT_WRITE) - pteproto |= PG_W; - - vr = VA_VREG(va); - vs = VA_VSEG(va); - rp = &pm->pm_regmap[vr]; - sp = &rp->rg_segmap[vs]; - - ctx = getcontext4(); - s = splvm(); -#if defined(SUN4_MMU3L) - if (HASSUN4_MMU3L && rp->rg_smeg == reginval) { - vaddr_t tva; - rp->rg_smeg = region_alloc(®ion_locked, pm, vr)->me_cookie; - i = ncontext - 1; - do { - setcontext4(i); - setregmap(va, rp->rg_smeg); - } while (--i >= 0); - - /* set all PTEs to invalid, then overwrite one PTE below */ - tva = VA_ROUNDDOWNTOREG(va); - for (i = 0; i < NSEGRG; i++) { - setsegmap(tva, rp->rg_segmap[i].sg_pmeg); - tva += NBPSG; - } - } -#endif - KASSERT(sp->sg_pmeg == seginval || (getpte4(va) & PG_V) == 0); - if (sp->sg_pmeg == seginval) { - int tva; - - /* - * Allocate an MMU entry now (on locked list), - * and map it into every context. Set all its - * PTEs invalid (we will then overwrite one, but - * this is more efficient than looping twice). - */ - - sp->sg_pmeg = me_alloc(&segm_locked, pm, vr, vs)->me_cookie; - rp->rg_nsegmap++; - -#if defined(SUN4_MMU3L) - if (HASSUN4_MMU3L) - setsegmap(va, sp->sg_pmeg); - else -#endif - { - i = ncontext - 1; - do { - setcontext4(i); - setsegmap(va, sp->sg_pmeg); - } while (--i >= 0); - } - - /* set all PTEs to invalid, then overwrite one PTE below */ - tva = VA_ROUNDDOWNTOSEG(va); - i = NPTESG; - do { - setpte4(tva, 0); - tva += NBPG; - } while (--i > 0); - } - - /* ptes kept in hardware only */ - setpte4(va, pteproto); - sp->sg_npte++; - splx(s); - setcontext4(ctx); + pmap_enter4_4c(pmap_kernel(), va, pa, prot, PMAP_WIRED); } void -pmap_kremove4_4c(vaddr_t va, vsize_t len) +pmap_kremove4_4c(va, len) + vaddr_t va; + vsize_t len; { - struct pmap *pm = pmap_kernel(); - struct regmap *rp; - struct segmap *sp; - vaddr_t nva, endva; - int i, tpte, perpage, npg; - int nleft, pmeg; - int vr, vs, s, ctx; - - endva = va + len; -#ifdef DEBUG - if (pmapdebug & PDB_REMOVE) - printf("pmap_kremove(0x%lx, 0x%lx)\n", va, endva); -#endif - - s = splvm(); - ctx = getcontext(); - simple_lock(&pm->pm_lock); - for (; va < endva; va = nva) { - /* do one virtual segment at a time */ - vr = VA_VREG(va); - vs = VA_VSEG(va); - nva = VSTOVA(vr, vs + 1); - if (nva == 0 || nva > endva) - nva = endva; - - rp = &pm->pm_regmap[vr]; - sp = &rp->rg_segmap[vs]; - - if (rp->rg_nsegmap == 0) - continue; - nleft = sp->sg_npte; - if (nleft == 0) - continue; - pmeg = sp->sg_pmeg; - KASSERT(pmeg != seginval); - setcontext4(0); - /* decide how to flush cache */ - npg = (endva - va) >> PGSHIFT; - if (npg > PMAP_RMK_MAGIC) { - /* flush the whole segment */ - perpage = 0; - cache_flush_segment(vr, vs); - } else { - /* - * flush each page individually; - * some never need flushing - */ - perpage = (CACHEINFO.c_vactype != VAC_NONE); - } - while (va < nva) { - tpte = getpte4(va); - if ((tpte & PG_V) == 0) { - va += NBPG; - continue; - } - if ((tpte & PG_TYPE) == PG_OBMEM) { - /* if cacheable, flush page as needed */ - if (perpage && (tpte & PG_NC) == 0) - cache_flush_page(va); - } - nleft--; -#ifdef DIAGNOSTIC - if (nleft < 0) - panic("pmap_kremove: too many PTEs in segment; " - "va 0x%lx; endva 0x%lx", va, endva); -#endif - setpte4(va, 0); - va += NBPG; - } - - /* - * If the segment is all gone, remove it from everyone and - * free the MMU entry. 
- */ - - sp->sg_npte = nleft; - if (nleft == 0) { - va = VSTOVA(vr, vs); -#if defined(SUN4_MMU3L) - if (HASSUN4_MMU3L) - setsegmap(va, seginval); - else -#endif - for (i = ncontext; --i >= 0;) { - setcontext4(i); - setsegmap(va, seginval); - } - me_free(pm, pmeg); - if (--rp->rg_nsegmap == 0) { -#if defined(SUN4_MMU3L) - if (HASSUN4_MMU3L) { - for (i = ncontext; --i >= 0;) { - setcontext4(i); - setregmap(va, reginval); - } - /* note: context is 0 */ - region_free(pm, rp->rg_smeg); - } -#endif - } - } + for (len >>= PAGE_SHIFT; len > 0; len--, va += PAGE_SIZE) { + pmap_remove(pmap_kernel(), va, va + PAGE_SIZE); } - simple_unlock(&pm->pm_lock); - setcontext(ctx); - splx(s); } #endif /*sun4,4c*/ @@ -5350,7 +5328,7 @@ pmap_enk4m(pm, va, prot, flags, pv, pteproto) rp = &pm->pm_regmap[VA_VREG(va)]; sp = &rp->rg_segmap[VA_VSEG(va)]; - s = splvm(); /* XXX way too conservative */ + s = splpmap(); /* XXX way too conservative */ #ifdef DEBUG if (rp->rg_seg_ptps == NULL) /* enter new region */ @@ -5538,102 +5516,41 @@ pmap_enu4m(pm, va, prot, flags, pv, pteproto) splx(s); - return (0); + return (KERN_SUCCESS); } void -pmap_kenter_pa4m(vaddr_t va, paddr_t pa, vm_prot_t prot) +pmap_kenter_pa4m(va, pa, prot) + vaddr_t va; + paddr_t pa; + vm_prot_t prot; { - int pteproto; - struct regmap *rp; - struct segmap *sp; - int *ptep; + struct pvlist *pv; + int pteproto, ctx; pteproto = ((pa & PMAP_NC) == 0 ? SRMMU_PG_C : 0) | - PMAP_T2PTE_SRMMU(pa) | SRMMU_TEPTE | - ((prot & VM_PROT_WRITE) ? PPROT_N_RWX : PPROT_N_RX) | - (atop((pa & ~PMAP_TNC_SRMMU)) << SRMMU_PPNSHIFT); - rp = &pmap_kernel()->pm_regmap[VA_VREG(va)]; - sp = &rp->rg_segmap[VA_VSEG(va)]; - ptep = &sp->sg_pte[VA_SUN4M_VPG(va)]; -#ifdef DIAGNOSTIC - if ((*ptep & SRMMU_TETYPE) == SRMMU_TEPTE) - panic("pmap_kenter_pa4m: mapping exists"); -#endif - sp->sg_npte++; - setpgt4m(ptep, pteproto); -} + PMAP_T2PTE_SRMMU(pa) | SRMMU_TEPTE | + ((prot & VM_PROT_WRITE) ? 
PPROT_N_RWX : PPROT_N_RX); -void -pmap_kremove4m(vaddr_t va, vsize_t len) -{ - struct pmap *pm = pmap_kernel(); - struct regmap *rp; - struct segmap *sp; - vaddr_t endva, nva; - int vr, vs, ctx; - int tpte, perpage, npg; - int nleft; - - endva = va + len; - ctx = getcontext(); - for (; va < endva; va = nva) { - /* do one virtual segment at a time */ - vr = VA_VREG(va); - vs = VA_VSEG(va); - nva = VSTOVA(vr, vs + 1); - if (nva == 0 || nva > endva) { - nva = endva; - } - - rp = &pm->pm_regmap[vr]; - if (rp->rg_nsegmap == 0) { - continue; - } - - sp = &rp->rg_segmap[vs]; - nleft = sp->sg_npte; - if (nleft == 0) { - continue; - } + pa &= ~PMAP_TNC_SRMMU; - setcontext4m(0); - /* decide how to flush cache */ - npg = (nva - va) >> PGSHIFT; - if (npg > PMAP_RMK_MAGIC) { - /* flush the whole segment */ - perpage = 0; - if (CACHEINFO.c_vactype != VAC_NONE) { - cache_flush_segment(vr, vs); - } - } else { + pteproto |= atop(pa) << SRMMU_PPNSHIFT; - /* - * flush each page individually; - * some never need flushing - */ + pv = pvhead(atop(pa)); - perpage = (CACHEINFO.c_vactype != VAC_NONE); - } - for (; va < nva; va += NBPG) { - tpte = sp->sg_pte[VA_SUN4M_VPG(va)]; - if ((tpte & SRMMU_TETYPE) != SRMMU_TEPTE) { - continue; - } - if ((tpte & SRMMU_PGTYPE) == PG_SUN4M_OBMEM) { - /* if cacheable, flush page as needed */ - if (perpage && (tpte & SRMMU_PG_C)) - cache_flush_page(va); - } - setpgt4m(&sp->sg_pte[VA_SUN4M_VPG(va)], - SRMMU_TEINVALID); - nleft--; - } - sp->sg_npte = nleft; - } + ctx = getcontext4m(); + pmap_enk4m(pmap_kernel(), va, prot, TRUE, pv, pteproto); setcontext(ctx); } +void +pmap_kremove4m(va, len) + vaddr_t va; + vsize_t len; +{ + pmap_remove(pmap_kernel(), va, va + len); +} + #endif /* sun4m */ /* @@ -5792,24 +5709,6 @@ pmap_extract4m(pm, va, pa) #endif /* sun4m */ /* - * Copy the range specified by src_addr/len - * from the source map to the range dst_addr/len - * in the destination map. - * - * This routine is only advisory and need not do anything. - */ -/* ARGSUSED */ -int pmap_copy_disabled=0; -void -pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) - struct pmap *dst_pmap, *src_pmap; - vaddr_t dst_addr; - vsize_t len; - vaddr_t src_addr; -{ -} - -/* * Garbage collects the physical map system for * pages which are no longer used. * Success need not be guaranteed -- that is, there @@ -5995,15 +5894,14 @@ pmap_is_referenced4m(pg) #if defined(SUN4) || defined(SUN4C) void -pmap_zero_page4_4c(paddr_t pa) +pmap_zero_page4_4c(pa) + paddr_t pa; { - struct pvlist *pv; caddr_t va; - u_int pfn; int pte; + struct pvlist *pv; - pfn = atop(pa); - if (pmap_initialized && (pv = pvhead(pfn)) != NULL) { + if (pmap_initialized && (pv = pvhead(atop(pa))) != NULL) { /* * The following might not be necessary since the page * is being cleared because it is about to be allocated, @@ -6011,7 +5909,7 @@ pmap_zero_page4_4c(paddr_t pa) */ pv_flushcache(pv); } - pte = PG_V | PG_S | PG_W | PG_NC | (pfn & PG_PFNUM); + pte = PG_V | PG_S | PG_W | PG_NC | (atop(pa) & PG_PFNUM); va = vpage[0]; setpte4(va, pte); @@ -6029,22 +5927,24 @@ pmap_zero_page4_4c(paddr_t pa) * the processor. 
*/ void -pmap_copy_page4_4c(paddr_t src, paddr_t dst) +pmap_copy_page4_4c(src, dst) + paddr_t src, dst; { - struct pvlist *pv; caddr_t sva, dva; int spte, dpte; - u_int pfn; + struct pvlist *pv; - pfn = atop(src); - if ((pv = pvhead(pfn)) != NULL && CACHEINFO.c_vactype == VAC_WRITEBACK) + pv = pvhead(atop(src)); + if (pv && CACHEINFO.c_vactype == VAC_WRITEBACK) pv_flushcache(pv); - spte = PG_V | PG_S | (pfn & PG_PFNUM); - pfn = atop(dst); - if ((pv = pvhead(pfn)) != NULL && CACHEINFO.c_vactype != VAC_NONE) + spte = PG_V | PG_S | (atop(src) & PG_PFNUM); + + pv = pvhead(atop(dst)); + if (pv && CACHEINFO.c_vactype != VAC_NONE) pv_flushcache(pv); - dpte = PG_V | PG_S | PG_W | PG_NC | (pfn & PG_PFNUM); + + dpte = PG_V | PG_S | PG_W | PG_NC | (atop(dst) & PG_PFNUM); sva = vpage[0]; dva = vpage[1]; @@ -6065,26 +5965,29 @@ pmap_copy_page4_4c(paddr_t src, paddr_t dst) * XXX might be faster to use destination's context and allow cache to fill? */ void -pmap_zero_page4m(paddr_t pa) +pmap_zero_page4m(pa) + paddr_t pa; { + int pte; struct pvlist *pv; static int *ptep; static vaddr_t va; - u_int pfn; - int pte; if (ptep == NULL) ptep = getptep4m(pmap_kernel(), (va = (vaddr_t)vpage[0])); - pfn = atop(pa); - if (pmap_initialized && (pv = pvhead(pfn)) != NULL) { - if (CACHEINFO.c_vactype != VAC_NONE) - pv_flushcache(pv); - else - pcache_flush_page(pa, 1); + if (pmap_initialized && (pv = pvhead(atop(pa))) != NULL && + CACHEINFO.c_vactype != VAC_NONE) { + /* + * The following might not be necessary since the page + * is being cleared because it is about to be allocated, + * i.e., is in use by no one. + */ + pv_flushcache(pv); } - pte = (SRMMU_TEPTE | PPROT_S | PPROT_WRITE | (pfn << SRMMU_PPNSHIFT)); + pte = (SRMMU_TEPTE | PPROT_S | PPROT_WRITE | + (atop(pa) << SRMMU_PPNSHIFT)); if (cpuinfo.flags & CPUFLG_CACHE_MANDATORY) pte |= SRMMU_PG_C; else @@ -6107,29 +6010,32 @@ pmap_zero_page4m(paddr_t pa) * the processor. */ void -pmap_copy_page4m(paddr_t src, paddr_t dst) +pmap_copy_page4m(src, dst) + paddr_t src, dst; { int spte, dpte; struct pvlist *pv; static int *sptep, *dptep; static vaddr_t sva, dva; - u_int pfn; if (sptep == NULL) { sptep = getptep4m(pmap_kernel(), (sva = (vaddr_t)vpage[0])); dptep = getptep4m(pmap_kernel(), (dva = (vaddr_t)vpage[1])); } - pfn = atop(src); - if ((pv = pvhead(pfn)) != NULL && CACHEINFO.c_vactype == VAC_WRITEBACK) + pv = pvhead(atop(src)); + if (pv && CACHEINFO.c_vactype == VAC_WRITEBACK) pv_flushcache(pv); - spte = SRMMU_TEPTE | SRMMU_PG_C | PPROT_S | (pfn << SRMMU_PPNSHIFT); - pfn = atop(dst); - if ((pv = pvhead(pfn)) != NULL && CACHEINFO.c_vactype != VAC_NONE) + spte = SRMMU_TEPTE | SRMMU_PG_C | PPROT_S | + (atop(src) << SRMMU_PPNSHIFT); + + pv = pvhead(atop(dst)); + if (pv && CACHEINFO.c_vactype != VAC_NONE) pv_flushcache(pv); - dpte = (SRMMU_TEPTE | PPROT_S | PPROT_WRITE | (pfn << SRMMU_PPNSHIFT)); + dpte = (SRMMU_TEPTE | PPROT_S | PPROT_WRITE | + (atop(dst) << SRMMU_PPNSHIFT)); if (cpuinfo.flags & CPUFLG_CACHE_MANDATORY) dpte |= SRMMU_PG_C; else @@ -6154,8 +6060,10 @@ pmap_copy_page4m(paddr_t src, paddr_t dst) * elsewhere, or even not at all */ paddr_t -pmap_phys_address(int x) +pmap_phys_address(x) + int x; { + return (x); } @@ -6164,31 +6072,70 @@ pmap_phys_address(int x) * * We just assert PG_NC for each PTE; the addresses must reside * in locked kernel space. A cache flush is also done. - * Please do not use this function in new code. - * Doesn't work on sun4m, nor for pages with multiple mappings. 
*/ void -kvm_uncache(caddr_t va, int npages) +kvm_setcache(va, npages, cached) + caddr_t va; + int npages; + int cached; { -#if defined(SUN4) || defined(SUN4C) int pte; struct pvlist *pv; -#endif if (CPU_ISSUN4M) { - panic("kvm_uncache on 4m"); +#if defined(SUN4M) + int ctx = getcontext4m(); + + setcontext4m(0); + for (; --npages >= 0; va += NBPG) { + int *ptep; + + ptep = getptep4m(pmap_kernel(), (vaddr_t)va); + pte = *ptep; +#ifdef DIAGNOSTIC + if ((pte & SRMMU_TETYPE) != SRMMU_TEPTE) + panic("kvm_uncache: table entry not pte"); +#endif + pv = pvhead((pte & SRMMU_PPNMASK) >> SRMMU_PPNSHIFT); + if (pv) { + if (cached) + pv_changepte4m(pv, SRMMU_PG_C, 0); + else + pv_changepte4m(pv, 0, SRMMU_PG_C); + } + if (cached) + pte |= SRMMU_PG_C; + else + pte &= ~SRMMU_PG_C; + tlb_flush_page((vaddr_t)va); + setpgt4m(ptep, pte); + + if ((pte & SRMMU_PGTYPE) == PG_SUN4M_OBMEM) + cache_flush_page((int)va); + + } + setcontext4m(ctx); + +#endif } else { #if defined(SUN4) || defined(SUN4C) for (; --npages >= 0; va += NBPG) { pte = getpte4(va); -#ifdef DIAGNOSTIC if ((pte & PG_V) == 0) panic("kvm_uncache !pg_v"); -#endif - if ((pv = pvhead(pte & PG_PFNUM)) != NULL) { - pv_changepte4_4c(pv, PG_NC, 0); + + pv = pvhead(pte & PG_PFNUM); + /* XXX - we probably don't need to check for OBMEM */ + if ((pte & PG_TYPE) == PG_OBMEM && pv) { + if (cached) + pv_changepte4_4c(pv, 0, PG_NC); + else + pv_changepte4_4c(pv, PG_NC, 0); } - pte |= PG_NC; + if (cached) + pte &= ~PG_NC; + else + pte |= PG_NC; setpte4(va, pte); if ((pte & PG_TYPE) == PG_OBMEM) cache_flush_page((int)va); @@ -6197,8 +6144,36 @@ kvm_uncache(caddr_t va, int npages) } } +/* + * Turn on IO cache for a given (va, number of pages). + * + * We just assert PG_NC for each PTE; the addresses must reside + * in locked kernel space. A cache flush is also done. + */ +void +kvm_iocache(va, npages) + caddr_t va; + int npages; +{ + +#ifdef SUN4M + if (CPU_ISSUN4M) /* %%%: Implement! */ + panic("kvm_iocache: 4m iocache not implemented"); +#endif +#if defined(SUN4) || defined(SUN4C) + for (; --npages >= 0; va += NBPG) { + int pte = getpte4(va); + if ((pte & PG_V) == 0) + panic("kvm_iocache !pg_v"); + pte |= PG_IOC; + setpte4(va, pte); + } +#endif +} + int -pmap_count_ptes(struct pmap *pm) +pmap_count_ptes(pm) + struct pmap *pm; { int idx, total; struct regmap *rp; @@ -6224,7 +6199,9 @@ pmap_count_ptes(struct pmap *pm) * (This will just seg-align mappings.) */ void -pmap_prefer(vaddr_t foff, vaddr_t *vap) +pmap_prefer(foff, vap) + vaddr_t foff; + vaddr_t *vap; { vaddr_t va = *vap; long d, m; @@ -6242,7 +6219,7 @@ pmap_prefer(vaddr_t foff, vaddr_t *vap) } void -pmap_redzone(void) +pmap_redzone() { #if defined(SUN4M) if (CPU_ISSUN4M) { @@ -6263,7 +6240,8 @@ pmap_redzone(void) * process is the current process, load the new MMU context. */ void -pmap_activate(struct proc *p) +pmap_activate(p) + struct proc *p; { pmap_t pmap = p->p_vmspace->vm_map.pmap; int s; @@ -6275,7 +6253,7 @@ pmap_activate(struct proc *p) * the new context. */ - s = splvm(); + s = splpmap(); if (p == curproc) { write_user_windows(); if (pmap->pm_ctx == NULL) { @@ -6293,7 +6271,8 @@ pmap_activate(struct proc *p) * Deactivate the address space of the specified process. */ void -pmap_deactivate(struct proc *p) +pmap_deactivate(p) + struct proc *p; { } @@ -6302,7 +6281,9 @@ pmap_deactivate(struct proc *p) * Check consistency of a pmap (time consuming!). 
*/ void -pm_check(char *s, struct pmap *pm) +pm_check(s, pm) + char *s; + struct pmap *pm; { if (pm == pmap_kernel()) pm_check_k(s, pm); @@ -6311,7 +6292,9 @@ pm_check(char *s, struct pmap *pm) } void -pm_check_u(char *s, struct pmap *pm) +pm_check_u(s, pm) + char *s; + struct pmap *pm; { struct regmap *rp; struct segmap *sp; @@ -6397,7 +6380,9 @@ pm_check_u(char *s, struct pmap *pm) } void -pm_check_k(char *s, struct pmap *pm) /* Note: not as extensive as pm_check_u. */ +pm_check_k(s, pm) /* Note: not as extensive as pm_check_u. */ + char *s; + struct pmap *pm; { struct regmap *rp; int vr, vs, n; @@ -6457,7 +6442,7 @@ pm_check_k(char *s, struct pmap *pm) /* Note: not as extensive as pm_check_u. */ * The last page or two contains stuff so libkvm can bootstrap. */ int -pmap_dumpsize(void) +pmap_dumpsize() { long sz; @@ -6476,15 +6461,17 @@ pmap_dumpsize(void) * there is no in-core copy of kernel memory mappings on a 4/4c machine. */ int -pmap_dumpmmu(int (*dump)(dev_t, daddr_t, caddr_t, size_t), daddr_t blkno) +pmap_dumpmmu(dump, blkno) + daddr_t blkno; + int (*dump) __P((dev_t, daddr_t, caddr_t, size_t)); { kcore_seg_t *ksegp; cpu_kcore_hdr_t *kcpup; phys_ram_seg_t memseg; int error = 0; int i, memsegoffset, pmegoffset; - int buffer[dbtob(1) / sizeof(int)]; - int *bp, *ep; + int buffer[dbtob(1) / sizeof(int)]; + int *bp, *ep; #if defined(SUN4C) || defined(SUN4) int pmeg; #endif @@ -6583,12 +6570,14 @@ out: * Helper function for debuggers. */ void -pmap_writetext(unsigned char *dst, int ch) +pmap_writetext(dst, ch) + unsigned char *dst; + int ch; { int s, pte0, pte, ctx; vaddr_t va; - s = splvm(); + s = splpmap(); va = (unsigned long)dst & (~PGOFSET); cpuinfo.cache_flush(dst, 1); @@ -6633,10 +6622,10 @@ pmap_writetext(unsigned char *dst, int ch) #ifdef EXTREME_DEBUG -static void test_region(int, int, int); +static void test_region __P((int, int, int)); void -debug_pagetables(void) +debug_pagetables() { int i; int *regtbl; @@ -6683,7 +6672,10 @@ debug_pagetables(void) } static u_int -VA2PAsw(int ctx, caddr_t addr, int *pte) +VA2PAsw(ctx, addr, pte) + int ctx; + caddr_t addr; + int *pte; { int *curtbl; int curpte; @@ -6750,8 +6742,9 @@ VA2PAsw(int ctx, caddr_t addr, int *pte) printf("Bizarreness with address 0x%x!\n",addr); } -void -test_region(int reg, int start, int stop) +void test_region(reg, start, stop) + int reg; + int start, stop; { int i; int addr; @@ -6789,8 +6782,7 @@ test_region(int reg, int start, int stop) } -void -print_fe_map(void) +void print_fe_map(void) { u_int i, pte; diff --git a/sys/arch/sparc/sparc/vm_machdep.c b/sys/arch/sparc/sparc/vm_machdep.c index fe0b1314cc1..e3492f7edae 100644 --- a/sys/arch/sparc/sparc/vm_machdep.c +++ b/sys/arch/sparc/sparc/vm_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_machdep.c,v 1.40 2001/12/09 01:45:32 art Exp $ */ +/* $OpenBSD: vm_machdep.c,v 1.41 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: vm_machdep.c,v 1.30 1997/03/10 23:55:40 pk Exp $ */ /* @@ -115,87 +115,54 @@ extern int has_iocache; #endif caddr_t -dvma_malloc_space(size_t len, void *kaddr, int flags, int space) +dvma_malloc_space(len, kaddr, flags, space) + size_t len; + void *kaddr; + int flags; { - int waitok = (flags & M_NOWAIT) == 0; - vsize_t maplen, tmplen; vaddr_t kva; vaddr_t dva; - int s; len = round_page(len); - /* XXX - verify if maybe uvm_km_valloc from kernel_map would be ok. 
*/ - s = splvm(); - kva = uvm_km_valloc(kmem_map, len); - splx(s); - if (kva == 0) + kva = (vaddr_t)malloc(len, M_DEVBUF, flags); + if (kva == NULL) return (NULL); - for (maplen = 0; maplen < len; maplen += PAGE_SIZE) { - struct vm_page *pg; - paddr_t pa; - -again: - pg = uvm_pagealloc(NULL, 0, NULL, 0); - if (pg == NULL) { - if (waitok) { - uvm_wait("dvmapg"); - goto again; - } - goto dropit; - } - pa = VM_PAGE_TO_PHYS(pg); #if defined(SUN4M) - if (!has_iocache) + if (!has_iocache) #endif - pa |= PMAP_NC; - pmap_kenter_pa(kva + maplen, pa, VM_PROT_ALL); - } - pmap_update(pmap_kernel()); + kvm_uncache((caddr_t)kva, atop(len)); *(vaddr_t *)kaddr = kva; - dva = dvma_mapin_space(kernel_map, kva, len, waitok ? 1 : 0, space); + dva = dvma_mapin_space(kernel_map, kva, len, (flags & M_NOWAIT) ? 0 : 1, space); if (dva == NULL) { - goto dropit; + free((void *)kva, M_DEVBUF); + return (NULL); } return (caddr_t)dva; -dropit: - for (tmplen = 0; tmplen < maplen; tmplen += PAGE_SIZE) { - paddr_t pa; - - if (pmap_extract(pmap_kernel(), kva + tmplen, &pa) == FALSE) - panic("dvma_malloc_space: pmap_extract"); - - pmap_kremove(kva + tmplen, PAGE_SIZE); - uvm_pagefree(PHYS_TO_VM_PAGE(pa)); - } - pmap_update(pmap_kernel()); - - uvm_km_free(kmem_map, kva, len); - - return (NULL); } void -dvma_free(caddr_t dva, size_t len, void *kaddr) +dvma_free(dva, len, kaddr) + caddr_t dva; + size_t len; + void *kaddr; { - size_t tmplen; vaddr_t kva = *(vaddr_t *)kaddr; len = round_page(len); dvma_mapout((vaddr_t)dva, kva, len); - for (tmplen = 0; tmplen < len; tmplen += PAGE_SIZE) { - paddr_t pa; - - if (pmap_extract(pmap_kernel(), kva + tmplen, &pa) == FALSE) - panic("dvma_malloc_space: pmap_extract"); - - pmap_kremove(kva + tmplen, PAGE_SIZE); - uvm_pagefree(PHYS_TO_VM_PAGE(pa)); - } - - uvm_km_free(kmem_map, kva, len); + /* + * Even if we're freeing memory here, we can't be sure that it will + * be unmapped, so we must recache the memory range to avoid impact + * on other kernel subsystems. 
+ */ +#if defined(SUN4M) + if (!has_iocache) +#endif + kvm_recache(kaddr, atop(len)); + free((void *)kva, M_DEVBUF); } u_long dvma_cachealign = 0; diff --git a/sys/conf/files b/sys/conf/files index b7331c270ea..fe292932daa 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.233 2001/12/10 04:45:31 art Exp $ +# $OpenBSD: files,v 1.234 2001/12/19 08:58:05 art Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -533,7 +533,6 @@ file kern/vfs_syscalls.c file kern/vfs_vnops.c file kern/vnode_if.c file miscfs/deadfs/dead_vnops.c -file miscfs/genfs/genfs_vnops.c file miscfs/fdesc/fdesc_vfsops.c fdesc file miscfs/fdesc/fdesc_vnops.c fdesc file miscfs/fifofs/fifo_vnops.c fifo @@ -802,7 +801,6 @@ file xfs/xfs_syscalls-dummy.c !xfs file uvm/uvm_amap.c file uvm/uvm_anon.c file uvm/uvm_aobj.c -file uvm/uvm_bio.c file uvm/uvm_device.c file uvm/uvm_fault.c file uvm/uvm_glue.c diff --git a/sys/dev/vnd.c b/sys/dev/vnd.c index 8a62beb64d3..58064eab3c2 100644 --- a/sys/dev/vnd.c +++ b/sys/dev/vnd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vnd.c,v 1.30 2001/12/07 00:11:14 niklas Exp $ */ +/* $OpenBSD: vnd.c,v 1.31 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */ /* @@ -559,6 +559,10 @@ vndstrategy(bp) nbp->vb_buf.b_proc = bp->b_proc; nbp->vb_buf.b_iodone = vndiodone; nbp->vb_buf.b_vp = vp; + nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff; + nbp->vb_buf.b_dirtyend = bp->b_dirtyend; + nbp->vb_buf.b_validoff = bp->b_validoff; + nbp->vb_buf.b_validend = bp->b_validend; LIST_INIT(&nbp->vb_buf.b_dep); /* save a reference to the old buffer */ diff --git a/sys/isofs/cd9660/cd9660_node.h b/sys/isofs/cd9660/cd9660_node.h index 31c112e6a16..a1b70cf4a79 100644 --- a/sys/isofs/cd9660/cd9660_node.h +++ b/sys/isofs/cd9660/cd9660_node.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660_node.h,v 1.12 2001/12/10 18:49:51 art Exp $ */ +/* $OpenBSD: cd9660_node.h,v 1.13 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: cd9660_node.h,v 1.15 1997/04/11 21:52:01 kleink Exp $ */ /*- @@ -42,7 +42,6 @@ */ #include <sys/buf.h> -#include <miscfs/genfs/genfs.h> /* * Theoretically, directories can be more than 2Gb in length, @@ -77,7 +76,6 @@ struct iso_dnode { #endif struct iso_node { - struct genfs_node i_gnode; struct iso_node *i_next, **i_prev; /* hash chain */ struct vnode *i_vnode; /* vnode associated with this inode */ struct vnode *i_devvp; /* vnode for block I/O */ @@ -98,6 +96,7 @@ struct iso_node { long iso_start; /* actual start of data of file (may be different */ /* from iso_extent, if file has extended attributes) */ ISO_RRIP_INODE inode; + struct cluster_info i_ci; }; #define i_forw i_chain[0] diff --git a/sys/isofs/cd9660/cd9660_vfsops.c b/sys/isofs/cd9660/cd9660_vfsops.c index 75eb9be50a5..f91662cc09f 100644 --- a/sys/isofs/cd9660/cd9660_vfsops.c +++ b/sys/isofs/cd9660/cd9660_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660_vfsops.c,v 1.26 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: cd9660_vfsops.c,v 1.27 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: cd9660_vfsops.c,v 1.26 1997/06/13 15:38:58 pk Exp $ */ /*- @@ -80,10 +80,6 @@ struct vfsops cd9660_vfsops = { cd9660_check_export }; -struct genfs_ops cd9660_genfsops = { - genfs_size, -}; - /* * Called by vfs_mountroot when iso is going to be mounted as root. 
*/ @@ -363,8 +359,6 @@ iso_mountfs(devvp, mp, p, argp) mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; - mp->mnt_dev_bshift = iso_bsize; - mp->mnt_fs_bshift = isomp->im_bshift; isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; @@ -943,8 +937,7 @@ retry: /* * XXX need generation number? */ - - genfs_node_init(vp, &cd9660_genfsops); + *vpp = vp; return (0); } diff --git a/sys/isofs/cd9660/cd9660_vnops.c b/sys/isofs/cd9660/cd9660_vnops.c index f95775a9261..537878c775c 100644 --- a/sys/isofs/cd9660/cd9660_vnops.c +++ b/sys/isofs/cd9660/cd9660_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660_vnops.c,v 1.19 2001/12/10 18:49:51 art Exp $ */ +/* $OpenBSD: cd9660_vnops.c,v 1.20 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: cd9660_vnops.c,v 1.42 1997/10/16 23:56:57 christos Exp $ */ /*- @@ -290,6 +290,16 @@ cd9660_getattr(v) return (0); } +#ifdef DEBUG +extern int doclusterread; +#else +#define doclusterread 1 +#endif + +/* XXX until cluster routines can handle block sizes less than one page */ +#define cd9660_doclusterread \ + (doclusterread && (ISO_DEFAULT_BLOCK_SIZE >= NBPG)) + /* * Vnode op for reading. */ @@ -304,40 +314,63 @@ cd9660_read(v) struct ucred *a_cred; } */ *ap = v; struct vnode *vp = ap->a_vp; - struct uio *uio = ap->a_uio; - struct iso_node *ip = VTOI(vp); - int error; + register struct uio *uio = ap->a_uio; + register struct iso_node *ip = VTOI(vp); + register struct iso_mnt *imp; + struct buf *bp; + daddr_t lbn, rablock; + off_t diff; + int rasize, error = 0; + long size, n, on; if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); - - if (vp->v_type != VREG) { - /* - * XXXART - maybe we should just panic? this is not possible - * unless vn_rdwr is called with VDIR and that's an error. 
- */ - return (EISDIR); - } - ip->i_flag |= IN_ACCESS; - - while (uio->uio_resid > 0) { - void *win; - vsize_t bytelen = MIN(ip->i_size - uio->uio_offset, - uio->uio_resid); - if (bytelen == 0) - break; - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen, - UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) + imp = ip->i_mnt; + do { + lbn = lblkno(imp, uio->uio_offset); + on = blkoff(imp, uio->uio_offset); + n = min((u_int)(imp->logical_block_size - on), + uio->uio_resid); + diff = (off_t)ip->i_size - uio->uio_offset; + if (diff <= 0) + return (0); + if (diff < n) + n = diff; + size = blksize(imp, ip, lbn); + rablock = lbn + 1; + if (cd9660_doclusterread) { + if (lblktosize(imp, rablock) <= ip->i_size) + error = cluster_read(vp, &ip->i_ci, + (off_t)ip->i_size, lbn, size, NOCRED, &bp); + else + error = bread(vp, lbn, size, NOCRED, &bp); + } else { + if (ip->i_ci.ci_lastr + 1 == lbn && + lblktosize(imp, rablock) < ip->i_size) { + rasize = blksize(imp, ip, rablock); + error = breadn(vp, lbn, size, &rablock, + &rasize, 1, NOCRED, &bp); + } else + error = bread(vp, lbn, size, NOCRED, &bp); + } + ip->i_ci.ci_lastr = lbn; + n = min(n, size - bp->b_resid); + if (error) { + brelse(bp); return (error); - } + } - return (0); + error = uiomove(bp->b_data + on, (int)n, uio); + + if (n + on == imp->logical_block_size || + uio->uio_offset == (off_t)ip->i_size) + bp->b_flags |= B_AGE; + brelse(bp); + } while (error == 0 && uio->uio_resid > 0 && n != 0); + return (error); } /* ARGSUSED */ @@ -1012,9 +1045,7 @@ struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = { { &vop_pathconf_desc, cd9660_pathconf },/* pathconf */ { &vop_advlock_desc, cd9660_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_getpages_desc, genfs_getpages }, - { &vop_mmap_desc, cd9660_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc cd9660_vnodeop_opv_desc = { &cd9660_vnodeop_p, cd9660_vnodeop_entries }; @@ -1060,8 +1091,7 @@ struct vnodeopv_entry_desc cd9660_specop_entries[] = { { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ { &vop_advlock_desc, spec_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_mmap_desc, spec_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc cd9660_specop_opv_desc = { &cd9660_specop_p, cd9660_specop_entries }; @@ -1105,8 +1135,7 @@ struct vnodeopv_entry_desc cd9660_fifoop_entries[] = { { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ { &vop_advlock_desc, fifo_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_mmap_desc, fifo_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc cd9660_fifoop_opv_desc = { &cd9660_fifoop_p, cd9660_fifoop_entries }; diff --git a/sys/kern/exec_subr.c b/sys/kern/exec_subr.c index 1d816ded073..d1ba66f4a82 100644 --- a/sys/kern/exec_subr.c +++ b/sys/kern/exec_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: exec_subr.c,v 1.16 2001/11/28 13:47:39 art Exp $ */ +/* $OpenBSD: exec_subr.c,v 1.17 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: exec_subr.c,v 1.9 1994/12/04 03:10:42 mycroft Exp $ */ /* @@ -138,8 +138,14 @@ vmcmd_map_pagedvn(p, cmd) struct proc *p; struct exec_vmcmd *cmd; { + /* + * note that if you're going to map part of an process as being + * paged from a vnode, that vnode had damn well better be marked as + * VTEXT. 
that's handled in the routine which sets up the vmcmd to + * call this routine. + */ struct uvm_object *uobj; - int error; + int retval; /* * map the vnode in using uvm_map. @@ -161,22 +167,29 @@ vmcmd_map_pagedvn(p, cmd) uobj = uvn_attach((void *) cmd->ev_vp, VM_PROT_READ|VM_PROT_EXECUTE); if (uobj == NULL) return(ENOMEM); - VREF(cmd->ev_vp); /* * do the map */ - error = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr, cmd->ev_len, + retval = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr, cmd->ev_len, uobj, cmd->ev_offset, 0, UVM_MAPFLAG(cmd->ev_prot, VM_PROT_ALL, UVM_INH_COPY, UVM_ADV_NORMAL, UVM_FLAG_COPYONW|UVM_FLAG_FIXED)); - if (error) { - uobj->pgops->pgo_detach(uobj); - } + /* + * check for error + */ + + if (retval == KERN_SUCCESS) + return(0); + + /* + * error: detach from object + */ - return(error); + uobj->pgops->pgo_detach(uobj); + return(EINVAL); } /* diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index d1eeb637489..eb68a253268 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: init_main.c,v 1.88 2001/12/04 23:22:42 art Exp $ */ +/* $OpenBSD: init_main.c,v 1.89 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */ /* @@ -221,8 +221,6 @@ main(framep) cpu_configure(); - ubc_init(); /* Initialize the unified buffer cache */ - /* Initialize sysctls (must be done before any processes run) */ sysctl_init(); @@ -297,7 +295,7 @@ main(framep) /* Allocate a prototype map so we have something to fork. */ uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS), - trunc_page(VM_MAX_ADDRESS)); + trunc_page(VM_MAX_ADDRESS), TRUE); p->p_vmspace = &vmspace0; p->p_addr = proc0paddr; /* XXX */ diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 9f621da43d2..6bec610b8e2 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_exec.c,v 1.61 2001/11/27 05:27:11 art Exp $ */ +/* $OpenBSD: kern_exec.c,v 1.62 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */ /*- @@ -150,7 +150,6 @@ check_exec(p, epp) goto bad1; /* now we have the file, get the exec header */ - uvn_attach(vp, VM_PROT_READ); error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid, p); if (error) diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index aee61ff48d6..4584481ba7d 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_malloc.c,v 1.44 2001/12/05 17:49:06 art Exp $ */ +/* $OpenBSD: kern_malloc.c,v 1.45 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $ */ /* @@ -46,7 +46,7 @@ #include <uvm/uvm_extern.h> -static struct vm_map kmem_map_store; +static struct vm_map_intrsafe kmem_map_store; struct vm_map *kmem_map = NULL; #ifdef NKMEMCLUSTERS @@ -492,7 +492,7 @@ kmeminit() kmem_map = uvm_km_suballoc(kernel_map, (vaddr_t *)&kmembase, (vaddr_t *)&kmemlimit, (vsize_t)(nkmempages * PAGE_SIZE), - VM_MAP_INTRSAFE, FALSE, &kmem_map_store); + VM_MAP_INTRSAFE, FALSE, &kmem_map_store.vmi_map); kmemusage = (struct kmemusage *) uvm_km_zalloc(kernel_map, (vsize_t)(nkmempages * sizeof(struct kmemusage))); #ifdef KMEMSTATS diff --git a/sys/kern/spec_vnops.c b/sys/kern/spec_vnops.c index 2022279f6d8..e24cde8096b 100644 --- a/sys/kern/spec_vnops.c +++ b/sys/kern/spec_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: spec_vnops.c,v 1.21 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: spec_vnops.c,v 
1.22 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: spec_vnops.c,v 1.29 1996/04/22 01:42:38 christos Exp $ */ /* @@ -104,8 +104,7 @@ struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ { &vop_advlock_desc, spec_advlock }, /* advlock */ { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ - { &vop_mmap_desc, spec_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index 817d7512b6c..d787fde7c5b 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sysv_shm.c,v 1.23 2001/11/28 13:47:39 art Exp $ */ +/* $OpenBSD: sysv_shm.c,v 1.24 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: sysv_shm.c,v 1.50 1998/10/21 22:24:29 tron Exp $ */ /* @@ -144,13 +144,15 @@ shm_delete_mapping(vm, shmmap_s) struct shmmap_state *shmmap_s; { struct shmid_ds *shmseg; - int segnum; + int segnum, result; size_t size; segnum = IPCID_TO_IX(shmmap_s->shmid); shmseg = &shmsegs[segnum]; size = round_page(shmseg->shm_segsz); - uvm_deallocate(&vm->vm_map, shmmap_s->va, size); + result = uvm_deallocate(&vm->vm_map, shmmap_s->va, size); + if (result != KERN_SUCCESS) + return EINVAL; shmmap_s->shmid = -1; shmseg->shm_dtime = time.tv_sec; if ((--shmseg->shm_nattch <= 0) && @@ -205,6 +207,7 @@ sys_shmat(p, v, retval) vaddr_t attach_va; vm_prot_t prot; vsize_t size; + int rv; shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; if (shmmap_s == NULL) { @@ -249,11 +252,11 @@ sys_shmat(p, v, retval) } shm_handle = shmseg->shm_internal; uao_reference(shm_handle->shm_object); - error = uvm_map(&p->p_vmspace->vm_map, &attach_va, size, + rv = uvm_map(&p->p_vmspace->vm_map, &attach_va, size, shm_handle->shm_object, 0, 0, UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, 0)); - if (error) { - return error; + if (rv != KERN_SUCCESS) { + return ENOMEM; } shmmap_s->va = attach_va; diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 52be1533b43..22ef4dfb385 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_bio.c,v 1.54 2001/11/30 05:45:33 csapuntz Exp $ */ +/* $OpenBSD: vfs_bio.c,v 1.55 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */ /*- @@ -327,6 +327,23 @@ breadn(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp) } /* + * Read with single-block read-ahead. Defined in Bach (p.55), but + * implemented as a call to breadn(). + * XXX for compatibility with old file systems. + */ +int +breada(vp, blkno, size, rablkno, rabsize, cred, bpp) + struct vnode *vp; + daddr_t blkno; int size; + daddr_t rablkno; int rabsize; + struct ucred *cred; + struct buf **bpp; +{ + + return (breadn(vp, blkno, size, &rablkno, &rabsize, 1, cred, bpp)); +} + +/* * Block write. Described in Bach (p.56) */ int @@ -389,6 +406,7 @@ bwrite(bp) /* Initiate disk write. Make sure the appropriate party is charged. */ bp->b_vp->v_numoutput++; splx(s); + SET(bp->b_flags, B_WRITEINPROG); VOP_STRATEGY(bp); if (async) @@ -448,6 +466,7 @@ bdwrite(bp) } /* Otherwise, the "write" is done, so mark and release the buffer. */ + CLR(bp->b_flags, B_NEEDCOMMIT); SET(bp->b_flags, B_DONE); brelse(bp); } @@ -569,7 +588,6 @@ brelse(bp) /* Unlock the buffer. */ CLR(bp->b_flags, (B_AGE | B_ASYNC | B_BUSY | B_NOCACHE | B_DEFERRED)); - SET(bp->b_flags, B_CACHE); /* Allow disk interrupts. 
*/ splx(s); @@ -633,30 +651,44 @@ getblk(vp, blkno, size, slpflag, slptimeo) daddr_t blkno; int size, slpflag, slptimeo; { + struct bufhashhdr *bh; struct buf *bp, *nbp = NULL; int s, err; + /* + * XXX + * The following is an inlined version of 'incore()', but with + * the 'invalid' test moved to after the 'busy' test. It's + * necessary because there are some cases in which the NFS + * code sets B_INVAL prior to writing data to the server, but + * in which the buffers actually contain valid data. In this + * case, we can't allow the system to allocate a new buffer for + * the block until the write is finished. + */ + bh = BUFHASH(vp, blkno); start: - bp = incore(vp, blkno); - if (bp != NULL) { + bp = bh->lh_first; + for (; bp != NULL; bp = bp->b_hash.le_next) { + if (bp->b_lblkno != blkno || bp->b_vp != vp) + continue; + s = splbio(); if (ISSET(bp->b_flags, B_BUSY)) { SET(bp->b_flags, B_WANTED); err = tsleep(bp, slpflag | (PRIBIO + 1), "getblk", slptimeo); splx(s); - if (err) { - if (nbp != NULL) { - SET(nbp->b_flags, B_AGE); - brelse(nbp); - } + if (err) return (NULL); - } goto start; } - SET(bp->b_flags, (B_BUSY | B_CACHE)); - bremfree(bp); + if (!ISSET(bp->b_flags, B_INVAL)) { + SET(bp->b_flags, (B_BUSY | B_CACHE)); + bremfree(bp); + splx(s); + break; + } splx(s); } @@ -665,7 +697,7 @@ start: goto start; } bp = nbp; - binshash(bp, BUFHASH(vp, blkno)); + binshash(bp, bh); bp->b_blkno = bp->b_lblkno = blkno; s = splbio(); bgetvp(vp, bp); @@ -868,6 +900,8 @@ start: bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; + bp->b_dirtyoff = bp->b_dirtyend = 0; + bp->b_validoff = bp->b_validend = 0; bremhash(bp); *bpp = bp; @@ -988,6 +1022,7 @@ biodone(bp) buf_complete(bp); if (!ISSET(bp->b_flags, B_READ)) { + CLR(bp->b_flags, B_WRITEINPROG); vwakeup(bp->b_vp); } @@ -1063,16 +1098,3 @@ vfs_bufstats() splx(s); } #endif /* DEBUG */ - -int -buf_cleanout(struct buf *bp) { - if (bp->b_vp != NULL) - brelvp(bp); - - if (bp->b_flags & B_WANTED) { - bp->b_flags &= ~B_WANTED; - wakeup(bp); - } - - return (0); -} diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 73414b79b42..d987b1a78ec 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -1,9 +1,10 @@ -/* $OpenBSD: vfs_default.c,v 1.15 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: vfs_default.c,v 1.16 2001/12/19 08:58:06 art Exp $ */ + /* * Portions of this code are: * - * Copyright (c) 1982, 1986, 1989, 1993 + * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. 
* All or some portions of this file are derived from material licensed @@ -48,11 +49,9 @@ #include <sys/vnode.h> #include <sys/namei.h> #include <sys/malloc.h> -#include <sys/pool.h> #include <sys/event.h> #include <miscfs/specfs/specdev.h> -#include <uvm/uvm.h> extern struct simplelock spechash_slock; @@ -311,10 +310,3 @@ lease_check(void *v) { return (0); } - -int -vop_generic_mmap(v) - void *v; -{ - return 0; -} diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 7d000db08bf..aaff1342b67 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_subr.c,v 1.79 2001/12/10 18:47:16 art Exp $ */ +/* $OpenBSD: vfs_subr.c,v 1.80 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */ /* @@ -377,8 +377,6 @@ getnewvnode(tag, mp, vops, vpp) int (**vops) __P((void *)); struct vnode **vpp; { - extern struct uvm_pagerops uvm_vnodeops; - struct uvm_object *uobj; struct proc *p = curproc; /* XXX */ struct freelst *listhd; static int toggle; @@ -412,17 +410,11 @@ getnewvnode(tag, mp, vops, vpp) splx(s); simple_unlock(&vnode_free_list_slock); vp = pool_get(&vnode_pool, PR_WAITOK); - bzero(vp, sizeof *vp); - /* - * initialize uvm_object within vnode. - */ - uobj = &vp->v_uobj; - uobj->pgops = &uvm_vnodeops; - uobj->uo_npages = 0; - TAILQ_INIT(&uobj->memq); + bzero((char *)vp, sizeof *vp); numvnodes++; } else { - TAILQ_FOREACH(vp, listhd, v_freelist) { + for (vp = TAILQ_FIRST(listhd); vp != NULLVP; + vp = TAILQ_NEXT(vp, v_freelist)) { if (simple_lock_try(&vp->v_interlock)) break; } @@ -453,13 +445,14 @@ getnewvnode(tag, mp, vops, vpp) else simple_unlock(&vp->v_interlock); #ifdef DIAGNOSTIC - if (vp->v_data || vp->v_uobj.uo_npages || - TAILQ_FIRST(&vp->v_uobj.memq)) { + if (vp->v_data) { vprint("cleaned vnode", vp); panic("cleaned vnode isn't"); } + s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); + splx(s); #endif vp->v_flag = 0; vp->v_socket = 0; @@ -472,10 +465,7 @@ getnewvnode(tag, mp, vops, vpp) *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; - simple_lock_init(&vp->v_uobj.vmobjlock); - - vp->v_size = VSIZENOTSET; - + simple_lock_init(&vp->v_uvm.u_obj.vmobjlock); return (0); } @@ -679,10 +669,6 @@ vget(vp, flags, p) flags |= LK_INTERLOCK; } if (vp->v_flag & VXLOCK) { - if (flags & LK_NOWAIT) { - simple_unlock(&vp->v_interlock); - return (EBUSY); - } vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); @@ -801,11 +787,6 @@ vput(vp) #endif vputonfreelist(vp); - if (vp->v_flag & VTEXT) { - uvmexp.vtextpages -= vp->v_uobj.uo_npages; - uvmexp.vnodepages += vp->v_uobj.uo_npages; - } - vp->v_flag &= ~VTEXT; simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); @@ -846,21 +827,18 @@ vrele(vp) #endif vputonfreelist(vp); - if (vp->v_flag & VTEXT) { - uvmexp.vtextpages -= vp->v_uobj.uo_npages; - uvmexp.vnodepages += vp->v_uobj.uo_npages; - } - vp->v_flag &= ~VTEXT; if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p) == 0) VOP_INACTIVE(vp, p); } +void vhold __P((struct vnode *vp)); + /* * Page or buffer structure gets a reference. - * Must be called at splbio(); */ void -vhold(struct vnode *vp) +vhold(vp) + register struct vnode *vp; { /* @@ -880,34 +858,6 @@ vhold(struct vnode *vp) } /* - * Release a vhold reference. 
- * Must be called at splbio(); - */ -void -vholdrele(struct vnode *vp) -{ - simple_lock(&vp->v_interlock); -#ifdef DIAGNOSTIC - if (vp->v_holdcnt == 0) - panic("vholdrele: holdcnt"); -#endif - vp->v_holdcnt--; - - /* - * If it is on the holdlist and the hold count drops to - * zero, move it to the free list. - */ - if ((vp->v_bioflag & VBIOONFREELIST) && - vp->v_holdcnt == 0 && vp->v_usecount == 0) { - simple_lock(&vnode_free_list_slock); - TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); - } - simple_unlock(&vp->v_interlock); -} - -/* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, @@ -1059,12 +1009,6 @@ vclean(vp, flags, p) if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; - if (vp->v_flag & VTEXT) { - uvmexp.vtextpages -= vp->v_uobj.uo_npages; - uvmexp.vnodepages += vp->v_uobj.uo_npages; - } - vp->v_flag &= ~VTEXT; - /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. The VOP_LOCK @@ -1075,7 +1019,11 @@ vclean(vp, flags, p) VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* - * Clean out any cached data associated with the vnode. + * clean out any VM data associated with the vnode. + */ + uvm_vnp_terminate(vp); + /* + * Clean out any buffers associated with the vnode. */ if (flags & DOCLOSE) vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); @@ -2020,22 +1968,9 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) struct proc *p; int slpflag, slptimeo; { - struct uvm_object *uobj = &vp->v_uobj; - struct buf *bp; + register struct buf *bp; struct buf *nbp, *blist; - int s, error, rv; - int flushflags = PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO| - (flags & V_SAVE ? PGO_CLEANIT : 0); - - /* XXXUBC this doesn't look at flags or slp* */ - if (vp->v_type == VREG) { - simple_lock(&uobj->vmobjlock); - rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags); - simple_unlock(&uobj->vmobjlock); - if (!rv) { - return EIO; - } - } + int s, error; if (flags & V_SAVE) { s = splbio(); @@ -2105,21 +2040,12 @@ loop: void vflushbuf(vp, sync) - struct vnode *vp; + register struct vnode *vp; int sync; { - struct uvm_object *uobj = &vp->v_uobj; - struct buf *bp, *nbp; + register struct buf *bp, *nbp; int s; - if (vp->v_type == VREG) { - int flags = PGO_CLEANIT|PGO_ALLPAGES| (sync ? PGO_SYNCIO : 0); - - simple_lock(&uobj->vmobjlock); - (uobj->pgops->pgo_flush)(uobj, 0, 0, flags); - simple_unlock(&uobj->vmobjlock); - } - loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { @@ -2185,27 +2111,44 @@ bgetvp(vp, bp) * Manipulates vnode buffer queues. Must be called at splbio(). */ void -brelvp(struct buf *bp) +brelvp(bp) + register struct buf *bp; { struct vnode *vp; - if ((vp = bp->b_vp) == NULL) + if ((vp = bp->b_vp) == (struct vnode *) 0) panic("brelvp: NULL"); - /* * Delete from old vnode list, if on one. 
*/ if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); - if (TAILQ_EMPTY(&vp->v_uobj.memq) && - (vp->v_bioflag & VBIOONSYNCLIST) && + if ((vp->v_bioflag & VBIOONSYNCLIST) && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { vp->v_bioflag &= ~VBIOONSYNCLIST; LIST_REMOVE(vp, v_synclist); } - bp->b_vp = NULL; + bp->b_vp = (struct vnode *) 0; + + simple_lock(&vp->v_interlock); +#ifdef DIAGNOSTIC + if (vp->v_holdcnt == 0) + panic("brelvp: holdcnt"); +#endif + vp->v_holdcnt--; - vholdrele(vp); + /* + * If it is on the holdlist and the hold count drops to + * zero, move it to the free list. + */ + if ((vp->v_bioflag & VBIOONFREELIST) && + vp->v_holdcnt == 0 && vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } + simple_unlock(&vp->v_interlock); } /* @@ -2262,8 +2205,7 @@ reassignbuf(bp) */ if ((bp->b_flags & B_DELWRI) == 0) { listheadp = &vp->v_cleanblkhd; - if (TAILQ_EMPTY(&vp->v_uobj.memq) && - (vp->v_bioflag & VBIOONSYNCLIST) && + if ((vp->v_bioflag & VBIOONSYNCLIST) && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { vp->v_bioflag &= ~VBIOONSYNCLIST; LIST_REMOVE(vp, v_synclist); diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c index 0adeb2f3065..33a8f9b6633 100644 --- a/sys/kern/vfs_sync.c +++ b/sys/kern/vfs_sync.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_sync.c,v 1.21 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: vfs_sync.c,v 1.22 2001/12/19 08:58:06 art Exp $ */ /* * Portions of this code are: @@ -176,12 +176,15 @@ sched_sync(p) VOP_UNLOCK(vp, 0, p); s = splbio(); if (LIST_FIRST(slp) == vp) { -#ifdef DIAGNOSTIC - if (!(vp->v_bioflag & VBIOONSYNCLIST)) { - vprint("vnode", vp); - panic("sched_fsync: on synclist, but no flag"); - } -#endif + /* + * Note: disk vps can remain on the + * worklist too with no dirty blocks, but + * since sync_fsync() moves it to a different + * slot we are safe. + */ + if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && + vp->v_type != VBLK) + panic("sched_sync: fsync failed"); /* * Put us back on the worklist. The worklist * routine will remove us from our current diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 5433711decd..a74fd9eedf9 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_syscalls.c,v 1.83 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: vfs_syscalls.c,v 1.84 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $ */ /* @@ -493,6 +493,7 @@ sys_sync(p, v, retval) if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; + uvm_vnp_sync(mp); VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; @@ -1063,13 +1064,6 @@ sys_fhopen(p, v, retval) } if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) goto bad; - - if (vp->v_type == VREG && - uvn_attach(vp, flags & FWRITE ? 
VM_PROT_WRITE : 0) == NULL) { - error = EIO; - goto bad; - } - if (flags & FWRITE) vp->v_writecount++; @@ -1481,6 +1475,8 @@ sys_unlink(p, v, retval) goto out; } + (void)uvm_vnp_uncache(vp); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); @@ -2342,6 +2338,7 @@ out: if (fromnd.ni_dvp != tdvp) VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (tvp) { + (void)uvm_vnp_uncache(tvp); VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index a788a93c9df..8314a049da7 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_vnops.c,v 1.38 2001/12/10 18:45:34 art Exp $ */ +/* $OpenBSD: vfs_vnops.c,v 1.39 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */ /* @@ -165,11 +165,6 @@ vn_open(ndp, fmode, cmode) } if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0) goto bad; - if (vp->v_type == VREG && - uvn_attach(vp, fmode & FWRITE ? VM_PROT_WRITE : 0) == NULL) { - error = EIO; - goto bad; - } if (fmode & FWRITE) vp->v_writecount++; return (0); @@ -202,10 +197,11 @@ vn_writechk(vp) } } /* - * If the vnode is in use as a process's text, - * we can't allow writing. + * If there's shared text associated with + * the vnode, try to free it up once. If + * we fail, we can't allow writing. */ - if (vp->v_flag & VTEXT) + if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp)) return (ETXTBSY); return (0); @@ -218,23 +214,6 @@ void vn_marktext(vp) struct vnode *vp; { - if ((vp->v_flag & VTEXT) == 0) { - uvmexp.vnodepages -= vp->v_uobj.uo_npages; - uvmexp.vtextpages += vp->v_uobj.uo_npages; -#if 0 - /* - * Doesn't help much because the pager is borked and ubc_flush is - * slow. - */ -#ifdef PMAP_PREFER - /* - * Get rid of any cached reads from this vnode. - * exec can't respect PMAP_PREFER when mapping the text. - */ - ubc_flush(&vp->v_uobj, 0, 0); -#endif -#endif - } vp->v_flag |= VTEXT; } @@ -521,10 +500,6 @@ vn_lock(vp, flags, p) if ((flags & LK_INTERLOCK) == 0) simple_lock(&vp->v_interlock); if (vp->v_flag & VXLOCK) { - if (flags & LK_NOWAIT) { - simple_unlock(&vp->v_interlock); - return (EBUSY); - } vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vn_lock", 0); diff --git a/sys/kern/vnode_if.c b/sys/kern/vnode_if.c index dbb2b7a438d..1f30d85c507 100644 --- a/sys/kern/vnode_if.c +++ b/sys/kern/vnode_if.c @@ -3,7 +3,7 @@ * (Modifications made here may easily be lost!) 
* * Created from the file: - * OpenBSD: vnode_if.src,v 1.17 2001/12/10 04:45:31 art Exp + * OpenBSD: vnode_if.src,v 1.11 2001/06/23 02:21:05 csapuntz Exp * by the script: * OpenBSD: vnode_if.sh,v 1.8 2001/02/26 17:34:18 art Exp */ @@ -1230,109 +1230,6 @@ int VOP_WHITEOUT(dvp, cnp, flags) return (VCALL(dvp, VOFFSET(vop_whiteout), &a)); } -int vop_getpages_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_getpages_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_getpages_desc = { - 0, - "vop_getpages", - 0, - vop_getpages_vp_offsets, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - NULL, -}; - -int VOP_GETPAGES(vp, offset, m, count, centeridx, access_type, advice, flags) - struct vnode *vp; - voff_t offset; - struct vm_page **m; - int *count; - int centeridx; - vm_prot_t access_type; - int advice; - int flags; -{ - struct vop_getpages_args a; - a.a_desc = VDESC(vop_getpages); - a.a_vp = vp; - a.a_offset = offset; - a.a_m = m; - a.a_count = count; - a.a_centeridx = centeridx; - a.a_access_type = access_type; - a.a_advice = advice; - a.a_flags = flags; - return (VCALL(vp, VOFFSET(vop_getpages), &a)); -} - -int vop_putpages_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_putpages_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_putpages_desc = { - 0, - "vop_putpages", - 0, - vop_putpages_vp_offsets, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - NULL, -}; - -int VOP_PUTPAGES(vp, m, count, flags, rtvals) - struct vnode *vp; - struct vm_page **m; - int count; - int flags; - int *rtvals; -{ - struct vop_putpages_args a; - a.a_desc = VDESC(vop_putpages); - a.a_vp = vp; - a.a_m = m; - a.a_count = count; - a.a_flags = flags; - a.a_rtvals = rtvals; - return (VCALL(vp, VOFFSET(vop_putpages), &a)); -} - -int vop_mmap_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_mmap_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_mmap_desc = { - 0, - "vop_mmap", - 0, - vop_mmap_vp_offsets, - VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_mmap_args, a_cred), - VOPARG_OFFSETOF(struct vop_mmap_args, a_p), - VDESC_NO_OFFSET, - NULL, -}; - -int VOP_MMAP(vp, fflags, cred, p) - struct vnode *vp; - int fflags; - struct ucred *cred; - struct proc *p; -{ - struct vop_mmap_args a; - a.a_desc = VDESC(vop_mmap); - a.a_vp = vp; - a.a_fflags = fflags; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_mmap), &a)); -} - /* Special cases: */ int vop_strategy_vp_offsets[] = { @@ -1426,9 +1323,6 @@ struct vnodeop_desc *vfs_op_descs[] = { &vop_advlock_desc, &vop_reallocblks_desc, &vop_whiteout_desc, - &vop_getpages_desc, - &vop_putpages_desc, - &vop_mmap_desc, NULL }; diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 0efb5afdd4f..a1cd5c5b9c5 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -1,4 +1,4 @@ -# $OpenBSD: vnode_if.src,v 1.17 2001/12/10 04:45:31 art Exp $ +# $OpenBSD: vnode_if.src,v 1.18 2001/12/19 08:58:06 art Exp $ # $NetBSD: vnode_if.src,v 1.10 1996/05/11 18:26:27 mycroft Exp $ # # Copyright (c) 1992, 1993 @@ -467,38 +467,3 @@ vop_whiteout { #vop_bwrite { # IN struct buf *bp; #}; - -# -#% getpages vp L L L -# -vop_getpages { - IN struct vnode *vp; - IN voff_t offset; - IN struct vm_page **m; - IN int *count; - IN int centeridx; - IN vm_prot_t access_type; - IN int advice; - IN int flags; -}; - -# -#% putpages vp L L L -# -vop_putpages { - IN struct vnode *vp; - IN struct vm_page **m; - IN int count; - IN int flags; - IN int *rtvals; -}; - -# -#% mmap vp = = = -# -vop_mmap { - IN struct vnode *vp; - 
IN int fflags; - IN struct ucred *cred; - IN struct proc *p; -}; diff --git a/sys/miscfs/deadfs/dead_vnops.c b/sys/miscfs/deadfs/dead_vnops.c index 97dc05349c0..820e9b4bf22 100644 --- a/sys/miscfs/deadfs/dead_vnops.c +++ b/sys/miscfs/deadfs/dead_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: dead_vnops.c,v 1.9 2001/12/04 22:44:31 art Exp $ */ +/* $OpenBSD: dead_vnops.c,v 1.10 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: dead_vnops.c,v 1.16 1996/02/13 13:12:48 mycroft Exp $ */ /* @@ -84,7 +84,6 @@ int dead_print __P((void *)); #define dead_pathconf dead_ebadf #define dead_advlock dead_ebadf #define dead_bwrite nullop -#define dead_mmap dead_badop int chkvnlock __P((struct vnode *)); @@ -125,8 +124,7 @@ struct vnodeopv_entry_desc dead_vnodeop_entries[] = { { &vop_pathconf_desc, dead_pathconf }, /* pathconf */ { &vop_advlock_desc, dead_advlock }, /* advlock */ { &vop_bwrite_desc, dead_bwrite }, /* bwrite */ - { &vop_mmap_desc, dead_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc dead_vnodeop_opv_desc = { &dead_vnodeop_p, dead_vnodeop_entries }; diff --git a/sys/miscfs/fdesc/fdesc_vnops.c b/sys/miscfs/fdesc/fdesc_vnops.c index 9196ed10067..b5c22156ecf 100644 --- a/sys/miscfs/fdesc/fdesc_vnops.c +++ b/sys/miscfs/fdesc/fdesc_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fdesc_vnops.c,v 1.26 2001/12/04 22:44:31 art Exp $ */ +/* $OpenBSD: fdesc_vnops.c,v 1.27 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: fdesc_vnops.c,v 1.32 1996/04/11 11:24:29 mrg Exp $ */ /* @@ -123,7 +123,6 @@ int fdesc_pathconf __P((void *)); #define fdesc_islocked vop_generic_islocked #define fdesc_advlock eopnotsupp #define fdesc_bwrite eopnotsupp -#define fdesc_mmap eopnotsupp int (**fdesc_vnodeop_p) __P((void *)); struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = { @@ -162,8 +161,7 @@ struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = { { &vop_pathconf_desc, fdesc_pathconf }, /* pathconf */ { &vop_advlock_desc, fdesc_advlock }, /* advlock */ { &vop_bwrite_desc, fdesc_bwrite }, /* bwrite */ - { &vop_mmap_desc, fdesc_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc fdesc_vnodeop_opv_desc = diff --git a/sys/miscfs/fifofs/fifo.h b/sys/miscfs/fifofs/fifo.h index 6e8bae9a060..94b0807d568 100644 --- a/sys/miscfs/fifofs/fifo.h +++ b/sys/miscfs/fifofs/fifo.h @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo.h,v 1.9 2001/12/04 22:44:31 art Exp $ */ +/* $OpenBSD: fifo.h,v 1.10 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: fifo.h,v 1.10 1996/02/09 22:40:15 christos Exp $ */ /* @@ -80,7 +80,6 @@ int fifo_pathconf __P((void *)); int fifo_advlock __P((void *)); #define fifo_reallocblks fifo_badop #define fifo_bwrite nullop -#define fifo_mmap fifo_badop void fifo_printinfo __P((struct vnode *)); diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c index 174f02cca6f..42da5773e63 100644 --- a/sys/miscfs/fifofs/fifo_vnops.c +++ b/sys/miscfs/fifofs/fifo_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo_vnops.c,v 1.11 2001/12/04 22:44:31 art Exp $ */ +/* $OpenBSD: fifo_vnops.c,v 1.12 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: fifo_vnops.c,v 1.18 1996/03/16 23:52:42 christos Exp $ */ /* @@ -103,8 +103,7 @@ struct vnodeopv_entry_desc fifo_vnodeop_entries[] = { { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ { &vop_advlock_desc, fifo_advlock }, /* advlock */ { &vop_bwrite_desc, fifo_bwrite }, /* bwrite */ - { &vop_mmap_desc, fifo_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; void 
filt_fifordetach(struct knote *kn); diff --git a/sys/miscfs/genfs/genfs.h b/sys/miscfs/genfs/genfs.h deleted file mode 100644 index 8cb886b5074..00000000000 --- a/sys/miscfs/genfs/genfs.h +++ /dev/null @@ -1,71 +0,0 @@ -/* $OpenBSD: genfs.h,v 1.1 2001/12/10 04:45:31 art Exp $ */ -/* $NetBSD: genfs_node.h,v 1.2 2001/09/15 22:38:40 chs Exp $ */ - -/* - * Copyright (c) 2001 Chuck Silvers. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Chuck Silvers. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _MISCFS_GENFS_GENFS_NODE_H_ -#define _MISCFS_GENFS_GENFS_NODE_H_ - -struct vm_page; - -struct genfs_ops { - void (*gop_size)(struct vnode *, off_t, off_t *); - int (*gop_alloc)(struct vnode *, off_t, off_t, int, struct ucred *); -#ifdef notyet - int (*gop_write)(struct vnode *, struct vm_page **, int, int); -#endif -}; - -#define GOP_SIZE(vp, size, eobp) \ - (*VTOG(vp)->g_op->gop_size)((vp), (size), (eobp)) -#define GOP_ALLOC(vp, off, len, flags, cred) \ - (*VTOG(vp)->g_op->gop_alloc)((vp), (off), (len), (flags), (cred)) -#ifdef notyet -#define GOP_WRITE(vp, pgs, npages, flags) \ - (*VTOG(vp)->g_op->gop_write)((vp), (pgs), (npages), (flags)) -#endif -struct genfs_node { - struct genfs_ops *g_op; /* ops vector */ - struct lock g_glock; /* getpages lock */ -}; - -#define VTOG(vp) ((struct genfs_node *)(vp)->v_data) - -void genfs_size(struct vnode *, off_t, off_t *); -void genfs_node_init(struct vnode *, struct genfs_ops *); -#ifdef notyet -int genfs_gop_write(struct vnode *, struct vm_page **, int, int); -#endif - -int genfs_getpages __P((void *)); -int genfs_putpages __P((void *)); - -#endif /* _MISCFS_GENFS_GENFS_NODE_H_ */ diff --git a/sys/miscfs/genfs/genfs_vnops.c b/sys/miscfs/genfs/genfs_vnops.c deleted file mode 100644 index 7630caa60e7..00000000000 --- a/sys/miscfs/genfs/genfs_vnops.c +++ /dev/null @@ -1,723 +0,0 @@ -/* $OpenBSD: genfs_vnops.c,v 1.1 2001/12/10 04:45:31 art Exp $ */ -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. 
All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/kernel.h> -#include <sys/mount.h> -#include <sys/namei.h> -#include <sys/vnode.h> -#include <sys/fcntl.h> -#include <sys/malloc.h> -#include <sys/poll.h> -#include <sys/mman.h> -#include <sys/pool.h> - -#include <miscfs/genfs/genfs.h> -#include <miscfs/specfs/specdev.h> - -#include <uvm/uvm.h> -#include <uvm/uvm_pager.h> - -/* - * generic VM getpages routine. - * Return PG_BUSY pages for the given range, - * reading from backing store if necessary. 
- */ - -int -genfs_getpages(v) - void *v; -{ - struct vop_getpages_args /* { - struct vnode *a_vp; - voff_t a_offset; - vm_page_t *a_m; - int *a_count; - int a_centeridx; - vm_prot_t a_access_type; - int a_advice; - int a_flags; - } */ *ap = v; - - off_t newsize, diskeof, memeof; - off_t offset, origoffset, startoffset, endoffset, raoffset; - daddr_t lbn, blkno; - int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount; - int fs_bshift, fs_bsize, dev_bshift; - int flags = ap->a_flags; - size_t bytes, iobytes, tailbytes, totalbytes, skipbytes; - vaddr_t kva; - struct buf *bp, *mbp; - struct vnode *vp = ap->a_vp; - struct uvm_object *uobj = &vp->v_uobj; - struct vm_page *pgs[16]; /* XXXUBC 16 */ - struct genfs_node *gp = VTOG(vp); - struct ucred *cred = curproc->p_ucred; /* XXXUBC curproc */ - boolean_t async = (flags & PGO_SYNCIO) == 0; - boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0; - boolean_t sawhole = FALSE; - struct proc *p = curproc; - UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d", - vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count); - - /* XXXUBC temp limit */ - if (*ap->a_count > 16) { - return EINVAL; - } - - error = 0; - origoffset = ap->a_offset; - orignpages = *ap->a_count; - GOP_SIZE(vp, vp->v_size, &diskeof); - if (flags & PGO_PASTEOF) { - newsize = MAX(vp->v_size, - origoffset + (orignpages << PAGE_SHIFT)); - GOP_SIZE(vp, newsize, &memeof); - } else { - memeof = diskeof; - } - KASSERT(ap->a_centeridx >= 0 || ap->a_centeridx <= orignpages); - KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0); - KASSERT(orignpages > 0); - - /* - * Bounds-check the request. - */ - - if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) { - if ((flags & PGO_LOCKED) == 0) { - simple_unlock(&uobj->vmobjlock); - } - UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x", - origoffset, *ap->a_count, memeof,0); - return EINVAL; - } - - /* - * For PGO_LOCKED requests, just return whatever's in memory. - */ - - if (flags & PGO_LOCKED) { - uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m, - UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY); - - return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0; - } - - /* vnode is VOP_LOCKed, uobj is locked */ - - if (write && (vp->v_bioflag & VBIOONSYNCLIST) == 0) { - vn_syncer_add_to_worklist(vp, syncdelay); - } - - /* - * find the requested pages and make some simple checks. - * leave space in the page array for a whole block. - */ - - fs_bshift = vp->v_mount->mnt_fs_bshift; - fs_bsize = 1 << fs_bshift; - dev_bshift = vp->v_mount->mnt_dev_bshift; - - orignpages = MIN(orignpages, - round_page(memeof - origoffset) >> PAGE_SHIFT); - npages = orignpages; - startoffset = origoffset & ~(fs_bsize - 1); - endoffset = round_page((origoffset + (npages << PAGE_SHIFT) - + fs_bsize - 1) & ~(fs_bsize - 1)); - endoffset = MIN(endoffset, round_page(memeof)); - ridx = (origoffset - startoffset) >> PAGE_SHIFT; - - memset(pgs, 0, sizeof(pgs)); - uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL); - - /* - * if PGO_OVERWRITE is set, don't bother reading the pages. - * PGO_OVERWRITE also means that the caller guarantees - * that the pages already have backing store allocated. 
- */ - - if (flags & PGO_OVERWRITE) { - UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0); - - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if (pg->flags & PG_FAKE) { - uvm_pagezero(pg); - pg->flags &= ~(PG_FAKE); - } - pg->flags &= ~(PG_RDONLY); - } - npages += ridx; - goto out; - } - - /* - * if the pages are already resident, just return them. - */ - - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if ((pg->flags & PG_FAKE) || - (write && (pg->flags & PG_RDONLY))) { - break; - } - } - if (i == npages) { - UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0); - raoffset = origoffset + (orignpages << PAGE_SHIFT); - npages += ridx; - goto raout; - } - - /* - * the page wasn't resident and we're not overwriting, - * so we're going to have to do some i/o. - * find any additional pages needed to cover the expanded range. - */ - - npages = (endoffset - startoffset) >> PAGE_SHIFT; - if (startoffset != origoffset || npages != orignpages) { - - /* - * XXXUBC we need to avoid deadlocks caused by locking - * additional pages at lower offsets than pages we - * already have locked. for now, unlock them all and - * start over. - */ - - for (i = 0; i < orignpages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if (pg->flags & PG_FAKE) { - pg->flags |= PG_RELEASED; - } - } - uvm_page_unbusy(&pgs[ridx], orignpages); - memset(pgs, 0, sizeof(pgs)); - - UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x", - startoffset, endoffset, 0,0); - npgs = npages; - uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL); - } - simple_unlock(&uobj->vmobjlock); - - /* - * read the desired page(s). - */ - - totalbytes = npages << PAGE_SHIFT; - bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0)); - tailbytes = totalbytes - bytes; - skipbytes = 0; - - kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK | - UVMPAGER_MAPIN_READ); - - s = splbio(); - mbp = pool_get(&bufpool, PR_WAITOK); - splx(s); - mbp->b_bufsize = totalbytes; - mbp->b_data = (void *)kva; - mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0); - mbp->b_iodone = uvm_aio_biodone; - mbp->b_vp = NULL; - LIST_INIT(&mbp->b_dep); - bgetvp(vp, mbp); - - /* - * if EOF is in the middle of the range, zero the part past EOF. - */ - - if (tailbytes > 0) { - memset((void *)(kva + bytes), 0, tailbytes); - } - - /* - * now loop over the pages, reading as needed. - */ - - if (write) { - lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL, p); - } else { - lockmgr(&gp->g_glock, LK_SHARED, NULL, p); - } - - bp = NULL; - for (offset = startoffset; - bytes > 0; - offset += iobytes, bytes -= iobytes) { - - /* - * skip pages which don't need to be read. - */ - - pidx = (offset - startoffset) >> PAGE_SHIFT; - while ((pgs[pidx]->flags & (PG_FAKE|PG_RDONLY)) == 0) { - size_t b; - - KASSERT((offset & (PAGE_SIZE - 1)) == 0); - b = MIN(PAGE_SIZE, bytes); - offset += b; - bytes -= b; - skipbytes += b; - pidx++; - UVMHIST_LOG(ubchist, "skipping, new offset 0x%x", - offset, 0,0,0); - if (bytes == 0) { - goto loopdone; - } - } - - /* - * bmap the file to find out the blkno to read from and - * how much we can read in one i/o. if bmap returns an error, - * skip the rest of the top-level i/o. - */ - - lbn = offset >> fs_bshift; - error = VOP_BMAP(vp, lbn, NULL, &blkno, &run); - if (error) { - UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n", - lbn, error,0,0); - skipbytes += bytes; - goto loopdone; - } - - /* - * see how many pages can be read with this i/o. 
- * reduce the i/o size if necessary to avoid - * overwriting pages with valid data. - */ - - iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset, - bytes); - if (offset + iobytes > round_page(offset)) { - pcount = 1; - while (pidx + pcount < npages && - pgs[pidx + pcount]->flags & PG_FAKE) { - pcount++; - } - iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) - - (offset - trunc_page(offset))); - } - - /* - * if this block isn't allocated, zero it instead of reading it. - * if this is a read access, mark the pages we zeroed PG_RDONLY. - */ - - if (blkno < 0) { - int holepages = (round_page(offset + iobytes) - - trunc_page(offset)) >> PAGE_SHIFT; - UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0); - - sawhole = TRUE; - memset((char *)kva + (offset - startoffset), 0, - iobytes); - skipbytes += iobytes; - - for (i = 0; i < holepages; i++) { - if (write) { - pgs[pidx + i]->flags &= ~PG_CLEAN; - } else { - pgs[pidx + i]->flags |= PG_RDONLY; - } - } - continue; - } - - /* - * allocate a sub-buf for this piece of the i/o - * (or just use mbp if there's only 1 piece), - * and start it going. - */ - - if (offset == startoffset && iobytes == bytes) { - bp = mbp; - } else { - s = splbio(); - bp = pool_get(&bufpool, PR_WAITOK); - splx(s); - bp->b_data = (char *)kva + offset - startoffset; - bp->b_resid = bp->b_bcount = iobytes; - bp->b_flags = B_BUSY|B_READ|B_CALL; - bp->b_iodone = uvm_aio_biodone1; - bp->b_vp = vp; - LIST_INIT(&bp->b_dep); - } - bp->b_lblkno = 0; - bp->b_private = mbp; - - /* adjust physical blkno for partial blocks */ - bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >> - dev_bshift); - - UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x", - bp, offset, iobytes, bp->b_blkno); - - VOP_STRATEGY(bp); - } - -loopdone: - if (skipbytes) { - s = splbio(); - if (error) { - mbp->b_flags |= B_ERROR; - mbp->b_error = error; - } - mbp->b_resid -= skipbytes; - if (mbp->b_resid == 0) { - biodone(mbp); - } - splx(s); - } - - if (async) { - UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0); - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); - return 0; - } - if (bp != NULL) { - error = biowait(mbp); - } - s = splbio(); - (void) buf_cleanout(mbp); - pool_put(&bufpool, mbp); - splx(s); - uvm_pagermapout(kva, npages); - raoffset = startoffset + totalbytes; - - /* - * if this we encountered a hole then we have to do a little more work. - * for read faults, we marked the page PG_RDONLY so that future - * write accesses to the page will fault again. - * for write faults, we must make sure that the backing store for - * the page is completely allocated while the pages are locked. - */ - - if (error == 0 && sawhole && write) { - error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0, - cred); - if (error) { - UVMHIST_LOG(ubchist, "balloc lbn 0x%x -> %d", - lbn, error,0,0); - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); - simple_lock(&uobj->vmobjlock); - goto out; - } - } - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); - simple_lock(&uobj->vmobjlock); - - /* - * see if we want to start any readahead. - * XXXUBC for now, just read the next 128k on 64k boundaries. - * this is pretty nonsensical, but it is 50% faster than reading - * just the next 64k. 
- */ - -raout: - if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 && - PAGE_SHIFT <= 16) { - int racount; - - racount = 1 << (16 - PAGE_SHIFT); - (void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0, - VM_PROT_READ, 0, 0); - simple_lock(&uobj->vmobjlock); - - racount = 1 << (16 - PAGE_SHIFT); - (void) VOP_GETPAGES(vp, raoffset + 0x10000, NULL, &racount, 0, - VM_PROT_READ, 0, 0); - simple_lock(&uobj->vmobjlock); - } - - /* - * we're almost done! release the pages... - * for errors, we free the pages. - * otherwise we activate them and mark them as valid and clean. - * also, unbusy pages that were not actually requested. - */ - -out: - if (error) { - uvm_lock_pageq(); - for (i = 0; i < npages; i++) { - if (pgs[i] == NULL) { - continue; - } - UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x", - pgs[i], pgs[i]->flags, 0,0); - if (pgs[i]->flags & PG_WANTED) { - wakeup(pgs[i]); - } - if (pgs[i]->flags & PG_RELEASED) { - uvm_unlock_pageq(); - (uobj->pgops->pgo_releasepg)(pgs[i], NULL); - uvm_lock_pageq(); - continue; - } - if (pgs[i]->flags & PG_FAKE) { - uvm_pagefree(pgs[i]); - continue; - } - uvm_pageactivate(pgs[i]); - pgs[i]->flags &= ~(PG_WANTED|PG_BUSY); - UVM_PAGE_OWN(pgs[i], NULL); - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0); - return error; - } - - UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0); - uvm_lock_pageq(); - for (i = 0; i < npages; i++) { - if (pgs[i] == NULL) { - continue; - } - UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x", - pgs[i], pgs[i]->flags, 0,0); - if (pgs[i]->flags & PG_FAKE) { - UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x", - pgs[i], pgs[i]->offset,0,0); - pgs[i]->flags &= ~(PG_FAKE); - pmap_clear_modify(pgs[i]); - pmap_clear_reference(pgs[i]); - } - if (write) { - pgs[i]->flags &= ~(PG_RDONLY); - } - if (i < ridx || i >= ridx + orignpages || async) { - UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x", - pgs[i], pgs[i]->offset,0,0); - if (pgs[i]->flags & PG_WANTED) { - wakeup(pgs[i]); - } - if (pgs[i]->flags & PG_RELEASED) { - uvm_unlock_pageq(); - (uobj->pgops->pgo_releasepg)(pgs[i], NULL); - uvm_lock_pageq(); - continue; - } - uvm_pageactivate(pgs[i]); - pgs[i]->flags &= ~(PG_WANTED|PG_BUSY); - UVM_PAGE_OWN(pgs[i], NULL); - } - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - if (ap->a_m != NULL) { - memcpy(ap->a_m, &pgs[ridx], - orignpages * sizeof(struct vm_page *)); - } - return 0; -} - -/* - * generic VM putpages routine. - * Write the given range of pages to backing store. 
- */ - -int -genfs_putpages(v) - void *v; -{ - struct vop_putpages_args /* { - struct vnode *a_vp; - struct vm_page **a_m; - int a_count; - int a_flags; - int *a_rtvals; - } */ *ap = v; - - int s, error, npages, run; - int fs_bshift, dev_bshift; - vaddr_t kva; - off_t eof, offset, startoffset; - size_t bytes, iobytes, skipbytes; - daddr_t lbn, blkno; - struct vm_page *pg; - struct buf *mbp, *bp; - struct vnode *vp = ap->a_vp; - boolean_t async = (ap->a_flags & PGO_SYNCIO) == 0; - UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "vp %p offset 0x%x count %d", - vp, ap->a_m[0]->offset, ap->a_count, 0); - - simple_unlock(&vp->v_uobj.vmobjlock); - - GOP_SIZE(vp, vp->v_size, &eof); - - error = 0; - npages = ap->a_count; - fs_bshift = vp->v_mount->mnt_fs_bshift; - dev_bshift = vp->v_mount->mnt_dev_bshift; - - pg = ap->a_m[0]; - startoffset = pg->offset; - bytes = MIN(npages << PAGE_SHIFT, eof - startoffset); - skipbytes = 0; - KASSERT(bytes != 0); - - kva = uvm_pagermapin(ap->a_m, npages, UVMPAGER_MAPIN_WAITOK); - - s = splbio(); - vp->v_numoutput += 2; - mbp = pool_get(&bufpool, PR_WAITOK); - UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x", - vp, mbp, vp->v_numoutput, bytes); - splx(s); - mbp->b_bufsize = npages << PAGE_SHIFT; - mbp->b_data = (void *)kva; - mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_WRITE|B_AGE | - (async ? B_CALL : 0) | - (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0); - mbp->b_iodone = uvm_aio_biodone; - mbp->b_vp = NULL; - LIST_INIT(&mbp->b_dep); - bgetvp(vp, mbp); - - bp = NULL; - for (offset = startoffset; - bytes > 0; - offset += iobytes, bytes -= iobytes) { - lbn = offset >> fs_bshift; - error = VOP_BMAP(vp, lbn, NULL, &blkno, &run); - if (error) { - UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0); - skipbytes += bytes; - bytes = 0; - break; - } - - iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset, - bytes); - if (blkno == (daddr_t)-1) { - skipbytes += iobytes; - continue; - } - - /* if it's really one i/o, don't make a second buf */ - if (offset == startoffset && iobytes == bytes) { - bp = mbp; - } else { - s = splbio(); - vp->v_numoutput++; - bp = pool_get(&bufpool, PR_WAITOK); - UVMHIST_LOG(ubchist, "vp %p bp %p num now %d", - vp, bp, vp->v_numoutput, 0); - splx(s); - bp->b_data = (char *)kva + - (vaddr_t)(offset - pg->offset); - bp->b_resid = bp->b_bcount = iobytes; - bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC; - bp->b_iodone = uvm_aio_biodone1; - bp->b_vp = vp; - LIST_INIT(&bp->b_dep); - } - bp->b_lblkno = 0; - bp->b_private = mbp; - - /* adjust physical blkno for partial blocks */ - bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >> - dev_bshift); - UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x", - vp, offset, bp->b_bcount, bp->b_blkno); - VOP_STRATEGY(bp); - } - if (skipbytes) { - UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0); - s = splbio(); - mbp->b_resid -= skipbytes; - if (error) { - mbp->b_flags |= B_ERROR; - mbp->b_error = error; - } - if (mbp->b_resid == 0) { - biodone(mbp); - } - splx(s); - } - if (async) { - UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0); - return 0; - } - if (bp != NULL) { - UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0); - error = biowait(mbp); - } - if (bioops.io_pageiodone) { - (*bioops.io_pageiodone)(mbp); - } - s = splbio(); - if (mbp->b_vp) { - vwakeup(mbp->b_vp); - } - buf_cleanout(mbp); - pool_put(&bufpool, mbp); - splx(s); - uvm_pagermapout(kva, npages); - UVMHIST_LOG(ubchist, "returning, error 
%d", error,0,0,0); - return error; -} - -void -genfs_size(struct vnode *vp, off_t size, off_t *eobp) -{ - int bsize; - - bsize = 1 << vp->v_mount->mnt_fs_bshift; - *eobp = (size + bsize - 1) & ~(bsize - 1); -} - -void -genfs_node_init(struct vnode *vp, struct genfs_ops *ops) -{ - struct genfs_node *gp = VTOG(vp); - - lockinit(&gp->g_glock, PINOD, "glock", 0, 0); - gp->g_op = ops; -} diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index 2022279f6d8..e24cde8096b 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: spec_vnops.c,v 1.21 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: spec_vnops.c,v 1.22 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: spec_vnops.c,v 1.29 1996/04/22 01:42:38 christos Exp $ */ /* @@ -104,8 +104,7 @@ struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ { &vop_advlock_desc, spec_advlock }, /* advlock */ { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ - { &vop_mmap_desc, spec_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; diff --git a/sys/miscfs/specfs/specdev.h b/sys/miscfs/specfs/specdev.h index 51fb9564c51..bdd2008545f 100644 --- a/sys/miscfs/specfs/specdev.h +++ b/sys/miscfs/specfs/specdev.h @@ -1,4 +1,4 @@ -/* $OpenBSD: specdev.h,v 1.11 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: specdev.h,v 1.12 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: specdev.h,v 1.12 1996/02/13 13:13:01 mycroft Exp $ */ /* @@ -121,4 +121,3 @@ int spec_advlock __P((void *)); #define spec_reallocblks spec_badop #define spec_bwrite vop_generic_bwrite #define spec_revoke vop_generic_revoke -#define spec_mmap spec_badop diff --git a/sys/miscfs/union/union_vnops.c b/sys/miscfs/union/union_vnops.c index 46f27a40e62..df3fb4efc01 100644 --- a/sys/miscfs/union/union_vnops.c +++ b/sys/miscfs/union/union_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: union_vnops.c,v 1.16 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: union_vnops.c,v 1.17 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: union_vnops.c,v 1.30.4.1 1996/05/25 22:10:14 jtc Exp $ */ /* @@ -94,7 +94,6 @@ int union_islocked __P((void *)); int union_pathconf __P((void *)); int union_advlock __P((void *)); int union_strategy __P((void *)); -int union_mmap __P((void *)); int (**union_vnodeop_p) __P((void *)); struct vnodeopv_entry_desc union_vnodeop_entries[] = { @@ -134,8 +133,7 @@ struct vnodeopv_entry_desc union_vnodeop_entries[] = { { &vop_islocked_desc, union_islocked }, /* islocked */ { &vop_pathconf_desc, union_pathconf }, /* pathconf */ { &vop_advlock_desc, union_advlock }, /* advlock */ - { &vop_mmap_desc, union_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc union_vnodeop_opv_desc = { &union_vnodeop_p, union_vnodeop_entries }; @@ -1844,13 +1842,3 @@ union_strategy(v) return (error); } -int -union_mmap(v) - void *v; -{ - struct vop_mmap_args *ap = v; - struct vnode *vp = OTHERVP(ap->a_vp); - - ap->a_vp = vp; - return (VCALL(vp, VOFFSET(vop_mmap), ap)); -}
\ No newline at end of file diff --git a/sys/msdosfs/denode.h b/sys/msdosfs/denode.h index db945e35bcd..33ebeb8b122 100644 --- a/sys/msdosfs/denode.h +++ b/sys/msdosfs/denode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: denode.h,v 1.11 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: denode.h,v 1.12 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: denode.h,v 1.24 1997/10/17 11:23:39 ws Exp $ */ /*- @@ -91,8 +91,6 @@ * things. */ -#include <miscfs/genfs/genfs.h> - /* * Internal pseudo-offset for (nonexistent) directory entry for the root * dir in the root dir @@ -137,7 +135,6 @@ struct fatcache { * contained within a vnode. */ struct denode { - struct genfs_node de_gnode; struct denode *de_next; /* Hash chain forward */ struct denode **de_prev; /* Hash chain back */ struct vnode *de_vnode; /* addr of vnode we are part of */ @@ -314,5 +311,4 @@ void reinsert __P((struct denode *)); int removede __P((struct denode *, struct denode *)); int uniqdosname __P((struct denode *, struct componentname *, u_char *)); int findwin95 __P((struct denode *)); -int msdosfs_gop_alloc __P((struct vnode *, off_t, off_t, int, struct ucred *)); #endif /* _KERNEL */ diff --git a/sys/msdosfs/msdosfs_denode.c b/sys/msdosfs/msdosfs_denode.c index e01491efc85..dbc1f372fa2 100644 --- a/sys/msdosfs/msdosfs_denode.c +++ b/sys/msdosfs/msdosfs_denode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_denode.c,v 1.22 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: msdosfs_denode.c,v 1.23 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: msdosfs_denode.c,v 1.23 1997/10/17 11:23:58 ws Exp $ */ /*- @@ -72,13 +72,6 @@ u_long dehash; /* size of hash table - 1 */ #define DEHASH(dev, dcl, doff) (((dev) + (dcl) + (doff) / sizeof(struct direntry)) \ & dehash) -extern int prtactive; - -struct genfs_ops msdosfs_genfsops = { - genfs_size, - msdosfs_gop_alloc, -}; - static struct denode *msdosfs_hashget __P((dev_t, u_long, u_long)); static int msdosfs_hashins __P((struct denode *)); static void msdosfs_hashrem __P((struct denode *)); @@ -337,10 +330,8 @@ retry: } } else nvp->v_type = VREG; - genfs_node_init(nvp, &msdosfs_genfsops); VREF(ldep->de_devvp); *depp = ldep; - nvp->v_size = ldep->de_FileSize; return (0); } @@ -470,7 +461,7 @@ detrunc(dep, length, flags, cred, p) #endif return (error); } - + uvm_vnp_uncache(DETOV(dep)); /* * is this the right place for it? 
*/ @@ -533,7 +524,7 @@ deextend(dep, length, cred) struct ucred *cred; { struct msdosfsmount *pmp = dep->de_pmp; - u_long count, osize; + u_long count; int error; /* @@ -566,12 +557,8 @@ deextend(dep, length, cred) } } - osize = dep->de_FileSize; dep->de_FileSize = length; - uvm_vnp_setsize(DETOV(dep), (voff_t)dep->de_FileSize); dep->de_flag |= DE_UPDATE|DE_MODIFIED; - uvm_vnp_zerorange(DETOV(dep), (off_t)osize, - (size_t)(dep->de_FileSize - osize)); return (deupdat(dep, 1)); } @@ -606,6 +593,7 @@ msdosfs_reclaim(v) } */ *ap = v; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); + extern int prtactive; #ifdef MSDOSFS_DEBUG printf("msdosfs_reclaim(): dep %08x, file %s, refcnt %d\n", @@ -646,6 +634,7 @@ msdosfs_inactive(v) struct denode *dep = VTODE(vp); struct proc *p = ap->a_p; int error; + extern int prtactive; #ifdef MSDOSFS_DEBUG printf("msdosfs_inactive(): dep %08x, de_Name[0] %x\n", dep, dep->de_Name[0]); @@ -672,9 +661,7 @@ msdosfs_inactive(v) dep, dep->de_refcnt, vp->v_mount->mnt_flag, MNT_RDONLY); #endif if (dep->de_refcnt <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { - if (dep->de_FileSize != 0) { - error = detrunc(dep, (u_long)0, 0, NOCRED, NULL); - } + error = detrunc(dep, (u_long)0, 0, NOCRED, NULL); dep->de_Name[0] = SLOT_DELETED; } deupdat(dep, 0); @@ -693,10 +680,3 @@ out: vrecycle(vp, (struct simplelock *)0, p); return (error); } - -int -msdosfs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags, - struct ucred *cred) -{ - return 0; -} diff --git a/sys/msdosfs/msdosfs_fat.c b/sys/msdosfs/msdosfs_fat.c index 3576a663cdc..d01e16eb89f 100644 --- a/sys/msdosfs/msdosfs_fat.c +++ b/sys/msdosfs/msdosfs_fat.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_fat.c,v 1.9 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: msdosfs_fat.c,v 1.10 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: msdosfs_fat.c,v 1.26 1997/10/17 11:24:02 ws Exp $ */ /*- @@ -988,7 +988,8 @@ extendfile(dep, count, bpp, ncp, flags) int flags; { int error; - u_long frcn = 0, cn, got; + u_long frcn; + u_long cn, got; struct msdosfsmount *pmp = dep->de_pmp; struct buf *bp; @@ -1059,26 +1060,41 @@ extendfile(dep, count, bpp, ncp, flags) } /* - * Update the "last cluster of the file" entry in the - * denode's fat cache. + * Update the "last cluster of the file" entry in the denode's fat + * cache. */ - fc_setcache(dep, FC_LASTFC, frcn + got - 1, cn + got - 1); - if (flags & DE_CLEAR && - (dep->de_Attributes & ATTR_DIRECTORY)) { + + if (flags & DE_CLEAR) { while (got-- > 0) { - bp = getblk(pmp->pm_devvp, cntobn(pmp, cn++), - pmp->pm_bpcluster, 0, 0); + /* + * Get the buf header for the new block of the file. 
+ */ + if (dep->de_Attributes & ATTR_DIRECTORY) + bp = getblk(pmp->pm_devvp, cntobn(pmp, cn++), + pmp->pm_bpcluster, 0, 0); + else { + bp = getblk(DETOV(dep), de_cn2bn(pmp, frcn++), + pmp->pm_bpcluster, 0, 0); + /* + * Do the bmap now, as in msdosfs_write + */ + if (pcbmap(dep, + de_bn2cn(pmp, bp->b_lblkno), + &bp->b_blkno, 0, 0)) + bp->b_blkno = -1; + if (bp->b_blkno == -1) + panic("extendfile: pcbmap"); + } clrbuf(bp); if (bpp) { *bpp = bp; bpp = NULL; - } else { + } else bdwrite(bp); - } } } } - + return (0); } diff --git a/sys/msdosfs/msdosfs_vfsops.c b/sys/msdosfs/msdosfs_vfsops.c index 60162fa8af9..6aa2d72423b 100644 --- a/sys/msdosfs/msdosfs_vfsops.c +++ b/sys/msdosfs/msdosfs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_vfsops.c,v 1.27 2001/12/10 02:19:34 art Exp $ */ +/* $OpenBSD: msdosfs_vfsops.c,v 1.28 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.48 1997/10/18 02:54:57 briggs Exp $ */ /*- @@ -584,9 +584,15 @@ msdosfs_mountfs(devvp, mp, p, argp) mp->mnt_data = (qaddr_t)pmp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; - mp->mnt_dev_bshift = pmp->pm_bnshift; - mp->mnt_fs_bshift = pmp->pm_cnshift; - +#ifdef QUOTA + /* + * If we ever do quotas for DOS filesystems this would be a place + * to fill in the info in the msdosfsmount structure. You dolt, + * quotas on dos filesystems make no sense because files have no + * owners on dos filesystems. of course there is some empty space + * in the directory entry where we could put uid's and gid's. + */ +#endif devvp->v_specmountpoint = mp; return (0); @@ -714,11 +720,10 @@ msdosfs_sync_vnode(struct vnode *vp, void *arg) struct denode *dep; dep = VTODE(vp); - if (msa->waitfor == MNT_LAZY || vp->v_type == VNON || - (((dep->de_flag & - (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) && - (LIST_EMPTY(&vp->v_dirtyblkhd) && - vp->v_uobj.uo_npages == 0))) { + if (vp->v_type == VNON || + ((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 + && vp->v_dirtyblkhd.lh_first == NULL) || + msa->waitfor == MNT_LAZY) { simple_unlock(&vp->v_interlock); return (0); } diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c index 8d08da9e229..3fccaf27353 100644 --- a/sys/msdosfs/msdosfs_vnops.c +++ b/sys/msdosfs/msdosfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_vnops.c,v 1.34 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: msdosfs_vnops.c,v 1.35 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: msdosfs_vnops.c,v 1.63 1997/10/17 11:24:19 ws Exp $ */ /*- @@ -320,7 +320,6 @@ msdosfs_setattr(v) } */ *ap = v; int error = 0; struct denode *dep = VTODE(ap->a_vp); - struct msdosfsmount *pmp = dep->de_pmp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; @@ -332,8 +331,7 @@ msdosfs_setattr(v) (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL) || - (vap->va_uid != VNOVAL && vap->va_uid != pmp->pm_uid) || - (vap->va_gid != VNOVAL && vap->va_gid != pmp->pm_gid)) { + (vap->va_uid != VNOVAL) || (vap->va_gid != VNOVAL)) { #ifdef MSDOSFS_DEBUG printf("msdosfs_setattr(): returning EINVAL\n"); printf(" va_type %d, va_nlink %x, va_fsid %x, va_fileid %x\n", @@ -415,11 +413,11 @@ msdosfs_read(v) int error = 0; int diff; int blsize; + int isadir; long n; long on; daddr_t lbn; - void *win; - vsize_t bytelen; + daddr_t rablock; struct buf *bp; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); @@ -434,45 +432,42 @@ 
msdosfs_read(v) if (uio->uio_offset < 0) return (EINVAL); - if (vp->v_type == VREG) { - while (uio->uio_resid > 0) { - bytelen = MIN(dep->de_FileSize - uio->uio_offset, - uio->uio_resid); - - if (bytelen == 0) - break; - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, - &bytelen, UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) - break; - } - dep->de_flag |= DE_ACCESS; - goto out; - } - - /* this loop is only for directories now */ + isadir = dep->de_Attributes & ATTR_DIRECTORY; do { lbn = de_cluster(pmp, uio->uio_offset); on = uio->uio_offset & pmp->pm_crbomask; - n = MIN((pmp->pm_bpcluster - on), uio->uio_resid); + n = min((u_long) (pmp->pm_bpcluster - on), uio->uio_resid); diff = dep->de_FileSize - uio->uio_offset; if (diff <= 0) return (0); if (diff < n) n = diff; /* convert cluster # to block # if a directory */ - error = pcbmap(dep, lbn, &lbn, 0, &blsize); - if (error) - return (error); + if (isadir) { + error = pcbmap(dep, lbn, &lbn, 0, &blsize); + if (error) + return (error); + } /* * If we are operating on a directory file then be sure to * do i/o with the vnode for the filesystem instead of the * vnode for the directory. */ - error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp); - n = MIN(n, pmp->pm_bpcluster - bp->b_resid); + if (isadir) { + error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp); + } else { + rablock = lbn + 1; + if (dep->de_lastr + 1 == lbn && + de_cn2off(pmp, rablock) < dep->de_FileSize) + error = breada(vp, de_cn2bn(pmp, lbn), + pmp->pm_bpcluster, de_cn2bn(pmp, rablock), + pmp->pm_bpcluster, NOCRED, &bp); + else + error = bread(vp, de_cn2bn(pmp, lbn), + pmp->pm_bpcluster, NOCRED, &bp); + dep->de_lastr = lbn; + } + n = min(n, pmp->pm_bpcluster - bp->b_resid); if (error) { brelse(bp); return (error); @@ -480,10 +475,8 @@ msdosfs_read(v) error = uiomove(bp->b_data + on, (int) n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); - -out: - if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) - error = deupdat(dep, 1); + if (!isadir && !(vp->v_mount->mnt_flag & MNT_NOATIME)) + dep->de_flag |= DE_ACCESS; return (error); } @@ -500,19 +493,19 @@ msdosfs_write(v) int a_ioflag; struct ucred *a_cred; } */ *ap = v; + int n; + int croffset; int resid; u_long osize; int error = 0; u_long count; - daddr_t lastcn; + daddr_t bn, lastcn; + struct buf *bp; int ioflag = ap->a_ioflag; - void *win; - vsize_t bytelen; - off_t oldoff; - boolean_t rv; struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct vnode *vp = ap->a_vp; + struct vnode *thisvp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct ucred *cred = ap->a_cred; @@ -528,6 +521,7 @@ msdosfs_write(v) case VREG: if (ioflag & IO_APPEND) uio->uio_offset = dep->de_FileSize; + thisvp = vp; break; case VDIR: return EISDIR; @@ -582,52 +576,84 @@ msdosfs_write(v) } else lastcn = de_clcount(pmp, osize) - 1; - if (dep->de_FileSize < uio->uio_offset + resid) { - dep->de_FileSize = uio->uio_offset + resid; - uvm_vnp_setsize(vp, dep->de_FileSize); - } - do { - oldoff = uio->uio_offset; - if (de_cluster(pmp, oldoff) > lastcn) { + if (de_cluster(pmp, uio->uio_offset) > lastcn) { error = ENOSPC; break; } - bytelen = MIN(dep->de_FileSize - oldoff, uio->uio_resid); + bn = de_blk(pmp, uio->uio_offset); + if ((uio->uio_offset & pmp->pm_crbomask) == 0 + && (de_blk(pmp, uio->uio_offset + uio->uio_resid) > de_blk(pmp, uio->uio_offset) + || uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) { + /* + * If either the whole cluster gets written, + * or we 
write the cluster from its start beyond EOF, + * then no need to read data from disk. + */ + bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0); + clrbuf(bp); + /* + * Do the bmap now, since pcbmap needs buffers + * for the fat table. (see msdosfs_strategy) + */ + if (bp->b_blkno == bp->b_lblkno) { + error = pcbmap(dep, + de_bn2cn(pmp, bp->b_lblkno), + &bp->b_blkno, 0, 0); + if (error) + bp->b_blkno = -1; + } + if (bp->b_blkno == -1) { + brelse(bp); + if (!error) + error = EIO; /* XXX */ + break; + } + } else { + /* + * The block we need to write into exists, so read it in. + */ + error = bread(thisvp, bn, pmp->pm_bpcluster, + NOCRED, &bp); + if (error) { + brelse(bp); + break; + } + } + + croffset = uio->uio_offset & pmp->pm_crbomask; + n = min(uio->uio_resid, pmp->pm_bpcluster - croffset); + if (uio->uio_offset + n > dep->de_FileSize) { + dep->de_FileSize = uio->uio_offset + n; + uvm_vnp_setsize(vp, dep->de_FileSize); + } + uvm_vnp_uncache(vp); /* - * XXXUBC if file is mapped and this is the last block, - * process one page at a time. + * Should these vnode_pager_* functions be done on dir + * files? */ - if (bytelen == 0) - break; - win = ubc_alloc(&vp->v_uobj, oldoff, &bytelen, UBC_WRITE); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) { - break; - } /* - * flush what we just wrote if necessary. - * XXXUBC simplistic async flushing. + * Copy the data from user space into the buf header. */ - if (ioflag & IO_SYNC) { - - simple_lock(&vp->v_uobj.vmobjlock); - rv = vp->v_uobj.pgops->pgo_flush( - &vp->v_uobj, oldoff, - oldoff + bytelen, PGO_CLEANIT|PGO_SYNCIO); - simple_unlock(&vp->v_uobj.vmobjlock); - } else if (oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_uobj.vmobjlock); - rv = vp->v_uobj.pgops->pgo_flush( - &vp->v_uobj, (oldoff >> 16) << 16, - (uio->uio_offset >> 16) << 16, PGO_CLEANIT); - simple_unlock(&vp->v_uobj.vmobjlock); - } + error = uiomove(bp->b_data + croffset, n, uio); + + /* + * If they want this synchronous then write it and wait for + * it. Otherwise, if on a cluster boundary write it + * asynchronously so we can move on to the next block + * without delay. Otherwise do a delayed write because we + * may want to write somemore into the block later. 
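
The write-strategy choice described in the comment above (and implemented immediately below) is three-way: IO_SYNC forces a synchronous bwrite(), a write that exactly fills the cluster is pushed asynchronously with bawrite(), and anything else is left as a delayed write with bdwrite(). The sketch below is a stand-alone user-space model of that decision; the IO_SYNC value, the enum names and the cluster size are illustrative stand-ins, not the kernel definitions.

/*
 * User-space model of the write-strategy choice in the restored
 * msdosfs_write(): sync write, async write on a full cluster,
 * delayed write otherwise.  Constants and types are stand-ins.
 */
#include <stdio.h>

#define IO_SYNC 0x04            /* stand-in for the kernel flag */

enum wstrategy { W_SYNC, W_ASYNC, W_DELAYED };

static enum wstrategy
choose_write(int ioflag, long croffset, long n, long bpcluster)
{
        if (ioflag & IO_SYNC)
                return (W_SYNC);        /* bwrite(): write and wait */
        if (croffset + n == bpcluster)
                return (W_ASYNC);       /* bawrite(): cluster is full */
        return (W_DELAYED);             /* bdwrite(): more may follow */
}

int
main(void)
{
        /* 4k clusters; a partial write of 1024 bytes at offset 512. */
        printf("%d\n", choose_write(0, 512, 1024, 4096));      /* 2: delayed */
        printf("%d\n", choose_write(0, 0, 4096, 4096));        /* 1: async */
        printf("%d\n", choose_write(IO_SYNC, 0, 4096, 4096));  /* 0: sync */
        return (0);
}
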
+ */ + if (ioflag & IO_SYNC) + (void) bwrite(bp); + else if (n + croffset == pmp->pm_bpcluster) + bawrite(bp); + else + bdwrite(bp); + dep->de_flag |= DE_UPDATE; } while (error == 0 && uio->uio_resid > 0); - dep->de_flag |= DE_UPDATE; /* * If the write failed and they want us to, truncate the file back @@ -640,8 +666,7 @@ errexit: uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } else { - detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, - NULL); + detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL); if (uio->uio_resid != resid) error = 0; } @@ -1481,11 +1506,11 @@ msdosfs_readdir(v) while (uio->uio_resid > 0) { lbn = de_cluster(pmp, offset - bias); on = (offset - bias) & pmp->pm_crbomask; - n = MIN(pmp->pm_bpcluster - on, uio->uio_resid); + n = min(pmp->pm_bpcluster - on, uio->uio_resid); diff = dep->de_FileSize - (offset - bias); if (diff <= 0) break; - n = MIN(n, diff); + n = min(n, diff); if ((error = pcbmap(dep, lbn, &bn, &cn, &blsize)) != 0) break; error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); @@ -1493,7 +1518,7 @@ msdosfs_readdir(v) brelse(bp); return (error); } - n = MIN(n, blsize - bp->b_resid); + n = min(n, blsize - bp->b_resid); /* * Convert from dos directory entries to fs-independent @@ -1692,6 +1717,7 @@ msdosfs_bmap(v) int *a_runp; } */ *ap = v; struct denode *dep = VTODE(ap->a_vp); + struct msdosfsmount *pmp = dep->de_pmp; if (ap->a_vpp != NULL) *ap->a_vpp = dep->de_devvp; @@ -1703,7 +1729,7 @@ msdosfs_bmap(v) */ *ap->a_runp = 0; } - return (pcbmap(dep, ap->a_bn, ap->a_bnp, 0, 0)); + return (pcbmap(dep, de_bn2cn(pmp, ap->a_bn), ap->a_bnp, 0, 0)); } int @@ -1876,10 +1902,7 @@ struct vnodeopv_entry_desc msdosfs_vnodeop_entries[] = { { &vop_advlock_desc, msdosfs_advlock }, /* advlock */ { &vop_reallocblks_desc, msdosfs_reallocblks }, /* reallocblks */ { &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */ - { &vop_getpages_desc, genfs_getpages }, - { &vop_putpages_desc, genfs_putpages }, - { &vop_mmap_desc, vop_generic_mmap }, - { NULL, NULL } + { (struct vnodeop_desc *)NULL, (int (*) __P((void *)))NULL } }; struct vnodeopv_desc msdosfs_vnodeop_opv_desc = { &msdosfs_vnodeop_p, msdosfs_vnodeop_entries }; diff --git a/sys/nfs/nfs.h b/sys/nfs/nfs.h index b86819902f2..6956ce54b26 100644 --- a/sys/nfs/nfs.h +++ b/sys/nfs/nfs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs.h,v 1.14 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: nfs.h,v 1.15 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs.h,v 1.10.4.1 1996/05/27 11:23:56 fvdl Exp $ */ /* @@ -78,18 +78,8 @@ * Ideally, NFS_DIRBLKSIZ should be bigger, but I've seen servers with * broken NFS/ethernet drivers that won't work with anything bigger (Linux..) */ -#if 1 -/* - * XXXUBC temp hack because of the removal of b_validend. - * eventually we'll store NFS VDIR data in the page cache as well, - * we'll fix this at that point. - */ -#define NFS_DIRBLKSIZ PAGE_SIZE -#define NFS_READDIRBLKSIZ PAGE_SIZE -#else -#define NFS_DIRBLKSIZ 1024 /* Must be a multiple of DIRBLKSIZ */ +#define NFS_DIRBLKSIZ 1024 /* Must be a multiple of DIRBLKSIZ */ #define NFS_READDIRBLKSIZ 512 /* Size of read dir blocks. XXX */ -#endif /* * Oddballs @@ -121,10 +111,10 @@ #endif /* - * Use the vm_page flag reserved for pager use to indicate pages - * which have been written to the server but not yet committed. + * The B_INVAFTERWRITE flag should be set to whatever is required by the + * buffer cache code to say "Invalidate the block after it is written back". 
*/ -#define PG_NEEDCOMMIT PG_PAGER1 +#define B_INVAFTERWRITE B_INVAL /* * The IO_METASYNC flag should be implemented for local file systems. diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c index e1f17ed2482..fb26a59aeae 100644 --- a/sys/nfs/nfs_bio.c +++ b/sys/nfs/nfs_bio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_bio.c,v 1.32 2001/12/14 03:16:02 art Exp $ */ +/* $OpenBSD: nfs_bio.c,v 1.33 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $ */ /* @@ -50,9 +50,8 @@ #include <sys/mount.h> #include <sys/kernel.h> #include <sys/namei.h> -#include <sys/pool.h> -#include <uvm/uvm.h> +#include <uvm/uvm_extern.h> #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> @@ -71,19 +70,20 @@ struct nfsstats nfsstats; */ int nfs_bioread(vp, uio, ioflag, cred) - struct vnode *vp; - struct uio *uio; + register struct vnode *vp; + register struct uio *uio; int ioflag; struct ucred *cred; { - struct nfsnode *np = VTONFS(vp); - int biosize; - struct buf *bp = NULL; + register struct nfsnode *np = VTONFS(vp); + register int biosize, diff; + struct buf *bp = NULL, *rabp; struct vattr vattr; struct proc *p; struct nfsmount *nmp = VFSTONFS(vp->v_mount); + daddr_t lbn, bn, rabn; caddr_t baddr; - int got_buf = 0, error = 0, n = 0, on = 0; + int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) @@ -153,25 +153,87 @@ nfs_bioread(vp, uio, ioflag, cred) switch (vp->v_type) { case VREG: nfsstats.biocache_reads++; - error = 0; - while (uio->uio_resid > 0) { - void *win; - vsize_t bytelen = MIN(np->n_size - uio->uio_offset, - uio->uio_resid); + lbn = uio->uio_offset / biosize; + on = uio->uio_offset & (biosize - 1); + bn = lbn * (biosize / DEV_BSIZE); + not_readin = 1; - if (bytelen == 0) - break; - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, - &bytelen, UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) { - break; + /* + * Start the read ahead(s), as required. + */ + if (nfs_numasync > 0 && nmp->nm_readahead > 0) { + for (nra = 0; nra < nmp->nm_readahead && + (lbn + 1 + nra) * biosize < np->n_size; nra++) { + rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE); + if (!incore(vp, rabn)) { + rabp = nfs_getcacheblk(vp, rabn, biosize, p); + if (!rabp) + return (EINTR); + if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) { + rabp->b_flags |= (B_READ | B_ASYNC); + if (nfs_asyncio(rabp)) { + rabp->b_flags |= B_INVAL; + brelse(rabp); + } + } else + brelse(rabp); } + } } - n = 0; - break; + /* + * If the block is in the cache and has the required data + * in a valid region, just copy it out. + * Otherwise, get the block and write back/read in, + * as required. 
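
The cached-block test described above hinges on the b_validoff/b_validend pair that this change brings back into struct buf: the copy can be satisfied from the cache only when the requested range lies inside the valid region. A minimal user-space sketch of that containment check, with plain ints standing in for the buffer fields:

/*
 * Model of the valid-region test used by the restored nfs_bioread():
 * a cached block satisfies the copy only if [on, on + n) lies inside
 * [validoff, validend).
 */
#include <stdio.h>

static int
range_is_valid(int validoff, int validend, int on, int n)
{
        return (on >= validoff && on + n <= validend);
}

int
main(void)
{
        /* Whole 4k block valid; 512 bytes requested at offset 1024. */
        printf("%d\n", range_is_valid(0, 4096, 1024, 512));    /* 1: copy out */
        /* Only the first 2048 bytes valid; the request misses. */
        printf("%d\n", range_is_valid(0, 2048, 1536, 1024));   /* 0: re-read */
        return (0);
}
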
+ */ + if ((bp = incore(vp, bn)) && + (bp->b_flags & (B_BUSY | B_WRITEINPROG)) == + (B_BUSY | B_WRITEINPROG)) + got_buf = 0; + else { +again: + bp = nfs_getcacheblk(vp, bn, biosize, p); + if (!bp) + return (EINTR); + got_buf = 1; + if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { + bp->b_flags |= B_READ; + not_readin = 0; + error = nfs_doio(bp, p); + if (error) { + brelse(bp); + return (error); + } + } + } + n = min((unsigned)(biosize - on), uio->uio_resid); + diff = np->n_size - uio->uio_offset; + if (diff < n) + n = diff; + if (not_readin && n > 0) { + if (on < bp->b_validoff || (on + n) > bp->b_validend) { + if (!got_buf) { + bp = nfs_getcacheblk(vp, bn, biosize, p); + if (!bp) + return (EINTR); + got_buf = 1; + } + bp->b_flags |= B_INVAFTERWRITE; + if (bp->b_dirtyend > 0) { + if ((bp->b_flags & B_DELWRI) == 0) + panic("nfsbioread"); + if (VOP_BWRITE(bp) == EINTR) + return (EINTR); + } else + brelse(bp); + goto again; + } + } + diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on); + if (diff < n) + n = diff; + break; case VLNK: nfsstats.biocache_readlinks++; bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); @@ -185,7 +247,7 @@ nfs_bioread(vp, uio, ioflag, cred) return (error); } } - n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); + n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); got_buf = 1; on = 0; break; @@ -227,17 +289,18 @@ nfs_write(v) int a_ioflag; struct ucred *a_cred; } */ *ap = v; - int biosize; - struct uio *uio = ap->a_uio; + register int biosize; + register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; - struct vnode *vp = ap->a_vp; + register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); - struct ucred *cred = ap->a_cred; + register struct ucred *cred = ap->a_cred; int ioflag = ap->a_ioflag; + struct buf *bp; struct vattr vattr; struct nfsmount *nmp = VFSTONFS(vp->v_mount); - int error = 0; - int rv; + daddr_t lbn, bn; + int n, on, error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) @@ -297,52 +360,85 @@ nfs_write(v) */ biosize = nmp->nm_rsize; do { - void *win; - voff_t oldoff = uio->uio_offset; - vsize_t bytelen; /* - * XXXART - workaround for compiler bug on 68k. Wieee! 
+ * XXX make sure we aren't cached in the VM page cache */ - *((volatile vsize_t *)&bytelen) = uio->uio_resid; + uvm_vnp_uncache(vp); nfsstats.biocache_writes++; + lbn = uio->uio_offset / biosize; + on = uio->uio_offset & (biosize-1); + n = min((unsigned)(biosize - on), uio->uio_resid); + bn = lbn * (biosize / DEV_BSIZE); +again: + bp = nfs_getcacheblk(vp, bn, biosize, p); + if (!bp) + return (EINTR); np->n_flag |= NMODIFIED; - if (np->n_size < uio->uio_offset + bytelen) { - np->n_size = uio->uio_offset + bytelen; - uvm_vnp_setsize(vp, np->n_size); - } - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen, - UBC_WRITE); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - rv = 1; - if ((ioflag & IO_SYNC)) { - simple_lock(&vp->v_uobj.vmobjlock); - rv = vp->v_uobj.pgops->pgo_flush( - &vp->v_uobj, - oldoff & ~(nmp->nm_wsize - 1), - uio->uio_offset & ~(nmp->nm_wsize - 1), - PGO_CLEANIT|PGO_SYNCIO); - simple_unlock(&vp->v_uobj.vmobjlock); - } else if ((oldoff & ~(nmp->nm_wsize - 1)) != - (uio->uio_offset & ~(nmp->nm_wsize - 1))) { - simple_lock(&vp->v_uobj.vmobjlock); - rv = vp->v_uobj.pgops->pgo_flush( - &vp->v_uobj, - oldoff & ~(nmp->nm_wsize - 1), - uio->uio_offset & ~(nmp->nm_wsize - 1), - PGO_CLEANIT|PGO_WEAK); - simple_unlock(&vp->v_uobj.vmobjlock); + if (uio->uio_offset + n > np->n_size) { + np->n_size = uio->uio_offset + n; + uvm_vnp_setsize(vp, (u_long)np->n_size); } - if (!rv) { - error = EIO; + + /* + * If the new write will leave a contiguous dirty + * area, just update the b_dirtyoff and b_dirtyend, + * otherwise force a write rpc of the old dirty area. + */ + if (bp->b_dirtyend > 0 && + (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { + bp->b_proc = p; + if (VOP_BWRITE(bp) == EINTR) + return (EINTR); + goto again; } + + error = uiomove((char *)bp->b_data + on, n, uio); if (error) { - break; + bp->b_flags |= B_ERROR; + brelse(bp); + return (error); } - } while (uio->uio_resid > 0); - return (error); + if (bp->b_dirtyend > 0) { + bp->b_dirtyoff = min(on, bp->b_dirtyoff); + bp->b_dirtyend = max((on + n), bp->b_dirtyend); + } else { + bp->b_dirtyoff = on; + bp->b_dirtyend = on + n; + } + if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || + bp->b_validoff > bp->b_dirtyend) { + bp->b_validoff = bp->b_dirtyoff; + bp->b_validend = bp->b_dirtyend; + } else { + bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); + bp->b_validend = max(bp->b_validend, bp->b_dirtyend); + } + + /* + * Since this block is being modified, it must be written + * again and not just committed. + */ + bp->b_flags &= ~B_NEEDCOMMIT; + + /* + * If the lease is non-cachable or IO_SYNC do bwrite(). 
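
The dirty-region handling restored in nfs_write() above either grows the buffer's b_dirtyoff/b_dirtyend pair or, when the new write would leave a hole, forces the old region out with a write RPC first. A self-contained model of that bookkeeping, using a simplified stand-in struct rather than struct buf:

/*
 * Model of the dirty-region merge in the restored nfs_write().
 * Returns 1 when the new write [on, on + n) was merged into the
 * buffer's dirty region, 0 when the old region must be written first
 * because merging would leave a hole.
 */
#include <stdio.h>

struct dirty {
        int off;        /* b_dirtyoff: start of dirty region */
        int end;        /* b_dirtyend: end of dirty region, 0 if clean */
};

static int
merge_dirty(struct dirty *d, int on, int n)
{
        if (d->end > 0 && (on > d->end || on + n < d->off))
                return (0);             /* would leave a hole: flush first */
        if (d->end == 0) {
                d->off = on;
                d->end = on + n;
        } else {
                if (on < d->off)
                        d->off = on;
                if (on + n > d->end)
                        d->end = on + n;
        }
        return (1);
}

int
main(void)
{
        struct dirty d = { 0, 0 };

        printf("%d [%d,%d)\n", merge_dirty(&d, 100, 50), d.off, d.end);
        printf("%d [%d,%d)\n", merge_dirty(&d, 150, 50), d.off, d.end);
        printf("%d [%d,%d)\n", merge_dirty(&d, 400, 10), d.off, d.end); /* hole */
        return (0);
}
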
+ */ + if (ioflag & IO_SYNC) { + bp->b_proc = p; + error = VOP_BWRITE(bp); + if (error) + return (error); + } else if ((n + on) == biosize) { + bp->b_proc = (struct proc *)0; + bp->b_flags |= B_ASYNC; + (void)nfs_writebp(bp, 0); + } else { + bdwrite(bp); + } + } while (uio->uio_resid > 0 && n > 0); + return (0); } /* @@ -364,9 +460,9 @@ nfs_getcacheblk(vp, bn, size, p) if (nmp->nm_flag & NFSMNT_INT) { bp = getblk(vp, bn, size, PCATCH, 0); - while (bp == NULL) { - if (nfs_sigintr(nmp, NULL, p)) - return (NULL); + while (bp == (struct buf *)0) { + if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) + return ((struct buf *)0); bp = getblk(vp, bn, size, 0, 2 * hz); } } else @@ -406,7 +502,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg) np->n_flag |= NFLUSHWANT; error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo); - if (error && intrflg && nfs_sigintr(nmp, NULL, p)) + if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) return (EINTR); } @@ -416,7 +512,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg) np->n_flag |= NFLUSHINPROG; error = vinvalbuf(vp, flags, cred, p, slpflag, 0); while (error) { - if (intrflg && nfs_sigintr(nmp, NULL, p)) { + if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { np->n_flag &= ~NFLUSHINPROG; if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; @@ -443,20 +539,41 @@ int nfs_asyncio(bp) struct buf *bp; { - int i; + int i,s; if (nfs_numasync == 0) return (EIO); - for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { + for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (nfs_iodwant[i]) { + if ((bp->b_flags & B_READ) == 0) { + bp->b_flags |= B_WRITEINPROG; + } + TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist); - nfs_iodwant[i] = NULL; + nfs_iodwant[i] = (struct proc *)0; wakeup((caddr_t)&nfs_iodwant[i]); return (0); } - } - return (EIO); + /* + * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE + * return EIO so the process will call nfs_doio() and do it + * synchronously. + */ + if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE)) + return (EIO); + + /* + * Just turn the async write into a delayed write, instead of + * doing in synchronously. Hopefully, at least one of the nfsiods + * is currently doing a write for this file and will pick up the + * delayed writes before going back to sleep. 
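
nfs_asyncio(), restored above, prefers to hand the buffer to an idle nfsiod and otherwise falls back as the comment describes: reads (and writes already in progress or marked no-cache) are bounced back to the caller with EIO so they run synchronously, while an ordinary async write is simply turned into a delayed write. A small user-space model of that policy; the flag values and return codes are illustrative stand-ins.

/*
 * Model of the fallback policy in the restored nfs_asyncio().
 */
#include <stdio.h>

#define B_READ          0x01    /* stand-in flag values */
#define B_WRITEINPROG   0x02
#define B_NOCACHE       0x04

enum outcome { QUEUED_TO_IOD, SYNC_EIO, DELAYED_WRITE };

static enum outcome
asyncio_policy(int idle_iods, int bflags)
{
        if (idle_iods > 0)
                return (QUEUED_TO_IOD);
        if (bflags & (B_READ | B_WRITEINPROG | B_NOCACHE))
                return (SYNC_EIO);      /* caller falls back to nfs_doio() */
        return (DELAYED_WRITE);         /* buf_dirty() + biodone() */
}

int
main(void)
{
        printf("%d\n", asyncio_policy(1, B_READ));      /* 0: queued */
        printf("%d\n", asyncio_policy(0, B_READ));      /* 1: sync */
        printf("%d\n", asyncio_policy(0, 0));           /* 2: delayed */
        return (0);
}
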
+ */ + s = splbio(); + buf_dirty(bp); + splx(s); + biodone(bp); + return (0); } /* @@ -472,7 +589,7 @@ nfs_doio(bp, p) register struct vnode *vp; struct nfsnode *np; struct nfsmount *nmp; - int error = 0, diff, len, iomode, must_commit = 0; + int s, error = 0, diff, len, iomode, must_commit = 0; struct uio uio; struct iovec io; @@ -519,7 +636,9 @@ nfs_doio(bp, p) uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT; nfsstats.read_bios++; error = nfs_readrpc(vp, uiop); - if (!error && uiop->uio_resid) { + if (!error) { + bp->b_validoff = 0; + if (uiop->uio_resid) { /* * If len > 0, there is a hole in the file and * no writes after the hole have been pushed to @@ -530,9 +649,13 @@ nfs_doio(bp, p) len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT) + diff); if (len > 0) { - len = MIN(len, uiop->uio_resid); - memset((char *)bp->b_data + diff, 0, len); - } + len = min(len, uiop->uio_resid); + bzero((char *)bp->b_data + diff, len); + bp->b_validend = diff + len; + } else + bp->b_validend = diff; + } else + bp->b_validend = bp->b_bcount; } if (p && (vp->v_flag & VTEXT) && (np->n_mtime != np->n_vattr.va_mtime.tv_sec)) { @@ -549,19 +672,62 @@ nfs_doio(bp, p) default: printf("nfs_doio: type %x unexpected\n",vp->v_type); break; - } + }; if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } } else { - io.iov_base = bp->b_data; - io.iov_len = uiop->uio_resid = bp->b_bcount; - uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT; + io.iov_len = uiop->uio_resid = bp->b_dirtyend + - bp->b_dirtyoff; + uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE + + bp->b_dirtyoff; + io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; nfsstats.write_bios++; - iomode = NFSV3WRITE_UNSTABLE; + if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC) + iomode = NFSV3WRITE_UNSTABLE; + else + iomode = NFSV3WRITE_FILESYNC; + bp->b_flags |= B_WRITEINPROG; +#ifdef fvdl_debug + printf("nfs_doio(%x): bp %x doff %d dend %d\n", + vp, bp, bp->b_dirtyoff, bp->b_dirtyend); +#endif error = nfs_writerpc(vp, uiop, &iomode, &must_commit); + if (!error && iomode == NFSV3WRITE_UNSTABLE) + bp->b_flags |= B_NEEDCOMMIT; + else + bp->b_flags &= ~B_NEEDCOMMIT; + bp->b_flags &= ~B_WRITEINPROG; + + /* + * For an interrupted write, the buffer is still valid and the + * write hasn't been pushed to the server yet, so we can't set + * B_ERROR and report the interruption by setting B_EINTR. For + * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt + * is essentially a noop. + * For the case of a V3 write rpc not being committed to stable + * storage, the block is still dirty and requires either a commit + * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC + * before the block is reused. This is indicated by setting the + * B_DELWRI and B_NEEDCOMMIT flags. + */ + if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { + s = splbio(); + buf_dirty(bp); + splx(s); + + if (!(bp->b_flags & B_ASYNC) && error) + bp->b_flags |= B_EINTR; + } else { + if (error) { + bp->b_flags |= B_ERROR; + bp->b_error = np->n_error = error; + np->n_flag |= NWRITEERR; + } + bp->b_dirtyoff = bp->b_dirtyend = 0; + } } bp->b_resid = uiop->uio_resid; if (must_commit) @@ -569,597 +735,3 @@ nfs_doio(bp, p) biodone(bp); return (error); } - -/* - * Vnode op for VM getpages. 
- */ -int -nfs_getpages(v) - void *v; -{ - struct vop_getpages_args /* { - struct vnode *a_vp; - voff_t a_offset; - vm_page_t *a_m; - int *a_count; - int a_centeridx; - vm_prot_t a_access_type; - int a_advice; - int a_flags; - } */ *ap = v; - - off_t eof, offset, origoffset, startoffset, endoffset; - int s, i, error, npages, orignpages, npgs, ridx, pidx, pcount; - vaddr_t kva; - struct buf *bp, *mbp; - struct vnode *vp = ap->a_vp; - struct nfsnode *np = VTONFS(vp); - struct uvm_object *uobj = &vp->v_uobj; - struct nfsmount *nmp = VFSTONFS(vp->v_mount); - size_t bytes, iobytes, tailbytes, totalbytes, skipbytes; - int flags = ap->a_flags; - int bsize; - struct vm_page *pgs[16]; /* XXXUBC 16 */ - boolean_t v3 = NFS_ISV3(vp); - boolean_t async = (flags & PGO_SYNCIO) == 0; - boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0; - struct proc *p = curproc; - - UVMHIST_FUNC("nfs_getpages"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "vp %p off 0x%x count %d", vp, (int)ap->a_offset, - *ap->a_count,0); - -#ifdef DIAGNOSTIC - if (ap->a_centeridx < 0 || ap->a_centeridx >= *ap->a_count) { - panic("nfs_getpages: centeridx %d out of range", - ap->a_centeridx); - } -#endif - - error = 0; - origoffset = ap->a_offset; - eof = vp->v_size; - if (origoffset >= eof) { - if ((flags & PGO_LOCKED) == 0) { - simple_unlock(&uobj->vmobjlock); - } - UVMHIST_LOG(ubchist, "off 0x%x past EOF 0x%x", - (int)origoffset, (int)eof,0,0); - return EINVAL; - } - - if (flags & PGO_LOCKED) { - uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m, - UFP_NOWAIT|UFP_NOALLOC); - return 0; - } - - /* vnode is VOP_LOCKed, uobj is locked */ - if (write && (vp->v_bioflag & VBIOONSYNCLIST) == 0) { - vn_syncer_add_to_worklist(vp, syncdelay); - } - bsize = nmp->nm_rsize; - orignpages = MIN(*ap->a_count, - round_page(eof - origoffset) >> PAGE_SHIFT); - npages = orignpages; - startoffset = origoffset & ~(bsize - 1); - endoffset = round_page((origoffset + (npages << PAGE_SHIFT) - + bsize - 1) & ~(bsize - 1)); - endoffset = MIN(endoffset, round_page(eof)); - ridx = (origoffset - startoffset) >> PAGE_SHIFT; - - if (!async && !write) { - int rapages = MAX(PAGE_SIZE, nmp->nm_rsize) >> PAGE_SHIFT; - - (void) VOP_GETPAGES(vp, endoffset, NULL, &rapages, 0, - VM_PROT_READ, 0, 0); - simple_lock(&uobj->vmobjlock); - } - - UVMHIST_LOG(ubchist, "npages %d offset 0x%x", npages, - (int)origoffset, 0,0); - memset(pgs, 0, sizeof(pgs)); - uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL); - - if (flags & PGO_OVERWRITE) { - UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0); - - /* XXXUBC for now, zero the page if we allocated it */ - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if (pg->flags & PG_FAKE) { - uvm_pagezero(pg); - pg->flags &= ~(PG_FAKE); - } - } - npages += ridx; - if (v3) { - simple_unlock(&uobj->vmobjlock); - goto uncommit; - } - goto out; - } - - /* - * if the pages are already resident, just return them. - */ - - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if ((pg->flags & PG_FAKE) != 0 || - ((ap->a_access_type & VM_PROT_WRITE) && - (pg->flags & PG_RDONLY))) { - break; - } - } - if (i == npages) { - UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0); - npages += ridx; - goto out; - } - - /* - * the page wasn't resident and we're not overwriting, - * so we're going to have to do some i/o. - * find any additional pages needed to cover the expanded range. 
- */ - - if (startoffset != origoffset || - startoffset + (npages << PAGE_SHIFT) != endoffset) { - - /* - * XXXUBC we need to avoid deadlocks caused by locking - * additional pages at lower offsets than pages we - * already have locked. for now, unlock them all and - * start over. - */ - - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if (pg->flags & PG_FAKE) { - pg->flags |= PG_RELEASED; - } - } - uvm_page_unbusy(&pgs[ridx], npages); - memset(pgs, 0, sizeof(pgs)); - - UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x", - startoffset, endoffset, 0,0); - npages = (endoffset - startoffset) >> PAGE_SHIFT; - npgs = npages; - uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL); - } - simple_unlock(&uobj->vmobjlock); - - /* - * update the cached read creds for this node. - */ - - if (np->n_rcred) { - crfree(np->n_rcred); - } - np->n_rcred = curproc->p_ucred; - crhold(np->n_rcred); - - /* - * read the desired page(s). - */ - - totalbytes = npages << PAGE_SHIFT; - bytes = MIN(totalbytes, vp->v_size - startoffset); - tailbytes = totalbytes - bytes; - skipbytes = 0; - - kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK | - UVMPAGER_MAPIN_READ); - - s = splbio(); - mbp = pool_get(&bufpool, PR_WAITOK); - splx(s); - mbp->b_bufsize = totalbytes; - mbp->b_data = (void *)kva; - mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0); - mbp->b_iodone = uvm_aio_biodone; - mbp->b_vp = NULL; - mbp->b_proc = NULL; /* XXXUBC */ - LIST_INIT(&mbp->b_dep); - bgetvp(vp, mbp); - - /* - * if EOF is in the middle of the last page, zero the part past EOF. - */ - - if (tailbytes > 0 && (pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE)) { - memset((char *)kva + bytes, 0, tailbytes); - } - - /* - * now loop over the pages, reading as needed. - */ - - bp = NULL; - for (offset = startoffset; - bytes > 0; - offset += iobytes, bytes -= iobytes) { - - /* - * skip pages which don't need to be read. - */ - - pidx = (offset - startoffset) >> PAGE_SHIFT; - UVMHIST_LOG(ubchist, "pidx %d offset 0x%x startoffset 0x%x", - pidx, (int)offset, (int)startoffset,0); - while ((pgs[pidx]->flags & PG_FAKE) == 0) { - size_t b; - - KASSERT((offset & (PAGE_SIZE - 1)) == 0); - b = MIN(PAGE_SIZE, bytes); - offset += b; - bytes -= b; - skipbytes += b; - pidx++; - UVMHIST_LOG(ubchist, "skipping, new offset 0x%x", - (int)offset, 0,0,0); - if (bytes == 0) { - goto loopdone; - } - } - - /* - * see how many pages can be read with this i/o. - * reduce the i/o size if necessary. - */ - - iobytes = bytes; - if (offset + iobytes > round_page(offset)) { - pcount = 1; - while (pidx + pcount < npages && - pgs[pidx + pcount]->flags & PG_FAKE) { - pcount++; - } - iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) - - (offset - trunc_page(offset))); - } - iobytes = MIN(iobytes, nmp->nm_rsize); - - /* - * allocate a sub-buf for this piece of the i/o - * (or just use mbp if there's only 1 piece), - * and start it going. 
- */ - - if (offset == startoffset && iobytes == bytes) { - bp = mbp; - } else { - s = splbio(); - bp = pool_get(&bufpool, PR_WAITOK); - splx(s); - bp->b_data = (char *)kva + offset - startoffset; - bp->b_resid = bp->b_bcount = iobytes; - bp->b_flags = B_BUSY|B_READ|B_CALL|B_ASYNC; - bp->b_iodone = uvm_aio_biodone1; - bp->b_vp = vp; - bp->b_proc = NULL; /* XXXUBC */ - LIST_INIT(&bp->b_dep); - } - bp->b_private = mbp; - bp->b_lblkno = bp->b_blkno = offset >> DEV_BSHIFT; - - UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x", - bp, offset, iobytes, bp->b_blkno); - - VOP_STRATEGY(bp); - } - -loopdone: - if (skipbytes) { - s = splbio(); - mbp->b_resid -= skipbytes; - if (mbp->b_resid == 0) { - biodone(mbp); - } - splx(s); - } - if (async) { - UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0); - return 0; - } - if (bp != NULL) { - error = biowait(mbp); - } - s = splbio(); - (void) buf_cleanout(mbp); - pool_put(&bufpool, mbp); - splx(s); - uvm_pagermapout(kva, npages); - - if (write && v3) { -uncommit: - lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p); - nfs_del_committed_range(vp, origoffset, npages); - nfs_del_tobecommitted_range(vp, origoffset, npages); - simple_lock(&uobj->vmobjlock); - for (i = 0; i < npages; i++) { - if (pgs[i] == NULL) { - continue; - } - pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY); - } - simple_unlock(&uobj->vmobjlock); - lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p); - } - - simple_lock(&uobj->vmobjlock); - -out: - if (error) { - uvm_lock_pageq(); - for (i = 0; i < npages; i++) { - if (pgs[i] == NULL) { - continue; - } - UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x", - pgs[i], pgs[i]->flags, 0,0); - if (pgs[i]->flags & PG_WANTED) { - wakeup(pgs[i]); - } - if (pgs[i]->flags & PG_RELEASED) { - uvm_unlock_pageq(); - (uobj->pgops->pgo_releasepg)(pgs[i], NULL); - uvm_lock_pageq(); - continue; - } - if (pgs[i]->flags & PG_FAKE) { - uvm_pagefree(pgs[i]); - continue; - } - uvm_pageactivate(pgs[i]); - pgs[i]->flags &= ~(PG_WANTED|PG_BUSY); - UVM_PAGE_OWN(pgs[i], NULL); - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0); - return error; - } - - UVMHIST_LOG(ubchist, "ridx %d count %d", ridx, npages, 0,0); - uvm_lock_pageq(); - for (i = 0; i < npages; i++) { - if (pgs[i] == NULL) { - continue; - } - UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x", - pgs[i], pgs[i]->flags, 0,0); - if (pgs[i]->flags & PG_FAKE) { - UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x", - pgs[i], (int)pgs[i]->offset,0,0); - pgs[i]->flags &= ~(PG_FAKE); - pmap_clear_modify(pgs[i]); - pmap_clear_reference(pgs[i]); - } - if (i < ridx || i >= ridx + orignpages || async) { - UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x", - pgs[i], (int)pgs[i]->offset,0,0); - if (pgs[i]->flags & PG_WANTED) { - wakeup(pgs[i]); - } - if (pgs[i]->flags & PG_RELEASED) { - uvm_unlock_pageq(); - (uobj->pgops->pgo_releasepg)(pgs[i], NULL); - uvm_lock_pageq(); - continue; - } - uvm_pageactivate(pgs[i]); - pgs[i]->flags &= ~(PG_WANTED|PG_BUSY); - UVM_PAGE_OWN(pgs[i], NULL); - } - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - if (ap->a_m != NULL) { - memcpy(ap->a_m, &pgs[ridx], - *ap->a_count * sizeof(struct vm_page *)); - } - return 0; -} - -/* - * Vnode op for VM putpages. 
- */ -int -nfs_putpages(v) - void *v; -{ - struct vop_putpages_args /* { - struct vnode *a_vp; - struct vm_page **a_m; - int a_count; - int a_flags; - int *a_rtvals; - } */ *ap = v; - - struct vnode *vp = ap->a_vp; - struct nfsnode *np = VTONFS(vp); - struct nfsmount *nmp = VFSTONFS(vp->v_mount); - struct buf *bp, *mbp; - struct vm_page **pgs = ap->a_m; - int flags = ap->a_flags; - int npages = ap->a_count; - int s, error, i; - size_t bytes, iobytes, skipbytes; - vaddr_t kva; - off_t offset, origoffset, commitoff; - uint32_t commitbytes; - boolean_t v3 = NFS_ISV3(vp); - boolean_t async = (flags & PGO_SYNCIO) == 0; - boolean_t weak = (flags & PGO_WEAK) && v3; - struct proc *p = curproc; - UVMHIST_FUNC("nfs_putpages"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "vp %p pgp %p count %d", - vp, ap->a_m, ap->a_count,0); - - simple_unlock(&vp->v_uobj.vmobjlock); - - error = 0; - origoffset = pgs[0]->offset; - bytes = MIN(ap->a_count << PAGE_SHIFT, vp->v_size - origoffset); - skipbytes = 0; - - /* - * if the range has been committed already, mark the pages thus. - * if the range just needs to be committed, we're done - * if it's a weak putpage, otherwise commit the range. - */ - - if (v3) { - lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p); - if (nfs_in_committed_range(vp, origoffset, bytes)) { - goto committed; - } - if (nfs_in_tobecommitted_range(vp, origoffset, bytes)) { - if (weak) { - lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p); - return 0; - } else { - commitoff = np->n_pushlo; - commitbytes = (uint32_t)(np->n_pushhi - - np->n_pushlo); - goto commit; - } - } - lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p); - } - - /* - * otherwise write or commit all the pages. - */ - - kva = uvm_pagermapin(pgs, ap->a_count, UVMPAGER_MAPIN_WAITOK| - UVMPAGER_MAPIN_WRITE); - - s = splbio(); - vp->v_numoutput += 2; - mbp = pool_get(&bufpool, PR_WAITOK); - UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x", - vp, mbp, vp->v_numoutput, bytes); - splx(s); - mbp->b_bufsize = npages << PAGE_SHIFT; - mbp->b_data = (void *)kva; - mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_WRITE|B_AGE | - (async ? B_CALL|B_ASYNC : 0) | - (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0); - mbp->b_iodone = uvm_aio_biodone; - mbp->b_vp = NULL; - mbp->b_proc = NULL; /* XXXUBC */ - LIST_INIT(&mbp->b_dep); - bgetvp(vp, mbp); - - for (offset = origoffset; - bytes > 0; - offset += iobytes, bytes -= iobytes) { - iobytes = MIN(nmp->nm_wsize, bytes); - - /* - * skip writing any pages which only need a commit. 
- */ - - if ((pgs[(offset - origoffset) >> PAGE_SHIFT]->flags & - PG_NEEDCOMMIT) != 0) { - KASSERT((offset & (PAGE_SIZE - 1)) == 0); - iobytes = MIN(PAGE_SIZE, bytes); - skipbytes += iobytes; - continue; - } - - /* if it's really one i/o, don't make a second buf */ - if (offset == origoffset && iobytes == bytes) { - bp = mbp; - } else { - s = splbio(); - vp->v_numoutput++; - bp = pool_get(&bufpool, PR_WAITOK); - UVMHIST_LOG(ubchist, "vp %p bp %p num now %d", - vp, bp, vp->v_numoutput, 0); - splx(s); - bp->b_data = (char *)kva + (offset - origoffset); - bp->b_resid = bp->b_bcount = iobytes; - bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC; - bp->b_iodone = uvm_aio_biodone1; - bp->b_vp = vp; - bp->b_proc = NULL; /* XXXUBC */ - LIST_INIT(&bp->b_dep); - } - bp->b_private = mbp; - bp->b_lblkno = bp->b_blkno = (daddr_t)(offset >> DEV_BSHIFT); - UVMHIST_LOG(ubchist, "bp %p numout %d", - bp, vp->v_numoutput,0,0); - VOP_STRATEGY(bp); - } - if (skipbytes) { - UVMHIST_LOG(ubchist, "skipbytes %d", bytes, 0,0,0); - s = splbio(); - mbp->b_resid -= skipbytes; - if (mbp->b_resid == 0) { - biodone(mbp); - } - splx(s); - } - if (async) { - return 0; - } - if (bp != NULL) { - error = biowait(mbp); - } - - s = splbio(); - if (mbp->b_vp) { - vwakeup(mbp->b_vp); - } - (void) buf_cleanout(mbp); - pool_put(&bufpool, mbp); - splx(s); - - uvm_pagermapout(kva, ap->a_count); - if (error || !v3) { - UVMHIST_LOG(ubchist, "returning error %d", error, 0,0,0); - return error; - } - - /* - * for a weak put, mark the range as "to be committed" - * and mark the pages read-only so that we will be notified - * to remove the pages from the "to be committed" range - * if they are made dirty again. - * for a strong put, commit the pages and remove them from the - * "to be committed" range. also, mark them as writable - * and not cleanable with just a commit. - */ - - lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p); - if (weak) { - nfs_add_tobecommitted_range(vp, origoffset, - npages << PAGE_SHIFT); - for (i = 0; i < npages; i++) { - pgs[i]->flags |= PG_NEEDCOMMIT|PG_RDONLY; - } - } else { - commitoff = origoffset; - commitbytes = npages << PAGE_SHIFT; -commit: - error = nfs_commit(vp, commitoff, commitbytes, curproc); - nfs_del_tobecommitted_range(vp, commitoff, commitbytes); -committed: - for (i = 0; i < npages; i++) { - pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY); - } - } - lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p); - return error; -} diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c index f0cebcb4566..d88a7649524 100644 --- a/sys/nfs/nfs_node.c +++ b/sys/nfs/nfs_node.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_node.c,v 1.18 2001/12/01 01:44:35 art Exp $ */ +/* $OpenBSD: nfs_node.c,v 1.19 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_node.c,v 1.16 1996/02/18 11:53:42 fvdl Exp $ */ /* @@ -145,7 +145,6 @@ loop: vp = nvp; np = pool_get(&nfs_node_pool, PR_WAITOK); bzero((caddr_t)np, sizeof *np); - lockinit(&np->n_commitlock, PINOD, "nfsclock", 0, 0); vp->v_data = np; np->n_vnode = vp; @@ -170,19 +169,6 @@ loop: np->n_fhp = &np->n_fh; bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize); np->n_fhsize = fhsize; - - /* - * XXXUBC doing this while holding the nfs_hashlock is bad, - * but there's no alternative at the moment. 
- */ - error = VOP_GETATTR(vp, &np->n_vattr, curproc->p_ucred, curproc); - if (error) { - lockmgr(&nfs_hashlock, LK_RELEASE, 0, p); - vrele(vp); - return error; - } - uvm_vnp_setsize(vp, np->n_vattr.va_size); - lockmgr(&nfs_hashlock, LK_RELEASE, 0, p); *npp = np; return (0); @@ -199,12 +185,11 @@ nfs_inactive(v) struct nfsnode *np; struct sillyrename *sp; struct proc *p = curproc; /* XXX */ - struct vnode *vp = ap->a_vp; - np = VTONFS(vp); - if (prtactive && vp->v_usecount != 0) - vprint("nfs_inactive: pushing active", vp); - if (vp->v_type != VDIR) { + np = VTONFS(ap->a_vp); + if (prtactive && ap->a_vp->v_usecount != 0) + vprint("nfs_inactive: pushing active", ap->a_vp); + if (ap->a_vp->v_type != VDIR) { sp = np->n_sillyrename; np->n_sillyrename = (struct sillyrename *)0; } else @@ -213,7 +198,7 @@ nfs_inactive(v) /* * Remove the silly file that was rename'd earlier */ - (void) nfs_vinvalbuf(vp, 0, sp->s_cred, p, 1); + (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1); nfs_removeit(sp); crfree(sp->s_cred); vrele(sp->s_dvp); @@ -221,7 +206,7 @@ nfs_inactive(v) } np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT); - VOP_UNLOCK(vp, 0, ap->a_p); + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c index 9534e7221da..a66f457ceeb 100644 --- a/sys/nfs/nfs_serv.c +++ b/sys/nfs/nfs_serv.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_serv.c,v 1.28 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: nfs_serv.c,v 1.29 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_serv.c,v 1.34 1997/05/12 23:37:12 fvdl Exp $ */ /* @@ -1663,6 +1663,8 @@ nfsrv_remove(nfsd, slp, procp, mrq) error = EBUSY; goto out; } + if (vp->v_flag & VTEXT) + uvm_vnp_uncache(vp); out: if (!error) { error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); @@ -3274,10 +3276,11 @@ nfsrv_access(vp, flags, cred, rdonly, p, override) } } /* - * If the vnode is in use as a process's text, - * we can't allow writing. + * If there's shared text associated with + * the inode, try to free it up once. If + * we fail, we can't allow writing. */ - if ((vp->v_flag & VTEXT)) + if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp)) return (ETXTBSY); } error = VOP_ACCESS(vp, flags, cred, p); diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c index efee3069743..38a91d45245 100644 --- a/sys/nfs/nfs_subs.c +++ b/sys/nfs/nfs_subs.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_subs.c,v 1.37 2001/12/10 02:19:34 art Exp $ */ +/* $OpenBSD: nfs_subs.c,v 1.38 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_subs.c,v 1.27.4.3 1996/07/08 20:34:24 jtc Exp $ */ /* @@ -39,40 +39,6 @@ * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 */ -/* - * Copyright 2000 Wasabi Systems, Inc. - * All rights reserved. - * - * Written by Frank van der Linden for Wasabi Systems, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed for the NetBSD Project by - * Wasabi Systems, Inc. - * 4. The name of Wasabi Systems, Inc. 
may not be used to endorse - * or promote products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ /* * These functions support the macros and help fiddle mbuf chains for @@ -1275,14 +1241,17 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) vap->va_filerev = 0; } if (vap->va_size != np->n_size) { - if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) { - vap->va_size = np->n_size; - } else { + if (vap->va_type == VREG) { + if (np->n_flag & NMODIFIED) { + if (vap->va_size < np->n_size) + vap->va_size = np->n_size; + else + np->n_size = vap->va_size; + } else + np->n_size = vap->va_size; + uvm_vnp_setsize(vp, np->n_size); + } else np->n_size = vap->va_size; - if (vap->va_type == VREG) { - uvm_vnp_setsize(vp, np->n_size); - } - } } np->n_attrstamp = time.tv_sec; if (vaper != NULL) { @@ -1772,216 +1741,26 @@ void nfs_clearcommit(mp) struct mount *mp; { - struct vnode *vp; - struct vm_page *pg; - struct nfsnode *np; + register struct vnode *vp, *nvp; + register struct buf *bp, *nbp; int s; s = splbio(); - LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - if (vp->v_type == VNON) - continue; - np = VTONFS(vp); - np->n_pushlo = np->n_pushhi = np->n_pushedlo = - np->n_pushedhi = 0; - np->n_commitflags &= - ~(NFS_COMMIT_PUSH_VALID | NFS_COMMIT_PUSHED_VALID); - simple_lock(&vp->v_uobj.vmobjlock); - TAILQ_FOREACH(pg, &vp->v_uobj.memq, listq) { - pg->flags &= ~PG_NEEDCOMMIT; +loop: + for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { + if (vp->v_mount != mp) /* Paranoia */ + goto loop; + nvp = vp->v_mntvnodes.le_next; + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) + == (B_DELWRI | B_NEEDCOMMIT)) + bp->b_flags &= ~B_NEEDCOMMIT; } - simple_unlock(&vp->v_uobj.vmobjlock); } splx(s); } -void -nfs_merge_commit_ranges(vp) - struct vnode *vp; -{ - struct nfsnode *np = VTONFS(vp); - - if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) { - np->n_pushedlo = np->n_pushlo; - np->n_pushedhi = np->n_pushhi; - np->n_commitflags |= NFS_COMMIT_PUSHED_VALID; - } else { - if (np->n_pushlo < np->n_pushedlo) - np->n_pushedlo = np->n_pushlo; - if (np->n_pushhi > np->n_pushedhi) - np->n_pushedhi = np->n_pushhi; - } - - np->n_pushlo = np->n_pushhi = 0; - np->n_commitflags &= ~NFS_COMMIT_PUSH_VALID; - -#ifdef fvdl_debug - printf("merge: committed: %u - %u\n", (unsigned)np->n_pushedlo, - (unsigned)np->n_pushedhi); -#endif -} - -int -nfs_in_committed_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) - return 0; - lo = off; - hi = lo + len; - - return (lo >= np->n_pushedlo && hi <= np->n_pushedhi); -} - -int 
-nfs_in_tobecommitted_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) - return 0; - lo = off; - hi = lo + len; - - return (lo >= np->n_pushlo && hi <= np->n_pushhi); -} - -void -nfs_add_committed_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - lo = off; - hi = lo + len; - - if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) { - np->n_pushedlo = lo; - np->n_pushedhi = hi; - np->n_commitflags |= NFS_COMMIT_PUSHED_VALID; - } else { - if (hi > np->n_pushedhi) - np->n_pushedhi = hi; - if (lo < np->n_pushedlo) - np->n_pushedlo = lo; - } -#ifdef fvdl_debug - printf("add: committed: %u - %u\n", (unsigned)np->n_pushedlo, - (unsigned)np->n_pushedhi); -#endif -} - -void -nfs_del_committed_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) - return; - - lo = off; - hi = lo + len; - - if (lo > np->n_pushedhi || hi < np->n_pushedlo) - return; - if (lo <= np->n_pushedlo) - np->n_pushedlo = hi; - else if (hi >= np->n_pushedhi) - np->n_pushedhi = lo; - else { - /* - * XXX There's only one range. If the deleted range - * is in the middle, pick the largest of the - * contiguous ranges that it leaves. - */ - if ((np->n_pushedlo - lo) > (hi - np->n_pushedhi)) - np->n_pushedhi = lo; - else - np->n_pushedlo = hi; - } -#ifdef fvdl_debug - printf("del: committed: %u - %u\n", (unsigned)np->n_pushedlo, - (unsigned)np->n_pushedhi); -#endif -} - -void -nfs_add_tobecommitted_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - lo = off; - hi = lo + len; - - if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) { - np->n_pushlo = lo; - np->n_pushhi = hi; - np->n_commitflags |= NFS_COMMIT_PUSH_VALID; - } else { - if (lo < np->n_pushlo) - np->n_pushlo = lo; - if (hi > np->n_pushhi) - np->n_pushhi = hi; - } -#ifdef fvdl_debug - printf("add: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo, - (unsigned)np->n_pushhi); -#endif -} - -void -nfs_del_tobecommitted_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) - return; - - lo = off; - hi = lo + len; - - if (lo > np->n_pushhi || hi < np->n_pushlo) - return; - - if (lo <= np->n_pushlo) - np->n_pushlo = hi; - else if (hi >= np->n_pushhi) - np->n_pushhi = lo; - else { - /* - * XXX There's only one range. If the deleted range - * is in the middle, pick the largest of the - * contiguous ranges that it leaves. - */ - if ((np->n_pushlo - lo) > (hi - np->n_pushhi)) - np->n_pushhi = lo; - else - np->n_pushlo = hi; - } -#ifdef fvdl_debug - printf("del: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo, - (unsigned)np->n_pushhi); -#endif -} - /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. 
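
The helpers deleted from nfs_subs.c above tracked a single committed/to-be-committed byte range per nfsnode, growing it on additions and, when a piece is deleted from the middle, keeping the larger leftover as their comments describe. A stand-alone sketch of that interval arithmetic, with plain longs in place of the off_t nfsnode fields:

/*
 * Model of the single-interval bookkeeping performed by the removed
 * nfs_add_committed_range()/nfs_del_committed_range() helpers.  On a
 * middle deletion only one range can be kept, so the larger leftover
 * piece survives (the intent stated in the removed code's comments).
 */
#include <stdio.h>

struct crange {
        int valid;
        long lo, hi;            /* current [lo, hi) range */
};

static void
add_range(struct crange *r, long off, long len)
{
        long lo = off, hi = off + len;

        if (!r->valid) {
                r->lo = lo;
                r->hi = hi;
                r->valid = 1;
                return;
        }
        if (lo < r->lo)
                r->lo = lo;
        if (hi > r->hi)
                r->hi = hi;
}

static void
del_range(struct crange *r, long off, long len)
{
        long lo = off, hi = off + len;

        if (!r->valid || lo > r->hi || hi < r->lo)
                return;
        if (lo <= r->lo)
                r->lo = hi;
        else if (hi >= r->hi)
                r->hi = lo;
        else if (lo - r->lo > r->hi - hi)       /* keep the larger leftover */
                r->hi = lo;
        else
                r->lo = hi;
}

int
main(void)
{
        struct crange r = { 0, 0, 0 };

        add_range(&r, 0, 8192);
        add_range(&r, 8192, 4096);
        del_range(&r, 4096, 1024);      /* middle piece removed */
        printf("[%ld, %ld)\n", r.lo, r.hi);
        return (0);
}

With the page-based commit tracking gone, the reverted code goes back to marking individual buffers B_NEEDCOMMIT (see the buf.h hunk below) and clearing that flag per buffer in nfs_clearcommit(), so the per-vnode ranges are no longer needed.
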
diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c index 5a189ba344d..87c1618a4a4 100644 --- a/sys/nfs/nfs_syscalls.c +++ b/sys/nfs/nfs_syscalls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_syscalls.c,v 1.21 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: nfs_syscalls.c,v 1.22 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_syscalls.c,v 1.19 1996/02/18 11:53:52 fvdl Exp $ */ /* @@ -913,9 +913,10 @@ int nfssvc_iod(p) struct proc *p; { - struct buf *bp; - int i, myiod; - int error = 0; + register struct buf *bp, *nbp; + register int i, myiod; + struct vnode *vp; + int error = 0, s; /* * Assign my position or return error if too many already running @@ -943,7 +944,39 @@ nfssvc_iod(p) while ((bp = nfs_bufq.tqh_first) != NULL) { /* Take one off the front of the list */ TAILQ_REMOVE(&nfs_bufq, bp, b_freelist); - (void) nfs_doio(bp, NULL); + if (bp->b_flags & B_READ) + (void) nfs_doio(bp, NULL); + else do { + /* + * Look for a delayed write for the same vnode, so I can do + * it now. We must grab it before calling nfs_doio() to + * avoid any risk of the vnode getting vclean()'d while + * we are doing the write rpc. + */ + vp = bp->b_vp; + s = splbio(); + for (nbp = vp->v_dirtyblkhd.lh_first; nbp; + nbp = nbp->b_vnbufs.le_next) { + if ((nbp->b_flags & + (B_BUSY|B_DELWRI|B_NEEDCOMMIT|B_NOCACHE))!=B_DELWRI) + continue; + bremfree(nbp); + nbp->b_flags |= (B_BUSY|B_ASYNC); + break; + } + /* + * For the delayed write, do the first part of nfs_bwrite() + * up to, but not including nfs_strategy(). + */ + if (nbp) { + nbp->b_flags &= ~(B_READ|B_DONE|B_ERROR); + buf_undirty(bp); + nbp->b_vp->v_numoutput++; + } + splx(s); + + (void) nfs_doio(bp, NULL); + } while ((bp = nbp) != NULL); } if (error) { PRELE(p); diff --git a/sys/nfs/nfs_var.h b/sys/nfs/nfs_var.h index 71985e581a8..bf2c5376815 100644 --- a/sys/nfs/nfs_var.h +++ b/sys/nfs/nfs_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_var.h,v 1.16 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: nfs_var.h,v 1.17 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_var.h,v 1.3 1996/02/18 11:53:54 fvdl Exp $ */ /* @@ -119,7 +119,7 @@ int nfs_sillyrename __P((struct vnode *, struct vnode *, struct componentname *)); int nfs_lookitup __P((struct vnode *, char *, int, struct ucred *, struct proc *, struct nfsnode **)); -int nfs_commit __P((struct vnode *, u_quad_t, unsigned, struct proc *)); +int nfs_commit __P((struct vnode *, u_quad_t, int, struct proc *)); int nfs_bmap __P((void *)); int nfs_strategy __P((void *)); int nfs_mmap __P((void *)); @@ -134,6 +134,7 @@ int nfs_vfree __P((void *)); int nfs_truncate __P((void *)); int nfs_update __P((void *)); int nfs_bwrite __P((void *)); +int nfs_writebp __P((struct buf *, int)); int nfsspec_access __P((void *)); int nfsspec_read __P((void *)); int nfsspec_write __P((void *)); @@ -257,16 +258,7 @@ void nfsm_srvfattr __P((struct nfsrv_descript *, struct vattr *, int nfsrv_fhtovp __P((fhandle_t *, int, struct vnode **, struct ucred *, struct nfssvc_sock *, struct mbuf *, int *, int)); int netaddr_match __P((int, union nethostaddr *, struct mbuf *)); - void nfs_clearcommit __P((struct mount *)); -void nfs_merge_commit_ranges __P((struct vnode *)); -int nfs_in_committed_range __P((struct vnode *, off_t, off_t)); -int nfs_in_tobecommitted_range __P((struct vnode *, off_t, off_t)); -void nfs_add_committed_range __P((struct vnode *, off_t, off_t)); -void nfs_del_committed_range __P((struct vnode *, off_t, off_t)); -void nfs_add_tobecommitted_range __P((struct vnode *, off_t, off_t)); -void nfs_del_tobecommitted_range __P((struct vnode 
*, off_t, off_t)); - int nfsrv_errmap __P((struct nfsrv_descript *, int)); void nfsrvw_sort __P((gid_t *, int)); void nfsrv_setcred __P((struct ucred *, struct ucred *)); diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c index 069783e6bf9..4b7733156c8 100644 --- a/sys/nfs/nfs_vfsops.c +++ b/sys/nfs/nfs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_vfsops.c,v 1.41 2001/12/11 09:32:46 art Exp $ */ +/* $OpenBSD: nfs_vfsops.c,v 1.42 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_vfsops.c,v 1.46.4.1 1996/05/25 22:40:35 fvdl Exp $ */ /* @@ -748,8 +748,6 @@ mountnfs(argp, mp, nam, pth, hst) * point. */ mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA; - mp->mnt_fs_bshift = DEV_BSHIFT; - mp->mnt_dev_bshift = DEV_BSHIFT; return (0); bad: @@ -858,9 +856,8 @@ loop: */ if (vp->v_mount != mp) goto loop; - if (waitfor == MNT_LAZY || - (LIST_EMPTY(&vp->v_dirtyblkhd) && - vp->v_uobj.uo_npages == 0)) + if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL || + waitfor == MNT_LAZY) continue; if (vget(vp, LK_EXCLUSIVE, p)) goto loop; diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c index 1af7a6bd1d4..44cceab8a1f 100644 --- a/sys/nfs/nfs_vnops.c +++ b/sys/nfs/nfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_vnops.c,v 1.44 2001/12/11 09:32:46 art Exp $ */ +/* $OpenBSD: nfs_vnops.c,v 1.45 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_vnops.c,v 1.62.4.1 1996/07/08 20:26:52 jtc Exp $ */ /* @@ -126,10 +126,7 @@ struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { { &vop_advlock_desc, nfs_advlock }, /* advlock */ { &vop_reallocblks_desc, nfs_reallocblks }, /* reallocblks */ { &vop_bwrite_desc, nfs_bwrite }, - { &vop_getpages_desc, nfs_getpages }, /* getpages */ - { &vop_putpages_desc, nfs_putpages }, /* putpages */ - { &vop_mmap_desc, vop_generic_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc nfsv2_vnodeop_opv_desc = { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; @@ -154,7 +151,7 @@ struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = { { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ { &vop_select_desc, spec_select }, /* select */ { &vop_revoke_desc, spec_revoke }, /* revoke */ - { &vop_fsync_desc, spec_fsync }, /* fsync */ + { &vop_fsync_desc, nfs_fsync }, /* fsync */ { &vop_remove_desc, spec_remove }, /* remove */ { &vop_link_desc, spec_link }, /* link */ { &vop_rename_desc, spec_rename }, /* rename */ @@ -176,8 +173,7 @@ struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = { { &vop_advlock_desc, spec_advlock }, /* advlock */ { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_mmap_desc, spec_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = { &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries }; @@ -222,8 +218,7 @@ struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = { { &vop_advlock_desc, fifo_advlock }, /* advlock */ { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_mmap_desc, fifo_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = { &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries }; @@ -378,30 +373,11 @@ nfs_open(v) return (EACCES); } - /* - * Initialize read and write creds here, for swapfiles - * and other paths that don't set the creds themselves. 
- */ - - if (ap->a_mode & FREAD) { - if (np->n_rcred) { - crfree(np->n_rcred); - } - np->n_rcred = ap->a_cred; - crhold(np->n_rcred); - } - if (ap->a_mode & FWRITE) { - if (np->n_wcred) { - crfree(np->n_wcred); - } - np->n_wcred = ap->a_cred; - crhold(np->n_wcred); - } - if (np->n_flag & NMODIFIED) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); + uvm_vnp_uncache(vp); np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; @@ -419,6 +395,7 @@ nfs_open(v) if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); + uvm_vnp_uncache(vp); np->n_mtime = vattr.va_mtime.tv_sec; } } @@ -2534,7 +2511,7 @@ int nfs_commit(vp, offset, cnt, procp) struct vnode *vp; u_quad_t offset; - unsigned cnt; + int cnt; struct proc *procp; { caddr_t cp; @@ -2589,7 +2566,7 @@ nfs_bmap(v) daddr_t *a_bnp; int *a_runp; } */ *ap = v; - struct vnode *vp = ap->a_vp; + register struct vnode *vp = ap->a_vp; if (ap->a_vpp != NULL) *ap->a_vpp = vp; @@ -2649,7 +2626,9 @@ nfs_fsync(v) } /* - * Flush all the data associated with a vnode. + * Flush all the blocks associated with a vnode. + * Walk through the buffer pool and push any dirty pages + * associated with the vnode. */ int nfs_flush(vp, cred, waitfor, p, commit) @@ -2659,19 +2638,154 @@ nfs_flush(vp, cred, waitfor, p, commit) struct proc *p; int commit; { - struct uvm_object *uobj = &vp->v_uobj; struct nfsnode *np = VTONFS(vp); - int error; - int flushflags = PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO; - int rv; + struct buf *bp; + int i; + struct buf *nbp; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; + int passone = 1; + u_quad_t off = (u_quad_t)-1, endoff = 0, toff; +#ifndef NFS_COMMITBVECSIZ +#define NFS_COMMITBVECSIZ 20 +#endif + struct buf *bvec[NFS_COMMITBVECSIZ]; - error = 0; + if (nmp->nm_flag & NFSMNT_INT) + slpflag = PCATCH; + if (!commit) + passone = 0; + /* + * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the + * server, but nas not been committed to stable storage on the server + * yet. On the first pass, the byte range is worked out and the commit + * rpc is done. On the second pass, nfs_writebp() is called to do the + * job. + */ +again: + bvecpos = 0; + if (NFS_ISV3(vp) && commit) { + s = splbio(); + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if (bvecpos >= NFS_COMMITBVECSIZ) + break; + if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) + != (B_DELWRI | B_NEEDCOMMIT)) + continue; + bremfree(bp); + bp->b_flags |= (B_BUSY | B_WRITEINPROG); + /* + * A list of these buffers is kept so that the + * second loop knows which buffers have actually + * been committed. This is necessary, since there + * may be a race between the commit rpc and new + * uncommitted writes on the file. + */ + bvec[bvecpos++] = bp; + toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + + bp->b_dirtyoff; + if (toff < off) + off = toff; + toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); + if (toff > endoff) + endoff = toff; + } + splx(s); + } + if (bvecpos > 0) { + /* + * Commit data on the server, as required. + */ + retv = nfs_commit(vp, off, (int)(endoff - off), p); + if (retv == NFSERR_STALEWRITEVERF) + nfs_clearcommit(vp->v_mount); + /* + * Now, either mark the blocks I/O done or mark the + * blocks dirty, depending on whether the commit + * succeeded. 
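
The first pass of the restored nfs_flush() collects every B_DELWRI|B_NEEDCOMMIT buffer and derives a single byte range to hand to the commit RPC. The sketch below models that range computation in user space; DEV_BSIZE and the buffer records are simplified stand-ins for the kernel structures.

/*
 * Model of the commit-range computation in the restored nfs_flush():
 * the smallest [off, end) covering the dirty bytes of every buffer
 * that only needs committing.
 */
#include <stdio.h>

#define DEV_BSIZE 512

struct dbuf {
        long blkno;             /* b_blkno */
        int dirtyoff, dirtyend; /* dirty region within the buffer */
};

static void
commit_range(const struct dbuf *b, int n, long long *off, long long *end)
{
        long long toff;
        int i;

        *off = -1;              /* sentinel: no buffer seen yet */
        *end = 0;
        for (i = 0; i < n; i++) {
                toff = (long long)b[i].blkno * DEV_BSIZE + b[i].dirtyoff;
                if (*off == -1 || toff < *off)
                        *off = toff;
                toff += b[i].dirtyend - b[i].dirtyoff;
                if (toff > *end)
                        *end = toff;
        }
}

int
main(void)
{
        struct dbuf bufs[] = {
                { 16, 0, 4096 },        /* bytes 8192..12288 */
                { 32, 512, 2048 },      /* bytes 16896..18432 */
        };
        long long off, end;

        commit_range(bufs, 2, &off, &end);
        printf("commit %lld bytes at offset %lld\n", end - off, off);
        return (0);
}
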
+ */ + for (i = 0; i < bvecpos; i++) { + bp = bvec[i]; + bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG); + if (retv) + brelse(bp); + else { + s = splbio(); + buf_undirty(bp); + vp->v_numoutput++; + bp->b_flags |= B_ASYNC; + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); + bp->b_dirtyoff = bp->b_dirtyend = 0; + splx(s); + biodone(bp); + } + } + } - simple_lock(&uobj->vmobjlock); - rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags); - simple_unlock(&uobj->vmobjlock); - if (!rv) { - error = EIO; + /* + * Start/do any write(s) that are required. + */ +loop: + s = splbio(); + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if (bp->b_flags & B_BUSY) { + if (waitfor != MNT_WAIT || passone) + continue; + bp->b_flags |= B_WANTED; + error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), + "nfsfsync", slptimeo); + splx(s); + if (error) { + if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) + return (EINTR); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + } + goto loop; + } + if ((bp->b_flags & B_DELWRI) == 0) + panic("nfs_fsync: not dirty"); + if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) + continue; + bremfree(bp); + if (passone || !commit) + bp->b_flags |= (B_BUSY|B_ASYNC); + else + bp->b_flags |= (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT); + splx(s); + VOP_BWRITE(bp); + goto loop; + } + splx(s); + if (passone) { + passone = 0; + goto again; + } + if (waitfor == MNT_WAIT) { + loop2: + s = splbio(); + error = vwaitforio(vp, slpflag, "nfs_fsync", slptimeo); + splx(s); + if (error) { + if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) + return (EINTR); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + goto loop2; + } + + if (vp->v_dirtyblkhd.lh_first && commit) { +#if 0 + vprint("nfs_fsync: dirty", vp); +#endif + goto loop; + } } if (np->n_flag & NWRITEERR) { error = np->n_error; @@ -2746,7 +2860,7 @@ nfs_print(v) } /* - * Just call bwrite(). + * Just call nfs_writebp() with the force argument set to 1. */ int nfs_bwrite(v) @@ -2756,7 +2870,76 @@ nfs_bwrite(v) struct buf *a_bp; } */ *ap = v; - return (bwrite(ap->a_bp)); + return (nfs_writebp(ap->a_bp, 1)); +} + +/* + * This is a clone of vop_generic_bwrite(), except that B_WRITEINPROG isn't set unless + * the force flag is one and it also handles the B_NEEDCOMMIT flag. + */ +int +nfs_writebp(bp, force) + register struct buf *bp; + int force; +{ + register int oldflags = bp->b_flags, retv = 1; + register struct proc *p = curproc; /* XXX */ + off_t off; + int s; + + if(!(bp->b_flags & B_BUSY)) + panic("bwrite: buffer is not busy???"); + +#ifdef fvdl_debug + printf("nfs_writebp(%x): vp %x voff %d vend %d doff %d dend %d\n", + bp, bp->b_vp, bp->b_validoff, bp->b_validend, bp->b_dirtyoff, + bp->b_dirtyend); +#endif + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); + + s = splbio(); + buf_undirty(bp); + + if ((oldflags & B_ASYNC) && !(oldflags & B_DELWRI) && p) + ++p->p_stats->p_ru.ru_oublock; + + bp->b_vp->v_numoutput++; + splx(s); + + /* + * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not + * an actual write will have to be scheduled via. VOP_STRATEGY(). + * If B_WRITEINPROG is already set, then push it with a write anyhow. 
+ */ + if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) { + off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; + bp->b_flags |= B_WRITEINPROG; + retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff, + bp->b_proc); + bp->b_flags &= ~B_WRITEINPROG; + if (!retv) { + bp->b_dirtyoff = bp->b_dirtyend = 0; + bp->b_flags &= ~B_NEEDCOMMIT; + biodone(bp); + } else if (retv == NFSERR_STALEWRITEVERF) + nfs_clearcommit(bp->b_vp->v_mount); + } + if (retv) { + if (force) + bp->b_flags |= B_WRITEINPROG; + VOP_STRATEGY(bp); + } + + if( (oldflags & B_ASYNC) == 0) { + int rtval = biowait(bp); + if (!(oldflags & B_DELWRI) && p) { + ++p->p_stats->p_ru.ru_oublock; + } + brelse(bp); + return (rtval); + } + + return (0); } /* diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h index 42aaddfa637..17c02979154 100644 --- a/sys/nfs/nfsnode.h +++ b/sys/nfs/nfsnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: nfsnode.h,v 1.12 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: nfsnode.h,v 1.13 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfsnode.h,v 1.16 1996/02/18 11:54:04 fvdl Exp $ */ /* @@ -119,20 +119,8 @@ struct nfsnode { nfsfh_t n_fh; /* Small File Handle */ struct ucred *n_rcred; struct ucred *n_wcred; - off_t n_pushedlo; /* 1st blk in commited range */ - off_t n_pushedhi; /* Last block in range */ - off_t n_pushlo; /* 1st block in commit range */ - off_t n_pushhi; /* Last block in range */ - struct lock n_commitlock; /* Serialize commits XXX */ - int n_commitflags; }; -/* - * Values for n_commitflags - */ -#define NFS_COMMIT_PUSH_VALID 0x0001 /* push range valid */ -#define NFS_COMMIT_PUSHED_VALID 0x0002 /* pushed range valid */ - #define n_atim n_un1.nf_atim #define n_mtim n_un2.nf_mtim #define n_sillyrename n_un3.nf_silly @@ -211,8 +199,6 @@ int nfs_bwrite __P((void *)); int nfs_vget __P((struct mount *, ino_t, struct vnode **)); #define nfs_reallocblks \ ((int (*) __P((void *)))eopnotsupp) -int nfs_getpages __P((void *)); -int nfs_putpages __P((void *)); /* other stuff */ int nfs_removeit __P((struct sillyrename *)); diff --git a/sys/sys/buf.h b/sys/sys/buf.h index bf752691c3c..ede1b021c65 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -1,4 +1,4 @@ -/* $OpenBSD: buf.h,v 1.36 2001/11/30 05:45:33 csapuntz Exp $ */ +/* $OpenBSD: buf.h,v 1.37 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */ /* @@ -68,7 +68,6 @@ extern struct bio_ops { void (*io_deallocate) __P((struct buf *)); void (*io_movedeps) __P((struct buf *, struct buf *)); int (*io_countdeps) __P((struct buf *, int, int)); - void (*io_pageiodone) __P((struct buf *)); } bioops; /* @@ -97,7 +96,10 @@ struct buf { /* Function to call upon completion. */ void (*b_iodone) __P((struct buf *)); struct vnode *b_vp; /* Device vnode. */ - void *b_private; + int b_dirtyoff; /* Offset in buffer of dirty region. */ + int b_dirtyend; /* Offset of end of dirty region. */ + int b_validoff; /* Offset in buffer of valid region. */ + int b_validend; /* Offset of end of valid region. */ struct workhead b_dep; /* List of filesystem dependencies. */ }; @@ -118,6 +120,7 @@ struct buf { * These flags are kept in b_flags. */ #define B_AGE 0x00000001 /* Move to age queue when I/O done. */ +#define B_NEEDCOMMIT 0x00000002 /* Needs committing to stable storage */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ #define B_BAD 0x00000008 /* Bad block revectoring in progress. */ #define B_BUSY 0x00000010 /* I/O in progress. 
*/ @@ -141,6 +144,7 @@ struct buf { #define B_UAREA 0x00400000 /* Buffer describes Uarea I/O. */ #define B_WANTED 0x00800000 /* Process wants this buffer. */ #define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ +#define B_WRITEINPROG 0x01000000 /* Write in progress. */ #define B_XXX 0x02000000 /* Debugging flag. */ #define B_DEFERRED 0x04000000 /* Skipped over for cleaning */ #define B_SCANNED 0x08000000 /* Block already pushed during sync */ @@ -199,6 +203,8 @@ void biodone __P((struct buf *)); int biowait __P((struct buf *)); int bread __P((struct vnode *, daddr_t, int, struct ucred *, struct buf **)); +int breada __P((struct vnode *, daddr_t, int, daddr_t, int, + struct ucred *, struct buf **)); int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, struct ucred *, struct buf **)); void brelse __P((struct buf *)); @@ -266,8 +272,6 @@ int cluster_read __P((struct vnode *, struct cluster_info *, u_quad_t, daddr_t, long, struct ucred *, struct buf **)); void cluster_write __P((struct buf *, struct cluster_info *, u_quad_t)); -int buf_cleanout(struct buf *bp); - __END_DECLS #endif #endif /* !_SYS_BUF_H_ */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 50f59e4a532..6709ef88a7b 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mount.h,v 1.41 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: mount.h,v 1.42 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $ */ /* @@ -336,8 +336,6 @@ struct mount { struct lock mnt_lock; /* mount structure lock */ int mnt_flag; /* flags */ int mnt_maxsymlinklen; /* max size of short symlink */ - int mnt_fs_bshift; /* offset shift for lblkno */ - int mnt_dev_bshift; /* shift for device sectors */ struct statfs mnt_stat; /* cache of filesystem stats */ qaddr_t mnt_data; /* private data */ }; diff --git a/sys/sys/param.h b/sys/sys/param.h index 998594a680a..0dbb101ed1b 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -1,4 +1,4 @@ -/* $OpenBSD: param.h,v 1.44 2001/12/10 03:03:10 art Exp $ */ +/* $OpenBSD: param.h,v 1.45 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: param.h,v 1.23 1996/03/17 01:02:29 thorpej Exp $ */ /*- @@ -227,17 +227,3 @@ #define RFCNAMEG (1<<10) /* UNIMPL zero plan9 `name space' */ #define RFCENVG (1<<11) /* UNIMPL zero plan9 `env space' */ #define RFCFDG (1<<12) /* zero fd table */ - -#ifdef _KERNEL -/* - * Defaults for Unified Buffer Cache parameters. 
- * May be overridden in <machine/param.h> - */ - -#ifndef UBC_WINSHIFT -#define UBC_WINSHIFT 13 -#endif -#ifndef UBC_NWINS -#define UBC_NWINS 1024 -#endif -#endif /* _KERNEL */ diff --git a/sys/sys/specdev.h b/sys/sys/specdev.h index 51fb9564c51..bdd2008545f 100644 --- a/sys/sys/specdev.h +++ b/sys/sys/specdev.h @@ -1,4 +1,4 @@ -/* $OpenBSD: specdev.h,v 1.11 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: specdev.h,v 1.12 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: specdev.h,v 1.12 1996/02/13 13:13:01 mycroft Exp $ */ /* @@ -121,4 +121,3 @@ int spec_advlock __P((void *)); #define spec_reallocblks spec_badop #define spec_bwrite vop_generic_bwrite #define spec_revoke vop_generic_revoke -#define spec_mmap spec_badop diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index e97fa77b696..051ddaee942 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vnode.h,v 1.46 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: vnode.h,v 1.47 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $ */ /* @@ -45,6 +45,7 @@ #include <uvm/uvm_pglist.h> /* XXX */ #include <sys/lock.h> /* XXX */ #include <uvm/uvm.h> /* XXX */ +#include <uvm/uvm_vnode.h> /* XXX */ /* * The vnode is the focus of all file activity in UNIX. There is a @@ -86,14 +87,11 @@ LIST_HEAD(buflists, buf); */ struct vnode { - struct uvm_object v_uobj; /* the VM object */ -#define v_usecount v_uobj.uo_refs -#define v_interlock v_uobj.vmobjlock - voff_t v_size; - int v_flag; - int v_numoutput; + struct uvm_vnode v_uvm; /* uvm data */ int (**v_op) __P((void *)); /* vnode operations vector */ enum vtype v_type; /* vnode type */ + u_int v_flag; /* vnode flags (see below) */ + u_int v_usecount; /* reference count of users */ /* reference count of writers */ u_int v_writecount; /* Flags that can be read/written in interrupts */ @@ -105,6 +103,7 @@ struct vnode { LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ + u_int v_numoutput; /* num of writes in progress */ LIST_ENTRY(vnode) v_synclist; /* vnode with dirty buffers */ union { struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ @@ -113,6 +112,7 @@ struct vnode { struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */ } v_un; + struct simplelock v_interlock; /* lock on usecount and flag */ struct lock *v_vnlock; /* used for non-locking fs's */ enum vtagtype v_tag; /* type of underlying data */ void *v_data; /* private data for fs */ @@ -137,9 +137,6 @@ struct vnode { #define VXWANT 0x0200 /* process is waiting for vnode */ #define VALIASED 0x0800 /* vnode has an alias */ #define VLOCKSWORK 0x4000 /* FS supports locking discipline */ -#define VDIRTY 0x8000 /* vnode possibly has dirty pages */ - -#define VSIZENOTSET ((voff_t)-1) /* * (v_bioflag) Flags that may be manipulated by interrupt handlers @@ -252,9 +249,6 @@ vref(vp) } #endif /* DIAGNOSTIC */ -void vhold __P((struct vnode *)); -void vholdrele __P((struct vnode *)); - #define NULLVP ((struct vnode *)NULL) /* @@ -451,7 +445,6 @@ int vop_generic_lock __P((void *)); int vop_generic_unlock __P((void *)); int vop_generic_revoke __P((void *)); int vop_generic_kqfilter __P((void *)); -int vop_generic_mmap __P((void *)); int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p)); diff --git a/sys/sys/vnode_if.h b/sys/sys/vnode_if.h index d64945fa5a4..00cdadabe25 100644 --- 
a/sys/sys/vnode_if.h +++ b/sys/sys/vnode_if.h @@ -3,7 +3,7 @@ * (Modifications made here may easily be lost!) * * Created from the file: - * OpenBSD: vnode_if.src,v 1.17 2001/12/10 04:45:31 art Exp + * OpenBSD: vnode_if.src,v 1.11 2001/06/23 02:21:05 csapuntz Exp * by the script: * OpenBSD: vnode_if.sh,v 1.8 2001/02/26 17:34:18 art Exp */ @@ -397,42 +397,6 @@ struct vop_whiteout_args { extern struct vnodeop_desc vop_whiteout_desc; int VOP_WHITEOUT __P((struct vnode *, struct componentname *, int)); -struct vop_getpages_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - voff_t a_offset; - struct vm_page **a_m; - int *a_count; - int a_centeridx; - vm_prot_t a_access_type; - int a_advice; - int a_flags; -}; -extern struct vnodeop_desc vop_getpages_desc; -int VOP_GETPAGES __P((struct vnode *, voff_t, struct vm_page **, int *, int, - vm_prot_t, int, int)); - -struct vop_putpages_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct vm_page **a_m; - int a_count; - int a_flags; - int *a_rtvals; -}; -extern struct vnodeop_desc vop_putpages_desc; -int VOP_PUTPAGES __P((struct vnode *, struct vm_page **, int, int, int *)); - -struct vop_mmap_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - int a_fflags; - struct ucred *a_cred; - struct proc *a_p; -}; -extern struct vnodeop_desc vop_mmap_desc; -int VOP_MMAP __P((struct vnode *, int, struct ucred *, struct proc *)); - /* Special cases: */ #include <sys/buf.h> diff --git a/sys/ufs/ext2fs/ext2fs_balloc.c b/sys/ufs/ext2fs/ext2fs_balloc.c index 390f02dc13f..eb2d7a6f414 100644 --- a/sys/ufs/ext2fs/ext2fs_balloc.c +++ b/sys/ufs/ext2fs/ext2fs_balloc.c @@ -1,4 +1,5 @@ -/* $NetBSD: ext2fs_balloc.c,v 1.8 2000/12/10 06:38:31 chs Exp $ */ +/* $OpenBSD: ext2fs_balloc.c,v 1.11 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_balloc.c,v 1.10 2001/07/04 21:16:01 chs Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -43,9 +44,8 @@ #include <sys/proc.h> #include <sys/file.h> #include <sys/vnode.h> -#include <sys/mount.h> -#include <uvm/uvm.h> +#include <uvm/uvm_extern.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> @@ -73,13 +73,8 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, u_int deallocated; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; int unwindidx = -1; - UVMHIST_FUNC("ext2fs_buf_alloc"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0); - - if (bpp != NULL) { - *bpp = NULL; - } + *bpp = NULL; if (bn < 0) return (EFBIG); fs = ip->i_e2fs; @@ -91,29 +86,20 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, if (bn < NDADDR) { nb = fs2h32(ip->i_e2fs_blocks[bn]); if (nb != 0) { - - /* - * the block is already allocated, just read it. - */ - - if (bpp != NULL) { - error = bread(vp, bn, fs->e2fs_bsize, NOCRED, - &bp); - if (error) { - brelse(bp); - return (error); - } - *bpp = bp; + error = bread(vp, bn, fs->e2fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); } + *bpp = bp; return (0); } /* * allocate a new direct block. 
*/ - error = ext2fs_alloc(ip, bn, - ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]), + ext2fs_blkpref(ip, bn, (int)bn, &ip->i_e2fs_blocks[0]), cred, &newb); if (error) return (error); @@ -121,13 +107,11 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, ip->i_e2fs_last_blk = newb; ip->i_e2fs_blocks[bn] = h2fs32(newb); ip->i_flag |= IN_CHANGE | IN_UPDATE; - if (bpp != NULL) { - bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0); - bp->b_blkno = fsbtodb(fs, newb); - if (flags & B_CLRBUF) - clrbuf(bp); - *bpp = bp; - } + bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0); + bp->b_blkno = fsbtodb(fs, newb); + if (flags & B_CLRBUF) + clrbuf(bp); + *bpp = bp; return (0); } /* @@ -245,30 +229,26 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, } else { bdwrite(bp); } - if (bpp != NULL) { - nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); - if (flags & B_CLRBUF) - clrbuf(nbp); - *bpp = nbp; - } + nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + if (flags & B_CLRBUF) + clrbuf(nbp); + *bpp = nbp; return (0); } brelse(bp); - if (bpp != NULL) { - if (flags & B_CLRBUF) { - error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, - &nbp); - if (error) { - brelse(nbp); - goto fail; - } - } else { - nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); + if (flags & B_CLRBUF) { + error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp); + if (error) { + brelse(nbp); + goto fail; } - *bpp = nbp; + } else { + nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); } + + *bpp = nbp; return (0); fail: /* @@ -312,142 +292,3 @@ fail: } return error; } - -int -ext2fs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags, - struct ucred *cred) -{ - struct inode *ip = VTOI(vp); - struct m_ext2fs *fs = ip->i_e2fs; - int error, delta, bshift, bsize; - UVMHIST_FUNC("ext2fs_gop_alloc"); UVMHIST_CALLED(ubchist); - - bshift = fs->e2fs_bshift; - bsize = 1 << bshift; - - delta = off & (bsize - 1); - off -= delta; - len += delta; - - while (len > 0) { - bsize = min(bsize, len); - UVMHIST_LOG(ubchist, "off 0x%x len 0x%x bsize 0x%x", - off, len, bsize, 0); - - error = ext2fs_buf_alloc(ip, lblkno(fs, off), bsize, cred, - NULL, flags); - if (error) { - UVMHIST_LOG(ubchist, "error %d", error, 0,0,0); - return error; - } - - /* - * increase file size now, VOP_BALLOC() requires that - * EOF be up-to-date before each call. - */ - - if (ip->i_e2fs_size < off + bsize) { - UVMHIST_LOG(ubchist, "old 0x%x new 0x%x", - ip->i_e2fs_size, off + bsize,0,0); - ip->i_e2fs_size = off + bsize; - if (vp->v_size < ip->i_e2fs_size) { - uvm_vnp_setsize(vp, ip->i_e2fs_size); - } - } - - off += bsize; - len -= bsize; - } - return 0; -} - -/* - * allocate a range of blocks in a file. - * after this function returns, any page entirely contained within the range - * will map to invalid data and thus must be overwritten before it is made - * accessible to others. 
- */ - -int -ext2fs_balloc_range(vp, off, len, cred, flags) - struct vnode *vp; - off_t off, len; - struct ucred *cred; - int flags; -{ - off_t oldeof, eof, pagestart; - struct uvm_object *uobj; - struct genfs_node *gp = VTOG(vp); - int i, delta, error, npages; - int bshift = vp->v_mount->mnt_fs_bshift; - int bsize = 1 << bshift; - int ppb = max(bsize >> PAGE_SHIFT, 1); - struct vm_page *pgs[ppb]; - UVMHIST_FUNC("ext2fs_balloc_range"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x", - vp, off, len, vp->v_size); - - error = 0; - uobj = &vp->v_uobj; - oldeof = vp->v_size; - eof = max(oldeof, off + len); - UVMHIST_LOG(ubchist, "new eof 0x%x", eof,0,0,0); - pgs[0] = NULL; - - /* - * cache the new range of the file. this will create zeroed pages - * where the new block will be and keep them locked until the - * new block is allocated, so there will be no window where - * the old contents of the new block is visible to racing threads. - */ - - pagestart = trunc_page(off) & ~(bsize - 1); - npages = min(ppb, (round_page(eof) - pagestart) >> PAGE_SHIFT); - memset(pgs, 0, npages); - simple_lock(&uobj->vmobjlock); - error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0, - VM_PROT_READ, 0, PGO_SYNCIO | PGO_PASTEOF); - if (error) { - UVMHIST_LOG(ubchist, "getpages %d", error,0,0,0); - goto errout; - } - for (i = 0; i < npages; i++) { - UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0); - KASSERT((pgs[i]->flags & PG_RELEASED) == 0); - pgs[i]->flags &= ~PG_CLEAN; - uvm_pageactivate(pgs[i]); - } - - /* - * adjust off to be block-aligned. - */ - - delta = off & (bsize - 1); - off -= delta; - len += delta; - - /* - * now allocate the range. - */ - - lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL, curproc); - error = GOP_ALLOC(vp, off, len, flags, cred); - UVMHIST_LOG(ubchist, "alloc %d", error,0,0,0); - lockmgr(&gp->g_glock, LK_RELEASE, NULL, curproc); - - /* - * unbusy any pages we are holding. - */ - -errout: - simple_lock(&uobj->vmobjlock); - if (error) { - (void) (uobj->pgops->pgo_flush)(uobj, oldeof, pagestart + ppb, - PGO_FREE); - } - if (pgs[0] != NULL) { - uvm_page_unbusy(pgs, npages); - } - simple_unlock(&uobj->vmobjlock); - return (error); -} diff --git a/sys/ufs/ext2fs/ext2fs_extern.h b/sys/ufs/ext2fs/ext2fs_extern.h index 5f5b2c3a47b..5063d34427a 100644 --- a/sys/ufs/ext2fs/ext2fs_extern.h +++ b/sys/ufs/ext2fs/ext2fs_extern.h @@ -1,5 +1,5 @@ -/* $OpenBSD: ext2fs_extern.h,v 1.12 2001/12/10 04:45:31 art Exp $ */ -/* $NetBSD: ext2fs_extern.h,v 1.9 2000/11/27 08:39:53 chs Exp $ */ +/* $OpenBSD: ext2fs_extern.h,v 1.13 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_extern.h,v 1.1 1997/06/11 09:33:55 bouyer Exp $ */ /*- * Copyright (c) 1997 Manuel Bouyer. 
@@ -74,9 +74,6 @@ int ext2fs_inode_free(struct inode *pip, ino_t ino, int mode); /* ext2fs_balloc.c */ int ext2fs_buf_alloc(struct inode *, daddr_t, int, struct ucred *, struct buf **, int); -int ext2fs_gop_alloc __P((struct vnode *, off_t, off_t, int, struct ucred *)); -int ext2fs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *, - int)); /* ext2fs_bmap.c */ int ext2fs_bmap __P((void *)); diff --git a/sys/ufs/ext2fs/ext2fs_inode.c b/sys/ufs/ext2fs/ext2fs_inode.c index f77c99c47b5..0e2a975e333 100644 --- a/sys/ufs/ext2fs/ext2fs_inode.c +++ b/sys/ufs/ext2fs/ext2fs_inode.c @@ -1,4 +1,5 @@ -/* $NetBSD: ext2fs_inode.c,v 1.23 2001/02/18 20:17:04 chs Exp $ */ +/* $OpenBSD: ext2fs_inode.c,v 1.19 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_inode.c,v 1.24 2001/06/19 12:59:18 wiz Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -58,10 +59,8 @@ #include <ufs/ext2fs/ext2fs.h> #include <ufs/ext2fs/ext2fs_extern.h> -extern int prtactive; - static int ext2fs_indirtrunc __P((struct inode *, ufs_daddr_t, ufs_daddr_t, - ufs_daddr_t, int, long *)); + ufs_daddr_t, int, long *)); /* * Last reference to an inode. If necessary, write or delete it. @@ -79,6 +78,7 @@ ext2fs_inactive(v) struct proc *p = ap->a_p; struct timespec ts; int error = 0; + extern int prtactive; if (prtactive && vp->v_usecount != 0) vprint("ext2fs_inactive: pushing active", vp); @@ -171,13 +171,14 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) { struct vnode *ovp = ITOV(oip); ufs_daddr_t lastblock; - ufs_daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR]; + ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; struct m_ext2fs *fs; + struct buf *bp; int offset, size, level; long count, nblocks, vflags, blocksreleased = 0; int i; - int error, allerror; + int aflags, error, allerror; off_t osize; if (length < 0) @@ -218,8 +219,22 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) if (length > fs->fs_maxfilesize) return (EFBIG); #endif - ext2fs_balloc_range(ovp, length - 1, 1, cred, - flags & IO_SYNC ? B_SYNC : 0); + offset = blkoff(fs, length - 1); + lbn = lblkno(fs, length - 1); + aflags = B_CLRBUF; + if (flags & IO_SYNC) + aflags |= B_SYNC; + error = ext2fs_buf_alloc(oip, lbn, offset + 1, cred, &bp, + aflags); + if (error) + return (error); + oip->i_e2fs_size = length; + uvm_vnp_setsize(ovp, length); + uvm_vnp_uncache(ovp); + if (aflags & B_SYNC) + bwrite(bp); + else + bawrite(bp); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (ext2fs_update(oip, NULL, NULL, 1)); } @@ -231,15 +246,28 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) * of subsequent file growth. 
*/ offset = blkoff(fs, length); - if (offset != 0) { + if (offset == 0) { + oip->i_e2fs_size = length; + } else { + lbn = lblkno(fs, length); + aflags = B_CLRBUF; + if (flags & IO_SYNC) + aflags |= B_SYNC; + error = ext2fs_buf_alloc(oip, lbn, offset, cred, &bp, + aflags); + if (error) + return (error); + oip->i_e2fs_size = length; size = fs->e2fs_bsize; - - /* XXXUBC we should handle more than just VREG */ - uvm_vnp_zerorange(ovp, length, size - offset); + uvm_vnp_setsize(ovp, length); + uvm_vnp_uncache(ovp); + bzero((char *)bp->b_data + offset, (u_int)(size - offset)); + allocbuf(bp, size); + if (aflags & B_SYNC) + bwrite(bp); + else + bawrite(bp); } - oip->i_e2fs_size = length; - uvm_vnp_setsize(ovp, length); - /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) diff --git a/sys/ufs/ext2fs/ext2fs_readwrite.c b/sys/ufs/ext2fs/ext2fs_readwrite.c index 03768d06b42..9ae4322756f 100644 --- a/sys/ufs/ext2fs/ext2fs_readwrite.c +++ b/sys/ufs/ext2fs/ext2fs_readwrite.c @@ -79,8 +79,6 @@ ext2fs_read(v) struct uio *uio; struct m_ext2fs *fs; struct buf *bp; - void *win; - vsize_t bytelen; ufs_daddr_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; @@ -109,27 +107,6 @@ ext2fs_read(v) if (uio->uio_resid == 0) return (0); - if (vp->v_type == VREG) { - error = 0; - while (uio->uio_resid > 0) { - - bytelen = MIN(ip->i_e2fs_size - uio->uio_offset, - uio->uio_resid); - - if (bytelen == 0) { - break; - } - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, - &bytelen, UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) { - break; - } - } - goto out; - } - for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_e2fs_size - uio->uio_offset) <= 0) break; @@ -179,11 +156,8 @@ ext2fs_read(v) if (bp != NULL) brelse(bp); -out: if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) { ip->i_flag |= IN_ACCESS; - if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) - error = ext2fs_update(ip, NULL, NULL, 1); } return (error); } @@ -209,17 +183,12 @@ ext2fs_write(v) struct proc *p; ufs_daddr_t lbn; off_t osize; - int blkoffset, error, flags, ioflag, resid, xfersize; - vsize_t bytelen; - void *win; - off_t oldoff; - boolean_t rv; + int blkoffset, error, flags, ioflag, resid, size, xfersize; ioflag = ap->a_ioflag; uio = ap->a_uio; vp = ap->a_vp; ip = VTOI(vp); - error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) @@ -263,65 +232,35 @@ ext2fs_write(v) resid = uio->uio_resid; osize = ip->i_e2fs_size; - - if (vp->v_type == VREG) { - while (uio->uio_resid > 0) { - oldoff = uio->uio_offset; - blkoffset = blkoff(fs, uio->uio_offset); - bytelen = MIN(fs->e2fs_bsize - blkoffset, - uio->uio_resid); - - /* - * XXXUBC if file is mapped and this is the last block, - * process one page at a time. - */ - - error = ext2fs_balloc_range(vp, uio->uio_offset, - bytelen, ap->a_cred, 0); - if (error) { - break; - } - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, - &bytelen, UBC_WRITE); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) { - break; - } - - /* - * flush what we just wrote if necessary. - * XXXUBC simplistic async flushing. - */ - - if (oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_uobj.vmobjlock); - rv = vp->v_uobj.pgops->pgo_flush( - &vp->v_uobj, (oldoff >> 16) << 16, - (uio->uio_offset >> 16) << 16, PGO_CLEANIT); - simple_unlock(&vp->v_uobj.vmobjlock); - } - } - goto out; - } - flags = ioflag & IO_SYNC ? 
B_SYNC : 0; + for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); - xfersize = MIN(fs->e2fs_bsize - blkoffset, uio->uio_resid); - if (xfersize < fs->e2fs_bsize) + xfersize = fs->e2fs_bsize - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (fs->e2fs_bsize > xfersize) flags |= B_CLRBUF; else flags &= ~B_CLRBUF; + error = ext2fs_buf_alloc(ip, - lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); + lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); if (error) break; - if (ip->i_e2fs_size < uio->uio_offset + xfersize) { + if (uio->uio_offset + xfersize > ip->i_e2fs_size) { ip->i_e2fs_size = uio->uio_offset + xfersize; + uvm_vnp_setsize(vp, ip->i_e2fs_size); } - error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); + uvm_vnp_uncache(vp); + + size = fs->e2fs_bsize - bp->b_resid; + if (size < xfersize) + xfersize = size; + + error = + uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); if (ioflag & IO_SYNC) (void)bwrite(bp); else if (xfersize + blkoffset == fs->e2fs_bsize) { @@ -333,14 +272,13 @@ ext2fs_write(v) bdwrite(bp); if (error || xfersize == 0) break; + ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. */ -out: - ip->i_flag |= IN_CHANGE | IN_UPDATE; if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) ip->i_e2fs_mode &= ~(ISUID | ISGID); if (error) { @@ -350,7 +288,8 @@ out: uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } - } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC) + } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { error = ext2fs_update(ip, NULL, NULL, 1); + } return (error); } diff --git a/sys/ufs/ext2fs/ext2fs_subr.c b/sys/ufs/ext2fs/ext2fs_subr.c index 3263f7e5391..02d84be4302 100644 --- a/sys/ufs/ext2fs/ext2fs_subr.c +++ b/sys/ufs/ext2fs/ext2fs_subr.c @@ -1,4 +1,5 @@ -/* $NetBSD: ext2fs_subr.c,v 1.4 2000/03/30 12:41:11 augustss Exp $ */ +/* $OpenBSD: ext2fs_subr.c,v 1.8 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_subr.c,v 1.1 1997/06/11 09:34:03 bouyer Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -95,7 +96,7 @@ ext2fs_checkoverlap(bp, ip) if (ep == bp || (ep->b_flags & B_INVAL) || ep->b_vp == NULLVP) continue; - if (VOP_BMAP(ep->b_vp, (ufs_daddr_t)0, &vp, (ufs_daddr_t)0, NULL)) + if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL)) continue; if (vp != ip->i_devvp) continue; diff --git a/sys/ufs/ext2fs/ext2fs_vfsops.c b/sys/ufs/ext2fs/ext2fs_vfsops.c index b77f4edec62..6f404d98157 100644 --- a/sys/ufs/ext2fs/ext2fs_vfsops.c +++ b/sys/ufs/ext2fs/ext2fs_vfsops.c @@ -1,5 +1,5 @@ -/* $OpenBSD: ext2fs_vfsops.c,v 1.19 2001/12/10 04:45:31 art Exp $ */ -/* $NetBSD: ext2fs_vfsops.c,v 1.40 2000/11/27 08:39:53 chs Exp $ */ +/* $OpenBSD: ext2fs_vfsops.c,v 1.20 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_vfsops.c,v 1.1 1997/06/11 09:34:07 bouyer Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -100,11 +100,6 @@ struct vfsops ext2fs_vfsops = { ufs_check_export }; -struct genfs_ops ext2fs_genfsops = { - genfs_size, - ext2fs_gop_alloc, -}; - struct pool ext2fs_inode_pool; extern u_long ext2gennumber; @@ -407,11 +402,9 @@ ext2fs_reload(mountp, cred, p) * Step 1: invalidate all cached meta-data. 
*/ devvp = VFSTOUFS(mountp)->um_devvp; - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - error = vinvalbuf(devvp, 0, cred, p, 0, 0); - VOP_UNLOCK(devvp, 0, p); - if (error) + if (vinvalbuf(devvp, 0, cred, p, 0, 0)) panic("ext2fs_reload: dirty1"); + /* * Step 2: re-read superblock from disk. */ @@ -590,18 +583,14 @@ ext2fs_mountfs(devvp, mp, p) mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; mp->mnt_flag |= MNT_LOCAL; - mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */ - mp->mnt_fs_bshift = m_fs->e2fs_bshift; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = NINDIR(m_fs); - ump->um_lognindir = ffs(NINDIR(m_fs)) - 1; ump->um_bptrtodb = m_fs->e2fs_fsbtodb; ump->um_seqinc = 1; /* no frags */ devvp->v_specmountpoint = mp; return (0); - out: if (bp) brelse(bp); @@ -921,7 +910,6 @@ ext2fs_vget(mp, ino, vpp) /* * Finish inode initialization now that aliasing has been resolved. */ - genfs_node_init(vp, &ext2fs_genfsops); ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* @@ -936,7 +924,6 @@ ext2fs_vget(mp, ino, vpp) ip->i_flag |= IN_MODIFIED; } - vp->v_size = ip->i_e2fs_size; *vpp = vp; return (0); } diff --git a/sys/ufs/ext2fs/ext2fs_vnops.c b/sys/ufs/ext2fs/ext2fs_vnops.c index 6e82f66279a..d85d4eba5d9 100644 --- a/sys/ufs/ext2fs/ext2fs_vnops.c +++ b/sys/ufs/ext2fs/ext2fs_vnops.c @@ -1,5 +1,5 @@ -/* $OpenBSD: ext2fs_vnops.c,v 1.20 2001/12/10 04:45:31 art Exp $ */ -/* $NetBSD: ext2fs_vnops.c,v 1.30 2000/11/27 08:39:53 chs Exp $ */ +/* $OpenBSD: ext2fs_vnops.c,v 1.21 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_vnops.c,v 1.1 1997/06/11 09:34:09 bouyer Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -402,6 +402,8 @@ ext2fs_chmod(vp, mode, cred, p) ip->i_e2fs_mode &= ~ALLPERMS; ip->i_e2fs_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; + if ((vp->v_flag & VTEXT) && (ip->i_e2fs_mode & S_ISTXT) == 0) + (void) uvm_vnp_uncache(vp); return (0); } @@ -1467,10 +1469,7 @@ struct vnodeopv_entry_desc ext2fs_vnodeop_entries[] = { { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ { &vop_advlock_desc, ext2fs_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */ - { &vop_getpages_desc, genfs_getpages }, - { &vop_putpages_desc, genfs_putpages }, - { &vop_mmap_desc, ufs_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL } }; struct vnodeopv_desc ext2fs_vnodeop_opv_desc = { &ext2fs_vnodeop_p, ext2fs_vnodeop_entries }; @@ -1513,8 +1512,7 @@ struct vnodeopv_entry_desc ext2fs_specop_entries[] = { { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ { &vop_advlock_desc, spec_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */ - { &vop_mmap_desc, spec_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc ext2fs_specop_opv_desc = { &ext2fs_specop_p, ext2fs_specop_entries }; @@ -1558,8 +1556,7 @@ struct vnodeopv_entry_desc ext2fs_fifoop_entries[] = { { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ { &vop_advlock_desc, fifo_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */ - { &vop_mmap_desc, fifo_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc ext2fs_fifoop_opv_desc = { &ext2fs_fifoop_p, ext2fs_fifoop_entries }; diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 92b4d993c2d..c42897ac4d4 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: 
ffs_alloc.c,v 1.37 2001/11/30 16:37:57 art Exp $ */ +/* $OpenBSD: ffs_alloc.c,v 1.38 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $ */ /* @@ -169,7 +169,7 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop) struct buf **bpp; ufs_daddr_t *blknop; { - struct fs *fs; + register struct fs *fs; struct buf *bp = NULL; ufs_daddr_t quota_updated = 0; int cg, request, error; @@ -177,7 +177,6 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop) if (bpp != NULL) *bpp = NULL; - fs = ip->i_fs; #ifdef DIAGNOSTIC if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || @@ -283,6 +282,7 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop) if (bno <= 0) goto nospace; + (void) uvm_vnp_uncache(ITOV(ip)); if (!DOINGSOFTDEP(ITOV(ip))) ffs_blkfree(ip, bprev, (long)osize); if (nsize < request) @@ -362,8 +362,7 @@ ffs_reallocblks(v) struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; int i, len, start_lvl, end_lvl, pref, ssize; - /* XXXUBC - don't reallocblks for now */ - if (1 || doreallocblks == 0) + if (doreallocblks == 0) return (ENOSPC); vp = ap->a_vp; diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index aa452edeabb..daf5b86082b 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_balloc.c,v 1.21 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ffs_balloc.c,v 1.22 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $ */ /* @@ -402,47 +402,3 @@ fail: return (error); } - -int -ffs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags, - struct ucred *cred) -{ - struct inode *ip = VTOI(vp); - struct fs *fs = ip->i_fs; - int error, delta, bshift, bsize; - - error = 0; - bshift = fs->fs_bshift; - bsize = 1 << bshift; - - delta = off & (bsize - 1); - off -= delta; - len += delta; - - while (len > 0) { - bsize = MIN(bsize, len); - - error = ffs_balloc(ip, off, bsize, cred, flags, NULL); - if (error) { - goto out; - } - - /* - * increase file size now, VOP_BALLOC() requires that - * EOF be up-to-date before each call. - */ - - if (ip->i_ffs_size < off + bsize) { - ip->i_ffs_size = off + bsize; - if (vp->v_size < ip->i_ffs_size) { - uvm_vnp_setsize(vp, ip->i_ffs_size); - } - } - - off += bsize; - len -= bsize; - } - -out: - return error; - } diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 7aac0f33de9..15eb204efd3 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_extern.h,v 1.16 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ffs_extern.h,v 1.17 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */ /*- @@ -87,7 +87,6 @@ void ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int)); /* ffs_balloc.c */ int ffs_balloc(struct inode *, off_t, int, struct ucred *, int, struct buf **); -int ffs_gop_alloc(struct vnode *, off_t, off_t, int, struct ucred *); /* ffs_inode.c */ int ffs_init __P((struct vfsconf *)); @@ -129,7 +128,7 @@ int ffs_read __P((void *)); int ffs_write __P((void *)); int ffs_fsync __P((void *)); int ffs_reclaim __P((void *)); -void ffs_gop_size __P((struct vnode *, off_t, off_t *)); + /* * Soft dependency function prototypes. 
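[Editor's note] The ffs_gop_alloc() routine removed in the ffs_balloc.c hunk above follows a simple pattern: round the starting offset down to a filesystem block boundary, widen the requested length by the same amount, then allocate one block-sized chunk at a time while keeping the recorded EOF ahead of the next allocation. The standalone sketch below models only that arithmetic so the hunk is easier to read; it is not kernel code, and the names sketch_gop_alloc and fake_balloc, the global file_size, and the example block shift are illustrative assumptions, with locking, credentials, and real error handling omitted.

#include <stdio.h>
#include <stdint.h>

/*
 * Illustrative model of the block-aligned allocation loop performed by
 * the removed ffs_gop_alloc() (see the ffs_balloc.c hunk above).
 * fake_balloc() stands in for ffs_balloc(); it only reports the range.
 */

static int64_t file_size = 0;            /* plays the role of ip->i_ffs_size */

static int
fake_balloc(int64_t off, int64_t size)
{
    printf("allocate backing for [%lld, %lld)\n",
        (long long)off, (long long)(off + size));
    return 0;                            /* pretend the allocation succeeded */
}

static int
sketch_gop_alloc(int64_t off, int64_t len, int bshift)
{
    int64_t bsize = (int64_t)1 << bshift;
    int64_t delta = off & (bsize - 1);   /* distance into the current block */
    int error;

    off -= delta;                        /* round down to a block boundary */
    len += delta;                        /* ...and widen the request to match */

    while (len > 0) {
        int64_t chunk = bsize < len ? bsize : len;   /* MIN(bsize, len) */

        error = fake_balloc(off, chunk);
        if (error)
            return error;

        /* keep the recorded EOF up to date before the next allocation */
        if (file_size < off + chunk)
            file_size = off + chunk;

        off += chunk;
        len -= chunk;
    }
    return 0;
}

int
main(void)
{
    /* e.g. allocate 5000 bytes starting at offset 10000 with 8 KB blocks */
    sketch_gop_alloc(10000, 5000, 13);
    printf("recorded size: %lld\n", (long long)file_size);
    return 0;
}

After this commit the same job is done again by the filesystem's own buffer-allocation path (UFS_BUF_ALLOC / ext2fs_buf_alloc in the later hunks) rather than by a genfs_ops hook, which is why both ffs_gop_alloc() and its prototype in ffs_extern.h disappear here.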
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 3bec117a700..fecb1fbed77 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_inode.c,v 1.28 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ffs_inode.c,v 1.29 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */ /* @@ -148,21 +148,21 @@ ffs_update(struct inode *ip, struct timespec *atime, int ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) { - struct vnode *ovp = ITOV(oip); - struct genfs_node *gp = VTOG(ovp); + struct vnode *ovp; daddr_t lastblock; - daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR]; + daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; struct fs *fs; - struct proc *p = curproc; + struct buf *bp; int offset, size, level; long count, nblocks, vflags, blocksreleased = 0; register int i; - int error, allerror; + int aflags, error, allerror; off_t osize; if (length < 0) return (EINVAL); + ovp = ITOV(oip); if (ovp->v_type != VREG && ovp->v_type != VDIR && @@ -188,55 +188,10 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) if ((error = getinoquota(oip)) != 0) return (error); - fs = oip->i_fs; - if (length > fs->fs_maxfilesize) - return (EFBIG); - osize = oip->i_ffs_size; + uvm_vnp_setsize(ovp, length); oip->i_ci.ci_lasta = oip->i_ci.ci_clen = oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0; - /* - * Lengthen the size of the file. We must ensure that the - * last byte of the file is allocated. Since the smallest - * value of osize is 0, length will be at least 1. - */ - - if (osize < length) { - ufs_balloc_range(ovp, length - 1, 1, cred, - flags & IO_SYNC ? B_SYNC : 0); - oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (UFS_UPDATE(oip, 1)); - } - - /* - * When truncating a regular file down to a non-block-aligned size, - * we must zero the part of last block which is past the new EOF. - * We must synchronously flush the zeroed pages to disk - * since the new pages will be invalidated as soon as we - * inform the VM system of the new, smaller size. - * We must to this before acquiring the GLOCK, since fetching - * the pages will acquire the GLOCK internally. - * So there is a window where another thread could see a whole - * zeroed page past EOF, but that's life. - */ - - offset = blkoff(fs, length); - if (ovp->v_type == VREG && length < osize && offset != 0) { - struct uvm_object *uobj; - voff_t eoz; - - size = blksize(fs, oip, lblkno(fs, length)); - eoz = min(lblktosize(fs, lblkno(fs, length)) + size, osize); - uvm_vnp_zerorange(ovp, length, eoz - length); - uobj = &ovp->v_uobj; - simple_lock(&uobj->vmobjlock); - uobj->pgops->pgo_flush(uobj, length, eoz, - PGO_CLEANIT|PGO_DEACTIVATE|PGO_SYNCIO); - simple_unlock(&uobj->vmobjlock); - } - - lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL, p); - if (DOINGSOFTDEP(ovp)) { if (length > 0 || softdep_slowdown(ovp)) { /* @@ -249,29 +204,80 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) * so that it will have no data structures left. 
*/ if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, - curproc)) != 0) { - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); + curproc)) != 0) return (error); - } } else { - uvm_vnp_setsize(ovp, length); (void)ufs_quota_free_blocks(oip, oip->i_ffs_blocks, NOCRED); softdep_setup_freeblocks(oip, length); (void) vinvalbuf(ovp, 0, cred, curproc, 0, 0); - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(oip, 0)); } } + fs = oip->i_fs; + osize = oip->i_ffs_size; /* - * Reduce the size of the file. + * Lengthen the size of the file. We must ensure that the + * last byte of the file is allocated. Since the smallest + * value of osize is 0, length will be at least 1. */ - oip->i_ffs_size = length; + if (osize < length) { + if (length > fs->fs_maxfilesize) + return (EFBIG); + aflags = B_CLRBUF; + if (flags & IO_SYNC) + aflags |= B_SYNC; + error = UFS_BUF_ALLOC(oip, length - 1, 1, + cred, aflags, &bp); + if (error) + return (error); + oip->i_ffs_size = length; + uvm_vnp_setsize(ovp, length); + (void) uvm_vnp_uncache(ovp); + if (aflags & B_SYNC) + bwrite(bp); + else + bawrite(bp); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (UFS_UPDATE(oip, MNT_WAIT)); + } uvm_vnp_setsize(ovp, length); /* + * Shorten the size of the file. If the file is not being + * truncated to a block boundary, the contents of the + * partial block following the end of the file must be + * zero'ed in case it ever becomes accessible again because + * of subsequent file growth. Directories however are not + * zero'ed as they should grow back initialized to empty. + */ + offset = blkoff(fs, length); + if (offset == 0) { + oip->i_ffs_size = length; + } else { + lbn = lblkno(fs, length); + aflags = B_CLRBUF; + if (flags & IO_SYNC) + aflags |= B_SYNC; + error = UFS_BUF_ALLOC(oip, length - 1, 1, + cred, aflags, &bp); + if (error) + return (error); + oip->i_ffs_size = length; + size = blksize(fs, oip, lbn); + (void) uvm_vnp_uncache(ovp); + if (ovp->v_type != VDIR) + bzero((char *)bp->b_data + offset, + (u_int)(size - offset)); + allocbuf(bp, size); + if (aflags & B_SYNC) + bwrite(bp); + else + bawrite(bp); + } + /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when @@ -396,7 +402,6 @@ done: oip->i_ffs_blocks -= blocksreleased; if (oip->i_ffs_blocks < 0) /* sanity */ oip->i_ffs_blocks = 0; - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); oip->i_flag |= IN_CHANGE; (void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED); return (allerror); diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 2dfed4d83bd..dcb58550fc1 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_softdep.c,v 1.30 2001/12/10 02:19:34 art Exp $ */ +/* $OpenBSD: ffs_softdep.c,v 1.31 2001/12/19 08:58:07 art Exp $ */ /* * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved. * @@ -53,7 +53,6 @@ #include <sys/malloc.h> #include <sys/mount.h> #include <sys/proc.h> -#include <sys/pool.h> #include <sys/syslog.h> #include <sys/systm.h> #include <sys/vnode.h> @@ -67,10 +66,6 @@ #include <ufs/ffs/ffs_extern.h> #include <ufs/ufs/ufs_extern.h> -#include <uvm/uvm.h> -struct pool sdpcpool; -int softdep_lockedbufs; - #define STATIC /* @@ -111,13 +106,6 @@ extern char *memname[]; */ /* - * Definitions for page cache info hashtable. 
- */ -#define PCBPHASHSIZE 1024 -LIST_HEAD(, buf) pcbphashhead[PCBPHASHSIZE]; -#define PCBPHASH(vp, lbn) ((((vaddr_t)(vp) >> 8) ^ (lbn)) & (PCBPHASHSIZE - 1)) - -/* * Internal function prototypes. */ STATIC void softdep_error __P((char *, int)); @@ -169,13 +157,6 @@ STATIC void pause_timer __P((void *)); STATIC int request_cleanup __P((int, int)); STATIC int process_worklist_item __P((struct mount *, int)); STATIC void add_to_worklist __P((struct worklist *)); -STATIC struct buf *softdep_setup_pagecache __P((struct inode *, ufs_lbn_t, - long)); -STATIC void softdep_collect_pagecache __P((struct inode *)); -STATIC void softdep_free_pagecache __P((struct inode *)); -STATIC struct vnode *softdep_lookupvp(struct fs *, ino_t); -STATIC struct buf *softdep_lookup_pcbp __P((struct vnode *, ufs_lbn_t)); -void softdep_pageiodone __P((struct buf *)); /* * Exported softdep operations. @@ -192,7 +173,6 @@ struct bio_ops bioops = { softdep_deallocate_dependencies, /* io_deallocate */ softdep_move_dependencies, /* io_movedeps */ softdep_count_dependencies, /* io_countdeps */ - softdep_pageiodone, /* io_pagedone */ }; /* @@ -1081,7 +1061,6 @@ top: void softdep_initialize() { - int i; LIST_INIT(&mkdirlisthd); LIST_INIT(&softdep_workitem_pending); @@ -1100,11 +1079,6 @@ softdep_initialize() newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash); sema_init(&newblk_in_progress, "newblk", PRIBIO, 0); timeout_set(&proc_waiting_timeout, pause_timer, 0); - pool_init(&sdpcpool, sizeof(struct buf), 0, 0, 0, "sdpcpool", - 0, pool_page_alloc_nointr, pool_page_free_nointr, M_TEMP); - for (i = 0; i < PCBPHASHSIZE; i++) { - LIST_INIT(&pcbphashhead[i]); - } } /* @@ -1357,16 +1331,11 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) LIST_REMOVE(newblk, nb_hash); FREE(newblk, M_NEWBLK); - /* - * If we were not passed a bp to attach the dep to, - * then this must be for a regular file. - * Allocate a buffer to represent the page cache pages - * that are the real dependency. The pages themselves - * cannot refer to the dependency since we don't want to - * add a field to struct vm_page for this. - */ if (bp == NULL) { - bp = softdep_setup_pagecache(ip, lbn, newsize); + /* + * XXXUBC - Yes, I know how to fix this, but not right now. + */ + panic("softdep_setup_allocdirect: Bonk art in the head\n"); } WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); if (lbn >= NDADDR) { @@ -1600,7 +1569,10 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); if (nbp == NULL) { - nbp = softdep_setup_pagecache(ip, lbn, ip->i_fs->fs_bsize); + /* + * XXXUBC - Yes, I know how to fix this, but not right now. + */ + panic("softdep_setup_allocindir_page: Bonk art in the head\n"); } WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); FREE_LOCK(&lk); @@ -1779,7 +1751,6 @@ softdep_setup_freeblocks(ip, length) int i, delay, error; fs = ip->i_fs; - vp = ITOV(ip); if (length != 0) panic("softdep_setup_freeblocks: non-zero length"); MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks), @@ -1839,15 +1810,9 @@ softdep_setup_freeblocks(ip, length) * with this inode are obsolete and can simply be de-allocated. * We must first merge the two dependency lists to get rid of * any duplicate freefrag structures, then purge the merged list. 
- * We must remove any pagecache markers from the pagecache - * hashtable first because any I/Os in flight will want to see - * dependencies attached to their pagecache markers. We cannot - * free the pagecache markers until after we've freed all the - * dependencies that reference them later. * If we still have a bitmap dependency, then the inode has never * been written to disk, so we can free any fragments without delay. */ - softdep_collect_pagecache(ip); merge_inode_lists(inodedep); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) free_allocdirect(&inodedep->id_inoupdt, adp, delay); @@ -1859,6 +1824,7 @@ softdep_setup_freeblocks(ip, length) * Once they are all there, walk the list and get rid of * any dependencies. */ + vp = ITOV(ip); ACQUIRE_LOCK(&lk); drain_output(vp, 1); while (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) { @@ -1870,7 +1836,6 @@ softdep_setup_freeblocks(ip, length) brelse(bp); ACQUIRE_LOCK(&lk); } - softdep_free_pagecache(ip); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); FREE_LOCK(&lk); @@ -2939,6 +2904,7 @@ handle_workitem_freefile(freefile) struct freefile *freefile; { struct fs *fs; + struct vnode vp; struct inode tip; #ifdef DEBUG struct inodedep *idp; @@ -2956,7 +2922,8 @@ handle_workitem_freefile(freefile) tip.i_devvp = freefile->fx_devvp; tip.i_dev = freefile->fx_devvp->v_rdev; tip.i_fs = fs; - tip.i_vnode = NULL; + tip.i_vnode = &vp; + vp.v_data = &tip; if ((error = ffs_freefile(&tip, freefile->fx_oldinum, freefile->fx_mode)) != 0) { @@ -4354,15 +4321,6 @@ flush_inodedep_deps(fs, ino) struct allocdirect *adp; int error, waitfor; struct buf *bp; - struct vnode *vp; - struct uvm_object *uobj; - - vp = softdep_lookupvp(fs, ino); -#ifdef DIAGNOSTIC - if (vp == NULL) - panic("flush_inodedep_deps: null vp"); -#endif - uobj = &vp->v_uobj; /* * This work is done in two passes. The first pass grabs most @@ -4382,26 +4340,6 @@ flush_inodedep_deps(fs, ino) ACQUIRE_LOCK(&lk); if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) return (0); - - /* - * When file data was in the buffer cache, - * softdep_sync_metadata() would start i/o on - * file data buffers itself. But now that - * we're using the page cache to hold file data, - * we need something else to trigger those flushes. - * let's just do it here. - */ - FREE_LOCK(&lk); - simple_lock(&uobj->vmobjlock); - (uobj->pgops->pgo_flush)(uobj, 0, 0, - PGO_ALLPAGES|PGO_CLEANIT| - (waitfor == MNT_NOWAIT ? 0: PGO_SYNCIO)); - simple_unlock(&uobj->vmobjlock); - if (waitfor == MNT_WAIT) { - drain_output(vp, 0); - } - ACQUIRE_LOCK(&lk); - TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) { if (adp->ad_state & DEPCOMPLETE) continue; @@ -5017,194 +4955,3 @@ softdep_error(func, error) /* XXX should do something better! */ printf("%s: got error %d while accessing filesystem\n", func, error); } - -/* - * Allocate a buffer on which to attach a dependency. - */ -STATIC struct buf * -softdep_setup_pagecache(ip, lbn, size) - struct inode *ip; - ufs_lbn_t lbn; - long size; -{ - struct vnode *vp = ITOV(ip); - struct buf *bp; - int s; - - /* - * Enter pagecache dependency buf in hash. - * Always reset b_resid to be the full amount of data in the block - * since the caller has the corresponding pages locked and dirty. 
- */ - - bp = softdep_lookup_pcbp(vp, lbn); - if (bp == NULL) { - s = splbio(); - bp = pool_get(&sdpcpool, PR_WAITOK); - splx(s); - - bp->b_vp = vp; - bp->b_lblkno = lbn; - LIST_INIT(&bp->b_dep); - LIST_INSERT_HEAD(&pcbphashhead[PCBPHASH(vp, lbn)], bp, b_hash); - LIST_INSERT_HEAD(&ip->i_pcbufhd, bp, b_vnbufs); - } - bp->b_bcount = bp->b_resid = size; - return bp; -} - -/* - * softdep_collect_pagecache() and softdep_free_pagecache() - * are used to remove page cache dependency buffers when - * a file is being truncated to 0. - */ - -STATIC void -softdep_collect_pagecache(ip) - struct inode *ip; -{ - struct buf *bp; - - LIST_FOREACH(bp, &ip->i_pcbufhd, b_vnbufs) { - LIST_REMOVE(bp, b_hash); - } -} - -STATIC void -softdep_free_pagecache(ip) - struct inode *ip; -{ - struct buf *bp, *nextbp; - - for (bp = LIST_FIRST(&ip->i_pcbufhd); bp != NULL; bp = nextbp) { - nextbp = LIST_NEXT(bp, b_vnbufs); - LIST_REMOVE(bp, b_vnbufs); - KASSERT(LIST_FIRST(&bp->b_dep) == NULL); - pool_put(&sdpcpool, bp); - } -} - -STATIC struct vnode * -softdep_lookupvp(fs, ino) - struct fs *fs; - ino_t ino; -{ - struct mount *mp; - extern struct vfsops ffs_vfsops; - - CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { - if (mp->mnt_op == &ffs_vfsops && - VFSTOUFS(mp)->um_fs == fs) { - break; - } - } - if (mp == NULL) { - return NULL; - } - return ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino); -} - -STATIC struct buf * -softdep_lookup_pcbp(vp, lbn) - struct vnode *vp; - ufs_lbn_t lbn; -{ - struct buf *bp; - - LIST_FOREACH(bp, &pcbphashhead[PCBPHASH(vp, lbn)], b_hash) { - if (bp->b_vp == vp && bp->b_lblkno == lbn) { - break; - } - } - return bp; -} - -/* - * Do softdep i/o completion processing for page cache writes. - */ - -void -softdep_pageiodone(bp) - struct buf *bp; -{ - int npages = bp->b_bufsize >> PAGE_SHIFT; - struct vnode *vp = bp->b_vp; - struct vm_page *pg; - struct buf *pcbp = NULL; - struct allocdirect *adp; - struct allocindir *aip; - struct worklist *wk; - ufs_lbn_t lbn; - voff_t off; - long iosize = bp->b_bcount; - int size, asize, bshift, bsize; - int i; - - KASSERT(!(bp->b_flags & B_READ)); - bshift = vp->v_mount->mnt_fs_bshift; - bsize = 1 << bshift; - asize = min(PAGE_SIZE, bsize); - ACQUIRE_LOCK(&lk); - for (i = 0; i < npages; i++) { - pg = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT)); - if (pg == NULL) { - continue; - } - - for (off = pg->offset; - off < pg->offset + PAGE_SIZE; - off += bsize) { - size = min(asize, iosize); - iosize -= size; - lbn = off >> bshift; - if (pcbp == NULL || pcbp->b_lblkno != lbn) { - pcbp = softdep_lookup_pcbp(vp, lbn); - } - if (pcbp == NULL) { - continue; - } - pcbp->b_resid -= size; - if (pcbp->b_resid < 0) { - panic("softdep_pageiodone: " - "resid < 0, vp %p lbn 0x%lx pcbp %p", - vp, lbn, pcbp); - } - if (pcbp->b_resid > 0) { - continue; - } - - /* - * We've completed all the i/o for this block. - * mark the dep complete. 
- */ - - KASSERT(LIST_FIRST(&pcbp->b_dep) != NULL); - while ((wk = LIST_FIRST(&pcbp->b_dep))) { - WORKLIST_REMOVE(wk); - switch (wk->wk_type) { - case D_ALLOCDIRECT: - adp = WK_ALLOCDIRECT(wk); - adp->ad_state |= COMPLETE; - handle_allocdirect_partdone(adp); - break; - - case D_ALLOCINDIR: - aip = WK_ALLOCINDIR(wk); - aip->ai_state |= COMPLETE; - handle_allocindir_partdone(aip); - break; - - default: - panic("softdep_pageiodone: " - "bad type %d, pcbp %p wk %p", - wk->wk_type, pcbp, wk); - } - } - LIST_REMOVE(pcbp, b_hash); - LIST_REMOVE(pcbp, b_vnbufs); - pool_put(&sdpcpool, pcbp); - pcbp = NULL; - } - } - FREE_LOCK(&lk); -} diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 4aac12f8752..8aec3d7de20 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_vfsops.c,v 1.47 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ffs_vfsops.c,v 1.48 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $ */ /* @@ -96,11 +96,6 @@ struct inode_vtbl ffs_vtbl = { ffs_bufatoff }; -struct genfs_ops ffs_genfsops = { - ffs_gop_size, - ffs_gop_alloc, -}; - extern u_long nextgennumber; /* @@ -742,14 +737,11 @@ ffs_mountfs(devvp, mp, p) else mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; - mp->mnt_fs_bshift = fs->fs_bshift; - mp->mnt_dev_bshift = DEV_BSHIFT; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; - ump->um_lognindir = ffs(fs->fs_nindir) - 1; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) @@ -1127,7 +1119,6 @@ retry: ip->i_fs = fs = ump->um_fs; ip->i_dev = dev; ip->i_number = ino; - LIST_INIT(&ip->i_pcbufhd); ip->i_vtbl = &ffs_vtbl; /* @@ -1187,8 +1178,6 @@ retry: /* * Finish inode initialization now that aliasing has been resolved. */ - - genfs_node_init(vp, &ffs_genfsops); ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* @@ -1210,7 +1199,6 @@ retry: ip->i_ffs_uid = ip->i_din.ffs_din.di_ouid; /* XXX */ ip->i_ffs_gid = ip->i_din.ffs_din.di_ogid; /* XXX */ } /* XXX */ - uvm_vnp_setsize(vp, ip->i_ffs_size); *vpp = vp; return (0); diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 3794d5e8049..1020b14a2bb 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_vnops.c,v 1.24 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ffs_vnops.c,v 1.25 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */ /* @@ -107,12 +107,8 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_advlock_desc, ufs_advlock }, /* advlock */ { &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_getpages_desc, genfs_getpages }, - { &vop_putpages_desc, genfs_putpages }, - { &vop_mmap_desc, ufs_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL } }; - struct vnodeopv_desc ffs_vnodeop_opv_desc = { &ffs_vnodeop_p, ffs_vnodeop_entries }; @@ -233,7 +229,6 @@ ffs_fsync(v) struct vnode *vp = ap->a_vp; struct buf *bp, *nbp; int s, error, passes, skipmeta; - struct uvm_object *uobj; if (vp->v_type == VBLK && vp->v_specmountpoint != NULL && @@ -241,22 +236,13 @@ ffs_fsync(v) softdep_fsync_mountdev(vp); /* - * Flush all dirty data associated with a vnode. + * Flush all dirty buffers associated with a vnode. 
*/ passes = NIADDR + 1; skipmeta = 0; if (ap->a_waitfor == MNT_WAIT) skipmeta = 1; s = splbio(); - - if (vp->v_type == VREG) { - uobj = &vp->v_uobj; - simple_lock(&uobj->vmobjlock); - (uobj->pgops->pgo_flush)(uobj, 0, 0, PGO_ALLPAGES|PGO_CLEANIT| - ((ap->a_waitfor == MNT_WAIT) ? PGO_SYNCIO : 0)); - simple_unlock(&uobj->vmobjlock); - } - loop: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = LIST_NEXT(bp, b_vnbufs)) @@ -295,10 +281,8 @@ loop: */ if (passes > 0 || ap->a_waitfor != MNT_WAIT) (void) bawrite(bp); - else if ((error = bwrite(bp)) != 0) { - printf("ffs_fsync: bwrite failed %d\n", error); + else if ((error = bwrite(bp)) != 0) return (error); - } s = splbio(); /* * Since we may have slept during the I/O, we need @@ -341,11 +325,7 @@ loop: } } splx(s); - - error = (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT)); - if (error) - printf("ffs_fsync: UFS_UPDATE failed. %d\n", error); - return (error); + return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT)); } /* @@ -369,23 +349,3 @@ ffs_reclaim(v) vp->v_data = NULL; return (0); } - -/* - * Return the last logical file offset that should be written for this file - * if we're doing a write that ends at "size". - */ -void -ffs_gop_size(struct vnode *vp, off_t size, off_t *eobp) -{ - struct inode *ip = VTOI(vp); - struct fs *fs = ip->i_fs; - ufs_lbn_t olbn, nlbn; - - olbn = lblkno(fs, ip->i_ffs_size); - nlbn = lblkno(fs, size); - if (nlbn < NDADDR && olbn <= nlbn) { - *eobp = fragroundup(fs, size); - } else { - *eobp = blkroundup(fs, size); - } -} diff --git a/sys/ufs/mfs/mfs_vnops.c b/sys/ufs/mfs/mfs_vnops.c index cacf6fce8ee..619e5327c26 100644 --- a/sys/ufs/mfs/mfs_vnops.c +++ b/sys/ufs/mfs/mfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mfs_vnops.c,v 1.17 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: mfs_vnops.c,v 1.18 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: mfs_vnops.c,v 1.8 1996/03/17 02:16:32 christos Exp $ */ /* @@ -93,8 +93,7 @@ struct vnodeopv_entry_desc mfs_vnodeop_entries[] = { { &vop_pathconf_desc, mfs_pathconf }, /* pathconf */ { &vop_advlock_desc, mfs_advlock }, /* advlock */ { &vop_bwrite_desc, mfs_bwrite }, /* bwrite */ - { &vop_mmap_desc, mfs_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc mfs_vnodeop_opv_desc = { &mfs_vnodeop_p, mfs_vnodeop_entries }; diff --git a/sys/ufs/mfs/mfsnode.h b/sys/ufs/mfs/mfsnode.h index dbf8b7dd2c2..0ea03f72d90 100644 --- a/sys/ufs/mfs/mfsnode.h +++ b/sys/ufs/mfs/mfsnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mfsnode.h,v 1.8 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: mfsnode.h,v 1.9 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: mfsnode.h,v 1.3 1996/02/09 22:31:31 christos Exp $ */ /* @@ -87,4 +87,3 @@ struct mfsnode { #define mfs_truncate mfs_badop #define mfs_update nullop #define mfs_bwrite vop_generic_bwrite -#define mfs_mmap mfs_badop
\ No newline at end of file diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index eb3f0069790..52a78783351 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: inode.h,v 1.18 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: inode.h,v 1.19 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $ */ /* @@ -45,7 +45,6 @@ #include <ufs/ufs/dinode.h> #include <ufs/ufs/dir.h> #include <ufs/ext2fs/ext2fs_dinode.h> -#include <miscfs/genfs/genfs.h> typedef long ufs_lbn_t; @@ -67,7 +66,6 @@ struct ext2fs_inode_ext { * active, and is put back when the file is no longer being used. */ struct inode { - struct genfs_node i_gnode; LIST_ENTRY(inode) i_hash; /* Hash chain */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct vnode *i_devvp;/* Vnode for block I/O. */ @@ -86,7 +84,6 @@ struct inode { #define i_e2fs inode_u.e2fs struct cluster_info i_ci; - LIST_HEAD(,buf) i_pcbufhd; struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ struct lockf *i_lockf;/* Head of byte-level lock list. */ diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c index fdf5c1be055..fa060e3c6b8 100644 --- a/sys/ufs/ufs/ufs_bmap.c +++ b/sys/ufs/ufs/ufs_bmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_bmap.c,v 1.11 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: ufs_bmap.c,v 1.12 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufs_bmap.c,v 1.3 1996/02/09 22:36:00 christos Exp $ */ /* @@ -233,7 +233,6 @@ ufs_getlbns(vp, bn, ap, nump) long metalbn, realbn; struct ufsmount *ump; int64_t blockcnt; - int lbc; int i, numlevels, off; ump = VFSTOUFS(vp->v_mount); @@ -261,14 +260,10 @@ ufs_getlbns(vp, bn, ap, nump) * at the given level of indirection, and NIADDR - i is the number * of levels of indirection needed to locate the requested block. 
*/ - bn -= NDADDR; - for (lbc = 0, i = NIADDR;; i--, bn -= blockcnt) { + for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); - - lbc += ump->um_lognindir; - blockcnt = (int64_t)1 << lbc; - + blockcnt *= MNINDIR(ump); if (bn < blockcnt) break; } @@ -294,9 +289,8 @@ ufs_getlbns(vp, bn, ap, nump) if (metalbn == realbn) break; - lbc -= ump->um_lognindir; - blockcnt = (int64_t)1 << lbc; - off = (bn >> lbc) & (MNINDIR(ump) - 1); + blockcnt /= MNINDIR(ump); + off = (bn / blockcnt) % MNINDIR(ump); ++numlevels; ap->in_lbn = metalbn; diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index 85df8cf99ec..894187d0b7b 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_extern.h,v 1.15 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ufs_extern.h,v 1.16 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $ */ /*- @@ -78,6 +78,7 @@ int ufs_lock __P((void *)); int ufs_lookup __P((void *)); int ufs_mkdir __P((void *)); int ufs_mknod __P((void *)); +int ufs_mmap __P((void *)); int ufs_open __P((void *)); int ufs_pathconf __P((void *)); int ufs_print __P((void *)); @@ -98,7 +99,6 @@ int ufs_whiteout __P((void *)); int ufsspec_close __P((void *)); int ufsspec_read __P((void *)); int ufsspec_write __P((void *)); -#define ufs_mmap vop_generic_mmap #ifdef FIFO int ufsfifo_read __P((void *)); @@ -121,7 +121,6 @@ void ufs_ihashrem __P((struct inode *)); /* ufs_inode.c */ int ufs_init __P((struct vfsconf *)); int ufs_reclaim __P((struct vnode *, struct proc *)); -int ufs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *, int)); /* ufs_lookup.c */ void ufs_dirbad __P((struct inode *, doff_t, char *)); diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index 61ec4eeede9..dd2c6574d30 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_inode.c,v 1.15 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ufs_inode.c,v 1.16 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufs_inode.c,v 1.7 1996/05/11 18:27:52 mycroft Exp $ */ /* @@ -101,9 +101,7 @@ ufs_inactive(v) if (getinoquota(ip) != 0) (void)ufs_quota_free_inode(ip, NOCRED); - if (ip->i_ffs_size != 0) { - (void) UFS_TRUNCATE(ip, (off_t)0, 0, NOCRED); - } + (void) UFS_TRUNCATE(ip, (off_t)0, 0, NOCRED); ip->i_ffs_rdev = 0; mode = ip->i_ffs_mode; ip->i_ffs_mode = 0; @@ -153,153 +151,3 @@ ufs_reclaim(vp, p) ufs_quota_delete(ip); return (0); } - -/* - * allocate a range of blocks in a file. - * after this function returns, any page entirely contained within the range - * will map to invalid data and thus must be overwritten before it is made - * accessible to others. 
- */ - -int -ufs_balloc_range(vp, off, len, cred, flags) - struct vnode *vp; - off_t off, len; - struct ucred *cred; - int flags; -{ - off_t oldeof, neweof, oldeob, neweob, oldpagestart, pagestart; - struct uvm_object *uobj; - struct genfs_node *gp = VTOG(vp); - int i, delta, error, npages1, npages2; - int bshift = vp->v_mount->mnt_fs_bshift; - int bsize = 1 << bshift; - int ppb = MAX(bsize >> PAGE_SHIFT, 1); - struct vm_page *pgs1[ppb], *pgs2[ppb]; - UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x", - vp, off, len, vp->v_size); - - oldeof = vp->v_size; - GOP_SIZE(vp, oldeof, &oldeob); - - neweof = MAX(vp->v_size, off + len); - GOP_SIZE(vp, neweof, &neweob); - - error = 0; - uobj = &vp->v_uobj; - pgs1[0] = pgs2[0] = NULL; - - /* - * if the last block in the file is not a full block (ie. it is a - * fragment), and this allocation is causing the fragment to change - * size (either to expand the fragment or promote it to a full block), - * cache the old last block (at its new size). - */ - - oldpagestart = trunc_page(oldeof) & ~(bsize - 1); - if ((oldeob & (bsize - 1)) != 0 && oldeob != neweob) { - npages1 = MIN(ppb, (round_page(neweob) - oldpagestart) >> - PAGE_SHIFT); - memset(pgs1, 0, npages1 * sizeof(struct vm_page *)); - simple_lock(&uobj->vmobjlock); - error = VOP_GETPAGES(vp, oldpagestart, pgs1, &npages1, - 0, VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF); - if (error) { - goto out; - } - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); - for (i = 0; i < npages1; i++) { - UVMHIST_LOG(ubchist, "got pgs1[%d] %p", i, pgs1[i],0,0); - KASSERT((pgs1[i]->flags & PG_RELEASED) == 0); - pgs1[i]->flags &= ~PG_CLEAN; - uvm_pageactivate(pgs1[i]); - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - } - - /* - * cache the new range as well. this will create zeroed pages - * where the new block will be and keep them locked until the - * new block is allocated, so there will be no window where - * the old contents of the new block is visible to racing threads. - */ - - pagestart = trunc_page(off) & ~(bsize - 1); - if (pagestart != oldpagestart || pgs1[0] == NULL) { - npages2 = MIN(ppb, (round_page(neweob) - pagestart) >> - PAGE_SHIFT); - memset(pgs2, 0, npages2 * sizeof(struct vm_page *)); - simple_lock(&uobj->vmobjlock); - error = VOP_GETPAGES(vp, pagestart, pgs2, &npages2, 0, - VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF); - if (error) { - goto out; - } - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); - for (i = 0; i < npages2; i++) { - UVMHIST_LOG(ubchist, "got pgs2[%d] %p", i, pgs2[i],0,0); - KASSERT((pgs2[i]->flags & PG_RELEASED) == 0); - pgs2[i]->flags &= ~PG_CLEAN; - uvm_pageactivate(pgs2[i]); - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - } - - /* - * adjust off to be block-aligned. - */ - - delta = off & (bsize - 1); - off -= delta; - len += delta; - - /* - * now allocate the range. - */ - - lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL, curproc); - error = GOP_ALLOC(vp, off, len, flags, cred); - lockmgr(&gp->g_glock, LK_RELEASE, NULL, curproc); - - /* - * clear PG_RDONLY on any pages we are holding - * (since they now have backing store) and unbusy them. - * if we got an error, free any pages we created past the old eob. 
- */ - -out: - simple_lock(&uobj->vmobjlock); - if (error) { - (void) (uobj->pgops->pgo_flush)(uobj, round_page(oldeob), 0, - PGO_FREE); - } - if (pgs1[0] != NULL) { - for (i = 0; i < npages1; i++) { - pgs1[i]->flags &= ~PG_RDONLY; - } - uvm_page_unbusy(pgs1, npages1); - - /* - * The data in the frag might be moving to a new disk location. - * We need to flush pages to the new disk locations. - */ - - if ((flags & B_SYNC) == 0) - (*uobj->pgops->pgo_flush)(uobj, oldeof & ~(bsize - 1), - MIN((oldeof + bsize) & ~(bsize - 1), neweof), - PGO_CLEANIT | PGO_SYNCIO); - } - if (pgs2[0] != NULL) { - for (i = 0; i < npages2; i++) { - pgs2[i]->flags &= ~PG_RDONLY; - } - uvm_page_unbusy(pgs2, npages2); - } - simple_unlock(&uobj->vmobjlock); - return error; -} diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index 50e4657c6e7..5b562568de7 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_readwrite.c,v 1.22 2001/12/10 03:04:58 art Exp $ */ +/* $OpenBSD: ufs_readwrite.c,v 1.23 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufs_readwrite.c,v 1.9 1996/05/11 18:27:57 mycroft Exp $ */ /*- @@ -76,22 +76,21 @@ READ(v) int a_ioflag; struct ucred *a_cred; } */ *ap = v; - struct vnode *vp; - struct inode *ip; - struct uio *uio; - FS *fs; - void *win; - vsize_t bytelen; + register struct vnode *vp; + register struct inode *ip; + register struct uio *uio; + register FS *fs; struct buf *bp; daddr_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; int error; + u_short mode; vp = ap->a_vp; ip = VTOI(vp); + mode = ip->i_ffs_mode; uio = ap->a_uio; - error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) @@ -111,24 +110,6 @@ READ(v) if (uio->uio_resid == 0) return (0); - if (uio->uio_offset >= ip->i_ffs_size) - goto out; - - if (vp->v_type == VREG) { - while (uio->uio_resid > 0) { - bytelen = min(ip->i_ffs_size - uio->uio_offset, - uio->uio_resid); - if (bytelen == 0) - break; - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, - &bytelen, UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) - break; - } - goto out; - } for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_ffs_size - uio->uio_offset) <= 0) @@ -143,14 +124,23 @@ READ(v) if (bytesinfile < xfersize) xfersize = bytesinfile; +#ifdef LFS_READWRITE + (void)lfs_check(vp, lbn); + error = cluster_read(vp, &ip->i_ci, ip->i_ffs_size, lbn, + size, NOCRED, &bp); +#else if (lblktosize(fs, nextlbn) >= ip->i_ffs_size) error = bread(vp, lbn, size, NOCRED, &bp); + else if (doclusterread) + error = cluster_read(vp, &ip->i_ci, + ip->i_ffs_size, lbn, size, NOCRED, &bp); else if (lbn - 1 == ip->i_ci.ci_lastr) { int nextsize = BLKSIZE(fs, ip, nextlbn); error = breadn(vp, lbn, size, &nextlbn, &nextsize, 1, NOCRED, &bp); } else error = bread(vp, lbn, size, NOCRED, &bp); +#endif if (error) break; ip->i_ci.ci_lastr = lbn; @@ -168,7 +158,7 @@ READ(v) break; xfersize = size; } - error = uiomove((char *)bp->b_data + blkoffset, xfersize, + error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); if (error) break; @@ -176,7 +166,6 @@ READ(v) } if (bp != NULL) brelse(bp); -out: ip->i_flag |= IN_ACCESS; return (error); } @@ -194,19 +183,15 @@ WRITE(v) int a_ioflag; struct ucred *a_cred; } */ *ap = v; - struct vnode *vp; - struct uio *uio; - struct inode *ip; - FS *fs; + register struct vnode *vp; + register struct uio *uio; + register struct inode *ip; + register FS *fs; struct buf *bp; struct proc *p; daddr_t lbn; off_t 
osize; int blkoffset, error, extended, flags, ioflag, resid, size, xfersize; - void *win; - vsize_t bytelen; - off_t oldoff; - boolean_t rv; extended = 0; ioflag = ap->a_ioflag; @@ -254,76 +239,9 @@ WRITE(v) resid = uio->uio_resid; osize = ip->i_ffs_size; - error = 0; - - if (vp->v_type != VREG) - goto bcache; - - while (uio->uio_resid > 0) { - struct uvm_object *uobj = &vp->v_uobj; - oldoff = uio->uio_offset; - blkoffset = blkoff(fs, uio->uio_offset); - bytelen = min(fs->fs_bsize - blkoffset, uio->uio_resid); - - /* - * XXXUBC if file is mapped and this is the last block, - * process one page at a time. - */ - - error = ufs_balloc_range(vp, uio->uio_offset, bytelen, - ap->a_cred, ioflag & IO_SYNC ? B_SYNC : 0); - if (error) { - return error; - } - - win = ubc_alloc(uobj, uio->uio_offset, &bytelen, UBC_WRITE); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - - /* - * flush what we just wrote if necessary. - * XXXUBC simplistic async flushing. - */ - - if (ioflag & IO_SYNC) { - simple_lock(&uobj->vmobjlock); -#if 1 - /* - * XXX - * flush whole blocks in case there are deps. - * otherwise we can dirty and flush part of - * a block multiple times and the softdep code - * will get confused. fixing this the right way - * is complicated so we'll work around it for now. - */ - - rv = uobj->pgops->pgo_flush( - uobj, oldoff & ~(fs->fs_bsize - 1), - (oldoff + bytelen + fs->fs_bsize - 1) & - ~(fs->fs_bsize - 1), - PGO_CLEANIT|PGO_SYNCIO); -#else - rv = uobj->pgops->pgo_flush( - uobj, oldoff, oldoff + bytelen, - PGO_CLEANIT|PGO_SYNCIO); -#endif - simple_unlock(uobj->vmobjlock); - } else if (oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&uobj->vmobjlock); - rv = uobj->pgops->pgo_flush(uobj, - (oldoff >> 16) << 16, - (uio->uio_offset >> 16) << 16, PGO_CLEANIT); - simple_unlock(&uobj->vmobjlock); - } - if (error) { - break; - } - } - goto out; - -bcache: flags = ioflag & IO_SYNC ? B_SYNC : 0; - while (uio->uio_resid > 0) { + + for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; @@ -342,16 +260,21 @@ bcache: uvm_vnp_setsize(vp, ip->i_ffs_size); extended = 1; } + (void)uvm_vnp_uncache(vp); size = BLKSIZE(fs, ip, lbn) - bp->b_resid; if (size < xfersize) xfersize = size; - error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); + error = + uiomove((char *)bp->b_data + blkoffset, xfersize, uio); if (error != 0) bzero((char *)bp->b_data + blkoffset, xfersize); +#ifdef LFS_READWRITE + (void)VOP_BWRITE(bp); +#else if (ioflag & IO_SYNC) (void)bwrite(bp); else if (xfersize + blkoffset == fs->fs_bsize) { @@ -361,16 +284,16 @@ bcache: bawrite(bp); } else bdwrite(bp); +#endif if (error || xfersize == 0) break; + ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. 
*/ -out: - ip->i_flag |= IN_CHANGE | IN_UPDATE; if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) ip->i_ffs_mode &= ~(ISUID | ISGID); if (resid > uio->uio_resid) diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index e61259fa820..cb6060f1cc7 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_vnops.c,v 1.41 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: ufs_vnops.c,v 1.42 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $ */ /* @@ -469,6 +469,8 @@ ufs_chmod(vp, mode, cred, p) ip->i_ffs_mode &= ~ALLPERMS; ip->i_ffs_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; + if ((vp->v_flag & VTEXT) && (ip->i_ffs_mode & S_ISTXT) == 0) + (void) uvm_vnp_uncache(vp); return (0); } diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h index 981eb21474b..847ee1558e8 100644 --- a/sys/ufs/ufs/ufsmount.h +++ b/sys/ufs/ufs/ufsmount.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ufsmount.h,v 1.6 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: ufsmount.h,v 1.7 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufsmount.h,v 1.4 1994/12/21 20:00:23 mycroft Exp $ */ /* @@ -64,7 +64,6 @@ struct ufsmount { struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */ struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */ u_long um_nindir; /* indirect ptrs per block */ - u_long um_lognindir; /* log2 of um_nindir */ u_long um_bptrtodb; /* indir ptr to disk block */ u_long um_seqinc; /* inc between seq blocks */ time_t um_btime[MAXQUOTAS]; /* block quota time limit */ diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h index 92d420cd160..b2216fcc92f 100644 --- a/sys/uvm/uvm.h +++ b/sys/uvm/uvm.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm.h,v 1.15 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm.h,v 1.30 2001/06/27 21:18:34 thorpej Exp $ */ +/* $OpenBSD: uvm.h,v 1.16 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */ /* * @@ -38,12 +38,6 @@ #ifndef _UVM_UVM_H_ #define _UVM_UVM_H_ -#if defined(_KERNEL_OPT) -#include "opt_lockdebug.h" -#include "opt_multiprocessor.h" -#include "opt_uvmhist.h" -#endif - #include <uvm/uvm_extern.h> #include <uvm/uvm_stat.h> @@ -83,11 +77,11 @@ struct uvm { /* vm_page queues */ struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */ - int page_free_nextcolor; /* next color to allocate from */ struct pglist page_active; /* allocated pages, in use */ - struct pglist page_inactive; /* pages between the clock hands */ - struct simplelock pageqlock; /* lock for active/inactive page q */ - struct simplelock fpageqlock; /* lock for free page q */ + struct pglist page_inactive_swp;/* pages inactive (reclaim or free) */ + struct pglist page_inactive_obj;/* pages inactive (reclaim or free) */ + simple_lock_data_t pageqlock; /* lock for active/inactive page q */ + simple_lock_data_t fpageqlock; /* lock for free page q */ boolean_t page_init_done; /* TRUE if uvm_page_init() finished */ boolean_t page_idle_zero; /* TRUE if we should try to zero pages in the idle loop */ @@ -95,26 +89,26 @@ struct uvm { /* page daemon trigger */ int pagedaemon; /* daemon sleeps on this */ struct proc *pagedaemon_proc; /* daemon's pid */ - struct simplelock pagedaemon_lock; + simple_lock_data_t pagedaemon_lock; /* aiodone daemon trigger */ int aiodoned; /* daemon sleeps on this */ struct proc *aiodoned_proc; /* daemon's pid */ - struct simplelock aiodoned_lock; + simple_lock_data_t aiodoned_lock; /* page hash */ struct pglist *page_hash; /* page hash table 
(vp/off->page) */ int page_nhash; /* number of buckets */ int page_hashmask; /* hash mask */ - struct simplelock hashlock; /* lock on page_hash array */ + simple_lock_data_t hashlock; /* lock on page_hash array */ /* anon stuff */ struct vm_anon *afree; /* anon free list */ - struct simplelock afreelock; /* lock on anon free list */ + simple_lock_data_t afreelock; /* lock on anon free list */ /* static kernel map entry pool */ - struct vm_map_entry *kentry_free; /* free page pool */ - struct simplelock kentry_lock; + vm_map_entry_t kentry_free; /* free page pool */ + simple_lock_data_t kentry_lock; /* aio_done is locked by uvm.pagedaemon_lock and splbio! */ TAILQ_HEAD(, buf) aio_done; /* done async i/o reqs */ @@ -124,7 +118,7 @@ struct uvm { vaddr_t pager_eva; /* end of pager VA area */ /* swap-related items */ - struct simplelock swap_data_lock; + simple_lock_data_t swap_data_lock; /* kernel object: to support anonymous pageable kernel memory */ struct uvm_object *kernel_object; @@ -171,20 +165,6 @@ do { \ } while (0) /* - * UVM_KICK_PDAEMON: perform checks to determine if we need to - * give the pagedaemon a nudge, and do so if necessary. - */ - -#define UVM_KICK_PDAEMON() \ -do { \ - if (uvmexp.free + uvmexp.paging < uvmexp.freemin || \ - (uvmexp.free + uvmexp.paging < uvmexp.freetarg && \ - uvmexp.inactive < uvmexp.inactarg)) { \ - wakeup(&uvm.pagedaemon); \ - } \ -} while (/*CONSTCOND*/0) - -/* * UVM_PAGE_OWN: track page ownership (only if UVM_PAGE_TRKOWN) */ diff --git a/sys/uvm/uvm_amap.c b/sys/uvm/uvm_amap.c index a8a1a527367..29263bf7d60 100644 --- a/sys/uvm/uvm_amap.c +++ b/sys/uvm/uvm_amap.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_amap.c,v 1.17 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_amap.c,v 1.33 2001/07/22 13:34:12 wiz Exp $ */ +/* $OpenBSD: uvm_amap.c,v 1.18 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_amap.c,v 1.30 2001/02/18 21:19:09 chs Exp $ */ /* * @@ -101,7 +101,7 @@ static struct vm_amap *amap_alloc1 __P((int, int, int)); * chunk. note that the "plus one" part is needed because a reference * count of zero is neither positive or negative (need a way to tell * if we've got one zero or a bunch of them). - * + * * here are some in-line functions to help us. */ @@ -157,7 +157,7 @@ amap_init() * Initialize the vm_amap pool. */ pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0, - "amappl", 0, pool_page_alloc_nointr, pool_page_free_nointr, + "amappl", 0, pool_page_alloc_nointr, pool_page_free_nointr, M_UVMAMAP); } @@ -283,7 +283,7 @@ amap_free(amap) */ void amap_extend(entry, addsize) - struct vm_map_entry *entry; + vm_map_entry_t entry; vsize_t addsize; { struct vm_amap *amap = entry->aref.ar_amap; @@ -324,7 +324,7 @@ amap_extend(entry, addsize) } #endif amap_unlock(amap); - UVMHIST_LOG(maphist,"<- done (case 1), amap = 0x%x, sltneed=%d", + UVMHIST_LOG(maphist,"<- done (case 1), amap = 0x%x, sltneed=%d", amap, slotneed, 0, 0); return; /* done! */ } @@ -337,10 +337,10 @@ amap_extend(entry, addsize) #ifdef UVM_AMAP_PPREF if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { if ((slotoff + slotmapped) < amap->am_nslot) - amap_pp_adjref(amap, slotoff + slotmapped, + amap_pp_adjref(amap, slotoff + slotmapped, (amap->am_nslot - (slotoff + slotmapped)), 1); - pp_setreflen(amap->am_ppref, amap->am_nslot, 1, + pp_setreflen(amap->am_ppref, amap->am_nslot, 1, slotneed - amap->am_nslot); } #endif @@ -350,7 +350,7 @@ amap_extend(entry, addsize) * no need to zero am_anon since that was done at * alloc time and we never shrink an allocation. 
*/ - UVMHIST_LOG(maphist,"<- done (case 2), amap = 0x%x, slotneed=%d", + UVMHIST_LOG(maphist,"<- done (case 2), amap = 0x%x, slotneed=%d", amap, slotneed, 0, 0); return; } @@ -359,7 +359,7 @@ amap_extend(entry, addsize) * case 3: we need to malloc a new amap and copy all the amap * data over from old amap to the new one. * - * XXXCDC: could we take advantage of a kernel realloc()? + * XXXCDC: could we take advantage of a kernel realloc()? */ amap_unlock(amap); /* unlock in case we sleep in malloc */ @@ -412,7 +412,7 @@ amap_extend(entry, addsize) memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded); amap->am_ppref = newppref; if ((slotoff + slotmapped) < amap->am_nslot) - amap_pp_adjref(amap, slotoff + slotmapped, + amap_pp_adjref(amap, slotoff + slotmapped, (amap->am_nslot - (slotoff + slotmapped)), 1); pp_setreflen(newppref, amap->am_nslot, 1, slotadded); } @@ -433,7 +433,7 @@ amap_extend(entry, addsize) if (oldppref && oldppref != PPREF_NONE) free(oldppref, M_UVMAMAP); #endif - UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d", + UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d", amap, slotneed, 0, 0); } @@ -452,7 +452,7 @@ amap_extend(entry, addsize) */ void amap_share_protect(entry, prot) - struct vm_map_entry *entry; + vm_map_entry_t entry; vm_prot_t prot; { struct vm_amap *amap = entry->aref.ar_amap; @@ -489,7 +489,7 @@ amap_share_protect(entry, prot) /* * amap_wipeout: wipeout all anon's in an amap; then free the amap! * - * => called from amap_unref when the final reference to an amap is + * => called from amap_unref when the final reference to an amap is * discarded (i.e. when reference count == 1) * => the amap should be locked (by the caller) */ @@ -511,12 +511,12 @@ amap_wipeout(amap) slot = amap->am_slots[lcv]; anon = amap->am_anon[slot]; - if (anon == NULL || anon->an_ref == 0) + if (anon == NULL || anon->an_ref == 0) panic("amap_wipeout: corrupt amap"); simple_lock(&anon->an_lock); /* lock anon */ - UVMHIST_LOG(maphist," processing anon 0x%x, ref=%d", anon, + UVMHIST_LOG(maphist," processing anon 0x%x, ref=%d", anon, anon->an_ref, 0, 0); refs = --anon->an_ref; @@ -542,7 +542,7 @@ amap_wipeout(amap) /* * amap_copy: ensure that a map entry's "needs_copy" flag is false * by copying the amap if necessary. - * + * * => an entry with a null amap pointer will get a new (blank) one. * => the map that the map entry belongs to must be locked by caller. * => the amap currently attached to "entry" (if any) must be unlocked. @@ -555,8 +555,8 @@ amap_wipeout(amap) void amap_copy(map, entry, waitf, canchunk, startva, endva) - struct vm_map *map; - struct vm_map_entry *entry; + vm_map_t map; + vm_map_entry_t entry; int waitf; boolean_t canchunk; vaddr_t startva, endva; @@ -595,7 +595,7 @@ amap_copy(map, entry, waitf, canchunk, startva, endva) UVM_MAP_CLIP_END(map, entry, endva); } - UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]", + UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]", entry->start, entry->end, 0, 0); entry->aref.ar_pageoff = 0; entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0, @@ -626,7 +626,7 @@ amap_copy(map, entry, waitf, canchunk, startva, endva) * looks like we need to copy the map. 
*/ - UVMHIST_LOG(maphist," amap=%p, ref=%d, must copy it", + UVMHIST_LOG(maphist," amap=%p, ref=%d, must copy it", entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0); AMAP_B2SLOT(slots, entry->end - entry->start); amap = amap_alloc1(slots, 0, waitf); @@ -683,7 +683,7 @@ amap_copy(map, entry, waitf, canchunk, startva, endva) srcamap->am_flags &= ~AMAP_SHARED; /* clear shared flag */ #ifdef UVM_AMAP_PPREF if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) { - amap_pp_adjref(srcamap, entry->aref.ar_pageoff, + amap_pp_adjref(srcamap, entry->aref.ar_pageoff, (entry->end - entry->start) >> PAGE_SHIFT, -1); } #endif @@ -813,7 +813,7 @@ ReStart: uvm_wait("cownowpage"); goto ReStart; } - + /* * got it... now we can copy the data and replace anon * with our new one... diff --git a/sys/uvm/uvm_amap.h b/sys/uvm/uvm_amap.h index e6b071d5b63..811f121ea9e 100644 --- a/sys/uvm/uvm_amap.h +++ b/sys/uvm/uvm_amap.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_amap.h,v 1.9 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_amap.h,v 1.17 2001/06/02 18:09:25 chs Exp $ */ +/* $OpenBSD: uvm_amap.h,v 1.10 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_amap.h,v 1.14 2001/02/18 21:19:08 chs Exp $ */ /* * @@ -60,7 +60,7 @@ /* * forward definition of vm_amap structure. only amap * implementation-specific code should directly access the fields of - * this structure. + * this structure. */ struct vm_amap; @@ -72,13 +72,13 @@ struct vm_amap; #ifdef UVM_AMAP_INLINE /* defined/undef'd in uvm_amap.c */ #define AMAP_INLINE static __inline /* inline enabled */ -#else +#else #define AMAP_INLINE /* inline disabled */ #endif /* UVM_AMAP_INLINE */ /* - * prototypes for the amap interface + * prototypes for the amap interface */ AMAP_INLINE @@ -88,16 +88,16 @@ void amap_add /* add an anon to an amap */ struct vm_amap *amap_alloc /* allocate a new amap */ __P((vaddr_t, vaddr_t, int)); void amap_copy /* clear amap needs-copy flag */ - __P((struct vm_map *, struct vm_map_entry *, int, + __P((vm_map_t, vm_map_entry_t, int, boolean_t, vaddr_t, vaddr_t)); void amap_cow_now /* resolve all COW faults now */ - __P((struct vm_map *, struct vm_map_entry *)); + __P((vm_map_t, vm_map_entry_t)); void amap_extend /* make amap larger */ - __P((struct vm_map_entry *, vsize_t)); + __P((vm_map_entry_t, vsize_t)); int amap_flags /* get amap's flags */ __P((struct vm_amap *)); void amap_free /* free amap */ - __P((struct vm_amap *)); + __P((struct vm_amap *)); void amap_init /* init amap module (at boot time) */ __P((void)); void amap_lock /* lock amap */ @@ -107,7 +107,7 @@ struct vm_anon *amap_lookup /* lookup an anon @ offset in amap */ __P((struct vm_aref *, vaddr_t)); AMAP_INLINE void amap_lookups /* lookup multiple anons */ - __P((struct vm_aref *, vaddr_t, + __P((struct vm_aref *, vaddr_t, struct vm_anon **, int)); AMAP_INLINE void amap_ref /* add a reference to an amap */ @@ -115,9 +115,9 @@ void amap_ref /* add a reference to an amap */ int amap_refs /* get number of references of amap */ __P((struct vm_amap *)); void amap_share_protect /* protect pages in a shared amap */ - __P((struct vm_map_entry *, vm_prot_t)); + __P((vm_map_entry_t, vm_prot_t)); void amap_splitref /* split reference to amap into two */ - __P((struct vm_aref *, struct vm_aref *, + __P((struct vm_aref *, struct vm_aref *, vaddr_t)); AMAP_INLINE void amap_unadd /* remove an anon from an amap */ @@ -159,7 +159,7 @@ void amap_wipeout /* remove all anons from amap */ */ struct vm_amap { - struct simplelock am_l; /* simple lock [locks all vm_amap fields] */ + 
simple_lock_data_t am_l; /* simple lock [locks all vm_amap fields] */ int am_ref; /* reference count */ int am_flags; /* flags */ int am_maxslot; /* max # of slots allocated */ @@ -177,7 +177,7 @@ struct vm_amap { * note that am_slots, am_bckptr, and am_anon are arrays. this allows * fast lookup of pages based on their virual address at the expense of * some extra memory. in the future we should be smarter about memory - * usage and fall back to a non-array based implementation on systems + * usage and fall back to a non-array based implementation on systems * that are short of memory (XXXCDC). * * the entries in the array are called slots... for example an amap that @@ -185,13 +185,13 @@ struct vm_amap { * is an example of the array usage for a four slot amap. note that only * slots one and three have anons assigned to them. "D/C" means that we * "don't care" about the value. - * + * * 0 1 2 3 * am_anon: NULL, anon0, NULL, anon1 (actual pointers to anons) * am_bckptr: D/C, 1, D/C, 0 (points to am_slots entry) * * am_slots: 3, 1, D/C, D/C (says slots 3 and 1 are in use) - * + * * note that am_bckptr is D/C if the slot in am_anon is set to NULL. * to find the entry in am_slots for an anon, look at am_bckptr[slot], * thus the entry for slot 3 in am_slots[] is at am_slots[am_bckptr[3]]. @@ -203,7 +203,7 @@ struct vm_amap { /* * defines for handling of large sparce amaps: - * + * * one of the problems of array-based amaps is that if you allocate a * large sparcely-used area of virtual memory you end up allocating * large arrays that, for the most part, don't get used. this is a @@ -216,15 +216,15 @@ struct vm_amap { * it makes sense for it to live in an amap, but if we allocated an * amap for the entire stack range we could end up wasting a large * amount of malloc'd KVM. - * - * for example, on the i386 at boot time we allocate two amaps for the stack - * of /sbin/init: + * + * for example, on the i386 at boot time we allocate two amaps for the stack + * of /sbin/init: * 1. a 7680 slot amap at protection 0 (reserve space for stack) * 2. a 512 slot amap at protection 7 (top of stack) * - * most of the array allocated for the amaps for this is never used. + * most of the array allocated for the amaps for this is never used. * the amap interface provides a way for us to avoid this problem by - * allowing amap_copy() to break larger amaps up into smaller sized + * allowing amap_copy() to break larger amaps up into smaller sized * chunks (controlled by the "canchunk" option). we use this feature * to reduce our memory usage with the BSD stack management. if we * are asked to create an amap with more than UVM_AMAP_LARGE slots in it, diff --git a/sys/uvm/uvm_amap_i.h b/sys/uvm/uvm_amap_i.h index c88f7916bae..d2d8f73d350 100644 --- a/sys/uvm/uvm_amap_i.h +++ b/sys/uvm/uvm_amap_i.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_amap_i.h,v 1.11 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_amap_i.h,v 1.17 2001/05/25 04:06:11 chs Exp $ */ +/* $OpenBSD: uvm_amap_i.h,v 1.12 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_amap_i.h,v 1.15 2000/11/25 06:27:59 chs Exp $ */ /* * @@ -109,9 +109,10 @@ amap_lookups(aref, offset, anons, npages) /* * amap_add: add (or replace) a page to an amap * - * => caller must lock amap. + * => caller must lock amap. * => if (replace) caller must lock anon because we might have to call * pmap_page_protect on the anon's page. + * => returns an "offset" which is meaningful to amap_unadd(). 
*/ AMAP_INLINE void amap_add(aref, offset, anon, replace) @@ -134,7 +135,7 @@ amap_add(aref, offset, anon, replace) if (amap->am_anon[slot] == NULL) panic("amap_add: replacing null anon"); - if (amap->am_anon[slot]->u.an_page != NULL && + if (amap->am_anon[slot]->u.an_page != NULL && (amap->am_flags & AMAP_SHARED) != 0) { pmap_page_protect(amap->am_anon[slot]->u.an_page, VM_PROT_NONE); diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c index b05abc32642..9cf22f1f21f 100644 --- a/sys/uvm/uvm_anon.c +++ b/sys/uvm/uvm_anon.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_anon.c,v 1.18 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_anon.c,v 1.17 2001/05/25 04:06:12 chs Exp $ */ +/* $OpenBSD: uvm_anon.c,v 1.19 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_anon.c,v 1.15 2001/02/18 21:19:08 chs Exp $ */ /* * @@ -116,7 +116,7 @@ uvm_anon_add(count) anonblock->anons = anon; LIST_INSERT_HEAD(&anonblock_list, anonblock, list); memset(anon, 0, sizeof(*anon) * needed); - + simple_lock(&uvm.afreelock); uvmexp.nanon += needed; uvmexp.nfreeanon += needed; @@ -214,7 +214,7 @@ uvm_anfree(anon) if (pg) { /* - * if the page is owned by a uobject (now locked), then we must + * if the page is owned by a uobject (now locked), then we must * kill the loan on the page rather than free it. */ @@ -240,10 +240,10 @@ uvm_anfree(anon) /* tell them to dump it when done */ pg->flags |= PG_RELEASED; UVMHIST_LOG(maphist, - " anon 0x%x, page 0x%x: BUSY (released!)", + " anon 0x%x, page 0x%x: BUSY (released!)", anon, pg, 0, 0); return; - } + } pmap_page_protect(pg, VM_PROT_NONE); uvm_lock_pageq(); /* lock out pagedaemon */ uvm_pagefree(pg); /* bye bye */ @@ -272,7 +272,7 @@ uvm_anfree(anon) /* * uvm_anon_dropswap: release any swap resources from this anon. - * + * * => anon must be locked or have a reference count of 0. */ void @@ -294,7 +294,7 @@ uvm_anon_dropswap(anon) simple_lock(&uvm.swap_data_lock); uvmexp.swpgonly--; simple_unlock(&uvm.swap_data_lock); - } + } } /* @@ -398,7 +398,7 @@ uvm_anon_lockloanpg(anon) /* * page in every anon that is paged out to a range of swslots. - * + * * swap_syscall_lock should be held (protects anonblock_list). */ @@ -482,20 +482,20 @@ anon_pagein(anon) rv = uvmfault_anonget(NULL, NULL, anon); /* - * if rv == 0, anon is still locked, else anon + * if rv == VM_PAGER_OK, anon is still locked, else anon * is unlocked */ switch (rv) { - case 0: + case VM_PAGER_OK: break; - case EIO: - case ERESTART: + case VM_PAGER_ERROR: + case VM_PAGER_REFAULT: /* * nothing more to do on errors. - * ERESTART can only mean that the anon was freed, + * VM_PAGER_REFAULT can only mean that the anon was freed, * so again there's nothing to do. 
*/ @@ -518,6 +518,9 @@ anon_pagein(anon) */ pmap_clear_reference(pg); +#ifndef UBC + pmap_page_protect(pg, VM_PROT_NONE); +#endif uvm_lock_pageq(); uvm_pagedeactivate(pg); uvm_unlock_pageq(); diff --git a/sys/uvm/uvm_anon.h b/sys/uvm/uvm_anon.h index 1dc9ff7b566..702b5dc4e62 100644 --- a/sys/uvm/uvm_anon.h +++ b/sys/uvm/uvm_anon.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_anon.h,v 1.9 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_anon.h,v 1.15 2001/05/26 16:32:46 chs Exp $ */ +/* $OpenBSD: uvm_anon.h,v 1.10 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_anon.h,v 1.13 2000/12/27 09:17:04 chs Exp $ */ /* * @@ -50,12 +50,12 @@ struct vm_anon { int an_ref; /* reference count [an_lock] */ - struct simplelock an_lock; /* lock for an_ref */ + simple_lock_data_t an_lock; /* lock for an_ref */ union { struct vm_anon *an_nxt; /* if on free list [afreelock] */ struct vm_page *an_page;/* if in RAM [an_lock] */ } u; - int an_swslot; /* drum swap slot # (if != 0) + int an_swslot; /* drum swap slot # (if != 0) [an_lock. also, it is ok to read an_swslot if we hold an_page PG_BUSY] */ }; @@ -79,7 +79,7 @@ struct vm_anon { */ /* - * processes reference anonymous virtual memory maps with an anonymous + * processes reference anonymous virtual memory maps with an anonymous * reference structure: */ diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c index 924769d66bf..9a7f135cb98 100644 --- a/sys/uvm/uvm_aobj.c +++ b/sys/uvm/uvm_aobj.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_aobj.c,v 1.23 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_aobj.c,v 1.45 2001/06/23 20:52:03 chs Exp $ */ +/* $OpenBSD: uvm_aobj.c,v 1.24 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */ /* * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and @@ -174,7 +174,7 @@ static boolean_t uao_flush __P((struct uvm_object *, voff_t, voff_t, int)); static void uao_free __P((struct uvm_aobj *)); static int uao_get __P((struct uvm_object *, voff_t, - struct vm_page **, int *, int, + vm_page_t *, int *, int, vm_prot_t, int, int)); static boolean_t uao_releasepg __P((struct vm_page *, struct vm_page **)); @@ -183,7 +183,7 @@ static boolean_t uao_pagein_page __P((struct uvm_aobj *, int)); /* * aobj_pager - * + * * note that some functions (e.g. 
put) are handled elsewhere */ @@ -205,7 +205,7 @@ struct uvm_pagerops aobj_pager = { */ static LIST_HEAD(aobjlist, uvm_aobj) uao_list; -static struct simplelock uao_list_lock; +static simple_lock_data_t uao_list_lock; /* @@ -233,41 +233,38 @@ uao_find_swhash_elt(aobj, pageidx, create) struct uao_swhash_elt *elt; voff_t page_tag; - swhash = UAO_SWHASH_HASH(aobj, pageidx); - page_tag = UAO_SWHASH_ELT_TAG(pageidx); + swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */ + page_tag = UAO_SWHASH_ELT_TAG(pageidx); /* tag to search for */ /* * now search the bucket for the requested tag */ - LIST_FOREACH(elt, swhash, list) { - if (elt->tag == page_tag) { - return elt; - } + if (elt->tag == page_tag) + return(elt); } - if (!create) { + + /* fail now if we are not allowed to create a new entry in the bucket */ + if (!create) return NULL; - } + /* * allocate a new entry for the bucket and init/insert it in */ - - elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT); - if (elt == NULL) { - return NULL; - } + elt = pool_get(&uao_swhash_elt_pool, PR_WAITOK); LIST_INSERT_HEAD(swhash, elt, list); elt->tag = page_tag; elt->count = 0; memset(elt->slots, 0, sizeof(elt->slots)); - return elt; + + return(elt); } /* * uao_find_swslot: find the swap slot number for an aobj/pageidx * - * => object must be locked by caller + * => object must be locked by caller */ __inline static int uao_find_swslot(aobj, pageidx) @@ -296,7 +293,7 @@ uao_find_swslot(aobj, pageidx) return(0); } - /* + /* * otherwise, look in the array */ return(aobj->u_swslots[pageidx]); @@ -307,8 +304,6 @@ uao_find_swslot(aobj, pageidx) * * => setting a slot to zero frees the slot * => object must be locked by caller - * => we return the old slot number, or -1 if we failed to allocate - * memory to record the new slot number */ int uao_set_swslot(uobj, pageidx, slot) @@ -316,7 +311,6 @@ uao_set_swslot(uobj, pageidx, slot) int pageidx, slot; { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; - struct uao_swhash_elt *elt; int oldslot; UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist); UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d", @@ -348,9 +342,11 @@ uao_set_swslot(uobj, pageidx, slot) * we are freeing. */ - elt = uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE); + struct uao_swhash_elt *elt = + uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE); if (elt == NULL) { - return slot ? -1 : 0; + KASSERT(slot == 0); + return (0); } oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx); @@ -365,8 +361,8 @@ uao_set_swslot(uobj, pageidx, slot) if (slot) { if (oldslot == 0) elt->count++; - } else { - if (oldslot) + } else { /* freeing slot ... */ + if (oldslot) /* to be safe */ elt->count--; if (elt->count == 0) { @@ -374,7 +370,7 @@ uao_set_swslot(uobj, pageidx, slot) pool_put(&uao_swhash_elt_pool, elt); } } - } else { + } else { /* we are using an array */ oldslot = aobj->u_swslots[pageidx]; aobj->u_swslots[pageidx] = slot; @@ -630,7 +626,7 @@ uao_reference_locked(uobj) return; uobj->uo_refs++; /* bump! */ - UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", + UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", uobj, uobj->uo_refs,0,0); } @@ -663,7 +659,7 @@ uao_detach_locked(uobj) struct uvm_object *uobj; { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; - struct vm_page *pg, *nextpg; + struct vm_page *pg; boolean_t busybody; UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist); @@ -695,8 +691,9 @@ uao_detach_locked(uobj) * mark for release any that are. 
*/ busybody = FALSE; - for (pg = TAILQ_FIRST(&uobj->memq); pg != NULL; pg = nextpg) { - nextpg = TAILQ_NEXT(pg, listq); + for (pg = TAILQ_FIRST(&uobj->memq); + pg != NULL; + pg = TAILQ_NEXT(pg, listq)) { if (pg->flags & PG_BUSY) { pg->flags |= PG_RELEASED; busybody = TRUE; @@ -864,7 +861,7 @@ uao_flush(uobj, start, stop, flags) if (pp == NULL) continue; } - + switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { /* * XXX In these first 3 cases, we always just @@ -881,8 +878,15 @@ uao_flush(uobj, start, stop, flags) pp->wire_count != 0) continue; +#ifdef UBC /* ...and deactivate the page. */ pmap_clear_reference(pp); +#else + /* zap all mappings for the page. */ + pmap_page_protect(pp, VM_PROT_NONE); + + /* ...and deactivate the page. */ +#endif uvm_pagedeactivate(pp); continue; @@ -938,7 +942,7 @@ uao_flush(uobj, start, stop, flags) * * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot. * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES), - * then we will need to return EBUSY. + * then we will need to return VM_PAGER_UNLOCK. * * => prefer map unlocked (not required) * => object must be locked! we will _unlock_ it before starting any I/O. @@ -958,7 +962,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; voff_t current_offset; - struct vm_page *ptmp; + vm_page_t ptmp; int lcv, gotpages, maxpages, swslot, rv, pageidx; boolean_t done; UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist); @@ -1017,7 +1021,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) if (lcv == centeridx || (flags & PGO_ALLPAGES) != 0) /* need to do a wait or I/O! */ - done = FALSE; + done = FALSE; continue; } @@ -1026,7 +1030,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) * result array */ /* caller must un-busy this page */ - ptmp->flags |= PG_BUSY; + ptmp->flags |= PG_BUSY; UVM_PAGE_OWN(ptmp, "uao_get1"); pps[lcv] = ptmp; gotpages++; @@ -1043,10 +1047,10 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) *npagesp = gotpages; if (done) /* bingo! */ - return(0); + return(VM_PAGER_OK); else /* EEK! Need to unlock and I/O */ - return(EBUSY); + return(VM_PAGER_UNLOCK); } /* @@ -1103,7 +1107,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) uvm_wait("uao_getpage"); simple_lock(&uobj->vmobjlock); /* goto top of pps while loop */ - continue; + continue; } /* @@ -1112,7 +1116,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) */ ptmp->pqflags |= PQ_AOBJ; - /* + /* * got new page ready for I/O. break pps while * loop. pps[lcv] is still NULL. */ @@ -1130,8 +1134,8 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) simple_lock(&uobj->vmobjlock); continue; /* goto top of pps while loop */ } - - /* + + /* * if we get here then the page has become resident and * unbusy between steps 1 and 2. we busy it now (so we * own it) and set pps[lcv] (so that we exit the while @@ -1151,7 +1155,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) continue; /* next lcv */ /* - * we have a "fake/busy/clean" page that we just allocated. + * we have a "fake/busy/clean" page that we just allocated. * do the needed "i/o", either reading from swap or zeroing. */ swslot = uao_find_swslot(aobj, pageidx); @@ -1180,7 +1184,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) /* * I/O done. check for errors. 
*/ - if (rv != 0) + if (rv != VM_PAGER_OK) { UVMHIST_LOG(pdhist, "<- done (error=%d)", rv,0,0,0); @@ -1195,9 +1199,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) */ swslot = uao_set_swslot(&aobj->u_obj, pageidx, SWSLOT_BAD); - if (swslot != -1) { - uvm_swap_markbad(swslot, 1); - } + uvm_swap_markbad(swslot, 1); ptmp->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(ptmp, NULL); @@ -1210,10 +1212,10 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) } } - /* + /* * we got the page! clear the fake flag (indicates valid * data now in page) and plug into our result array. note - * that page is still busy. + * that page is still busy. * * it is the callers job to: * => check if the page is released @@ -1233,12 +1235,12 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) simple_unlock(&uobj->vmobjlock); UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0); - return(0); + return(VM_PAGER_OK); } /* * uao_releasepg: handle released page in an aobj - * + * * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need * to dispose of. * => caller must handle PG_WANTED case @@ -1299,7 +1301,7 @@ uao_releasepg(pg, nextpgp) /* * uao_dropswap: release any swap resources from this aobj page. - * + * * => aobj must be locked or have a reference count of 0. */ @@ -1319,7 +1321,7 @@ uao_dropswap(uobj, pageidx) /* * page in every page in every aobj that is paged-out to a range of swslots. - * + * * => nothing should be locked. * => returns TRUE if pagein was aborted due to lack of memory. */ @@ -1420,7 +1422,7 @@ restart: /* * if the slot isn't in range, skip it. */ - if (slot < startslot || + if (slot < startslot || slot >= endslot) { continue; } @@ -1493,14 +1495,14 @@ uao_pagein_page(aobj, pageidx) simple_lock(&aobj->u_obj.vmobjlock); switch (rv) { - case 0: + case VM_PAGER_OK: break; - case EIO: - case ERESTART: + case VM_PAGER_ERROR: + case VM_PAGER_REFAULT: /* * nothing more to do on errors. - * ERESTART can only mean that the anon was freed, + * VM_PAGER_REFAULT can only mean that the anon was freed, * so again there's nothing to do. */ return FALSE; @@ -1521,6 +1523,9 @@ uao_pagein_page(aobj, pageidx) * deactivate the page (to put it on a page queue). */ pmap_clear_reference(pg); +#ifndef UBC + pmap_page_protect(pg, VM_PROT_NONE); +#endif uvm_lock_pageq(); uvm_pagedeactivate(pg); uvm_unlock_pageq(); diff --git a/sys/uvm/uvm_bio.c b/sys/uvm/uvm_bio.c deleted file mode 100644 index f6ce9852451..00000000000 --- a/sys/uvm/uvm_bio.c +++ /dev/null @@ -1,558 +0,0 @@ -/* $NetBSD: uvm_bio.c,v 1.17 2001/09/10 21:19:43 chris Exp $ */ - -/* - * Copyright (c) 1998 Chuck Silvers. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ - -/* - * uvm_bio.c: buffered i/o vnode mapping cache - */ - - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/vnode.h> - -#include <uvm/uvm.h> -#include <uvm/uvm_page.h> - -/* - * global data structures - */ - -/* - * local functions - */ - -static int ubc_fault __P((struct uvm_faultinfo *, vaddr_t, - struct vm_page **, int, int, vm_fault_t, vm_prot_t, int)); -static struct ubc_map *ubc_find_mapping __P((struct uvm_object *, voff_t)); - -/* - * local data structues - */ - -#define UBC_HASH(uobj, offset) (((((u_long)(uobj)) >> 8) + \ - (((u_long)(offset)) >> PAGE_SHIFT)) & \ - ubc_object.hashmask) - -#define UBC_QUEUE(offset) (&ubc_object.inactive[((offset) >> ubc_winshift) & \ - (UBC_NQUEUES - 1)]) - -struct ubc_map -{ - struct uvm_object * uobj; /* mapped object */ - voff_t offset; /* offset into uobj */ - int refcount; /* refcount on mapping */ - voff_t writeoff; /* overwrite offset */ - vsize_t writelen; /* overwrite len */ - - LIST_ENTRY(ubc_map) hash; /* hash table */ - TAILQ_ENTRY(ubc_map) inactive; /* inactive queue */ -}; - -static struct ubc_object -{ - struct uvm_object uobj; /* glue for uvm_map() */ - char *kva; /* where ubc_object is mapped */ - struct ubc_map *umap; /* array of ubc_map's */ - - LIST_HEAD(, ubc_map) *hash; /* hashtable for cached ubc_map's */ - u_long hashmask; /* mask for hashtable */ - - TAILQ_HEAD(ubc_inactive_head, ubc_map) *inactive; - /* inactive queues for ubc_map's */ - -} ubc_object; - -struct uvm_pagerops ubc_pager = -{ - NULL, /* init */ - NULL, /* reference */ - NULL, /* detach */ - ubc_fault, /* fault */ - /* ... rest are NULL */ -}; - -int ubc_nwins = UBC_NWINS; -int ubc_winshift = UBC_WINSHIFT; -int ubc_winsize; -#ifdef PMAP_PREFER -int ubc_nqueues; -boolean_t ubc_release_unmap = FALSE; -#define UBC_NQUEUES ubc_nqueues -#define UBC_RELEASE_UNMAP ubc_release_unmap -#else -#define UBC_NQUEUES 1 -#define UBC_RELEASE_UNMAP FALSE -#endif - -/* - * ubc_init - * - * init pager private data structures. - */ - -void -ubc_init(void) -{ - struct ubc_map *umap; - vaddr_t va; - int i; - - /* - * Make sure ubc_winshift is sane. - */ - if (ubc_winshift < PAGE_SHIFT) - ubc_winshift = PAGE_SHIFT; - - /* - * init ubc_object. - * alloc and init ubc_map's. - * init inactive queues. - * alloc and init hashtable. - * map in ubc_object. 
- */ - - simple_lock_init(&ubc_object.uobj.vmobjlock); - ubc_object.uobj.pgops = &ubc_pager; - TAILQ_INIT(&ubc_object.uobj.memq); - ubc_object.uobj.uo_npages = 0; - ubc_object.uobj.uo_refs = UVM_OBJ_KERN; - - ubc_object.umap = malloc(ubc_nwins * sizeof(struct ubc_map), - M_TEMP, M_NOWAIT); - if (ubc_object.umap == NULL) - panic("ubc_init: failed to allocate ubc_map"); - memset(ubc_object.umap, 0, ubc_nwins * sizeof(struct ubc_map)); - - va = (vaddr_t)1L; -#ifdef PMAP_PREFER - PMAP_PREFER(0, &va); - ubc_nqueues = va >> ubc_winshift; - if (ubc_nqueues == 0) { - ubc_nqueues = 1; - } - if (ubc_nqueues != 1) { - ubc_release_unmap = TRUE; - } -#endif - ubc_winsize = 1 << ubc_winshift; - ubc_object.inactive = malloc(UBC_NQUEUES * - sizeof(struct ubc_inactive_head), - M_TEMP, M_NOWAIT); - if (ubc_object.inactive == NULL) - panic("ubc_init: failed to allocate inactive queue heads"); - for (i = 0; i < UBC_NQUEUES; i++) { - TAILQ_INIT(&ubc_object.inactive[i]); - } - for (i = 0; i < ubc_nwins; i++) { - umap = &ubc_object.umap[i]; - TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)], - umap, inactive); - } - - ubc_object.hash = hashinit(ubc_nwins, M_TEMP, M_NOWAIT, - &ubc_object.hashmask); - for (i = 0; i <= ubc_object.hashmask; i++) { - LIST_INIT(&ubc_object.hash[i]); - } - - if (uvm_map(kernel_map, (vaddr_t *)&ubc_object.kva, - ubc_nwins << ubc_winshift, &ubc_object.uobj, 0, (vsize_t)va, - UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, - UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) { - panic("ubc_init: failed to map ubc_object\n"); - } - UVMHIST_INIT(ubchist, 300); -} - - -/* - * ubc_fault: fault routine for ubc mapping - */ -int -ubc_fault(ufi, ign1, ign2, ign3, ign4, fault_type, access_type, flags) - struct uvm_faultinfo *ufi; - vaddr_t ign1; - struct vm_page **ign2; - int ign3, ign4; - vm_fault_t fault_type; - vm_prot_t access_type; - int flags; -{ - struct uvm_object *uobj; - struct vnode *vp; - struct ubc_map *umap; - vaddr_t va, eva, ubc_offset, slot_offset; - int i, error, rv, npages; - struct vm_page *pgs[(1 << ubc_winshift) >> PAGE_SHIFT], *pg; - UVMHIST_FUNC("ubc_fault"); UVMHIST_CALLED(ubchist); - - /* - * no need to try with PGO_LOCKED... - * we don't need to have the map locked since we know that - * no one will mess with it until our reference is released. - */ - if (flags & PGO_LOCKED) { -#if 0 - return EBUSY; -#else - uvmfault_unlockall(ufi, NULL, &ubc_object.uobj, NULL); - flags &= ~PGO_LOCKED; -#endif - } - - va = ufi->orig_rvaddr; - ubc_offset = va - (vaddr_t)ubc_object.kva; - - UVMHIST_LOG(ubchist, "va 0x%lx ubc_offset 0x%lx at %d", - va, ubc_offset, access_type,0); - - umap = &ubc_object.umap[ubc_offset >> ubc_winshift]; - KASSERT(umap->refcount != 0); - slot_offset = trunc_page(ubc_offset & (ubc_winsize - 1)); - - /* no umap locking needed since we have a ref on the umap */ - uobj = umap->uobj; - vp = (struct vnode *)uobj; - KASSERT(uobj != NULL); - - npages = (ubc_winsize - slot_offset) >> PAGE_SHIFT; - - /* - * XXXUBC - * if npages is more than 1 we have to be sure that - * we set PGO_OVERWRITE correctly. 
- */ - if (access_type == VM_PROT_WRITE) { - npages = 1; - } - -again: - memset(pgs, 0, sizeof (pgs)); - simple_lock(&uobj->vmobjlock); - - UVMHIST_LOG(ubchist, "slot_offset 0x%x writeoff 0x%x writelen 0x%x " - "u_size 0x%x", slot_offset, umap->writeoff, umap->writelen, - vp->v_size); - - if (access_type & VM_PROT_WRITE && - slot_offset >= umap->writeoff && - (slot_offset + PAGE_SIZE <= umap->writeoff + umap->writelen || - slot_offset + PAGE_SIZE >= vp->v_size - umap->offset)) { - UVMHIST_LOG(ubchist, "setting PGO_OVERWRITE", 0,0,0,0); - flags |= PGO_OVERWRITE; - } - else { UVMHIST_LOG(ubchist, "NOT setting PGO_OVERWRITE", 0,0,0,0); } - /* XXX be sure to zero any part of the page past EOF */ - - /* - * XXX - * ideally we'd like to pre-fault all of the pages we're overwriting. - * so for PGO_OVERWRITE, we should call VOP_GETPAGES() with all of the - * pages in [writeoff, writeoff+writesize] instead of just the one. - */ - - UVMHIST_LOG(ubchist, "getpages vp %p offset 0x%x npages %d", - uobj, umap->offset + slot_offset, npages, 0); - - error = VOP_GETPAGES(vp, umap->offset + slot_offset, pgs, &npages, 0, - access_type, 0, flags); - UVMHIST_LOG(ubchist, "getpages error %d npages %d", error, npages,0,0); - - if (error == EAGAIN) { - tsleep(&lbolt, PVM, "ubc_fault", 0); - goto again; - } - if (error) { - return error; - } - if (npages == 0) { - return 0; - } - - va = ufi->orig_rvaddr; - eva = ufi->orig_rvaddr + (npages << PAGE_SHIFT); - - UVMHIST_LOG(ubchist, "va 0x%lx eva 0x%lx", va, eva, 0,0); - simple_lock(&uobj->vmobjlock); - for (i = 0; va < eva; i++, va += PAGE_SIZE) { - UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0); - pg = pgs[i]; - - if (pg == NULL || pg == PGO_DONTCARE) { - continue; - } - if (pg->flags & PG_WANTED) { - wakeup(pg); - } - KASSERT((pg->flags & PG_FAKE) == 0); - if (pg->flags & PG_RELEASED) { - rv = uobj->pgops->pgo_releasepg(pg, NULL); - KASSERT(rv); - continue; - } - KASSERT(access_type == VM_PROT_READ || - (pg->flags & PG_RDONLY) == 0); - - uvm_lock_pageq(); - uvm_pageactivate(pg); - uvm_unlock_pageq(); - - pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ | VM_PROT_WRITE, access_type); - - pg->flags &= ~(PG_BUSY); - UVM_PAGE_OWN(pg, NULL); - } - simple_unlock(&uobj->vmobjlock); - pmap_update(ufi->orig_map->pmap); - return 0; -} - -/* - * local functions - */ - -struct ubc_map * -ubc_find_mapping(uobj, offset) - struct uvm_object *uobj; - voff_t offset; -{ - struct ubc_map *umap; - - LIST_FOREACH(umap, &ubc_object.hash[UBC_HASH(uobj, offset)], hash) { - if (umap->uobj == uobj && umap->offset == offset) { - return umap; - } - } - return NULL; -} - - -/* - * ubc interface functions - */ - -/* - * ubc_alloc: allocate a buffer mapping - */ -void * -ubc_alloc(uobj, offset, lenp, flags) - struct uvm_object *uobj; - voff_t offset; - vsize_t *lenp; - int flags; -{ - int s; - vaddr_t slot_offset, va; - struct ubc_map *umap; - voff_t umap_offset; - UVMHIST_FUNC("ubc_alloc"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "uobj %p offset 0x%lx len 0x%lx filesize 0x%x", - uobj, offset, *lenp, ((struct vnode *)vp)->v_size); - - umap_offset = (offset & ~((voff_t)ubc_winsize - 1)); - slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1)); - *lenp = min(*lenp, ubc_winsize - slot_offset); - - /* - * the vnode is always locked here, so we don't need to add a ref. 
- */ - - s = splbio(); - -again: - simple_lock(&ubc_object.uobj.vmobjlock); - umap = ubc_find_mapping(uobj, umap_offset); - if (umap == NULL) { - umap = TAILQ_FIRST(UBC_QUEUE(offset)); - if (umap == NULL) { - simple_unlock(&ubc_object.uobj.vmobjlock); - tsleep(&lbolt, PVM, "ubc_alloc", 0); - goto again; - } - - /* - * remove from old hash (if any), - * add to new hash. - */ - - if (umap->uobj != NULL) { - LIST_REMOVE(umap, hash); - } - - umap->uobj = uobj; - umap->offset = umap_offset; - - LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(uobj, umap_offset)], - umap, hash); - - va = (vaddr_t)(ubc_object.kva + - ((umap - ubc_object.umap) << ubc_winshift)); - pmap_remove(pmap_kernel(), va, va + ubc_winsize); - pmap_update(pmap_kernel()); - } - - if (umap->refcount == 0) { - TAILQ_REMOVE(UBC_QUEUE(offset), umap, inactive); - } - -#ifdef DIAGNOSTIC - if ((flags & UBC_WRITE) && - (umap->writeoff || umap->writelen)) { - panic("ubc_fault: concurrent writes vp %p", uobj); - } -#endif - if (flags & UBC_WRITE) { - umap->writeoff = slot_offset; - umap->writelen = *lenp; - } - - umap->refcount++; - simple_unlock(&ubc_object.uobj.vmobjlock); - splx(s); - UVMHIST_LOG(ubchist, "umap %p refs %d va %p", - umap, umap->refcount, - ubc_object.kva + ((umap - ubc_object.umap) << ubc_winshift), - 0); - - return ubc_object.kva + - ((umap - ubc_object.umap) << ubc_winshift) + slot_offset; -} - - -void -ubc_release(va, wlen) - void *va; - vsize_t wlen; -{ - struct ubc_map *umap; - struct uvm_object *uobj; - int s; - UVMHIST_FUNC("ubc_release"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "va %p", va,0,0,0); - - s = splbio(); - simple_lock(&ubc_object.uobj.vmobjlock); - - umap = &ubc_object.umap[((char *)va - ubc_object.kva) >> ubc_winshift]; - uobj = umap->uobj; - KASSERT(uobj != NULL); - - umap->writeoff = 0; - umap->writelen = 0; - umap->refcount--; - if (umap->refcount == 0) { - if (UBC_RELEASE_UNMAP && - (((struct vnode *)uobj)->v_flag & VTEXT)) { - vaddr_t va; - - /* - * if this file is the executable image of - * some process, that process will likely have - * the file mapped at an alignment other than - * what PMAP_PREFER() would like. we'd like - * to have process text be able to use the - * cache even if someone is also reading the - * file, so invalidate mappings of such files - * as soon as possible. - */ - - va = (vaddr_t)(ubc_object.kva + - ((umap - ubc_object.umap) << ubc_winshift)); - pmap_remove(pmap_kernel(), va, va + ubc_winsize); - pmap_update(pmap_kernel()); - LIST_REMOVE(umap, hash); - umap->uobj = NULL; - TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, - inactive); - } else { - TAILQ_INSERT_TAIL(UBC_QUEUE(umap->offset), umap, - inactive); - } - } - UVMHIST_LOG(ubchist, "umap %p refs %d", umap, umap->refcount,0,0); - simple_unlock(&ubc_object.uobj.vmobjlock); - splx(s); -} - - -/* - * removing a range of mappings from the ubc mapping cache. - */ - -void -ubc_flush(uobj, start, end) - struct uvm_object *uobj; - voff_t start, end; -{ - struct ubc_map *umap; - vaddr_t va; - int s; - UVMHIST_FUNC("ubc_flush"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "uobj %p start 0x%lx end 0x%lx", - uobj, start, end,0); - - s = splbio(); - simple_lock(&ubc_object.uobj.vmobjlock); - for (umap = ubc_object.umap; - umap < &ubc_object.umap[ubc_nwins]; - umap++) { - - if (umap->uobj != uobj || - umap->offset < start || - (umap->offset >= end && end != 0) || - umap->refcount > 0) { - continue; - } - - /* - * remove from hash, - * move to head of inactive queue. 
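[editor's note] The removed ubc_alloc()/ubc_release() pair above manages a fixed pool of mappings on an inactive queue: a lookup miss steals the mapping at the head of the queue and rebinds it, and a release puts the mapping back on the tail (or the head, when it should be reused quickly). A sketch of that recycling idea with sys/queue.h; the names are local to the example, not kernel interfaces.

#include <stdio.h>
#include <sys/queue.h>

struct win {
	int			obj;	/* stand-in for the uvm_object */
	long			off;	/* window-aligned offset */
	TAILQ_ENTRY(win)	link;
};

TAILQ_HEAD(winq, win);

int
main(void)
{
	struct winq inactive = TAILQ_HEAD_INITIALIZER(inactive);
	struct win pool[3];
	int i;

	for (i = 0; i < 3; i++) {
		pool[i].obj = -1;		/* unbound */
		pool[i].off = 0;
		TAILQ_INSERT_TAIL(&inactive, &pool[i], link);
	}

	/* lookup miss: steal the least recently released window */
	struct win *w = TAILQ_FIRST(&inactive);
	TAILQ_REMOVE(&inactive, w, link);
	w->obj = 42;			/* rebind to a new object/offset */
	w->off = 8192;

	/* release: back on the tail so it ages before being recycled */
	TAILQ_INSERT_TAIL(&inactive, w, link);

	TAILQ_FOREACH(w, &inactive, link)
		printf("window bound to obj %d off %ld\n", w->obj, w->off);
	return 0;
}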
- */ - - va = (vaddr_t)(ubc_object.kva + - ((umap - ubc_object.umap) << ubc_winshift)); - pmap_remove(pmap_kernel(), va, va + ubc_winsize); - pmap_update(pmap_kernel()); - - LIST_REMOVE(umap, hash); - umap->uobj = NULL; - TAILQ_REMOVE(UBC_QUEUE(umap->offset), umap, inactive); - TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, inactive); - } - simple_unlock(&ubc_object.uobj.vmobjlock); - splx(s); -} diff --git a/sys/uvm/uvm_ddb.h b/sys/uvm/uvm_ddb.h index f2de2a1c9e8..469b381a6df 100644 --- a/sys/uvm/uvm_ddb.h +++ b/sys/uvm/uvm_ddb.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_ddb.h,v 1.8 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_ddb.h,v 1.7 2001/06/02 18:09:26 chs Exp $ */ +/* $OpenBSD: uvm_ddb.h,v 1.9 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_ddb.h,v 1.5 2000/11/25 06:27:59 chs Exp $ */ /* * @@ -41,7 +41,7 @@ #ifdef _KERNEL #ifdef DDB -void uvm_map_printit __P((struct vm_map *, boolean_t, +void uvm_map_printit __P((vm_map_t, boolean_t, int (*) __P((const char *, ...)))); void uvm_object_printit __P((struct uvm_object *, boolean_t, int (*) __P((const char *, ...)))); diff --git a/sys/uvm/uvm_device.c b/sys/uvm/uvm_device.c index 0f5f2214ec8..08bdccca0d0 100644 --- a/sys/uvm/uvm_device.c +++ b/sys/uvm/uvm_device.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_device.c,v 1.20 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_device.c,v 1.37 2001/09/10 21:19:42 chris Exp $ */ +/* $OpenBSD: uvm_device.c,v 1.21 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_device.c,v 1.30 2000/11/25 06:27:59 chs Exp $ */ /* * @@ -57,7 +57,7 @@ LIST_HEAD(udv_list_struct, uvm_device); static struct udv_list_struct udv_list; -static struct simplelock udv_lock; +static simple_lock_data_t udv_lock; /* * functions @@ -67,7 +67,7 @@ static void udv_init __P((void)); static void udv_reference __P((struct uvm_object *)); static void udv_detach __P((struct uvm_object *)); static int udv_fault __P((struct uvm_faultinfo *, vaddr_t, - struct vm_page **, int, int, vm_fault_t, + vm_page_t *, int, int, vm_fault_t, vm_prot_t, int)); static boolean_t udv_flush __P((struct uvm_object *, voff_t, voff_t, int)); @@ -145,7 +145,7 @@ udv_attach(arg, accessprot, off, size) /* * Check that the specified range of the device allows the * desired protection. - * + * * XXX assumes VM_PROT_* == PROT_* * XXX clobbers off and size, but nothing else here needs them. 
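[editor's note] The removed ubc_flush() above walks the whole window array and tears down only the idle windows that belong to the object and whose offset falls in [start, end), with end == 0 meaning "no upper bound". A sketch of that filter in plain C, with a local struct standing in for struct ubc_map.

#include <stdio.h>

struct win {
	int	obj;
	long	off;
	int	refcount;
};

static int
should_flush(const struct win *w, int obj, long start, long end)
{
	if (w->obj != obj || w->off < start ||
	    (w->off >= end && end != 0) || w->refcount > 0)
		return 0;
	return 1;
}

int
main(void)
{
	struct win busy = { 1, 8192, 1 };
	struct win idle = { 1, 8192, 0 };

	printf("busy window flushed? %d\n", should_flush(&busy, 1, 0, 16384));
	printf("idle window flushed? %d\n", should_flush(&idle, 1, 0, 16384));
	return 0;
}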
*/ @@ -163,7 +163,7 @@ udv_attach(arg, accessprot, off, size) for (;;) { /* - * first, attempt to find it on the main list + * first, attempt to find it on the main list */ simple_lock(&udv_lock); @@ -259,7 +259,7 @@ udv_attach(arg, accessprot, off, size) } /*NOTREACHED*/ } - + /* * udv_reference * @@ -278,7 +278,7 @@ udv_reference(uobj) simple_lock(&uobj->vmobjlock); uobj->uo_refs++; - UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", + UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", uobj, uobj->uo_refs,0,0); simple_unlock(&uobj->vmobjlock); } @@ -306,7 +306,7 @@ again: if (uobj->uo_refs > 1) { uobj->uo_refs--; simple_unlock(&uobj->vmobjlock); - UVMHIST_LOG(maphist," <- done, uobj=0x%x, ref=%d", + UVMHIST_LOG(maphist," <- done, uobj=0x%x, ref=%d", uobj,uobj->uo_refs,0,0); return; } @@ -374,7 +374,7 @@ static int udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) struct uvm_faultinfo *ufi; vaddr_t vaddr; - struct vm_page **pps; + vm_page_t *pps; int npages, centeridx, flags; vm_fault_t fault_type; vm_prot_t access_type; @@ -396,16 +396,16 @@ udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) * we do not allow device mappings to be mapped copy-on-write * so we kill any attempt to do so here. */ - + if (UVM_ET_ISCOPYONWRITE(entry)) { - UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)", + UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)", entry->etype, 0,0,0); uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL); - return(EIO); + return(VM_PAGER_ERROR); } /* - * get device map function. + * get device map function. */ device = udv->u_device; @@ -422,12 +422,12 @@ udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) curr_offset = entry->offset + (vaddr - entry->start); /* pmap va = vaddr (virtual address of pps[0]) */ curr_va = vaddr; - + /* * loop over the page range entering in as needed */ - retval = 0; + retval = VM_PAGER_OK; for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE, curr_va += PAGE_SIZE) { if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx) @@ -438,7 +438,7 @@ udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) mdpgno = (*mapfn)(device, curr_offset, access_type); if (mdpgno == -1) { - retval = EIO; + retval = VM_PAGER_ERROR; break; } paddr = pmap_phys_address(mdpgno); @@ -447,7 +447,7 @@ udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d", ufi->orig_map->pmap, curr_va, paddr, mapprot); if (pmap_enter(ufi->orig_map->pmap, curr_va, paddr, - mapprot, PMAP_CANFAIL | mapprot) != 0) { + mapprot, PMAP_CANFAIL | mapprot) != KERN_SUCCESS) { /* * pmap_enter() didn't have the resource to * enter this mapping. 
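[editor's note] The changes in this hunk (and in uvm_fault.c further below) revert the error convention: the restored code reports pager-style VM_PAGER_* results, while the newer code used plain errno values (0, EIO, EAGAIN, ERESTART). A sketch of that mapping; the enum values here are local stand-ins, not the kernel's definitions.

#include <stdio.h>
#include <errno.h>

enum pager_result {		/* stand-ins, not the kernel's values */
	PAGER_OK,
	PAGER_ERROR,
	PAGER_AGAIN,
	PAGER_REFAULT
};

#ifndef ERESTART
#define ERESTART	(-1)	/* BSD kernels define this; assumed here */
#endif

static int
pager_to_errno(enum pager_result r)
{
	switch (r) {
	case PAGER_OK:		return 0;
	case PAGER_AGAIN:	return EAGAIN;
	case PAGER_REFAULT:	return ERESTART;
	default:		return EIO;	/* error cases */
	}
}

int
main(void)
{
	printf("PAGER_ERROR maps to errno %d\n", pager_to_errno(PAGER_ERROR));
	return 0;
}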
Unlock everything, @@ -460,13 +460,11 @@ udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) */ uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL); - pmap_update(ufi->orig_map->pmap); /* sync what we have so far */ uvm_wait("udv_fault"); - return (ERESTART); + return (VM_PAGER_REFAULT); } } uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL); - pmap_update(ufi->orig_map->pmap); return (retval); } diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h index ac984530ff3..39d6fcb6767 100644 --- a/sys/uvm/uvm_extern.h +++ b/sys/uvm/uvm_extern.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_extern.h,v 1.39 2001/12/06 23:01:07 niklas Exp $ */ -/* $NetBSD: uvm_extern.h,v 1.66 2001/08/16 01:37:50 chs Exp $ */ +/* $OpenBSD: uvm_extern.h,v 1.40 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */ /* * @@ -88,12 +88,24 @@ * typedefs, necessary for standard UVM headers. */ -typedef unsigned int uvm_flag_t; +typedef unsigned int uvm_flag_t; typedef int vm_fault_t; typedef int vm_inherit_t; /* XXX: inheritance codes */ typedef off_t voff_t; /* XXX: offset within a uvm_object */ +union vm_map_object; +typedef union vm_map_object vm_map_object_t; + +struct vm_map_entry; +typedef struct vm_map_entry *vm_map_entry_t; + +struct vm_map; +typedef struct vm_map *vm_map_t; + +struct vm_page; +typedef struct vm_page *vm_page_t; + /* * defines */ @@ -211,21 +223,6 @@ typedef int vm_prot_t; #define UVM_PGA_ZERO 0x0002 /* returned page must be zero'd */ /* - * the following defines are for ubc_alloc's flags - */ -#define UBC_READ 0 -#define UBC_WRITE 1 - -/* - * flags for uvn_findpages(). - */ -#define UFP_ALL 0x0 -#define UFP_NOWAIT 0x1 -#define UFP_NOALLOC 0x2 -#define UFP_NOCACHE 0x4 -#define UFP_NORDONLY 0x8 - -/* * lockflags that control the locking behavior of various functions. 
*/ #define UVM_LK_ENTER 0x00000001 /* map locked on entry */ @@ -251,9 +248,6 @@ struct pmap; struct vnode; struct pool; struct simplelock; -struct vm_map_entry; -struct vm_map; -struct vm_page; extern struct pool *uvm_aiobuf_pool; @@ -276,9 +270,6 @@ struct uvmexp { int paging; /* number of pages in the process of being paged out */ int wired; /* number of wired pages */ - int ncolors; /* number of page color buckets: must be p-o-2 */ - int colormask; /* color bucket mask */ - int zeropages; /* number of zero'd pages */ int reserve_pagedaemon; /* number of pages reserved for pagedaemon */ int reserve_kernel; /* number of pages reserved for kernel */ @@ -328,9 +319,8 @@ struct uvmexp { was available */ int pga_zeromiss; /* pagealloc where zero wanted and zero not available */ - int zeroaborts; /* number of times page zeroing was aborted */ - int colorhit; /* pagealloc where we got optimal color */ - int colormiss; /* pagealloc where we didn't */ + int zeroaborts; /* number of times page zeroing was + aborted */ /* fault subcounters */ int fltnoram; /* number of times fault was out of ram */ @@ -402,7 +392,7 @@ struct vmspace { caddr_t vm_shm; /* SYS5 shared memory private data XXX */ /* we copy from vm_startcopy to the end of the structure on fork */ #define vm_startcopy vm_rssize - segsz_t vm_rssize; /* current resident set size in pages */ + segsz_t vm_rssize; /* current resident set size in pages */ segsz_t vm_swrss; /* resident set size before last swap */ segsz_t vm_tsize; /* text size (pages) XXX */ segsz_t vm_dsize; /* data size (pages) XXX */ @@ -424,6 +414,7 @@ extern struct vm_map *kmem_map; extern struct vm_map *mb_map; extern struct vm_map *phys_map; + /* * macros */ @@ -434,7 +425,11 @@ extern struct vm_map *phys_map; #endif /* _KERNEL */ +#ifdef pmap_resident_count #define vm_resident_count(vm) (pmap_resident_count((vm)->vm_map.pmap)) +#else +#define vm_resident_count(vm) ((vm)->vm_rssize) +#endif /* XXX clean up later */ struct buf; @@ -469,16 +464,9 @@ void uao_detach_locked __P((struct uvm_object *)); void uao_reference __P((struct uvm_object *)); void uao_reference_locked __P((struct uvm_object *)); -/* uvm_bio.c */ -void ubc_init __P((void)); -void * ubc_alloc __P((struct uvm_object *, voff_t, vsize_t *, - int)); -void ubc_release __P((void *, vsize_t)); -void ubc_flush __P((struct uvm_object *, voff_t, voff_t)); - /* uvm_fault.c */ -int uvm_fault __P((struct vm_map *, vaddr_t, vm_fault_t, - vm_prot_t)); +int uvm_fault __P((vm_map_t, vaddr_t, + vm_fault_t, vm_prot_t)); /* handle a page fault */ /* uvm_glue.c */ @@ -499,53 +487,50 @@ void uvm_vsunlock __P((struct proc *, caddr_t, size_t)); /* uvm_init.c */ -void uvm_init __P((void)); +void uvm_init __P((void)); /* init the uvm system */ /* uvm_io.c */ -int uvm_io __P((struct vm_map *, struct uio *)); +int uvm_io __P((vm_map_t, struct uio *)); /* uvm_km.c */ -vaddr_t uvm_km_alloc1 __P((struct vm_map *, vsize_t, - boolean_t)); -void uvm_km_free __P((struct vm_map *, vaddr_t, vsize_t)); -void uvm_km_free_wakeup __P((struct vm_map *, vaddr_t, - vsize_t)); -vaddr_t uvm_km_kmemalloc __P((struct vm_map *, struct - uvm_object *, vsize_t, int)); -struct vm_map *uvm_km_suballoc __P((struct vm_map *, vaddr_t *, - vaddr_t *, vsize_t, int, boolean_t, - struct vm_map *)); -vaddr_t uvm_km_valloc __P((struct vm_map *, vsize_t)); -vaddr_t uvm_km_valloc_align __P((struct vm_map *, vsize_t, - vsize_t)); -vaddr_t uvm_km_valloc_wait __P((struct vm_map *, vsize_t)); -vaddr_t uvm_km_valloc_prefer_wait __P((struct vm_map *, vsize_t, - voff_t)); 
-vaddr_t uvm_km_alloc_poolpage1 __P((struct vm_map *, - struct uvm_object *, boolean_t)); -void uvm_km_free_poolpage1 __P((struct vm_map *, vaddr_t)); - -#define uvm_km_alloc_poolpage(waitok) \ - uvm_km_alloc_poolpage1(kmem_map, uvmexp.kmem_object, (waitok)) -#define uvm_km_free_poolpage(addr) \ - uvm_km_free_poolpage1(kmem_map, (addr)) +vaddr_t uvm_km_alloc1 __P((vm_map_t, vsize_t, boolean_t)); +void uvm_km_free __P((vm_map_t, vaddr_t, vsize_t)); +void uvm_km_free_wakeup __P((vm_map_t, vaddr_t, + vsize_t)); +vaddr_t uvm_km_kmemalloc __P((vm_map_t, struct uvm_object *, + vsize_t, int)); +struct vm_map *uvm_km_suballoc __P((vm_map_t, vaddr_t *, + vaddr_t *, vsize_t, int, + boolean_t, vm_map_t)); +vaddr_t uvm_km_valloc __P((vm_map_t, vsize_t)); +vaddr_t uvm_km_valloc_align __P((vm_map_t, vsize_t, vsize_t)); +vaddr_t uvm_km_valloc_wait __P((vm_map_t, vsize_t)); +vaddr_t uvm_km_valloc_prefer_wait __P((vm_map_t, vsize_t, + voff_t)); +vaddr_t uvm_km_alloc_poolpage1 __P((vm_map_t, + struct uvm_object *, boolean_t)); +void uvm_km_free_poolpage1 __P((vm_map_t, vaddr_t)); + +#define uvm_km_alloc_poolpage(waitok) uvm_km_alloc_poolpage1(kmem_map, \ + uvmexp.kmem_object, (waitok)) +#define uvm_km_free_poolpage(addr) uvm_km_free_poolpage1(kmem_map, (addr)) /* uvm_map.c */ -int uvm_map __P((struct vm_map *, vaddr_t *, vsize_t, +int uvm_map __P((vm_map_t, vaddr_t *, vsize_t, struct uvm_object *, voff_t, vsize_t, uvm_flag_t)); -int uvm_map_pageable __P((struct vm_map *, vaddr_t, +int uvm_map_pageable __P((vm_map_t, vaddr_t, vaddr_t, boolean_t, int)); -int uvm_map_pageable_all __P((struct vm_map *, int, - vsize_t)); -boolean_t uvm_map_checkprot __P((struct vm_map *, vaddr_t, +int uvm_map_pageable_all __P((vm_map_t, int, vsize_t)); +boolean_t uvm_map_checkprot __P((vm_map_t, vaddr_t, vaddr_t, vm_prot_t)); -int uvm_map_protect __P((struct vm_map *, vaddr_t, +int uvm_map_protect __P((vm_map_t, vaddr_t, vaddr_t, vm_prot_t, boolean_t)); -struct vmspace *uvmspace_alloc __P((vaddr_t, vaddr_t)); +struct vmspace *uvmspace_alloc __P((vaddr_t, vaddr_t, + boolean_t)); void uvmspace_init __P((struct vmspace *, struct pmap *, - vaddr_t, vaddr_t)); + vaddr_t, vaddr_t, boolean_t)); void uvmspace_exec __P((struct proc *, vaddr_t, vaddr_t)); struct vmspace *uvmspace_fork __P((struct vmspace *)); void uvmspace_free __P((struct vmspace *)); @@ -555,14 +540,14 @@ void uvmspace_unshare __P((struct proc *)); /* uvm_meter.c */ void uvm_meter __P((void)); -int uvm_sysctl __P((int *, u_int, void *, size_t *, +int uvm_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); void uvm_total __P((struct vmtotal *)); /* uvm_mmap.c */ -int uvm_mmap __P((struct vm_map *, vaddr_t *, vsize_t, - vm_prot_t, vm_prot_t, int, - void *, voff_t, vsize_t)); +int uvm_mmap __P((vm_map_t, vaddr_t *, vsize_t, + vm_prot_t, vm_prot_t, int, + caddr_t, voff_t, vsize_t)); /* uvm_page.c */ struct vm_page *uvm_pagealloc_strat __P((struct uvm_object *, @@ -570,7 +555,9 @@ struct vm_page *uvm_pagealloc_strat __P((struct uvm_object *, #define uvm_pagealloc(obj, off, anon, flags) \ uvm_pagealloc_strat((obj), (off), (anon), (flags), \ UVM_PGA_STRAT_NORMAL, 0) -void uvm_pagerealloc __P((struct vm_page *, +vaddr_t uvm_pagealloc_contig __P((vaddr_t, vaddr_t, + vaddr_t, vaddr_t)); +void uvm_pagerealloc __P((struct vm_page *, struct uvm_object *, voff_t)); /* Actually, uvm_page_physload takes PF#s which need their own type */ void uvm_page_physload __P((paddr_t, paddr_t, @@ -589,28 +576,27 @@ void uvm_aiodone_daemon __P((void *)); /* uvm_pglist.c */ 
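[editor's note] Much of the prototype churn in this header is a spelling change: the revert brings back the vm_map_t / vm_map_entry_t / vm_page_t pointer typedefs (added earlier in this hunk) and uses them in place of the explicit struct-pointer form. A minimal illustration that the two spellings name the same type; the struct body here is a dummy for the example.

#include <stdio.h>

struct vm_map { int dummy; };
typedef struct vm_map *vm_map_t;

/* these two parameter spellings describe exactly the same type */
static void take_map_struct(struct vm_map *map) { (void)map; }
static void take_map_typedef(vm_map_t map) { (void)map; }

int
main(void)
{
	struct vm_map m;

	take_map_struct(&m);
	take_map_typedef(&m);
	printf("vm_map_t is just struct vm_map *\n");
	return 0;
}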
int uvm_pglistalloc __P((psize_t, paddr_t, paddr_t, paddr_t, paddr_t, - struct pglist *, int, int)); + struct pglist *, int, int)); void uvm_pglistfree __P((struct pglist *)); /* uvm_swap.c */ void uvm_swap_init __P((void)); /* uvm_unix.c */ -int uvm_coredump __P((struct proc *, struct vnode *, +int uvm_coredump __P((struct proc *, struct vnode *, struct ucred *, struct core *)); int uvm_grow __P((struct proc *, vaddr_t)); /* uvm_user.c */ -void uvm_deallocate __P((struct vm_map *, vaddr_t, vsize_t)); +int uvm_deallocate __P((vm_map_t, vaddr_t, vsize_t)); /* uvm_vnode.c */ void uvm_vnp_setsize __P((struct vnode *, voff_t)); void uvm_vnp_sync __P((struct mount *)); +void uvm_vnp_terminate __P((struct vnode *)); + /* terminate a uvm/uvn object */ +boolean_t uvm_vnp_uncache __P((struct vnode *)); struct uvm_object *uvn_attach __P((void *, vm_prot_t)); -void uvn_findpages __P((struct uvm_object *, voff_t, - int *, struct vm_page **, int)); -void uvm_vnp_zerorange __P((struct vnode *, off_t, size_t)); -void uvm_vnp_asyncget __P((struct vnode *, off_t, size_t)); /* kern_malloc.c */ void kmeminit_nkmempages __P((void)); diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index 4e08eaa63a4..6736aa6a8d5 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_fault.c,v 1.28 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_fault.c,v 1.68 2001/09/10 21:19:42 chris Exp $ */ +/* $OpenBSD: uvm_fault.c,v 1.29 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_fault.c,v 1.56 2001/02/18 21:19:08 chs Exp $ */ /* * @@ -59,7 +59,7 @@ * * CASE 1A CASE 1B CASE 2A CASE 2B * read/write1 write>1 read/write +-cow_write/zero - * | | | | + * | | | | * +--|--+ +--|--+ +-----+ + | + | +-----+ * amap | V | | ----------->new| | | | ^ | * +-----+ +-----+ +-----+ + | + | +--|--+ @@ -69,7 +69,7 @@ * +-----+ +-----+ +-----+ +-----+ * * d/c = don't care - * + * * case [0]: layerless fault * no amap or uobj is present. this is an error. * @@ -83,17 +83,17 @@ * 2A: [read on non-NULL uobj] or [write to non-copy_on_write area] * I/O takes place directly in object. * 2B: [write to copy_on_write] or [read on NULL uobj] - * data is "promoted" from uobj to a new anon. + * data is "promoted" from uobj to a new anon. * if uobj is null, then we zero fill. * * we follow the standard UVM locking protocol ordering: * - * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ) + * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ) * we hold a PG_BUSY page if we unlock for I/O * * * the code is structured as follows: - * + * * - init the "IN" params in the ufi structure * ReFault: * - do lookups [locks maps], check protection, handle needs_copy @@ -125,7 +125,7 @@ * * alternative 1: unbusy the page in question and restart the page fault * from the top (ReFault). this is easy but does not take advantage - * of the information that we already have from our previous lookup, + * of the information that we already have from our previous lookup, * although it is possible that the "hints" in the vm_map will help here. * * alternative 2: the system already keeps track of a "version" number of @@ -159,7 +159,7 @@ struct uvm_advice { /* * page range array: - * note: index in array must match "advice" value + * note: index in array must match "advice" value * XXX: borrowed numbers from freebsd. do they work well for us? 
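[editor's note] The big comment above classifies faults into cases 1A/1B/2A/2B. A plain-C decision table capturing that taxonomy; the inputs stand for "is there an amap", "is there an anon at the faulting address", "is there a backing object", "is this a write", and "is the mapping copy-on-write".

#include <stdio.h>

static const char *
classify(int have_amap, int have_anon, int have_uobj, int is_write,
    int is_cow, int anon_refs)
{
	if (!have_amap && !have_uobj)
		return "case 0: layerless fault (error)";
	if (have_anon) {
		if (is_write && anon_refs > 1)
			return "case 1B: copy the anon, then write the copy";
		return "case 1A: handled entirely in the anon layer";
	}
	if (!have_uobj || (is_write && is_cow))
		return "case 2B: promote into a new anon (zero-fill if no uobj)";
	return "case 2A: handled directly in the backing object";
}

int
main(void)
{
	printf("%s\n", classify(1, 0, 1, 1, 1, 0));	/* write to a COW file mapping */
	printf("%s\n", classify(1, 1, 0, 1, 0, 2));	/* write to a shared anon */
	printf("%s\n", classify(0, 0, 1, 0, 0, 0));	/* read from a file mapping */
	return 0;
}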
*/ @@ -195,7 +195,7 @@ uvmfault_anonflush(anons, n) { int lcv; struct vm_page *pg; - + for (lcv = 0 ; lcv < n ; lcv++) { if (anons[lcv] == NULL) continue; @@ -204,7 +204,11 @@ uvmfault_anonflush(anons, n) if (pg && (pg->flags & PG_BUSY) == 0 && pg->loan_count == 0) { uvm_lock_pageq(); if (pg->wire_count == 0) { +#ifdef UBC pmap_clear_reference(pg); +#else + pmap_page_protect(pg, VM_PROT_NONE); +#endif uvm_pagedeactivate(pg); } uvm_unlock_pageq(); @@ -248,7 +252,7 @@ uvmfault_amapcopy(ufi) */ if (UVM_ET_ISNEEDSCOPY(ufi->entry)) - amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE, + amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE, ufi->orig_rvaddr, ufi->orig_rvaddr + 1); /* @@ -264,7 +268,7 @@ uvmfault_amapcopy(ufi) /* * got it! unlock and return. */ - + uvmfault_unlockmaps(ufi, TRUE); return; } @@ -276,7 +280,7 @@ uvmfault_amapcopy(ufi) * page in that anon. * * => maps, amap, and anon locked by caller. - * => if we fail (result != 0) we unlock everything. + * => if we fail (result != VM_PAGER_OK) we unlock everything. * => if we are successful, we return with everything still locked. * => we don't move the page on the queues [gets moved later] * => if we allocate a new page [we_own], it gets put on the queues. @@ -296,12 +300,12 @@ uvmfault_anonget(ufi, amap, anon) boolean_t we_own; /* we own anon's page? */ boolean_t locked; /* did we relock? */ struct vm_page *pg; - int error; + int result; UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist); LOCK_ASSERT(simple_lock_held(&anon->an_lock)); - error = 0; + result = 0; /* XXX shut up gcc */ uvmexp.fltanget++; /* bump rusage counters */ if (anon->u.an_page) @@ -309,7 +313,7 @@ uvmfault_anonget(ufi, amap, anon) else curproc->p_addr->u_stats.p_ru.ru_majflt++; - /* + /* * loop until we get it, or fail. */ @@ -342,7 +346,7 @@ uvmfault_anonget(ufi, amap, anon) if ((pg->flags & (PG_BUSY|PG_RELEASED)) == 0) { UVMHIST_LOG(maphist, "<- OK",0,0,0,0); - return (0); + return (VM_PAGER_OK); } pg->flags |= PG_WANTED; uvmexp.fltpgwait++; @@ -369,7 +373,7 @@ uvmfault_anonget(ufi, amap, anon) /* ready to relock and try again */ } else { - + /* * no page, we must try and bring it in. */ @@ -385,9 +389,9 @@ uvmfault_anonget(ufi, amap, anon) /* ready to relock and try again */ } else { - + /* we set the PG_BUSY bit */ - we_own = TRUE; + we_own = TRUE; uvmfault_unlockall(ufi, amap, NULL, anon); /* @@ -398,7 +402,7 @@ uvmfault_anonget(ufi, amap, anon) * we hold PG_BUSY on the page. */ uvmexp.pageins++; - error = uvm_swap_get(pg, anon->an_swslot, + result = uvm_swap_get(pg, anon->an_swslot, PGO_SYNCIO); /* @@ -425,23 +429,23 @@ uvmfault_anonget(ufi, amap, anon) * to clean up after the I/O. there are three cases to * consider: * [1] page released during I/O: free anon and ReFault. - * [2] I/O not OK. free the page and cause the fault + * [2] I/O not OK. free the page and cause the fault * to fail. * [3] I/O OK! activate the page and sync with the * non-we_own case (i.e. drop anon lock if not locked). */ - + if (we_own) { if (pg->flags & PG_WANTED) { /* still holding object lock */ - wakeup(pg); + wakeup(pg); } /* un-busy! */ pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); - /* + /* * if we were RELEASED during I/O, then our anon is * no longer part of an amap. we need to free the * anon and try again. @@ -455,10 +459,12 @@ uvmfault_anonget(ufi, amap, anon) NULL); uvmexp.fltpgrele++; UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); - return (ERESTART); /* refault! */ + return (VM_PAGER_REFAULT); /* refault! 
*/ } - if (error) { + if (result != VM_PAGER_OK) { + KASSERT(result != VM_PAGER_PEND); + /* remove page from anon */ anon->u.an_page = NULL; @@ -486,9 +492,9 @@ uvmfault_anonget(ufi, amap, anon) else simple_unlock(&anon->an_lock); UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0); - return error; + return (VM_PAGER_ERROR); } - + /* * must be OK, clear modify (already PG_CLEAN) * and activate @@ -507,7 +513,7 @@ uvmfault_anonget(ufi, amap, anon) if (!locked) { UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); - return (ERESTART); + return (VM_PAGER_REFAULT); } /* @@ -515,16 +521,16 @@ uvmfault_anonget(ufi, amap, anon) */ if (ufi != NULL && - amap_lookup(&ufi->entry->aref, + amap_lookup(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start) != anon) { - + uvmfault_unlockall(ufi, amap, NULL, anon); UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); - return (ERESTART); + return (VM_PAGER_REFAULT); } - + /* - * try it again! + * try it again! */ uvmexp.fltanretry++; @@ -543,12 +549,11 @@ uvmfault_anonget(ufi, amap, anon) * uvm_fault: page fault handler * * => called from MD code to resolve a page fault - * => VM data structures usually should be unlocked. however, it is + * => VM data structures usually should be unlocked. however, it is * possible to call here with the main map locked if the caller * gets a write lock, sets it recusive, and then calls us (c.f. * uvm_map_pageable). this should be avoided because it keeps * the map locked off during I/O. - * => MUST NEVER BE CALLED IN INTERRUPT CONTEXT */ #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ @@ -556,7 +561,7 @@ uvmfault_anonget(ufi, amap, anon) int uvm_fault(orig_map, vaddr, fault_type, access_type) - struct vm_map *orig_map; + vm_map_t orig_map; vaddr_t vaddr; vm_fault_t fault_type; vm_prot_t access_type; @@ -564,9 +569,9 @@ uvm_fault(orig_map, vaddr, fault_type, access_type) struct uvm_faultinfo ufi; vm_prot_t enter_prot; boolean_t wired, narrow, promote, locked, shadowed; - int npages, nback, nforw, centeridx, error, lcv, gotpages; + int npages, nback, nforw, centeridx, result, lcv, gotpages; vaddr_t startva, objaddr, currva, offset, uoff; - paddr_t pa; + paddr_t pa; struct vm_amap *amap; struct uvm_object *uobj; struct vm_anon *anons_store[UVM_MAXRANGE], **anons, *anon, *oanon; @@ -595,6 +600,19 @@ uvm_fault(orig_map, vaddr, fault_type, access_type) narrow = FALSE; /* normal fault */ /* + * before we do anything else, if this is a fault on a kernel + * address, check to see if the address is managed by an + * interrupt-safe map. If it is, we fail immediately. Intrsafe + * maps are never pageable, and this approach avoids an evil + * locking mess. + */ + if (orig_map == kernel_map && uvmfault_check_intrsafe(&ufi)) { + UVMHIST_LOG(maphist, "<- VA 0x%lx in intrsafe map %p", + ufi.orig_rvaddr, ufi.map, 0, 0); + return (KERN_FAILURE); + } + + /* * "goto ReFault" means restart the page fault from ground zero. 
*/ ReFault: @@ -605,20 +623,10 @@ ReFault: if (uvmfault_lookup(&ufi, FALSE) == FALSE) { UVMHIST_LOG(maphist, "<- no mapping @ 0x%x", vaddr, 0,0,0); - return (EFAULT); + return (KERN_INVALID_ADDRESS); } /* locked: maps(read) */ -#ifdef DIAGNOSTIC - if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0) { - printf("Page fault on non-pageable map:\n"); - printf("ufi.map = %p\n", ufi.map); - printf("ufi.orig_map = %p\n", ufi.orig_map); - printf("ufi.orig_rvaddr = 0x%lx\n", (u_long) ufi.orig_rvaddr); - panic("uvm_fault: (ufi.map->flags & VM_MAP_PAGEABLE) == 0"); - } -#endif - /* * check protection */ @@ -628,7 +636,18 @@ ReFault: "<- protection failure (prot=0x%x, access=0x%x)", ufi.entry->protection, access_type, 0, 0); uvmfault_unlockmaps(&ufi, FALSE); - return EACCES; + return (KERN_PROTECTION_FAILURE); + } + + /* + * if the map is not a pageable map, a page fault always fails. + */ + + if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0) { + UVMHIST_LOG(maphist, + "<- map %p not pageable", ufi.map, 0, 0, 0); + uvmfault_unlockmaps(&ufi, FALSE); + return (KERN_FAILURE); } /* @@ -667,7 +686,7 @@ ReFault: * ensure that we pmap_enter page R/O since * needs_copy is still true */ - enter_prot &= ~VM_PROT_WRITE; + enter_prot &= ~VM_PROT_WRITE; } } @@ -687,13 +706,13 @@ ReFault: if (amap == NULL && uobj == NULL) { uvmfault_unlockmaps(&ufi, FALSE); UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0); - return (EFAULT); + return (KERN_INVALID_ADDRESS); } /* * establish range of interest based on advice from mapper * and then clip to fit map entry. note that we only want - * to do this the first time through the fault. if we + * to do this the first time through the fault. if we * ReFault we will disable this by setting "narrow" to true. */ @@ -718,7 +737,7 @@ ReFault: narrow = TRUE; /* ensure only once per-fault */ } else { - + /* narrow fault! */ nback = nforw = 0; startva = ufi.orig_rvaddr; @@ -758,7 +777,7 @@ ReFault: UVMHIST_LOG(maphist, " MADV_SEQUENTIAL: flushing backpages", 0,0,0,0); /* flush back-page anons? */ - if (amap) + if (amap) uvmfault_anonflush(anons, nback); /* flush object? */ @@ -766,7 +785,7 @@ ReFault: objaddr = (startva - ufi.entry->start) + ufi.entry->offset; simple_lock(&uobj->vmobjlock); - (void) uobj->pgops->pgo_flush(uobj, objaddr, objaddr + + (void) uobj->pgops->pgo_flush(uobj, objaddr, objaddr + (nback << PAGE_SHIFT), PGO_DEACTIVATE); simple_unlock(&uobj->vmobjlock); } @@ -845,12 +864,11 @@ ReFault: (VM_MAPENT_ISWIRED(ufi.entry) ? PMAP_WIRED : 0)); } simple_unlock(&anon->an_lock); - pmap_update(ufi.orig_map->pmap); } /* locked: maps(read), amap(if there) */ /* (shadowed == TRUE) if there is an anon at the faulting address */ - UVMHIST_LOG(maphist, " shadowed=%d, will_get=%d", shadowed, + UVMHIST_LOG(maphist, " shadowed=%d, will_get=%d", shadowed, (uobj && shadowed == FALSE),0,0); /* @@ -860,7 +878,7 @@ ReFault: * XXX Actually, that is bad; pmap_enter() should just fail in that * XXX case. 
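[editor's note] A rough sketch of the "range of interest" computation the code above refers to: the madvise-style advice suggests how many pages to bring in behind (nback) and ahead of (nforw) the faulting page, and both are clipped so the range stays inside the map entry. The advice numbers below are assumptions for the example.

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

int
main(void)
{
	unsigned long entry_start = 0x10000000UL;
	unsigned long entry_end   = 0x10004000UL;	/* a 4-page entry */
	unsigned long fault_va    = 0x10001000UL;	/* fault on its 2nd page */
	unsigned long adv_nback = 3, adv_nforw = 4;	/* assumed advice */

	unsigned long room_back = (fault_va - entry_start) >> PAGE_SHIFT;
	unsigned long room_forw = ((entry_end - fault_va) >> PAGE_SHIFT) - 1;
	unsigned long nback = adv_nback < room_back ? adv_nback : room_back;
	unsigned long nforw = adv_nforw < room_forw ? adv_nforw : room_forw;
	unsigned long startva = fault_va - (nback << PAGE_SHIFT);
	unsigned long npages = nback + nforw + 1;

	printf("fault at 0x%lx -> look at %lu pages starting at 0x%lx\n",
	    fault_va, npages, startva);
	return 0;
}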
--thorpej */ - + /* * if the desired page is not shadowed by the amap and we have a * backing object, then we check to see if the backing object would @@ -873,17 +891,18 @@ ReFault: simple_lock(&uobj->vmobjlock); /* locked: maps(read), amap (if there), uobj */ - error = uobj->pgops->pgo_fault(&ufi, startva, pages, npages, - centeridx, fault_type, access_type, PGO_LOCKED|PGO_SYNCIO); + result = uobj->pgops->pgo_fault(&ufi, startva, pages, npages, + centeridx, fault_type, access_type, + PGO_LOCKED|PGO_SYNCIO); /* locked: nothing, pgo_fault has unlocked everything */ - if (error == ERESTART) + if (result == VM_PAGER_OK) + return (KERN_SUCCESS); /* pgo_fault did pmap enter */ + else if (result == VM_PAGER_REFAULT) goto ReFault; /* try again! */ - /* - * object fault routine responsible for pmap_update(). - */ - return error; + else + return (KERN_PROTECTION_FAILURE); } /* @@ -936,16 +955,16 @@ ReFault: * us a handle to it. remember this * page as "uobjpage." (for later use). */ - + if (lcv == centeridx) { uobjpage = pages[lcv]; UVMHIST_LOG(maphist, " got uobjpage " - "(0x%x) with locked get", + "(0x%x) with locked get", uobjpage, 0,0,0); continue; } - - /* + + /* * note: calling pgo_get with locked data * structures returns us pages which are * neither busy nor released, so we don't @@ -976,7 +995,7 @@ ReFault: PMAP_CANFAIL | (wired ? PMAP_WIRED : 0)); - /* + /* * NOTE: page can't be PG_WANTED or PG_RELEASED * because we've held the lock the whole time * we've had the handle. @@ -985,7 +1004,6 @@ ReFault: pages[lcv]->flags &= ~(PG_BUSY); /* un-busy! */ UVM_PAGE_OWN(pages[lcv], NULL); } /* for "lcv" loop */ - pmap_update(ufi.orig_map->pmap); } /* "gotpages" != 0 */ /* note: object still _locked_ */ } else { @@ -993,7 +1011,7 @@ ReFault: } /* locked (shadowed): maps(read), amap */ - /* locked (!shadowed): maps(read), amap(if there), + /* locked (!shadowed): maps(read), amap(if there), uobj(if !null), uobjpage(if !null) */ /* @@ -1015,7 +1033,7 @@ ReFault: * redirect case 2: if we are not shadowed, go to case 2. */ - if (shadowed == FALSE) + if (shadowed == FALSE) goto Case2; /* locked: maps(read), amap */ @@ -1044,20 +1062,24 @@ ReFault: * lock that object for us if it does not fail. */ - error = uvmfault_anonget(&ufi, amap, anon); - switch (error) { - case 0: - break; + result = uvmfault_anonget(&ufi, amap, anon); + switch (result) { + case VM_PAGER_OK: + break; - case ERESTART: + case VM_PAGER_REFAULT: goto ReFault; - case EAGAIN: + case VM_PAGER_AGAIN: tsleep(&lbolt, PVM, "fltagain1", 0); goto ReFault; default: - return error; +#ifdef DIAGNOSTIC + panic("uvm_fault: uvmfault_anonget -> %d", result); +#else + return (KERN_PROTECTION_FAILURE); +#endif } /* @@ -1069,13 +1091,13 @@ ReFault: /* locked: maps(read), amap, anon, uobj(if one) */ /* - * special handling for loaned pages + * special handling for loaned pages */ if (anon->u.an_page->loan_count) { if ((access_type & VM_PROT_WRITE) == 0) { - + /* * for read faults on loaned pages we just cap the * protection at read-only. @@ -1151,8 +1173,8 @@ ReFault: * also note that the ref count can't drop to zero here because * it is > 1 and we are only dropping one ref. * - * in the (hopefully very rare) case that we are out of RAM we - * will unlock, wait for more RAM, and refault. + * in the (hopefully very rare) case that we are out of RAM we + * will unlock, wait for more RAM, and refault. * * if we are out of anon VM we kill the process (XXX: could wait?). */ @@ -1181,7 +1203,7 @@ ReFault: UVMHIST_LOG(maphist, "<- failed. 
out of VM",0,0,0,0); uvmexp.fltnoanon++; - return ENOMEM; + return (KERN_RESOURCE_SHORTAGE); } uvmexp.fltnoram++; @@ -1229,7 +1251,7 @@ ReFault: ufi.orig_map->pmap, ufi.orig_rvaddr, pg, 0); if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg), enter_prot, access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0)) - != 0) { + != KERN_SUCCESS) { /* * No need to undo what we did; we can simply think of * this as the pmap throwing away the mapping information. @@ -1245,7 +1267,7 @@ ReFault: UVMHIST_LOG(maphist, "<- failed. out of VM",0,0,0,0); /* XXX instrumentation */ - return ENOMEM; + return (KERN_RESOURCE_SHORTAGE); } /* XXX instrumentation */ uvm_wait("flt_pmfail1"); @@ -1284,8 +1306,7 @@ ReFault: if (anon != oanon) simple_unlock(&anon->an_lock); uvmfault_unlockall(&ufi, amap, uobj, oanon); - pmap_update(ufi.orig_map->pmap); - return 0; + return (KERN_SUCCESS); Case2: @@ -1306,7 +1327,7 @@ Case2: */ if (uobj == NULL) { - uobjpage = PGO_DONTCARE; + uobjpage = PGO_DONTCARE; promote = TRUE; /* always need anon here */ } else { KASSERT(uobjpage != PGO_DONTCARE); @@ -1320,7 +1341,7 @@ Case2: * if uobjpage is not null then we do not need to do I/O to get the * uobjpage. * - * if uobjpage is null, then we need to unlock and ask the pager to + * if uobjpage is null, then we need to unlock and ask the pager to * get the data for us. once we have the data, we need to reverify * the state the world. we are currently not holding any resources. */ @@ -1331,7 +1352,7 @@ Case2: } else { /* update rusage counters */ curproc->p_addr->u_stats.p_ru.ru_majflt++; - + /* locked: maps(read), amap(if there), uobj */ uvmfault_unlockall(&ufi, amap, NULL, NULL); /* locked: uobj */ @@ -1339,27 +1360,29 @@ Case2: uvmexp.fltget++; gotpages = 1; uoff = (ufi.orig_rvaddr - ufi.entry->start) + ufi.entry->offset; - error = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages, + result = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages, 0, access_type & MASK(ufi.entry), ufi.entry->advice, PGO_SYNCIO); - /* locked: uobjpage(if no error) */ + /* locked: uobjpage(if result OK) */ /* * recover from I/O */ - if (error) { - if (error == EAGAIN) { + if (result != VM_PAGER_OK) { + KASSERT(result != VM_PAGER_PEND); + + if (result == VM_PAGER_AGAIN) { UVMHIST_LOG(maphist, " pgo_get says TRY AGAIN!",0,0,0,0); - tsleep(&lbolt, PVM, "fltagain2", 0); + tsleep((caddr_t)&lbolt, PVM, "fltagain2", 0); goto ReFault; } UVMHIST_LOG(maphist, "<- pgo_get failed (code %d)", - error, 0,0,0); - return error; + result, 0,0,0); + return (KERN_PROTECTION_FAILURE); /* XXX i/o error */ } /* locked: uobjpage */ @@ -1373,7 +1396,7 @@ Case2: if (locked && amap) amap_lock(amap); simple_lock(&uobj->vmobjlock); - + /* locked(locked): maps(read), amap(if !null), uobj, uobjpage */ /* locked(!locked): uobj, uobjpage */ @@ -1384,10 +1407,10 @@ Case2: */ if ((uobjpage->flags & PG_RELEASED) != 0 || - (locked && amap && + (locked && amap && amap_lookup(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start))) { - if (locked) + if (locked) uvmfault_unlockall(&ufi, amap, NULL, NULL); locked = FALSE; } @@ -1399,7 +1422,7 @@ Case2: if (locked == FALSE) { UVMHIST_LOG(maphist, - " wasn't able to relock after fault: retry", + " wasn't able to relock after fault: retry", 0,0,0,0); if (uobjpage->flags & PG_WANTED) /* still holding object lock */ @@ -1449,7 +1472,7 @@ Case2: * for it above) * - at this point uobjpage could be PG_WANTED (handle later) */ - + if (promote == FALSE) { /* @@ -1553,7 +1576,7 @@ Case2: } /* if loan_count */ } else { - + /* * if we 
are going to promote the data to an anon we * allocate a blank anon here and plug it into our amap. @@ -1610,7 +1633,7 @@ Case2: UVMHIST_LOG(maphist, " promote: out of VM", 0,0,0,0); uvmexp.fltnoanon++; - return ENOMEM; + return (KERN_RESOURCE_SHORTAGE); } UVMHIST_LOG(maphist, " out of RAM, waiting for more", @@ -1635,11 +1658,8 @@ Case2: */ if ((amap_flags(amap) & AMAP_SHARED) != 0) { pmap_page_protect(uobjpage, VM_PROT_NONE); - /* - * XXX: PAGE MIGHT BE WIRED! - */ } - + /* * dispose of uobjpage. it can't be PG_RELEASED * since we still hold the object lock. @@ -1694,7 +1714,8 @@ Case2: KASSERT(access_type == VM_PROT_READ || (pg->flags & PG_RDONLY) == 0); if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg), pg->flags & PG_RDONLY ? VM_PROT_READ : enter_prot, - access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0)) != 0) { + access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0)) + != KERN_SUCCESS) { /* * No need to undo what we did; we can simply think of @@ -1707,11 +1728,11 @@ Case2: if (pg->flags & PG_WANTED) wakeup(pg); /* lock still held */ - /* + /* * note that pg can't be PG_RELEASED since we did not drop * the object lock since the last time we checked. */ - + pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); UVM_PAGE_OWN(pg, NULL); uvmfault_unlockall(&ufi, amap, uobj, anon); @@ -1720,7 +1741,7 @@ Case2: UVMHIST_LOG(maphist, "<- failed. out of VM",0,0,0,0); /* XXX instrumentation */ - return ENOMEM; + return (KERN_RESOURCE_SHORTAGE); } /* XXX instrumentation */ uvm_wait("flt_pmfail2"); @@ -1752,19 +1773,17 @@ Case2: if (pg->flags & PG_WANTED) wakeup(pg); /* lock still held */ - /* - * note that pg can't be PG_RELEASED since we did not drop the object + /* + * note that pg can't be PG_RELEASED since we did not drop the object * lock since the last time we checked. */ - + pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); UVM_PAGE_OWN(pg, NULL); uvmfault_unlockall(&ufi, amap, uobj, anon); - pmap_update(ufi.orig_map->pmap); - UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0); - return 0; + return (KERN_SUCCESS); } @@ -1779,37 +1798,33 @@ Case2: int uvm_fault_wire(map, start, end, access_type) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; vm_prot_t access_type; { vaddr_t va; - int error; + pmap_t pmap; + int rv; + + pmap = vm_map_pmap(map); /* * now fault it in a page at a time. if the fault fails then we have - * to undo what we have done. note that in uvm_fault VM_PROT_NONE + * to undo what we have done. note that in uvm_fault VM_PROT_NONE * is replaced with the max protection if fault_type is VM_FAULT_WIRE. */ - /* - * XXX work around overflowing a vaddr_t. this prevents us from - * wiring the last page in the address space, though. 
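[editor's note] The guard removed just below ("if (start > end)") worked around the exclusive end address overflowing a vaddr_t. A small illustration with a 32-bit "address" type: if a caller asks to wire up to and including the last page of the address space, end wraps to 0 and the per-page loop does nothing, which is exactly what the start > end check caught.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE	4096u

int
main(void)
{
	uint32_t start = 0xfffff000u;		/* last page of a 32-bit space */
	uint32_t end = start + PAGE_SIZE;	/* wraps around to 0 */
	uint32_t va;
	int pages = 0;

	for (va = start; va < end; va += PAGE_SIZE)
		pages++;

	printf("end = 0x%08x, pages visited = %d%s\n", end, pages,
	    start > end ? " (caught by the start > end check)" : "");
	return 0;
}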
- */ - if (start > end) { - return EFAULT; - } - for (va = start ; va < end ; va += PAGE_SIZE) { - error = uvm_fault(map, va, VM_FAULT_WIRE, access_type); - if (error) { + rv = uvm_fault(map, va, VM_FAULT_WIRE, access_type); + if (rv) { if (va != start) { uvm_fault_unwire(map, start, va); } - return error; + return (rv); } } - return 0; + + return (KERN_SUCCESS); } /* @@ -1818,7 +1833,7 @@ uvm_fault_wire(map, start, end, access_type) void uvm_fault_unwire(map, start, end) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; { @@ -1835,10 +1850,10 @@ uvm_fault_unwire(map, start, end) void uvm_fault_unwire_locked(map, start, end) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; { - struct vm_map_entry *entry; + vm_map_entry_t entry; pmap_t pmap = vm_map_pmap(map); vaddr_t va; paddr_t pa; diff --git a/sys/uvm/uvm_fault.h b/sys/uvm/uvm_fault.h index 8bb25b00b12..a0a80dca0a2 100644 --- a/sys/uvm/uvm_fault.h +++ b/sys/uvm/uvm_fault.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_fault.h,v 1.10 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_fault.h,v 1.15 2001/06/02 18:09:26 chs Exp $ */ +/* $OpenBSD: uvm_fault.h,v 1.11 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_fault.h,v 1.14 2000/06/26 14:21:17 mrg Exp $ */ /* * @@ -57,12 +57,12 @@ struct uvm_faultinfo { - struct vm_map *orig_map; /* IN: original map */ + vm_map_t orig_map; /* IN: original map */ vaddr_t orig_rvaddr; /* IN: original rounded VA */ vsize_t orig_size; /* IN: original size of interest */ - struct vm_map *map; /* map (could be a submap) */ + vm_map_t map; /* map (could be a submap) */ unsigned int mapv; /* map's version number */ - struct vm_map_entry *entry; /* map entry (from 'map') */ + vm_map_entry_t entry; /* map entry (from 'map') */ vsize_t size; /* size of interest */ }; @@ -76,9 +76,9 @@ struct uvm_faultinfo { int uvmfault_anonget __P((struct uvm_faultinfo *, struct vm_amap *, struct vm_anon *)); -int uvm_fault_wire __P((struct vm_map *, vaddr_t, vaddr_t, vm_prot_t)); -void uvm_fault_unwire __P((struct vm_map *, vaddr_t, vaddr_t)); -void uvm_fault_unwire_locked __P((struct vm_map *, vaddr_t, vaddr_t)); +int uvm_fault_wire __P((vm_map_t, vaddr_t, vaddr_t, vm_prot_t)); +void uvm_fault_unwire __P((vm_map_t, vaddr_t, vaddr_t)); +void uvm_fault_unwire_locked __P((vm_map_t, vaddr_t, vaddr_t)); #endif /* _KERNEL */ diff --git a/sys/uvm/uvm_fault_i.h b/sys/uvm/uvm_fault_i.h index f262e48f09f..8f8edb93d6a 100644 --- a/sys/uvm/uvm_fault_i.h +++ b/sys/uvm/uvm_fault_i.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_fault_i.h,v 1.8 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_fault_i.h,v 1.14 2001/06/26 17:55:15 thorpej Exp $ */ +/* $OpenBSD: uvm_fault_i.h,v 1.9 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_fault_i.h,v 1.11 2000/06/26 14:21:17 mrg Exp $ */ /* * @@ -41,6 +41,7 @@ /* * uvm_fault_i.h: fault inline functions */ +static boolean_t uvmfault_check_intrsafe __P((struct uvm_faultinfo *)); static boolean_t uvmfault_lookup __P((struct uvm_faultinfo *, boolean_t)); static boolean_t uvmfault_relock __P((struct uvm_faultinfo *)); static void uvmfault_unlockall __P((struct uvm_faultinfo *, struct vm_amap *, @@ -96,6 +97,39 @@ uvmfault_unlockall(ufi, amap, uobj, anon) } /* + * uvmfault_check_intrsafe: check for a virtual address managed by + * an interrupt-safe map. 
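[editor's note] The check being brought back here (uvmfault_check_intrsafe, continued just below) walks the registered interrupt-safe maps and refuses to handle a fault on any address inside one of them. A user-space sketch of the same range test, with the "maps" reduced to min/max pairs in an array; the addresses are hypothetical.

#include <stdio.h>

struct range {
	unsigned long min, max;		/* [min, max) */
};

static int
in_intrsafe_map(const struct range *maps, int n, unsigned long va)
{
	int i;

	for (i = 0; i < n; i++)
		if (va >= maps[i].min && va < maps[i].max)
			return 1;
	return 0;
}

int
main(void)
{
	struct range intrsafe[] = {
		{ 0xd0000000UL, 0xd0100000UL },	/* hypothetical ranges */
		{ 0xd0200000UL, 0xd0280000UL },
	};

	printf("fault at 0xd0040000 rejected? %d\n",
	    in_intrsafe_map(intrsafe, 2, 0xd0040000UL));
	printf("fault at 0xd0180000 rejected? %d\n",
	    in_intrsafe_map(intrsafe, 2, 0xd0180000UL));
	return 0;
}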
+ * + * => caller must provide a uvm_faultinfo structure with the IN + * params properly filled in + * => if we find an intersafe VA, we fill in ufi->map, and return TRUE + */ + +static __inline boolean_t +uvmfault_check_intrsafe(ufi) + struct uvm_faultinfo *ufi; +{ + struct vm_map_intrsafe *vmi; + int s; + + s = vmi_list_lock(); + for (vmi = LIST_FIRST(&vmi_list); vmi != NULL; + vmi = LIST_NEXT(vmi, vmi_list)) { + if (ufi->orig_rvaddr >= vm_map_min(&vmi->vmi_map) && + ufi->orig_rvaddr < vm_map_max(&vmi->vmi_map)) + break; + } + vmi_list_unlock(s); + + if (vmi != NULL) { + ufi->map = &vmi->vmi_map; + return (TRUE); + } + + return (FALSE); +} + +/* * uvmfault_lookup: lookup a virtual address in a map * * => caller must provide a uvm_faultinfo structure with the IN @@ -104,7 +138,7 @@ uvmfault_unlockall(ufi, amap, uobj, anon) * => if the lookup is a success we will return with the maps locked * => if "write_lock" is TRUE, we write_lock the map, otherwise we only * get a read lock. - * => note that submaps can only appear in the kernel and they are + * => note that submaps can only appear in the kernel and they are * required to use the same virtual addresses as the map they * are referenced by (thus address translation between the main * map and the submap is unnecessary). @@ -115,7 +149,7 @@ uvmfault_lookup(ufi, write_lock) struct uvm_faultinfo *ufi; boolean_t write_lock; { - struct vm_map *tmpmap; + vm_map_t tmpmap; /* * init ufi values for lookup. @@ -130,13 +164,6 @@ uvmfault_lookup(ufi, write_lock) */ while (1) { - /* - * Make sure this is not an "interrupt safe" map. - * Such maps are never supposed to be involved in - * a fault. - */ - if (ufi->map->flags & VM_MAP_INTRSAFE) - return (FALSE); /* * lock map @@ -150,7 +177,7 @@ uvmfault_lookup(ufi, write_lock) /* * lookup */ - if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr, + if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr, &ufi->entry)) { uvmfault_unlockmaps(ufi, write_lock); return(FALSE); @@ -212,7 +239,7 @@ uvmfault_relock(ufi) uvmexp.fltrelck++; /* - * relock map. fail if version mismatch (in which case nothing + * relock map. fail if version mismatch (in which case nothing * gets locked). */ diff --git a/sys/uvm/uvm_glue.c b/sys/uvm/uvm_glue.c index 7e6057194f1..b8840cf8f92 100644 --- a/sys/uvm/uvm_glue.c +++ b/sys/uvm/uvm_glue.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_glue.c,v 1.30 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_glue.c,v 1.51 2001/09/10 21:19:42 chris Exp $ */ +/* $OpenBSD: uvm_glue.c,v 1.31 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_glue.c,v 1.44 2001/02/06 19:54:44 eeh Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. 
- * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -148,7 +148,7 @@ uvm_useracc(addr, len, rw) size_t len; int rw; { - struct vm_map *map; + vm_map_t map; boolean_t rv; vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; @@ -191,12 +191,14 @@ uvm_chgkprot(addr, len, rw) for (sva = trunc_page((vaddr_t)addr); sva < eva; sva += PAGE_SIZE) { /* * Extract physical address for the page. + * We use a cheezy hack to differentiate physical + * page 0 from an invalid mapping, not that it + * really matters... */ if (pmap_extract(pmap_kernel(), sva, &pa) == FALSE) panic("chgkprot: invalid page"); pmap_enter(pmap_kernel(), sva, pa, prot, PMAP_WIRED); } - pmap_update(pmap_kernel()); } #endif @@ -214,15 +216,17 @@ uvm_vslock(p, addr, len, access_type) size_t len; vm_prot_t access_type; { - struct vm_map *map; + vm_map_t map; vaddr_t start, end; - int error; + int rv; map = &p->p_vmspace->vm_map; start = trunc_page((vaddr_t)addr); end = round_page((vaddr_t)addr + len); - error = uvm_fault_wire(map, start, end, access_type); - return error; + + rv = uvm_fault_wire(map, start, end, access_type); + + return (rv); } /* @@ -267,7 +271,7 @@ uvm_fork(p1, p2, shared, stack, stacksize, func, arg) void *arg; { struct user *up = p2->p_addr; - int error; + int rv; if (shared == TRUE) { p2->p_vmspace = NULL; @@ -284,10 +288,10 @@ uvm_fork(p1, p2, shared, stack, stacksize, func, arg) * Note the kernel stack gets read/write accesses right off * the bat. */ - error = uvm_fault_wire(kernel_map, (vaddr_t)up, + rv = uvm_fault_wire(kernel_map, (vaddr_t)up, (vaddr_t)up + USPACE, VM_PROT_READ | VM_PROT_WRITE); - if (error) - panic("uvm_fork: uvm_fault_wire failed: %d", error); + if (rv != KERN_SUCCESS) + panic("uvm_fork: uvm_fault_wire failed: %d", rv); /* * p_stats currently points at a field in the user struct. Copy @@ -300,7 +304,7 @@ uvm_fork(p1, p2, shared, stack, stacksize, func, arg) memcpy(&up->u_stats.pstat_startcopy, &p1->p_stats->pstat_startcopy, ((caddr_t)&up->u_stats.pstat_endcopy - (caddr_t)&up->u_stats.pstat_startcopy)); - + /* * cpu_fork() copy and update the pcb, and make the child ready * to run. If this is a normal user fork, the child will exit @@ -500,7 +504,7 @@ uvm_swapout_threads() struct proc *outp, *outp2; int outpri, outpri2; int didswap = 0; - extern int maxslp; + extern int maxslp; /* XXXCDC: should move off to uvmexp. 
or uvm., also in uvm_meter */ #ifdef DEBUG @@ -524,7 +528,7 @@ uvm_swapout_threads() outpri2 = p->p_swtime; } continue; - + case SSLEEP: case SSTOP: if (p->p_slptime >= maxslp) { @@ -559,7 +563,7 @@ uvm_swapout_threads() /* * uvm_swapout: swap out process "p" * - * - currently "swapout" means "unwire U-area" and "pmap_collect()" + * - currently "swapout" means "unwire U-area" and "pmap_collect()" * the pmap. * - XXXCDC: should deactivate all process' private anonymous memory */ diff --git a/sys/uvm/uvm_io.c b/sys/uvm/uvm_io.c index 100e82cfe3b..ecb007827dc 100644 --- a/sys/uvm/uvm_io.c +++ b/sys/uvm/uvm_io.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_io.c,v 1.12 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_io.c,v 1.15 2001/06/02 18:09:26 chs Exp $ */ +/* $OpenBSD: uvm_io.c,v 1.13 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_io.c,v 1.12 2000/06/27 17:29:23 mrg Exp $ */ /* * @@ -61,12 +61,12 @@ int uvm_io(map, uio) - struct vm_map *map; + vm_map_t map; struct uio *uio; { vaddr_t baseva, endva, pageoffset, kva; vsize_t chunksz, togo, sz; - struct vm_map_entry *dead_entries; + vm_map_entry_t dead_entries; int error; /* @@ -106,7 +106,7 @@ uvm_io(map, uio) */ error = uvm_map_extract(map, baseva, chunksz, kernel_map, &kva, - UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG | + UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG | UVM_EXTRACT_FIXPROT); if (error) { @@ -138,7 +138,8 @@ uvm_io(map, uio) */ vm_map_lock(kernel_map); - uvm_unmap_remove(kernel_map, kva, kva + chunksz, &dead_entries); + (void)uvm_unmap_remove(kernel_map, kva, kva+chunksz, + &dead_entries); vm_map_unlock(kernel_map); if (dead_entries != NULL) diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c index afc2ac92d10..652ddafcc77 100644 --- a/sys/uvm/uvm_km.c +++ b/sys/uvm/uvm_km.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_km.c,v 1.26 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_km.c,v 1.51 2001/09/10 21:19:42 chris Exp $ */ +/* $OpenBSD: uvm_km.c,v 1.27 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -78,11 +78,11 @@ * starts at VM_MIN_KERNEL_ADDRESS and goes to VM_MAX_KERNEL_ADDRESS. * note that VM_MIN_KERNEL_ADDRESS is equal to vm_map_min(kernel_map). * - * the kernel_map has several "submaps." submaps can only appear in + * the kernel_map has several "submaps." submaps can only appear in * the kernel_map (user processes can't use them). submaps "take over" * the management of a sub-range of the kernel's address space. submaps * are typically allocated at boot time and are never released. kernel - * virtual address space that is mapped by a submap is locked by the + * virtual address space that is mapped by a submap is locked by the * submap's lock -- not the kernel_map's lock. * * thus, the useful feature of submaps is that they allow us to break @@ -102,19 +102,19 @@ * the kernel allocates its private memory out of special uvm_objects whose * reference count is set to UVM_OBJ_KERN (thus indicating that the objects * are "special" and never die). all kernel objects should be thought of - * as large, fixed-sized, sparsely populated uvm_objects. each kernel + * as large, fixed-sized, sparsely populated uvm_objects. each kernel * object is equal to the size of kernel virtual address space (i.e. the * value "VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS"). * * most kernel private memory lives in kernel_object. the only exception * to this is for memory that belongs to submaps that must be protected - * by splvm(). each of these submaps has their own private kernel + * by splvm(). each of these submaps has their own private kernel * object (e.g. kmem_object, mb_object). * * note that just because a kernel object spans the entire kernel virutal * address space doesn't mean that it has to be mapped into the entire space. - * large chunks of a kernel object's space go unused either because - * that area of kernel VM is unmapped, or there is some other type of + * large chunks of a kernel object's space go unused either because + * that area of kernel VM is unmapped, or there is some other type of * object mapped into that range (e.g. a vnode). for submap's kernel * objects, the only part of the object that can ever be populated is the * offsets that are managed by the submap. @@ -126,7 +126,7 @@ * uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the * kernel map]. if uvm_km_alloc returns virtual address 0xf8235000, * then that means that the page at offset 0x235000 in kernel_object is - * mapped at 0xf8235000. + * mapped at 0xf8235000. * * note that the offsets in kmem_object and mb_object also follow this * rule. this means that the offsets for kmem_object must fall in the @@ -151,7 +151,10 @@ * global data structures */ -struct vm_map *kernel_map = NULL; +vm_map_t kernel_map = NULL; + +struct vmi_list vmi_list; +simple_lock_data_t vmi_list_slock; /* * local data structues @@ -184,6 +187,12 @@ uvm_km_init(start, end) vaddr_t base = VM_MIN_KERNEL_ADDRESS; /* + * first, initialize the interrupt-safe map list. + */ + LIST_INIT(&vmi_list); + simple_lock_init(&vmi_list_slock); + + /* * next, init kernel memory objects. */ @@ -202,7 +211,7 @@ uvm_km_init(start, end) TAILQ_INIT(&kmem_object_store.memq); kmem_object_store.uo_npages = 0; /* we are special. 
we never die */ - kmem_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE; + kmem_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE; uvmexp.kmem_object = &kmem_object_store; /* @@ -215,11 +224,11 @@ uvm_km_init(start, end) TAILQ_INIT(&mb_object_store.memq); mb_object_store.uo_npages = 0; /* we are special. we never die */ - mb_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE; + mb_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE; uvmexp.mb_object = &mb_object_store; /* - * init the map and reserve allready allocated kernel space + * init the map and reserve allready allocated kernel space * before installing. */ @@ -227,9 +236,9 @@ uvm_km_init(start, end) kernel_map_store.pmap = pmap_kernel(); if (uvm_map(&kernel_map_store, &base, start - base, NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, - UVM_INH_NONE, UVM_ADV_RANDOM,UVM_FLAG_FIXED)) != 0) + UVM_INH_NONE, UVM_ADV_RANDOM,UVM_FLAG_FIXED)) != KERN_SUCCESS) panic("uvm_km_init: could not reserve space for kernel"); - + /* * install! */ @@ -266,7 +275,7 @@ uvm_km_suballoc(map, min, max, size, flags, fixed, submap) if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, - UVM_ADV_RANDOM, mapflags)) != 0) { + UVM_ADV_RANDOM, mapflags)) != KERN_SUCCESS) { panic("uvm_km_suballoc: unable to allocate space in parent map"); } @@ -294,7 +303,7 @@ uvm_km_suballoc(map, min, max, size, flags, fixed, submap) * now let uvm_map_submap plug in it... */ - if (uvm_map_submap(map, *min, *max, submap) != 0) + if (uvm_map_submap(map, *min, *max, submap) != KERN_SUCCESS) panic("uvm_km_suballoc: submap allocation failed"); return(submap); @@ -325,7 +334,7 @@ uvm_km_pgremove(uobj, start, end) /* choose cheapest traversal */ by_list = (uobj->uo_npages <= ((end - start) >> PAGE_SHIFT) * UKM_HASH_PENALTY); - + if (by_list) goto loop_by_list; @@ -417,7 +426,7 @@ uvm_km_pgremove_intrsafe(uobj, start, end) /* choose cheapest traversal */ by_list = (uobj->uo_npages <= ((end - start) >> PAGE_SHIFT) * UKM_HASH_PENALTY); - + if (by_list) goto loop_by_list; @@ -472,14 +481,13 @@ loop_by_list: vaddr_t uvm_km_kmemalloc(map, obj, size, flags) - struct vm_map *map; + vm_map_t map; struct uvm_object *obj; vsize_t size; int flags; { vaddr_t kva, loopva; vaddr_t offset; - vsize_t loopsize; struct vm_page *pg; UVMHIST_FUNC("uvm_km_kmemalloc"); UVMHIST_CALLED(maphist); @@ -500,8 +508,8 @@ uvm_km_kmemalloc(map, obj, size, flags) if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, - UVM_ADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) - != 0)) { + UVM_ADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) + != KERN_SUCCESS)) { UVMHIST_LOG(maphist, "<- done (no VM)",0,0,0,0); return(0); } @@ -528,16 +536,15 @@ uvm_km_kmemalloc(map, obj, size, flags) */ loopva = kva; - loopsize = size; - while (loopsize) { + while (size) { simple_lock(&obj->vmobjlock); pg = uvm_pagealloc(obj, offset, NULL, 0); - if (__predict_true(pg != NULL)) { + if (pg) { pg->flags &= ~PG_BUSY; /* new page */ UVM_PAGE_OWN(pg, NULL); } simple_unlock(&obj->vmobjlock); - + /* * out of memory? 
*/ @@ -552,7 +559,7 @@ uvm_km_kmemalloc(map, obj, size, flags) continue; } } - + /* * map it in: note that we call pmap_enter with the map and * object unlocked in case we are kmem_map/kmem_object @@ -570,11 +577,8 @@ uvm_km_kmemalloc(map, obj, size, flags) } loopva += PAGE_SIZE; offset += PAGE_SIZE; - loopsize -= PAGE_SIZE; + size -= PAGE_SIZE; } - - pmap_update(pmap_kernel()); - UVMHIST_LOG(maphist,"<- done (kva=0x%x)", kva,0,0,0); return(kva); } @@ -585,7 +589,7 @@ uvm_km_kmemalloc(map, obj, size, flags) void uvm_km_free(map, addr, size) - struct vm_map *map; + vm_map_t map; vaddr_t addr; vsize_t size; { @@ -601,17 +605,18 @@ uvm_km_free(map, addr, size) void uvm_km_free_wakeup(map, addr, size) - struct vm_map *map; + vm_map_t map; vaddr_t addr; vsize_t size; { - struct vm_map_entry *dead_entries; + vm_map_entry_t dead_entries; vm_map_lock(map); - uvm_unmap_remove(map, trunc_page(addr), round_page(addr + size), - &dead_entries); + (void)uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size), + &dead_entries); wakeup(map); vm_map_unlock(map); + if (dead_entries != NULL) uvm_unmap_detach(dead_entries, 0); } @@ -624,7 +629,7 @@ uvm_km_free_wakeup(map, addr, size) vaddr_t uvm_km_alloc1(map, size, zeroit) - struct vm_map *map; + vm_map_t map; vsize_t size; boolean_t zeroit; { @@ -645,7 +650,7 @@ uvm_km_alloc1(map, size, zeroit) if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, - 0)) != 0)) { + 0)) != KERN_SUCCESS)) { UVMHIST_LOG(maphist,"<- done (no VM)",0,0,0,0); return(0); } @@ -678,7 +683,7 @@ uvm_km_alloc1(map, size, zeroit) FALSE, "km_alloc", 0); continue; /* retry */ } - + /* allocate ram */ pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0); if (pg) { @@ -690,7 +695,7 @@ uvm_km_alloc1(map, size, zeroit) uvm_wait("km_alloc1w"); /* wait for memory */ continue; } - + /* * map it in; note we're never called with an intrsafe * object, so we always use regular old pmap_enter(). @@ -702,9 +707,7 @@ uvm_km_alloc1(map, size, zeroit) offset += PAGE_SIZE; size -= PAGE_SIZE; } - - pmap_update(map->pmap); - + /* * zero on request (note that "size" is now zero due to the above loop * so we need to subtract kva from loopva to reconstruct the size). 
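For orientation, this is roughly how a caller consumes the allocator being reverted here: uvm_km_kmemalloc() hands back a wired, kernel-mapped VA (or 0 on failure) and uvm_km_free_wakeup() returns the range and wakes anyone sleeping on the map. A minimal sketch assuming the kmem_map and uvmexp.kmem_object globals this file already references and the pre-UBC prototypes in this hunk; km_scratch_get/put are illustrative names only.

#include <sys/param.h>
#include <uvm/uvm.h>

/* allocate one wired, kernel-mapped scratch page */
static vaddr_t
km_scratch_get()
{
        vaddr_t va;

        /* flags 0: sleep for VA and pages; UVM_KMF_TRYLOCK would fail instead */
        va = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object, PAGE_SIZE, 0);
        return (va);                    /* 0 means "no VM" */
}

/* give the page back and wake anyone sleeping on kmem_map */
static void
km_scratch_put(va)
        vaddr_t va;
{
        uvm_km_free_wakeup(kmem_map, va, PAGE_SIZE);
}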
@@ -725,7 +728,7 @@ uvm_km_alloc1(map, size, zeroit) vaddr_t uvm_km_valloc(map, size) - struct vm_map *map; + vm_map_t map; vsize_t size; { return(uvm_km_valloc_align(map, size, 0)); @@ -733,7 +736,7 @@ uvm_km_valloc(map, size) vaddr_t uvm_km_valloc_align(map, size, align) - struct vm_map *map; + vm_map_t map; vsize_t size; vsize_t align; { @@ -753,7 +756,7 @@ uvm_km_valloc_align(map, size, align) if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object, UVM_UNKNOWN_OFFSET, align, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, - 0)) != 0)) { + 0)) != KERN_SUCCESS)) { UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); return(0); } @@ -772,7 +775,7 @@ uvm_km_valloc_align(map, size, align) vaddr_t uvm_km_valloc_prefer_wait(map, size, prefer) - struct vm_map *map; + vm_map_t map; vsize_t size; voff_t prefer; { @@ -797,7 +800,7 @@ uvm_km_valloc_prefer_wait(map, size, prefer) if (__predict_true(uvm_map(map, &kva, size, uvm.kernel_object, prefer, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, 0)) - == 0)) { + == KERN_SUCCESS)) { UVMHIST_LOG(maphist,"<- done (kva=0x%x)", kva,0,0,0); return(kva); } @@ -814,7 +817,7 @@ uvm_km_valloc_prefer_wait(map, size, prefer) vaddr_t uvm_km_valloc_wait(map, size) - struct vm_map *map; + vm_map_t map; vsize_t size; { return uvm_km_valloc_prefer_wait(map, size, UVM_UNKNOWN_OFFSET); @@ -835,7 +838,7 @@ uvm_km_valloc_wait(map, size) /* ARGSUSED */ vaddr_t uvm_km_alloc_poolpage1(map, obj, waitok) - struct vm_map *map; + vm_map_t map; struct uvm_object *obj; boolean_t waitok; { @@ -886,7 +889,7 @@ uvm_km_alloc_poolpage1(map, obj, waitok) /* ARGSUSED */ void uvm_km_free_poolpage1(map, addr) - struct vm_map *map; + vm_map_t map; vaddr_t addr; { #if defined(PMAP_UNMAP_POOLPAGE) diff --git a/sys/uvm/uvm_loan.c b/sys/uvm/uvm_loan.c index cc82286e91b..e3c99ea8bb9 100644 --- a/sys/uvm/uvm_loan.c +++ b/sys/uvm/uvm_loan.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_loan.c,v 1.16 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_loan.c,v 1.31 2001/08/27 02:34:29 chuck Exp $ */ +/* $OpenBSD: uvm_loan.c,v 1.17 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_loan.c,v 1.23 2001/01/23 02:27:39 thorpej Exp $ */ /* * @@ -49,7 +49,7 @@ #include <uvm/uvm.h> /* - * "loaned" pages are pages which are (read-only, copy-on-write) loaned + * "loaned" pages are pages which are (read-only, copy-on-write) loaned * from the VM system to other parts of the kernel. this allows page * copying to be avoided (e.g. you can loan pages from objs/anons to * the mbuf system). @@ -75,7 +75,7 @@ * object/anon which the page is owned by. this is a good side-effect, * since a kernel write to a loaned page is an error. * - * owners that want to free their pages and discover that they are + * owners that want to free their pages and discover that they are * loaned out simply "disown" them (the page becomes an orphan). these * pages should be freed when the last loan is dropped. in some cases * an anon may "adopt" an orphaned page. @@ -92,7 +92,7 @@ * use "try" locking. * * loans are typically broken by the following events: - * 1. user-level xwrite fault to a loaned page + * 1. write fault to a loaned page * 2. pageout of clean+inactive O->A loaned page * 3. owner frees page (e.g. 
pager flush) * @@ -105,10 +105,10 @@ * local prototypes */ -static int uvm_loananon __P((struct uvm_faultinfo *, void ***, +static int uvm_loananon __P((struct uvm_faultinfo *, void ***, int, struct vm_anon *)); static int uvm_loanentry __P((struct uvm_faultinfo *, void ***, int)); -static int uvm_loanuobj __P((struct uvm_faultinfo *, void ***, +static int uvm_loanuobj __P((struct uvm_faultinfo *, void ***, int, vaddr_t)); static int uvm_loanzero __P((struct uvm_faultinfo *, void ***, int)); @@ -120,14 +120,10 @@ static int uvm_loanzero __P((struct uvm_faultinfo *, void ***, int)); * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan()) * * => "ufi" is the result of a successful map lookup (meaning that - * on entry the map is locked by the caller) - * => we may unlock and then relock the map if needed (for I/O) + * the maps are locked by the caller) + * => we may unlock the maps if needed (for I/O) * => we put our output result in "output" - * => we always return with the map unlocked - * => possible return values: - * -1 == error, map is unlocked - * 0 == map relock error (try again!), map is unlocked - * >0 == number of pages we loaned, map is unlocked + * => we return the number of pages we loaned, or -1 if we had an error */ static __inline int @@ -144,7 +140,7 @@ uvm_loanentry(ufi, output, flags) int rv, result = 0; /* - * lock us the rest of the way down (we unlock before return) + * lock us the rest of the way down */ if (aref->ar_amap) amap_lock(aref->ar_amap); @@ -166,7 +162,6 @@ uvm_loanentry(ufi, output, flags) anon = NULL; } - /* locked: map, amap, uobj */ if (anon) { rv = uvm_loananon(ufi, output, flags, anon); } else if (uobj) { @@ -174,17 +169,16 @@ uvm_loanentry(ufi, output, flags) } else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) { rv = uvm_loanzero(ufi, output, flags); } else { - rv = -1; /* null map entry... fail now */ + rv = -1; /* null map entry... fail now */ } - /* locked: if (rv > 0) => map, amap, uobj [o.w. unlocked] */ /* total failure */ if (rv < 0) - return(-1); /* everything unlocked */ + return(-1); /* relock failed, need to do another lookup */ if (rv == 0) - return(result); /* everything unlocked */ + return(result); /* * got it... advance to next page @@ -195,13 +189,9 @@ uvm_loanentry(ufi, output, flags) } /* - * unlock what we locked, unlock the maps and return + * unlock everything and return */ - if (aref->ar_amap) - amap_unlock(aref->ar_amap); - if (uobj) - simple_unlock(&uobj->vmobjlock); - uvmfault_unlockmaps(ufi, FALSE); + uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL); return(result); } @@ -210,15 +200,14 @@ uvm_loanentry(ufi, output, flags) */ /* - * uvm_loan: loan pages in a map out to anons or to the kernel - * + * uvm_loan: loan pages out to anons or to the kernel + * * => map should be unlocked * => start and len should be multiples of PAGE_SIZE * => result is either an array of anon's or vm_pages (depending on flags) * => flag values: UVM_LOAN_TOANON - loan to anons * UVM_LOAN_TOPAGE - loan to wired kernel page * one and only one of these flags must be set! 
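A caller-side sketch of the flag rule just stated: loaning to the kernel uses UVM_LOAN_TOPAGE and an array of vm_page pointers, and the loan is dropped again with uvm_unloanpage(). The function name, the errno choice, and the page-aligned arguments are assumptions for illustration; the KERN_* return is the convention this hunk restores.

#include <sys/param.h>
#include <sys/errno.h>
#include <uvm/uvm.h>

/*
 * loan the pages backing [uva, uva+len) in "map" to the kernel as
 * wired pages; uva and len are assumed to be multiples of PAGE_SIZE.
 */
static int
loan_user_buf(map, uva, len, pages)
        vm_map_t map;
        vaddr_t uva;
        vsize_t len;
        struct vm_page **pages;         /* len >> PAGE_SHIFT slots */
{
        if (uvm_loan(map, uva, len, (void **)pages, UVM_LOAN_TOPAGE) !=
            KERN_SUCCESS)
                return (EFAULT);        /* illustrative errno choice */

        /* pages[] are wired and write-protected until given back */

        uvm_unloanpage(pages, (int)(len >> PAGE_SHIFT));
        return (0);
}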
- * => returns 0 (success), or an appropriate error number */ int @@ -231,15 +220,21 @@ uvm_loan(map, start, len, result, flags) { struct uvm_faultinfo ufi; void **output; - int rv, error; + int rv; + +#ifdef DIAGNOSTIC + if (map->flags & VM_MAP_INTRSAFE) + panic("uvm_loan: intrsafe map"); +#endif /* * ensure that one and only one of the flags is set */ - KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^ - ((flags & UVM_LOAN_TOPAGE) == 0)); - KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); + if ((flags & (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE)) == + (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE) || + (flags & (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE)) == 0) + return(KERN_FAILURE); /* * "output" is a pointer to the current place to put the loaned @@ -261,51 +256,40 @@ uvm_loan(map, start, len, result, flags) ufi.orig_map = map; ufi.orig_rvaddr = start; ufi.orig_size = len; - + /* * do the lookup, the only time this will fail is if we hit on * an unmapped region (an error) */ - if (!uvmfault_lookup(&ufi, FALSE)) { - error = ENOENT; + if (!uvmfault_lookup(&ufi, FALSE)) goto fail; - } /* - * map now locked. now do the loanout... + * now do the loanout */ rv = uvm_loanentry(&ufi, &output, flags); - if (rv < 0) { - /* all unlocked due to error */ - error = EINVAL; + if (rv < 0) goto fail; - } /* - * done! the map is unlocked. advance, if possible. - * - * XXXCDC: could be recoded to hold the map lock with - * smarter code (but it only happens on map entry - * boundaries, so it isn't that bad). + * done! advance pointers and unlock. */ - if (rv) { - rv <<= PAGE_SHIFT; - len -= rv; - start += rv; - } + rv <<= PAGE_SHIFT; + len -= rv; + start += rv; + uvmfault_unlockmaps(&ufi, FALSE); } - + /* * got it! return success. */ - return 0; + return(KERN_SUCCESS); fail: /* * fail: failed to do it. drop our loans and return failure code. - * map is already unlocked. */ if (output - result) { if (flags & UVM_LOAN_TOANON) @@ -315,13 +299,12 @@ fail: uvm_unloanpage((struct vm_page **)result, output - result); } - return (error); + return(KERN_FAILURE); } /* * uvm_loananon: loan a page from an anon out - * - * => called with map, amap, uobj locked + * * => return value: * -1 = fatal error, everything is unlocked, abort. * 0 = lookup in ufi went stale, everything unlocked, relookup and @@ -340,16 +323,15 @@ uvm_loananon(ufi, output, flags, anon) int result; /* - * if we are loaning to "another" anon then it is easy, we just + * if we are loaning to another anon then it is easy, we just * bump the reference count on the current anon and return a - * pointer to it (it becomes copy-on-write shared). + * pointer to it. */ if (flags & UVM_LOAN_TOANON) { simple_lock(&anon->an_lock); pg = anon->u.an_page; - /* if (in RAM) and (owned by this anon) and (only 1 ref) */ if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) - /* write-protect it */ + /* read protect it */ pmap_page_protect(pg, VM_PROT_READ); anon->an_ref++; **output = anon; @@ -371,15 +353,16 @@ uvm_loananon(ufi, output, flags, anon) * if we were unable to get the anon, then uvmfault_anonget has * unlocked everything and returned an error code. */ - if (result != 0) { + + if (result != VM_PAGER_OK) { /* need to refault (i.e. refresh our lookup) ? */ - if (result == ERESTART) + if (result == VM_PAGER_REFAULT) return(0); /* "try again"? sleep a bit and retry ... 
*/ - if (result == EAGAIN) { - tsleep(&lbolt, PVM, "loanagain", 0); + if (result == VM_PAGER_AGAIN) { + tsleep((caddr_t)&lbolt, PVM, "loanagain", 0); return(0); } @@ -402,7 +385,7 @@ uvm_loananon(ufi, output, flags, anon) *output = (*output) + 1; /* unlock anon and return success */ - if (pg->uobject) /* XXXCDC: what if this is our uobj? bad */ + if (pg->uobject) simple_unlock(&pg->uobject->vmobjlock); simple_unlock(&anon->an_lock); return(1); @@ -411,7 +394,6 @@ uvm_loananon(ufi, output, flags, anon) /* * uvm_loanuobj: loan a page from a uobj out * - * => called with map, amap, uobj locked * => return value: * -1 = fatal error, everything is unlocked, abort. * 0 = lookup in ufi went stale, everything unlocked, relookup and @@ -439,13 +421,13 @@ uvm_loanuobj(ufi, output, flags, va) * XXXCDC: duplicate code with uvm_fault(). */ - if (uobj->pgops->pgo_get) { /* try locked pgo_get */ + if (uobj->pgops->pgo_get) { npages = 1; pg = NULL; result = uobj->pgops->pgo_get(uobj, va - ufi->entry->start, &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED); } else { - result = EIO; /* must have pgo_get op */ + result = VM_PAGER_ERROR; } /* @@ -453,7 +435,7 @@ uvm_loanuobj(ufi, output, flags, va) * then we fail the loan. */ - if (result != 0 && result != EBUSY) { + if (result != VM_PAGER_OK && result != VM_PAGER_UNLOCK) { uvmfault_unlockall(ufi, amap, uobj, NULL); return(-1); } @@ -462,24 +444,24 @@ uvm_loanuobj(ufi, output, flags, va) * if we need to unlock for I/O, do so now. */ - if (result == EBUSY) { + if (result == VM_PAGER_UNLOCK) { uvmfault_unlockall(ufi, amap, NULL, NULL); - + npages = 1; /* locked: uobj */ result = uobj->pgops->pgo_get(uobj, va - ufi->entry->start, - &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO); + &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, 0); /* locked: <nothing> */ - + /* * check for errors */ - if (result != 0) { - if (result == EAGAIN) { - tsleep(&lbolt, PVM, "fltagain2", 0); + if (result != VM_PAGER_OK) { + if (result == VM_PAGER_AGAIN) { + tsleep((caddr_t)&lbolt, PVM, "fltagain2", 0); return(0); /* redo the lookup and try again */ - } + } return(-1); /* total failure */ } @@ -497,15 +479,15 @@ uvm_loanuobj(ufi, output, flags, va) * that amap slot is still free. if there is a problem we * drop our lock (thus force a lookup refresh/retry). */ - + if ((pg->flags & PG_RELEASED) != 0 || (locked && amap && amap_lookup(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start))) { - + if (locked) uvmfault_unlockall(ufi, amap, NULL, NULL); locked = FALSE; - } + } /* * didn't get the lock? release the page and retry. @@ -544,7 +526,7 @@ uvm_loanuobj(ufi, output, flags, va) * not be PG_RELEASED (we caught this above). */ - if ((flags & UVM_LOAN_TOANON) == 0) { /* loan to wired-kernel page? */ + if ((flags & UVM_LOAN_TOANON) == 0) { /* loan to wired-kernel page? */ uvm_lock_pageq(); if (pg->loan_count == 0) pmap_page_protect(pg, VM_PROT_READ); @@ -563,7 +545,7 @@ uvm_loanuobj(ufi, output, flags, va) /* * must be a loan to an anon. check to see if there is already * an anon associated with this page. if so, then just return - * a reference to this object. the page should already be + * a reference to this object. the page should already be * mapped read-only because it is already on loan. 
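The read-only rule above is the central loan invariant: the first outstanding loan write-protects the page in every pmap, and loan_count tracks how many loans exist. A condensed sketch of that step with an illustrative helper name; the real code does this inline in uvm_loananon()/uvm_loanuobj(), wiring the page only for kernel (TOPAGE) loans, and assumes the headers already included by uvm_loan.c.

/*
 * start one more loan of "pg"; the object or anon owning the page is
 * locked by the caller, as in the functions above.
 */
static void
loan_start(pg, wire)
        struct vm_page *pg;
        boolean_t wire;         /* TRUE for UVM_LOAN_TOPAGE-style loans */
{
        uvm_lock_pageq();
        if (pg->loan_count == 0)
                pmap_page_protect(pg, VM_PROT_READ);    /* first loan: no writers */
        pg->loan_count++;
        if (wire)
                uvm_pagewire(pg);       /* kernel borrowers need the page resident */
        uvm_unlock_pageq();
}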
*/ @@ -583,7 +565,7 @@ uvm_loanuobj(ufi, output, flags, va) UVM_PAGE_OWN(pg, NULL); return(1); } - + /* * need to allocate a new anon */ @@ -619,7 +601,6 @@ uvm_loanuobj(ufi, output, flags, va) /* * uvm_loanzero: "loan" a zero-fill page out * - * => called with map, amap, uobj locked * => return value: * -1 = fatal error, everything is unlocked, abort. * 0 = lookup in ufi went stale, everything unlocked, relookup and @@ -640,7 +621,7 @@ uvm_loanzero(ufi, output, flags) while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) == NULL) { - uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, + uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, ufi->entry->object.uvm_obj, NULL); uvm_wait("loanzero1"); if (!uvmfault_relock(ufi)) @@ -652,7 +633,7 @@ uvm_loanzero(ufi, output, flags) &ufi->entry->object.uvm_obj->vmobjlock); /* ... and try again */ } - + /* got a zero'd page; return */ pg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); @@ -667,7 +648,7 @@ uvm_loanzero(ufi, output, flags) } /* loaning to an anon */ - while ((anon = uvm_analloc()) == NULL || + while ((anon = uvm_analloc()) == NULL || (pg = uvm_pagealloc(NULL, 0, anon, UVM_PGA_ZERO)) == NULL) { /* unlock everything */ @@ -761,7 +742,7 @@ uvm_unloanpage(ploans, npages) panic("uvm_unloanpage: page %p isn't loaned", pg); pg->loan_count--; /* drop loan */ - uvm_pageunwire(pg); /* and unwire */ + uvm_pageunwire(pg); /* and wire */ /* * if page is unowned and we killed last loan, then we can diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index f2ebe948eb9..058d8e53d80 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_map.c,v 1.34 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_map.c,v 1.105 2001/09/10 21:19:42 chris Exp $ */ +/* $OpenBSD: uvm_map.c,v 1.35 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_map.c,v 1.93 2001/02/11 01:34:23 eeh Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -77,7 +77,6 @@ #include <sys/proc.h> #include <sys/malloc.h> #include <sys/pool.h> -#include <sys/kernel.h> #ifdef SYSVSHM #include <sys/shm.h> @@ -106,7 +105,6 @@ struct pool uvm_vmspace_pool; */ struct pool uvm_map_entry_pool; -struct pool uvm_map_entry_kmem_pool; #ifdef PMAP_GROWKERNEL /* @@ -178,12 +176,12 @@ vaddr_t uvm_maxkaddr; * local prototypes */ -static struct vm_map_entry *uvm_mapent_alloc __P((struct vm_map *)); -static void uvm_mapent_copy __P((struct vm_map_entry *, struct vm_map_entry *)); -static void uvm_mapent_free __P((struct vm_map_entry *)); -static void uvm_map_entry_unwire __P((struct vm_map *, struct vm_map_entry *)); -static void uvm_map_reference_amap __P((struct vm_map_entry *, int)); -static void uvm_map_unreference_amap __P((struct vm_map_entry *, int)); +static vm_map_entry_t uvm_mapent_alloc __P((vm_map_t)); +static void uvm_mapent_copy __P((vm_map_entry_t,vm_map_entry_t)); +static void uvm_mapent_free __P((vm_map_entry_t)); +static void uvm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); +static void uvm_map_reference_amap __P((vm_map_entry_t, int)); +static void uvm_map_unreference_amap __P((vm_map_entry_t, int)); /* * local inlines @@ -191,66 +189,66 @@ static void uvm_map_unreference_amap __P((struct vm_map_entry *, int)); /* * uvm_mapent_alloc: allocate a map entry + * + * => XXX: static pool for kernel map? */ -static __inline struct vm_map_entry * +static __inline vm_map_entry_t uvm_mapent_alloc(map) - struct vm_map *map; + vm_map_t map; { - struct vm_map_entry *me; + vm_map_entry_t me; int s; - UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist); + UVMHIST_FUNC("uvm_mapent_alloc"); + UVMHIST_CALLED(maphist); - if (map->flags & VM_MAP_INTRSAFE || cold) { - s = splvm(); + if ((map->flags & VM_MAP_INTRSAFE) == 0 && + map != kernel_map && kernel_map != NULL /* XXX */) { + me = pool_get(&uvm_map_entry_pool, PR_WAITOK); + me->flags = 0; + /* me can't be null, wait ok */ + } else { + s = splvm(); /* protect kentry_free list with splvm */ simple_lock(&uvm.kentry_lock); me = uvm.kentry_free; if (me) uvm.kentry_free = me->next; simple_unlock(&uvm.kentry_lock); splx(s); - if (me == NULL) { - panic("uvm_mapent_alloc: out of static map entries, " - "check MAX_KMAPENT (currently %d)", - MAX_KMAPENT); - } + if (!me) + panic("mapent_alloc: out of static map entries, check MAX_KMAPENT"); me->flags = UVM_MAP_STATIC; - } else if (map == kernel_map) { - me = pool_get(&uvm_map_entry_kmem_pool, PR_WAITOK); - me->flags = UVM_MAP_KMEM; - } else { - me = pool_get(&uvm_map_entry_pool, PR_WAITOK); - me->flags = 0; } - UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", me, - ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0); + UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", + me, ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map) + ? TRUE : FALSE, 0, 0); return(me); } /* * uvm_mapent_free: free map entry + * + * => XXX: static pool for kernel map? 
*/ static __inline void uvm_mapent_free(me) - struct vm_map_entry *me; + vm_map_entry_t me; { int s; - UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist); - - UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]", + UVMHIST_FUNC("uvm_mapent_free"); + UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]", me, me->flags, 0, 0); - if (me->flags & UVM_MAP_STATIC) { - s = splvm(); + if ((me->flags & UVM_MAP_STATIC) == 0) { + pool_put(&uvm_map_entry_pool, me); + } else { + s = splvm(); /* protect kentry_free list with splvm */ simple_lock(&uvm.kentry_lock); me->next = uvm.kentry_free; uvm.kentry_free = me; simple_unlock(&uvm.kentry_lock); splx(s); - } else if (me->flags & UVM_MAP_KMEM) { - pool_put(&uvm_map_entry_kmem_pool, me); - } else { - pool_put(&uvm_map_entry_pool, me); } } @@ -260,11 +258,11 @@ uvm_mapent_free(me) static __inline void uvm_mapent_copy(src, dst) - struct vm_map_entry *src; - struct vm_map_entry *dst; + vm_map_entry_t src; + vm_map_entry_t dst; { - memcpy(dst, src, - ((char *)&src->uvm_map_entry_stop_copy) - ((char *)src)); + + memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - ((char*)src)); } /* @@ -275,9 +273,10 @@ uvm_mapent_copy(src, dst) static __inline void uvm_map_entry_unwire(map, entry) - struct vm_map *map; - struct vm_map_entry *entry; + vm_map_t map; + vm_map_entry_t entry; { + entry->wired_count = 0; uvm_fault_unwire_locked(map, entry->start, entry->end); } @@ -288,34 +287,34 @@ uvm_map_entry_unwire(map, entry) */ static __inline void uvm_map_reference_amap(entry, flags) - struct vm_map_entry *entry; + vm_map_entry_t entry; int flags; { - amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, + amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, (entry->end - entry->start) >> PAGE_SHIFT, flags); } /* - * wrapper for calling amap_unref() + * wrapper for calling amap_unref() */ static __inline void uvm_map_unreference_amap(entry, flags) - struct vm_map_entry *entry; + vm_map_entry_t entry; int flags; { - amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, + amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, (entry->end - entry->start) >> PAGE_SHIFT, flags); } /* * uvm_map_init: init mapping system at boot time. note that we allocate - * and init the static pool of struct vm_map_entry *'s for the kernel here. + * and init the static pool of vm_map_entry_t's for the kernel here. */ void -uvm_map_init() +uvm_map_init() { static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; #if defined(UVMHIST) @@ -361,8 +360,6 @@ uvm_map_init() pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 0, 0, "vmmpepl", 0, pool_page_alloc_nointr, pool_page_free_nointr, M_VMMAP); - pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), - 0, 0, 0, "vmmpekpl", 0, NULL, NULL, M_VMMAP); } /* @@ -372,19 +369,18 @@ uvm_map_init() /* * uvm_map_clip_start: ensure that the entry begins at or after * the starting address, if it doesn't we split the entry. 
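Clipping exists so range operations can work on whole entries: the first and last entries overlapping [start, end) are split at the boundaries, and the walk then never touches anything outside the range. A schematic of the pattern the range functions further down (protect, inherit, advice, pageable) all follow, assuming a page-aligned range; range_walk is an illustrative name and the loop body is a placeholder.

static void
range_walk(map, start, end)
        vm_map_t map;
        vaddr_t start, end;
{
        vm_map_entry_t entry;

        vm_map_lock(map);
        if (uvm_map_lookup_entry(map, start, &entry)) {
                UVM_MAP_CLIP_START(map, entry, start);  /* may split entry */
        } else {
                entry = entry->next;    /* start falls in a hole */
        }

        while (entry != &map->header && entry->start < end) {
                UVM_MAP_CLIP_END(map, entry, end);      /* split last entry */
                /* ... operate on exactly [entry->start, entry->end) ... */
                entry = entry->next;
        }
        vm_map_unlock(map);
}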
- * + * * => caller should use UVM_MAP_CLIP_START macro rather than calling * this directly * => map must be locked by caller */ -void -uvm_map_clip_start(map, entry, start) - struct vm_map *map; - struct vm_map_entry *entry; - vaddr_t start; +void uvm_map_clip_start(map, entry, start) + vm_map_t map; + vm_map_entry_t entry; + vaddr_t start; { - struct vm_map_entry *new_entry; + vm_map_entry_t new_entry; vaddr_t new_adj; /* uvm_map_simplify_entry(map, entry); */ /* XXX */ @@ -398,7 +394,7 @@ uvm_map_clip_start(map, entry, start) new_entry = uvm_mapent_alloc(map); uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ - new_entry->end = start; + new_entry->end = start; new_adj = start - new_entry->start; if (entry->object.uvm_obj) entry->offset += new_adj; /* shift start over */ @@ -414,7 +410,7 @@ uvm_map_clip_start(map, entry, start) /* ... unlikely to happen, but play it safe */ uvm_map_reference(new_entry->object.sub_map); } else { - if (UVM_ET_ISOBJ(entry) && + if (UVM_ET_ISOBJ(entry) && entry->object.uvm_obj->pgops && entry->object.uvm_obj->pgops->pgo_reference) entry->object.uvm_obj->pgops->pgo_reference( @@ -425,7 +421,7 @@ uvm_map_clip_start(map, entry, start) /* * uvm_map_clip_end: ensure that the entry ends at or before * the ending address, if it does't we split the reference - * + * * => caller should use UVM_MAP_CLIP_END macro rather than calling * this directly * => map must be locked by caller @@ -433,11 +429,11 @@ uvm_map_clip_start(map, entry, start) void uvm_map_clip_end(map, entry, end) - struct vm_map *map; - struct vm_map_entry *entry; + vm_map_t map; + vm_map_entry_t entry; vaddr_t end; { - struct vm_map_entry * new_entry; + vm_map_entry_t new_entry; vaddr_t new_adj; /* #bytes we move start forward */ /* @@ -487,7 +483,7 @@ uvm_map_clip_end(map, entry, end) * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER * [3] <uobj,uoffset> == normal mapping * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA - * + * * case [4] is for kernel mappings where we don't know the offset until * we've found a virtual address. note that kernel object offsets are * always relative to vm_map_min(kernel_map). @@ -502,7 +498,7 @@ uvm_map_clip_end(map, entry, end) int uvm_map(map, startp, size, uobj, uoffset, align, flags) - struct vm_map *map; + vm_map_t map; vaddr_t *startp; /* IN/OUT */ vsize_t size; struct uvm_object *uobj; @@ -510,7 +506,7 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) vsize_t align; uvm_flag_t flags; { - struct vm_map_entry *prev_entry, *new_entry; + vm_map_entry_t prev_entry, new_entry; vm_prot_t prot = UVM_PROTECTION(flags), maxprot = UVM_MAXPROTECTION(flags); vm_inherit_t inherit = UVM_INHERIT(flags); @@ -527,9 +523,9 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) */ if ((prot & maxprot) != prot) { - UVMHIST_LOG(maphist, "<- prot. failure: prot=0x%x, max=0x%x", + UVMHIST_LOG(maphist, "<- prot. 
failure: prot=0x%x, max=0x%x", prot, maxprot,0,0); - return EACCES; + return(KERN_PROTECTION_FAILURE); } /* @@ -538,14 +534,14 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) if (vm_map_lock_try(map) == FALSE) { if (flags & UVM_FLAG_TRYLOCK) - return EAGAIN; + return(KERN_FAILURE); vm_map_lock(map); /* could sleep here */ } - if ((prev_entry = uvm_map_findspace(map, *startp, size, startp, + if ((prev_entry = uvm_map_findspace(map, *startp, size, startp, uobj, uoffset, align, flags)) == NULL) { UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",0,0,0,0); vm_map_unlock(map); - return ENOMEM; + return (KERN_NO_SPACE); } #ifdef PMAP_GROWKERNEL @@ -563,11 +559,11 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) /* * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER - * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in - * either case we want to zero it before storing it in the map entry + * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in + * either case we want to zero it before storing it in the map entry * (because it looks strange and confusing when debugging...) - * - * if uobj is not null + * + * if uobj is not null * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping * and we do not need to change uoffset. * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset @@ -593,7 +589,7 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) * for a stack, but we are currently allocating our stack in advance. */ - if ((flags & UVM_FLAG_NOMERGE) == 0 && + if ((flags & UVM_FLAG_NOMERGE) == 0 && prev_entry->end == *startp && prev_entry != &map->header && prev_entry->object.uvm_obj == uobj) { @@ -604,7 +600,7 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) if (UVM_ET_ISSUBMAP(prev_entry)) goto step3; - if (prev_entry->protection != prot || + if (prev_entry->protection != prot || prev_entry->max_protection != maxprot) goto step3; @@ -614,10 +610,10 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) /* wiring status must match (new area is unwired) */ if (VM_MAPENT_ISWIRED(prev_entry)) - goto step3; + goto step3; /* - * can't extend a shared amap. note: no need to lock amap to + * can't extend a shared amap. note: no need to lock amap to * look at refs since we don't care about its exact value. * if it is one (i.e. we have only reference) it will stay there */ @@ -648,7 +644,7 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); vm_map_unlock(map); - return 0; + return (KERN_SUCCESS); } step3: @@ -656,11 +652,11 @@ step3: /* * check for possible forward merge (which we don't do) and count - * the number of times we missed a *possible* chance to merge more + * the number of times we missed a *possible* chance to merge more */ if ((flags & UVM_FLAG_NOMERGE) == 0 && - prev_entry->next != &map->header && + prev_entry->next != &map->header && prev_entry->next->start == (*startp + size)) UVMCNT_INCR(map_forwmerge); @@ -674,7 +670,7 @@ step3: new_entry->object.uvm_obj = uobj; new_entry->offset = uoffset; - if (uobj) + if (uobj) new_entry->etype = UVM_ET_OBJ; else new_entry->etype = 0; @@ -695,7 +691,7 @@ step3: * to_add: for BSS we overallocate a little since we * are likely to extend */ - vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? + vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? 
UVM_AMAP_CHUNK << PAGE_SHIFT : 0; struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK); new_entry->aref.ar_pageoff = 0; @@ -704,7 +700,9 @@ step3: new_entry->aref.ar_pageoff = 0; new_entry->aref.ar_amap = NULL; } + uvm_map_entry_link(map, prev_entry, new_entry); + map->size += size; /* @@ -717,7 +715,7 @@ step3: UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); vm_map_unlock(map); - return 0; + return(KERN_SUCCESS); } /* @@ -730,12 +728,12 @@ step3: boolean_t uvm_map_lookup_entry(map, address, entry) - struct vm_map *map; + vm_map_t map; vaddr_t address; - struct vm_map_entry **entry; /* OUT */ + vm_map_entry_t *entry; /* OUT */ { - struct vm_map_entry *cur; - struct vm_map_entry *last; + vm_map_entry_t cur; + vm_map_entry_t last; UVMHIST_FUNC("uvm_map_lookup_entry"); UVMHIST_CALLED(maphist); @@ -756,7 +754,6 @@ uvm_map_lookup_entry(map, address, entry) UVMCNT_INCR(uvm_mlk_call); if (address >= cur->start) { - /* * go from hint to end of list. * @@ -768,7 +765,6 @@ uvm_map_lookup_entry(map, address, entry) * at the header, in which case the hint didn't * buy us anything anyway). */ - last = &map->header; if ((cur != last) && (cur->end > address)) { UVMCNT_INCR(uvm_mlk_hint); @@ -778,11 +774,9 @@ uvm_map_lookup_entry(map, address, entry) return (TRUE); } } else { - /* * go from start to hint, *inclusively* */ - last = cur->next; cur = map->header.next; } @@ -828,9 +822,9 @@ uvm_map_lookup_entry(map, address, entry) * => note this is a cross between the old vm_map_findspace and vm_map_find */ -struct vm_map_entry * +vm_map_entry_t uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) - struct vm_map *map; + vm_map_t map; vaddr_t hint; vsize_t length; vaddr_t *result; /* OUT */ @@ -839,12 +833,12 @@ uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) vsize_t align; int flags; { - struct vm_map_entry *entry, *next, *tmp; + vm_map_entry_t entry, next, tmp; vaddr_t end, orig_hint; UVMHIST_FUNC("uvm_map_findspace"); UVMHIST_CALLED(maphist); - UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)", + UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)", map, hint, length, flags); KASSERT((align & (align - 1)) == 0); KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); @@ -875,7 +869,7 @@ uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) */ if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) { - if ((entry = map->first_free) != &map->header) + if ((entry = map->first_free) != &map->header) hint = entry->end; } else { if (uvm_map_lookup_entry(map, hint, &tmp)) { @@ -896,9 +890,7 @@ uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) * note: entry->end = base VA of current gap, * next->start = VA of end of current gap */ - for (;; hint = (entry = next)->end) { - /* * Find the end of the proposed new region. Be sure we didn't * go beyond the end of the map, or wrap around the address; @@ -912,7 +904,6 @@ uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) * push hint forward as needed to avoid VAC alias problems. * we only do this if a valid offset is specified. 
*/ - if ((flags & UVM_FLAG_FIXED) == 0 && uoffset != UVM_UNKNOWN_OFFSET) PMAP_PREFER(uoffset, &hint); @@ -957,21 +948,22 @@ uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) /* * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") * - * => caller must check alignment and size + * => caller must check alignment and size * => map must be locked by caller * => we return a list of map entries that we've remove from the map * in "entry_list" */ -void +int uvm_unmap_remove(map, start, end, entry_list) - struct vm_map *map; - vaddr_t start, end; - struct vm_map_entry **entry_list; /* OUT */ + vm_map_t map; + vaddr_t start,end; + vm_map_entry_t *entry_list; /* OUT */ { - struct vm_map_entry *entry, *first_entry, *next; + vm_map_entry_t entry, first_entry, next; vaddr_t len; - UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist); + UVMHIST_FUNC("uvm_unmap_remove"); + UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)", map, start, end, 0); @@ -981,13 +973,13 @@ uvm_unmap_remove(map, start, end, entry_list) /* * find first entry */ - if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) { /* clip and go... */ entry = first_entry; UVM_MAP_CLIP_START(map, entry, start); /* critical! prevents stale hint */ SAVE_HINT(map, entry, entry->prev); + } else { entry = first_entry->next; } @@ -1011,14 +1003,13 @@ uvm_unmap_remove(map, start, end, entry_list) * [3] dropping references may trigger pager I/O, and if we hit * a pager that does synchronous I/O we may have to wait for it. * [4] we would like all waiting for I/O to occur with maps unlocked - * so that we don't block other threads. + * so that we don't block other threads. */ - first_entry = NULL; *entry_list = NULL; /* to be safe */ /* - * break up the area into map entry sized regions and unmap. note + * break up the area into map entry sized regions and unmap. note * that all mappings have to be removed before we can even consider * dropping references to amaps or VM objects (otherwise we could end * up with a mapping to a page on the free list which would be very bad) @@ -1026,7 +1017,7 @@ uvm_unmap_remove(map, start, end, entry_list) while ((entry != &map->header) && (entry->start < end)) { - UVM_MAP_CLIP_END(map, entry, end); + UVM_MAP_CLIP_END(map, entry, end); next = entry->next; len = entry->end - entry->start; @@ -1042,7 +1033,6 @@ uvm_unmap_remove(map, start, end, entry_list) * special case: handle mappings to anonymous kernel objects. * we want to free these pages right away... */ - if (UVM_ET_ISOBJ(entry) && UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { KASSERT(vm_map_pmap(map) == pmap_kernel()); @@ -1068,8 +1058,8 @@ uvm_unmap_remove(map, start, end, entry_list) * doesn't hurt to call uvm_km_pgremove just to be * safe?] * - * uvm_km_pgremove currently does the following: - * for pages in the kernel object in range: + * uvm_km_pgremove currently does the following: + * for pages in the kernel object in range: * - drops the swap slot * - uvm_pagefree the page * @@ -1082,7 +1072,6 @@ uvm_unmap_remove(map, start, end, entry_list) * from the object. offsets are always relative * to vm_map_min(kernel_map). 
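To make the offset rule concrete: a kernel virtual address corresponds to the kernel-object page at (va - vm_map_min(kernel_map)), which is exactly the arithmetic the pgremove calls here pass along. A tiny sketch with an illustrative helper name:

/* kernel-object offset backing a kernel virtual address */
static vaddr_t
kva_to_kobj_off(kva)
        vaddr_t kva;
{
        return (kva - vm_map_min(kernel_map));
}

/*
 * e.g. the uvm_km.c example earlier in this diff: a page mapped at
 * 0xf8235000 sits at offset 0x235000 in kernel_object when
 * VM_MIN_KERNEL_ADDRESS is 0xf8000000.
 */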
*/ - if (UVM_OBJ_IS_INTRSAFE_OBJECT(entry->object.uvm_obj)) { pmap_kremove(entry->start, len); uvm_km_pgremove_intrsafe(entry->object.uvm_obj, @@ -1100,24 +1089,20 @@ uvm_unmap_remove(map, start, end, entry_list) * null out kernel_object reference, we've just * dropped it */ - entry->etype &= ~UVM_ET_OBJ; entry->object.uvm_obj = NULL; /* to be safe */ } else { - /* * remove mappings the standard way. */ - pmap_remove(map->pmap, entry->start, entry->end); } /* - * remove entry from map and put it on our list of entries + * remove entry from map and put it on our list of entries * that we've nuked. then go do next entry. */ - UVMHIST_LOG(maphist, " removed map entry 0x%x", entry, 0, 0,0); /* critical! prevents stale hint */ @@ -1129,15 +1114,15 @@ uvm_unmap_remove(map, start, end, entry_list) first_entry = entry; entry = next; /* next entry, please */ } - pmap_update(vm_map_pmap(map)); /* * now we've cleaned up the map and are ready for the caller to drop - * references to the mapped objects. + * references to the mapped objects. */ *entry_list = first_entry; UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); + return(KERN_SUCCESS); } /* @@ -1148,17 +1133,17 @@ uvm_unmap_remove(map, start, end, entry_list) void uvm_unmap_detach(first_entry, flags) - struct vm_map_entry *first_entry; + vm_map_entry_t first_entry; int flags; { - struct vm_map_entry *next_entry; + vm_map_entry_t next_entry; UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); while (first_entry) { KASSERT(!VM_MAPENT_ISWIRED(first_entry)); UVMHIST_LOG(maphist, - " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d", - first_entry, first_entry->aref.ar_amap, + " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d", + first_entry, first_entry->aref.ar_amap, first_entry->object.uvm_obj, UVM_ET_ISSUBMAP(first_entry)); @@ -1182,6 +1167,7 @@ uvm_unmap_detach(first_entry, flags) first_entry->object.uvm_obj->pgops-> pgo_detach(first_entry->object.uvm_obj); } + next_entry = first_entry->next; uvm_mapent_free(first_entry); first_entry = next_entry; @@ -1193,10 +1179,10 @@ uvm_unmap_detach(first_entry, flags) * E X T R A C T I O N F U N C T I O N S */ -/* +/* * uvm_map_reserve: reserve space in a vm_map for future use. 
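A sketch of what a reservation looks like from the caller's side, e.g. for a two-step transfer like uvm_map_extract() further down: grab a blank VM_PROT_NONE entry covering the range now, and later fill it with uvm_map_replace() or give it back with uvm_unmap(). The wrapper name and the errno are illustrative; the TRUE/FALSE return is as declared in this hunk.

static int
reserve_blank_range(dstmap, len, dstaddrp)
        vm_map_t dstmap;
        vsize_t len;
        vaddr_t *dstaddrp;
{
        *dstaddrp = 0;          /* no address hint */
        if (uvm_map_reserve(dstmap, len, UVM_UNKNOWN_OFFSET, 0, dstaddrp)
            == FALSE)
                return (ENOMEM);        /* illustrative errno */

        /*
         * [*dstaddrp, *dstaddrp + len) is now a blank VM_PROT_NONE entry;
         * replace it with real entries or unmap it later.
         */
        return (0);
}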
* - * => we reserve space in a map by putting a dummy map entry in the + * => we reserve space in a map by putting a dummy map entry in the * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) * => map should be unlocked (we will write lock it) * => we return true if we were able to reserve space @@ -1205,13 +1191,13 @@ uvm_unmap_detach(first_entry, flags) int uvm_map_reserve(map, size, offset, align, raddr) - struct vm_map *map; + vm_map_t map; vsize_t size; vaddr_t offset; /* hint for pmap_prefer */ vsize_t align; /* alignment hint */ vaddr_t *raddr; /* IN:hint, OUT: reserved VA */ { - UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); + UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)", map,size,offset,raddr); @@ -1226,20 +1212,20 @@ uvm_map_reserve(map, size, offset, align, raddr) if (uvm_map(map, raddr, size, NULL, offset, 0, UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, - UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) { + UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != KERN_SUCCESS) { UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); return (FALSE); - } + } UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0); return (TRUE); } /* - * uvm_map_replace: replace a reserved (blank) area of memory with + * uvm_map_replace: replace a reserved (blank) area of memory with * real mappings. * - * => caller must WRITE-LOCK the map + * => caller must WRITE-LOCK the map * => we return TRUE if replacement was a success * => we expect the newents chain to have nnewents entrys on it and * we expect newents->prev to point to the last entry on the list @@ -1250,10 +1236,10 @@ int uvm_map_replace(map, start, end, newents, nnewents) struct vm_map *map; vaddr_t start, end; - struct vm_map_entry *newents; + vm_map_entry_t newents; int nnewents; { - struct vm_map_entry *oldent, *last; + vm_map_entry_t oldent, last; /* * first find the blank map entry at the specified address @@ -1267,19 +1253,17 @@ uvm_map_replace(map, start, end, newents, nnewents) * check to make sure we have a proper blank entry */ - if (oldent->start != start || oldent->end != end || + if (oldent->start != start || oldent->end != end || oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { return (FALSE); } #ifdef DIAGNOSTIC - /* * sanity check the newents chain */ - { - struct vm_map_entry *tmpent = newents; + vm_map_entry_t tmpent = newents; int nent = 0; vaddr_t cur = start; @@ -1313,7 +1297,8 @@ uvm_map_replace(map, start, end, newents, nnewents) */ if (newents) { - last = newents->prev; + + last = newents->prev; /* we expect this */ /* critical: flush stale hints out of map */ SAVE_HINT(map, map->hint, newents); @@ -1366,15 +1351,15 @@ uvm_map_replace(map, start, end, newents, nnewents) int uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) - struct vm_map *srcmap, *dstmap; + vm_map_t srcmap, dstmap; vaddr_t start, *dstaddrp; vsize_t len; int flags; { vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge, oldstart; - struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry, - *deadentry, *oldentry; + vm_map_entry_t chain, endchain, entry, orig_entry, newentry, deadentry; + vm_map_entry_t oldentry; vsize_t elen; int nchain, error, copy_ok; UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); @@ -1404,7 +1389,7 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) UVMHIST_LOG(maphist, " dstaddr=0x%x", dstaddr,0,0,0); /* - * step 2: setup for the extraction process loop by init'ing the + * 
step 2: setup for the extraction process loop by init'ing the * map entry chain, locking src map, and looking up the first useful * entry in the map. */ @@ -1514,8 +1499,8 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) newentry->offset = 0; } newentry->etype = entry->etype; - newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? - entry->max_protection : entry->protection; + newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? + entry->max_protection : entry->protection; newentry->max_protection = entry->max_protection; newentry->inheritance = entry->inheritance; newentry->wired_count = 0; @@ -1540,7 +1525,7 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) } /* end of 'while' loop! */ - if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && + if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && (entry->next == &srcmap->header || entry->next->start != entry->end)) { error = EINVAL; @@ -1559,7 +1544,7 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) /* * step 5: attempt to lock the dest map so we can pmap_copy. - * note usage of copy_ok: + * note usage of copy_ok: * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 */ @@ -1610,7 +1595,7 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) /* we advance "entry" in the following if statement */ if (flags & UVM_EXTRACT_REMOVE) { - pmap_remove(srcmap->pmap, entry->start, + pmap_remove(srcmap->pmap, entry->start, entry->end); oldentry = entry; /* save entry */ entry = entry->next; /* advance */ @@ -1625,7 +1610,6 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) /* end of 'while' loop */ fudge = 0; } - pmap_update(srcmap->pmap); /* * unlock dstmap. we will dispose of deadentry in @@ -1635,9 +1619,9 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) if (copy_ok && srcmap != dstmap) vm_map_unlock(dstmap); - } else { - deadentry = NULL; } + else + deadentry = NULL; /* XXX: gcc */ /* * step 7: we are done with the source map, unlock. if copy_ok @@ -1688,7 +1672,7 @@ bad2: /* src already unlocked */ * call [with uobj==NULL] to create a blank map entry in the main map. * [And it had better still be blank!] * => maps which contain submaps should never be copied or forked. - * => to remove a submap, use uvm_unmap() on the main map + * => to remove a submap, use uvm_unmap() on the main map * and then uvm_map_deallocate() the submap. * => main map must be unlocked. * => submap must have been init'd and have a zero reference count. 
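To make the submap rules concrete: submaps are normally created once at boot through uvm_km_suballoc(), which carves the VA range out of the parent and installs it via uvm_map_submap(), panicking rather than failing. A boot-time sketch with illustrative names; the size and the flags value (0 here, could request e.g. an interrupt-safe map) are arbitrary.

#include <sys/param.h>
#include <uvm/uvm.h>

vm_map_t my_submap;             /* illustrative global */

void
my_submap_init()
{
        vaddr_t minaddr, maxaddr;

        /*
         * carve 16 pages of KVA out of kernel_map and manage that range
         * with a private map.  FALSE == let the parent pick the VA;
         * NULL == allocate the vm_map structure for us.
         */
        my_submap = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
            16 * PAGE_SIZE, 0, FALSE, NULL);
}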
@@ -1697,11 +1681,11 @@ bad2: /* src already unlocked */ int uvm_map_submap(map, start, end, submap) - struct vm_map *map, *submap; + vm_map_t map, submap; vaddr_t start, end; { - struct vm_map_entry *entry; - int error; + vm_map_entry_t entry; + int result; vm_map_lock(map); @@ -1714,7 +1698,7 @@ uvm_map_submap(map, start, end, submap) entry = NULL; } - if (entry != NULL && + if (entry != NULL && entry->start == start && entry->end == end && entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { @@ -1722,12 +1706,12 @@ uvm_map_submap(map, start, end, submap) entry->object.sub_map = submap; entry->offset = 0; uvm_map_reference(submap); - error = 0; + result = KERN_SUCCESS; } else { - error = EINVAL; + result = KERN_INVALID_ARGUMENT; } vm_map_unlock(map); - return error; + return(result); } @@ -1740,22 +1724,25 @@ uvm_map_submap(map, start, end, submap) #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ ~VM_PROT_WRITE : VM_PROT_ALL) +#define max(a,b) ((a) > (b) ? (a) : (b)) int uvm_map_protect(map, start, end, new_prot, set_max) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; vm_prot_t new_prot; boolean_t set_max; { - struct vm_map_entry *current, *entry; - int error = 0; + vm_map_entry_t current, entry; + int rv = KERN_SUCCESS; UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)", map, start, end, new_prot); vm_map_lock(map); + VM_MAP_RANGE_CHECK(map, start, end); + if (uvm_map_lookup_entry(map, start, &entry)) { UVM_MAP_CLIP_START(map, entry, start); } else { @@ -1769,11 +1756,11 @@ uvm_map_protect(map, start, end, new_prot, set_max) current = entry; while ((current != &map->header) && (current->start < end)) { if (UVM_ET_ISSUBMAP(current)) { - error = EINVAL; + rv = KERN_INVALID_ARGUMENT; goto out; } if ((new_prot & current->max_protection) != new_prot) { - error = EACCES; + rv = KERN_PROTECTION_FAILURE; goto out; } current = current->next; @@ -1782,10 +1769,12 @@ uvm_map_protect(map, start, end, new_prot, set_max) /* go back and fix up protections (no need to clip this time). */ current = entry; + while ((current != &map->header) && (current->start < end)) { vm_prot_t old_prot; UVM_MAP_CLIP_END(map, current, end); + old_prot = current->protection; if (set_max) current->protection = @@ -1794,7 +1783,7 @@ uvm_map_protect(map, start, end, new_prot, set_max) current->protection = new_prot; /* - * update physical map if necessary. worry about copy-on-write + * update physical map if necessary. worry about copy-on-write * here -- CHECK THIS XXX */ @@ -1816,14 +1805,13 @@ uvm_map_protect(map, start, end, new_prot, set_max) new_prot != VM_PROT_NONE) { if (uvm_map_pageable(map, entry->start, entry->end, FALSE, - UVM_LK_ENTER|UVM_LK_EXIT) != 0) { - + UVM_LK_ENTER|UVM_LK_EXIT) != KERN_SUCCESS) { /* * If locking the entry fails, remember the * error if it's the first one. Note we * still continue setting the protection in - * the map, but will return the error - * condition regardless. + * the map, but will return the resource + * shortage condition regardless. * * XXX Ignore what the actual error is, * XXX just call it a resource shortage @@ -1831,23 +1819,23 @@ uvm_map_protect(map, start, end, new_prot, set_max) * XXX what uvm_map_protect() itself would * XXX normally return. 
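Most of this hunk and the ones that follow are mechanical: errno-style returns go back to the Mach-style KERN_* codes. The pairs visible in these diffs line up as sketched below; the helper is purely illustrative and assumes the usual uvm headers supply the KERN_* constants.

#include <sys/param.h>
#include <sys/errno.h>
#include <uvm/uvm_extern.h>

/* rough errno view of the KERN_* codes swapped in throughout this diff */
static int
kern2errno(rv)
        int rv;
{
        switch (rv) {
        case KERN_SUCCESS:              return (0);
        case KERN_INVALID_ADDRESS:      return (EFAULT);
        case KERN_INVALID_ARGUMENT:     return (EINVAL);
        case KERN_PROTECTION_FAILURE:   return (EACCES);
        case KERN_NO_SPACE:             /* FALLTHROUGH */
        case KERN_RESOURCE_SHORTAGE:    return (ENOMEM);
        default:                        return (EINVAL);  /* KERN_FAILURE etc. */
        }
}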
*/ - - error = ENOMEM; + rv = KERN_RESOURCE_SHORTAGE; } } + current = current->next; } - pmap_update(map->pmap); out: vm_map_unlock(map); - UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0); - return error; + UVMHIST_LOG(maphist, "<- done, rv=%d",rv,0,0,0); + return (rv); } +#undef max #undef MASK -/* +/* * uvm_map_inherit: set inheritance code for range of addrs in map. * * => map must be unlocked @@ -1857,12 +1845,12 @@ uvm_map_protect(map, start, end, new_prot, set_max) int uvm_map_inherit(map, start, end, new_inheritance) - struct vm_map *map; + vm_map_t map; vaddr_t start; vaddr_t end; vm_inherit_t new_inheritance; { - struct vm_map_entry *entry, *temp_entry; + vm_map_entry_t entry, temp_entry; UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)", map, start, end, new_inheritance); @@ -1874,11 +1862,13 @@ uvm_map_inherit(map, start, end, new_inheritance) break; default: UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); - return EINVAL; + return (KERN_INVALID_ARGUMENT); } - vm_map_lock(map); + vm_map_lock(map); + VM_MAP_RANGE_CHECK(map, start, end); + if (uvm_map_lookup_entry(map, start, &temp_entry)) { entry = temp_entry; UVM_MAP_CLIP_START(map, entry, start); @@ -1891,12 +1881,13 @@ uvm_map_inherit(map, start, end, new_inheritance) entry->inheritance = new_inheritance; entry = entry->next; } + vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); - return 0; + return(KERN_SUCCESS); } -/* +/* * uvm_map_advice: set advice code for range of addrs in map. * * => map must be unlocked @@ -1904,12 +1895,12 @@ uvm_map_inherit(map, start, end, new_inheritance) int uvm_map_advice(map, start, end, new_advice) - struct vm_map *map; + vm_map_t map; vaddr_t start; vaddr_t end; int new_advice; { - struct vm_map_entry *entry, *temp_entry; + vm_map_entry_t entry, temp_entry; UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)", map, start, end, new_advice); @@ -1940,7 +1931,7 @@ uvm_map_advice(map, start, end, new_advice) default: vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); - return EINVAL; + return (KERN_INVALID_ARGUMENT); } entry->advice = new_advice; entry = entry->next; @@ -1948,7 +1939,7 @@ uvm_map_advice(map, start, end, new_advice) vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); - return 0; + return (KERN_SUCCESS); } /* @@ -1967,12 +1958,12 @@ uvm_map_advice(map, start, end, new_advice) int uvm_map_pageable(map, start, end, new_pageable, lockflags) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; boolean_t new_pageable; int lockflags; { - struct vm_map_entry *entry, *start_entry, *failed_entry; + vm_map_entry_t entry, start_entry, failed_entry; int rv; #ifdef DIAGNOSTIC u_int timestamp_save; @@ -1984,26 +1975,27 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) if ((lockflags & UVM_LK_ENTER) == 0) vm_map_lock(map); + VM_MAP_RANGE_CHECK(map, start, end); - /* + /* * only one pageability change may take place at one time, since * uvm_fault_wire assumes it will be called only once for each * wiring/unwiring. therefore, we have to make sure we're actually * changing the pageability for the entire region. we do so before - * making any changes. + * making any changes. 
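The rule stated here is a pattern used throughout this file: one read-only pass over the range to reject submaps and holes, then a second pass that actually modifies entries, so a mid-range failure never leaves the map half-changed. Schematically, with the map locked by the caller and the loop bodies reduced to placeholders:

/* map is locked by the caller; "entry" is the first entry in the range */
static int
apply_to_range(map, entry, end)
        vm_map_t map;
        vm_map_entry_t entry;
        vaddr_t end;
{
        vm_map_entry_t current;

        /* pass 1: validate the whole range before touching anything */
        for (current = entry;
            current != &map->header && current->start < end;
            current = current->next) {
                if (UVM_ET_ISSUBMAP(current))
                        return (KERN_INVALID_ARGUMENT);
                if (current->end < end &&
                    current->end != current->next->start)
                        return (KERN_INVALID_ADDRESS);  /* hole in range */
        }

        /* pass 2: no failure below can leave the range half-modified */
        for (current = entry;
            current != &map->header && current->start < end;
            current = current->next) {
                /* ... change protection/inheritance/wiring of "current" ... */
        }
        return (KERN_SUCCESS);
}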
*/ if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) { if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); - UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0); - return EFAULT; + UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); + return (KERN_INVALID_ADDRESS); } entry = start_entry; - /* + /* * handle wiring and unwiring separately. */ @@ -2012,7 +2004,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) /* * unwiring. first ensure that the range to be unwired is - * really wired down and that there are no holes. + * really wired down and that there are no holes. */ while ((entry != &map->header) && (entry->start < end)) { @@ -2022,13 +2014,14 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) entry->next->start > entry->end))) { if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); - UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0); - return EINVAL; + UVMHIST_LOG(maphist, + "<- done (INVALID UNWIRE ARG)",0,0,0,0); + return (KERN_INVALID_ARGUMENT); } entry = entry->next; } - /* + /* * POSIX 1003.1b - a single munlock call unlocks a region, * regardless of the number of mlock calls made on that * region. @@ -2044,7 +2037,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); - return 0; + return(KERN_SUCCESS); } /* @@ -2052,7 +2045,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) * * 1: holding the write lock, we create any anonymous maps that need * to be created. then we clip each map entry to the region to - * be wired and increment its wiring count. + * be wired and increment its wiring count. * * 2: we downgrade to a read lock, and call uvm_fault_wire to fault * in the pages for any newly wired area (wired_count == 1). @@ -2080,11 +2073,11 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) */ if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ - if (UVM_ET_ISNEEDSCOPY(entry) && + if (UVM_ET_ISNEEDSCOPY(entry) && ((entry->protection & VM_PROT_WRITE) || (entry->object.uvm_obj == NULL))) { amap_copy(map, entry, M_WAITOK, TRUE, - start, end); + start, end); /* XXXCDC: wait OK? */ } } @@ -2094,7 +2087,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) entry->wired_count++; /* - * Check for holes + * Check for holes */ if (entry->protection == VM_PROT_NONE || @@ -2104,7 +2097,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) /* * found one. amap creation actions do not need to - * be undone, but the wired counts need to be restored. + * be undone, but the wired counts need to be restored. */ while (entry != &map->header && entry->end > start) { @@ -2114,7 +2107,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); - return EINVAL; + return (KERN_INVALID_ARGUMENT); } entry = entry->next; } @@ -2136,13 +2129,11 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) rv = uvm_fault_wire(map, entry->start, entry->end, entry->protection); if (rv) { - /* * wiring failed. break out of the loop. * we'll clean up the map below, once we * have a write lock again. 
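Condensed, the wiring pass that follows the comment above looks like this: with the map read-locked and busied, fault in each newly-wired entry, and stop at the first failure so the caller can re-take the write lock and roll the wired counts back. A sketch with an illustrative wrapper name, assuming the headers already included by uvm_map.c:

/*
 * fault in the pages for every newly-wired entry (wired_count was
 * bumped to 1 by the first pass); map is read-locked and busied by
 * the caller.
 */
static int
wire_entries(map, start_entry, end)
        vm_map_t map;
        vm_map_entry_t start_entry;
        vaddr_t end;
{
        vm_map_entry_t entry;
        int rv;

        rv = KERN_SUCCESS;
        for (entry = start_entry;
            entry != &map->header && entry->start < end;
            entry = entry->next) {
                if (entry->wired_count == 1) {
                        rv = uvm_fault_wire(map, entry->start, entry->end,
                            entry->protection);
                        if (rv)
                                break;  /* caller re-takes the write lock
                                         * and rolls the counts back */
                }
        }
        return (rv);
}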
*/ - break; } } @@ -2207,7 +2198,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) } UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); - return 0; + return(KERN_SUCCESS); } /* @@ -2221,11 +2212,11 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) int uvm_map_pageable_all(map, flags, limit) - struct vm_map *map; + vm_map_t map; int flags; vsize_t limit; { - struct vm_map_entry *entry, *failed_entry; + vm_map_entry_t entry, failed_entry; vsize_t size; int rv; #ifdef DIAGNOSTIC @@ -2243,12 +2234,10 @@ uvm_map_pageable_all(map, flags, limit) */ if (flags == 0) { /* unwire */ - /* * POSIX 1003.1b -- munlockall unlocks all regions, * regardless of how many times mlockall has been called. */ - for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (VM_MAPENT_ISWIRED(entry)) @@ -2257,27 +2246,27 @@ uvm_map_pageable_all(map, flags, limit) vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); - return 0; + return (KERN_SUCCESS); + + /* + * end of unwire case! + */ } if (flags & MCL_FUTURE) { - /* * must wire all future mappings; remember this. */ - vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); } if ((flags & MCL_CURRENT) == 0) { - /* * no more work to do! */ - UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); vm_map_unlock(map); - return 0; + return (KERN_SUCCESS); } /* @@ -2313,7 +2302,7 @@ uvm_map_pageable_all(map, flags, limit) if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { vm_map_unlock(map); - return ENOMEM; + return (KERN_NO_SPACE); /* XXX overloaded */ } /* XXX non-pmap_wired_count case must be handled by caller */ @@ -2321,7 +2310,7 @@ uvm_map_pageable_all(map, flags, limit) if (limit != 0 && (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { vm_map_unlock(map); - return ENOMEM; + return (KERN_NO_SPACE); /* XXX overloaded */ } #endif @@ -2334,7 +2323,6 @@ uvm_map_pageable_all(map, flags, limit) if (entry->protection == VM_PROT_NONE) continue; if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ - /* * perform actions of vm_map_lookup that need the * write lock on the map: create an anonymous map @@ -2342,9 +2330,8 @@ uvm_map_pageable_all(map, flags, limit) * for a zero-fill region. (XXXCDC: submap case * ok?) */ - if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ - if (UVM_ET_ISNEEDSCOPY(entry) && + if (UVM_ET_ISNEEDSCOPY(entry) && ((entry->protection & VM_PROT_WRITE) || (entry->object.uvm_obj == NULL))) { amap_copy(map, entry, M_WAITOK, TRUE, @@ -2366,31 +2353,27 @@ uvm_map_pageable_all(map, flags, limit) vm_map_busy(map); vm_map_downgrade(map); - rv = 0; + rv = KERN_SUCCESS; for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry->wired_count == 1) { rv = uvm_fault_wire(map, entry->start, entry->end, entry->protection); if (rv) { - /* * wiring failed. break out of the loop. * we'll clean up the map below, once we * have a write lock again. */ - break; } } } - if (rv) { - + if (rv) { /* failed? */ /* * Get back an exclusive (write) lock. */ - vm_map_upgrade(map); vm_map_unbusy(map); @@ -2405,7 +2388,6 @@ uvm_map_pageable_all(map, flags, limit) * * Skip VM_PROT_NONE entries like we did above. */ - failed_entry = entry; for (/* nothing */; entry != &map->header; entry = entry->next) { @@ -2420,7 +2402,6 @@ uvm_map_pageable_all(map, flags, limit) * * Skip VM_PROT_NONE entries like we did above. 
*/ - for (entry = map->header.next; entry != failed_entry; entry = entry->next) { if (entry->protection == VM_PROT_NONE) @@ -2439,7 +2420,7 @@ uvm_map_pageable_all(map, flags, limit) vm_map_unlock_read(map); UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); - return 0; + return (KERN_SUCCESS); } /* @@ -2451,7 +2432,7 @@ uvm_map_pageable_all(map, flags, limit) * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean * if (flags & PGO_FREE): any cached pages are freed after clean * => returns an error if any part of the specified range isn't mapped - * => never a need to flush amap layer since the anonymous memory has + * => never a need to flush amap layer since the anonymous memory has * no permanent home, but may deactivate pages there * => called from sys_msync() and sys_madvise() * => caller must not write-lock map (read OK). @@ -2460,11 +2441,11 @@ uvm_map_pageable_all(map, flags, limit) int uvm_map_clean(map, start, end, flags) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; int flags; { - struct vm_map_entry *current, *entry; + vm_map_entry_t current, entry; struct uvm_object *uobj; struct vm_amap *amap; struct vm_anon *anon; @@ -2483,7 +2464,7 @@ uvm_map_clean(map, start, end, flags) VM_MAP_RANGE_CHECK(map, start, end); if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { vm_map_unlock_read(map); - return EFAULT; + return(KERN_INVALID_ADDRESS); } /* @@ -2493,18 +2474,19 @@ uvm_map_clean(map, start, end, flags) for (current = entry; current->start < end; current = current->next) { if (UVM_ET_ISSUBMAP(current)) { vm_map_unlock_read(map); - return EINVAL; + return (KERN_INVALID_ARGUMENT); } if (end <= current->end) { break; } if (current->end != current->next->start) { vm_map_unlock_read(map); - return EFAULT; + return (KERN_INVALID_ADDRESS); } } - error = 0; + error = KERN_SUCCESS; + for (current = entry; start < end; current = current->next) { amap = current->aref.ar_amap; /* top layer */ uobj = current->object.uvm_obj; /* bottom layer */ @@ -2604,7 +2586,7 @@ uvm_map_clean(map, start, end, flags) continue; default: - panic("uvm_map_clean: weird flags"); + panic("uvm_map_clean: wierd flags"); } } amap_unlock(amap); @@ -2623,12 +2605,12 @@ uvm_map_clean(map, start, end, flags) simple_unlock(&uobj->vmobjlock); if (rv == FALSE) - error = EIO; + error = KERN_FAILURE; } start += size; } vm_map_unlock_read(map); - return (error); + return (error); } @@ -2641,41 +2623,44 @@ uvm_map_clean(map, start, end, flags) boolean_t uvm_map_checkprot(map, start, end, protection) - struct vm_map * map; - vaddr_t start, end; - vm_prot_t protection; + vm_map_t map; + vaddr_t start, end; + vm_prot_t protection; { - struct vm_map_entry *entry; - struct vm_map_entry *tmp_entry; - - if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { - return(FALSE); - } - entry = tmp_entry; - while (start < end) { - if (entry == &map->header) { - return(FALSE); - } + vm_map_entry_t entry; + vm_map_entry_t tmp_entry; + + if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { + return(FALSE); + } + entry = tmp_entry; + while (start < end) { + if (entry == &map->header) { + return(FALSE); + } /* * no holes allowed */ - if (start < entry->start) { - return(FALSE); - } + if (start < entry->start) { + return(FALSE); + } /* * check protection associated with entry */ - if ((entry->protection & protection) != protection) { - return(FALSE); - } - start = entry->end; - entry = entry->next; - } - return(TRUE); + if ((entry->protection & protection) != protection) { + return(FALSE); + } + + /* go to next 
entry */ + + start = entry->end; + entry = entry->next; + } + return(TRUE); } /* @@ -2686,14 +2671,15 @@ uvm_map_checkprot(map, start, end, protection) * - refcnt set to 1, rest must be init'd by caller */ struct vmspace * -uvmspace_alloc(min, max) +uvmspace_alloc(min, max, pageable) vaddr_t min, max; + int pageable; { struct vmspace *vm; UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); vm = pool_get(&uvm_vmspace_pool, PR_WAITOK); - uvmspace_init(vm, NULL, min, max); + uvmspace_init(vm, NULL, min, max, pageable); UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0); return (vm); } @@ -2705,20 +2691,24 @@ uvmspace_alloc(min, max) * - refcnt set to 1, rest must me init'd by caller */ void -uvmspace_init(vm, pmap, min, max) +uvmspace_init(vm, pmap, min, max, pageable) struct vmspace *vm; struct pmap *pmap; vaddr_t min, max; + boolean_t pageable; { UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); memset(vm, 0, sizeof(*vm)); - uvm_map_setup(&vm->vm_map, min, max, VM_MAP_PAGEABLE); + + uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0); + if (pmap) pmap_reference(pmap); else pmap = pmap_create(); vm->vm_map.pmap = pmap; + vm->vm_refcnt = 1; UVMHIST_LOG(maphist,"<- done",0,0,0,0); } @@ -2746,7 +2736,7 @@ uvmspace_share(p1, p2) void uvmspace_unshare(p) - struct proc *p; + struct proc *p; { struct vmspace *nvm, *ovm = p->p_vmspace; @@ -2758,7 +2748,7 @@ uvmspace_unshare(p) nvm = uvmspace_fork(ovm); pmap_deactivate(p); /* unbind old vmspace */ - p->p_vmspace = nvm; + p->p_vmspace = nvm; pmap_activate(p); /* switch to new vmspace */ uvmspace_free(ovm); /* drop reference to old vmspace */ @@ -2776,7 +2766,7 @@ uvmspace_exec(p, start, end) vaddr_t start, end; { struct vmspace *nvm, *ovm = p->p_vmspace; - struct vm_map *map = &ovm->vm_map; + vm_map_t map = &ovm->vm_map; #ifdef __sparc__ /* XXX cgd 960926: the sparc #ifdef should be a MD hook */ @@ -2798,7 +2788,6 @@ uvmspace_exec(p, start, end) /* * SYSV SHM semantics require us to kill all segments on an exec */ - if (ovm->vm_shm) shmexit(ovm); #endif @@ -2807,7 +2796,6 @@ uvmspace_exec(p, start, end) * POSIX 1003.1b -- "lock future mappings" is revoked * when a process execs another program image. */ - vm_map_lock(map); vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); vm_map_unlock(map); @@ -2815,17 +2803,17 @@ uvmspace_exec(p, start, end) /* * now unmap the old program */ - uvm_unmap(map, map->min_offset, map->max_offset); /* * resize the map */ - vm_map_lock(map); map->min_offset = start; map->max_offset = end; vm_map_unlock(map); + + } else { /* @@ -2833,8 +2821,8 @@ uvmspace_exec(p, start, end) * it is still being used for others. allocate a new vmspace * for p */ - - nvm = uvmspace_alloc(start, end); + nvm = uvmspace_alloc(start, end, + (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE); /* * install new vmspace and drop our ref to the old one. @@ -2858,18 +2846,16 @@ void uvmspace_free(vm) struct vmspace *vm; { - struct vm_map_entry *dead_entries; + vm_map_entry_t dead_entries; UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0); if (--vm->vm_refcnt == 0) { - /* * lock the map, to wait out all other references to it. delete * all of the mappings and pages they hold, then call the pmap * module to reclaim anything left. */ - #ifdef SYSVSHM /* Get rid of any SYSV shared memory segments. 
*/ if (vm->vm_shm != NULL) @@ -2877,7 +2863,7 @@ uvmspace_free(vm) #endif vm_map_lock(&vm->vm_map); if (vm->vm_map.nentries) { - uvm_unmap_remove(&vm->vm_map, + (void)uvm_unmap_remove(&vm->vm_map, vm->vm_map.min_offset, vm->vm_map.max_offset, &dead_entries); if (dead_entries != NULL) @@ -2905,17 +2891,18 @@ uvmspace_fork(vm1) struct vmspace *vm1; { struct vmspace *vm2; - struct vm_map *old_map = &vm1->vm_map; - struct vm_map *new_map; - struct vm_map_entry *old_entry; - struct vm_map_entry *new_entry; - pmap_t new_pmap; - boolean_t protect_child; + vm_map_t old_map = &vm1->vm_map; + vm_map_t new_map; + vm_map_entry_t old_entry; + vm_map_entry_t new_entry; + pmap_t new_pmap; + boolean_t protect_child; UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); vm_map_lock(old_map); - vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset); + vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, + (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE); memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); new_map = &vm2->vm_map; /* XXX */ @@ -2932,26 +2919,27 @@ uvmspace_fork(vm1) /* * first, some sanity checks on the old entry */ + if (UVM_ET_ISSUBMAP(old_entry)) + panic("fork: encountered a submap during fork (illegal)"); + + if (!UVM_ET_ISCOPYONWRITE(old_entry) && + UVM_ET_ISNEEDSCOPY(old_entry)) + panic("fork: non-copy_on_write map entry marked needs_copy (illegal)"); - KASSERT(!UVM_ET_ISSUBMAP(old_entry)); - KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) || - !UVM_ET_ISNEEDSCOPY(old_entry)); switch (old_entry->inheritance) { case MAP_INHERIT_NONE: - /* * drop the mapping */ - break; case MAP_INHERIT_SHARE: - /* * share the mapping: this means we want the old and * new entries to share amaps and backing objects. */ + /* * if the old_entry needs a new amap (due to prev fork) * then we need to allocate it now so that we have @@ -2962,7 +2950,7 @@ uvmspace_fork(vm1) if (UVM_ET_ISNEEDSCOPY(old_entry)) { /* get our own amap, clears needs_copy */ amap_copy(old_map, old_entry, M_WAITOK, FALSE, - 0, 0); + 0, 0); /* XXXCDC: WAITOK??? */ } @@ -2977,8 +2965,8 @@ uvmspace_fork(vm1) * gain reference to object backing the map (can't * be a submap, already checked this case). */ - if (new_entry->aref.ar_amap) + /* share reference */ uvm_map_reference_amap(new_entry, AMAP_SHARED); if (new_entry->object.uvm_obj && @@ -2991,7 +2979,7 @@ uvmspace_fork(vm1) uvm_map_entry_link(new_map, new_map->header.prev, new_entry); - /* + /* * pmap_copy the mappings: this routine is optional * but if it is there it will reduce the number of * page faults in the new proc. @@ -3009,7 +2997,7 @@ uvmspace_fork(vm1) * copy-on-write the mapping (using mmap's * MAP_PRIVATE semantics) * - * allocate new_entry, adjust reference counts. + * allocate new_entry, adjust reference counts. * (note that new references are read-only). */ @@ -3045,20 +3033,20 @@ uvmspace_fork(vm1) * conditions hold: * 1. the old entry has an amap and that amap is * being shared. this means that the old (parent) - * process is sharing the amap with another + * process is sharing the amap with another * process. if we do not clear needs_copy here * we will end up in a situation where both the * parent and child process are refering to the - * same amap with "needs_copy" set. if the + * same amap with "needs_copy" set. if the * parent write-faults, the fault routine will * clear "needs_copy" in the parent by allocating - * a new amap. this is wrong because the + * a new amap. 
this is wrong because the * parent is supposed to be sharing the old amap * and the new amap will break that. * * 2. if the old entry has an amap and a non-zero * wire count then we are going to have to call - * amap_cow_now to avoid page faults in the + * amap_cow_now to avoid page faults in the * parent process. since amap_cow_now requires * "needs_copy" to be clear we might as well * clear it here as well. @@ -3066,14 +3054,15 @@ uvmspace_fork(vm1) */ if (old_entry->aref.ar_amap != NULL) { - if ((amap_flags(old_entry->aref.ar_amap) & - AMAP_SHARED) != 0 || - VM_MAPENT_ISWIRED(old_entry)) { - amap_copy(new_map, new_entry, M_WAITOK, - FALSE, 0, 0); - /* XXXCDC: M_WAITOK ... ok? */ - } + if ((amap_flags(old_entry->aref.ar_amap) & + AMAP_SHARED) != 0 || + VM_MAPENT_ISWIRED(old_entry)) { + + amap_copy(new_map, new_entry, M_WAITOK, FALSE, + 0, 0); + /* XXXCDC: M_WAITOK ... ok? */ + } } /* @@ -3089,9 +3078,9 @@ uvmspace_fork(vm1) if (VM_MAPENT_ISWIRED(old_entry)) { - /* + /* * resolve all copy-on-write faults now - * (note that there is nothing to do if + * (note that there is nothing to do if * the old mapping does not have an amap). * XXX: is it worthwhile to bother with pmap_copy * in this case? @@ -3099,7 +3088,7 @@ uvmspace_fork(vm1) if (old_entry->aref.ar_amap) amap_cow_now(new_map, new_entry); - } else { + } else { /* * setup mappings to trigger copy-on-write faults @@ -3127,7 +3116,6 @@ uvmspace_fork(vm1) old_entry->end, old_entry->protection & ~VM_PROT_WRITE); - pmap_update(old_map->pmap); } old_entry->etype |= UVM_ET_NEEDSCOPY; } @@ -3139,7 +3127,7 @@ uvmspace_fork(vm1) } else { /* - * we only need to protect the child if the + * we only need to protect the child if the * parent has write access. */ if (old_entry->max_protection & VM_PROT_WRITE) @@ -3164,10 +3152,9 @@ uvmspace_fork(vm1) */ if (protect_child) { pmap_protect(new_pmap, new_entry->start, - new_entry->end, - new_entry->protection & + new_entry->end, + new_entry->protection & ~VM_PROT_WRITE); - pmap_update(new_pmap); } } @@ -3177,7 +3164,7 @@ uvmspace_fork(vm1) } new_map->size = old_map->size; - vm_map_unlock(old_map); + vm_map_unlock(old_map); #ifdef SYSVSHM if (vm1->vm_shm) @@ -3189,7 +3176,7 @@ uvmspace_fork(vm1) #endif UVMHIST_LOG(maphist,"<- done",0,0,0,0); - return(vm2); + return(vm2); } @@ -3205,11 +3192,11 @@ uvmspace_fork(vm1) void uvm_map_printit(map, full, pr) - struct vm_map *map; + vm_map_t map; boolean_t full; int (*pr) __P((const char *, ...)); { - struct vm_map_entry *entry; + vm_map_entry_t entry; (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n", @@ -3234,12 +3221,12 @@ uvm_map_printit(map, full, pr) "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " "wc=%d, adv=%d\n", (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', - (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', + (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', (entry->etype & UVM_ET_NEEDSCOPY) ? 
'T' : 'F', entry->protection, entry->max_protection, entry->inheritance, entry->wired_count, entry->advice); } -} +} /* * uvm_object_printit: actually prints the object @@ -3276,7 +3263,7 @@ uvm_object_printit(uobj, full, pr) if ((cnt % 3) != 2) { (*pr)("\n"); } -} +} /* * uvm_page_printit: actually print the page @@ -3348,11 +3335,11 @@ uvm_page_printit(pg, full, pr) /* cross-verify page queue */ if (pg->pqflags & PQ_FREE) { int fl = uvm_page_lookup_freelist(pg); - int color = VM_PGCOLOR_BUCKET(pg); - pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[ - ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN]; + pgl = &uvm.page_free[fl].pgfl_queues[((pg)->flags & PG_ZERO) ? + PGFL_ZEROS : PGFL_UNKNOWN]; } else if (pg->pqflags & PQ_INACTIVE) { - pgl = &uvm.page_inactive; + pgl = (pg->pqflags & PQ_SWAPBACKED) ? + &uvm.page_inactive_swp : &uvm.page_inactive_obj; } else if (pg->pqflags & PQ_ACTIVE) { pgl = &uvm.page_active; } else { diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h index d0d1509fc4c..724bd78ab23 100644 --- a/sys/uvm/uvm_map.h +++ b/sys/uvm/uvm_map.h @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_map.h,v 1.19 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_map.h,v 1.30 2001/09/09 19:38:23 chs Exp $ */ +/* $OpenBSD: uvm_map.h,v 1.20 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -83,7 +83,7 @@ /* * UVM_MAP_CLIP_START: ensure that the entry begins at or after * the starting address, if it doesn't we split the entry. - * + * * => map must be locked by caller */ @@ -113,6 +113,26 @@ #include <uvm/uvm_anon.h> /* + * types defined: + * + * vm_map_t the high-level address map data structure. + * vm_map_entry_t an entry in an address map. 
+ * vm_map_version_t a timestamp of a map, for use with vm_map_lookup + */ + +/* + * Objects which live in maps may be either VM objects, or another map + * (called a "sharing map") which denotes read-write sharing with other maps. + * + * XXXCDC: private pager data goes here now + */ + +union vm_map_object { + struct uvm_object *uvm_obj; /* UVM OBJECT */ + struct vm_map *sub_map; /* belongs to another map */ +}; + +/* * Address map entries consist of start and end addresses, * a VM object (or sharing map) and offset into that object, * and user-exported inheritance and protection information. @@ -123,10 +143,7 @@ struct vm_map_entry { struct vm_map_entry *next; /* next entry */ vaddr_t start; /* start address */ vaddr_t end; /* end address */ - union { - struct uvm_object *uvm_obj; /* uvm object */ - struct vm_map *sub_map; /* belongs to another map */ - } object; /* object I point to */ + union vm_map_object object; /* object I point to */ voff_t offset; /* offset into object */ int etype; /* entry type */ vm_prot_t protection; /* protection code */ @@ -139,7 +156,6 @@ struct vm_map_entry { u_int8_t flags; /* flags */ #define UVM_MAP_STATIC 0x01 /* static map entry */ -#define UVM_MAP_KMEM 0x02 /* from kmem entry pool */ }; @@ -199,17 +215,17 @@ struct vm_map_entry { */ struct vm_map { struct pmap * pmap; /* Physical map */ - struct lock lock; /* Lock for map data */ + lock_data_t lock; /* Lock for map data */ struct vm_map_entry header; /* List of entries */ int nentries; /* Number of entries */ vsize_t size; /* virtual size */ int ref_count; /* Reference count */ - struct simplelock ref_lock; /* Lock for ref_count field */ - struct vm_map_entry * hint; /* hint for quick lookups */ - struct simplelock hint_lock; /* lock for hint storage */ - struct vm_map_entry * first_free; /* First free space hint */ + simple_lock_data_t ref_lock; /* Lock for ref_count field */ + vm_map_entry_t hint; /* hint for quick lookups */ + simple_lock_data_t hint_lock; /* lock for hint storage */ + vm_map_entry_t first_free; /* First free space hint */ int flags; /* flags */ - struct simplelock flags_lock; /* Lock for flags field */ + simple_lock_data_t flags_lock; /* Lock for flags field */ unsigned int timestamp; /* Version number */ #define min_offset header.start #define max_offset header.end @@ -242,12 +258,49 @@ do { \ #endif /* _KERNEL */ /* + * Interrupt-safe maps must also be kept on a special list, + * to assist uvm_fault() in avoiding locking problems. 
+ */ +struct vm_map_intrsafe { + struct vm_map vmi_map; + LIST_ENTRY(vm_map_intrsafe) vmi_list; +}; + +LIST_HEAD(vmi_list, vm_map_intrsafe); +#ifdef _KERNEL +extern simple_lock_data_t vmi_list_slock; +extern struct vmi_list vmi_list; + +static __inline int vmi_list_lock __P((void)); +static __inline void vmi_list_unlock __P((int)); + +static __inline int +vmi_list_lock() +{ + int s; + + s = splhigh(); + simple_lock(&vmi_list_slock); + return (s); +} + +static __inline void +vmi_list_unlock(s) + int s; +{ + + simple_unlock(&vmi_list_slock); + splx(s); +} +#endif /* _KERNEL */ + +/* * handle inline options */ #ifdef UVM_MAP_INLINE #define MAP_INLINE static __inline -#else +#else #define MAP_INLINE /* nothing */ #endif /* UVM_MAP_INLINE */ @@ -266,39 +319,34 @@ extern vaddr_t uvm_maxkaddr; */ MAP_INLINE -void uvm_map_deallocate __P((struct vm_map *)); +void uvm_map_deallocate __P((vm_map_t)); -int uvm_map_clean __P((struct vm_map *, vaddr_t, vaddr_t, int)); -void uvm_map_clip_start __P((struct vm_map *, struct vm_map_entry *, - vaddr_t)); -void uvm_map_clip_end __P((struct vm_map *, struct vm_map_entry *, - vaddr_t)); +int uvm_map_clean __P((vm_map_t, vaddr_t, vaddr_t, int)); +void uvm_map_clip_start __P((vm_map_t, vm_map_entry_t, vaddr_t)); +void uvm_map_clip_end __P((vm_map_t, vm_map_entry_t, vaddr_t)); MAP_INLINE -struct vm_map *uvm_map_create __P((pmap_t, vaddr_t, vaddr_t, int)); -int uvm_map_extract __P((struct vm_map *, vaddr_t, vsize_t, - struct vm_map *, vaddr_t *, int)); -struct vm_map_entry *uvm_map_findspace __P((struct vm_map *, vaddr_t, vsize_t, - vaddr_t *, struct uvm_object *, voff_t, vsize_t, int)); -int uvm_map_inherit __P((struct vm_map *, vaddr_t, vaddr_t, - vm_inherit_t)); -int uvm_map_advice __P((struct vm_map *, vaddr_t, vaddr_t, int)); +vm_map_t uvm_map_create __P((pmap_t, vaddr_t, vaddr_t, int)); +int uvm_map_extract __P((vm_map_t, vaddr_t, vsize_t, + vm_map_t, vaddr_t *, int)); +vm_map_entry_t uvm_map_findspace __P((vm_map_t, vaddr_t, vsize_t, vaddr_t *, + struct uvm_object *, voff_t, vsize_t, int)); +int uvm_map_inherit __P((vm_map_t, vaddr_t, vaddr_t, vm_inherit_t)); +int uvm_map_advice __P((vm_map_t, vaddr_t, vaddr_t, int)); void uvm_map_init __P((void)); -boolean_t uvm_map_lookup_entry __P((struct vm_map *, vaddr_t, - struct vm_map_entry **)); +boolean_t uvm_map_lookup_entry __P((vm_map_t, vaddr_t, vm_map_entry_t *)); MAP_INLINE -void uvm_map_reference __P((struct vm_map *)); -int uvm_map_replace __P((struct vm_map *, vaddr_t, vaddr_t, - struct vm_map_entry *, int)); -int uvm_map_reserve __P((struct vm_map *, vsize_t, vaddr_t, vsize_t, - vaddr_t *)); -void uvm_map_setup __P((struct vm_map *, vaddr_t, vaddr_t, int)); -int uvm_map_submap __P((struct vm_map *, vaddr_t, vaddr_t, - struct vm_map *)); +void uvm_map_reference __P((vm_map_t)); +int uvm_map_replace __P((vm_map_t, vaddr_t, vaddr_t, + vm_map_entry_t, int)); +int uvm_map_reserve __P((vm_map_t, vsize_t, vaddr_t, vsize_t, + vaddr_t *)); +void uvm_map_setup __P((vm_map_t, vaddr_t, vaddr_t, int)); +int uvm_map_submap __P((vm_map_t, vaddr_t, vaddr_t, vm_map_t)); MAP_INLINE -void uvm_unmap __P((struct vm_map *, vaddr_t, vaddr_t)); -void uvm_unmap_detach __P((struct vm_map_entry *,int)); -void uvm_unmap_remove __P((struct vm_map *, vaddr_t, vaddr_t, - struct vm_map_entry **)); +int uvm_unmap __P((vm_map_t, vaddr_t, vaddr_t)); +void uvm_unmap_detach __P((vm_map_entry_t,int)); +int uvm_unmap_remove __P((vm_map_t, vaddr_t, vaddr_t, + vm_map_entry_t *)); #endif /* _KERNEL */ @@ -336,13 +384,13 @@ void uvm_unmap_remove 
__P((struct vm_map *, vaddr_t, vaddr_t, #include <sys/proc.h> /* for tsleep(), wakeup() */ #include <sys/systm.h> /* for panic() */ -static __inline boolean_t vm_map_lock_try __P((struct vm_map *)); -static __inline void vm_map_lock __P((struct vm_map *)); +static __inline boolean_t vm_map_lock_try __P((vm_map_t)); +static __inline void vm_map_lock __P((vm_map_t)); extern const char vmmapbsy[]; static __inline boolean_t vm_map_lock_try(map) - struct vm_map *map; + vm_map_t map; { boolean_t rv; @@ -366,7 +414,7 @@ vm_map_lock_try(map) static __inline void vm_map_lock(map) - struct vm_map *map; + vm_map_t map; { int error; @@ -379,7 +427,7 @@ vm_map_lock(map) simple_lock(&map->flags_lock); while (map->flags & VM_MAP_BUSY) { map->flags |= VM_MAP_WANTLOCK; - ltsleep(&map->flags, PVM, vmmapbsy, 0, &map->flags_lock); + ltsleep(&map->flags, PVM, (char *)vmmapbsy, 0, &map->flags_lock); } error = lockmgr(&map->lock, LK_EXCLUSIVE|LK_SLEEPFAIL|LK_INTERLOCK, diff --git a/sys/uvm/uvm_map_i.h b/sys/uvm/uvm_map_i.h index 069cbd5f125..54625e7fb4e 100644 --- a/sys/uvm/uvm_map_i.h +++ b/sys/uvm/uvm_map_i.h @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_map_i.h,v 1.11 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_map_i.h,v 1.22 2001/06/26 17:55:15 thorpej Exp $ */ +/* $OpenBSD: uvm_map_i.h,v 1.12 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_map_i.h,v 1.18 2000/11/27 08:40:04 chs Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -84,15 +84,17 @@ * uvm_map_create: create map */ -MAP_INLINE struct vm_map * +MAP_INLINE vm_map_t uvm_map_create(pmap, min, max, flags) pmap_t pmap; vaddr_t min, max; int flags; { - struct vm_map *result; + vm_map_t result; - MALLOC(result, struct vm_map *, sizeof(struct vm_map), + MALLOC(result, vm_map_t, + (flags & VM_MAP_INTRSAFE) ? 
sizeof(struct vm_map_intrsafe) : + sizeof(struct vm_map), M_VMMAP, M_WAITOK); uvm_map_setup(result, min, max, flags); result->pmap = pmap; @@ -107,7 +109,7 @@ uvm_map_create(pmap, min, max, flags) MAP_INLINE void uvm_map_setup(map, min, max, flags) - struct vm_map *map; + vm_map_t map; vaddr_t min, max; int flags; { @@ -126,6 +128,23 @@ uvm_map_setup(map, min, max, flags) simple_lock_init(&map->ref_lock); simple_lock_init(&map->hint_lock); simple_lock_init(&map->flags_lock); + + /* + * If the map is interrupt safe, place it on the list + * of interrupt safe maps, for uvm_fault(). + * + * We almost never set up an interrupt-safe map, but we set + * up quite a few regular ones (at every fork!), so put + * interrupt-safe map setup in the slow path. + */ + if (__predict_false(flags & VM_MAP_INTRSAFE)) { + struct vm_map_intrsafe *vmi = (struct vm_map_intrsafe *)map; + int s; + + s = vmi_list_lock(); + LIST_INSERT_HEAD(&vmi_list, vmi, vmi_list); + vmi_list_unlock(s); + } } @@ -136,16 +155,17 @@ uvm_map_setup(map, min, max, flags) /* * uvm_unmap: remove mappings from a vm_map (from "start" up to "stop") * - * => caller must check alignment and size + * => caller must check alignment and size * => map must be unlocked (we will lock it) */ -MAP_INLINE void +MAP_INLINE int uvm_unmap(map, start, end) - struct vm_map *map; + vm_map_t map; vaddr_t start,end; { - struct vm_map_entry *dead_entries; + int result; + vm_map_entry_t dead_entries; UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, " (map=0x%x, start=0x%x, end=0x%x)", @@ -155,13 +175,14 @@ uvm_unmap(map, start, end) * detach from the dead entries... */ vm_map_lock(map); - uvm_unmap_remove(map, start, end, &dead_entries); + result = uvm_unmap_remove(map, start, end, &dead_entries); vm_map_unlock(map); if (dead_entries != NULL) uvm_unmap_detach(dead_entries, 0); UVMHIST_LOG(maphist, "<- done", 0,0,0,0); + return(result); } @@ -173,10 +194,10 @@ uvm_unmap(map, start, end) MAP_INLINE void uvm_map_reference(map) - struct vm_map *map; + vm_map_t map; { simple_lock(&map->ref_lock); - map->ref_count++; + map->ref_count++; simple_unlock(&map->ref_lock); } @@ -189,7 +210,7 @@ uvm_map_reference(map) MAP_INLINE void uvm_map_deallocate(map) - struct vm_map *map; + vm_map_t map; { int c; diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c index 8ff16e98351..c5a49768f7a 100644 --- a/sys/uvm/uvm_mmap.c +++ b/sys/uvm/uvm_mmap.c @@ -1,11 +1,11 @@ -/* $OpenBSD: uvm_mmap.c,v 1.32 2001/12/10 02:19:34 art Exp $ */ -/* $NetBSD: uvm_mmap.c,v 1.55 2001/08/17 05:52:46 chs Exp $ */ +/* $OpenBSD: uvm_mmap.c,v 1.33 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993 The Regents of the University of California. + * Copyright (c) 1991, 1993 The Regents of the University of California. * Copyright (c) 1988 University of Utah. - * + * * All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -23,7 +23,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Charles D. Cranor, - * Washington University, University of California, Berkeley and + * Washington University, University of California, Berkeley and * its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -69,6 +69,7 @@ #include <uvm/uvm.h> #include <uvm/uvm_device.h> +#include <uvm/uvm_vnode.h> /* @@ -131,14 +132,14 @@ sys_mincore(p, v, retval) syscallarg(size_t) len; syscallarg(char *) vec; } */ *uap = v; - struct vm_page *m; + vm_page_t m; char *vec, pgi; struct uvm_object *uobj; struct vm_amap *amap; struct vm_anon *anon; - struct vm_map_entry *entry; + vm_map_entry_t entry; vaddr_t start, end, lim; - struct vm_map *map; + vm_map_t map; vsize_t len; int error = 0, npgs; @@ -164,8 +165,8 @@ sys_mincore(p, v, retval) * Lock down vec, so our returned status isn't outdated by * storing the status byte for a page. */ - uvm_vslock(p, vec, npgs, VM_PROT_WRITE); + vm_map_lock_read(map); if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { @@ -193,7 +194,6 @@ sys_mincore(p, v, retval) * Special case for objects with no "real" pages. Those * are always considered resident (mapped devices). */ - if (UVM_ET_ISOBJ(entry)) { KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)); if (entry->object.uvm_obj->pgops->pgo_releasepg @@ -221,31 +221,30 @@ sys_mincore(p, v, retval) start - entry->start); /* Don't need to lock anon here. */ if (anon != NULL && anon->u.an_page != NULL) { - /* * Anon has the page for this entry * offset. */ - pgi = 1; } } + if (uobj != NULL && pgi == 0) { /* Check the bottom layer. */ m = uvm_pagelookup(uobj, entry->offset + (start - entry->start)); if (m != NULL) { - /* * Object has the page for this entry * offset. */ - pgi = 1; } } + (void) subyte(vec, pgi); } + if (uobj != NULL) simple_unlock(&uobj->vmobjlock); if (amap != NULL) @@ -292,15 +291,15 @@ sys_mmap(p, v, retval) struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; - void *handle; + caddr_t handle; int error; /* * first, extract syscall args from the uap. */ - addr = (vaddr_t)SCARG(uap, addr); - size = (vsize_t)SCARG(uap, len); + addr = (vaddr_t) SCARG(uap, addr); + size = (vsize_t) SCARG(uap, len); prot = SCARG(uap, prot) & VM_PROT_ALL; flags = SCARG(uap, flags); fd = SCARG(uap, fd); @@ -322,12 +321,12 @@ sys_mmap(p, v, retval) pageoff = (pos & PAGE_MASK); pos -= pageoff; size += pageoff; /* add offset */ - size = (vsize_t)round_page(size); /* round up */ + size = (vsize_t) round_page(size); /* round up */ if ((ssize_t) size < 0) return (EINVAL); /* don't allow wrap */ /* - * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" + * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" */ if (flags & MAP_FIXED) { @@ -352,8 +351,10 @@ sys_mmap(p, v, retval) * we will refine our guess later (e.g. to account for VAC, etc) */ - addr = MAX(addr, round_page((vaddr_t)p->p_vmspace->vm_daddr + - MAXDSIZ)); + if (addr < round_page((vaddr_t)p->p_vmspace->vm_daddr + + MAXDSIZ)) + addr = round_page((vaddr_t)p->p_vmspace->vm_daddr + + MAXDSIZ); } /* @@ -401,7 +402,7 @@ sys_mmap(p, v, retval) flags |= MAP_PRIVATE; /* for a file */ } - /* + /* * MAP_PRIVATE device mappings don't make sense (and aren't * supported anyway). However, some programs rely on this, * so just change it to MAP_SHARED. 
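/*
 * Editor's illustrative sketch -- not part of the patch above or below.
 * sys_mmap() just above slides the file offset back to a page boundary
 * and grows the length to match, and the wrappers that follow
 * (sys_msync, sys_munmap, sys_mprotect, sys_minherit, ...) repeat the
 * same arithmetic on the user-supplied address.  The helper below shows
 * that arithmetic in isolation; page_align_range() is a hypothetical
 * name, and passing page_size explicitly merely stands in for the
 * machine-dependent PAGE_SIZE/PAGE_MASK/round_page() used in the tree.
 */
#include <stddef.h>
#include <stdint.h>

static void
page_align_range(uintptr_t *addr, size_t *size, uintptr_t page_size)
{
	uintptr_t page_mask = page_size - 1;	/* page_size is a power of two */
	uintptr_t pageoff = *addr & page_mask;	/* offset into the first page */

	*addr -= pageoff;			/* truncate addr to a page boundary */
	*size += pageoff;			/* still cover the original start */
	*size = (*size + page_mask) & ~page_mask; /* round up to whole pages */
}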
@@ -445,7 +446,12 @@ sys_mmap(p, v, retval) /* MAP_PRIVATE mappings can always write to */ maxprot |= VM_PROT_WRITE; } - handle = vp; + + /* + * set handle to vnode + */ + + handle = (caddr_t)vp; } else { /* MAP_ANON case */ /* @@ -470,8 +476,7 @@ sys_mmap(p, v, retval) if ((flags & MAP_ANON) != 0 || ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) { if (size > - (p->p_rlimit[RLIMIT_DATA].rlim_cur - - ctob(p->p_vmspace->vm_dsize))) { + (p->p_rlimit[RLIMIT_DATA].rlim_cur - ctob(p->p_vmspace->vm_dsize))) { return (ENOMEM); } } @@ -507,8 +512,8 @@ sys_msync(p, v, retval) } */ *uap = v; vaddr_t addr; vsize_t size, pageoff; - struct vm_map *map; - int error, rv, flags, uvmflags; + vm_map_t map; + int rv, flags, uvmflags; /* * extract syscall args from the uap @@ -527,13 +532,13 @@ sys_msync(p, v, retval) flags |= MS_SYNC; /* - * align the address to a page boundary and adjust the size accordingly. + * align the address to a page boundary, and adjust the size accordingly */ pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); + size = (vsize_t) round_page(size); /* disallow wrap-around. */ if (addr + size < addr) @@ -555,10 +560,9 @@ sys_msync(p, v, retval) * This can be incorrect if the region splits or is coalesced * with a neighbor. */ - if (size == 0) { - struct vm_map_entry *entry; - + vm_map_entry_t entry; + vm_map_lock_read(map); rv = uvm_map_lookup_entry(map, addr, &entry); if (rv == TRUE) { @@ -573,7 +577,6 @@ sys_msync(p, v, retval) /* * translate MS_ flags into PGO_ flags */ - uvmflags = PGO_CLEANIT; if (flags & MS_INVALIDATE) uvmflags |= PGO_FREE; @@ -582,8 +585,15 @@ sys_msync(p, v, retval) else uvmflags |= PGO_SYNCIO; /* XXXCDC: force sync for now! */ - error = uvm_map_clean(map, addr, addr+size, uvmflags); - return error; + /* + * doit! + */ + rv = uvm_map_clean(map, addr, addr+size, uvmflags); + + /* + * and return... + */ + return (rv); } /* @@ -602,25 +612,25 @@ sys_munmap(p, v, retval) } */ *uap = v; vaddr_t addr; vsize_t size, pageoff; - struct vm_map *map; + vm_map_t map; vaddr_t vm_min_address = VM_MIN_ADDRESS; struct vm_map_entry *dead_entries; /* - * get syscall args. + * get syscall args... */ - addr = (vaddr_t)SCARG(uap, addr); - size = (vsize_t)SCARG(uap, len); - + addr = (vaddr_t) SCARG(uap, addr); + size = (vsize_t) SCARG(uap, len); + /* - * align the address to a page boundary and adjust the size accordingly. + * align the address to a page boundary, and adjust the size accordingly */ pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); + size = (vsize_t) round_page(size); if ((int)size < 0) return (EINVAL); @@ -639,20 +649,29 @@ sys_munmap(p, v, retval) return (EINVAL); map = &p->p_vmspace->vm_map; + + vm_map_lock(map); /* lock map so we can checkprot */ + /* - * interesting system call semantic: make sure entire range is + * interesting system call semantic: make sure entire range is * allocated before allowing an unmap. */ - vm_map_lock(map); if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) { vm_map_unlock(map); return (EINVAL); } - uvm_unmap_remove(map, addr, addr + size, &dead_entries); - vm_map_unlock(map); + + /* + * doit! 
+ */ + (void) uvm_unmap_remove(map, addr, addr + size, &dead_entries); + + vm_map_unlock(map); /* and unlock */ + if (dead_entries != NULL) uvm_unmap_detach(dead_entries, 0); + return (0); } @@ -674,7 +693,7 @@ sys_mprotect(p, v, retval) vaddr_t addr; vsize_t size, pageoff; vm_prot_t prot; - int error; + int rv; /* * extract syscall args from uap @@ -685,19 +704,27 @@ sys_mprotect(p, v, retval) prot = SCARG(uap, prot) & VM_PROT_ALL; /* - * align the address to a page boundary and adjust the size accordingly. + * align the address to a page boundary, and adjust the size accordingly */ - pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); - + size = (vsize_t) round_page(size); if ((int)size < 0) return (EINVAL); - error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, - FALSE); - return error; + + /* + * doit + */ + + rv = uvm_map_protect(&p->p_vmspace->vm_map, + addr, addr+size, prot, FALSE); + + if (rv == KERN_SUCCESS) + return (0); + if (rv == KERN_PROTECTION_FAILURE) + return (EACCES); + return (EINVAL); } /* @@ -718,26 +745,30 @@ sys_minherit(p, v, retval) vaddr_t addr; vsize_t size, pageoff; vm_inherit_t inherit; - int error; - + addr = (vaddr_t)SCARG(uap, addr); size = (vsize_t)SCARG(uap, len); inherit = SCARG(uap, inherit); - /* - * align the address to a page boundary and adjust the size accordingly. + * align the address to a page boundary, and adjust the size accordingly */ pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); + size = (vsize_t) round_page(size); if ((int)size < 0) return (EINVAL); - error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size, - inherit); - return error; + + switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, + inherit)) { + case KERN_SUCCESS: + return (0); + case KERN_PROTECTION_FAILURE: + return (EACCES); + } + return (EINVAL); } /* @@ -758,8 +789,8 @@ sys_madvise(p, v, retval) } */ *uap = v; vaddr_t addr; vsize_t size, pageoff; - int advice, error; - + int advice, rv;; + addr = (vaddr_t)SCARG(uap, addr); size = (vsize_t)SCARG(uap, len); advice = SCARG(uap, behav); @@ -767,11 +798,10 @@ sys_madvise(p, v, retval) /* * align the address to a page boundary, and adjust the size accordingly */ - pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); + size = (vsize_t) round_page(size); if ((ssize_t)size <= 0) return (EINVAL); @@ -780,12 +810,11 @@ sys_madvise(p, v, retval) case MADV_NORMAL: case MADV_RANDOM: case MADV_SEQUENTIAL: - error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size, + rv = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size, advice); break; case MADV_WILLNEED: - /* * Activate all these pages, pre-faulting them in if * necessary. @@ -795,35 +824,29 @@ sys_madvise(p, v, retval) * Should invent a "weak" mode for uvm_fault() * which would only do the PGO_LOCKED pgo_get(). */ - return (0); case MADV_DONTNEED: - /* * Deactivate all these pages. We don't need them * any more. We don't, however, toss the data in * the pages. */ - - error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size, + rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size, PGO_DEACTIVATE); break; case MADV_FREE: - /* * These pages contain no valid data, and may be * garbage-collected. Toss all resources, including * any swap space in use. 
*/ - - error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size, + rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size, PGO_FREE); break; case MADV_SPACEAVAIL: - /* * XXXMRG What is this? I think it's: * @@ -834,14 +857,13 @@ sys_madvise(p, v, retval) * as it will free swap space allocated to pages in core. * There's also what to do for device/file/anonymous memory. */ - return (EINVAL); default: return (EINVAL); } - return error; + return (rv); } /* @@ -865,21 +887,19 @@ sys_mlock(p, v, retval) /* * extract syscall args from uap */ - addr = (vaddr_t)SCARG(uap, addr); size = (vsize_t)SCARG(uap, len); /* * align the address to a page boundary and adjust the size accordingly */ - pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); - + size = (vsize_t) round_page(size); + /* disallow wrap-around. */ - if (addr + size < addr) + if (addr + (int)size < addr) return (EINVAL); if (atop(size) + uvmexp.wired > uvmexp.wiredmax) @@ -896,7 +916,7 @@ sys_mlock(p, v, retval) error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE, 0); - return error; + return (error == KERN_SUCCESS ? 0 : ENOMEM); } /* @@ -927,14 +947,13 @@ sys_munlock(p, v, retval) /* * align the address to a page boundary, and adjust the size accordingly */ - pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); + size = (vsize_t) round_page(size); /* disallow wrap-around. */ - if (addr + size < addr) + if (addr + (int)size < addr) return (EINVAL); #ifndef pmap_wired_count @@ -944,7 +963,7 @@ sys_munlock(p, v, retval) error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE, 0); - return error; + return (error == KERN_SUCCESS ? 0 : ENOMEM); } /* @@ -975,6 +994,23 @@ sys_mlockall(p, v, retval) error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur); + switch (error) { + case KERN_SUCCESS: + error = 0; + break; + + case KERN_NO_SPACE: /* XXX overloaded */ + error = ENOMEM; + break; + + default: + /* + * "Some or all of the memory could not be locked when + * the call was made." + */ + error = EAGAIN; + } + return (error); } @@ -1004,18 +1040,18 @@ sys_munlockall(p, v, retval) int uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) - struct vm_map *map; + vm_map_t map; vaddr_t *addr; vsize_t size; vm_prot_t prot, maxprot; int flags; - void *handle; + caddr_t handle; /* XXX: VNODE? */ voff_t foff; vsize_t locklimit; { struct uvm_object *uobj; struct vnode *vp; - int error; + int retval; int advice = UVM_ADV_NORMAL; uvm_flag_t uvmflag = 0; @@ -1038,6 +1074,7 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) if ((flags & MAP_FIXED) == 0) { *addr = round_page(*addr); /* round */ } else { + if (*addr & PAGE_MASK) return(EINVAL); uvmflag |= UVM_FLAG_FIXED; @@ -1060,18 +1097,46 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) uvmflag |= UVM_FLAG_OVERLAY; } else { - vp = (struct vnode *)handle; - if (vp->v_type != VCHR) { - error = VOP_MMAP(vp, 0, curproc->p_ucred, curproc); - if (error) { - return error; - } - uobj = uvn_attach((void *)vp, (flags & MAP_SHARED) ? + vp = (struct vnode *) handle; /* get vnode */ + if (vp->v_type != VCHR) { + uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ? 
maxprot : (maxprot & ~VM_PROT_WRITE)); +#ifndef UBC + /* + * XXXCDC: hack from old code + * don't allow vnodes which have been mapped + * shared-writeable to persist [forces them to be + * flushed out when last reference goes]. + * XXXCDC: interesting side effect: avoids a bug. + * note that in WRITE [ufs_readwrite.c] that we + * allocate buffer, uncache, and then do the write. + * the problem with this is that if the uncache causes + * VM data to be flushed to the same area of the file + * we are writing to... in that case we've got the + * buffer locked and our process goes to sleep forever. + * + * XXXCDC: checking maxprot protects us from the + * "persistbug" program but this is not a long term + * solution. + * + * XXXCDC: we don't bother calling uncache with the vp + * VOP_LOCKed since we know that we are already + * holding a valid reference to the uvn (from the + * uvn_attach above), and thus it is impossible for + * the uncache to kill the uvn and trigger I/O. + */ + if (flags & MAP_SHARED) { + if ((prot & VM_PROT_WRITE) || + (maxprot & VM_PROT_WRITE)) { + uvm_vnp_uncache(vp); + } + } +#else /* XXX for now, attach doesn't gain a ref */ VREF(vp); +#endif } else { uobj = udv_attach((void *) &vp->v_rdev, (flags & MAP_SHARED) ? maxprot : @@ -1083,67 +1148,88 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) */ if (uobj == NULL && (prot & PROT_EXEC) == 0) { maxprot &= ~VM_PROT_EXECUTE; - uobj = udv_attach((void *)&vp->v_rdev, + uobj = udv_attach((void *) &vp->v_rdev, (flags & MAP_SHARED) ? maxprot : (maxprot & ~VM_PROT_WRITE), foff, size); } advice = UVM_ADV_RANDOM; } + if (uobj == NULL) return((vp->v_type == VREG) ? ENOMEM : EINVAL); + if ((flags & MAP_SHARED) == 0) uvmflag |= UVM_FLAG_COPYONW; } - uvmflag = UVM_MAPFLAG(prot, maxprot, + /* + * set up mapping flags + */ + + uvmflag = UVM_MAPFLAG(prot, maxprot, (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice, uvmflag); - error = uvm_map(map, addr, size, uobj, foff, 0, uvmflag); - if (error) { - if (uobj) - uobj->pgops->pgo_detach(uobj); - return error; - } /* - * POSIX 1003.1b -- if our address space was configured - * to lock all future mappings, wire the one we just made. + * do it! */ - if (prot == VM_PROT_NONE) { + retval = uvm_map(map, addr, size, uobj, foff, 0, uvmflag); + if (retval == KERN_SUCCESS) { /* - * No more work to do in this case. + * POSIX 1003.1b -- if our address space was configured + * to lock all future mappings, wire the one we just made. */ + if (prot == VM_PROT_NONE) { + /* + * No more work to do in this case. + */ + return (0); + } + + vm_map_lock(map); - return (0); - } - vm_map_lock(map); - if (map->flags & VM_MAP_WIREFUTURE) { - if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax + if (map->flags & VM_MAP_WIREFUTURE) { + if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax #ifdef pmap_wired_count - || (locklimit != 0 && (size + - ptoa(pmap_wired_count(vm_map_pmap(map)))) > - locklimit) + || (locklimit != 0 && (size + + ptoa(pmap_wired_count(vm_map_pmap(map)))) > + locklimit) #endif - ) { - vm_map_unlock(map); - uvm_unmap(map, *addr, *addr + size); - return ENOMEM; + ) { + retval = KERN_RESOURCE_SHORTAGE; + vm_map_unlock(map); + /* unmap the region! */ + (void) uvm_unmap(map, *addr, *addr + size); + goto bad; + } + /* + * uvm_map_pageable() always returns the map + * unlocked. + */ + retval = uvm_map_pageable(map, *addr, *addr + size, + FALSE, UVM_LK_ENTER); + if (retval != KERN_SUCCESS) { + /* unmap the region! 
*/ + (void) uvm_unmap(map, *addr, *addr + size); + goto bad; + } + return (0); } - /* - * uvm_map_pageable() always returns the map unlocked. - */ + vm_map_unlock(map); - error = uvm_map_pageable(map, *addr, *addr + size, - FALSE, UVM_LK_ENTER); - if (error) { - uvm_unmap(map, *addr, *addr + size); - return error; - } return (0); } - vm_map_unlock(map); - return 0; + + /* + * errors: first detach from the uobj, if any. + */ + + if (uobj) + uobj->pgops->pgo_detach(uobj); + + bad: + return (retval); } diff --git a/sys/uvm/uvm_object.h b/sys/uvm/uvm_object.h index 239152fb5fe..b1b1daa9490 100644 --- a/sys/uvm/uvm_object.h +++ b/sys/uvm/uvm_object.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_object.h,v 1.7 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_object.h,v 1.12 2001/05/26 16:32:47 chs Exp $ */ +/* $OpenBSD: uvm_object.h,v 1.8 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_object.h,v 1.11 2001/03/09 01:02:12 chs Exp $ */ /* * @@ -47,7 +47,7 @@ */ struct uvm_object { - struct simplelock vmobjlock; /* lock on memq */ + simple_lock_data_t vmobjlock; /* lock on memq */ struct uvm_pagerops *pgops; /* pager ops */ struct pglist memq; /* pages in this object */ int uo_npages; /* # of pages in memq */ diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c index ed2a8c6f601..edfb5b1ca31 100644 --- a/sys/uvm/uvm_page.c +++ b/sys/uvm/uvm_page.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_page.c,v 1.38 2001/12/06 12:43:20 art Exp $ */ -/* $NetBSD: uvm_page.c,v 1.66 2001/09/10 21:19:43 chris Exp $ */ +/* $OpenBSD: uvm_page.c,v 1.39 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_page.c,v 1.51 2001/03/09 01:02:12 chs Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -126,16 +126,6 @@ static vaddr_t virtual_space_end; static struct pglist uvm_bootbucket; /* - * we allocate an initial number of page colors in uvm_page_init(), - * and remember them. 
We may re-color pages as cache sizes are - * discovered during the autoconfiguration phase. But we can never - * free the initial set of buckets, since they are allocated using - * uvm_pageboot_alloc(). - */ - -static boolean_t have_recolored_pages /* = FALSE */; - -/* * local prototypes */ @@ -197,14 +187,10 @@ uvm_pageremove(pg) simple_unlock(&uvm.hashlock); splx(s); - if (UVM_OBJ_IS_VTEXT(pg->uobject) || UVM_OBJ_IS_VNODE(pg->uobject)) { - if (UVM_OBJ_IS_VNODE(pg->uobject)) - uvmexp.vnodepages--; - else - uvmexp.vtextpages--; - s = splbio(); - vholdrele((struct vnode *)pg->uobject); - splx(s); + if (UVM_OBJ_IS_VTEXT(pg->uobject)) { + uvmexp.vtextpages--; + } else if (UVM_OBJ_IS_VNODE(pg->uobject)) { + uvmexp.vnodepages--; } /* object should be locked */ @@ -216,22 +202,9 @@ uvm_pageremove(pg) pg->version++; } -static void -uvm_page_init_buckets(struct pgfreelist *pgfl) -{ - int color, i; - - for (color = 0; color < uvmexp.ncolors; color++) { - for (i = 0; i < PGFL_NQUEUES; i++) { - TAILQ_INIT(&pgfl->pgfl_buckets[ - color].pgfl_queues[i]); - } - } -} - /* * uvm_page_init: init the page system. called from uvm_init(). - * + * * => we return the range of kernel virtual memory in kvm_startp/kvm_endp */ @@ -239,20 +212,22 @@ void uvm_page_init(kvm_startp, kvm_endp) vaddr_t *kvm_startp, *kvm_endp; { - vsize_t freepages, pagecount, bucketcount, n; - struct pgflbucket *bucketarray; - struct vm_page *pagearray; - int lcv, i; + vsize_t freepages, pagecount, n; + vm_page_t pagearray; + int lcv, i; paddr_t paddr; /* - * init the page queues and page queue locks, except the free - * list; we allocate that later (with the initial vm_page - * structures). + * init the page queues and page queue locks */ + for (lcv = 0; lcv < VM_NFREELIST; lcv++) { + for (i = 0; i < PGFL_NQUEUES; i++) + TAILQ_INIT(&uvm.page_free[lcv].pgfl_queues[i]); + } TAILQ_INIT(&uvm.page_active); - TAILQ_INIT(&uvm.page_inactive); + TAILQ_INIT(&uvm.page_inactive_swp); + TAILQ_INIT(&uvm.page_inactive_obj); simple_lock_init(&uvm.pageqlock); simple_lock_init(&uvm.fpageqlock); @@ -268,7 +243,7 @@ uvm_page_init(kvm_startp, kvm_endp) TAILQ_INIT(uvm.page_hash); /* init hash table */ simple_lock_init(&uvm.hashlock); /* init hash table lock */ - /* + /* * allocate vm_page structures. */ @@ -281,28 +256,20 @@ uvm_page_init(kvm_startp, kvm_endp) if (vm_nphysseg == 0) panic("uvm_page_bootstrap: no memory pre-allocated"); - + /* - * first calculate the number of free pages... + * first calculate the number of free pages... * * note that we use start/end rather than avail_start/avail_end. * this allows us to allocate extra vm_page structures in case we * want to return some memory to the pool after booting. */ - + freepages = 0; for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start); /* - * Let MD code initialize the number of colors, or default - * to 1 color if MD code doesn't care. - */ - if (uvmexp.ncolors == 0) - uvmexp.ncolors = 1; - uvmexp.colormask = uvmexp.ncolors - 1; - - /* * we now know we have (PAGE_SIZE * freepages) bytes of memory we can * use. for each page of memory we use we need a vm_page structure. * thus, the total number of pages we can use is the total size of @@ -311,24 +278,13 @@ uvm_page_init(kvm_startp, kvm_endp) * truncation errors (since we can only allocate in terms of whole * pages). 
*/ - - bucketcount = uvmexp.ncolors * VM_NFREELIST; + pagecount = ((freepages + 1) << PAGE_SHIFT) / (PAGE_SIZE + sizeof(struct vm_page)); - - bucketarray = (void *) uvm_pageboot_alloc((bucketcount * - sizeof(struct pgflbucket)) + (pagecount * - sizeof(struct vm_page))); - pagearray = (struct vm_page *)(bucketarray + bucketcount); - - for (lcv = 0; lcv < VM_NFREELIST; lcv++) { - uvm.page_free[lcv].pgfl_buckets = - (bucketarray + (lcv * uvmexp.ncolors)); - uvm_page_init_buckets(&uvm.page_free[lcv]); - } - + pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount * + sizeof(struct vm_page)); memset(pagearray, 0, pagecount * sizeof(struct vm_page)); - + /* * init the vm_page structures and put them in the correct place. */ @@ -352,9 +308,6 @@ uvm_page_init(kvm_startp, kvm_endp) paddr = ptoa(vm_physmem[lcv].start); for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) { vm_physmem[lcv].pgs[i].phys_addr = paddr; -#ifdef __HAVE_VM_PAGE_MD - VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]); -#endif if (atop(paddr) >= vm_physmem[lcv].avail_start && atop(paddr) <= vm_physmem[lcv].avail_end) { uvmexp.npages++; @@ -408,9 +361,9 @@ uvm_page_init(kvm_startp, kvm_endp) /* * uvm_setpagesize: set the page size - * + * * => sets page_shift and page_mask from uvmexp.pagesize. - */ + */ void uvm_setpagesize() @@ -433,12 +386,28 @@ vaddr_t uvm_pageboot_alloc(size) vsize_t size; { - static boolean_t initialized = FALSE; +#if defined(PMAP_STEAL_MEMORY) vaddr_t addr; -#if !defined(PMAP_STEAL_MEMORY) - vaddr_t vaddr; + + /* + * defer bootstrap allocation to MD code (it may want to allocate + * from a direct-mapped segment). pmap_steal_memory should round + * off virtual_space_start/virtual_space_end. + */ + + addr = pmap_steal_memory(size, &virtual_space_start, + &virtual_space_end); + + return(addr); + +#else /* !PMAP_STEAL_MEMORY */ + + static boolean_t initialized = FALSE; + vaddr_t addr, vaddr; paddr_t paddr; -#endif + + /* round to page size */ + size = round_page(size); /* * on first call to this function, initialize ourselves. @@ -453,24 +422,6 @@ uvm_pageboot_alloc(size) initialized = TRUE; } - /* round to page size */ - size = round_page(size); - -#if defined(PMAP_STEAL_MEMORY) - - /* - * defer bootstrap allocation to MD code (it may want to allocate - * from a direct-mapped segment). pmap_steal_memory should adjust - * virtual_space_start/virtual_space_end if necessary. - */ - - addr = pmap_steal_memory(size, &virtual_space_start, - &virtual_space_end); - - return(addr); - -#else /* !PMAP_STEAL_MEMORY */ - /* * allocate virtual memory for this request */ @@ -510,7 +461,6 @@ uvm_pageboot_alloc(size) */ pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE); } - pmap_update(pmap_kernel()); return(addr); #endif /* PMAP_STEAL_MEMORY */ } @@ -881,76 +831,6 @@ uvm_page_rehash() return; } -/* - * uvm_page_recolor: Recolor the pages if the new bucket count is - * larger than the old one. - */ - -void -uvm_page_recolor(int newncolors) -{ - struct pgflbucket *bucketarray, *oldbucketarray; - struct pgfreelist pgfl; - struct vm_page *pg; - vsize_t bucketcount; - int s, lcv, color, i, ocolors; - - if (newncolors <= uvmexp.ncolors) - return; - - bucketcount = newncolors * VM_NFREELIST; - bucketarray = malloc(bucketcount * sizeof(struct pgflbucket), - M_VMPAGE, M_NOWAIT); - if (bucketarray == NULL) { - printf("WARNING: unable to allocate %ld page color buckets\n", - (long) bucketcount); - return; - } - - s = uvm_lock_fpageq(); - - /* Make sure we should still do this. 
*/ - if (newncolors <= uvmexp.ncolors) { - uvm_unlock_fpageq(s); - free(bucketarray, M_VMPAGE); - return; - } - - oldbucketarray = uvm.page_free[0].pgfl_buckets; - ocolors = uvmexp.ncolors; - - uvmexp.ncolors = newncolors; - uvmexp.colormask = uvmexp.ncolors - 1; - - for (lcv = 0; lcv < VM_NFREELIST; lcv++) { - pgfl.pgfl_buckets = (bucketarray + (lcv * newncolors)); - uvm_page_init_buckets(&pgfl); - for (color = 0; color < ocolors; color++) { - for (i = 0; i < PGFL_NQUEUES; i++) { - while ((pg = TAILQ_FIRST(&uvm.page_free[ - lcv].pgfl_buckets[color].pgfl_queues[i])) - != NULL) { - TAILQ_REMOVE(&uvm.page_free[ - lcv].pgfl_buckets[ - color].pgfl_queues[i], pg, pageq); - TAILQ_INSERT_TAIL(&pgfl.pgfl_buckets[ - VM_PGCOLOR_BUCKET(pg)].pgfl_queues[ - i], pg, pageq); - } - } - } - uvm.page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets; - } - - if (have_recolored_pages) { - uvm_unlock_fpageq(s); - free(oldbucketarray, M_VMPAGE); - return; - } - - have_recolored_pages = TRUE; - uvm_unlock_fpageq(s); -} #if 1 /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */ @@ -982,49 +862,6 @@ uvm_page_physdump() #endif /* - * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat - */ - -static __inline struct vm_page * -uvm_pagealloc_pgfl(struct pgfreelist *pgfl, int try1, int try2, - unsigned int *trycolorp) -{ - struct pglist *freeq; - struct vm_page *pg; - int color, trycolor = *trycolorp; - - color = trycolor; - do { - if ((pg = TAILQ_FIRST((freeq = - &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) - goto gotit; - if ((pg = TAILQ_FIRST((freeq = - &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) - goto gotit; - color = (color + 1) & uvmexp.colormask; - } while (color != trycolor); - - return (NULL); - - gotit: - TAILQ_REMOVE(freeq, pg, pageq); - uvmexp.free--; - - /* update zero'd page count */ - if (pg->flags & PG_ZERO) - uvmexp.zeropages--; - - if (color == trycolor) - uvmexp.colorhit++; - else { - uvmexp.colormiss++; - *trycolorp = color; - } - - return (pg); -} - -/* * uvm_pagealloc_strat: allocate vm_page from a particular free list. * * => return null if no pages free @@ -1050,8 +887,10 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) struct vm_anon *anon; int strat, free_list; { - int lcv, try1, try2, s, zeroit = 0, color; + int lcv, try1, try2, s, zeroit = 0; struct vm_page *pg; + struct pglist *freeq; + struct pgfreelist *pgfl; boolean_t use_reserve; KASSERT(obj == NULL || anon == NULL); @@ -1063,20 +902,21 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) s = uvm_lock_fpageq(); /* - * This implements a global round-robin page coloring - * algorithm. - * - * XXXJRT: Should we make the `nextcolor' per-cpu? - * XXXJRT: What about virtually-indexed caches? - */ - color = uvm.page_free_nextcolor; - - /* * check to see if we need to generate some free pages waking * the pagedaemon. */ - UVM_KICK_PDAEMON(); +#ifdef UBC + if (uvmexp.free + uvmexp.paging < uvmexp.freemin || + (uvmexp.free + uvmexp.paging < uvmexp.freetarg && + uvmexp.inactive < uvmexp.inactarg)) { + wakeup(&uvm.pagedaemon); + } +#else + if (uvmexp.free < uvmexp.freemin || (uvmexp.free < uvmexp.freetarg && + uvmexp.inactive < uvmexp.inactarg)) + wakeup(&uvm.pagedaemon); +#endif /* * fail if any of these conditions is true: @@ -1116,9 +956,11 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) case UVM_PGA_STRAT_NORMAL: /* Check all freelists in descending priority order. 
*/ for (lcv = 0; lcv < VM_NFREELIST; lcv++) { - pg = uvm_pagealloc_pgfl(&uvm.page_free[lcv], - try1, try2, &color); - if (pg != NULL) + pgfl = &uvm.page_free[lcv]; + if ((pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try1]))) != NULL || + (pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try2]))) != NULL) goto gotit; } @@ -1129,9 +971,11 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) case UVM_PGA_STRAT_FALLBACK: /* Attempt to allocate from the specified free list. */ KASSERT(free_list >= 0 && free_list < VM_NFREELIST); - pg = uvm_pagealloc_pgfl(&uvm.page_free[free_list], - try1, try2, &color); - if (pg != NULL) + pgfl = &uvm.page_free[free_list]; + if ((pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try1]))) != NULL || + (pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try2]))) != NULL) goto gotit; /* Fall back, if possible. */ @@ -1149,11 +993,12 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) } gotit: - /* - * We now know which color we actually allocated from; set - * the next color accordingly. - */ - uvm.page_free_nextcolor = (color + 1) & uvmexp.colormask; + TAILQ_REMOVE(freeq, pg, pageq); + uvmexp.free--; + + /* update zero'd page count */ + if (pg->flags & PG_ZERO) + uvmexp.zeropages--; /* * update allocation statistics and remember if we have to @@ -1275,24 +1120,24 @@ uvm_pagefree(pg) * if the object page is on loan we are going to drop ownership. * it is possible that an anon will take over as owner for this * page later on. the anon will want a !PG_CLEAN page so that - * it knows it needs to allocate swap if it wants to page the - * page out. + * it knows it needs to allocate swap if it wants to page the + * page out. */ if (saved_loan_count) pg->flags &= ~PG_CLEAN; /* in case an anon takes over */ uvm_pageremove(pg); - + /* * if our page was on loan, then we just lost control over it * (in fact, if it was loaned to an anon, the anon may have * already taken over ownership of the page by now and thus - * changed the loan_count [e.g. in uvmfault_anonget()]) we just - * return (when the last loan is dropped, then the page can be + * changed the loan_count [e.g. in uvmfault_anonget()]) we just + * return (when the last loan is dropped, then the page can be * freed by whatever was holding the last loan). */ - if (saved_loan_count) + if (saved_loan_count) return; } else if (saved_loan_count && (pg->pqflags & PQ_ANON)) { @@ -1318,8 +1163,12 @@ uvm_pagefree(pg) TAILQ_REMOVE(&uvm.page_active, pg, pageq); pg->pqflags &= ~PQ_ACTIVE; uvmexp.active--; - } else if (pg->pqflags & PQ_INACTIVE) { - TAILQ_REMOVE(&uvm.page_inactive, pg, pageq); + } + if (pg->pqflags & PQ_INACTIVE) { + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq); pg->pqflags &= ~PQ_INACTIVE; uvmexp.inactive--; } @@ -1345,8 +1194,7 @@ uvm_pagefree(pg) s = uvm_lock_fpageq(); TAILQ_INSERT_TAIL(&uvm.page_free[ - uvm_page_lookup_freelist(pg)].pgfl_buckets[ - VM_PGCOLOR_BUCKET(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq); + uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq); pg->pqflags = PQ_FREE; #ifdef DEBUG pg->uobject = (void *)0xdeadbeef; @@ -1450,8 +1298,7 @@ uvm_page_own(pg, tag) /* * uvm_pageidlezero: zero free pages while the system is idle. * - * => try to complete one color bucket at a time, to reduce our impact - * on the CPU cache. + * => we do at least one iteration per call, if we are below the target. 
* => we loop until we either reach the target or whichqs indicates that * there is a process ready to run. */ @@ -1460,17 +1307,10 @@ uvm_pageidlezero() { struct vm_page *pg; struct pgfreelist *pgfl; - int free_list, s, firstbucket; - static int nextbucket; + int free_list, s; - s = uvm_lock_fpageq(); - - firstbucket = nextbucket; do { - if (whichqs != 0) { - uvm_unlock_fpageq(s); - return; - } + s = uvm_lock_fpageq(); if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) { uvm.page_idle_zero = FALSE; @@ -1480,52 +1320,54 @@ uvm_pageidlezero() for (free_list = 0; free_list < VM_NFREELIST; free_list++) { pgfl = &uvm.page_free[free_list]; - while ((pg = TAILQ_FIRST(&pgfl->pgfl_buckets[ - nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) { - if (whichqs != 0) { - uvm_unlock_fpageq(s); - return; - } - - TAILQ_REMOVE(&pgfl->pgfl_buckets[ - nextbucket].pgfl_queues[PGFL_UNKNOWN], - pg, pageq); - uvmexp.free--; - uvm_unlock_fpageq(s); -#ifdef PMAP_PAGEIDLEZERO - if (PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg)) == - FALSE) { - /* - * The machine-dependent code detected - * some reason for us to abort zeroing - * pages, probably because there is a - * process now ready to run. - */ - s = uvm_lock_fpageq(); - TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[ - nextbucket].pgfl_queues[ - PGFL_UNKNOWN], pg, pageq); - uvmexp.free++; - uvmexp.zeroaborts++; - uvm_unlock_fpageq(s); - return; - } -#else - pmap_zero_page(VM_PAGE_TO_PHYS(pg)); -#endif /* PMAP_PAGEIDLEZERO */ - pg->flags |= PG_ZERO; - - s = uvm_lock_fpageq(); - TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[ - nextbucket].pgfl_queues[PGFL_ZEROS], - pg, pageq); - uvmexp.free++; - uvmexp.zeropages++; - } + if ((pg = TAILQ_FIRST(&pgfl->pgfl_queues[ + PGFL_UNKNOWN])) != NULL) + break; } - nextbucket = (nextbucket + 1) & uvmexp.colormask; - } while (nextbucket != firstbucket); + if (pg == NULL) { + /* + * No non-zero'd pages; don't bother trying again + * until we know we have non-zero'd pages free. + */ + uvm.page_idle_zero = FALSE; + uvm_unlock_fpageq(s); + return; + } - uvm_unlock_fpageq(s); + TAILQ_REMOVE(&pgfl->pgfl_queues[PGFL_UNKNOWN], pg, pageq); + uvmexp.free--; + uvm_unlock_fpageq(s); + +#ifdef PMAP_PAGEIDLEZERO + if (PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg)) == FALSE) { + /* + * The machine-dependent code detected some + * reason for us to abort zeroing pages, + * probably because there is a process now + * ready to run. + */ + s = uvm_lock_fpageq(); + TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_UNKNOWN], + pg, pageq); + uvmexp.free++; + uvmexp.zeroaborts++; + uvm_unlock_fpageq(s); + return; + } +#else + /* + * XXX This will toast the cache unless the pmap_zero_page() + * XXX implementation does uncached access. + */ + pmap_zero_page(VM_PAGE_TO_PHYS(pg)); +#endif + pg->flags |= PG_ZERO; + + s = uvm_lock_fpageq(); + TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_ZEROS], pg, pageq); + uvmexp.free++; + uvmexp.zeropages++; + uvm_unlock_fpageq(s); + } while (whichqs == 0); } diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h index 45b26021f3e..d1f531cbff4 100644 --- a/sys/uvm/uvm_page.h +++ b/sys/uvm/uvm_page.h @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_page.h,v 1.16 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_page.h,v 1.30 2001/07/25 23:05:04 thorpej Exp $ */ +/* $OpenBSD: uvm_page.h,v 1.17 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_page.h,v 1.19 2000/12/28 08:24:55 chs Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. 
+ * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -110,7 +110,7 @@ * fields were dumped and all the flags were lumped into one short. * that is fine for a single threaded uniprocessor OS, but bad if you * want to actual make use of locking (simple_lock's). so, we've - * separated things back out again. + * seperated things back out again. * * note the page structure has no lock of its own. */ @@ -128,22 +128,14 @@ struct vm_page { struct uvm_object *uobject; /* object (O,P) */ voff_t offset; /* offset into object (O,P) */ - u_int flags: 16, /* object flags [O] */ - version: 16; /* version count [O] */ - - u_int wire_count: 16, /* wired down map refs [P] */ - pqflags: 8, /* page queue flags [P] */ - : 8; - + u_short flags; /* object flags [O] */ + u_short version; /* version count [O] */ + u_short wire_count; /* wired down map refs [P] */ + u_short pqflags; /* page queue flags [P] */ u_int loan_count; /* number of active loans * to read: [O or P] * to modify: [O _and_ P] */ paddr_t phys_addr; /* physical address of page */ - -#ifdef __HAVE_VM_PAGE_MD - struct vm_page_md mdpage; /* pmap-specific data */ -#endif - #if defined(UVM_PAGE_TRKOWN) /* debugging fields to track page ownership */ pid_t owner; /* proc that set PG_BUSY */ @@ -153,12 +145,14 @@ struct vm_page { /* * These are the flags defined for vm_page. + * + * Note: PG_FILLED and PG_DIRTY are added for the filesystems. */ /* * locking rules: * PG_ ==> locked by object lock - * PQ_ ==> lock by page queue lock + * PQ_ ==> lock by page queue lock * PQ_FREE is locked by free queue lock and is mutex with all other PQs * * PG_ZERO is used to indicate that a page has been pre-zero'd. 
This flag @@ -178,12 +172,12 @@ struct vm_page { #define PG_PAGER1 0x1000 /* pager-specific flag */ -#define PQ_FREE 0x01 /* page is on free list */ -#define PQ_INACTIVE 0x02 /* page is in inactive list */ -#define PQ_ACTIVE 0x04 /* page is in active list */ -#define PQ_ANON 0x10 /* page is part of an anon, rather +#define PQ_FREE 0x0001 /* page is on free list */ +#define PQ_INACTIVE 0x0002 /* page is in inactive list */ +#define PQ_ACTIVE 0x0004 /* page is in active list */ +#define PQ_ANON 0x0010 /* page is part of an anon, rather than an uvm_object */ -#define PQ_AOBJ 0x20 /* page is part of an anonymous +#define PQ_AOBJ 0x0020 /* page is part of an anonymous uvm_object */ #define PQ_SWAPBACKED (PQ_ANON|PQ_AOBJ) #define PQ_ENCRYPT 0x0040 /* page needs {en,de}cryption */ @@ -216,9 +210,7 @@ struct vm_physseg { int free_list; /* which free list they belong on */ struct vm_page *pgs; /* vm_page structures (from start) */ struct vm_page *lastpg; /* vm_page structure for end */ -#ifdef __HAVE_PMAP_PHYSSEG struct pmap_physseg pmseg; /* pmap specific (MD) data */ -#endif }; #ifdef _KERNEL @@ -232,7 +224,7 @@ extern boolean_t vm_page_zero_enable; /* * Each pageable resident page falls into one of three lists: * - * free + * free * Available for allocation now. * inactive * Not referenced in any map, but still has an @@ -262,7 +254,7 @@ extern int vm_nphysseg; #ifdef UVM_PAGE_INLINE #define PAGE_INLINE static __inline -#else +#else #define PAGE_INLINE /* nothing */ #endif /* UVM_PAGE_INLINE */ @@ -278,7 +270,6 @@ void uvm_page_own __P((struct vm_page *, char *)); boolean_t uvm_page_physget __P((paddr_t *)); #endif void uvm_page_rehash __P((void)); -void uvm_page_recolor __P((int)); void uvm_pageidlezero __P((void)); PAGE_INLINE int uvm_lock_fpageq __P((void)); @@ -317,12 +308,6 @@ static int vm_physseg_find __P((paddr_t, int *)); #define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr) /* - * Compute the page color bucket for a given page. - */ -#define VM_PGCOLOR_BUCKET(pg) \ - (atop(VM_PAGE_TO_PHYS((pg))) & uvmexp.colormask) - -/* * when VM_PHYSSEG_MAX is 1, we can simplify these functions */ diff --git a/sys/uvm/uvm_page_i.h b/sys/uvm/uvm_page_i.h index cf8636bb42d..024c692b5b9 100644 --- a/sys/uvm/uvm_page_i.h +++ b/sys/uvm/uvm_page_i.h @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_page_i.h,v 1.12 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_page_i.h,v 1.19 2001/06/27 23:57:17 thorpej Exp $ */ +/* $OpenBSD: uvm_page_i.h,v 1.13 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_page_i.h,v 1.16 2001/01/28 23:30:45 thorpej Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. 
- * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -159,8 +159,12 @@ uvm_pagewire(pg) TAILQ_REMOVE(&uvm.page_active, pg, pageq); pg->pqflags &= ~PQ_ACTIVE; uvmexp.active--; - } else if (pg->pqflags & PQ_INACTIVE) { - TAILQ_REMOVE(&uvm.page_inactive, pg, pageq); + } + if (pg->pqflags & PQ_INACTIVE) { + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq); pg->pqflags &= ~PQ_INACTIVE; uvmexp.inactive--; } @@ -170,12 +174,12 @@ uvm_pagewire(pg) } /* - * uvm_pageunwire: unwire the page. + * uvm_pageunwire: unwire the page. * * => activate if wire count goes to zero. * => caller must lock page queues */ - + PAGE_INLINE void uvm_pageunwire(pg) struct vm_page *pg; @@ -209,9 +213,15 @@ uvm_pagedeactivate(pg) } if ((pg->pqflags & PQ_INACTIVE) == 0) { KASSERT(pg->wire_count == 0); - TAILQ_INSERT_TAIL(&uvm.page_inactive, pg, pageq); + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_INSERT_TAIL(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_INSERT_TAIL(&uvm.page_inactive_obj, pg, pageq); pg->pqflags |= PQ_INACTIVE; uvmexp.inactive++; +#ifndef UBC + pmap_clear_reference(pg); +#endif /* * update the "clean" bit. this isn't 100% * accurate, and doesn't have to be. 
we'll @@ -235,7 +245,10 @@ uvm_pageactivate(pg) struct vm_page *pg; { if (pg->pqflags & PQ_INACTIVE) { - TAILQ_REMOVE(&uvm.page_inactive, pg, pageq); + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq); pg->pqflags &= ~PQ_INACTIVE; uvmexp.inactive--; } diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c index 8259df56237..5c93fe9f9db 100644 --- a/sys/uvm/uvm_pager.c +++ b/sys/uvm/uvm_pager.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_pager.c,v 1.28 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_pager.c,v 1.49 2001/09/10 21:19:43 chris Exp $ */ +/* $OpenBSD: uvm_pager.c,v 1.29 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pager.c,v 1.41 2001/02/18 19:26:50 chs Exp $ */ /* * @@ -58,21 +58,25 @@ struct pool *uvm_aiobuf_pool; extern struct uvm_pagerops uvm_deviceops; extern struct uvm_pagerops uvm_vnodeops; +#ifdef UBC extern struct uvm_pagerops ubc_pager; +#endif struct uvm_pagerops *uvmpagerops[] = { &aobj_pager, &uvm_deviceops, &uvm_vnodeops, +#ifdef UBC &ubc_pager, +#endif }; /* * the pager map: provides KVA for I/O */ -struct vm_map *pager_map; /* XXX */ -struct simplelock pager_map_wanted_lock; +vm_map_t pager_map; /* XXX */ +simple_lock_data_t pager_map_wanted_lock; boolean_t pager_map_wanted; /* locked by pager map */ static vaddr_t emergva; static boolean_t emerginuse; @@ -100,7 +104,7 @@ uvm_pager_init() /* * init ASYNC I/O queue */ - + TAILQ_INIT(&uvm.aio_done); /* @@ -148,8 +152,8 @@ ReStart: size = npages << PAGE_SHIFT; kva = 0; /* let system choose VA */ - if (uvm_map(pager_map, &kva, size, NULL, - UVM_UNKNOWN_OFFSET, 0, UVM_FLAG_NOMERGE) != 0) { + if (uvm_map(pager_map, &kva, size, NULL, + UVM_UNKNOWN_OFFSET, 0, UVM_FLAG_NOMERGE) != KERN_SUCCESS) { if (curproc == uvm.pagedaemon_proc) { simple_lock(&pager_map_wanted_lock); if (emerginuse) { @@ -169,9 +173,9 @@ ReStart: return(0); } simple_lock(&pager_map_wanted_lock); - pager_map_wanted = TRUE; + pager_map_wanted = TRUE; UVMHIST_LOG(maphist, " SLEEPING on pager_map",0,0,0,0); - UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, FALSE, + UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, FALSE, "pager_map", 0); goto ReStart; } @@ -186,7 +190,6 @@ enter: prot, PMAP_WIRED | ((pp->flags & PG_FAKE) ? prot : VM_PROT_READ)); } - pmap_update(vm_map_pmap(pager_map)); UVMHIST_LOG(maphist, "<- done (KVA=0x%x)", kva,0,0,0); return(kva); @@ -205,7 +208,7 @@ uvm_pagermapout(kva, npages) int npages; { vsize_t size = npages << PAGE_SHIFT; - struct vm_map_entry *entries; + vm_map_entry_t entries; UVMHIST_FUNC("uvm_pagermapout"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, " (kva=0x%x, npages=%d)", kva, npages,0,0); @@ -224,7 +227,7 @@ uvm_pagermapout(kva, npages) } vm_map_lock(pager_map); - uvm_unmap_remove(pager_map, kva, kva + size, &entries); + (void) uvm_unmap_remove(pager_map, kva, kva + size, &entries); simple_lock(&pager_map_wanted_lock); if (pager_map_wanted) { pager_map_wanted = FALSE; @@ -232,12 +235,11 @@ uvm_pagermapout(kva, npages) } simple_unlock(&pager_map_wanted_lock); vm_map_unlock(pager_map); - remove: pmap_remove(pmap_kernel(), kva, kva + (npages << PAGE_SHIFT)); if (entries) uvm_unmap_detach(entries, 0); - pmap_update(pmap_kernel()); + UVMHIST_LOG(maphist,"<- done",0,0,0,0); } @@ -275,7 +277,7 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) int center_idx, forward, incr; UVMHIST_FUNC("uvm_mk_pcluster"); UVMHIST_CALLED(maphist); - /* + /* * center page should already be busy and write protected. 
XXX: * suppose page is wired? if we lock, then a process could * fault/block on it. if we don't lock, a process could write the @@ -311,8 +313,8 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) *npages = 1; /* - * attempt to cluster around the left [backward], and then - * the right side [forward]. + * attempt to cluster around the left [backward], and then + * the right side [forward]. */ for (forward = 0 ; forward <= 1 ; forward++) { @@ -371,7 +373,7 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) (*npages)++; } } - + /* * done! return the cluster array to the caller!!! */ @@ -398,22 +400,22 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) * => flags (first two for non-swap-backed pages) * PGO_ALLPAGES: all pages in uobj are valid targets * PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets - * PGO_SYNCIO: wait for i/o to complete + * PGO_SYNCIO: do SYNC I/O (no async) * PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range * if (!uobj) start is the (daddr_t) of the starting swapblk * => return state: - * 1. we return the error code of the pageout + * 1. we return the VM_PAGER status code of the pageout * 2. we return with the page queues unlocked * 3. if (uobj != NULL) [!swap_backed] we return with - * uobj locked _only_ if PGO_PDFREECLUST is set - * AND result == 0 AND async. in all other cases + * uobj locked _only_ if PGO_PDFREECLUST is set + * AND result != VM_PAGER_PEND. in all other cases * we return with uobj unlocked. [this is a hack * that allows the pagedaemon to save one lock/unlock * pair in the !swap_backed case since we have to * lock the uobj to drop the cluster anyway] * 4. on errors we always drop the cluster. thus, if we return - * an error, then the caller only has to worry about + * !PEND, !OK, then the caller only has to worry about * un-busying the main page (not the cluster pages). * 5. on success, if !PGO_PDFREECLUST, we return the cluster * with all pages busy (caller must un-busy and check @@ -430,7 +432,6 @@ uvm_pager_put(uobj, pg, ppsp_ptr, npages, flags, start, stop) { int result; daddr_t swblk; - boolean_t async = (flags & PGO_SYNCIO) == 0; struct vm_page **ppsp = *ppsp_ptr; UVMHIST_FUNC("uvm_pager_put"); UVMHIST_CALLED(ubchist); @@ -496,7 +497,7 @@ ReTry: * we have attempted the I/O. * * if the I/O was a success then: - * if !PGO_PDFREECLUST, we return the cluster to the + * if !PGO_PDFREECLUST, we return the cluster to the * caller (who must un-busy all pages) * else we un-busy cluster pages for the pagedaemon * @@ -505,21 +506,20 @@ ReTry: * i/o is done...] */ - if (result == 0) { - if (flags & PGO_PDFREECLUST && !async) { - + if (result == VM_PAGER_PEND || result == VM_PAGER_OK) { + if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) { /* - * drop cluster and relock object for sync i/o. + * drop cluster and relock object (only if I/O is + * not pending) */ - if (uobj) /* required for dropcluster */ simple_lock(&uobj->vmobjlock); if (*npages > 1 || pg == NULL) uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_PDFREECLUST); - - /* if (uobj): object still locked, as per #3 */ + /* if (uobj): object still locked, as per + * return-state item #3 */ } return (result); } @@ -537,24 +537,27 @@ ReTry: uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP); /* - * for hard failures on swap-backed pageouts with a "pg" - * we need to clear pg's swslot since uvm_pager_dropcluster() - * didn't do it and we aren't going to retry. 
+ * for failed swap-backed pageouts with a "pg", + * we need to reset pg's swslot to either: + * "swblk" (for transient errors, so we can retry), + * or 0 (for hard errors). */ - if (uobj == NULL && pg != NULL && result != EAGAIN) { + if (uobj == NULL && pg != NULL) { + int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0; if (pg->pqflags & PQ_ANON) { simple_lock(&pg->uanon->an_lock); - pg->uanon->an_swslot = 0; + pg->uanon->an_swslot = nswblk; simple_unlock(&pg->uanon->an_lock); } else { simple_lock(&pg->uobject->vmobjlock); uao_set_swslot(pg->uobject, - pg->offset >> PAGE_SHIFT, 0); + pg->offset >> PAGE_SHIFT, + nswblk); simple_unlock(&pg->uobject->vmobjlock); } } - if (result == EAGAIN) { + if (result == VM_PAGER_AGAIN) { /* * for transient failures, free all the swslots that @@ -590,18 +593,18 @@ ReTry: * was one). give up! the caller only has one page ("pg") * to worry about. */ - + if (uobj && (flags & PGO_PDFREECLUST) != 0) simple_lock(&uobj->vmobjlock); return(result); } /* - * uvm_pager_dropcluster: drop a cluster we have built (because we + * uvm_pager_dropcluster: drop a cluster we have built (because we * got an error, or, if PGO_PDFREECLUST we are un-busying the * cluster pages on behalf of the pagedaemon). * - * => uobj, if non-null, is a non-swap-backed object that is + * => uobj, if non-null, is a non-swap-backed object that is * locked by the caller. we return with this object still * locked. * => page queues are not locked @@ -609,7 +612,7 @@ ReTry: * => ppsp/npages is our current cluster * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster * pages on behalf of the pagedaemon. - * PGO_REALLOCSWAP: drop previously allocated swap slots for + * PGO_REALLOCSWAP: drop previously allocated swap slots for * clustered swap-backed pages (except for "pg" if !NULL) * "swblk" is the start of swap alloc (e.g. for ppsp[0]) * [only meaningful if swap-backed (uobj == NULL)] @@ -623,7 +626,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) int flags; { int lcv; - boolean_t obj_is_alive; + boolean_t obj_is_alive; struct uvm_object *saved_uobj; /* @@ -635,7 +638,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) /* skip "pg" or empty slot */ if (ppsp[lcv] == pg || ppsp[lcv] == NULL) continue; - + /* * if swap-backed, gain lock on object that owns page. note * that PQ_ANON bit can't change as long as we are holding @@ -688,7 +691,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) saved_uobj = ppsp[lcv]->uobject; obj_is_alive = saved_uobj->pgops->pgo_releasepg(ppsp[lcv], NULL); - + /* for normal objects, "pg" is still PG_BUSY by us, * so obj can't die */ KASSERT(!uobj || obj_is_alive); @@ -711,7 +714,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) } /* - * if we are operating on behalf of the pagedaemon and we + * if we are operating on behalf of the pagedaemon and we * had a successful pageout update the page! */ if (flags & PGO_PDFREECLUST) { @@ -730,6 +733,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) } } +#ifdef UBC /* * interrupt-context iodone handler for nested i/o bufs. * @@ -753,6 +757,7 @@ uvm_aio_biodone1(bp) biodone(mbp); } } +#endif /* * interrupt-context iodone handler for single-buf i/os @@ -793,10 +798,12 @@ uvm_aio_aiodone(bp) error = (bp->b_flags & B_ERROR) ? (bp->b_error ? 
bp->b_error : EIO) : 0; write = (bp->b_flags & B_READ) == 0; +#ifdef UBC /* XXXUBC B_NOCACHE is for swap pager, should be done differently */ if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) { (*bioops.io_pageiodone)(bp); } +#endif uobj = NULL; for (i = 0; i < npages; i++) { @@ -873,12 +880,35 @@ uvm_aio_aiodone(bp) freed: #endif s = splbio(); - if (bp->b_vp != NULL) { - if (write && (bp->b_flags & B_AGE) != 0) { - vwakeup(bp->b_vp); - } + if (write && (bp->b_flags & B_AGE) != 0 && bp->b_vp != NULL) { + vwakeup(bp->b_vp); } - (void) buf_cleanout(bp); pool_put(&bufpool, bp); splx(s); } + +/* + * translate unix errno values to VM_PAGER_*. + */ + +int +uvm_errno2vmerror(errno) + int errno; +{ + switch (errno) { + case 0: + return VM_PAGER_OK; + case EINVAL: + return VM_PAGER_BAD; + case EINPROGRESS: + return VM_PAGER_PEND; + case EIO: + return VM_PAGER_ERROR; + case EAGAIN: + return VM_PAGER_AGAIN; + case EBUSY: + return VM_PAGER_UNLOCK; + default: + return VM_PAGER_ERROR; + } +} diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h index 6b7ddc02d24..37592460b28 100644 --- a/sys/uvm/uvm_pager.h +++ b/sys/uvm/uvm_pager.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_pager.h,v 1.16 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_pager.h,v 1.23 2001/05/26 21:27:21 chs Exp $ */ +/* $OpenBSD: uvm_pager.h,v 1.17 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pager.h,v 1.20 2000/11/27 08:40:05 chs Exp $ */ /* * @@ -89,21 +89,20 @@ struct uvm_pagerops { void (*pgo_init) __P((void));/* init pager */ void (*pgo_reference) /* add reference to obj */ - __P((struct uvm_object *)); + __P((struct uvm_object *)); void (*pgo_detach) /* drop reference to obj */ __P((struct uvm_object *)); int (*pgo_fault) /* special nonstd fault fn */ __P((struct uvm_faultinfo *, vaddr_t, - struct vm_page **, int, int, vm_fault_t, + vm_page_t *, int, int, vm_fault_t, vm_prot_t, int)); boolean_t (*pgo_flush) /* flush pages out of obj */ __P((struct uvm_object *, voff_t, voff_t, int)); int (*pgo_get) /* get/read page */ __P((struct uvm_object *, voff_t, - struct vm_page **, int *, int, vm_prot_t, int, - int)); + vm_page_t *, int *, int, vm_prot_t, int, int)); int (*pgo_put) /* put/write page */ - __P((struct uvm_object *, struct vm_page **, + __P((struct uvm_object *, vm_page_t *, int, boolean_t)); void (*pgo_cluster) /* return range of cluster */ __P((struct uvm_object *, voff_t, voff_t *, @@ -144,7 +143,7 @@ struct uvm_pagerops { #ifdef UVM_PAGER_INLINE #define PAGER_INLINE static __inline -#else +#else #define PAGER_INLINE /* nothing */ #endif /* UVM_PAGER_INLINE */ @@ -152,12 +151,12 @@ struct uvm_pagerops { * prototypes */ -void uvm_pager_dropcluster __P((struct uvm_object *, - struct vm_page *, struct vm_page **, +void uvm_pager_dropcluster __P((struct uvm_object *, + struct vm_page *, struct vm_page **, int *, int)); void uvm_pager_init __P((void)); -int uvm_pager_put __P((struct uvm_object *, struct vm_page *, - struct vm_page ***, int *, int, +int uvm_pager_put __P((struct uvm_object *, struct vm_page *, + struct vm_page ***, int *, int, voff_t, voff_t)); PAGER_INLINE struct vm_page *uvm_pageratop __P((vaddr_t)); @@ -165,8 +164,9 @@ PAGER_INLINE struct vm_page *uvm_pageratop __P((vaddr_t)); vaddr_t uvm_pagermapin __P((struct vm_page **, int, int)); void uvm_pagermapout __P((vaddr_t, int)); struct vm_page **uvm_mk_pcluster __P((struct uvm_object *, struct vm_page **, - int *, struct vm_page *, int, + int *, struct vm_page *, int, voff_t, voff_t)); +int uvm_errno2vmerror __P((int)); /* Flags to uvm_pagermapin() 
*/ #define UVMPAGER_MAPIN_WAITOK 0x01 /* it's okay to wait */ @@ -174,6 +174,27 @@ struct vm_page **uvm_mk_pcluster __P((struct uvm_object *, struct vm_page **, #define UVMPAGER_MAPIN_WRITE 0x00 /* device -> host (pseudo flag) */ /* + * get/put return values + * OK operation was successful + * BAD specified data was out of the accepted range + * FAIL specified data was in range, but doesn't exist + * PEND operations was initiated but not completed + * ERROR error while accessing data that is in range and exists + * AGAIN temporary resource shortage prevented operation from happening + * UNLOCK unlock the map and try again + * REFAULT [uvm_fault internal use only!] unable to relock data structures, + * thus the mapping needs to be reverified before we can procede + */ +#define VM_PAGER_OK 0 +#define VM_PAGER_BAD 1 +#define VM_PAGER_FAIL 2 +#define VM_PAGER_PEND 3 +#define VM_PAGER_ERROR 4 +#define VM_PAGER_AGAIN 5 +#define VM_PAGER_UNLOCK 6 +#define VM_PAGER_REFAULT 7 + +/* * XXX * this is needed until the device strategy interface * is changed to do physically-addressed i/o. diff --git a/sys/uvm/uvm_pager_i.h b/sys/uvm/uvm_pager_i.h index f1b9f5e42f2..c027cd17fb2 100644 --- a/sys/uvm/uvm_pager_i.h +++ b/sys/uvm/uvm_pager_i.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_pager_i.h,v 1.9 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_pager_i.h,v 1.11 2001/05/25 04:06:16 chs Exp $ */ +/* $OpenBSD: uvm_pager_i.h,v 1.10 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pager_i.h,v 1.10 2000/11/25 06:28:00 chs Exp $ */ /* * @@ -32,7 +32,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * from: Id: uvm_pager_i.h,v 1.1.2.2 1997/10/09 23:05:46 chuck Exp + * from: Id: uvm_pager_i.h,v 1.1.2.2 1997/10/09 23:05:46 chuck Exp */ #ifndef _UVM_UVM_PAGER_I_H_ @@ -60,13 +60,13 @@ uvm_pageratop(kva) struct vm_page *pg; paddr_t pa; boolean_t rv; - + rv = pmap_extract(pmap_kernel(), kva, &pa); KASSERT(rv); pg = PHYS_TO_VM_PAGE(pa); KASSERT(pg != NULL); return (pg); -} +} #endif /* defined(UVM_PAGER_INLINE) || defined(UVM_PAGER) */ diff --git a/sys/uvm/uvm_param.h b/sys/uvm/uvm_param.h index 5cc1be262cc..60e5296d90b 100644 --- a/sys/uvm/uvm_param.h +++ b/sys/uvm/uvm_param.h @@ -1,7 +1,7 @@ -/* $OpenBSD: uvm_param.h,v 1.6 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_param.h,v 1.12 2001/08/05 03:33:16 matt Exp $ */ +/* $OpenBSD: uvm_param.h,v 1.7 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_param.h,v 1.5 2001/03/09 01:02:12 chs Exp $ */ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -72,9 +72,7 @@ #ifndef _VM_PARAM_ #define _VM_PARAM_ -#ifdef _KERNEL #include <machine/vmparam.h> -#endif /* * This belongs in types.h, but breaks too many existing programs. @@ -141,6 +139,18 @@ struct _ps_strings { #define SWAPSKIPBYTES 8192 /* never use at the start of a swap space */ +/* + * Return values from the VM routines. + */ +#define KERN_SUCCESS 0 +#define KERN_INVALID_ADDRESS EFAULT +#define KERN_PROTECTION_FAILURE EACCES +#define KERN_NO_SPACE ENOMEM +#define KERN_INVALID_ARGUMENT EINVAL +#define KERN_FAILURE EFAULT +#define KERN_RESOURCE_SHORTAGE ENOMEM +#define KERN_PAGES_LOCKED 9 /* XXX never returned */ + #ifndef ASSEMBLER /* * Convert addresses to pages and vice versa. 
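The VM_PAGER_* status codes and uvm_errno2vmerror() reintroduced above translate unix errno values into the pager return codes used throughout this patch. The following standalone sketch (illustrative only; errno2vmerror and main are stand-ins, not kernel code) mirrors that mapping so the table can be checked in isolation:

/*
 * Minimal, self-contained sketch of the errno -> VM_PAGER_* translation
 * added in uvm_pager.c above.  Constant values mirror the #defines in
 * uvm_pager.h; this is not the kernel function itself.
 */
#include <errno.h>
#include <stdio.h>

#define VM_PAGER_OK      0
#define VM_PAGER_BAD     1
#define VM_PAGER_FAIL    2
#define VM_PAGER_PEND    3
#define VM_PAGER_ERROR   4
#define VM_PAGER_AGAIN   5
#define VM_PAGER_UNLOCK  6
#define VM_PAGER_REFAULT 7

static int
errno2vmerror(int err)
{
	switch (err) {
	case 0:           return VM_PAGER_OK;
	case EINVAL:      return VM_PAGER_BAD;
	case EINPROGRESS: return VM_PAGER_PEND;	/* async i/o still pending */
	case EIO:         return VM_PAGER_ERROR;
	case EAGAIN:      return VM_PAGER_AGAIN;	/* transient shortage */
	case EBUSY:       return VM_PAGER_UNLOCK;
	default:          return VM_PAGER_ERROR;
	}
}

int
main(void)
{
	/* e.g. an async pageout reporting EINPROGRESS maps to PEND (3) */
	printf("EINPROGRESS -> %d\n", errno2vmerror(EINPROGRESS));
	printf("EIO         -> %d\n", errno2vmerror(EIO));
	return 0;
}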
@@ -158,8 +168,10 @@ struct _ps_strings { #define trunc_page(x) ((x) & ~PAGE_MASK) extern psize_t mem_size; /* size of physical memory (bytes) */ +#ifdef UBC extern int ubc_nwins; /* number of UBC mapping windows */ -extern int ubc_winshift; /* shift for a UBC mapping window */ +extern int ubc_winsize; /* size of a UBC mapping window */ +#endif #else /* out-of-kernel versions of round_page and trunc_page */ diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c index 2e46a28ec7d..42fa8b0809d 100644 --- a/sys/uvm/uvm_pdaemon.c +++ b/sys/uvm/uvm_pdaemon.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_pdaemon.c,v 1.20 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_pdaemon.c,v 1.36 2001/06/27 18:52:10 thorpej Exp $ */ +/* $OpenBSD: uvm_pdaemon.c,v 1.21 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pdaemon.c,v 1.30 2001/03/09 01:02:12 chs Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -373,6 +373,14 @@ uvmpd_scan_inactive(pglst) UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist); /* + * note: we currently keep swap-backed pages on a seperate inactive + * list from object-backed pages. however, merging the two lists + * back together again hasn't been ruled out. thus, we keep our + * swap cluster in "swpps" rather than in pps (allows us to mix + * clustering types in the event of a mixed inactive queue). + */ + + /* * swslot is non-zero if we are building a swap cluster. we want * to stay in the loop while we have a page to scan or we have * a swap-cluster to build. 
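The comment above notes that swap-backed and object-backed pages are kept on separate inactive queues. A minimal sketch of the selection rule follows, using the PQ_* flag values from the uvm_page.h hunk earlier in this diff (the helper name and queue enum are illustrative stand-ins, not kernel code):

/*
 * Sketch only: a page whose pqflags include PQ_ANON or PQ_AOBJ
 * (i.e. PQ_SWAPBACKED) goes on page_inactive_swp, everything else
 * on page_inactive_obj, as in uvm_pagedeactivate() above.
 */
#include <stdio.h>

#define PQ_ANON       0x0010
#define PQ_AOBJ       0x0020
#define PQ_SWAPBACKED (PQ_ANON | PQ_AOBJ)

enum inactive_queue { INACTIVE_SWP, INACTIVE_OBJ };

static enum inactive_queue
inactive_queue_for(unsigned short pqflags)
{
	/* swap-backed (anon or aobj) pages are scanned separately */
	return (pqflags & PQ_SWAPBACKED) ? INACTIVE_SWP : INACTIVE_OBJ;
}

int
main(void)
{
	printf("anon page  -> %s\n",
	    inactive_queue_for(PQ_ANON) == INACTIVE_SWP ? "swp" : "obj");
	printf("vnode page -> %s\n",
	    inactive_queue_for(0) == INACTIVE_SWP ? "swp" : "obj");
	return 0;
}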
@@ -687,20 +695,13 @@ uvmpd_scan_inactive(pglst) * add block to cluster */ - if (anon) { + swpps[swcpages] = p; + if (anon) anon->an_swslot = swslot + swcpages; - } else { - result = uao_set_swslot(uobj, + else + uao_set_swslot(uobj, p->offset >> PAGE_SHIFT, swslot + swcpages); - if (result == -1) { - p->flags &= ~PG_BUSY; - UVM_PAGE_OWN(p, NULL); - simple_unlock(&uobj->vmobjlock); - continue; - } - } - swpps[swcpages] = p; swcpages++; } } else { @@ -766,14 +767,18 @@ uvmpd_scan_inactive(pglst) * * note locking semantics of uvm_pager_put with PGO_PDFREECLUST: * IN: locked: uobj (if !swap_backed), page queues - * OUT:!locked: pageqs, uobj + * OUT: locked: uobj (if !swap_backed && result !=VM_PAGER_PEND) + * !locked: pageqs, uobj (if swap_backed || VM_PAGER_PEND) + * + * [the bit about VM_PAGER_PEND saves us one lock-unlock pair] */ /* locked: uobj (if !swap_backed), page queues */ uvmexp.pdpageouts++; result = uvm_pager_put(swap_backed ? NULL : uobj, p, &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0); - /* unlocked: pageqs, uobj */ + /* locked: uobj (if !swap_backed && result != PEND) */ + /* unlocked: pageqs, object (if swap_backed ||result == PEND) */ /* * if we did i/o to swap, zero swslot to indicate that we are @@ -784,10 +789,35 @@ uvmpd_scan_inactive(pglst) swslot = 0; /* done with this cluster */ /* - * if the pageout failed, reactivate the page and continue. + * first, we check for VM_PAGER_PEND which means that the + * async I/O is in progress and the async I/O done routine + * will clean up after us. in this case we move on to the + * next page. + * + * there is a very remote chance that the pending async i/o can + * finish _before_ we get here. if that happens, our page "p" + * may no longer be on the inactive queue. so we verify this + * when determining the next page (starting over at the head if + * we've lost our inactive page). */ - if (result == EIO && curproc == uvm.pagedaemon_proc) { + if (result == VM_PAGER_PEND) { + uvmexp.paging += npages; + uvm_lock_pageq(); + uvmexp.pdpending++; + if (p) { + if (p->pqflags & PQ_INACTIVE) + nextpg = TAILQ_NEXT(p, pageq); + else + nextpg = TAILQ_FIRST(pglst); + } else { + nextpg = NULL; + } + continue; + } + + if (result == VM_PAGER_ERROR && + curproc == uvm.pagedaemon_proc) { uvm_lock_pageq(); nextpg = TAILQ_NEXT(p, pageq); uvm_pageactivate(p); @@ -795,20 +825,134 @@ uvmpd_scan_inactive(pglst) } /* - * the pageout is in progress. bump counters and set up - * for the next loop. + * clean up "p" if we have one */ - uvm_lock_pageq(); - uvmexp.paging += npages; - uvmexp.pdpending++; if (p) { - if (p->pqflags & PQ_INACTIVE) + /* + * the I/O request to "p" is done and uvm_pager_put + * has freed any cluster pages it may have allocated + * during I/O. all that is left for us to do is + * clean up page "p" (which is still PG_BUSY). + * + * our result could be one of the following: + * VM_PAGER_OK: successful pageout + * + * VM_PAGER_AGAIN: tmp resource shortage, we skip + * to next page + * VM_PAGER_{FAIL,ERROR,BAD}: an error. we + * "reactivate" page to get it out of the way (it + * will eventually drift back into the inactive + * queue for a retry). + * VM_PAGER_UNLOCK: should never see this as it is + * only valid for "get" operations + */ + + /* relock p's object: page queues not lock yet, so + * no need for "try" */ + + /* !swap_backed case: already locked... 
*/ + if (swap_backed) { + if (anon) + simple_lock(&anon->an_lock); + else + simple_lock(&uobj->vmobjlock); + } + + /* handle PG_WANTED now */ + if (p->flags & PG_WANTED) + /* still holding object lock */ + wakeup(p); + + p->flags &= ~(PG_BUSY|PG_WANTED); + UVM_PAGE_OWN(p, NULL); + + /* released during I/O? */ + if (p->flags & PG_RELEASED) { + if (anon) { + /* remove page so we can get nextpg */ + anon->u.an_page = NULL; + + simple_unlock(&anon->an_lock); + uvm_anfree(anon); /* kills anon */ + pmap_page_protect(p, VM_PROT_NONE); + anon = NULL; + uvm_lock_pageq(); + nextpg = TAILQ_NEXT(p, pageq); + /* free released page */ + uvm_pagefree(p); + + } else { + + /* + * pgo_releasepg nukes the page and + * gets "nextpg" for us. it returns + * with the page queues locked (when + * given nextpg ptr). + */ + + if (!uobj->pgops->pgo_releasepg(p, + &nextpg)) + /* uobj died after release */ + uobj = NULL; + + /* + * lock page queues here so that they're + * always locked at the end of the loop. + */ + + uvm_lock_pageq(); + } + } else { /* page was not released during I/O */ + uvm_lock_pageq(); nextpg = TAILQ_NEXT(p, pageq); - else - nextpg = TAILQ_FIRST(pglst); + if (result != VM_PAGER_OK) { + /* pageout was a failure... */ + if (result != VM_PAGER_AGAIN) + uvm_pageactivate(p); + pmap_clear_reference(p); + /* XXXCDC: if (swap_backed) FREE p's + * swap block? */ + } else { + /* pageout was a success... */ + pmap_clear_reference(p); + pmap_clear_modify(p); + p->flags |= PG_CLEAN; + } + } + + /* + * drop object lock (if there is an object left). do + * a safety check of nextpg to make sure it is on the + * inactive queue (it should be since PG_BUSY pages on + * the inactive queue can't be re-queued [note: not + * true for active queue]). + */ + + if (anon) + simple_unlock(&anon->an_lock); + else if (uobj) + simple_unlock(&uobj->vmobjlock); + } else { + + /* + * if p is null in this loop, make sure it stays null + * in the next loop. + */ + nextpg = NULL; + + /* + * lock page queues here just so they're always locked + * at the end of the loop. + */ + + uvm_lock_pageq(); + } + + if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) { + nextpg = TAILQ_FIRST(pglst); /* reload! */ } } return (retval); @@ -871,7 +1015,12 @@ uvmpd_scan() got_it = FALSE; pages_freed = uvmexp.pdfreed; - (void) uvmpd_scan_inactive(&uvm.page_inactive); + if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0) + got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp); + if (!got_it) + got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj); + if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0) + (void) uvmpd_scan_inactive(&uvm.page_inactive_swp); pages_freed = uvmexp.pdfreed - pages_freed; /* @@ -959,14 +1108,13 @@ uvmpd_scan() } /* - * If we're short on inactive pages, move this over - * to the inactive list. The second hand will sweep - * it later, and if it has been referenced again, it - * will be moved back to active. + * If the page has not been referenced since the + * last scan, deactivate the page if there is a + * shortage of inactive pages. 
*/ - if (inactive_shortage > 0) { - pmap_clear_reference(p); + if (inactive_shortage > 0 && + pmap_clear_reference(p) == FALSE) { /* no need to check wire_count as pg is "active" */ uvm_pagedeactivate(p); uvmexp.pddeact++; diff --git a/sys/uvm/uvm_pdaemon.h b/sys/uvm/uvm_pdaemon.h index bc6b96f5a07..dbae4b6ba97 100644 --- a/sys/uvm/uvm_pdaemon.h +++ b/sys/uvm/uvm_pdaemon.h @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_pdaemon.h,v 1.8 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_pdaemon.h,v 1.9 2001/05/25 04:06:17 chs Exp $ */ +/* $OpenBSD: uvm_pdaemon.h,v 1.9 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pdaemon.h,v 1.8 1999/11/04 21:51:42 thorpej Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU diff --git a/sys/uvm/uvm_pglist.c b/sys/uvm/uvm_pglist.c index e747f827e6b..7d89a04c969 100644 --- a/sys/uvm/uvm_pglist.c +++ b/sys/uvm/uvm_pglist.c @@ -1,20 +1,20 @@ -/* $OpenBSD: uvm_pglist.c,v 1.11 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_pglist.c,v 1.17 2001/06/27 21:18:34 thorpej Exp $ */ +/* $OpenBSD: uvm_pglist.c,v 1.12 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pglist.c,v 1.13 2001/02/18 21:19:08 chs Exp $ */ /*- * Copyright (c) 1997 The NetBSD Foundation, Inc. * All rights reserved. - * + * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, - * NASA Ames Research Center. + * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright + * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software @@ -24,7 +24,7 @@ * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -78,7 +78,7 @@ u_long uvm_pglistalloc_npages; * low the low address of the allowed allocation range. * high the high address of the allowed allocation range. * alignment memory must be aligned to this power-of-two boundary. - * boundary no segment in the allocation may cross this + * boundary no segment in the allocation may cross this * power-of-two boundary (relative to zero). */ @@ -92,16 +92,16 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok) paddr_t try, idxpa, lastidxpa; int psi; struct vm_page *pgs; - int s, tryidx, idx, pgflidx, end, error, free_list, color; - struct vm_page *m; + int s, tryidx, idx, pgflidx, end, error, free_list; + vm_page_t m; u_long pagemask; #ifdef DEBUG - struct vm_page *tp; + vm_page_t tp; #endif KASSERT((alignment & (alignment - 1)) == 0); KASSERT((boundary & (boundary - 1)) == 0); - + /* * Our allocations are always page granularity, so our alignment * must be, too. @@ -198,11 +198,10 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok) while (idx < end) { m = &pgs[idx]; free_list = uvm_page_lookup_freelist(m); - color = VM_PGCOLOR_BUCKET(m); pgflidx = (m->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN; #ifdef DEBUG for (tp = TAILQ_FIRST(&uvm.page_free[ - free_list].pgfl_buckets[color].pgfl_queues[pgflidx]); + free_list].pgfl_queues[pgflidx]); tp != NULL; tp = TAILQ_NEXT(tp, pageq)) { if (tp == m) @@ -211,8 +210,8 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok) if (tp == NULL) panic("uvm_pglistalloc: page not on freelist"); #endif - TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_buckets[ - color].pgfl_queues[pgflidx], m, pageq); + TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx], + m, pageq); uvmexp.free--; if (m->flags & PG_ZERO) uvmexp.zeropages--; @@ -232,8 +231,12 @@ out: * check to see if we need to generate some free pages waking * the pagedaemon. 
*/ - - UVM_KICK_PDAEMON(); + + if (uvmexp.free + uvmexp.paging < uvmexp.freemin || + (uvmexp.free + uvmexp.paging < uvmexp.freetarg && + uvmexp.inactive < uvmexp.inactarg)) { + wakeup(&uvm.pagedaemon); + } uvm_unlock_fpageq(s); @@ -250,7 +253,7 @@ void uvm_pglistfree(list) struct pglist *list; { - struct vm_page *m; + vm_page_t m; int s; /* @@ -263,8 +266,8 @@ uvm_pglistfree(list) TAILQ_REMOVE(list, m, pageq); m->pqflags = PQ_FREE; TAILQ_INSERT_TAIL(&uvm.page_free[ - uvm_page_lookup_freelist(m)].pgfl_buckets[ - VM_PGCOLOR_BUCKET(m)].pgfl_queues[PGFL_UNKNOWN], m, pageq); + uvm_page_lookup_freelist(m)].pgfl_queues[PGFL_UNKNOWN], + m, pageq); uvmexp.free++; if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) uvm.page_idle_zero = vm_page_zero_enable; diff --git a/sys/uvm/uvm_pglist.h b/sys/uvm/uvm_pglist.h index 883171ebb86..3020df4d5b0 100644 --- a/sys/uvm/uvm_pglist.h +++ b/sys/uvm/uvm_pglist.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_pglist.h,v 1.4 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_pglist.h,v 1.5 2001/08/25 20:37:46 chs Exp $ */ +/* $OpenBSD: uvm_pglist.h,v 1.5 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pglist.h,v 1.3 2001/05/02 01:22:20 thorpej Exp $ */ /*- * Copyright (c) 2000, 2001 The NetBSD Foundation, Inc. @@ -37,8 +37,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _UVM_UVM_PGLIST_H_ -#define _UVM_UVM_PGLIST_H_ +#ifndef _PGLIST_H_ +#define _PGLIST_H_ /* * This defines the type of a page queue, e.g. active list, inactive @@ -54,12 +54,8 @@ TAILQ_HEAD(pglist, vm_page); #define PGFL_ZEROS 1 #define PGFL_NQUEUES 2 -struct pgflbucket { - struct pglist pgfl_queues[PGFL_NQUEUES]; -}; - struct pgfreelist { - struct pgflbucket *pgfl_buckets; + struct pglist pgfl_queues[PGFL_NQUEUES]; }; -#endif /* _UVM_UVM_PGLIST_H_ */ +#endif diff --git a/sys/uvm/uvm_pmap.h b/sys/uvm/uvm_pmap.h index f4f2e4ce0ea..5e9617bc624 100644 --- a/sys/uvm/uvm_pmap.h +++ b/sys/uvm/uvm_pmap.h @@ -1,6 +1,6 @@ -/* $NetBSD: uvm_pmap.h,v 1.9 2001/09/10 21:19:43 chris Exp $ */ +/* $NetBSD: uvm_pmap.h,v 1.1 2000/06/27 09:00:14 mrg Exp $ */ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -42,17 +42,17 @@ * All rights reserved. * * Author: Avadis Tevanian, Jr. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -86,9 +86,7 @@ struct pmap_statistics { }; typedef struct pmap_statistics *pmap_statistics_t; -#ifdef _KERNEL #include <machine/pmap.h> -#endif /* * Flags passed to pmap_enter(). 
Note the bottom 3 bits are VM_PROT_* @@ -101,10 +99,7 @@ typedef struct pmap_statistics *pmap_statistics_t; #ifndef PMAP_EXCLUDE_DECLS /* Used in Sparc port to virtualize pmap mod */ #ifdef _KERNEL __BEGIN_DECLS -#if !defined(pmap_kernel) -struct pmap *pmap_kernel __P((void)); -#endif - +void *pmap_bootstrap_alloc __P((int)); void pmap_activate __P((struct proc *)); void pmap_deactivate __P((struct proc *)); void pmap_unwire __P((pmap_t, vaddr_t)); @@ -143,19 +138,13 @@ boolean_t pmap_is_referenced __P((struct vm_page *)); void pmap_page_protect __P((struct vm_page *, vm_prot_t)); #if !defined(pmap_phys_address) -paddr_t pmap_phys_address __P((int)); +paddr_t pmap_phys_address __P((int)); #endif void pmap_protect __P((pmap_t, vaddr_t, vaddr_t, vm_prot_t)); void pmap_reference __P((pmap_t)); void pmap_remove __P((pmap_t, vaddr_t, vaddr_t)); -void pmap_update __P((pmap_t)); -#if !defined(pmap_resident_count) -long pmap_resident_count __P((pmap_t)); -#endif -#if !defined(pmap_wired_count) -long pmap_wired_count __P((pmap_t)); -#endif +void pmap_update __P((void)); void pmap_zero_page __P((paddr_t)); void pmap_virtual_space __P((vaddr_t *, vaddr_t *)); diff --git a/sys/uvm/uvm_stat.c b/sys/uvm/uvm_stat.c index 4746b59f6df..801d240fdf0 100644 --- a/sys/uvm/uvm_stat.c +++ b/sys/uvm/uvm_stat.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_stat.c,v 1.10 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_stat.c,v 1.19 2001/05/25 04:06:17 chs Exp $ */ +/* $OpenBSD: uvm_stat.c,v 1.11 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_stat.c,v 1.18 2001/03/09 01:02:13 chs Exp $ */ /* * @@ -135,7 +135,7 @@ restart: cur[lcv] = -1; goto restart; } - + /* * if the time hasn't been set yet, or this entry is * earlier than the current tv, set the time and history @@ -158,7 +158,7 @@ restart: if (cur[hi] == hists[hi]->f) cur[hi] = -1; } - + /* done! */ splx(s); } diff --git a/sys/uvm/uvm_stat.h b/sys/uvm/uvm_stat.h index 2644314f99c..94dc3bb1a39 100644 --- a/sys/uvm/uvm_stat.h +++ b/sys/uvm/uvm_stat.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_stat.h,v 1.11 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_stat.h,v 1.22 2001/05/30 11:57:17 mrg Exp $ */ +/* $OpenBSD: uvm_stat.h,v 1.12 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_stat.h,v 1.19 2001/02/04 10:55:58 mrg Exp $ */ /* * @@ -38,10 +38,6 @@ #ifndef _UVM_UVM_STAT_H_ #define _UVM_UVM_STAT_H_ -#if defined(_KERNEL_OPT) -#include "opt_uvmhist.h" -#endif - #include <sys/queue.h> /* @@ -117,7 +113,7 @@ struct uvm_history { LIST_ENTRY(uvm_history) list; /* link on list of all histories */ int n; /* number of entries */ int f; /* next free one */ - struct simplelock l; /* lock on this history */ + simple_lock_data_t l; /* lock on this history */ struct uvm_history_ent *e; /* the malloc'd entries */ }; @@ -232,7 +228,7 @@ do { \ #define UVMHIST_FUNC(FNAME) \ static int _uvmhist_cnt = 0; \ static char *_uvmhist_name = FNAME; \ - int _uvmhist_call; + int _uvmhist_call; static __inline void uvmhist_print __P((struct uvm_history_ent *)); diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c index 02d7901ba9f..3ed77ab3555 100644 --- a/sys/uvm/uvm_swap.c +++ b/sys/uvm/uvm_swap.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_swap.c,v 1.46 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_swap.c,v 1.53 2001/08/26 00:43:53 chs Exp $ */ +/* $OpenBSD: uvm_swap.c,v 1.47 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_swap.c,v 1.46 2001/02/18 21:19:08 chs Exp $ */ /* * Copyright (c) 1995, 1996, 1997 Matthew R. 
Green @@ -63,7 +63,7 @@ /* * swap space is managed in the following way: - * + * * each swap partition or file is described by a "swapdev" structure. * each "swapdev" structure contains a "swapent" structure which contains * information that is passed up to the user (via system calls). @@ -74,7 +74,7 @@ * the system maintains a global data structure describing all swap * partitions/files. there is a sorted LIST of "swappri" structures * which describe "swapdev"'s at that priority. this LIST is headed - * by the "swap_priority" global var. each "swappri" contains a + * by the "swap_priority" global var. each "swappri" contains a * CIRCLEQ of "swapdev" structures at that priority. * * locking: @@ -99,7 +99,7 @@ * userland controls and configures swap with the swapctl(2) system call. * the sys_swapctl performs the following operations: * [1] SWAP_NSWAP: returns the number of swap devices currently configured - * [2] SWAP_STATS: given a pointer to an array of swapent structures + * [2] SWAP_STATS: given a pointer to an array of swapent structures * (passed in via "arg") of a size passed in via "misc" ... we load * the current swap config into the array. * [3] SWAP_ON: given a pathname in arg (could be device or file) and a @@ -227,15 +227,16 @@ LIST_HEAD(swap_priority, swappri); static struct swap_priority swap_priority; /* locks */ -struct lock swap_syscall_lock; +lock_data_t swap_syscall_lock; /* * prototypes */ +static void swapdrum_add __P((struct swapdev *, int)); static struct swapdev *swapdrum_getsdp __P((int)); static struct swapdev *swaplist_find __P((struct vnode *, int)); -static void swaplist_insert __P((struct swapdev *, +static void swaplist_insert __P((struct swapdev *, struct swappri *, int)); static void swaplist_trim __P((void)); @@ -261,7 +262,7 @@ void uvm_swap_initcrypt __P((struct swapdev *, int)); /* * uvm_swap_init: init the swap system data structures and locks * - * => called at boot time from init_main.c after the filesystems + * => called at boot time from init_main.c after the filesystems * are brought up (which happens after uvm_init()) */ void @@ -287,7 +288,7 @@ uvm_swap_init() /* * create swap block resource map to map /dev/drum. the range * from 1 to INT_MAX allows 2 gigablocks of swap space. note - * that block 0 is reserved (used to indicate an allocation + * that block 0 is reserved (used to indicate an allocation * failure, or no allocation). */ swapmap = extent_create("swapmap", 1, INT_MAX, @@ -562,6 +563,27 @@ swaplist_trim() } /* + * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area. + * + * => caller must hold swap_syscall_lock + * => uvm.swap_data_lock should be unlocked (we may sleep) + */ +static void +swapdrum_add(sdp, npages) + struct swapdev *sdp; + int npages; +{ + u_long result; + + if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY, + EX_WAITOK, &result)) + panic("swapdrum_add"); + + sdp->swd_drumoffset = result; + sdp->swd_drumsize = npages; +} + +/* * swapdrum_getsdp: given a page offset in /dev/drum, convert it back * to the "swapdev" that maps that section of the drum. 
* @@ -574,19 +596,16 @@ swapdrum_getsdp(pgno) { struct swapdev *sdp; struct swappri *spp; - + for (spp = LIST_FIRST(&swap_priority); spp != NULL; spp = LIST_NEXT(spp, spi_swappri)) for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev); sdp != (void *)&spp->spi_swapdev; - sdp = CIRCLEQ_NEXT(sdp, swd_next)) { - if (sdp->swd_flags & SWF_FAKE) - continue; + sdp = CIRCLEQ_NEXT(sdp, swd_next)) if (pgno >= sdp->swd_drumoffset && pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) { return sdp; } - } return NULL; } @@ -627,7 +646,7 @@ sys_swapctl(p, v, retval) /* * we handle the non-priv NSWAP and STATS request first. * - * SWAP_NSWAP: return number of config'd swap devices + * SWAP_NSWAP: return number of config'd swap devices * [can also be obtained with uvmexp sysctl] */ if (SCARG(uap, cmd) == SWAP_NSWAP) { @@ -641,9 +660,9 @@ sys_swapctl(p, v, retval) /* * SWAP_STATS: get stats on current # of configured swap devs * - * note that the swap_priority list can't change as long + * note that the swap_priority list can't change as long * as we are holding the swap_syscall_lock. we don't want - * to grab the uvm.swap_data_lock because we may fault&sleep during + * to grab the uvm.swap_data_lock because we may fault&sleep during * copyout() and we don't want to be holding that lock then! */ if (SCARG(uap, cmd) == SWAP_STATS @@ -659,7 +678,7 @@ sys_swapctl(p, v, retval) for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev); sdp != (void *)&spp->spi_swapdev && misc-- > 0; sdp = CIRCLEQ_NEXT(sdp, swd_next)) { - sdp->swd_inuse = + sdp->swd_inuse = btodb((u_int64_t)sdp->swd_npginuse << PAGE_SHIFT); error = copyout(&sdp->swd_se, sep, @@ -679,8 +698,7 @@ sys_swapctl(p, v, retval) count++; #if defined(COMPAT_13) if (SCARG(uap, cmd) == SWAP_OSTATS) - sep = (struct swapent *) - ((struct oswapent *)sep + 1); + ((struct oswapent *)sep)++; else #endif sep++; @@ -692,7 +710,7 @@ sys_swapctl(p, v, retval) *retval = count; error = 0; goto out; - } + } /* * all other requests require superuser privs. verify. @@ -779,16 +797,14 @@ sys_swapctl(p, v, retval) */ priority = SCARG(uap, misc); - sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); - spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK); simple_lock(&uvm.swap_data_lock); - if (swaplist_find(vp, 0) != NULL) { + if ((sdp = swaplist_find(vp, 0)) != NULL) { error = EBUSY; simple_unlock(&uvm.swap_data_lock); - free(sdp, M_VMSWAP); - free(spp, M_VMSWAP); break; } + sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); + spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK); memset(sdp, 0, sizeof(*sdp)); sdp->swd_flags = SWF_FAKE; /* placeholder only */ sdp->swd_vp = vp; @@ -889,7 +905,6 @@ swap_on(p, sdp) struct vnode *vp; int error, npages, nblocks, size; long addr; - u_long result; struct vattr va; #if defined(NFSCLIENT) extern int (**nfsv2_vnodeop_p) __P((void *)); @@ -1018,9 +1033,9 @@ swap_on(p, sdp) } /* - * if the vnode we are swapping to is the root vnode + * if the vnode we are swapping to is the root vnode * (i.e. we are swapping to the miniroot) then we want - * to make sure we don't overwrite it. do a statfs to + * to make sure we don't overwrite it. do a statfs to * find its size and skip over it. */ if (vp == rootvp) { @@ -1035,7 +1050,7 @@ swap_on(p, sdp) if (rootpages > size) panic("swap_on: miniroot larger than swap?"); - if (extent_alloc_region(sdp->swd_ex, addr, + if (extent_alloc_region(sdp->swd_ex, addr, rootpages, EX_WAITOK)) panic("swap_on: unable to preserve miniroot"); @@ -1065,14 +1080,9 @@ swap_on(p, sdp) /* * now add the new swapdev to the drum and enable. 
*/ - if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY, - EX_WAITOK, &result)) - panic("swapdrum_add"); - - sdp->swd_drumoffset = (int)result; - sdp->swd_drumsize = npages; - sdp->swd_npages = size; simple_lock(&uvm.swap_data_lock); + swapdrum_add(sdp, npages); + sdp->swd_npages = size; sdp->swd_flags &= ~SWF_FAKE; /* going live */ sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE); uvmexp.swpages += size; @@ -1121,7 +1131,7 @@ swap_off(p, sdp) sdp->swd_drumoffset + sdp->swd_drumsize) || anon_swap_off(sdp->swd_drumoffset, sdp->swd_drumoffset + sdp->swd_drumsize)) { - + simple_lock(&uvm.swap_data_lock); sdp->swd_flags |= SWF_ENABLE; simple_unlock(&uvm.swap_data_lock); @@ -1151,7 +1161,6 @@ swap_off(p, sdp) if (swaplist_find(sdp->swd_vp, 1) == NULL) panic("swap_off: swapdev not in list\n"); swaplist_trim(); - simple_unlock(&uvm.swap_data_lock); /* * free all resources! @@ -1160,6 +1169,7 @@ swap_off(p, sdp) EX_WAITOK); extent_destroy(sdp->swd_ex); free(sdp, M_VMSWAP); + simple_unlock(&uvm.swap_data_lock); return (0); } @@ -1326,7 +1336,7 @@ sw_reg_strategy(sdp, bp, bn) &vp, &nbn, &nra); if (error == 0 && nbn == (daddr_t)-1) { - /* + /* * this used to just set error, but that doesn't * do the right thing. Instead, it causes random * memory errors. The panic() should remain until @@ -1383,6 +1393,32 @@ sw_reg_strategy(sdp, bp, bn) nbp->vb_buf.b_vnbufs.le_next = NOLIST; LIST_INIT(&nbp->vb_buf.b_dep); + /* + * set b_dirtyoff/end and b_validoff/end. this is + * required by the NFS client code (otherwise it will + * just discard our I/O request). + */ + if (bp->b_dirtyend == 0) { + nbp->vb_buf.b_dirtyoff = 0; + nbp->vb_buf.b_dirtyend = sz; + } else { + nbp->vb_buf.b_dirtyoff = + max(0, bp->b_dirtyoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_dirtyend = + min(sz, + max(0, bp->b_dirtyend - (bp->b_bcount-resid))); + } + if (bp->b_validend == 0) { + nbp->vb_buf.b_validoff = 0; + nbp->vb_buf.b_validend = sz; + } else { + nbp->vb_buf.b_validoff = + max(0, bp->b_validoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_validend = + min(sz, + max(0, bp->b_validend - (bp->b_bcount-resid))); + } + nbp->vb_xfer = vnx; /* patch it back in to vnx */ /* @@ -1503,7 +1539,9 @@ sw_reg_iodone(bp) /* * disassociate this buffer from the vnode (if any). */ - (void) buf_cleanout(&vbp->vb_buf); + if (vbp->vb_buf.b_vp != NULL) { + brelvp(&vbp->vb_buf); + } /* * kill vbp structure @@ -1566,7 +1604,7 @@ uvm_swap_alloc(nslots, lessok) */ if (uvmexp.nswapdev < 1) return 0; - + /* * lock data lock, convert slots into blocks, and enter loop */ @@ -1670,8 +1708,8 @@ uvm_swap_free(startslot, nslots) } /* - * convert drum slot offset back to sdp, free the blocks - * in the extent, and return. must hold pri lock to do + * convert drum slot offset back to sdp, free the blocks + * in the extent, and return. must hold pri lock to do * lookup and access the extent. */ @@ -1742,26 +1780,23 @@ uvm_swap_get(page, swslot, flags) uvmexp.nswget++; KASSERT(flags & PGO_SYNCIO); if (swslot == SWSLOT_BAD) { - return EIO; + return VM_PAGER_ERROR; } /* * this page is (about to be) no longer only in swap. */ - simple_lock(&uvm.swap_data_lock); uvmexp.swpgonly--; simple_unlock(&uvm.swap_data_lock); - result = uvm_swap_io(&page, swslot, 1, B_READ | + result = uvm_swap_io(&page, swslot, 1, B_READ | ((flags & PGO_SYNCIO) ? 0 : B_ASYNC)); - if (result != 0) { - + if (result != VM_PAGER_OK && result != VM_PAGER_PEND) { /* * oops, the read failed so it really is still only in swap. 
*/ - simple_lock(&uvm.swap_data_lock); uvmexp.swpgonly++; simple_unlock(&uvm.swap_data_lock); @@ -1782,7 +1817,7 @@ uvm_swap_io(pps, startslot, npages, flags) daddr_t startblk; struct buf *bp; vaddr_t kva; - int error, s, mapinflags, pflag; + int result, s, mapinflags, pflag; boolean_t write, async; #ifdef UVM_SWAP_ENCRYPT vaddr_t dstkva; @@ -1812,7 +1847,7 @@ uvm_swap_io(pps, startslot, npages, flags) mapinflags |= UVMPAGER_MAPIN_WAITOK; kva = uvm_pagermapin(pps, npages, mapinflags); if (kva == 0) - return (EAGAIN); + return (VM_PAGER_AGAIN); #ifdef UVM_SWAP_ENCRYPT if (write) { @@ -1858,14 +1893,14 @@ uvm_swap_io(pps, startslot, npages, flags) if (!uvm_swap_allocpages(tpps, npages)) { uvm_pagermapout(kva, npages); - return (EAGAIN); + return (VM_PAGER_AGAIN); } dstkva = uvm_pagermapin(tpps, npages, swmapflags); if (dstkva == NULL) { uvm_pagermapout(kva, npages); uvm_swap_freepages(tpps, npages); - return (EAGAIN); + return (VM_PAGER_AGAIN); } src = (caddr_t) kva; @@ -1894,7 +1929,7 @@ uvm_swap_io(pps, startslot, npages, flags) } #endif /* UVM_SWAP_ENCRYPT */ - /* + /* * now allocate a buf for the i/o. * [make sure we don't put the pagedaemon to sleep...] */ @@ -1919,7 +1954,7 @@ uvm_swap_io(pps, startslot, npages, flags) uvm_swap_freepages(tpps, npages); } #endif - return (EAGAIN); + return (VM_PAGER_AGAIN); } #ifdef UVM_SWAP_ENCRYPT @@ -1950,10 +1985,13 @@ uvm_swap_io(pps, startslot, npages, flags) splx(s); bp->b_bufsize = bp->b_bcount = npages << PAGE_SHIFT; - /* - * bump v_numoutput (counter of number of active outputs). + /* + * for pageouts we must set "dirtyoff" [NFS client code needs it]. + * and we bump v_numoutput (counter of number of active outputs). */ if (write) { + bp->b_dirtyoff = 0; + bp->b_dirtyend = npages << PAGE_SHIFT; #ifdef UVM_SWAP_ENCRYPT /* mark the pages in the drum for decryption */ if (swap_encrypt_initalized) @@ -1982,12 +2020,13 @@ uvm_swap_io(pps, startslot, npages, flags) */ VOP_STRATEGY(bp); if (async) - return 0; + return (VM_PAGER_PEND); /* * must be sync i/o. wait for it to finish */ - error = biowait(bp); + (void) biowait(bp); + result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK; #ifdef UVM_SWAP_ENCRYPT /* @@ -2028,18 +2067,19 @@ uvm_swap_io(pps, startslot, npages, flags) * now dispose of the buf */ s = splbio(); + if (bp->b_vp) + brelvp(bp); + if (write && bp->b_vp) vwakeup(bp->b_vp); - - (void) buf_cleanout(bp); pool_put(&bufpool, bp); splx(s); /* * finally return. */ - UVMHIST_LOG(pdhist, "<- done (sync) error=%d", error, 0, 0, 0); - return (error); + UVMHIST_LOG(pdhist, "<- done (sync) result=%d", result, 0, 0, 0); + return (result); } static void diff --git a/sys/uvm/uvm_unix.c b/sys/uvm/uvm_unix.c index a6debf6ff8d..98724938298 100644 --- a/sys/uvm/uvm_unix.c +++ b/sys/uvm/uvm_unix.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_unix.c,v 1.19 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_unix.c,v 1.24 2001/06/06 21:28:51 mrg Exp $ */ +/* $OpenBSD: uvm_unix.c,v 1.20 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_unix.c,v 1.18 2000/09/13 15:00:25 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993 The Regents of the University of California. + * Copyright (c) 1991, 1993 The Regents of the University of California. * Copyright (c) 1988 University of Utah. * * All rights reserved. @@ -23,7 +23,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. 
Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -77,39 +77,44 @@ sys_obreak(p, v, retval) } */ *uap = v; struct vmspace *vm = p->p_vmspace; vaddr_t new, old; - int error; + ssize_t diff; + int rv; old = (vaddr_t)vm->vm_daddr; new = round_page((vaddr_t)SCARG(uap, nsize)); - if ((new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur && new > old) + if ((new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur) return (ENOMEM); old = round_page(old + ptoa(vm->vm_dsize)); + diff = new - old; - if (new == old) + if (diff == 0) return (0); /* * grow or shrink? */ - if (new > old) { - error = uvm_map(&vm->vm_map, &old, new - old, NULL, - UVM_UNKNOWN_OFFSET, 0, - UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY, + if (diff > 0) { + rv = uvm_map(&vm->vm_map, &old, diff, NULL, UVM_UNKNOWN_OFFSET, + 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY, UVM_ADV_NORMAL, UVM_FLAG_AMAPPAD|UVM_FLAG_FIXED| UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)); - if (error) { - uprintf("sbrk: grow %ld failed, error = %d\n", - new - old, error); - return error; + if (rv == KERN_SUCCESS) { + vm->vm_dsize += atop(diff); + return (0); } - vm->vm_dsize += atop(new - old); } else { - uvm_deallocate(&vm->vm_map, new, old - new); - vm->vm_dsize -= atop(old - new); + rv = uvm_deallocate(&vm->vm_map, new, -diff); + if (rv == KERN_SUCCESS) { + vm->vm_dsize -= atop(-diff); + return (0); + } } - return (0); + uprintf("sbrk: %s %ld failed, return = %d\n", + diff > 0 ? "grow" : "shrink", + (long)(diff > 0 ? diff : -diff), rv); + return (ENOMEM); } /* @@ -190,8 +195,8 @@ uvm_coredump(p, vp, cred, chdr) struct core *chdr; { struct vmspace *vm = p->p_vmspace; - struct vm_map *map = &vm->vm_map; - struct vm_map_entry *entry; + vm_map_t map = &vm->vm_map; + vm_map_entry_t entry; vaddr_t start, end, maxstack; struct coreseg cseg; off_t offset; diff --git a/sys/uvm/uvm_user.c b/sys/uvm/uvm_user.c index 502d2aca440..01677547711 100644 --- a/sys/uvm/uvm_user.c +++ b/sys/uvm/uvm_user.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_user.c,v 1.8 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_user.c,v 1.10 2001/06/02 18:09:27 chs Exp $ */ +/* $OpenBSD: uvm_user.c,v 1.9 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_user.c,v 1.8 2000/06/27 17:29:37 mrg Exp $ */ /* * @@ -50,15 +50,19 @@ * uvm_deallocate: deallocate memory (unmap) */ -void +int uvm_deallocate(map, start, size) - struct vm_map *map; + vm_map_t map; vaddr_t start; vsize_t size; { - if (size == 0) - return; + if (map == NULL) + panic("uvm_deallocate with null map"); + + if (size == (vaddr_t) 0) + return (KERN_SUCCESS); + + return(uvm_unmap(map, trunc_page(start), round_page(start+size))); - uvm_unmap(map, trunc_page(start), round_page(start + size)); } diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c index d58d0cf93f4..4783597df3d 100644 --- a/sys/uvm/uvm_vnode.c +++ b/sys/uvm/uvm_vnode.c @@ -1,10 +1,10 @@ -/* $OpenBSD: uvm_vnode.c,v 1.31 2001/12/10 02:19:34 art Exp $ */ -/* $NetBSD: uvm_vnode.c,v 1.51 2001/08/17 05:53:02 chs Exp $ */ +/* $OpenBSD: uvm_vnode.c,v 1.32 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. * Copyright (c) 1991, 1993 - * The Regents of the University of California. 
+ * The Regents of the University of California. * Copyright (c) 1990 University of Utah. * * All rights reserved. @@ -24,7 +24,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -52,7 +52,6 @@ #include <sys/param.h> #include <sys/systm.h> -#include <sys/kernel.h> #include <sys/proc.h> #include <sys/malloc.h> #include <sys/vnode.h> @@ -60,47 +59,62 @@ #include <sys/ioctl.h> #include <sys/fcntl.h> #include <sys/conf.h> -#include <sys/pool.h> -#include <sys/mount.h> #include <miscfs/specfs/specdev.h> #include <uvm/uvm.h> +#include <uvm/uvm_vnode.h> + +/* + * private global data structure + * + * we keep a list of writeable active vnode-backed VM objects for sync op. + * we keep a simpleq of vnodes that are currently being sync'd. + */ + +LIST_HEAD(uvn_list_struct, uvm_vnode); +static struct uvn_list_struct uvn_wlist; /* writeable uvns */ +static simple_lock_data_t uvn_wl_lock; /* locks uvn_wlist */ + +SIMPLEQ_HEAD(uvn_sq_struct, uvm_vnode); +static struct uvn_sq_struct uvn_sync_q; /* sync'ing uvns */ +lock_data_t uvn_sync_lock; /* locks sync operation */ /* * functions */ -static void uvn_cluster __P((struct uvm_object *, voff_t, voff_t *, - voff_t *)); -static void uvn_detach __P((struct uvm_object *)); -static int uvn_findpage __P((struct uvm_object *, voff_t, - struct vm_page **, int)); -boolean_t uvn_flush __P((struct uvm_object *, voff_t, voff_t, - int)); -int uvn_get __P((struct uvm_object *, voff_t, - struct vm_page **, int *, int, vm_prot_t, - int, int)); -int uvn_put __P((struct uvm_object *, struct vm_page **, - int, boolean_t)); -static void uvn_reference __P((struct uvm_object *)); -static boolean_t uvn_releasepg __P((struct vm_page *, - struct vm_page **)); +static void uvn_cluster __P((struct uvm_object *, voff_t, + voff_t *, voff_t *)); +static void uvn_detach __P((struct uvm_object *)); +static boolean_t uvn_flush __P((struct uvm_object *, voff_t, + voff_t, int)); +static int uvn_get __P((struct uvm_object *, voff_t, + vm_page_t *, int *, int, + vm_prot_t, int, int)); +static void uvn_init __P((void)); +static int uvn_io __P((struct uvm_vnode *, vm_page_t *, + int, int, int)); +static int uvn_put __P((struct uvm_object *, vm_page_t *, + int, boolean_t)); +static void uvn_reference __P((struct uvm_object *)); +static boolean_t uvn_releasepg __P((struct vm_page *, + struct vm_page **)); /* * master pager structure */ struct uvm_pagerops uvm_vnodeops = { - NULL, + uvn_init, uvn_reference, uvn_detach, - NULL, + NULL, /* no specialized fault routine required */ uvn_flush, uvn_get, uvn_put, uvn_cluster, - uvm_mk_pcluster, + uvm_mk_pcluster, /* use generic version of this: see uvm_pager.c */ uvn_releasepg, }; @@ -109,6 +123,22 @@ struct uvm_pagerops uvm_vnodeops = { */ /* + * uvn_init + * + * init pager private data structures. + */ + +static void +uvn_init() +{ + + LIST_INIT(&uvn_wlist); + simple_lock_init(&uvn_wl_lock); + /* note: uvn_sync_q init'd in uvm_vnp_sync() */ + lockinit(&uvn_sync_lock, PVM, "uvnsync", 0, 0); +} + +/* * uvn_attach * * attach a vnode structure to a VM object. 
if the vnode is already @@ -129,26 +159,29 @@ uvn_attach(arg, accessprot) vm_prot_t accessprot; { struct vnode *vp = arg; - struct uvm_object *uobj = &vp->v_uobj; + struct uvm_vnode *uvn = &vp->v_uvm; struct vattr vattr; - int result; + int oldflags, result; struct partinfo pi; - voff_t used_vnode_size; + u_quad_t used_vnode_size; UVMHIST_FUNC("uvn_attach"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, "(vn=0x%x)", arg,0,0,0); - used_vnode_size = (voff_t)0; + + used_vnode_size = (u_quad_t)0; /* XXX gcc -Wuninitialized */ /* * first get a lock on the uvn. */ - simple_lock(uobj->vmobjlock); - while (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; + simple_lock(&uvn->u_obj.vmobjlock); + while (uvn->u_flags & UVM_VNODE_BLOCKED) { + printf("uvn_attach: blocked at 0x%p flags 0x%x\n", + uvn, uvn->u_flags); + uvn->u_flags |= UVM_VNODE_WANTED; UVMHIST_LOG(maphist, " SLEEPING on blocked vn",0,0,0,0); - UVM_UNLOCK_AND_WAIT(vp, &uobj->vmobjlock, FALSE, + UVM_UNLOCK_AND_WAIT(uvn, &uvn->u_obj.vmobjlock, FALSE, "uvn_attach", 0); - simple_lock(&uobj->vmobjlock); + simple_lock(&uvn->u_obj.vmobjlock); UVMHIST_LOG(maphist," WOKE UP",0,0,0,0); } @@ -156,21 +189,56 @@ uvn_attach(arg, accessprot) * if we're mapping a BLK device, make sure it is a disk. */ if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) { - simple_unlock(&uobj->vmobjlock); + simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */ UVMHIST_LOG(maphist,"<- done (VBLK not D_DISK!)", 0,0,0,0); return(NULL); } - KASSERT(vp->v_type == VREG || vp->v_type == VBLK); /* - * set up our idea of the size - * if this hasn't been done already. + * now we have lock and uvn must not be in a blocked state. + * first check to see if it is already active, in which case + * we can bump the reference count, check to see if we need to + * add it to the writeable list, and then return. */ - if (vp->v_size == VSIZENOTSET) { + if (uvn->u_flags & UVM_VNODE_VALID) { /* already active? */ + + /* regain VREF if we were persisting */ + if (uvn->u_obj.uo_refs == 0) { + VREF(vp); + UVMHIST_LOG(maphist," VREF (reclaim persisting vnode)", + 0,0,0,0); + } + uvn->u_obj.uo_refs++; /* bump uvn ref! */ + + /* check for new writeable uvn */ + if ((accessprot & VM_PROT_WRITE) != 0 && + (uvn->u_flags & UVM_VNODE_WRITEABLE) == 0) { + simple_lock(&uvn_wl_lock); + LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + /* we are now on wlist! */ + uvn->u_flags |= UVM_VNODE_WRITEABLE; + } + + /* unlock and return */ + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs, + 0, 0, 0); + return (&uvn->u_obj); + } - vp->v_flag |= VXLOCK; - simple_unlock(&uobj->vmobjlock); /* drop lock in case we sleep */ + /* + * need to call VOP_GETATTR() to get the attributes, but that could + * block (due to I/O), so we want to unlock the object before calling. + * however, we want to keep anyone else from playing with the object + * while it is unlocked. to do this we set UVM_VNODE_ALOCK which + * prevents anyone from attaching to the vnode until we are done with + * it. + */ + uvn->u_flags = UVM_VNODE_ALOCK; + simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */ /* XXX: curproc? 
*/ + if (vp->v_type == VBLK) { /* * We could implement this as a specfs getattr call, but: @@ -184,8 +252,8 @@ uvn_attach(arg, accessprot) DIOCGPART, (caddr_t)&pi, FREAD, curproc); if (result == 0) { /* XXX should remember blocksize */ - used_vnode_size = (voff_t)pi.disklab->d_secsize * - (voff_t)pi.part->p_size; + used_vnode_size = (u_quad_t)pi.disklab->d_secsize * + (u_quad_t)pi.part->p_size; } } else { result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc); @@ -194,26 +262,58 @@ uvn_attach(arg, accessprot) } /* relock object */ - simple_lock(&uobj->vmobjlock); - - if (vp->v_flag & VXWANT) - wakeup(vp); - vp->v_flag &= ~(VXLOCK|VXWANT); + simple_lock(&uvn->u_obj.vmobjlock); if (result != 0) { - simple_unlock(&uobj->vmobjlock); /* drop lock */ + if (uvn->u_flags & UVM_VNODE_WANTED) + wakeup(uvn); + uvn->u_flags = 0; + simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */ UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0); return(NULL); } - vp->v_size = used_vnode_size; + /* + * make sure that the newsize fits within a vaddr_t + * XXX: need to revise addressing data types + */ +#ifdef DEBUG + if (vp->v_type == VBLK) + printf("used_vnode_size = %llu\n", (long long)used_vnode_size); +#endif + + /* + * now set up the uvn. + */ + uvn->u_obj.pgops = &uvm_vnodeops; + TAILQ_INIT(&uvn->u_obj.memq); + uvn->u_obj.uo_npages = 0; + uvn->u_obj.uo_refs = 1; /* just us... */ + oldflags = uvn->u_flags; + uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST; + uvn->u_nio = 0; + uvn->u_size = used_vnode_size; + + /* if write access, we need to add it to the wlist */ + if (accessprot & VM_PROT_WRITE) { + simple_lock(&uvn_wl_lock); + LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + uvn->u_flags |= UVM_VNODE_WRITEABLE; /* we are on wlist! */ } - /* unlock and return */ - simple_unlock(&uobj->vmobjlock); - UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs, - 0, 0, 0); - return (uobj); + /* + * add a reference to the vnode. this reference will stay as long + * as there is a valid mapping of the vnode. dropped when the + * reference count goes to zero [and we either free or persist]. + */ + VREF(vp); + simple_unlock(&uvn->u_obj.vmobjlock); + if (oldflags & UVM_VNODE_WANTED) + wakeup(uvn); + + UVMHIST_LOG(maphist,"<- done/VREF, ret 0x%x", &uvn->u_obj,0,0,0); + return(&uvn->u_obj); } @@ -221,10 +321,10 @@ uvn_attach(arg, accessprot) * uvn_reference * * duplicate a reference to a VM object. Note that the reference - * count must already be at least one (the passed in reference) so + * count must already be at least one (the passed in reference) so * there is no chance of the uvn being killed or locked out here. * - * => caller must call with object unlocked. + * => caller must call with object unlocked. * => caller must be using the same accessprot as was used at attach time */ @@ -233,7 +333,23 @@ static void uvn_reference(uobj) struct uvm_object *uobj; { - VREF((struct vnode *)uobj); +#ifdef DEBUG + struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; +#endif + UVMHIST_FUNC("uvn_reference"); UVMHIST_CALLED(maphist); + + simple_lock(&uobj->vmobjlock); +#ifdef DEBUG + if ((uvn->u_flags & UVM_VNODE_VALID) == 0) { + printf("uvn_reference: ref=%d, flags=0x%x\n", uvn->u_flags, + uobj->uo_refs); + panic("uvn_reference: invalid state"); + } +#endif + uobj->uo_refs++; + UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", + uobj, uobj->uo_refs,0,0); + simple_unlock(&uobj->vmobjlock); } /* @@ -242,12 +358,298 @@ uvn_reference(uobj) * remove a reference to a VM object. 
* * => caller must call with object unlocked and map locked. + * => this starts the detach process, but doesn't have to finish it + * (async i/o could still be pending). */ static void uvn_detach(uobj) struct uvm_object *uobj; { - vrele((struct vnode *)uobj); + struct uvm_vnode *uvn; + struct vnode *vp; + int oldflags; + UVMHIST_FUNC("uvn_detach"); UVMHIST_CALLED(maphist); + + simple_lock(&uobj->vmobjlock); + + UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0); + uobj->uo_refs--; /* drop ref! */ + if (uobj->uo_refs) { /* still more refs */ + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0); + return; + } + + /* + * get other pointers ... + */ + + uvn = (struct uvm_vnode *) uobj; + vp = (struct vnode *) uobj; + + /* + * clear VTEXT flag now that there are no mappings left (VTEXT is used + * to keep an active text file from being overwritten). + */ + vp->v_flag &= ~VTEXT; + + /* + * we just dropped the last reference to the uvn. see if we can + * let it "stick around". + */ + + if (uvn->u_flags & UVM_VNODE_CANPERSIST) { + /* won't block */ + uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES); + simple_unlock(&uobj->vmobjlock); + vrele(vp); /* drop vnode reference */ + UVMHIST_LOG(maphist,"<- done/vrele! (persist)", 0,0,0,0); + return; + } + + /* + * its a goner! + */ + + UVMHIST_LOG(maphist," its a goner (flushing)!", 0,0,0,0); + + uvn->u_flags |= UVM_VNODE_DYING; + + /* + * even though we may unlock in flush, no one can gain a reference + * to us until we clear the "dying" flag [because it blocks + * attaches]. we will not do that until after we've disposed of all + * the pages with uvn_flush(). note that before the flush the only + * pages that could be marked PG_BUSY are ones that are in async + * pageout by the daemon. (there can't be any pending "get"'s + * because there are no references to the object). + */ + + (void) uvn_flush(uobj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES); + + UVMHIST_LOG(maphist," its a goner (done flush)!", 0,0,0,0); + + /* + * given the structure of this pager, the above flush request will + * create the following state: all the pages that were in the object + * have either been free'd or they are marked PG_BUSY|PG_RELEASED. + * the PG_BUSY bit was set either by us or the daemon for async I/O. + * in either case, if we have pages left we can't kill the object + * yet because i/o is pending. in this case we set the "relkill" + * flag which will cause pgo_releasepg to kill the object once all + * the I/O's are done [pgo_releasepg will be called from the aiodone + * routine or from the page daemon]. + */ + + if (uobj->uo_npages) { /* I/O pending. iodone will free */ +#ifdef DEBUG + /* + * XXXCDC: very unlikely to happen until we have async i/o + * so print a little info message in case it does. + */ + printf("uvn_detach: vn %p has pages left after flush - " + "relkill mode\n", uobj); +#endif + uvn->u_flags |= UVM_VNODE_RELKILL; + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(maphist,"<- done! (releasepg will kill obj)", 0, 0, + 0, 0); + return; + } + + /* + * kill object now. note that we can't be on the sync q because + * all references are gone. 
+ */ + if (uvn->u_flags & UVM_VNODE_WRITEABLE) { + simple_lock(&uvn_wl_lock); /* protect uvn_wlist */ + LIST_REMOVE(uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + } +#ifdef DIAGNOSTIC + if (uobj->memq.tqh_first != NULL) + panic("uvn_deref: vnode VM object still has pages afer " + "syncio/free flush"); +#endif + oldflags = uvn->u_flags; + uvn->u_flags = 0; + simple_unlock(&uobj->vmobjlock); + + /* wake up any sleepers */ + if (oldflags & UVM_VNODE_WANTED) + wakeup(uvn); + + /* + * drop our reference to the vnode. + */ + vrele(vp); + UVMHIST_LOG(maphist,"<- done (vrele) final", 0,0,0,0); + + return; +} + +/* + * uvm_vnp_terminate: external hook to clear out a vnode's VM + * + * called in two cases: + * [1] when a persisting vnode vm object (i.e. one with a zero reference + * count) needs to be freed so that a vnode can be reused. this + * happens under "getnewvnode" in vfs_subr.c. if the vnode from + * the free list is still attached (i.e. not VBAD) then vgone is + * called. as part of the vgone trace this should get called to + * free the vm object. this is the common case. + * [2] when a filesystem is being unmounted by force (MNT_FORCE, + * "umount -f") the vgone() function is called on active vnodes + * on the mounted file systems to kill their data (the vnodes become + * "dead" ones [see src/sys/miscfs/deadfs/...]). that results in a + * call here (even if the uvn is still in use -- i.e. has a non-zero + * reference count). this case happens at "umount -f" and during a + * "reboot/halt" operation. + * + * => the caller must XLOCK and VOP_LOCK the vnode before calling us + * [protects us from getting a vnode that is already in the DYING + * state...] + * => unlike uvn_detach, this function must not return until all the + * uvn's pages are disposed of. + * => in case [2] the uvn is still alive after this call, but all I/O + * ops will fail (due to the backing vnode now being "dead"). this + * will prob. kill any process using the uvn due to pgo_get failing. + */ + +void +uvm_vnp_terminate(vp) + struct vnode *vp; +{ + struct uvm_vnode *uvn = &vp->v_uvm; + int oldflags; + UVMHIST_FUNC("uvm_vnp_terminate"); UVMHIST_CALLED(maphist); + + /* + * lock object and check if it is valid + */ + simple_lock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist, " vp=0x%x, ref=%d, flag=0x%x", vp, + uvn->u_obj.uo_refs, uvn->u_flags, 0); + if ((uvn->u_flags & UVM_VNODE_VALID) == 0) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist, "<- done (not active)", 0, 0, 0, 0); + return; + } + + /* + * must be a valid uvn that is not already dying (because XLOCK + * protects us from that). the uvn can't in the ALOCK state + * because it is valid, and uvn's that are in the ALOCK state haven't + * been marked valid yet. + */ + +#ifdef DEBUG + /* + * debug check: are we yanking the vnode out from under our uvn? + */ + if (uvn->u_obj.uo_refs) { + printf("uvm_vnp_terminate(%p): terminating active vnode " + "(refs=%d)\n", uvn, uvn->u_obj.uo_refs); + } +#endif + + /* + * it is possible that the uvn was detached and is in the relkill + * state [i.e. waiting for async i/o to finish so that releasepg can + * kill object]. we take over the vnode now and cancel the relkill. + * we want to know when the i/o is done so we can recycle right + * away. note that a uvn can only be in the RELKILL state if it + * has a zero reference count. + */ + + if (uvn->u_flags & UVM_VNODE_RELKILL) + uvn->u_flags &= ~UVM_VNODE_RELKILL; /* cancel RELKILL */ + + /* + * block the uvn by setting the dying flag, and then flush the + * pages. 
(note that flush may unlock object while doing I/O, but + * it will re-lock it before it returns control here). + * + * also, note that we tell I/O that we are already VOP_LOCK'd so + * that uvn_io doesn't attempt to VOP_LOCK again. + * + * XXXCDC: setting VNISLOCKED on an active uvn which is being terminated + * due to a forceful unmount might not be a good idea. maybe we + * need a way to pass in this info to uvn_flush through a + * pager-defined PGO_ constant [currently there are none]. + */ + uvn->u_flags |= UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED; + + (void) uvn_flush(&uvn->u_obj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES); + + /* + * as we just did a flush we expect all the pages to be gone or in + * the process of going. sleep to wait for the rest to go [via iosync]. + */ + + while (uvn->u_obj.uo_npages) { +#ifdef DEBUG + struct vm_page *pp; + for (pp = uvn->u_obj.memq.tqh_first ; pp != NULL ; + pp = pp->listq.tqe_next) { + if ((pp->flags & PG_BUSY) == 0) + panic("uvm_vnp_terminate: detected unbusy pg"); + } + if (uvn->u_nio == 0) + panic("uvm_vnp_terminate: no I/O to wait for?"); + printf("uvm_vnp_terminate: waiting for I/O to fin.\n"); + /* + * XXXCDC: this is unlikely to happen without async i/o so we + * put a printf in just to keep an eye on it. + */ +#endif + uvn->u_flags |= UVM_VNODE_IOSYNC; + UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock, FALSE, + "uvn_term",0); + simple_lock(&uvn->u_obj.vmobjlock); + } + + /* + * done. now we free the uvn if its reference count is zero + * (true if we are zapping a persisting uvn). however, if we are + * terminating a uvn with active mappings we let it live ... future + * calls down to the vnode layer will fail. + */ + + oldflags = uvn->u_flags; + if (uvn->u_obj.uo_refs) { + + /* + * uvn must live on it is dead-vnode state until all references + * are gone. restore flags. clear CANPERSIST state. + */ + + uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED| + UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST); + + } else { + + /* + * free the uvn now. note that the VREF reference is already + * gone [it is dropped when we enter the persist state]. + */ + if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED) + panic("uvm_vnp_terminate: io sync wanted bit set"); + + if (uvn->u_flags & UVM_VNODE_WRITEABLE) { + simple_lock(&uvn_wl_lock); + LIST_REMOVE(uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + } + uvn->u_flags = 0; /* uvn is history, clear all bits */ + } + + if (oldflags & UVM_VNODE_WANTED) + wakeup(uvn); /* object lock still held */ + + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0); + } /* @@ -260,7 +662,7 @@ uvn_detach(uobj) * => returns TRUE if page's object is still alive, FALSE if we * killed the page's object. if we return TRUE, then we * return with the object locked. 
- * => if (nextpgp != NULL) => we return the next page on the queue, and return + * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return * with the page queues locked [for pagedaemon] * => if (nextpgp == NULL) => we return with page queues unlocked [normal case] * => we kill the uvn if it is not referenced and we are suppose to @@ -272,33 +674,76 @@ uvn_releasepg(pg, nextpgp) struct vm_page *pg; struct vm_page **nextpgp; /* OUT */ { - KASSERT(pg->flags & PG_RELEASED); - + struct uvm_vnode *uvn = (struct uvm_vnode *) pg->uobject; +#ifdef DIAGNOSTIC + if ((pg->flags & PG_RELEASED) == 0) + panic("uvn_releasepg: page not released!"); +#endif + /* * dispose of the page [caller handles PG_WANTED] */ pmap_page_protect(pg, VM_PROT_NONE); uvm_lock_pageq(); if (nextpgp) - *nextpgp = TAILQ_NEXT(pg, pageq); + *nextpgp = pg->pageq.tqe_next; /* next page for daemon */ uvm_pagefree(pg); if (!nextpgp) uvm_unlock_pageq(); + /* + * now see if we need to kill the object + */ + if (uvn->u_flags & UVM_VNODE_RELKILL) { + if (uvn->u_obj.uo_refs) + panic("uvn_releasepg: kill flag set on referenced " + "object!"); + if (uvn->u_obj.uo_npages == 0) { + if (uvn->u_flags & UVM_VNODE_WRITEABLE) { + simple_lock(&uvn_wl_lock); + LIST_REMOVE(uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + } +#ifdef DIAGNOSTIC + if (uvn->u_obj.memq.tqh_first) + panic("uvn_releasepg: pages in object with npages == 0"); +#endif + if (uvn->u_flags & UVM_VNODE_WANTED) + /* still holding object lock */ + wakeup(uvn); + + uvn->u_flags = 0; /* DEAD! */ + simple_unlock(&uvn->u_obj.vmobjlock); + return (FALSE); + } + } return (TRUE); } /* + * NOTE: currently we have to use VOP_READ/VOP_WRITE because they go + * through the buffer cache and allow I/O in any size. These VOPs use + * synchronous i/o. [vs. VOP_STRATEGY which can be async, but doesn't + * go through the buffer cache or allow I/O sizes larger than a + * block]. we will eventually want to change this. + * * issues to consider: + * uvm provides the uvm_aiodesc structure for async i/o management. * there are two tailq's in the uvm. structure... one for pending async * i/o and one for "done" async i/o. to do an async i/o one puts - * a buf on the "pending" list (protected by splbio()), starts the - * i/o and returns 0. when the i/o is done, we expect + * an aiodesc on the "pending" list (protected by splbio()), starts the + * i/o and returns VM_PAGER_PEND. when the i/o is done, we expect * some sort of "i/o done" function to be called (at splbio(), interrupt - * time). this function should remove the buf from the pending list + * time). this function should remove the aiodesc from the pending list * and place it on the "done" list and wakeup the daemon. the daemon * will run at normal spl() and will remove all items from the "done" - * list and call the iodone hook for each done request (see uvm_pager.c). + * list and call the "aiodone" hook for each done request (see uvm_pager.c). + * [in the old vm code, this was done by calling the "put" routine with + * null arguments which made the code harder to read and understand because + * you had one function ("put") doing two things.] + * + * so the current pager needs: + * int uvn_aiodone(struct uvm_aiodesc *) * * => return KERN_SUCCESS (aio finished, free it). otherwise requeue for * later collection. @@ -319,17 +764,15 @@ uvn_releasepg(pg, nextpgp) /* * uvn_flush: flush pages out of a uvm object. * - * => "stop == 0" means flush all pages at or after "start". * => object should be locked by caller. 
we may _unlock_ the object - * if (and only if) we need to clean a page (PGO_CLEANIT), or - * if PGO_SYNCIO is set and there are pages busy. + * if (and only if) we need to clean a page (PGO_CLEANIT). * we return with the object locked. - * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O). - * thus, a caller might want to unlock higher level resources - * (e.g. vm_map) before calling flush. - * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither - * unlock the object nor block. - * => if PGO_ALLPAGES is set, then all pages in the object are valid targets + * => if PGO_CLEANIT is set, we may block (due to I/O). thus, a caller + * might want to unlock higher level resources (e.g. vm_map) + * before calling flush. + * => if PGO_CLEANIT is not set, then we will neither unlock the object + * or block. + * => if PGO_ALLPAGE is set, then all pages in the object are valid targets * for flushing. * => NOTE: we rely on the fact that the object's memq is a TAILQ and * that new pages are inserted on the tail end of the list. thus, @@ -349,9 +792,9 @@ uvn_releasepg(pg, nextpgp) * in, then it can not be dirty (!PG_CLEAN) because no one has * had a chance to modify it yet. if the PG_BUSY page is being * paged out then it means that someone else has already started - * cleaning the page for us (how nice!). in this case, if we + * cleaning the page for us (how nice!). in this case, if we * have syncio specified, then after we make our pass through the - * object we need to wait for the other PG_BUSY pages to clear + * object we need to wait for the other PG_BUSY pages to clear * off (i.e. we need to do an iosync). also note that once a * page is PG_BUSY it must stay in its object until it is un-busyed. * @@ -359,76 +802,53 @@ uvn_releasepg(pg, nextpgp) * we can traverse the pages in an object either by going down the * linked list in "uobj->memq", or we can go over the address range * by page doing hash table lookups for each address. depending - * on how many pages are in the object it may be cheaper to do one + * on how many pages are in the object it may be cheaper to do one * or the other. we set "by_list" to true if we are using memq. * if the cost of a hash lookup was equal to the cost of the list * traversal we could compare the number of pages in the start->stop * range to the total number of pages in the object. however, it * seems that a hash table lookup is more expensive than the linked - * list traversal, so we multiply the number of pages in the + * list traversal, so we multiply the number of pages in the * start->stop range by a penalty which we define below. 
*/ #define UVN_HASH_PENALTY 4 /* XXX: a guess */ -boolean_t +static boolean_t uvn_flush(uobj, start, stop, flags) struct uvm_object *uobj; voff_t start, stop; int flags; { - struct vnode *vp = (struct vnode *)uobj; + struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; struct vm_page *pp, *ppnext, *ptmp; - struct vm_page *pps[256], **ppsp; - int s; + struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp; int npages, result, lcv; - boolean_t retval, need_iosync, by_list, needs_clean, all, wasclean; - boolean_t async = (flags & PGO_SYNCIO) == 0; + boolean_t retval, need_iosync, by_list, needs_clean, all; voff_t curoff; u_short pp_version; UVMHIST_FUNC("uvn_flush"); UVMHIST_CALLED(maphist); - UVMHIST_LOG(maphist, "uobj %p start 0x%x stop 0x%x flags 0x%x", - uobj, start, stop, flags); - KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)); - - if (uobj->uo_npages == 0) { - s = splbio(); - if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && - (vp->v_bioflag & VBIOONSYNCLIST)) { - vp->v_bioflag &= ~VBIOONSYNCLIST; - LIST_REMOVE(vp, v_synclist); - } - splx(s); - return TRUE; - } - -#ifdef DIAGNOSTIC - if (vp->v_size == VSIZENOTSET) { - printf("uvn_flush: size not set vp %p\n", vp); - vprint("uvn_flush VSIZENOTSET", vp); - flags |= PGO_ALLPAGES; - } -#endif + curoff = 0; /* XXX: shut up gcc */ /* * get init vals and determine how we are going to traverse object */ - if (stop == 0) { - stop = trunc_page(LLONG_MAX); - } - curoff = 0; need_iosync = FALSE; - retval = TRUE; - wasclean = TRUE; + retval = TRUE; /* return value */ if (flags & PGO_ALLPAGES) { all = TRUE; - by_list = TRUE; + by_list = TRUE; /* always go by the list */ } else { start = trunc_page(start); stop = round_page(stop); +#ifdef DEBUG + if (stop > round_page(uvn->u_size)) + printf("uvn_flush: strange, got an out of range " + "flush (fixed)\n"); +#endif all = FALSE; - by_list = (uobj->uo_npages <= + by_list = (uobj->uo_npages <= ((stop - start) >> PAGE_SHIFT) * UVN_HASH_PENALTY); } @@ -450,7 +870,8 @@ uvn_flush(uobj, start, stop, flags) if ((flags & PGO_CLEANIT) != 0 && uobj->pgops->pgo_mk_pcluster != NULL) { if (by_list) { - TAILQ_FOREACH(pp, &uobj->memq, listq) { + for (pp = uobj->memq.tqh_first ; pp != NULL ; + pp = pp->listq.tqe_next) { if (!all && (pp->offset < start || pp->offset >= stop)) continue; @@ -474,39 +895,52 @@ uvn_flush(uobj, start, stop, flags) */ if (by_list) { - pp = TAILQ_FIRST(&uobj->memq); + pp = uobj->memq.tqh_first; } else { curoff = start; pp = uvm_pagelookup(uobj, curoff); } - ppnext = NULL; - ppsp = NULL; - uvm_lock_pageq(); + ppnext = NULL; /* XXX: shut up gcc */ + ppsp = NULL; /* XXX: shut up gcc */ + uvm_lock_pageq(); /* page queues locked */ /* locked: both page queues and uobj */ - for ( ; (by_list && pp != NULL) || - (!by_list && curoff < stop) ; pp = ppnext) { + for ( ; (by_list && pp != NULL) || + (!by_list && curoff < stop) ; pp = ppnext) { + if (by_list) { + + /* + * range check + */ + if (!all && (pp->offset < start || pp->offset >= stop)) { - ppnext = TAILQ_NEXT(pp, listq); + ppnext = pp->listq.tqe_next; continue; } + } else { + + /* + * null check + */ + curoff += PAGE_SIZE; if (pp == NULL) { if (curoff < stop) ppnext = uvm_pagelookup(uobj, curoff); continue; } + } /* * handle case where we do not need to clean page (either * because we are not clean or because page is not dirty or * is busy): - * + * * NOTE: we are allowed to deactivate a non-wired active * PG_BUSY page, but once a PG_BUSY page is on the inactive * queue it must stay put until it is !PG_BUSY (so as not to @@ -515,23 +949,24 @@ 
uvn_flush(uobj, start, stop, flags) if ((flags & PGO_CLEANIT) == 0 || (pp->flags & PG_BUSY) != 0) { needs_clean = FALSE; - if (!async) + if ((pp->flags & PG_BUSY) != 0 && + (flags & (PGO_CLEANIT|PGO_SYNCIO)) == + (PGO_CLEANIT|PGO_SYNCIO)) need_iosync = TRUE; } else { - /* * freeing: nuke all mappings so we can sync * PG_CLEAN bit with no race */ - if ((pp->flags & PG_CLEAN) != 0 && + if ((pp->flags & PG_CLEAN) != 0 && (flags & PGO_FREE) != 0 && - /* XXX ACTIVE|INACTIVE test unnecessary? */ - (pp->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) != 0) + (pp->pqflags & PQ_ACTIVE) != 0) pmap_page_protect(pp, VM_PROT_NONE); if ((pp->flags & PG_CLEAN) != 0 && pmap_is_modified(pp)) pp->flags &= ~(PG_CLEAN); - pp->flags |= PG_CLEANCHK; + pp->flags |= PG_CLEANCHK; /* update "hint" */ + needs_clean = ((pp->flags & PG_CLEAN) == 0); } @@ -539,26 +974,29 @@ uvn_flush(uobj, start, stop, flags) * if we don't need a clean... load ppnext and dispose of pp */ if (!needs_clean) { + /* load ppnext */ if (by_list) - ppnext = TAILQ_NEXT(pp, listq); + ppnext = pp->listq.tqe_next; else { if (curoff < stop) ppnext = uvm_pagelookup(uobj, curoff); } + /* now dispose of pp */ if (flags & PGO_DEACTIVATE) { if ((pp->pqflags & PQ_INACTIVE) == 0 && - (pp->flags & PG_BUSY) == 0 && pp->wire_count == 0) { - pmap_clear_reference(pp); + pmap_page_protect(pp, VM_PROT_NONE); uvm_pagedeactivate(pp); } } else if (flags & PGO_FREE) { if (pp->flags & PG_BUSY) { + /* release busy pages */ pp->flags |= PG_RELEASED; } else { pmap_page_protect(pp, VM_PROT_NONE); + /* removed page from object */ uvm_pagefree(pp); } } @@ -575,23 +1013,23 @@ uvn_flush(uobj, start, stop, flags) * note: locked: uobj and page queues. */ - wasclean = FALSE; pp->flags |= PG_BUSY; /* we 'own' page now */ UVM_PAGE_OWN(pp, "uvn_flush"); pmap_page_protect(pp, VM_PROT_READ); pp_version = pp->version; +ReTry: ppsp = pps; npages = sizeof(pps) / sizeof(struct vm_page *); /* locked: page queues, uobj */ - result = uvm_pager_put(uobj, pp, &ppsp, &npages, - flags | PGO_DOACTCLUST, start, stop); + result = uvm_pager_put(uobj, pp, &ppsp, &npages, + flags | PGO_DOACTCLUST, start, stop); /* unlocked: page queues, uobj */ /* * at this point nothing is locked. if we did an async I/O - * it is remotely possible for the async i/o to complete and - * the page "pp" be freed or what not before we get a chance + * it is remotely possible for the async i/o to complete and + * the page "pp" be freed or what not before we get a chance * to relock the object. in order to detect this, we have * saved the version number of the page in "pp_version". */ @@ -601,10 +1039,33 @@ uvn_flush(uobj, start, stop, flags) uvm_lock_pageq(); /* - * the cleaning operation is now done. finish up. note that - * on error uvm_pager_put drops the cluster for us. - * on success uvm_pager_put returns the cluster to us in - * ppsp/npages. + * VM_PAGER_AGAIN: given the structure of this pager, this + * can only happen when we are doing async I/O and can't + * map the pages into kernel memory (pager_map) due to lack + * of vm space. if this happens we drop back to sync I/O. + */ + + if (result == VM_PAGER_AGAIN) { + /* + * it is unlikely, but page could have been released + * while we had the object lock dropped. we ignore + * this now and retry the I/O. we will detect and + * handle the released page after the syncio I/O + * completes. 
+ */ +#ifdef DIAGNOSTIC + if (flags & PGO_SYNCIO) + panic("uvn_flush: PGO_SYNCIO return 'try again' error (impossible)"); +#endif + flags |= PGO_SYNCIO; + goto ReTry; + } + + /* + * the cleaning operation is now done. finish up. note that + * on error (!OK, !PEND) uvm_pager_put drops the cluster for us. + * if success (OK, PEND) then uvm_pager_put returns the cluster + * to us in ppsp/npages. */ /* @@ -612,29 +1073,34 @@ uvn_flush(uobj, start, stop, flags) * we can move on to the next page. */ - if (result == 0 && async && - (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { + if (result == VM_PAGER_PEND) { - /* - * no per-page ops: refresh ppnext and continue - */ - if (by_list) { - if (pp->version == pp_version) - ppnext = TAILQ_NEXT(pp, listq); - else - ppnext = TAILQ_FIRST(&uobj->memq); - } else { - if (curoff < stop) - ppnext = uvm_pagelookup(uobj, curoff); + if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { + /* + * no per-page ops: refresh ppnext and continue + */ + if (by_list) { + if (pp->version == pp_version) + ppnext = pp->listq.tqe_next; + else + /* reset */ + ppnext = uobj->memq.tqh_first; + } else { + if (curoff < stop) + ppnext = uvm_pagelookup(uobj, + curoff); + } + continue; } - continue; + + /* need to do anything here? */ } /* - * need to look at each page of the I/O operation. we defer - * processing "pp" until the last trip through this "for" loop + * need to look at each page of the I/O operation. we defer + * processing "pp" until the last trip through this "for" loop * so that we can load "ppnext" for the main loop after we - * play with the cluster pages [thus the "npages + 1" in the + * play with the cluster pages [thus the "npages + 1" in the * loop below]. */ @@ -654,84 +1120,77 @@ uvn_flush(uobj, start, stop, flags) /* set up next page for outer loop */ if (by_list) { if (pp->version == pp_version) - ppnext = TAILQ_NEXT(pp, listq); + ppnext = pp->listq.tqe_next; else - ppnext = TAILQ_FIRST( - &uobj->memq); + /* reset */ + ppnext = uobj->memq.tqh_first; } else { if (curoff < stop) - ppnext = uvm_pagelookup(uobj, - curoff); + ppnext = uvm_pagelookup(uobj, curoff); } } /* - * verify the page wasn't moved while obj was + * verify the page didn't get moved while obj was * unlocked */ - if (result == 0 && async && ptmp->uobject != uobj) + if (result == VM_PAGER_PEND && ptmp->uobject != uobj) continue; /* * unbusy the page if I/O is done. note that for - * async I/O it is possible that the I/O op + * pending I/O it is possible that the I/O op * finished before we relocked the object (in * which case the page is no longer busy). 
*/ - if (result != 0 || !async) { - if (ptmp->flags & PG_WANTED) { + if (result != VM_PAGER_PEND) { + if (ptmp->flags & PG_WANTED) /* still holding object lock */ wakeup(ptmp); - } + ptmp->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(ptmp, NULL); if (ptmp->flags & PG_RELEASED) { + + /* pgo_releasepg wants this */ uvm_unlock_pageq(); - if (!uvn_releasepg(ptmp, NULL)) { - UVMHIST_LOG(maphist, - "released %p", - ptmp, 0,0,0); + if (!uvn_releasepg(ptmp, NULL)) return (TRUE); - } - uvm_lock_pageq(); - continue; + + uvm_lock_pageq(); /* relock */ + continue; /* next page */ + } else { - if ((flags & PGO_WEAK) == 0 && - !(result == EIO && - curproc == uvm.pagedaemon_proc)) { - ptmp->flags |= - (PG_CLEAN|PG_CLEANCHK); - if ((flags & PGO_FREE) == 0) { - pmap_clear_modify(ptmp); - } - } + ptmp->flags |= (PG_CLEAN|PG_CLEANCHK); + if ((flags & PGO_FREE) == 0) + pmap_clear_modify(ptmp); } } - + /* * dispose of page */ if (flags & PGO_DEACTIVATE) { if ((pp->pqflags & PQ_INACTIVE) == 0 && - (pp->flags & PG_BUSY) == 0 && pp->wire_count == 0) { - pmap_clear_reference(ptmp); + pmap_page_protect(ptmp, VM_PROT_NONE); uvm_pagedeactivate(ptmp); } + } else if (flags & PGO_FREE) { - if (result == 0 && async) { + if (result == VM_PAGER_PEND) { if ((ptmp->flags & PG_BUSY) != 0) /* signal for i/o done */ ptmp->flags |= PG_RELEASED; } else { - if (result != 0) { + if (result != VM_PAGER_OK) { printf("uvn_flush: obj=%p, " - "offset=0x%llx. error %d\n", + "offset=0x%llx. error " + "during pageout.\n", pp->uobject, - (long long)pp->offset, - result); + (long long)pp->offset); printf("uvn_flush: WARNING: " "changes to page may be " "lost!\n"); @@ -741,38 +1200,31 @@ uvn_flush(uobj, start, stop, flags) uvm_pagefree(ptmp); } } + } /* end of "lcv" for loop */ + } /* end of "pp" for loop */ + /* + * done with pagequeues: unlock + */ uvm_unlock_pageq(); - s = splbio(); - if ((flags & PGO_CLEANIT) && all && wasclean && - LIST_FIRST(&vp->v_dirtyblkhd) == NULL && - (vp->v_bioflag & VBIOONSYNCLIST)) { - vp->v_bioflag &= ~VBIOONSYNCLIST; - LIST_REMOVE(vp, v_synclist); - } - splx(s); - if (need_iosync) { - UVMHIST_LOG(maphist," <<DOING IOSYNC>>",0,0,0,0); - - /* - * XXX this doesn't use the new two-flag scheme, - * but to use that, all i/o initiators will have to change. - */ - s = splbio(); - while (vp->v_numoutput != 0) { - UVMHIST_LOG(ubchist, "waiting for vp %p num %d", - vp, vp->v_numoutput,0,0); + /* + * now wait for all I/O if required. + */ + if (need_iosync) { - vp->v_bioflag |= VBIOWAIT; - UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, - &uobj->vmobjlock, - FALSE, "uvn_flush",0); - simple_lock(&uobj->vmobjlock); + UVMHIST_LOG(maphist," <<DOING IOSYNC>>",0,0,0,0); + while (uvn->u_nio != 0) { + uvn->u_flags |= UVM_VNODE_IOSYNC; + UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock, + FALSE, "uvn_flush",0); + simple_lock(&uvn->u_obj.vmobjlock); } - splx(s); + if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED) + wakeup(&uvn->u_flags); + uvn->u_flags &= ~(UVM_VNODE_IOSYNC|UVM_VNODE_IOSYNCWANTED); } /* return, with object locked! */ @@ -796,31 +1248,46 @@ uvn_cluster(uobj, offset, loffset, hoffset) voff_t offset; voff_t *loffset, *hoffset; /* OUT */ { - struct vnode *vp = (struct vnode *)uobj; - + struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; *loffset = offset; - *hoffset = MIN(offset + MAXBSIZE, round_page(vp->v_size)); + + if (*loffset >= uvn->u_size) + panic("uvn_cluster: offset out of range"); + + /* + * XXX: old pager claims we could use VOP_BMAP to get maxcontig value. 
+ */ + *hoffset = *loffset + MAXBSIZE; + if (*hoffset > round_page(uvn->u_size)) /* past end? */ + *hoffset = round_page(uvn->u_size); + + return; } /* * uvn_put: flush page data to backing store. * + * => prefer map unlocked (not required) * => object must be locked! we will _unlock_ it before starting I/O. * => flags: PGO_SYNCIO -- use sync. I/O * => note: caller must set PG_CLEAN and pmap_clear_modify (if needed) + * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync. + * [thus we never do async i/o! see iodone comment] */ -int +static int uvn_put(uobj, pps, npages, flags) struct uvm_object *uobj; struct vm_page **pps; int npages, flags; { - struct vnode *vp = (struct vnode *)uobj; - int error; + int retval; + + /* note: object locked */ + retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE); + /* note: object unlocked */ - error = VOP_PUTPAGES(vp, pps, npages, flags, NULL); - return error; + return(retval); } @@ -834,140 +1301,558 @@ uvn_put(uobj, pps, npages, flags) * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx] * => NOTE: caller must check for released pages!! */ - -int + +static int uvn_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) struct uvm_object *uobj; voff_t offset; struct vm_page **pps; /* IN/OUT */ int *npagesp; /* IN (OUT if PGO_LOCKED) */ - int centeridx; + int centeridx, advice, flags; vm_prot_t access_type; - int advice, flags; { - struct vnode *vp = (struct vnode *)uobj; - struct proc *p = curproc; - int error; - UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "vp %p off 0x%x", vp, (int)offset, 0,0); - error = vn_lock(vp, LK_EXCLUSIVE|LK_RECURSEFAIL|LK_NOWAIT, p); - if (error) { - if (error == EBUSY) - return EAGAIN; - return error; + voff_t current_offset; + struct vm_page *ptmp; + int lcv, result, gotpages; + boolean_t done; + UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist, "flags=%d", flags,0,0,0); + + /* + * step 1: handled the case where fault data structures are locked. + */ + + if (flags & PGO_LOCKED) { + + /* + * gotpages is the current number of pages we've gotten (which + * we pass back up to caller via *npagesp. + */ + + gotpages = 0; + + /* + * step 1a: get pages that are already resident. only do this + * if the data structures are locked (i.e. the first time + * through). + */ + + done = TRUE; /* be optimistic */ + + for (lcv = 0, current_offset = offset ; lcv < *npagesp ; + lcv++, current_offset += PAGE_SIZE) { + + /* do we care about this page? if not, skip it */ + if (pps[lcv] == PGO_DONTCARE) + continue; + + /* lookup page */ + ptmp = uvm_pagelookup(uobj, current_offset); + + /* to be useful must get a non-busy, non-released pg */ + if (ptmp == NULL || + (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) { + if (lcv == centeridx || (flags & PGO_ALLPAGES) + != 0) + done = FALSE; /* need to do a wait or I/O! */ + continue; + } + + /* + * useful page: busy/lock it and plug it in our + * result array + */ + ptmp->flags |= PG_BUSY; /* loan up to caller */ + UVM_PAGE_OWN(ptmp, "uvn_get1"); + pps[lcv] = ptmp; + gotpages++; + + } /* "for" lcv loop */ + + /* + * XXX: given the "advice", should we consider async read-ahead? + * XXX: fault current does deactive of pages behind us. is + * this good (other callers might now). + */ + /* + * XXX: read-ahead currently handled by buffer cache (bread) + * level. + * XXX: no async i/o available. + * XXX: so we don't do anything now. 
+ */ + + /* + * step 1c: now we've either done everything needed or we to + * unlock and do some waiting or I/O. + */ + + *npagesp = gotpages; /* let caller know */ + if (done) + return(VM_PAGER_OK); /* bingo! */ + else + /* EEK! Need to unlock and I/O */ + return(VM_PAGER_UNLOCK); } - error = VOP_GETPAGES(vp, offset, pps, npagesp, centeridx, - access_type, advice, flags); - VOP_UNLOCK(vp, LK_RELEASE, p); - return error; -} + /* + * step 2: get non-resident or busy pages. + * object is locked. data structures are unlocked. + * + * XXX: because we can't do async I/O at this level we get things + * page at a time (otherwise we'd chunk). the VOP_READ() will do + * async-read-ahead for us at a lower level. + */ + + for (lcv = 0, current_offset = offset ; + lcv < *npagesp ; lcv++, current_offset += PAGE_SIZE) { + + /* skip over pages we've already gotten or don't want */ + /* skip over pages we don't _have_ to get */ + if (pps[lcv] != NULL || (lcv != centeridx && + (flags & PGO_ALLPAGES) == 0)) + continue; + + /* + * we have yet to locate the current page (pps[lcv]). we first + * look for a page that is already at the current offset. if + * we fine a page, we check to see if it is busy or released. + * if that is the case, then we sleep on the page until it is + * no longer busy or released and repeat the lookup. if the + * page we found is neither busy nor released, then we busy it + * (so we own it) and plug it into pps[lcv]. this breaks the + * following while loop and indicates we are ready to move on + * to the next page in the "lcv" loop above. + * + * if we exit the while loop with pps[lcv] still set to NULL, + * then it means that we allocated a new busy/fake/clean page + * ptmp in the object and we need to do I/O to fill in the data. + */ + + while (pps[lcv] == NULL) { /* top of "pps" while loop */ + + /* look for a current page */ + ptmp = uvm_pagelookup(uobj, current_offset); + + /* nope? allocate one now (if we can) */ + if (ptmp == NULL) { + + ptmp = uvm_pagealloc(uobj, current_offset, + NULL, 0); + + /* out of RAM? */ + if (ptmp == NULL) { + simple_unlock(&uobj->vmobjlock); + uvm_wait("uvn_getpage"); + simple_lock(&uobj->vmobjlock); + + /* goto top of pps while loop */ + continue; + } + + /* + * got new page ready for I/O. break pps + * while loop. pps[lcv] is still NULL. + */ + break; + } + + /* page is there, see if we need to wait on it */ + if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) { + ptmp->flags |= PG_WANTED; + UVM_UNLOCK_AND_WAIT(ptmp, + &uobj->vmobjlock, FALSE, "uvn_get",0); + simple_lock(&uobj->vmobjlock); + continue; /* goto top of pps while loop */ + } + + /* + * if we get here then the page has become resident + * and unbusy between steps 1 and 2. we busy it + * now (so we own it) and set pps[lcv] (so that we + * exit the while loop). + */ + ptmp->flags |= PG_BUSY; + UVM_PAGE_OWN(ptmp, "uvn_get2"); + pps[lcv] = ptmp; + } + + /* + * if we own the a valid page at the correct offset, pps[lcv] + * will point to it. nothing more to do except go to the + * next page. + */ + + if (pps[lcv]) + continue; /* next lcv */ + + /* + * we have a "fake/busy/clean" page that we just allocated. do + * I/O to fill it with valid data. note that object must be + * locked going into uvn_io, but will be unlocked afterwards. + */ + + result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1, + PGO_SYNCIO, UIO_READ); + + /* + * I/O done. object is unlocked (by uvn_io). because we used + * syncio the result can not be PEND or AGAIN. we must relock + * and check for errors. 
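The PGO_LOCKED pass of uvn_get() (step 1 above) only picks up pages that are already resident and idle, busies them for the caller, and answers VM_PAGER_UNLOCK when the page at centeridx was missed, so the fault code knows it must drop its locks and return for real I/O. A toy single-threaded model of that gather follows; the page table, get_locked(), and the flag values are illustrative stand-ins, not kernel structures.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE   4096
#define PG_BUSY     0x01
#define PG_RELEASED 0x02

struct page { long long offset; int flags; };

/* A toy "object": the pages that happen to be resident right now. */
static struct page resident[] = {
    { 0 * PAGE_SIZE, 0 },
    { 1 * PAGE_SIZE, PG_BUSY },     /* busy: the locked pass must skip it */
    { 3 * PAGE_SIZE, 0 },
};

static struct page *
page_lookup(long long offset)
{
    for (size_t i = 0; i < sizeof(resident) / sizeof(resident[0]); i++)
        if (resident[i].offset == offset)
            return &resident[i];
    return NULL;
}

/*
 * Model of the PGO_LOCKED pass: collect only resident, idle pages and
 * busy them for the caller; *done reports whether the center page was
 * found (if not, the real code returns VM_PAGER_UNLOCK).
 */
static int
get_locked(long long base, struct page **pps, int npages, int centeridx,
    bool *done)
{
    int gotpages = 0;

    *done = true;
    for (int i = 0; i < npages; i++) {
        struct page *p = page_lookup(base + (long long)i * PAGE_SIZE);

        if (p == NULL || (p->flags & (PG_BUSY | PG_RELEASED)) != 0) {
            if (i == centeridx)
                *done = false;          /* caller must unlock and do I/O */
            pps[i] = NULL;
            continue;
        }
        p->flags |= PG_BUSY;            /* loan the page to the caller */
        pps[i] = p;
        gotpages++;
    }
    return gotpages;
}

int
main(void)
{
    struct page *pps[4];
    bool done;
    int got = get_locked(0, pps, 4, 2, &done);

    printf("got %d resident pages, done=%d\n", got, done);
    return 0;
}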
+ */ + + /* lock object. check for errors. */ + simple_lock(&uobj->vmobjlock); + if (result != VM_PAGER_OK) { + if (ptmp->flags & PG_WANTED) + /* object lock still held */ + wakeup(ptmp); + + ptmp->flags &= ~(PG_WANTED|PG_BUSY); + UVM_PAGE_OWN(ptmp, NULL); + uvm_lock_pageq(); + uvm_pagefree(ptmp); + uvm_unlock_pageq(); + simple_unlock(&uobj->vmobjlock); + return(result); + } + + /* + * we got the page! clear the fake flag (indicates valid + * data now in page) and plug into our result array. note + * that page is still busy. + * + * it is the callers job to: + * => check if the page is released + * => unbusy the page + * => activate the page + */ + + ptmp->flags &= ~PG_FAKE; /* data is valid ... */ + pmap_clear_modify(ptmp); /* ... and clean */ + pps[lcv] = ptmp; + + } /* lcv loop */ + + /* + * finally, unlock object and return. + */ + + simple_unlock(&uobj->vmobjlock); + return (VM_PAGER_OK); +} /* - * uvn_findpages: - * return the page for the uobj and offset requested, allocating if needed. - * => uobj must be locked. - * => returned page will be BUSY. + * uvn_io: do I/O to a vnode + * + * => prefer map unlocked (not required) + * => object must be locked! we will _unlock_ it before starting I/O. + * => flags: PGO_SYNCIO -- use sync. I/O + * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync. + * [thus we never do async i/o! see iodone comment] */ -void -uvn_findpages(uobj, offset, npagesp, pps, flags) - struct uvm_object *uobj; - voff_t offset; - int *npagesp; - struct vm_page **pps; - int flags; +static int +uvn_io(uvn, pps, npages, flags, rw) + struct uvm_vnode *uvn; + vm_page_t *pps; + int npages, flags, rw; { - int i, rv, npages; + struct vnode *vn; + struct uio uio; + struct iovec iov; + vaddr_t kva; + off_t file_offset; + int waitf, result, mapinflags; + size_t got, wanted; + UVMHIST_FUNC("uvn_io"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "rw=%d", rw,0,0,0); + + /* + * init values + */ + + waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT; + vn = (struct vnode *) uvn; + file_offset = pps[0]->offset; + + /* + * check for sync'ing I/O. + */ + + while (uvn->u_flags & UVM_VNODE_IOSYNC) { + if (waitf == M_NOWAIT) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- try again (iosync)",0,0,0,0); + return(VM_PAGER_AGAIN); + } + uvn->u_flags |= UVM_VNODE_IOSYNCWANTED; + UVM_UNLOCK_AND_WAIT(&uvn->u_flags, &uvn->u_obj.vmobjlock, + FALSE, "uvn_iosync",0); + simple_lock(&uvn->u_obj.vmobjlock); + } + + /* + * check size + */ + + if (file_offset >= uvn->u_size) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- BAD (size check)",0,0,0,0); + return(VM_PAGER_BAD); + } + + /* + * first try and map the pages in (without waiting) + */ + + mapinflags = (rw == UIO_READ) ? + UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE; + + kva = uvm_pagermapin(pps, npages, mapinflags); + if (kva == 0 && waitf == M_NOWAIT) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- mapin failed (try again)",0,0,0,0); + return(VM_PAGER_AGAIN); + } + + /* + * ok, now bump u_nio up. at this point we are done with uvn + * and can unlock it. if we still don't have a kva, try again + * (this time with sleep ok). + */ + + uvn->u_nio++; /* we have an I/O in progress! */ + simple_unlock(&uvn->u_obj.vmobjlock); + /* NOTE: object now unlocked */ + if (kva == 0) + kva = uvm_pagermapin(pps, npages, + mapinflags | UVMPAGER_MAPIN_WAITOK); + + /* + * ok, mapped in. 
our pages are PG_BUSY so they are not going to + * get touched (so we can look at "offset" without having to lock + * the object). set up for I/O. + */ + + /* + * fill out uio/iov + */ + + iov.iov_base = (caddr_t) kva; + wanted = npages << PAGE_SHIFT; + if (file_offset + wanted > uvn->u_size) + wanted = uvn->u_size - file_offset; /* XXX: needed? */ + iov.iov_len = wanted; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_offset = file_offset; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_rw = rw; + uio.uio_resid = wanted; + uio.uio_procp = curproc; + + /* + * do the I/O! (XXX: curproc?) + */ + + UVMHIST_LOG(maphist, "calling VOP",0,0,0,0); + + /* + * This process may already have this vnode locked, if we faulted in + * copyin() or copyout() on a region backed by this vnode + * while doing I/O to the vnode. If this is the case, don't + * panic.. instead, return the error to the user. + * + * XXX this is a stopgap to prevent a panic. + * Ideally, this kind of operation *should* work. + */ + result = 0; + if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) + result = vn_lock(vn, LK_EXCLUSIVE | LK_RETRY | LK_RECURSEFAIL, curproc); + + if (result == 0) { + /* NOTE: vnode now locked! */ + + if (rw == UIO_READ) + result = VOP_READ(vn, &uio, 0, curproc->p_ucred); + else + result = VOP_WRITE(vn, &uio, 0, curproc->p_ucred); - rv = 0; - npages = *npagesp; - for (i = 0; i < npages; i++, offset += PAGE_SIZE) { - rv += uvn_findpage(uobj, offset, &pps[i], flags); + if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) + VOP_UNLOCK(vn, 0, curproc); + } + + /* NOTE: vnode now unlocked (unless vnislocked) */ + + UVMHIST_LOG(maphist, "done calling VOP",0,0,0,0); + + /* + * result == unix style errno (0 == OK!) + * + * zero out rest of buffer (if needed) + */ + + if (result == 0) { + got = wanted - uio.uio_resid; + + if (wanted && got == 0) { + result = EIO; /* XXX: error? */ + } else if (got < PAGE_SIZE * npages && rw == UIO_READ) { + memset((void *) (kva + got), 0, + (npages << PAGE_SHIFT) - got); + } } - *npagesp = rv; + + /* + * now remove pager mapping + */ + uvm_pagermapout(kva, npages); + + /* + * now clean up the object (i.e. drop I/O count) + */ + + simple_lock(&uvn->u_obj.vmobjlock); + /* NOTE: object now locked! */ + + uvn->u_nio--; /* I/O DONE! */ + if ((uvn->u_flags & UVM_VNODE_IOSYNC) != 0 && uvn->u_nio == 0) { + wakeup(&uvn->u_nio); + } + simple_unlock(&uvn->u_obj.vmobjlock); + /* NOTE: object now unlocked! */ + + /* + * done! + */ + + UVMHIST_LOG(maphist, "<- done (result %d)", result,0,0,0); + if (result == 0) + return(VM_PAGER_OK); + else + return(VM_PAGER_ERROR); } -static int -uvn_findpage(uobj, offset, pgp, flags) - struct uvm_object *uobj; - voff_t offset; - struct vm_page **pgp; - int flags; +/* + * uvm_vnp_uncache: disable "persisting" in a vnode... when last reference + * is gone we will kill the object (flushing dirty pages back to the vnode + * if needed). + * + * => returns TRUE if there was no uvm_object attached or if there was + * one and we killed it [i.e. if there is no active uvn] + * => called with the vnode VOP_LOCK'd [we will unlock it for I/O, if + * needed] + * + * => XXX: given that we now kill uvn's when a vnode is recycled (without + * having to hold a reference on the vnode) and given a working + * uvm_vnp_sync(), how does that effect the need for this function? + * [XXXCDC: seems like it can die?] + * + * => XXX: this function should DIE once we merge the VM and buffer + * cache. 
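uvn_io(), shown above, builds a uio/iovec over the pager-mapped pages, hands it to VOP_READ()/VOP_WRITE(), and then memset()s whatever part of the mapped window a short read could not fill. A userspace analogue using pread(2) on an ordinary file descriptor is sketched below; read_window(), the 4 KB window size, and the sample file path are assumptions for the sketch, and the error returns only loosely mirror the VM_PAGER_ERROR/EIO handling in the kernel code.

#include <sys/types.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define PAGE_SIZE 4096      /* assumption: page-sized I/O window */

/*
 * Fill a page-sized buffer from "fd" at "offset" and zero whatever the
 * file could not supply, the way uvn_io() memset()s the tail of a
 * short read.
 */
int
read_window(int fd, off_t offset, void *buf, size_t len)
{
    ssize_t got = pread(fd, buf, len, offset);

    if (got < 0)
        return -1;                  /* cf. VM_PAGER_ERROR */
    if (len > 0 && got == 0)
        return -1;                  /* nothing at all: treat as EIO */
    if ((size_t)got < len)
        memset((char *)buf + got, 0, len - got);
    return 0;
}

int
main(int argc, char **argv)
{
    static char buf[PAGE_SIZE];
    /* any readable file will do; /etc/hosts is just a convenient default */
    int fd = open(argc > 1 ? argv[1] : "/etc/hosts", O_RDONLY);

    if (fd < 0 || read_window(fd, 0, buf, sizeof(buf)) < 0) {
        perror("read_window");
        return 1;
    }
    printf("first byte: 0x%02x\n", (unsigned char)buf[0]);
    close(fd);
    return 0;
}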
+ * + * research shows that this is called in the following places: + * ext2fs_truncate, ffs_truncate, detrunc[msdosfs]: called when vnode + * changes sizes + * ext2fs_write, WRITE [ufs_readwrite], msdosfs_write: called when we + * are written to + * ex2fs_chmod, ufs_chmod: called if VTEXT vnode and the sticky bit + * is off + * ffs_realloccg: when we can't extend the current block and have + * to allocate a new one we call this [XXX: why?] + * nfsrv_rename, rename_files: called when the target filename is there + * and we want to remove it + * nfsrv_remove, sys_unlink: called on file we are removing + * nfsrv_access: if VTEXT and we want WRITE access and we don't uncache + * then return "text busy" + * nfs_open: seems to uncache any file opened with nfs + * vn_writechk: if VTEXT vnode and can't uncache return "text busy" + */ + +boolean_t +uvm_vnp_uncache(vp) + struct vnode *vp; { - struct vm_page *pg; - int s; - UVMHIST_FUNC("uvn_findpage"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "vp %p off 0x%lx", uobj, offset,0,0); + struct uvm_vnode *uvn = &vp->v_uvm; - if (*pgp != NULL) { - UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0); - return 0; + /* + * lock uvn part of the vnode and check to see if we need to do anything + */ + + simple_lock(&uvn->u_obj.vmobjlock); + if ((uvn->u_flags & UVM_VNODE_VALID) == 0 || + (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) { + simple_unlock(&uvn->u_obj.vmobjlock); + return(TRUE); } - for (;;) { - /* look for an existing page */ - pg = uvm_pagelookup(uobj, offset); - - /* nope? allocate one now */ - if (pg == NULL) { - if (flags & UFP_NOALLOC) { - UVMHIST_LOG(ubchist, "noalloc", 0,0,0,0); - return 0; - } - pg = uvm_pagealloc(uobj, offset, NULL, 0); - if (pg == NULL) { - if (flags & UFP_NOWAIT) { - UVMHIST_LOG(ubchist, "nowait",0,0,0,0); - return 0; - } - simple_unlock(&uobj->vmobjlock); - uvm_wait("uvn_fp1"); - simple_lock(&uobj->vmobjlock); - continue; - } - if (UVM_OBJ_IS_VTEXT(uobj)) { - uvmexp.vtextpages++; - } else { - uvmexp.vnodepages++; - } - s = splbio(); - vhold((struct vnode *)uobj); - splx(s); - UVMHIST_LOG(ubchist, "alloced",0,0,0,0); - break; - } else if (flags & UFP_NOCACHE) { - UVMHIST_LOG(ubchist, "nocache",0,0,0,0); - return 0; - } - /* page is there, see if we need to wait on it */ - if ((pg->flags & (PG_BUSY|PG_RELEASED)) != 0) { - if (flags & UFP_NOWAIT) { - UVMHIST_LOG(ubchist, "nowait",0,0,0,0); - return 0; - } - pg->flags |= PG_WANTED; - UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, - "uvn_fp2", 0); - simple_lock(&uobj->vmobjlock); - continue; - } + /* + * we have a valid, non-blocked uvn. clear persist flag. + * if uvn is currently active we can return now. + */ - /* skip PG_RDONLY pages if requested */ - if ((flags & UFP_NORDONLY) && (pg->flags & PG_RDONLY)) { - UVMHIST_LOG(ubchist, "nordonly",0,0,0,0); - return 0; - } + uvn->u_flags &= ~UVM_VNODE_CANPERSIST; + if (uvn->u_obj.uo_refs) { + simple_unlock(&uvn->u_obj.vmobjlock); + return(FALSE); + } - /* mark the page BUSY and we're done. */ - pg->flags |= PG_BUSY; - UVM_PAGE_OWN(pg, "uvn_findpage"); - UVMHIST_LOG(ubchist, "found",0,0,0,0); - break; + /* + * uvn is currently persisting! we have to gain a reference to + * it so that we can call uvn_detach to kill the uvn. + */ + + VREF(vp); /* seems ok, even with VOP_LOCK */ + uvn->u_obj.uo_refs++; /* value is now 1 */ + simple_unlock(&uvn->u_obj.vmobjlock); + + +#ifdef DEBUG + /* + * carry over sanity check from old vnode pager: the vnode should + * be VOP_LOCK'd, and we confirm it here. 
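uvm_vnp_uncache(), above, boils down to a small decision table: no valid (or an already-blocked) uvn means there is nothing to do, an actively referenced uvn only loses its persist flag, and a persisting-but-unreferenced uvn must be detached and killed. The sketch below captures just that logic, without the locking, VREF(), or the actual uvn_detach() call; uvn_state and uncache_decision() are illustrative names, while the flag values are copied from the uvm_vnode.h hunk later in this diff.

#include <stdbool.h>
#include <stdio.h>

#define UVN_VALID      0x001
#define UVN_CANPERSIST 0x002
#define UVN_BLOCKED    0x01c    /* ALOCK (0x004) | DYING (0x008) | RELKILL (0x010) */

struct uvn_state { int flags; int refs; };

/*
 * Returns true when no active uvn remains (or none existed); sets
 * *must_detach when the caller still has to gain a reference and kill
 * a persisting, unreferenced uvn.
 */
static bool
uncache_decision(struct uvn_state *u, bool *must_detach)
{
    *must_detach = false;
    if ((u->flags & UVN_VALID) == 0 || (u->flags & UVN_BLOCKED) != 0)
        return true;                /* nothing attached / already dying */
    u->flags &= ~UVN_CANPERSIST;    /* never allow persisting again */
    if (u->refs > 0)
        return false;               /* still actively used: "text busy" */
    *must_detach = true;            /* persisting only: caller kills it */
    return true;
}

int
main(void)
{
    struct uvn_state busy = { UVN_VALID | UVN_CANPERSIST, 2 };
    struct uvn_state idle = { UVN_VALID | UVN_CANPERSIST, 0 };
    bool detach, gone;

    gone = uncache_decision(&busy, &detach);
    printf("busy: gone=%d detach=%d\n", gone, detach);
    gone = uncache_decision(&idle, &detach);
    printf("idle: gone=%d detach=%d\n", gone, detach);
    return 0;
}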
+ */ + if (!VOP_ISLOCKED(vp)) { + boolean_t is_ok_anyway = FALSE; +#if defined(NFSCLIENT) + extern int (**nfsv2_vnodeop_p) __P((void *)); + extern int (**spec_nfsv2nodeop_p) __P((void *)); + extern int (**fifo_nfsv2nodeop_p) __P((void *)); + + /* vnode is NOT VOP_LOCKed: some vnode types _never_ lock */ + if (vp->v_op == nfsv2_vnodeop_p || + vp->v_op == spec_nfsv2nodeop_p) { + is_ok_anyway = TRUE; + } + if (vp->v_op == fifo_nfsv2nodeop_p) { + is_ok_anyway = TRUE; + } +#endif /* defined(NFSSERVER) || defined(NFSCLIENT) */ + if (!is_ok_anyway) + panic("uvm_vnp_uncache: vnode not locked!"); } - *pgp = pg; - return 1; +#endif /* DEBUG */ + + /* + * now drop our reference to the vnode. if we have the sole + * reference to the vnode then this will cause it to die [as we + * just cleared the persist flag]. we have to unlock the vnode + * while we are doing this as it may trigger I/O. + * + * XXX: it might be possible for uvn to get reclaimed while we are + * unlocked causing us to return TRUE when we should not. we ignore + * this as a false-positive return value doesn't hurt us. + */ + VOP_UNLOCK(vp, 0, curproc); + uvn_detach(&uvn->u_obj); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc); + + /* + * and return... + */ + + return(TRUE); } /* @@ -976,7 +1861,7 @@ uvn_findpage(uobj, offset, pgp, flags) * grow => just update size value * shrink => toss un-needed pages * - * => we assume that the caller has a reference of some sort to the + * => we assume that the caller has a reference of some sort to the * vnode in question so that it will not be yanked out from under * us. * @@ -993,50 +1878,151 @@ uvm_vnp_setsize(vp, newsize) struct vnode *vp; voff_t newsize; { - struct uvm_object *uobj = &vp->v_uobj; - voff_t pgend = round_page(newsize); - UVMHIST_FUNC("uvm_vnp_setsize"); UVMHIST_CALLED(ubchist); - - simple_lock(&uobj->vmobjlock); - - UVMHIST_LOG(ubchist, "old 0x%x new 0x%x", vp->v_size, newsize, 0,0); + struct uvm_vnode *uvn = &vp->v_uvm; /* - * now check if the size has changed: if we shrink we had better - * toss some pages... + * lock uvn and check for valid object, and if valid: do it! */ + simple_lock(&uvn->u_obj.vmobjlock); + if (uvn->u_flags & UVM_VNODE_VALID) { + + /* + * now check if the size has changed: if we shrink we had better + * toss some pages... + */ - if (vp->v_size > pgend && vp->v_size != VSIZENOTSET) { - (void) uvn_flush(uobj, pgend, 0, PGO_FREE); + if (uvn->u_size > newsize) { + (void)uvn_flush(&uvn->u_obj, newsize, + uvn->u_size, PGO_FREE); + } + uvn->u_size = newsize; } - vp->v_size = newsize; - simple_unlock(&uobj->vmobjlock); + simple_unlock(&uvn->u_obj.vmobjlock); + + /* + * done + */ + return; } /* - * uvm_vnp_zerorange: set a range of bytes in a file to zero. + * uvm_vnp_sync: flush all dirty VM pages back to their backing vnodes. + * + * => called from sys_sync with no VM structures locked + * => only one process can do a sync at a time (because the uvn + * structure only has one queue for sync'ing). we ensure this + * by holding the uvn_sync_lock while the sync is in progress. + * other processes attempting a sync will sleep on this lock + * until we are done. */ void -uvm_vnp_zerorange(vp, off, len) - struct vnode *vp; - off_t off; - size_t len; +uvm_vnp_sync(mp) + struct mount *mp; { - void *win; + struct uvm_vnode *uvn; + struct vnode *vp; + boolean_t got_lock; + + /* + * step 1: ensure we are only ones using the uvn_sync_q by locking + * our lock... 
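uvm_vnp_setsize(), restored above, treats growth as a bare size update but flushes the range from the new size up to the old one with PGO_FREE when the object shrinks. The rule in isolation, as a small self-contained function (setsize_flush_range() is an illustrative name, not a kernel routine):

#include <stdint.h>
#include <stdio.h>

/*
 * Returns 1 and sets [*lo, *hi) when a PGO_FREE flush is required,
 * i.e. only when the object shrinks.
 */
static int
setsize_flush_range(uint64_t oldsize, uint64_t newsize,
    uint64_t *lo, uint64_t *hi)
{
    if (oldsize <= newsize)
        return 0;           /* grow (or no change): nothing to toss */
    *lo = newsize;          /* free everything from the new EOF ... */
    *hi = oldsize;          /* ... up to the old end of the object */
    return 1;
}

int
main(void)
{
    uint64_t lo, hi;

    if (setsize_flush_range(3 * 4096 + 100, 4096, &lo, &hi))
        printf("flush [%llu, %llu)\n",
            (unsigned long long)lo, (unsigned long long)hi);
    return 0;
}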
+ */ + lockmgr(&uvn_sync_lock, LK_EXCLUSIVE, NULL, curproc); - /* - * XXXUBC invent kzero() and use it - */ + /* + * step 2: build up a simpleq of uvns of interest based on the + * write list. we gain a reference to uvns of interest. must + * be careful about locking uvn's since we will be holding uvn_wl_lock + * in the body of the loop. + */ + SIMPLEQ_INIT(&uvn_sync_q); + simple_lock(&uvn_wl_lock); + for (uvn = uvn_wlist.lh_first ; uvn != NULL ; + uvn = uvn->u_wlist.le_next) { + + vp = (struct vnode *) uvn; + if (mp && vp->v_mount != mp) + continue; + + /* attempt to gain reference */ + while ((got_lock = simple_lock_try(&uvn->u_obj.vmobjlock)) == + FALSE && + (uvn->u_flags & UVM_VNODE_BLOCKED) == 0) + /* spin */ ; + + /* + * we will exit the loop if either if the following are true: + * - we got the lock [always true if NCPU == 1] + * - we failed to get the lock but noticed the vnode was + * "blocked" -- in this case the vnode must be a dying + * vnode, and since dying vnodes are in the process of + * being flushed out, we can safely skip this one + * + * we want to skip over the vnode if we did not get the lock, + * or if the vnode is already dying (due to the above logic). + * + * note that uvn must already be valid because we found it on + * the wlist (this also means it can't be ALOCK'd). + */ + if (!got_lock || (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) { + if (got_lock) + simple_unlock(&uvn->u_obj.vmobjlock); + continue; /* skip it */ + } + + /* + * gain reference. watch out for persisting uvns (need to + * regain vnode REF). + */ + if (uvn->u_obj.uo_refs == 0) + VREF(vp); + uvn->u_obj.uo_refs++; + simple_unlock(&uvn->u_obj.vmobjlock); + + /* + * got it! + */ + SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq); + } + simple_unlock(&uvn_wl_lock); - while (len) { - vsize_t bytelen = len; + /* + * step 3: we now have a list of uvn's that may need cleaning. + * we are holding the uvn_sync_lock, but have dropped the uvn_wl_lock + * (so we can now safely lock uvn's again). + */ - win = ubc_alloc(&vp->v_uobj, off, &bytelen, UBC_WRITE); - memset(win, 0, bytelen); - ubc_release(win, 0); + for (uvn = uvn_sync_q.sqh_first ; uvn ; uvn = uvn->u_syncq.sqe_next) { + simple_lock(&uvn->u_obj.vmobjlock); +#ifdef DEBUG + if (uvn->u_flags & UVM_VNODE_DYING) { + printf("uvm_vnp_sync: dying vnode on sync list\n"); + } +#endif + uvn_flush(&uvn->u_obj, 0, 0, + PGO_CLEANIT|PGO_ALLPAGES|PGO_DOACTCLUST); - off += bytelen; - len -= bytelen; - } + /* + * if we have the only reference and we just cleaned the uvn, + * then we can pull it out of the UVM_VNODE_WRITEABLE state + * thus allowing us to avoid thinking about flushing it again + * on later sync ops. + */ + if (uvn->u_obj.uo_refs == 1 && + (uvn->u_flags & UVM_VNODE_WRITEABLE)) { + LIST_REMOVE(uvn, u_wlist); + uvn->u_flags &= ~UVM_VNODE_WRITEABLE; + } + + simple_unlock(&uvn->u_obj.vmobjlock); + + /* now drop our reference to the uvn */ + uvn_detach(&uvn->u_obj); + } + + /* + * done! release sync lock + */ + lockmgr(&uvn_sync_lock, LK_RELEASE, (void *)0, curproc); } diff --git a/sys/uvm/uvm_vnode.h b/sys/uvm/uvm_vnode.h new file mode 100644 index 00000000000..64636bc15a3 --- /dev/null +++ b/sys/uvm/uvm_vnode.h @@ -0,0 +1,110 @@ +/* $OpenBSD: uvm_vnode.h,v 1.11 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_vnode.h,v 1.9 2000/03/26 20:54:48 kleink Exp $ */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_vnode.h,v 1.1.2.4 1997/10/03 21:18:24 chuck Exp + */ + +#ifndef _UVM_UVM_VNODE_H_ +#define _UVM_UVM_VNODE_H_ + +/* + * uvm_vnode.h + * + * vnode handle into the VM system. + */ + +/* + * the uvm_vnode structure. put at the top of the vnode data structure. + * this allows: + * (struct vnode *) == (struct uvm_vnode *) == (struct uvm_object *) + */ + +struct uvm_vnode { + struct uvm_object u_obj; /* the actual VM object */ + int u_flags; /* flags */ + int u_nio; /* number of running I/O requests */ + voff_t u_size; /* size of object */ + + /* the following entry is locked by uvn_wl_lock */ + LIST_ENTRY(uvm_vnode) u_wlist; /* list of writeable vnode objects */ + + /* the following entry is locked by uvn_sync_lock */ + SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* vnode objects due for a "sync" */ +}; + +/* + * u_flags values + */ +#define UVM_VNODE_VALID 0x001 /* we are attached to the vnode */ +#define UVM_VNODE_CANPERSIST 0x002 /* we can persist after ref == 0 */ +#define UVM_VNODE_ALOCK 0x004 /* uvn_attach is locked out */ +#define UVM_VNODE_DYING 0x008 /* final detach/terminate in + progress */ +#define UVM_VNODE_RELKILL 0x010 /* uvn should be killed by releasepg + when final i/o is done */ +#define UVM_VNODE_WANTED 0x020 /* someone is waiting for alock, + dying, or relkill to clear */ +#define UVM_VNODE_VNISLOCKED 0x040 /* underlying vnode struct is locked + (valid when DYING is true) */ +#define UVM_VNODE_IOSYNC 0x080 /* I/O sync in progress ... 
setter + sleeps on &uvn->u_nio */ +#define UVM_VNODE_IOSYNCWANTED 0x100 /* a process is waiting for the + i/o sync to clear so it can do + i/o */ +#define UVM_VNODE_WRITEABLE 0x200 /* uvn has pages that are writeable */ + +/* + * UVM_VNODE_BLOCKED: any condition that should new processes from + * touching the vnode [set WANTED and sleep to wait for it to clear] + */ +#define UVM_VNODE_BLOCKED (UVM_VNODE_ALOCK|UVM_VNODE_DYING|UVM_VNODE_RELKILL) + +#ifdef _KERNEL + +/* + * prototypes + */ + +#if 0 +/* + * moved uvn_attach to uvm_extern.h because uvm_vnode.h is needed to + * include sys/vnode.h, and files that include sys/vnode.h don't know + * what a vm_prot_t is. + */ +struct uvm_object *uvn_attach __P((void *, vm_prot_t)); +#endif + +#endif /* _KERNEL */ + +#endif /* _UVM_UVM_VNODE_H_ */ |