Diffstat (limited to 'sys')
-rw-r--r-- sys/adosfs/advnops.c | 28
-rw-r--r-- sys/arch/alpha/alpha/pmap.c | 6
-rw-r--r-- sys/arch/i386/i386/vm_machdep.c | 7
-rw-r--r-- sys/conf/files | 3
-rw-r--r-- sys/dev/vnd.c | 6
-rw-r--r-- sys/isofs/cd9660/cd9660_vfsops.c | 4
-rw-r--r-- sys/isofs/cd9660/cd9660_vnops.c | 34
-rw-r--r-- sys/kern/exec_subr.c | 3
-rw-r--r-- sys/kern/init_main.c | 4
-rw-r--r-- sys/kern/kern_exec.c | 3
-rw-r--r-- sys/kern/vfs_bio.c | 44
-rw-r--r-- sys/kern/vfs_default.c | 683
-rw-r--r-- sys/kern/vfs_subr.c | 88
-rw-r--r-- sys/kern/vfs_sync.c | 17
-rw-r--r-- sys/kern/vfs_syscalls.c | 13
-rw-r--r-- sys/kern/vfs_vnops.c | 31
-rw-r--r-- sys/kern/vnode_if.c | 140
-rw-r--r-- sys/kern/vnode_if.src | 47
-rw-r--r-- sys/msdosfs/msdosfs_denode.c | 19
-rw-r--r-- sys/msdosfs/msdosfs_fat.c | 40
-rw-r--r-- sys/msdosfs/msdosfs_vfsops.c | 23
-rw-r--r-- sys/msdosfs/msdosfs_vnops.c | 190
-rw-r--r-- sys/nfs/nfs.h | 20
-rw-r--r-- sys/nfs/nfs_bio.c | 928
-rw-r--r-- sys/nfs/nfs_node.c | 27
-rw-r--r-- sys/nfs/nfs_serv.c | 11
-rw-r--r-- sys/nfs/nfs_subs.c | 267
-rw-r--r-- sys/nfs/nfs_syscalls.c | 43
-rw-r--r-- sys/nfs/nfs_var.h | 14
-rw-r--r-- sys/nfs/nfs_vfsops.c | 9
-rw-r--r-- sys/nfs/nfs_vnops.c | 264
-rw-r--r-- sys/nfs/nfsnode.h | 16
-rw-r--r-- sys/sys/buf.h | 10
-rw-r--r-- sys/sys/mount.h | 4
-rw-r--r-- sys/sys/param.h | 15
-rw-r--r-- sys/sys/vnode.h | 20
-rw-r--r-- sys/sys/vnode_if.h | 48
-rw-r--r-- sys/ufs/ext2fs/ext2fs_balloc.c | 230
-rw-r--r-- sys/ufs/ext2fs/ext2fs_extern.h | 7
-rw-r--r-- sys/ufs/ext2fs/ext2fs_inode.c | 58
-rw-r--r-- sys/ufs/ext2fs/ext2fs_readwrite.c | 103
-rw-r--r-- sys/ufs/ext2fs/ext2fs_subr.c | 5
-rw-r--r-- sys/ufs/ext2fs/ext2fs_vfsops.c | 15
-rw-r--r-- sys/ufs/ext2fs/ext2fs_vnops.c | 12
-rw-r--r-- sys/ufs/ffs/ffs_alloc.c | 11
-rw-r--r-- sys/ufs/ffs/ffs_balloc.c | 60
-rw-r--r-- sys/ufs/ffs/ffs_extern.h | 5
-rw-r--r-- sys/ufs/ffs/ffs_inode.c | 121
-rw-r--r-- sys/ufs/ffs/ffs_softdep.c | 274
-rw-r--r-- sys/ufs/ffs/ffs_vfsops.c | 7
-rw-r--r-- sys/ufs/ffs/ffs_vnops.c | 59
-rw-r--r-- sys/ufs/ufs/inode.h | 3
-rw-r--r-- sys/ufs/ufs/ufs_bmap.c | 16
-rw-r--r-- sys/ufs/ufs/ufs_extern.h | 3
-rw-r--r-- sys/ufs/ufs/ufs_inode.c | 149
-rw-r--r-- sys/ufs/ufs/ufs_readwrite.c | 130
-rw-r--r-- sys/ufs/ufs/ufs_vnops.c | 4
-rw-r--r-- sys/ufs/ufs/ufsmount.h | 3
-rw-r--r-- sys/uvm/uvm_anon.c | 5
-rw-r--r-- sys/uvm/uvm_aobj.c | 12
-rw-r--r-- sys/uvm/uvm_bio.c | 547
-rw-r--r-- sys/uvm/uvm_extern.h | 35
-rw-r--r-- sys/uvm/uvm_fault.c | 6
-rw-r--r-- sys/uvm/uvm_map.h | 4
-rw-r--r-- sys/uvm/uvm_mmap.c | 34
-rw-r--r-- sys/uvm/uvm_page.c | 8
-rw-r--r-- sys/uvm/uvm_page_i.h | 5
-rw-r--r-- sys/uvm/uvm_pager.c | 12
-rw-r--r-- sys/uvm/uvm_param.h | 6
-rw-r--r-- sys/uvm/uvm_swap.c | 30
-rw-r--r-- sys/uvm/uvm_vnode.c | 1602
-rw-r--r-- sys/uvm/uvm_vnode.h | 52
72 files changed, 4246 insertions(+), 2516 deletions(-)
diff --git a/sys/adosfs/advnops.c b/sys/adosfs/advnops.c
index 78d237f41e5..19bfdcc5738 100644
--- a/sys/adosfs/advnops.c
+++ b/sys/adosfs/advnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: advnops.c,v 1.17 2001/06/23 02:14:21 csapuntz Exp $ */
+/* $OpenBSD: advnops.c,v 1.18 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: advnops.c,v 1.32 1996/10/13 02:52:09 christos Exp $ */
/*
@@ -131,7 +131,9 @@ struct vnodeopv_entry_desc adosfs_vnodeop_entries[] = {
{ &vop_pathconf_desc, adosfs_pathconf }, /* pathconf */
{ &vop_advlock_desc, adosfs_advlock }, /* advlock */
{ &vop_bwrite_desc, adosfs_bwrite }, /* bwrite */
- { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_size_desc, genfs_size },
+ { NULL, NULL }
};
struct vnodeopv_desc adosfs_vnodeop_opv_desc =
@@ -272,6 +274,28 @@ adosfs_read(v)
/*
* taken from ufs_read()
*/
+
+ if (sp->a_vp->v_type == VREG) {
+ error = 0;
+ while (uio->uio_resid > 0) {
+ void *win;
+ vsize_t bytelen = min(ap->fsize - uio->uio_offset,
+ uio->uio_resid);
+
+ if (bytelen == 0) {
+ break;
+ }
+ win = ubc_alloc(&sp->a_vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
+ }
+ }
+ goto reterr;
+ }
+
do {
/*
* we are only supporting ADosFFS currently
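The hunk above replaces adosfs's block-at-a-time bread() read path with UBC window mapping for regular files, a pattern this commit repeats below in cd9660 and msdosfs. A minimal sketch of that loop, using a hypothetical helper ubc_read_loop() with filesize standing in for the per-filesystem size field (ap->fsize here, ip->i_size and dep->de_FileSize in the later hunks); everything outside the copy loop is trimmed:

/*
 * Sketch of the common UBC read loop: map a window of the page
 * cache over the next chunk of the file, copy out of it with
 * uiomove(), release the window, and repeat until EOF or error.
 */
int
ubc_read_loop(struct vnode *vp, struct uio *uio, off_t filesize)
{
	int error = 0;

	while (uio->uio_resid > 0) {
		void *win;
		vsize_t bytelen = MIN(filesize - uio->uio_offset,
		    uio->uio_resid);

		if (bytelen == 0)
			break;			/* at or past EOF */
		win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
		    &bytelen, UBC_READ);	/* may map less than asked */
		error = uiomove(win, bytelen, uio);
		ubc_release(win, 0);
		if (error)
			break;
	}
	return (error);
}
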
diff --git a/sys/arch/alpha/alpha/pmap.c b/sys/arch/alpha/alpha/pmap.c
index 9ff390da8c2..1d50a35d446 100644
--- a/sys/arch/alpha/alpha/pmap.c
+++ b/sys/arch/alpha/alpha/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.23 2001/11/09 15:31:11 art Exp $ */
+/* $OpenBSD: pmap.c,v 1.24 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: pmap.c,v 1.154 2000/12/07 22:18:55 thorpej Exp $ */
/*-
@@ -804,8 +804,8 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
/*
* Figure out how many PTE's are necessary to map the kernel.
*/
- lev3mapsize = (VM_PHYS_SIZE +
- nbuf * MAXBSIZE + + PAGER_MAP_SIZE + 16 * NCARGS) / NBPG +
+ lev3mapsize = (VM_PHYS_SIZE + ubc_nwins * ubc_winsize +
+ nbuf * MAXBSIZE + 16 * NCARGS + PAGER_MAP_SIZE) / NBPG +
(maxproc * UPAGES) + NKMEMCLUSTERS;
#ifdef SYSVSHM
diff --git a/sys/arch/i386/i386/vm_machdep.c b/sys/arch/i386/i386/vm_machdep.c
index 516dea6ebea..7de82391532 100644
--- a/sys/arch/i386/i386/vm_machdep.c
+++ b/sys/arch/i386/i386/vm_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vm_machdep.c,v 1.32 2001/11/06 19:53:14 miod Exp $ */
+/* $OpenBSD: vm_machdep.c,v 1.33 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: vm_machdep.c,v 1.61 1996/05/03 19:42:35 christos Exp $ */
/*-
@@ -371,9 +371,7 @@ vmapbuf(bp, len)
while (len) {
pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map),
faddr, &fpa);
- pmap_enter(vm_map_pmap(phys_map), taddr, fpa,
- VM_PROT_READ | VM_PROT_WRITE,
- VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED);
+ pmap_kenter_pa(taddr, fpa, VM_PROT_READ|VM_PROT_WRITE);
faddr += PAGE_SIZE;
taddr += PAGE_SIZE;
len -= PAGE_SIZE;
@@ -396,6 +394,7 @@ vunmapbuf(bp, len)
addr = trunc_page((vaddr_t)bp->b_data);
off = (vm_offset_t)bp->b_data - addr;
len = round_page(off + len);
+ pmap_kremove(addr, len);
uvm_km_free_wakeup(phys_map, addr, len);
bp->b_data = bp->b_saveaddr;
bp->b_saveaddr = 0;
diff --git a/sys/conf/files b/sys/conf/files
index a6ce3bcedb0..0ec11fc5bbb 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1,4 +1,4 @@
-# $OpenBSD: files,v 1.230 2001/11/21 21:23:56 csapuntz Exp $
+# $OpenBSD: files,v 1.231 2001/11/27 05:27:11 art Exp $
# $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@@ -801,6 +801,7 @@ file xfs/xfs_syscalls-dummy.c !xfs
file uvm/uvm_amap.c
file uvm/uvm_anon.c
file uvm/uvm_aobj.c
+file uvm/uvm_bio.c
file uvm/uvm_device.c
file uvm/uvm_fault.c
file uvm/uvm_glue.c
diff --git a/sys/dev/vnd.c b/sys/dev/vnd.c
index b2935e0edba..6f8c268a283 100644
--- a/sys/dev/vnd.c
+++ b/sys/dev/vnd.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vnd.c,v 1.28 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: vnd.c,v 1.29 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */
/*
@@ -558,10 +558,6 @@ vndstrategy(bp)
nbp->vb_buf.b_proc = bp->b_proc;
nbp->vb_buf.b_iodone = vndiodone;
nbp->vb_buf.b_vp = vp;
- nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff;
- nbp->vb_buf.b_dirtyend = bp->b_dirtyend;
- nbp->vb_buf.b_validoff = bp->b_validoff;
- nbp->vb_buf.b_validend = bp->b_validend;
LIST_INIT(&nbp->vb_buf.b_dep);
/* save a reference to the old buffer */
diff --git a/sys/isofs/cd9660/cd9660_vfsops.c b/sys/isofs/cd9660/cd9660_vfsops.c
index b4199c4df15..b2b1455e6eb 100644
--- a/sys/isofs/cd9660/cd9660_vfsops.c
+++ b/sys/isofs/cd9660/cd9660_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cd9660_vfsops.c,v 1.24 2001/11/15 08:27:28 art Exp $ */
+/* $OpenBSD: cd9660_vfsops.c,v 1.25 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: cd9660_vfsops.c,v 1.26 1997/06/13 15:38:58 pk Exp $ */
/*-
@@ -359,6 +359,8 @@ iso_mountfs(devvp, mp, p, argp)
mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
mp->mnt_maxsymlinklen = 0;
mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_dev_bshift = iso_bsize;
+ mp->mnt_fs_bshift = isomp->im_bshift;
isomp->im_mountp = mp;
isomp->im_dev = dev;
isomp->im_devvp = devvp;
diff --git a/sys/isofs/cd9660/cd9660_vnops.c b/sys/isofs/cd9660/cd9660_vnops.c
index 5f05dc9d65f..cd5567a77b4 100644
--- a/sys/isofs/cd9660/cd9660_vnops.c
+++ b/sys/isofs/cd9660/cd9660_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cd9660_vnops.c,v 1.14 2001/06/23 02:14:23 csapuntz Exp $ */
+/* $OpenBSD: cd9660_vnops.c,v 1.15 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: cd9660_vnops.c,v 1.42 1997/10/16 23:56:57 christos Exp $ */
/*-
@@ -314,9 +314,9 @@ cd9660_read(v)
struct ucred *a_cred;
} */ *ap = v;
struct vnode *vp = ap->a_vp;
- register struct uio *uio = ap->a_uio;
- register struct iso_node *ip = VTOI(vp);
- register struct iso_mnt *imp;
+ struct uio *uio = ap->a_uio;
+ struct iso_node *ip = VTOI(vp);
+ struct iso_mnt *imp;
struct buf *bp;
daddr_t lbn, rablock;
off_t diff;
@@ -329,6 +329,26 @@ cd9660_read(v)
return (EINVAL);
ip->i_flag |= IN_ACCESS;
imp = ip->i_mnt;
+
+ if (vp->v_type == VREG) {
+ error = 0;
+ while (uio->uio_resid > 0) {
+ void *win;
+ vsize_t bytelen = MIN(ip->i_size - uio->uio_offset,
+ uio->uio_resid);
+
+ if (bytelen == 0)
+ break;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error)
+ break;
+ }
+ goto out;
+ }
+
do {
lbn = lblkno(imp, uio->uio_offset);
on = blkoff(imp, uio->uio_offset);
@@ -370,6 +390,8 @@ cd9660_read(v)
bp->b_flags |= B_AGE;
brelse(bp);
} while (error == 0 && uio->uio_resid > 0 && n != 0);
+
+out:
return (error);
}
@@ -1045,7 +1067,9 @@ struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = {
{ &vop_pathconf_desc, cd9660_pathconf },/* pathconf */
{ &vop_advlock_desc, cd9660_advlock }, /* advlock */
{ &vop_bwrite_desc, vop_generic_bwrite },
- { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_size_desc, genfs_size },
+ { NULL, NULL }
};
struct vnodeopv_desc cd9660_vnodeop_opv_desc =
{ &cd9660_vnodeop_p, cd9660_vnodeop_entries };
diff --git a/sys/kern/exec_subr.c b/sys/kern/exec_subr.c
index 770a29f8adc..e79db64dcae 100644
--- a/sys/kern/exec_subr.c
+++ b/sys/kern/exec_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: exec_subr.c,v 1.14 2001/11/07 01:18:01 art Exp $ */
+/* $OpenBSD: exec_subr.c,v 1.15 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: exec_subr.c,v 1.9 1994/12/04 03:10:42 mycroft Exp $ */
/*
@@ -167,6 +167,7 @@ vmcmd_map_pagedvn(p, cmd)
uobj = uvn_attach((void *) cmd->ev_vp, VM_PROT_READ|VM_PROT_EXECUTE);
if (uobj == NULL)
return(ENOMEM);
+ VREF(cmd->ev_vp);
/*
* do the map
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index c909a23141b..f807a181062 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: init_main.c,v 1.84 2001/11/10 18:42:31 art Exp $ */
+/* $OpenBSD: init_main.c,v 1.85 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */
/*
@@ -217,6 +217,8 @@ main(framep)
cpu_configure();
+ ubc_init(); /* Initialize the unified buffer cache */
+
/* Initialize sysctls (must be done before any processes run) */
sysctl_init();
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 2d12034b386..9f621da43d2 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_exec.c,v 1.60 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: kern_exec.c,v 1.61 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */
/*-
@@ -150,6 +150,7 @@ check_exec(p, epp)
goto bad1;
/* now we have the file, get the exec header */
+ uvn_attach(vp, VM_PROT_READ);
error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid, p);
if (error)
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 71674e95236..6f361c989c0 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_bio.c,v 1.51 2001/11/15 23:25:37 art Exp $ */
+/* $OpenBSD: vfs_bio.c,v 1.52 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */
/*-
@@ -406,7 +406,6 @@ bwrite(bp)
/* Initiate disk write. Make sure the appropriate party is charged. */
bp->b_vp->v_numoutput++;
splx(s);
- SET(bp->b_flags, B_WRITEINPROG);
VOP_STRATEGY(bp);
if (async)
@@ -466,7 +465,6 @@ bdwrite(bp)
}
/* Otherwise, the "write" is done, so mark and release the buffer. */
- CLR(bp->b_flags, B_NEEDCOMMIT);
SET(bp->b_flags, B_DONE);
brelse(bp);
}
@@ -588,6 +586,7 @@ brelse(bp)
/* Unlock the buffer. */
CLR(bp->b_flags, (B_AGE | B_ASYNC | B_BUSY | B_NOCACHE | B_DEFERRED));
+ SET(bp->b_flags, B_CACHE);
/* Allow disk interrupts. */
splx(s);
@@ -651,44 +650,30 @@ getblk(vp, blkno, size, slpflag, slptimeo)
daddr_t blkno;
int size, slpflag, slptimeo;
{
- struct bufhashhdr *bh;
struct buf *bp, *nbp = NULL;
int s, err;
- /*
- * XXX
- * The following is an inlined version of 'incore()', but with
- * the 'invalid' test moved to after the 'busy' test. It's
- * necessary because there are some cases in which the NFS
- * code sets B_INVAL prior to writing data to the server, but
- * in which the buffers actually contain valid data. In this
- * case, we can't allow the system to allocate a new buffer for
- * the block until the write is finished.
- */
- bh = BUFHASH(vp, blkno);
start:
- bp = bh->lh_first;
- for (; bp != NULL; bp = bp->b_hash.le_next) {
- if (bp->b_lblkno != blkno || bp->b_vp != vp)
- continue;
-
+ bp = incore(vp, blkno);
+ if (bp != NULL) {
s = splbio();
if (ISSET(bp->b_flags, B_BUSY)) {
SET(bp->b_flags, B_WANTED);
err = tsleep(bp, slpflag | (PRIBIO + 1), "getblk",
slptimeo);
splx(s);
- if (err)
+ if (err) {
+ if (nbp != NULL) {
+ SET(nbp->b_flags, B_AGE);
+ brelse(nbp);
+ }
return (NULL);
+ }
goto start;
}
- if (!ISSET(bp->b_flags, B_INVAL)) {
- SET(bp->b_flags, (B_BUSY | B_CACHE));
- bremfree(bp);
- splx(s);
- break;
- }
+ SET(bp->b_flags, (B_BUSY | B_CACHE));
+ bremfree(bp);
splx(s);
}
@@ -697,7 +682,7 @@ start:
goto start;
}
bp = nbp;
- binshash(bp, bh);
+ binshash(bp, BUFHASH(vp, blkno));
bp->b_blkno = bp->b_lblkno = blkno;
s = splbio();
bgetvp(vp, bp);
@@ -900,8 +885,6 @@ start:
bp->b_error = 0;
bp->b_resid = 0;
bp->b_bcount = 0;
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- bp->b_validoff = bp->b_validend = 0;
bremhash(bp);
*bpp = bp;
@@ -1022,7 +1005,6 @@ biodone(bp)
buf_complete(bp);
if (!ISSET(bp->b_flags, B_READ)) {
- CLR(bp->b_flags, B_WRITEINPROG);
vwakeup(bp->b_vp);
}
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 8f426b3a3f5..61f6d0217e9 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -1,10 +1,9 @@
-/* $OpenBSD: vfs_default.c,v 1.7 2001/06/25 03:28:03 csapuntz Exp $ */
-
+/* $OpenBSD: vfs_default.c,v 1.8 2001/11/27 05:27:12 art Exp $ */
/*
* Portions of this code are:
*
- * Copyright (c) 1989, 1993
+ * Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
@@ -49,9 +48,11 @@
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
+#include <sys/pool.h>
#include <sys/event.h>
#include <miscfs/specfs/specdev.h>
+#include <uvm/uvm.h>
extern struct simplelock spechash_slock;
@@ -310,3 +311,679 @@ lease_check(void *v)
{
return (0);
}
+
+/*
+ * generic VM getpages routine.
+ * Return PG_BUSY pages for the given range,
+ * reading from backing store if necessary.
+ */
+
+int
+genfs_getpages(v)
+ void *v;
+{
+ struct vop_getpages_args /* {
+ struct vnode *a_vp;
+ voff_t a_offset;
+ vm_page_t *a_m;
+ int *a_count;
+ int a_centeridx;
+ vm_prot_t a_access_type;
+ int a_advice;
+ int a_flags;
+ } */ *ap = v;
+
+ off_t newsize, diskeof, memeof;
+ off_t offset, origoffset, startoffset, endoffset, raoffset;
+ daddr_t lbn, blkno;
+ int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
+ int fs_bshift, fs_bsize, dev_bshift, dev_bsize;
+ int flags = ap->a_flags;
+ size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
+ vaddr_t kva;
+ struct buf *bp, *mbp;
+ struct vnode *vp = ap->a_vp;
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+ struct vm_page *pgs[16]; /* XXXUBC 16 */
+ struct ucred *cred = curproc->p_ucred; /* XXXUBC curproc */
+ boolean_t async = (flags & PGO_SYNCIO) == 0;
+ boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
+ boolean_t sawhole = FALSE;
+ struct proc *p = curproc;
+ UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
+ vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);
+
+ /* XXXUBC temp limit */
+ if (*ap->a_count > 16) {
+ return EINVAL;
+ }
+
+ error = 0;
+ origoffset = ap->a_offset;
+ orignpages = *ap->a_count;
+ error = VOP_SIZE(vp, vp->v_uvm.u_size, &diskeof);
+ if (error) {
+ return error;
+ }
+ if (flags & PGO_PASTEOF) {
+ newsize = MAX(vp->v_uvm.u_size,
+ origoffset + (orignpages << PAGE_SHIFT));
+ error = VOP_SIZE(vp, newsize, &memeof);
+ if (error) {
+ return error;
+ }
+ } else {
+ memeof = diskeof;
+ }
+ KASSERT(ap->a_centeridx >= 0 && ap->a_centeridx <= orignpages);
+ KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
+ KASSERT(orignpages > 0);
+
+ /*
+ * Bounds-check the request.
+ */
+
+ if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
+ if ((flags & PGO_LOCKED) == 0) {
+ simple_unlock(&uobj->vmobjlock);
+ }
+ UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
+ origoffset, *ap->a_count, memeof,0);
+ return EINVAL;
+ }
+
+ /*
+ * For PGO_LOCKED requests, just return whatever's in memory.
+ */
+
+ if (flags & PGO_LOCKED) {
+ uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
+ UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);
+
+ return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
+ }
+
+ /* vnode is VOP_LOCKed, uobj is locked */
+
+ if (write && (vp->v_bioflag & VBIOONSYNCLIST) == 0) {
+ vn_syncer_add_to_worklist(vp, syncdelay);
+ }
+
+ /*
+ * find the requested pages and make some simple checks.
+ * leave space in the page array for a whole block.
+ */
+
+ fs_bshift = vp->v_mount->mnt_fs_bshift;
+ fs_bsize = 1 << fs_bshift;
+ dev_bshift = vp->v_mount->mnt_dev_bshift;
+ dev_bsize = 1 << dev_bshift;
+ KASSERT((diskeof & (dev_bsize - 1)) == 0);
+ KASSERT((memeof & (dev_bsize - 1)) == 0);
+
+ orignpages = MIN(orignpages,
+ round_page(memeof - origoffset) >> PAGE_SHIFT);
+ npages = orignpages;
+ startoffset = origoffset & ~(fs_bsize - 1);
+ endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
+ + fs_bsize - 1) & ~(fs_bsize - 1));
+ endoffset = MIN(endoffset, round_page(memeof));
+ ridx = (origoffset - startoffset) >> PAGE_SHIFT;
+
+ memset(pgs, 0, sizeof(pgs));
+ uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);
+
+ /*
+ * if PGO_OVERWRITE is set, don't bother reading the pages.
+ * PGO_OVERWRITE also means that the caller guarantees
+ * that the pages already have backing store allocated.
+ */
+
+ if (flags & PGO_OVERWRITE) {
+ UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if (pg->flags & PG_FAKE) {
+ uvm_pagezero(pg);
+ pg->flags &= ~(PG_FAKE);
+ }
+ pg->flags &= ~(PG_RDONLY);
+ }
+ npages += ridx;
+ goto out;
+ }
+
+ /*
+ * if the pages are already resident, just return them.
+ */
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if ((pg->flags & PG_FAKE) ||
+ (write && (pg->flags & PG_RDONLY))) {
+ break;
+ }
+ }
+ if (i == npages) {
+ UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
+ raoffset = origoffset + (orignpages << PAGE_SHIFT);
+ npages += ridx;
+ goto raout;
+ }
+
+ /*
+ * the page wasn't resident and we're not overwriting,
+ * so we're going to have to do some i/o.
+ * find any additional pages needed to cover the expanded range.
+ */
+
+ if (startoffset != origoffset) {
+
+ /*
+ * XXXUBC we need to avoid deadlocks caused by locking
+ * additional pages at lower offsets than pages we
+ * already have locked. for now, unlock them all and
+ * start over.
+ */
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if (pg->flags & PG_FAKE) {
+ pg->flags |= PG_RELEASED;
+ }
+ }
+ uvm_page_unbusy(&pgs[ridx], npages);
+ memset(pgs, 0, sizeof(pgs));
+
+ UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
+ startoffset, endoffset, 0,0);
+ npages = (endoffset - startoffset) >> PAGE_SHIFT;
+ npgs = npages;
+ uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
+ }
+ simple_unlock(&uobj->vmobjlock);
+
+ /*
+ * read the desired page(s).
+ */
+
+ totalbytes = npages << PAGE_SHIFT;
+ bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
+ tailbytes = totalbytes - bytes;
+ skipbytes = 0;
+
+ kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
+ UVMPAGER_MAPIN_READ);
+
+ s = splbio();
+ mbp = pool_get(&bufpool, PR_WAITOK);
+ splx(s);
+ mbp->b_bufsize = totalbytes;
+ mbp->b_data = (void *)kva;
+ mbp->b_resid = mbp->b_bcount = bytes;
+ mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0);
+ mbp->b_iodone = uvm_aio_biodone;
+ mbp->b_vp = vp;
+ LIST_INIT(&mbp->b_dep);
+
+ /*
+ * if EOF is in the middle of the range, zero the part past EOF.
+ */
+
+ if (tailbytes > 0) {
+ memset((void *)(kva + bytes), 0, tailbytes);
+ }
+
+ /*
+ * now loop over the pages, reading as needed.
+ */
+
+ if (write) {
+ lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL, p);
+ } else {
+ lockmgr(&vp->v_glock, LK_SHARED, NULL, p);
+ }
+
+ bp = NULL;
+ for (offset = startoffset;
+ bytes > 0;
+ offset += iobytes, bytes -= iobytes) {
+
+ /*
+ * skip pages which don't need to be read.
+ */
+
+ pidx = (offset - startoffset) >> PAGE_SHIFT;
+ while ((pgs[pidx]->flags & PG_FAKE) == 0) {
+ size_t b;
+
+ KASSERT((offset & (PAGE_SIZE - 1)) == 0);
+ b = MIN(PAGE_SIZE, bytes);
+ offset += b;
+ bytes -= b;
+ skipbytes += b;
+ pidx++;
+ UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
+ offset, 0,0,0);
+ if (bytes == 0) {
+ goto loopdone;
+ }
+ }
+
+ /*
+ * bmap the file to find out the blkno to read from and
+ * how much we can read in one i/o. if bmap returns an error,
+ * skip the rest of the top-level i/o.
+ */
+
+ lbn = offset >> fs_bshift;
+ error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
+ if (error) {
+ UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
+ lbn, error,0,0);
+ skipbytes += bytes;
+ goto loopdone;
+ }
+
+ /*
+ * see how many pages can be read with this i/o.
+ * reduce the i/o size if necessary to avoid
+ * overwriting pages with valid data.
+ */
+
+ iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
+ bytes);
+ if (offset + iobytes > round_page(offset)) {
+ pcount = 1;
+ while (pidx + pcount < npages &&
+ pgs[pidx + pcount]->flags & PG_FAKE) {
+ pcount++;
+ }
+ iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
+ (offset - trunc_page(offset)));
+ }
+
+ /*
+ * if this block isn't allocated, zero it instead of reading it.
+ * if this is a read access, mark the pages we zeroed PG_RDONLY.
+ */
+
+ if (blkno < 0) {
+ UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
+
+ sawhole = TRUE;
+ memset((char *)kva + (offset - startoffset), 0,
+ iobytes);
+ skipbytes += iobytes;
+
+ if (!write) {
+ int holepages =
+ (round_page(offset + iobytes) -
+ trunc_page(offset)) >> PAGE_SHIFT;
+ for (i = 0; i < holepages; i++) {
+ pgs[pidx + i]->flags |= PG_RDONLY;
+ }
+ }
+ continue;
+ }
+
+ /*
+ * allocate a sub-buf for this piece of the i/o
+ * (or just use mbp if there's only 1 piece),
+ * and start it going.
+ */
+
+ if (offset == startoffset && iobytes == bytes) {
+ bp = mbp;
+ } else {
+ s = splbio();
+ bp = pool_get(&bufpool, PR_WAITOK);
+ splx(s);
+ bp->b_data = (char *)kva + offset - startoffset;
+ bp->b_resid = bp->b_bcount = iobytes;
+ bp->b_flags = B_BUSY|B_READ|B_CALL;
+ bp->b_iodone = uvm_aio_biodone1;
+ bp->b_vp = vp;
+ LIST_INIT(&bp->b_dep);
+ }
+ bp->b_lblkno = 0;
+ bp->b_private = mbp;
+
+ /* adjust physical blkno for partial blocks */
+ bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
+ dev_bshift);
+
+ UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
+ bp, offset, iobytes, bp->b_blkno);
+
+ VOP_STRATEGY(bp);
+ }
+
+loopdone:
+ if (skipbytes) {
+ s = splbio();
+ if (error) {
+ mbp->b_flags |= B_ERROR;
+ mbp->b_error = error;
+ }
+ mbp->b_resid -= skipbytes;
+ if (mbp->b_resid == 0) {
+ biodone(mbp);
+ }
+ splx(s);
+ }
+
+ if (async) {
+ UVMHIST_LOG(ubchist, "returning PEND",0,0,0,0);
+ lockmgr(&vp->v_glock, LK_RELEASE, NULL, p);
+ return EINPROGRESS;
+ }
+ if (bp != NULL) {
+ error = biowait(mbp);
+ }
+ s = splbio();
+ pool_put(&bufpool, mbp);
+ splx(s);
+ uvm_pagermapout(kva, npages);
+ raoffset = startoffset + totalbytes;
+
+ /*
+ * if we encountered a hole then we have to do a little more work.
+ * for read faults, we marked the page PG_RDONLY so that future
+ * write accesses to the page will fault again.
+ * for write faults, we must make sure that the backing store for
+ * the page is completely allocated while the pages are locked.
+ */
+
+ if (error == 0 && sawhole && write) {
+ error = VOP_BALLOCN(vp, startoffset, npages << PAGE_SHIFT,
+ cred, 0);
+ if (error) {
+ UVMHIST_LOG(ubchist, "balloc lbn 0x%x -> %d",
+ lbn, error,0,0);
+ lockmgr(&vp->v_glock, LK_RELEASE, NULL, p);
+ simple_lock(&uobj->vmobjlock);
+ goto out;
+ }
+ }
+ lockmgr(&vp->v_glock, LK_RELEASE, NULL, p);
+ simple_lock(&uobj->vmobjlock);
+
+ /*
+ * see if we want to start any readahead.
+ * XXXUBC for now, just read the next 128k on 64k boundaries.
+ * this is pretty nonsensical, but it is 50% faster than reading
+ * just the next 64k.
+ */
+
+raout:
+ if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
+ PAGE_SHIFT <= 16) {
+ int racount;
+
+ racount = 1 << (16 - PAGE_SHIFT);
+ (void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
+ VM_PROT_READ, 0, 0);
+ simple_lock(&uobj->vmobjlock);
+
+ racount = 1 << (16 - PAGE_SHIFT);
+ (void) VOP_GETPAGES(vp, raoffset + 0x10000, NULL, &racount, 0,
+ VM_PROT_READ, 0, 0);
+ simple_lock(&uobj->vmobjlock);
+ }
+
+ /*
+ * we're almost done! release the pages...
+ * for errors, we free the pages.
+ * otherwise we activate them and mark them as valid and clean.
+ * also, unbusy pages that were not actually requested.
+ */
+
+out:
+ if (error) {
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
+ pgs[i], pgs[i]->flags, 0,0);
+ if (pgs[i]->flags & PG_WANTED) {
+ wakeup(pgs[i]);
+ }
+ if (pgs[i]->flags & PG_RELEASED) {
+ uvm_unlock_pageq();
+ (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
+ uvm_lock_pageq();
+ continue;
+ }
+ if (pgs[i]->flags & PG_FAKE) {
+ uvm_pagefree(pgs[i]);
+ continue;
+ }
+ uvm_pageactivate(pgs[i]);
+ pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pgs[i], NULL);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
+ return error;
+ }
+
+ UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
+ pgs[i], pgs[i]->flags, 0,0);
+ if (pgs[i]->flags & PG_FAKE) {
+ UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
+ pgs[i], pgs[i]->offset,0,0);
+ pgs[i]->flags &= ~(PG_FAKE);
+ pmap_clear_modify(pgs[i]);
+ pmap_clear_reference(pgs[i]);
+ }
+ if (write) {
+ pgs[i]->flags &= ~(PG_RDONLY);
+ }
+ if (i < ridx || i >= ridx + orignpages || async) {
+ UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
+ pgs[i], pgs[i]->offset,0,0);
+ if (pgs[i]->flags & PG_WANTED) {
+ wakeup(pgs[i]);
+ }
+ if (pgs[i]->flags & PG_RELEASED) {
+ uvm_unlock_pageq();
+ (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
+ uvm_lock_pageq();
+ continue;
+ }
+ uvm_pageactivate(pgs[i]);
+ pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pgs[i], NULL);
+ }
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ if (ap->a_m != NULL) {
+ memcpy(ap->a_m, &pgs[ridx],
+ orignpages * sizeof(struct vm_page *));
+ }
+ return 0;
+}
+
+/*
+ * generic VM putpages routine.
+ * Write the given range of pages to backing store.
+ */
+
+int
+genfs_putpages(v)
+ void *v;
+{
+ struct vop_putpages_args /* {
+ struct vnode *a_vp;
+ struct vm_page **a_m;
+ int a_count;
+ int a_flags;
+ int *a_rtvals;
+ } */ *ap = v;
+
+ int s, error, npages, run;
+ int fs_bshift, dev_bshift, dev_bsize;
+ vaddr_t kva;
+ off_t eof, offset, startoffset;
+ size_t bytes, iobytes, skipbytes;
+ daddr_t lbn, blkno;
+ struct vm_page *pg;
+ struct buf *mbp, *bp;
+ struct vnode *vp = ap->a_vp;
+ boolean_t async = (ap->a_flags & PGO_SYNCIO) == 0;
+ UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p offset 0x%x count %d",
+ vp, ap->a_m[0]->offset, ap->a_count, 0);
+
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+
+ error = VOP_SIZE(vp, vp->v_uvm.u_size, &eof);
+ if (error) {
+ return error;
+ }
+
+ error = 0;
+ npages = ap->a_count;
+ fs_bshift = vp->v_mount->mnt_fs_bshift;
+ dev_bshift = vp->v_mount->mnt_dev_bshift;
+ dev_bsize = 1 << dev_bshift;
+ KASSERT((eof & (dev_bsize - 1)) == 0);
+
+ pg = ap->a_m[0];
+ startoffset = pg->offset;
+ bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
+ skipbytes = 0;
+ KASSERT(bytes != 0);
+
+ kva = uvm_pagermapin(ap->a_m, npages, UVMPAGER_MAPIN_WAITOK);
+
+ s = splbio();
+ vp->v_numoutput += 2;
+ mbp = pool_get(&bufpool, PR_WAITOK);
+ UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
+ vp, mbp, vp->v_numoutput, bytes);
+ splx(s);
+ mbp->b_bufsize = npages << PAGE_SHIFT;
+ mbp->b_data = (void *)kva;
+ mbp->b_resid = mbp->b_bcount = bytes;
+ mbp->b_flags = B_BUSY|B_WRITE|B_AGE |
+ (async ? B_CALL : 0) |
+ (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);
+ mbp->b_iodone = uvm_aio_biodone;
+ mbp->b_vp = vp;
+ LIST_INIT(&mbp->b_dep);
+
+ bp = NULL;
+ for (offset = startoffset;
+ bytes > 0;
+ offset += iobytes, bytes -= iobytes) {
+ lbn = offset >> fs_bshift;
+ error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
+ if (error) {
+ UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
+ skipbytes += bytes;
+ bytes = 0;
+ break;
+ }
+
+ iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
+ bytes);
+ if (blkno == (daddr_t)-1) {
+ skipbytes += iobytes;
+ continue;
+ }
+
+ /* if it's really one i/o, don't make a second buf */
+ if (offset == startoffset && iobytes == bytes) {
+ bp = mbp;
+ } else {
+ s = splbio();
+ vp->v_numoutput++;
+ bp = pool_get(&bufpool, PR_WAITOK);
+ UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
+ vp, bp, vp->v_numoutput, 0);
+ splx(s);
+ bp->b_data = (char *)kva +
+ (vaddr_t)(offset - pg->offset);
+ bp->b_resid = bp->b_bcount = iobytes;
+ bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
+ bp->b_iodone = uvm_aio_biodone1;
+ bp->b_vp = vp;
+ LIST_INIT(&bp->b_dep);
+ }
+ bp->b_lblkno = 0;
+ bp->b_private = mbp;
+
+ /* adjust physical blkno for partial blocks */
+ bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
+ dev_bshift);
+ UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
+ vp, offset, bp->b_bcount, bp->b_blkno);
+ VOP_STRATEGY(bp);
+ }
+ if (skipbytes) {
+ UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
+ s = splbio();
+ mbp->b_resid -= skipbytes;
+ if (error) {
+ mbp->b_flags |= B_ERROR;
+ mbp->b_error = error;
+ }
+ if (mbp->b_resid == 0) {
+ biodone(mbp);
+ }
+ splx(s);
+ }
+ if (async) {
+ UVMHIST_LOG(ubchist, "returning PEND", 0,0,0,0);
+ return EINPROGRESS;
+ }
+ if (bp != NULL) {
+ UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
+ error = biowait(mbp);
+ }
+ if (bioops.io_pageiodone) {
+ (*bioops.io_pageiodone)(mbp);
+ }
+ s = splbio();
+ if (mbp->b_vp)
+ vwakeup(mbp->b_vp);
+ pool_put(&bufpool, mbp);
+ splx(s);
+ uvm_pagermapout(kva, npages);
+ UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
+ return error;
+}
+
+int
+genfs_size(v)
+ void *v;
+{
+ struct vop_size_args /* {
+ struct vnode *a_vp;
+ off_t a_size;
+ off_t *a_eobp;
+ } */ *ap = v;
+ int bsize;
+
+ bsize = 1 << ap->a_vp->v_mount->mnt_fs_bshift;
+ *ap->a_eobp = (ap->a_size + bsize - 1) & ~(bsize - 1);
+ return 0;
+}
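
genfs_size() simply rounds the byte size up to the filesystem block size to produce the end-of-block offset that the getpages/putpages code needs for its assertions and i/o sizing. A worked example, assuming mnt_fs_bshift == 13 (8 KB blocks) purely for illustration:

/*
 * genfs_size() with a_size = 10000 and mnt_fs_bshift = 13:
 *
 *	bsize = 1 << 13                     =  8192
 *	eob   = (10000 + 8192 - 1) & ~8191  = 16384
 *
 * i.e. the file's last byte lives in the second 8 KB block, and
 * *a_eobp comes back as the end of that block.
 */
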
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index e4efaff930f..7f668a7edde 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_subr.c,v 1.72 2001/11/21 21:13:34 csapuntz Exp $ */
+/* $OpenBSD: vfs_subr.c,v 1.73 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */
/*
@@ -377,6 +377,8 @@ getnewvnode(tag, mp, vops, vpp)
int (**vops) __P((void *));
struct vnode **vpp;
{
+ extern struct uvm_pagerops uvm_vnodeops;
+ struct uvm_object *uobj;
struct proc *p = curproc; /* XXX */
struct freelst *listhd;
static int toggle;
@@ -410,7 +412,7 @@ getnewvnode(tag, mp, vops, vpp)
splx(s);
simple_unlock(&vnode_free_list_slock);
vp = pool_get(&vnode_pool, PR_WAITOK);
- bzero((char *)vp, sizeof *vp);
+ bzero(vp, sizeof *vp);
numvnodes++;
} else {
for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
@@ -423,7 +425,7 @@ getnewvnode(tag, mp, vops, vpp)
* the first NCPUS items on the free list are
* locked, so this is close enough to being empty.
*/
- if (vp == NULLVP) {
+ if (vp == NULL) {
splx(s);
simple_unlock(&vnode_free_list_slock);
tablefull("vnode");
@@ -458,6 +460,7 @@ getnewvnode(tag, mp, vops, vpp)
vp->v_socket = 0;
}
vp->v_type = VNON;
+ lockinit(&vp->v_glock, PVFS, "glock", 0, 0);
cache_purge(vp);
vp->v_tag = tag;
vp->v_op = vops;
@@ -466,6 +469,16 @@ getnewvnode(tag, mp, vops, vpp)
vp->v_usecount = 1;
vp->v_data = 0;
simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
+
+ /*
+ * initialize uvm_object within vnode.
+ */
+
+ uobj = &vp->v_uvm.u_obj;
+ uobj->pgops = &uvm_vnodeops;
+ TAILQ_INIT(&uobj->memq);
+ vp->v_uvm.u_size = VSIZENOTSET;
+
return (0);
}
@@ -669,6 +682,10 @@ vget(vp, flags, p)
flags |= LK_INTERLOCK;
}
if (vp->v_flag & VXLOCK) {
+ if (flags & LK_NOWAIT) {
+ simple_unlock(&vp->v_interlock);
+ return (EBUSY);
+ }
vp->v_flag |= VXWANT;
simple_unlock(&vp->v_interlock);
tsleep((caddr_t)vp, PINOD, "vget", 0);
@@ -787,6 +804,11 @@ vput(vp)
#endif
vputonfreelist(vp);
+ if (vp->v_flag & VTEXT) {
+ uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
+ uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
+ }
+ vp->v_flag &= ~VTEXT;
simple_unlock(&vp->v_interlock);
VOP_INACTIVE(vp, p);
@@ -827,6 +849,11 @@ vrele(vp)
#endif
vputonfreelist(vp);
+ if (vp->v_flag & VTEXT) {
+ uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
+ uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
+ }
+ vp->v_flag &= ~VTEXT;
if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p) == 0)
VOP_INACTIVE(vp, p);
}
@@ -1009,6 +1036,12 @@ vclean(vp, flags, p)
if (vp->v_flag & VXLOCK)
panic("vclean: deadlock");
vp->v_flag |= VXLOCK;
+ if (vp->v_flag & VTEXT) {
+ uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
+ uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
+ }
+ vp->v_flag &= ~VTEXT;
+
/*
* Even if the count is zero, the VOP_INACTIVE routine may still
* have the object locked while it cleans it out. The VOP_LOCK
@@ -1019,11 +1052,7 @@ vclean(vp, flags, p)
VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
/*
- * clean out any VM data associated with the vnode.
- */
- uvm_vnp_terminate(vp);
- /*
- * Clean out any buffers associated with the vnode.
+ * Clean out any cached data associated with the vnode.
*/
if (flags & DOCLOSE)
vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
@@ -1968,9 +1997,22 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
struct proc *p;
int slpflag, slptimeo;
{
- register struct buf *bp;
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+ struct buf *bp;
struct buf *nbp, *blist;
- int s, error;
+ int s, error, rv;
+ int flushflags = PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO|
+ (flags & V_SAVE ? PGO_CLEANIT : 0);
+
+ /* XXXUBC this doesn't look at flags or slp* */
+ if (vp->v_type == VREG) {
+ simple_lock(&uobj->vmobjlock);
+ rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags);
+ simple_unlock(&uobj->vmobjlock);
+ if (!rv) {
+ return EIO;
+ }
+ }
if (flags & V_SAVE) {
s = splbio();
@@ -2040,12 +2082,21 @@ loop:
void
vflushbuf(vp, sync)
- register struct vnode *vp;
+ struct vnode *vp;
int sync;
{
- register struct buf *bp, *nbp;
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+ struct buf *bp, *nbp;
int s;
+ if (vp->v_type == VREG) {
+ int flags = PGO_CLEANIT|PGO_ALLPAGES| (sync ? PGO_SYNCIO : 0);
+
+ simple_lock(&uobj->vmobjlock);
+ (uobj->pgops->pgo_flush)(uobj, 0, 0, flags);
+ simple_unlock(&uobj->vmobjlock);
+ }
+
loop:
s = splbio();
for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
@@ -2112,23 +2163,25 @@ bgetvp(vp, bp)
*/
void
brelvp(bp)
- register struct buf *bp;
+ struct buf *bp;
{
struct vnode *vp;
- if ((vp = bp->b_vp) == (struct vnode *) 0)
+ if ((vp = bp->b_vp) == NULL)
panic("brelvp: NULL");
+
/*
* Delete from old vnode list, if on one.
*/
if (bp->b_vnbufs.le_next != NOLIST)
bufremvn(bp);
- if ((vp->v_bioflag & VBIOONSYNCLIST) &&
+ if (TAILQ_EMPTY(&vp->v_uvm.u_obj.memq) &&
+ (vp->v_bioflag & VBIOONSYNCLIST) &&
LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
vp->v_bioflag &= ~VBIOONSYNCLIST;
LIST_REMOVE(vp, v_synclist);
}
- bp->b_vp = (struct vnode *) 0;
+ bp->b_vp = NULL;
simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
@@ -2205,7 +2258,8 @@ reassignbuf(bp)
*/
if ((bp->b_flags & B_DELWRI) == 0) {
listheadp = &vp->v_cleanblkhd;
- if ((vp->v_bioflag & VBIOONSYNCLIST) &&
+ if (TAILQ_EMPTY(&vp->v_uvm.u_obj.memq) &&
+ (vp->v_bioflag & VBIOONSYNCLIST) &&
LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
vp->v_bioflag &= ~VBIOONSYNCLIST;
LIST_REMOVE(vp, v_synclist);
diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c
index 4b07d0f373a..0adeb2f3065 100644
--- a/sys/kern/vfs_sync.c
+++ b/sys/kern/vfs_sync.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_sync.c,v 1.20 2001/11/15 06:38:48 art Exp $ */
+/* $OpenBSD: vfs_sync.c,v 1.21 2001/11/27 05:27:12 art Exp $ */
/*
* Portions of this code are:
@@ -176,15 +176,12 @@ sched_sync(p)
VOP_UNLOCK(vp, 0, p);
s = splbio();
if (LIST_FIRST(slp) == vp) {
- /*
- * Note: disk vps can remain on the
- * worklist too with no dirty blocks, but
- * since sync_fsync() moves it to a different
- * slot we are safe.
- */
- if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
- vp->v_type != VBLK)
- panic("sched_sync: fsync failed");
+#ifdef DIAGNOSTIC
+ if (!(vp->v_bioflag & VBIOONSYNCLIST)) {
+ vprint("vnode", vp);
+ panic("sched_fsync: on synclist, but no flag");
+ }
+#endif
/*
* Put us back on the worklist. The worklist
* routine will remove us from our current
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 284fad0fbda..5433711decd 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_syscalls.c,v 1.82 2001/11/06 19:53:20 miod Exp $ */
+/* $OpenBSD: vfs_syscalls.c,v 1.83 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $ */
/*
@@ -493,7 +493,6 @@ sys_sync(p, v, retval)
if ((mp->mnt_flag & MNT_RDONLY) == 0) {
asyncflag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &= ~MNT_ASYNC;
- uvm_vnp_sync(mp);
VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
if (asyncflag)
mp->mnt_flag |= MNT_ASYNC;
@@ -1064,6 +1063,13 @@ sys_fhopen(p, v, retval)
}
if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
goto bad;
+
+ if (vp->v_type == VREG &&
+ uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
+ error = EIO;
+ goto bad;
+ }
+
if (flags & FWRITE)
vp->v_writecount++;
@@ -1475,8 +1481,6 @@ sys_unlink(p, v, retval)
goto out;
}
- (void)uvm_vnp_uncache(vp);
-
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
@@ -2338,7 +2342,6 @@ out:
if (fromnd.ni_dvp != tdvp)
VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
if (tvp) {
- (void)uvm_vnp_uncache(tvp);
VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
}
error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index ee5eb0baee2..491db1172fa 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_vnops.c,v 1.35 2001/11/15 06:22:30 art Exp $ */
+/* $OpenBSD: vfs_vnops.c,v 1.36 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */
/*
@@ -165,6 +165,11 @@ vn_open(ndp, fmode, cmode)
}
if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0)
goto bad;
+ if (vp->v_type == VREG &&
+ uvn_attach(vp, fmode & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
+ error = EIO;
+ goto bad;
+ }
if (fmode & FWRITE)
vp->v_writecount++;
return (0);
@@ -197,11 +202,10 @@ vn_writechk(vp)
}
}
/*
- * If there's shared text associated with
- * the vnode, try to free it up once. If
- * we fail, we can't allow writing.
+ * If the vnode is in use as a process's text,
+ * we can't allow writing.
*/
- if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
+ if (vp->v_flag & VTEXT)
return (ETXTBSY);
return (0);
@@ -214,6 +218,23 @@ void
vn_marktext(vp)
struct vnode *vp;
{
+ if ((vp->v_flag & VTEXT) == 0) {
+ uvmexp.vnodepages -= vp->v_uvm.u_obj.uo_npages;
+ uvmexp.vtextpages += vp->v_uvm.u_obj.uo_npages;
+#if 0
+ /*
+ * Doesn't help much because the pager is borked and ubc_flush is
+ * slow.
+ */
+#ifdef PMAP_PREFER
+ /*
+ * Get rid of any cached reads from this vnode.
+ * exec can't respect PMAP_PREFER when mapping the text.
+ */
+ ubc_flush(&vp->v_uvm.u_obj, 0, 0);
+#endif
+#endif
+ }
vp->v_flag |= VTEXT;
}
diff --git a/sys/kern/vnode_if.c b/sys/kern/vnode_if.c
index 1f30d85c507..d2a3d8298bf 100644
--- a/sys/kern/vnode_if.c
+++ b/sys/kern/vnode_if.c
@@ -3,7 +3,7 @@
* (Modifications made here may easily be lost!)
*
* Created from the file:
- * OpenBSD: vnode_if.src,v 1.11 2001/06/23 02:21:05 csapuntz Exp
+ * OpenBSD: vnode_if.src,v 1.13 2001/07/26 20:24:47 millert Exp
* by the script:
* OpenBSD: vnode_if.sh,v 1.8 2001/02/26 17:34:18 art Exp
*/
@@ -1230,6 +1230,140 @@ int VOP_WHITEOUT(dvp, cnp, flags)
return (VCALL(dvp, VOFFSET(vop_whiteout), &a));
}
+int vop_ballocn_vp_offsets[] = {
+ VOPARG_OFFSETOF(struct vop_ballocn_args,a_vp),
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_ballocn_desc = {
+ 0,
+ "vop_ballocn",
+ 0,
+ vop_ballocn_vp_offsets,
+ VDESC_NO_OFFSET,
+ VOPARG_OFFSETOF(struct vop_ballocn_args, a_cred),
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+int VOP_BALLOCN(vp, offset, length, cred, flags)
+ struct vnode *vp;
+ off_t offset;
+ off_t length;
+ struct ucred *cred;
+ int flags;
+{
+ struct vop_ballocn_args a;
+ a.a_desc = VDESC(vop_ballocn);
+ a.a_vp = vp;
+ a.a_offset = offset;
+ a.a_length = length;
+ a.a_cred = cred;
+ a.a_flags = flags;
+ return (VCALL(vp, VOFFSET(vop_ballocn), &a));
+}
+
+int vop_getpages_vp_offsets[] = {
+ VOPARG_OFFSETOF(struct vop_getpages_args,a_vp),
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_getpages_desc = {
+ 0,
+ "vop_getpages",
+ 0,
+ vop_getpages_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+int VOP_GETPAGES(vp, offset, m, count, centeridx, access_type, advice, flags)
+ struct vnode *vp;
+ voff_t offset;
+ vm_page_t *m;
+ int *count;
+ int centeridx;
+ vm_prot_t access_type;
+ int advice;
+ int flags;
+{
+ struct vop_getpages_args a;
+ a.a_desc = VDESC(vop_getpages);
+ a.a_vp = vp;
+ a.a_offset = offset;
+ a.a_m = m;
+ a.a_count = count;
+ a.a_centeridx = centeridx;
+ a.a_access_type = access_type;
+ a.a_advice = advice;
+ a.a_flags = flags;
+ return (VCALL(vp, VOFFSET(vop_getpages), &a));
+}
+
+int vop_putpages_vp_offsets[] = {
+ VOPARG_OFFSETOF(struct vop_putpages_args,a_vp),
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_putpages_desc = {
+ 0,
+ "vop_putpages",
+ 0,
+ vop_putpages_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+int VOP_PUTPAGES(vp, m, count, flags, rtvals)
+ struct vnode *vp;
+ vm_page_t *m;
+ int count;
+ int flags;
+ int *rtvals;
+{
+ struct vop_putpages_args a;
+ a.a_desc = VDESC(vop_putpages);
+ a.a_vp = vp;
+ a.a_m = m;
+ a.a_count = count;
+ a.a_flags = flags;
+ a.a_rtvals = rtvals;
+ return (VCALL(vp, VOFFSET(vop_putpages), &a));
+}
+
+int vop_size_vp_offsets[] = {
+ VOPARG_OFFSETOF(struct vop_size_args,a_vp),
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_size_desc = {
+ 0,
+ "vop_size",
+ 0,
+ vop_size_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+int VOP_SIZE(vp, size, eobp)
+ struct vnode *vp;
+ off_t size;
+ off_t *eobp;
+{
+ struct vop_size_args a;
+ a.a_desc = VDESC(vop_size);
+ a.a_vp = vp;
+ a.a_size = size;
+ a.a_eobp = eobp;
+ return (VCALL(vp, VOFFSET(vop_size), &a));
+}
+
/* Special cases: */
int vop_strategy_vp_offsets[] = {
@@ -1323,6 +1457,10 @@ struct vnodeop_desc *vfs_op_descs[] = {
&vop_advlock_desc,
&vop_reallocblks_desc,
&vop_whiteout_desc,
+ &vop_ballocn_desc,
+ &vop_getpages_desc,
+ &vop_putpages_desc,
+ &vop_size_desc,
NULL
};
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index fdf8e6e4015..1af0f56e276 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -1,4 +1,4 @@
-# $OpenBSD: vnode_if.src,v 1.13 2001/07/26 20:24:47 millert Exp $
+# $OpenBSD: vnode_if.src,v 1.14 2001/11/27 05:27:12 art Exp $
# $NetBSD: vnode_if.src,v 1.10 1996/05/11 18:26:27 mycroft Exp $
#
# Copyright (c) 1992, 1993
@@ -467,3 +467,48 @@ vop_whiteout {
#vop_bwrite {
# IN struct buf *bp;
#};
+
+#
+#% ballocn vp L L L
+#
+vop_ballocn {
+ IN struct vnode *vp;
+ IN off_t offset;
+ IN off_t length;
+ IN struct ucred *cred;
+ IN int flags;
+};
+
+#
+#% getpages vp L L L
+#
+vop_getpages {
+ IN struct vnode *vp;
+ IN voff_t offset;
+ IN vm_page_t *m;
+ IN int *count;
+ IN int centeridx;
+ IN vm_prot_t access_type;
+ IN int advice;
+ IN int flags;
+};
+
+#
+#% putpages vp L L L
+#
+vop_putpages {
+ IN struct vnode *vp;
+ IN vm_page_t *m;
+ IN int count;
+ IN int flags;
+ IN int *rtvals;
+};
+
+#
+#% size vp = = =
+#
+vop_size {
+ IN struct vnode *vp;
+ IN off_t size;
+ OUT off_t *eobp;
+};
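
With these entries in vnode_if.src, the vnode_if.sh script regenerates the wrappers and descriptors shown in vnode_if.c above. A filesystem opts into the generic implementations by naming them in its vnodeopv_entry_desc table; a sketch for a hypothetical "myfs" (the adosfs, cd9660, and msdosfs hunks in this commit do exactly this for the real tables):

struct vnodeopv_entry_desc myfs_vnodeop_entries[] = {
	/* ... the usual lookup/create/read/write/etc. entries ... */
	{ &vop_getpages_desc, genfs_getpages },	/* getpages */
	{ &vop_putpages_desc, genfs_putpages },	/* putpages */
	{ &vop_size_desc, genfs_size },		/* size */
	{ NULL, NULL }
};
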
diff --git a/sys/msdosfs/msdosfs_denode.c b/sys/msdosfs/msdosfs_denode.c
index eb82f75afe3..f4ab33d5272 100644
--- a/sys/msdosfs/msdosfs_denode.c
+++ b/sys/msdosfs/msdosfs_denode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: msdosfs_denode.c,v 1.19 2001/11/06 19:53:20 miod Exp $ */
+/* $OpenBSD: msdosfs_denode.c,v 1.20 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: msdosfs_denode.c,v 1.23 1997/10/17 11:23:58 ws Exp $ */
/*-
@@ -72,6 +72,8 @@ u_long dehash; /* size of hash table - 1 */
#define DEHASH(dev, dcl, doff) (((dev) + (dcl) + (doff) / sizeof(struct direntry)) \
& dehash)
+extern int prtactive;
+
static struct denode *msdosfs_hashget __P((dev_t, u_long, u_long));
static int msdosfs_hashins __P((struct denode *));
static void msdosfs_hashrem __P((struct denode *));
@@ -332,6 +334,7 @@ retry:
nvp->v_type = VREG;
VREF(ldep->de_devvp);
*depp = ldep;
+ nvp->v_uvm.u_size = ldep->de_FileSize;
return (0);
}
@@ -461,7 +464,7 @@ detrunc(dep, length, flags, cred, p)
#endif
return (error);
}
- uvm_vnp_uncache(DETOV(dep));
+
/*
* is this the right place for it?
*/
@@ -524,7 +527,7 @@ deextend(dep, length, cred)
struct ucred *cred;
{
struct msdosfsmount *pmp = dep->de_pmp;
- u_long count;
+ u_long count, osize;
int error;
/*
@@ -557,8 +560,12 @@ deextend(dep, length, cred)
}
}
+ osize = dep->de_FileSize;
dep->de_FileSize = length;
+ uvm_vnp_setsize(DETOV(dep), (voff_t)dep->de_FileSize);
dep->de_flag |= DE_UPDATE|DE_MODIFIED;
+ uvm_vnp_zerorange(DETOV(dep), (off_t)osize,
+ (size_t)(dep->de_FileSize - osize));
return (deupdat(dep, 1));
}
@@ -593,7 +600,6 @@ msdosfs_reclaim(v)
} */ *ap = v;
struct vnode *vp = ap->a_vp;
struct denode *dep = VTODE(vp);
- extern int prtactive;
#ifdef MSDOSFS_DEBUG
printf("msdosfs_reclaim(): dep %08x, file %s, refcnt %d\n",
@@ -634,7 +640,6 @@ msdosfs_inactive(v)
struct denode *dep = VTODE(vp);
struct proc *p = ap->a_p;
int error;
- extern int prtactive;
#ifdef MSDOSFS_DEBUG
printf("msdosfs_inactive(): dep %08x, de_Name[0] %x\n", dep, dep->de_Name[0]);
@@ -661,7 +666,9 @@ msdosfs_inactive(v)
dep, dep->de_refcnt, vp->v_mount->mnt_flag, MNT_RDONLY);
#endif
if (dep->de_refcnt <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
- error = detrunc(dep, (u_long)0, 0, NOCRED, NULL);
+ if (dep->de_FileSize != 0) {
+ error = detrunc(dep, (u_long)0, 0, NOCRED, NULL);
+ }
dep->de_Name[0] = SLOT_DELETED;
}
deupdat(dep, 0);
diff --git a/sys/msdosfs/msdosfs_fat.c b/sys/msdosfs/msdosfs_fat.c
index 772bdfb67e9..3576a663cdc 100644
--- a/sys/msdosfs/msdosfs_fat.c
+++ b/sys/msdosfs/msdosfs_fat.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: msdosfs_fat.c,v 1.8 1999/01/10 21:50:32 art Exp $ */
+/* $OpenBSD: msdosfs_fat.c,v 1.9 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: msdosfs_fat.c,v 1.26 1997/10/17 11:24:02 ws Exp $ */
/*-
@@ -988,8 +988,7 @@ extendfile(dep, count, bpp, ncp, flags)
int flags;
{
int error;
- u_long frcn;
- u_long cn, got;
+ u_long frcn = 0, cn, got;
struct msdosfsmount *pmp = dep->de_pmp;
struct buf *bp;
@@ -1060,41 +1059,26 @@ extendfile(dep, count, bpp, ncp, flags)
}
/*
- * Update the "last cluster of the file" entry in the denode's fat
- * cache.
+ * Update the "last cluster of the file" entry in the
+ * denode's fat cache.
*/
+
fc_setcache(dep, FC_LASTFC, frcn + got - 1, cn + got - 1);
-
- if (flags & DE_CLEAR) {
+ if (flags & DE_CLEAR &&
+ (dep->de_Attributes & ATTR_DIRECTORY)) {
while (got-- > 0) {
- /*
- * Get the buf header for the new block of the file.
- */
- if (dep->de_Attributes & ATTR_DIRECTORY)
- bp = getblk(pmp->pm_devvp, cntobn(pmp, cn++),
- pmp->pm_bpcluster, 0, 0);
- else {
- bp = getblk(DETOV(dep), de_cn2bn(pmp, frcn++),
- pmp->pm_bpcluster, 0, 0);
- /*
- * Do the bmap now, as in msdosfs_write
- */
- if (pcbmap(dep,
- de_bn2cn(pmp, bp->b_lblkno),
- &bp->b_blkno, 0, 0))
- bp->b_blkno = -1;
- if (bp->b_blkno == -1)
- panic("extendfile: pcbmap");
- }
+ bp = getblk(pmp->pm_devvp, cntobn(pmp, cn++),
+ pmp->pm_bpcluster, 0, 0);
clrbuf(bp);
if (bpp) {
*bpp = bp;
bpp = NULL;
- } else
+ } else {
bdwrite(bp);
+ }
}
}
}
-
+
return (0);
}
diff --git a/sys/msdosfs/msdosfs_vfsops.c b/sys/msdosfs/msdosfs_vfsops.c
index fec59174189..63175e08754 100644
--- a/sys/msdosfs/msdosfs_vfsops.c
+++ b/sys/msdosfs/msdosfs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: msdosfs_vfsops.c,v 1.25 2001/11/21 21:37:01 csapuntz Exp $ */
+/* $OpenBSD: msdosfs_vfsops.c,v 1.26 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: msdosfs_vfsops.c,v 1.48 1997/10/18 02:54:57 briggs Exp $ */
/*-
@@ -584,15 +584,9 @@ msdosfs_mountfs(devvp, mp, p, argp)
mp->mnt_data = (qaddr_t)pmp;
mp->mnt_stat.f_fsid.val[0] = (long)dev;
mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
-#ifdef QUOTA
- /*
- * If we ever do quotas for DOS filesystems this would be a place
- * to fill in the info in the msdosfsmount structure. You dolt,
- * quotas on dos filesystems make no sense because files have no
- * owners on dos filesystems. of course there is some empty space
- * in the directory entry where we could put uid's and gid's.
- */
-#endif
+ mp->mnt_dev_bshift = pmp->pm_bnshift;
+ mp->mnt_fs_bshift = pmp->pm_cnshift;
+
devvp->v_specmountpoint = mp;
return (0);
@@ -720,10 +714,11 @@ msdosfs_sync_vnode(struct vnode *vp, void *arg)
struct denode *dep;
dep = VTODE(vp);
- if (vp->v_type == VNON ||
- ((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0
- && vp->v_dirtyblkhd.lh_first == NULL) ||
- msa->waitfor == MNT_LAZY) {
+ if (msa->waitfor == MNT_LAZY || vp->v_type == VNON ||
+ (((dep->de_flag &
+ (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) &&
+ (LIST_EMPTY(&vp->v_dirtyblkhd) &&
+ vp->v_uvm.u_obj.uo_npages == 0))) {
simple_unlock(&vp->v_interlock);
return (0);
}
diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c
index 7f1ab384295..1e364039937 100644
--- a/sys/msdosfs/msdosfs_vnops.c
+++ b/sys/msdosfs/msdosfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: msdosfs_vnops.c,v 1.28 2001/11/06 19:53:20 miod Exp $ */
+/* $OpenBSD: msdosfs_vnops.c,v 1.29 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: msdosfs_vnops.c,v 1.63 1997/10/17 11:24:19 ws Exp $ */
/*-
@@ -413,11 +413,11 @@ msdosfs_read(v)
int error = 0;
int diff;
int blsize;
- int isadir;
long n;
long on;
daddr_t lbn;
- daddr_t rablock;
+ void *win;
+ vsize_t bytelen;
struct buf *bp;
struct vnode *vp = ap->a_vp;
struct denode *dep = VTODE(vp);
@@ -432,42 +432,45 @@ msdosfs_read(v)
if (uio->uio_offset < 0)
return (EINVAL);
- isadir = dep->de_Attributes & ATTR_DIRECTORY;
+ if (vp->v_type == VREG) {
+ while (uio->uio_resid > 0) {
+ bytelen = MIN(dep->de_FileSize - uio->uio_offset,
+ uio->uio_resid);
+
+ if (bytelen == 0)
+ break;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error)
+ break;
+ }
+ dep->de_flag |= DE_ACCESS;
+ goto out;
+ }
+
+ /* this loop is only for directories now */
do {
lbn = de_cluster(pmp, uio->uio_offset);
on = uio->uio_offset & pmp->pm_crbomask;
- n = min((u_long) (pmp->pm_bpcluster - on), uio->uio_resid);
+ n = MIN((pmp->pm_bpcluster - on), uio->uio_resid);
diff = dep->de_FileSize - uio->uio_offset;
if (diff <= 0)
return (0);
if (diff < n)
n = diff;
/* convert cluster # to block # if a directory */
- if (isadir) {
- error = pcbmap(dep, lbn, &lbn, 0, &blsize);
- if (error)
- return (error);
- }
+ error = pcbmap(dep, lbn, &lbn, 0, &blsize);
+ if (error)
+ return (error);
/*
* If we are operating on a directory file then be sure to
* do i/o with the vnode for the filesystem instead of the
* vnode for the directory.
*/
- if (isadir) {
- error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
- } else {
- rablock = lbn + 1;
- if (dep->de_lastr + 1 == lbn &&
- de_cn2off(pmp, rablock) < dep->de_FileSize)
- error = breada(vp, de_cn2bn(pmp, lbn),
- pmp->pm_bpcluster, de_cn2bn(pmp, rablock),
- pmp->pm_bpcluster, NOCRED, &bp);
- else
- error = bread(vp, de_cn2bn(pmp, lbn),
- pmp->pm_bpcluster, NOCRED, &bp);
- dep->de_lastr = lbn;
- }
- n = min(n, pmp->pm_bpcluster - bp->b_resid);
+ error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
+ n = MIN(n, pmp->pm_bpcluster - bp->b_resid);
if (error) {
brelse(bp);
return (error);
@@ -475,8 +478,10 @@ msdosfs_read(v)
error = uiomove(bp->b_data + on, (int) n, uio);
brelse(bp);
} while (error == 0 && uio->uio_resid > 0 && n != 0);
- if (!isadir && !(vp->v_mount->mnt_flag & MNT_NOATIME))
- dep->de_flag |= DE_ACCESS;
+
+out:
+ if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
+ error = deupdat(dep, 1);
return (error);
}
@@ -493,19 +498,19 @@ msdosfs_write(v)
int a_ioflag;
struct ucred *a_cred;
} */ *ap = v;
- int n;
- int croffset;
int resid;
u_long osize;
int error = 0;
u_long count;
- daddr_t bn, lastcn;
- struct buf *bp;
+ daddr_t lastcn;
int ioflag = ap->a_ioflag;
+ void *win;
+ vsize_t bytelen;
+ off_t oldoff;
+ boolean_t rv;
struct uio *uio = ap->a_uio;
struct proc *p = uio->uio_procp;
struct vnode *vp = ap->a_vp;
- struct vnode *thisvp;
struct denode *dep = VTODE(vp);
struct msdosfsmount *pmp = dep->de_pmp;
struct ucred *cred = ap->a_cred;
@@ -521,7 +526,6 @@ msdosfs_write(v)
case VREG:
if (ioflag & IO_APPEND)
uio->uio_offset = dep->de_FileSize;
- thisvp = vp;
break;
case VDIR:
return EISDIR;
@@ -576,84 +580,52 @@ msdosfs_write(v)
} else
lastcn = de_clcount(pmp, osize) - 1;
+ if (dep->de_FileSize < uio->uio_offset + resid) {
+ dep->de_FileSize = uio->uio_offset + resid;
+ uvm_vnp_setsize(vp, dep->de_FileSize);
+ }
+
do {
- if (de_cluster(pmp, uio->uio_offset) > lastcn) {
+ oldoff = uio->uio_offset;
+ if (de_cluster(pmp, oldoff) > lastcn) {
error = ENOSPC;
break;
}
-
- bn = de_blk(pmp, uio->uio_offset);
- if ((uio->uio_offset & pmp->pm_crbomask) == 0
- && (de_blk(pmp, uio->uio_offset + uio->uio_resid) > de_blk(pmp, uio->uio_offset)
- || uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) {
- /*
- * If either the whole cluster gets written,
- * or we write the cluster from its start beyond EOF,
- * then no need to read data from disk.
- */
- bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0);
- clrbuf(bp);
- /*
- * Do the bmap now, since pcbmap needs buffers
- * for the fat table. (see msdosfs_strategy)
- */
- if (bp->b_blkno == bp->b_lblkno) {
- error = pcbmap(dep,
- de_bn2cn(pmp, bp->b_lblkno),
- &bp->b_blkno, 0, 0);
- if (error)
- bp->b_blkno = -1;
- }
- if (bp->b_blkno == -1) {
- brelse(bp);
- if (!error)
- error = EIO; /* XXX */
- break;
- }
- } else {
- /*
- * The block we need to write into exists, so read it in.
- */
- error = bread(thisvp, bn, pmp->pm_bpcluster,
- NOCRED, &bp);
- if (error) {
- brelse(bp);
- break;
- }
- }
-
- croffset = uio->uio_offset & pmp->pm_crbomask;
- n = min(uio->uio_resid, pmp->pm_bpcluster - croffset);
- if (uio->uio_offset + n > dep->de_FileSize) {
- dep->de_FileSize = uio->uio_offset + n;
- uvm_vnp_setsize(vp, dep->de_FileSize);
- }
- uvm_vnp_uncache(vp);
- /*
- * Should these vnode_pager_* functions be done on dir
- * files?
- */
+ bytelen = MIN(dep->de_FileSize - oldoff, uio->uio_resid);
/*
- * Copy the data from user space into the buf header.
+ * XXXUBC if file is mapped and this is the last block,
+ * process one page at a time.
*/
- error = uiomove(bp->b_data + croffset, n, uio);
+ if (bytelen == 0)
+ break;
+ win = ubc_alloc(&vp->v_uvm.u_obj, oldoff, &bytelen, UBC_WRITE);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
+ }
/*
- * If they want this synchronous then write it and wait for
- * it. Otherwise, if on a cluster boundary write it
- * asynchronously so we can move on to the next block
- * without delay. Otherwise do a delayed write because we
- * may want to write somemore into the block later.
+ * flush what we just wrote if necessary.
+ * XXXUBC simplistic async flushing.
*/
- if (ioflag & IO_SYNC)
- (void) bwrite(bp);
- else if (n + croffset == pmp->pm_bpcluster)
- bawrite(bp);
- else
- bdwrite(bp);
- dep->de_flag |= DE_UPDATE;
+ if (ioflag & IO_SYNC) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, oldoff,
+ oldoff + bytelen, PGO_CLEANIT|PGO_SYNCIO);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ } else if (oldoff >> 16 != uio->uio_offset >> 16) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, (oldoff >> 16) << 16,
+ (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ }
} while (error == 0 && uio->uio_resid > 0);
+ dep->de_flag |= DE_UPDATE;
/*
* If the write failed and they want us to, truncate the file back
@@ -666,7 +638,8 @@ errexit:
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
} else {
- detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL);
+ detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED,
+ NULL);
if (uio->uio_resid != resid)
error = 0;
}
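
For reference, the new msdosfs write path above reduces to the following pattern. This is a minimal sketch, not the committed code: it assumes a regular-file vnode whose v_uvm.u_obj is initialized and whose de_FileSize/uvm_vnp_setsize() bookkeeping has already been done, and it omits the cluster-limit check and the truncate-on-error handling.

/*
 * Sketch of the UBC write loop that replaces the old
 * getblk/bread/uiomove/bdwrite sequence.  All names are taken from
 * the diff above; the function itself is hypothetical.
 */
int
ubc_write_sketch(struct vnode *vp, struct uio *uio, int ioflag)
{
        void *win;
        vsize_t bytelen;
        off_t oldoff;
        int error = 0;

        while (uio->uio_resid > 0 && error == 0) {
                oldoff = uio->uio_offset;
                bytelen = uio->uio_resid;
                /* map the affected file pages into kernel virtual space */
                win = ubc_alloc(&vp->v_uvm.u_obj, oldoff, &bytelen,
                    UBC_WRITE);
                /* copy the user data straight into the page cache */
                error = uiomove(win, bytelen, uio);
                ubc_release(win, 0);
                if (error == 0 && (ioflag & IO_SYNC)) {
                        /* a synchronous write pushes the pages out now */
                        simple_lock(&vp->v_uvm.u_obj.vmobjlock);
                        (void)vp->v_uvm.u_obj.pgops->pgo_flush(
                            &vp->v_uvm.u_obj, oldoff, oldoff + bytelen,
                            PGO_CLEANIT|PGO_SYNCIO);
                        simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
                }
        }
        return (error);
}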
@@ -1506,11 +1479,11 @@ msdosfs_readdir(v)
while (uio->uio_resid > 0) {
lbn = de_cluster(pmp, offset - bias);
on = (offset - bias) & pmp->pm_crbomask;
- n = min(pmp->pm_bpcluster - on, uio->uio_resid);
+ n = MIN(pmp->pm_bpcluster - on, uio->uio_resid);
diff = dep->de_FileSize - (offset - bias);
if (diff <= 0)
break;
- n = min(n, diff);
+ n = MIN(n, diff);
if ((error = pcbmap(dep, lbn, &bn, &cn, &blsize)) != 0)
break;
error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
@@ -1518,7 +1491,7 @@ msdosfs_readdir(v)
brelse(bp);
return (error);
}
- n = min(n, blsize - bp->b_resid);
+ n = MIN(n, blsize - bp->b_resid);
/*
* Convert from dos directory entries to fs-independent
@@ -1779,12 +1752,12 @@ msdosfs_strategy(v)
biodone(bp);
return (error);
}
-#ifdef DIAGNOSTIC
-#endif
+
/*
* Read/write the block from/to the disk that contains the desired
* file block.
*/
+
vp = dep->de_devvp;
bp->b_dev = vp->v_rdev;
VOCALL(vp->v_op, VOFFSET(vop_strategy), ap);
@@ -1902,7 +1875,10 @@ struct vnodeopv_entry_desc msdosfs_vnodeop_entries[] = {
{ &vop_advlock_desc, msdosfs_advlock }, /* advlock */
{ &vop_reallocblks_desc, msdosfs_reallocblks }, /* reallocblks */
{ &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */
- { (struct vnodeop_desc *)NULL, (int (*) __P((void *)))NULL }
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_putpages_desc, genfs_putpages },
+ { &vop_size_desc, genfs_size },
+ { NULL, NULL }
};
struct vnodeopv_desc msdosfs_vnodeop_opv_desc =
{ &msdosfs_vnodeop_p, msdosfs_vnodeop_entries };
diff --git a/sys/nfs/nfs.h b/sys/nfs/nfs.h
index 33435dc23e4..b86819902f2 100644
--- a/sys/nfs/nfs.h
+++ b/sys/nfs/nfs.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs.h,v 1.13 2001/09/16 00:42:44 millert Exp $ */
+/* $OpenBSD: nfs.h,v 1.14 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs.h,v 1.10.4.1 1996/05/27 11:23:56 fvdl Exp $ */
/*
@@ -78,8 +78,18 @@
* Ideally, NFS_DIRBLKSIZ should be bigger, but I've seen servers with
* broken NFS/ethernet drivers that won't work with anything bigger (Linux..)
*/
-#define NFS_DIRBLKSIZ 1024 /* Must be a multiple of DIRBLKSIZ */
+#if 1
+/*
+ * XXXUBC temp hack because of the removal of b_validend.
+ * eventually we'll store NFS VDIR data in the page cache as well,
+ * we'll fix this at that point.
+ */
+#define NFS_DIRBLKSIZ PAGE_SIZE
+#define NFS_READDIRBLKSIZ PAGE_SIZE
+#else
+#define NFS_DIRBLKSIZ 1024 /* Must be a multiple of DIRBLKSIZ */
#define NFS_READDIRBLKSIZ 512 /* Size of read dir blocks. XXX */
+#endif
/*
* Oddballs
@@ -111,10 +121,10 @@
#endif
/*
- * The B_INVAFTERWRITE flag should be set to whatever is required by the
- * buffer cache code to say "Invalidate the block after it is written back".
+ * Use the vm_page flag reserved for pager use to indicate pages
+ * which have been written to the server but not yet committed.
*/
-#define B_INVAFTERWRITE B_INVAL
+#define PG_NEEDCOMMIT PG_PAGER1
/*
* The IO_METASYNC flag should be implemented for local file systems.
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
index 1f33bc2eab7..42b25763a88 100644
--- a/sys/nfs/nfs_bio.c
+++ b/sys/nfs/nfs_bio.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_bio.c,v 1.24 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_bio.c,v 1.25 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $ */
/*
@@ -50,8 +50,9 @@
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
+#include <sys/pool.h>
-#include <uvm/uvm_extern.h>
+#include <uvm/uvm.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
@@ -70,20 +71,19 @@ struct nfsstats nfsstats;
*/
int
nfs_bioread(vp, uio, ioflag, cred)
- register struct vnode *vp;
- register struct uio *uio;
+ struct vnode *vp;
+ struct uio *uio;
int ioflag;
struct ucred *cred;
{
- register struct nfsnode *np = VTONFS(vp);
- register int biosize, diff;
- struct buf *bp = NULL, *rabp;
+ struct nfsnode *np = VTONFS(vp);
+ int biosize;
+ struct buf *bp = NULL;
struct vattr vattr;
struct proc *p;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- daddr_t lbn, bn, rabn;
caddr_t baddr;
- int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
+ int got_buf = 0, error = 0, n = 0, on = 0;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_READ)
@@ -153,87 +153,25 @@ nfs_bioread(vp, uio, ioflag, cred)
switch (vp->v_type) {
case VREG:
nfsstats.biocache_reads++;
- lbn = uio->uio_offset / biosize;
- on = uio->uio_offset & (biosize - 1);
- bn = lbn * (biosize / DEV_BSIZE);
- not_readin = 1;
-
- /*
- * Start the read ahead(s), as required.
- */
- if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
- for (nra = 0; nra < nmp->nm_readahead &&
- (lbn + 1 + nra) * biosize < np->n_size; nra++) {
- rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
- if (!incore(vp, rabn)) {
- rabp = nfs_getcacheblk(vp, rabn, biosize, p);
- if (!rabp)
- return (EINTR);
- if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
- rabp->b_flags |= (B_READ | B_ASYNC);
- if (nfs_asyncio(rabp)) {
- rabp->b_flags |= B_INVAL;
- brelse(rabp);
- }
- } else
- brelse(rabp);
- }
- }
- }
+ error = 0;
+ while (uio->uio_resid > 0) {
+ void *win;
+ vsize_t bytelen = MIN(np->n_size - uio->uio_offset,
+ uio->uio_resid);
- /*
- * If the block is in the cache and has the required data
- * in a valid region, just copy it out.
- * Otherwise, get the block and write back/read in,
- * as required.
- */
- if ((bp = incore(vp, bn)) &&
- (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
- (B_BUSY | B_WRITEINPROG))
- got_buf = 0;
- else {
-again:
- bp = nfs_getcacheblk(vp, bn, biosize, p);
- if (!bp)
- return (EINTR);
- got_buf = 1;
- if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
- bp->b_flags |= B_READ;
- not_readin = 0;
- error = nfs_doio(bp, p);
- if (error) {
- brelse(bp);
- return (error);
- }
- }
- }
- n = min((unsigned)(biosize - on), uio->uio_resid);
- diff = np->n_size - uio->uio_offset;
- if (diff < n)
- n = diff;
- if (not_readin && n > 0) {
- if (on < bp->b_validoff || (on + n) > bp->b_validend) {
- if (!got_buf) {
- bp = nfs_getcacheblk(vp, bn, biosize, p);
- if (!bp)
- return (EINTR);
- got_buf = 1;
- }
- bp->b_flags |= B_INVAFTERWRITE;
- if (bp->b_dirtyend > 0) {
- if ((bp->b_flags & B_DELWRI) == 0)
- panic("nfsbioread");
- if (VOP_BWRITE(bp) == EINTR)
- return (EINTR);
- } else
- brelse(bp);
- goto again;
+ if (bytelen == 0)
+ break;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
}
}
- diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
- if (diff < n)
- n = diff;
+ n = 0;
break;
+
case VLNK:
nfsstats.biocache_readlinks++;
bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
@@ -247,7 +185,7 @@ again:
return (error);
}
}
- n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
+ n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
got_buf = 1;
on = 0;
break;
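
The regular-file read path above is the mirror image of the msdosfs write loop earlier in this diff; the one subtlety is the clamp that keeps the mapped window from extending past EOF. A minimal sketch under the same assumptions (np->n_size is kept in sync with the vnode's UVM object size; the helper name is hypothetical):

int
ubc_read_sketch(struct vnode *vp, struct uio *uio)
{
        struct nfsnode *np = VTONFS(vp);
        void *win;
        vsize_t bytelen;
        int error = 0;

        while (uio->uio_resid > 0) {
                /* never map beyond end of file */
                bytelen = MIN(np->n_size - uio->uio_offset, uio->uio_resid);
                if (bytelen == 0)
                        break;
                win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
                    &bytelen, UBC_READ);
                error = uiomove(win, bytelen, uio);
                ubc_release(win, 0);
                if (error)
                        break;
        }
        return (error);
}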
@@ -289,18 +227,17 @@ nfs_write(v)
int a_ioflag;
struct ucred *a_cred;
} */ *ap = v;
- register int biosize;
- register struct uio *uio = ap->a_uio;
+ int biosize;
+ struct uio *uio = ap->a_uio;
struct proc *p = uio->uio_procp;
- register struct vnode *vp = ap->a_vp;
+ struct vnode *vp = ap->a_vp;
struct nfsnode *np = VTONFS(vp);
- register struct ucred *cred = ap->a_cred;
+ struct ucred *cred = ap->a_cred;
int ioflag = ap->a_ioflag;
- struct buf *bp;
struct vattr vattr;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- daddr_t lbn, bn;
- int n, on, error = 0;
+ int error = 0;
+ int rv;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
@@ -360,85 +297,47 @@ nfs_write(v)
*/
biosize = nmp->nm_rsize;
do {
-
- /*
- * XXX make sure we aren't cached in the VM page cache
- */
- uvm_vnp_uncache(vp);
+ void *win;
+ voff_t oldoff = uio->uio_offset;
+ vsize_t bytelen = uio->uio_resid;
nfsstats.biocache_writes++;
- lbn = uio->uio_offset / biosize;
- on = uio->uio_offset & (biosize-1);
- n = min((unsigned)(biosize - on), uio->uio_resid);
- bn = lbn * (biosize / DEV_BSIZE);
-again:
- bp = nfs_getcacheblk(vp, bn, biosize, p);
- if (!bp)
- return (EINTR);
np->n_flag |= NMODIFIED;
- if (uio->uio_offset + n > np->n_size) {
- np->n_size = uio->uio_offset + n;
- uvm_vnp_setsize(vp, (u_long)np->n_size);
- }
-
- /*
- * If the new write will leave a contiguous dirty
- * area, just update the b_dirtyoff and b_dirtyend,
- * otherwise force a write rpc of the old dirty area.
- */
- if (bp->b_dirtyend > 0 &&
- (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
- bp->b_proc = p;
- if (VOP_BWRITE(bp) == EINTR)
- return (EINTR);
- goto again;
- }
-
- error = uiomove((char *)bp->b_data + on, n, uio);
- if (error) {
- bp->b_flags |= B_ERROR;
- brelse(bp);
- return (error);
+ if (np->n_size < uio->uio_offset + bytelen) {
+ np->n_size = uio->uio_offset + bytelen;
+ uvm_vnp_setsize(vp, np->n_size);
}
- if (bp->b_dirtyend > 0) {
- bp->b_dirtyoff = min(on, bp->b_dirtyoff);
- bp->b_dirtyend = max((on + n), bp->b_dirtyend);
- } else {
- bp->b_dirtyoff = on;
- bp->b_dirtyend = on + n;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, &bytelen,
+ UBC_WRITE);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ rv = 1;
+ if ((ioflag & IO_SYNC)) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj,
+ oldoff & ~(nmp->nm_wsize - 1),
+ uio->uio_offset & ~(nmp->nm_wsize - 1),
+ PGO_CLEANIT|PGO_SYNCIO);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ } else if ((oldoff & ~(nmp->nm_wsize - 1)) !=
+ (uio->uio_offset & ~(nmp->nm_wsize - 1))) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj,
+ oldoff & ~(nmp->nm_wsize - 1),
+ uio->uio_offset & ~(nmp->nm_wsize - 1),
+ PGO_CLEANIT|PGO_WEAK);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
}
- if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
- bp->b_validoff > bp->b_dirtyend) {
- bp->b_validoff = bp->b_dirtyoff;
- bp->b_validend = bp->b_dirtyend;
- } else {
- bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
- bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
+ if (!rv) {
+ error = EIO;
}
-
- /*
- * Since this block is being modified, it must be written
- * again and not just committed.
- */
- bp->b_flags &= ~B_NEEDCOMMIT;
-
- /*
- * If the lease is non-cachable or IO_SYNC do bwrite().
- */
- if (ioflag & IO_SYNC) {
- bp->b_proc = p;
- error = VOP_BWRITE(bp);
- if (error)
- return (error);
- } else if ((n + on) == biosize) {
- bp->b_proc = (struct proc *)0;
- bp->b_flags |= B_ASYNC;
- (void)nfs_writebp(bp, 0);
- } else {
- bdwrite(bp);
+ if (error) {
+ break;
}
- } while (uio->uio_resid > 0 && n > 0);
- return (0);
+ } while (uio->uio_resid > 0);
+ return (error);
}
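
The masking in the pgo_flush() calls above rounds both ends of the flush range down to an nm_wsize boundary, so the async path only flushes whole write-sized blocks once the offset has crossed into the next one. A small worked illustration, assuming nm_wsize is a power of two (the helper name is hypothetical):

/* round a file offset down to an nm_wsize boundary */
off_t
wsize_trunc(off_t off, int wsize)
{
        /* wsize is a power of two, so masking rounds down */
        return (off & ~((off_t)wsize - 1));
}

/*
 * Example: with nm_wsize = 32768, a write advancing uio_offset from
 * 40000 to 70000 gives wsize_trunc(40000, 32768) == 32768 and
 * wsize_trunc(70000, 32768) == 65536, so the non-sync path flushes
 * [32768, 65536) and leaves the partial block past 65536 dirty.
 */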
/*
@@ -460,9 +359,9 @@ nfs_getcacheblk(vp, bn, size, p)
if (nmp->nm_flag & NFSMNT_INT) {
bp = getblk(vp, bn, size, PCATCH, 0);
- while (bp == (struct buf *)0) {
- if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
- return ((struct buf *)0);
+ while (bp == NULL) {
+ if (nfs_sigintr(nmp, NULL, p))
+ return (NULL);
bp = getblk(vp, bn, size, 0, 2 * hz);
}
} else
@@ -502,7 +401,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg)
np->n_flag |= NFLUSHWANT;
error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
slptimeo);
- if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
+ if (error && intrflg && nfs_sigintr(nmp, NULL, p))
return (EINTR);
}
@@ -512,7 +411,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg)
np->n_flag |= NFLUSHINPROG;
error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
while (error) {
- if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+ if (intrflg && nfs_sigintr(nmp, NULL, p)) {
np->n_flag &= ~NFLUSHINPROG;
if (np->n_flag & NFLUSHWANT) {
np->n_flag &= ~NFLUSHWANT;
@@ -539,41 +438,20 @@ int
nfs_asyncio(bp)
struct buf *bp;
{
- int i,s;
+ int i;
if (nfs_numasync == 0)
return (EIO);
- for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
if (nfs_iodwant[i]) {
- if ((bp->b_flags & B_READ) == 0) {
- bp->b_flags |= B_WRITEINPROG;
- }
-
TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
- nfs_iodwant[i] = (struct proc *)0;
+ nfs_iodwant[i] = NULL;
wakeup((caddr_t)&nfs_iodwant[i]);
return (0);
}
+ }
- /*
- * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE
- * return EIO so the process will call nfs_doio() and do it
- * synchronously.
- */
- if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
- return (EIO);
-
- /*
- * Just turn the async write into a delayed write, instead of
- * doing in synchronously. Hopefully, at least one of the nfsiods
- * is currently doing a write for this file and will pick up the
- * delayed writes before going back to sleep.
- */
- s = splbio();
- buf_dirty(bp);
- splx(s);
- biodone(bp);
- return (0);
+ return (EIO);
}
/*
@@ -589,7 +467,7 @@ nfs_doio(bp, p)
register struct vnode *vp;
struct nfsnode *np;
struct nfsmount *nmp;
- int s, error = 0, diff, len, iomode, must_commit = 0;
+ int error = 0, diff, len, iomode, must_commit = 0;
struct uio uio;
struct iovec io;
@@ -636,9 +514,7 @@ nfs_doio(bp, p)
uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
nfsstats.read_bios++;
error = nfs_readrpc(vp, uiop);
- if (!error) {
- bp->b_validoff = 0;
- if (uiop->uio_resid) {
+ if (!error && uiop->uio_resid) {
/*
* If len > 0, there is a hole in the file and
* no writes after the hole have been pushed to
@@ -649,13 +525,9 @@ nfs_doio(bp, p)
len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
+ diff);
if (len > 0) {
- len = min(len, uiop->uio_resid);
- bzero((char *)bp->b_data + diff, len);
- bp->b_validend = diff + len;
- } else
- bp->b_validend = diff;
- } else
- bp->b_validend = bp->b_bcount;
+ len = MIN(len, uiop->uio_resid);
+ memset((char *)bp->b_data + diff, 0, len);
+ }
}
if (p && (vp->v_flag & VTEXT) &&
(np->n_mtime != np->n_vattr.va_mtime.tv_sec)) {
@@ -672,62 +544,19 @@ nfs_doio(bp, p)
default:
printf("nfs_doio: type %x unexpected\n",vp->v_type);
break;
- };
+ }
if (error) {
bp->b_flags |= B_ERROR;
bp->b_error = error;
}
} else {
- io.iov_len = uiop->uio_resid = bp->b_dirtyend
- - bp->b_dirtyoff;
- uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
- + bp->b_dirtyoff;
- io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
+ io.iov_base = bp->b_data;
+ io.iov_len = uiop->uio_resid = bp->b_bcount;
+ uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
uiop->uio_rw = UIO_WRITE;
nfsstats.write_bios++;
- if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
- iomode = NFSV3WRITE_UNSTABLE;
- else
- iomode = NFSV3WRITE_FILESYNC;
- bp->b_flags |= B_WRITEINPROG;
-#ifdef fvdl_debug
- printf("nfs_doio(%x): bp %x doff %d dend %d\n",
- vp, bp, bp->b_dirtyoff, bp->b_dirtyend);
-#endif
+ iomode = NFSV3WRITE_UNSTABLE;
error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
- if (!error && iomode == NFSV3WRITE_UNSTABLE)
- bp->b_flags |= B_NEEDCOMMIT;
- else
- bp->b_flags &= ~B_NEEDCOMMIT;
- bp->b_flags &= ~B_WRITEINPROG;
-
- /*
- * For an interrupted write, the buffer is still valid and the
- * write hasn't been pushed to the server yet, so we can't set
- * B_ERROR and report the interruption by setting B_EINTR. For
- * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
- * is essentially a noop.
- * For the case of a V3 write rpc not being committed to stable
- * storage, the block is still dirty and requires either a commit
- * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
- * before the block is reused. This is indicated by setting the
- * B_DELWRI and B_NEEDCOMMIT flags.
- */
- if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
- s = splbio();
- buf_dirty(bp);
- splx(s);
-
- if (!(bp->b_flags & B_ASYNC) && error)
- bp->b_flags |= B_EINTR;
- } else {
- if (error) {
- bp->b_flags |= B_ERROR;
- bp->b_error = np->n_error = error;
- np->n_flag |= NWRITEERR;
- }
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- }
}
bp->b_resid = uiop->uio_resid;
if (must_commit)
@@ -735,3 +564,590 @@ nfs_doio(bp, p)
biodone(bp);
return (error);
}
+
+/*
+ * Vnode op for VM getpages.
+ */
+int
+nfs_getpages(v)
+ void *v;
+{
+ struct vop_getpages_args /* {
+ struct vnode *a_vp;
+ voff_t a_offset;
+ vm_page_t *a_m;
+ int *a_count;
+ int a_centeridx;
+ vm_prot_t a_access_type;
+ int a_advice;
+ int a_flags;
+ } */ *ap = v;
+
+ off_t eof, offset, origoffset, startoffset, endoffset;
+ int s, i, error, npages, orignpages, npgs, ridx, pidx, pcount;
+ vaddr_t kva;
+ struct buf *bp, *mbp;
+ struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
+ int flags = ap->a_flags;
+ int bsize;
+ struct vm_page *pgs[16]; /* XXXUBC 16 */
+ boolean_t v3 = NFS_ISV3(vp);
+ boolean_t async = (flags & PGO_SYNCIO) == 0;
+ boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
+ struct proc *p = curproc;
+
+ UVMHIST_FUNC("nfs_getpages"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x count %d", vp, (int)ap->a_offset,
+ *ap->a_count,0);
+
+#ifdef DIAGNOSTIC
+ if (ap->a_centeridx < 0 || ap->a_centeridx >= *ap->a_count) {
+ panic("nfs_getpages: centeridx %d out of range",
+ ap->a_centeridx);
+ }
+#endif
+
+ error = 0;
+ origoffset = ap->a_offset;
+ eof = vp->v_uvm.u_size;
+ if (origoffset >= eof) {
+ if ((flags & PGO_LOCKED) == 0) {
+ simple_unlock(&uobj->vmobjlock);
+ }
+ UVMHIST_LOG(ubchist, "off 0x%x past EOF 0x%x",
+ (int)origoffset, (int)eof,0,0);
+ return EINVAL;
+ }
+
+ if (flags & PGO_LOCKED) {
+ uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
+ UFP_NOWAIT|UFP_NOALLOC);
+ return 0;
+ }
+
+ /* vnode is VOP_LOCKed, uobj is locked */
+
+ bsize = nmp->nm_rsize;
+ orignpages = MIN(*ap->a_count,
+ round_page(eof - origoffset) >> PAGE_SHIFT);
+ npages = orignpages;
+ startoffset = origoffset & ~(bsize - 1);
+ endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
+ + bsize - 1) & ~(bsize - 1));
+ endoffset = MIN(endoffset, round_page(eof));
+ ridx = (origoffset - startoffset) >> PAGE_SHIFT;
+
+ if (!async && !write) {
+ int rapages = MAX(PAGE_SIZE, nmp->nm_rsize) >> PAGE_SHIFT;
+
+ (void) VOP_GETPAGES(vp, endoffset, NULL, &rapages, 0,
+ VM_PROT_READ, 0, 0);
+ simple_lock(&uobj->vmobjlock);
+ }
+
+ UVMHIST_LOG(ubchist, "npages %d offset 0x%x", npages,
+ (int)origoffset, 0,0);
+ memset(pgs, 0, sizeof(pgs));
+ uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);
+
+ if (flags & PGO_OVERWRITE) {
+ UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
+
+ /* XXXUBC for now, zero the page if we allocated it */
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if (pg->flags & PG_FAKE) {
+ uvm_pagezero(pg);
+ pg->flags &= ~(PG_FAKE);
+ }
+ }
+ npages += ridx;
+ if (v3) {
+ simple_unlock(&uobj->vmobjlock);
+ goto uncommit;
+ }
+ goto out;
+ }
+
+ /*
+ * if the pages are already resident, just return them.
+ */
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if ((pg->flags & PG_FAKE) != 0 ||
+ ((ap->a_access_type & VM_PROT_WRITE) &&
+ (pg->flags & PG_RDONLY))) {
+ break;
+ }
+ }
+ if (i == npages) {
+ UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
+ npages += ridx;
+ goto out;
+ }
+
+ /*
+ * the page wasn't resident and we're not overwriting,
+ * so we're going to have to do some i/o.
+ * find any additional pages needed to cover the expanded range.
+ */
+
+ if (startoffset != origoffset ||
+ startoffset + (npages << PAGE_SHIFT) != endoffset) {
+
+ /*
+ * XXXUBC we need to avoid deadlocks caused by locking
+ * additional pages at lower offsets than pages we
+ * already have locked. for now, unlock them all and
+ * start over.
+ */
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if (pg->flags & PG_FAKE) {
+ pg->flags |= PG_RELEASED;
+ }
+ }
+ uvm_page_unbusy(&pgs[ridx], npages);
+ memset(pgs, 0, sizeof(pgs));
+
+ UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
+ startoffset, endoffset, 0,0);
+ npages = (endoffset - startoffset) >> PAGE_SHIFT;
+ npgs = npages;
+ uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
+ }
+ simple_unlock(&uobj->vmobjlock);
+
+ /*
+ * update the cached read creds for this node.
+ */
+
+ if (np->n_rcred) {
+ crfree(np->n_rcred);
+ }
+ np->n_rcred = curproc->p_ucred;
+ crhold(np->n_rcred);
+
+ /*
+ * read the desired page(s).
+ */
+
+ totalbytes = npages << PAGE_SHIFT;
+ bytes = MIN(totalbytes, vp->v_uvm.u_size - startoffset);
+ tailbytes = totalbytes - bytes;
+ skipbytes = 0;
+
+ kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
+ UVMPAGER_MAPIN_READ);
+
+ s = splbio();
+ mbp = pool_get(&bufpool, PR_WAITOK);
+ splx(s);
+ mbp->b_bufsize = totalbytes;
+ mbp->b_data = (void *)kva;
+ mbp->b_resid = mbp->b_bcount = bytes;
+ mbp->b_flags = B_BUSY|B_READ|(async ? B_CALL|B_ASYNC : 0);
+ mbp->b_iodone = uvm_aio_biodone;
+ mbp->b_vp = vp;
+ mbp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&mbp->b_dep);
+
+ /*
+ * if EOF is in the middle of the last page, zero the part past EOF.
+ */
+
+ if (tailbytes > 0 && (pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE)) {
+ memset((char *)kva + bytes, 0, tailbytes);
+ }
+
+ /*
+ * now loop over the pages, reading as needed.
+ */
+
+ bp = NULL;
+ for (offset = startoffset;
+ bytes > 0;
+ offset += iobytes, bytes -= iobytes) {
+
+ /*
+ * skip pages which don't need to be read.
+ */
+
+ pidx = (offset - startoffset) >> PAGE_SHIFT;
+ UVMHIST_LOG(ubchist, "pidx %d offset 0x%x startoffset 0x%x",
+ pidx, (int)offset, (int)startoffset,0);
+ while ((pgs[pidx]->flags & PG_FAKE) == 0) {
+ size_t b;
+
+ KASSERT((offset & (PAGE_SIZE - 1)) == 0);
+ b = MIN(PAGE_SIZE, bytes);
+ offset += b;
+ bytes -= b;
+ skipbytes += b;
+ pidx++;
+ UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
+ (int)offset, 0,0,0);
+ if (bytes == 0) {
+ goto loopdone;
+ }
+ }
+
+ /*
+ * see how many pages can be read with this i/o.
+ * reduce the i/o size if necessary.
+ */
+
+ iobytes = bytes;
+ if (offset + iobytes > round_page(offset)) {
+ pcount = 1;
+ while (pidx + pcount < npages &&
+ pgs[pidx + pcount]->flags & PG_FAKE) {
+ pcount++;
+ }
+ iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
+ (offset - trunc_page(offset)));
+ }
+ iobytes = MIN(iobytes, nmp->nm_rsize);
+
+ /*
+ * allocate a sub-buf for this piece of the i/o
+ * (or just use mbp if there's only 1 piece),
+ * and start it going.
+ */
+
+ if (offset == startoffset && iobytes == bytes) {
+ bp = mbp;
+ } else {
+ s = splbio();
+ bp = pool_get(&bufpool, PR_WAITOK);
+ splx(s);
+ bp->b_data = (char *)kva + offset - startoffset;
+ bp->b_resid = bp->b_bcount = iobytes;
+ bp->b_flags = B_BUSY|B_READ|B_CALL|B_ASYNC;
+ bp->b_iodone = uvm_aio_biodone1;
+ bp->b_vp = vp;
+ bp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&bp->b_dep);
+ }
+ bp->b_private = mbp;
+ bp->b_lblkno = bp->b_blkno = offset >> DEV_BSHIFT;
+
+ UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
+ bp, offset, iobytes, bp->b_blkno);
+
+ VOP_STRATEGY(bp);
+ }
+
+loopdone:
+ if (skipbytes) {
+ s = splbio();
+ mbp->b_resid -= skipbytes;
+ if (mbp->b_resid == 0) {
+ biodone(mbp);
+ }
+ splx(s);
+ }
+ if (async) {
+ UVMHIST_LOG(ubchist, "returning PEND",0,0,0,0);
+ return EINPROGRESS;
+ }
+ if (bp != NULL) {
+ error = biowait(mbp);
+ }
+ s = splbio();
+ pool_put(&bufpool, mbp);
+ splx(s);
+ uvm_pagermapout(kva, npages);
+
+ if (write && v3) {
+uncommit:
+ lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p);
+ nfs_del_committed_range(vp, origoffset, npages << PAGE_SHIFT);
+ nfs_del_tobecommitted_range(vp, origoffset, npages << PAGE_SHIFT);
+ simple_lock(&uobj->vmobjlock);
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY);
+ }
+ simple_unlock(&uobj->vmobjlock);
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ }
+
+ simple_lock(&uobj->vmobjlock);
+
+out:
+ if (error) {
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
+ pgs[i], pgs[i]->flags, 0,0);
+ if (pgs[i]->flags & PG_WANTED) {
+ wakeup(pgs[i]);
+ }
+ if (pgs[i]->flags & PG_RELEASED) {
+ uvm_unlock_pageq();
+ (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
+ uvm_lock_pageq();
+ continue;
+ }
+ if (pgs[i]->flags & PG_FAKE) {
+ uvm_pagefree(pgs[i]);
+ continue;
+ }
+ uvm_pageactivate(pgs[i]);
+ pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pgs[i], NULL);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
+ return error;
+ }
+
+ UVMHIST_LOG(ubchist, "ridx %d count %d", ridx, npages, 0,0);
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
+ pgs[i], pgs[i]->flags, 0,0);
+ if (pgs[i]->flags & PG_FAKE) {
+ UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
+ pgs[i], (int)pgs[i]->offset,0,0);
+ pgs[i]->flags &= ~(PG_FAKE);
+ pmap_clear_modify(pgs[i]);
+ pmap_clear_reference(pgs[i]);
+ }
+ if (i < ridx || i >= ridx + orignpages || async) {
+ UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
+ pgs[i], (int)pgs[i]->offset,0,0);
+ if (pgs[i]->flags & PG_WANTED) {
+ wakeup(pgs[i]);
+ }
+ if (pgs[i]->flags & PG_RELEASED) {
+ uvm_unlock_pageq();
+ (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
+ uvm_lock_pageq();
+ continue;
+ }
+ uvm_pageactivate(pgs[i]);
+ pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pgs[i], NULL);
+ }
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ if (ap->a_m != NULL) {
+ memcpy(ap->a_m, &pgs[ridx],
+ *ap->a_count * sizeof(struct vm_page *));
+ }
+ return 0;
+}
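
nfs_getpages() splits one logical transfer into a master buffer plus one sub-buffer per contiguous run of missing pages; each sub-buffer's b_private points back at the master so uvm_aio_biodone1() can account the partial completion against it. The scheme is easier to see in isolation — a condensed sketch using the same buffer fields as the diff (the helper itself is hypothetical):

struct buf *
start_subio(struct buf *mbp, struct vnode *vp, vaddr_t kva,
    off_t startoffset, off_t offset, size_t iobytes)
{
        struct buf *bp;
        int s;

        s = splbio();
        bp = pool_get(&bufpool, PR_WAITOK);
        splx(s);
        bp->b_data = (char *)kva + offset - startoffset;
        bp->b_resid = bp->b_bcount = iobytes;
        bp->b_flags = B_BUSY|B_READ|B_CALL|B_ASYNC;
        bp->b_iodone = uvm_aio_biodone1;  /* completes a piece of mbp */
        bp->b_vp = vp;
        LIST_INIT(&bp->b_dep);
        bp->b_private = mbp;              /* link back to the master */
        bp->b_lblkno = bp->b_blkno = offset >> DEV_BSHIFT;
        VOP_STRATEGY(bp);
        return (bp);
}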
+
+/*
+ * Vnode op for VM putpages.
+ */
+int
+nfs_putpages(v)
+ void *v;
+{
+ struct vop_putpages_args /* {
+ struct vnode *a_vp;
+ struct vm_page **a_m;
+ int a_count;
+ int a_flags;
+ int *a_rtvals;
+ } */ *ap = v;
+
+ struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ struct buf *bp, *mbp;
+ struct vm_page **pgs = ap->a_m;
+ int flags = ap->a_flags;
+ int npages = ap->a_count;
+ int s, error, i;
+ size_t bytes, iobytes, skipbytes;
+ vaddr_t kva;
+ off_t offset, origoffset, commitoff;
+ uint32_t commitbytes;
+ boolean_t v3 = NFS_ISV3(vp);
+ boolean_t async = (flags & PGO_SYNCIO) == 0;
+ boolean_t weak = (flags & PGO_WEAK) && v3;
+ struct proc *p = curproc;
+ UVMHIST_FUNC("nfs_putpages"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "vp %p pgp %p count %d",
+ vp, ap->a_m, ap->a_count,0);
+
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+
+ error = 0;
+ bp = NULL;
+ origoffset = pgs[0]->offset;
+ bytes = MIN(ap->a_count << PAGE_SHIFT, vp->v_uvm.u_size - origoffset);
+ skipbytes = 0;
+
+ /*
+ * if the range has been committed already, mark the pages thus.
+ * if the range just needs to be committed and this is a weak
+ * putpage, we're done; otherwise commit the range now.
+ */
+
+ if (v3) {
+ lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p);
+ if (nfs_in_committed_range(vp, origoffset, bytes)) {
+ goto committed;
+ }
+ if (nfs_in_tobecommitted_range(vp, origoffset, bytes)) {
+ if (weak) {
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ return 0;
+ } else {
+ commitoff = np->n_pushlo;
+ commitbytes = (uint32_t)(np->n_pushhi -
+ np->n_pushlo);
+ goto commit;
+ }
+ }
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ }
+
+ /*
+ * otherwise write or commit all the pages.
+ */
+
+ kva = uvm_pagermapin(pgs, ap->a_count, UVMPAGER_MAPIN_WAITOK|
+ UVMPAGER_MAPIN_WRITE);
+
+ s = splbio();
+ vp->v_numoutput += 2;
+ mbp = pool_get(&bufpool, PR_WAITOK);
+ UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
+ vp, mbp, vp->v_numoutput, bytes);
+ splx(s);
+ mbp->b_bufsize = npages << PAGE_SHIFT;
+ mbp->b_data = (void *)kva;
+ mbp->b_resid = mbp->b_bcount = bytes;
+ mbp->b_flags = B_BUSY|B_WRITE|B_AGE |
+ (async ? B_CALL|B_ASYNC : 0) |
+ (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);
+ mbp->b_iodone = uvm_aio_biodone;
+ mbp->b_vp = vp;
+ mbp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&mbp->b_dep);
+
+ for (offset = origoffset;
+ bytes > 0;
+ offset += iobytes, bytes -= iobytes) {
+ iobytes = MIN(nmp->nm_wsize, bytes);
+
+ /*
+ * skip writing any pages which only need a commit.
+ */
+
+ if ((pgs[(offset - origoffset) >> PAGE_SHIFT]->flags &
+ PG_NEEDCOMMIT) != 0) {
+ KASSERT((offset & (PAGE_SIZE - 1)) == 0);
+ iobytes = MIN(PAGE_SIZE, bytes);
+ skipbytes += iobytes;
+ continue;
+ }
+
+ /* if it's really one i/o, don't make a second buf */
+ if (offset == origoffset && iobytes == bytes) {
+ bp = mbp;
+ } else {
+ s = splbio();
+ vp->v_numoutput++;
+ bp = pool_get(&bufpool, PR_WAITOK);
+ UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
+ vp, bp, vp->v_numoutput, 0);
+ splx(s);
+ bp->b_data = (char *)kva + (offset - origoffset);
+ bp->b_resid = bp->b_bcount = iobytes;
+ bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
+ bp->b_iodone = uvm_aio_biodone1;
+ bp->b_vp = vp;
+ bp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&bp->b_dep);
+ }
+ bp->b_private = mbp;
+ bp->b_lblkno = bp->b_blkno = (daddr_t)(offset >> DEV_BSHIFT);
+ UVMHIST_LOG(ubchist, "bp %p numout %d",
+ bp, vp->v_numoutput,0,0);
+ VOP_STRATEGY(bp);
+ }
+ if (skipbytes) {
+ UVMHIST_LOG(ubchist, "skipbytes %d", bytes, 0,0,0);
+ s = splbio();
+ mbp->b_resid -= skipbytes;
+ if (mbp->b_resid == 0) {
+ biodone(mbp);
+ }
+ splx(s);
+ }
+ if (async) {
+ return EINPROGRESS;
+ }
+ if (bp != NULL) {
+ error = biowait(mbp);
+ }
+
+ s = splbio();
+ if (mbp->b_vp)
+ vwakeup(mbp->b_vp);
+ pool_put(&bufpool, mbp);
+ splx(s);
+
+ uvm_pagermapout(kva, ap->a_count);
+ if (error || !v3) {
+ UVMHIST_LOG(ubchist, "returning error %d", error, 0,0,0);
+ return error;
+ }
+
+ /*
+ * for a weak put, mark the range as "to be committed"
+ * and mark the pages read-only so that we will be notified
+ * to remove the pages from the "to be committed" range
+ * if they are made dirty again.
+ * for a strong put, commit the pages and remove them from the
+ * "to be committed" range. also, mark them as writable
+ * and not cleanable with just a commit.
+ */
+
+ lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p);
+ if (weak) {
+ nfs_add_tobecommitted_range(vp, origoffset,
+ npages << PAGE_SHIFT);
+ for (i = 0; i < npages; i++) {
+ pgs[i]->flags |= PG_NEEDCOMMIT|PG_RDONLY;
+ }
+ } else {
+ commitoff = origoffset;
+ commitbytes = npages << PAGE_SHIFT;
+commit:
+ error = nfs_commit(vp, commitoff, commitbytes, curproc);
+ nfs_del_tobecommitted_range(vp, commitoff, commitbytes);
+committed:
+ for (i = 0; i < npages; i++) {
+ pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY);
+ }
+ }
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ return error;
+}
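
In prose, the v3 commit handling in nfs_putpages() is a four-way decision on the byte range being pushed. A condensed sketch — a hypothetical helper, with the caller assumed to hold n_commitlock, and the actual page writes and page-flag updates omitted:

int
commit_decision(struct vnode *vp, off_t off, off_t len, boolean_t weak,
    struct proc *p)
{
        struct nfsnode *np = VTONFS(vp);

        if (nfs_in_committed_range(vp, off, len))
                return (0);             /* already on stable storage */
        if (nfs_in_tobecommitted_range(vp, off, len)) {
                if (weak)
                        return (0);     /* defer the COMMIT rpc */
                /* commit the whole pending range, not just this put */
                return (nfs_commit(vp, np->n_pushlo,
                    (unsigned)(np->n_pushhi - np->n_pushlo), p));
        }
        if (weak) {
                /* written unstable; remember a commit is still owed */
                nfs_add_tobecommitted_range(vp, off, len);
                return (0);
        }
        return (nfs_commit(vp, off, (unsigned)len, p));
}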
diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c
index 987259eadc3..567738584da 100644
--- a/sys/nfs/nfs_node.c
+++ b/sys/nfs/nfs_node.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_node.c,v 1.16 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_node.c,v 1.17 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_node.c,v 1.16 1996/02/18 11:53:42 fvdl Exp $ */
/*
@@ -145,6 +145,7 @@ loop:
vp = nvp;
np = pool_get(&nfs_node_pool, PR_WAITOK);
bzero((caddr_t)np, sizeof *np);
+ lockinit(&np->n_commitlock, PINOD, "nfsclock", 0, 0);
vp->v_data = np;
np->n_vnode = vp;
@@ -169,6 +170,17 @@ loop:
np->n_fhp = &np->n_fh;
bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
np->n_fhsize = fhsize;
+
+ /*
+ * XXXUBC doing this while holding the nfs_hashlock is bad,
+ * but there's no alternative at the moment.
+ */
+ error = VOP_GETATTR(vp, &np->n_vattr, curproc->p_ucred, curproc);
+ if (error) {
+ lockmgr(&nfs_hashlock, LK_RELEASE, 0, p);
+ return (error);
+ }
+ uvm_vnp_setsize(vp, np->n_vattr.va_size);
+
lockmgr(&nfs_hashlock, LK_RELEASE, 0, p);
*npp = np;
return (0);
@@ -185,11 +197,12 @@ nfs_inactive(v)
struct nfsnode *np;
struct sillyrename *sp;
struct proc *p = curproc; /* XXX */
+ struct vnode *vp = ap->a_vp;
- np = VTONFS(ap->a_vp);
- if (prtactive && ap->a_vp->v_usecount != 0)
- vprint("nfs_inactive: pushing active", ap->a_vp);
- if (ap->a_vp->v_type != VDIR) {
+ np = VTONFS(vp);
+ if (prtactive && vp->v_usecount != 0)
+ vprint("nfs_inactive: pushing active", vp);
+ if (vp->v_type != VDIR) {
sp = np->n_sillyrename;
np->n_sillyrename = (struct sillyrename *)0;
} else
@@ -198,7 +211,7 @@ nfs_inactive(v)
/*
* Remove the silly file that was rename'd earlier
*/
- (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1);
+ (void) nfs_vinvalbuf(vp, 0, sp->s_cred, p, 1);
nfs_removeit(sp);
crfree(sp->s_cred);
vrele(sp->s_dvp);
@@ -206,7 +219,7 @@ nfs_inactive(v)
}
np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT);
- VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ VOP_UNLOCK(vp, 0, ap->a_p);
return (0);
}
diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c
index 9d4de9fd9a1..9534e7221da 100644
--- a/sys/nfs/nfs_serv.c
+++ b/sys/nfs/nfs_serv.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_serv.c,v 1.27 2001/11/06 19:53:21 miod Exp $ */
+/* $OpenBSD: nfs_serv.c,v 1.28 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_serv.c,v 1.34 1997/05/12 23:37:12 fvdl Exp $ */
/*
@@ -1663,8 +1663,6 @@ nfsrv_remove(nfsd, slp, procp, mrq)
error = EBUSY;
goto out;
}
- if (vp->v_flag & VTEXT)
- uvm_vnp_uncache(vp);
out:
if (!error) {
error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
@@ -3276,11 +3274,10 @@ nfsrv_access(vp, flags, cred, rdonly, p, override)
}
}
/*
- * If there's shared text associated with
- * the inode, try to free it up once. If
- * we fail, we can't allow writing.
+ * If the vnode is in use as a process's text,
+ * we can't allow writing.
*/
- if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
+ if ((vp->v_flag & VTEXT))
return (ETXTBSY);
}
error = VOP_ACCESS(vp, flags, cred, p);
diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c
index 9689d9f36a5..4a8bc11528d 100644
--- a/sys/nfs/nfs_subs.c
+++ b/sys/nfs/nfs_subs.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_subs.c,v 1.35 2001/11/06 19:53:21 miod Exp $ */
+/* $OpenBSD: nfs_subs.c,v 1.36 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_subs.c,v 1.27.4.3 1996/07/08 20:34:24 jtc Exp $ */
/*
@@ -39,6 +39,40 @@
* @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
*/
+/*
+ * Copyright 2000 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Frank van der Linden for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the NetBSD Project by
+ * Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ * or promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
/*
* These functions support the macros and help fiddle mbuf chains for
@@ -1241,17 +1275,14 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
vap->va_filerev = 0;
}
if (vap->va_size != np->n_size) {
- if (vap->va_type == VREG) {
- if (np->n_flag & NMODIFIED) {
- if (vap->va_size < np->n_size)
- vap->va_size = np->n_size;
- else
- np->n_size = vap->va_size;
- } else
- np->n_size = vap->va_size;
- uvm_vnp_setsize(vp, np->n_size);
- } else
+ if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) {
+ vap->va_size = np->n_size;
+ } else {
np->n_size = vap->va_size;
+ if (vap->va_type == VREG) {
+ uvm_vnp_setsize(vp, np->n_size);
+ }
+ }
}
np->n_attrstamp = time.tv_sec;
if (vaper != NULL) {
@@ -1741,26 +1772,216 @@ void
nfs_clearcommit(mp)
struct mount *mp;
{
- register struct vnode *vp, *nvp;
- register struct buf *bp, *nbp;
+ struct vnode *vp;
+ struct vm_page *pg;
+ struct nfsnode *np;
int s;
s = splbio();
-loop:
- for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
- if (vp->v_mount != mp) /* Paranoia */
- goto loop;
- nvp = vp->v_mntvnodes.le_next;
- for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
- nbp = bp->b_vnbufs.le_next;
- if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
- == (B_DELWRI | B_NEEDCOMMIT))
- bp->b_flags &= ~B_NEEDCOMMIT;
+ LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
+ if (vp->v_type == VNON)
+ continue;
+ np = VTONFS(vp);
+ np->n_pushlo = np->n_pushhi = np->n_pushedlo =
+ np->n_pushedhi = 0;
+ np->n_commitflags &=
+ ~(NFS_COMMIT_PUSH_VALID | NFS_COMMIT_PUSHED_VALID);
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ TAILQ_FOREACH(pg, &vp->v_uvm.u_obj.memq, listq) {
+ pg->flags &= ~PG_NEEDCOMMIT;
}
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
}
splx(s);
}
+void
+nfs_merge_commit_ranges(vp)
+ struct vnode *vp;
+{
+ struct nfsnode *np = VTONFS(vp);
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) {
+ np->n_pushedlo = np->n_pushlo;
+ np->n_pushedhi = np->n_pushhi;
+ np->n_commitflags |= NFS_COMMIT_PUSHED_VALID;
+ } else {
+ if (np->n_pushlo < np->n_pushedlo)
+ np->n_pushedlo = np->n_pushlo;
+ if (np->n_pushhi > np->n_pushedhi)
+ np->n_pushedhi = np->n_pushhi;
+ }
+
+ np->n_pushlo = np->n_pushhi = 0;
+ np->n_commitflags &= ~NFS_COMMIT_PUSH_VALID;
+
+#ifdef fvdl_debug
+ printf("merge: committed: %u - %u\n", (unsigned)np->n_pushedlo,
+ (unsigned)np->n_pushedhi);
+#endif
+}
+
+int
+nfs_in_committed_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID))
+ return 0;
+ lo = off;
+ hi = lo + len;
+
+ return (lo >= np->n_pushedlo && hi <= np->n_pushedhi);
+}
+
+int
+nfs_in_tobecommitted_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID))
+ return 0;
+ lo = off;
+ hi = lo + len;
+
+ return (lo >= np->n_pushlo && hi <= np->n_pushhi);
+}
+
+void
+nfs_add_committed_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ lo = off;
+ hi = lo + len;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) {
+ np->n_pushedlo = lo;
+ np->n_pushedhi = hi;
+ np->n_commitflags |= NFS_COMMIT_PUSHED_VALID;
+ } else {
+ if (hi > np->n_pushedhi)
+ np->n_pushedhi = hi;
+ if (lo < np->n_pushedlo)
+ np->n_pushedlo = lo;
+ }
+#ifdef fvdl_debug
+ printf("add: committed: %u - %u\n", (unsigned)np->n_pushedlo,
+ (unsigned)np->n_pushedhi);
+#endif
+}
+
+void
+nfs_del_committed_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID))
+ return;
+
+ lo = off;
+ hi = lo + len;
+
+ if (lo > np->n_pushedhi || hi < np->n_pushedlo)
+ return;
+ if (lo <= np->n_pushedlo)
+ np->n_pushedlo = hi;
+ else if (hi >= np->n_pushedhi)
+ np->n_pushedhi = lo;
+ else {
+ /*
+ * XXX There's only one range. If the deleted range
+ * is in the middle, pick the largest of the
+ * contiguous ranges that it leaves.
+ */
+ if ((lo - np->n_pushedlo) > (np->n_pushedhi - hi))
+ np->n_pushedhi = lo;
+ else
+ np->n_pushedlo = hi;
+ }
+#ifdef fvdl_debug
+ printf("del: committed: %u - %u\n", (unsigned)np->n_pushedlo,
+ (unsigned)np->n_pushedhi);
+#endif
+}
+
+void
+nfs_add_tobecommitted_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ lo = off;
+ hi = lo + len;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) {
+ np->n_pushlo = lo;
+ np->n_pushhi = hi;
+ np->n_commitflags |= NFS_COMMIT_PUSH_VALID;
+ } else {
+ if (lo < np->n_pushlo)
+ np->n_pushlo = lo;
+ if (hi > np->n_pushhi)
+ np->n_pushhi = hi;
+ }
+#ifdef fvdl_debug
+ printf("add: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo,
+ (unsigned)np->n_pushhi);
+#endif
+}
+
+void
+nfs_del_tobecommitted_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID))
+ return;
+
+ lo = off;
+ hi = lo + len;
+
+ if (lo > np->n_pushhi || hi < np->n_pushlo)
+ return;
+
+ if (lo <= np->n_pushlo)
+ np->n_pushlo = hi;
+ else if (hi >= np->n_pushhi)
+ np->n_pushhi = lo;
+ else {
+ /*
+ * XXX There's only one range. If the deleted range
+ * is in the middle, pick the largest of the
+ * contiguous ranges that it leaves.
+ */
+ if ((lo - np->n_pushlo) > (np->n_pushhi - hi))
+ np->n_pushhi = lo;
+ else
+ np->n_pushlo = hi;
+ }
+#ifdef fvdl_debug
+ printf("del: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo,
+ (unsigned)np->n_pushhi);
+#endif
+}
+
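Because each nfsnode tracks only a single committed and a single to-be-committed range, deleting from the middle of a range must throw one leftover piece away. A worked example of the heuristic (hypothetical numbers):

/*
 * With a committed range of [0, 100), nfs_del_committed_range(vp, 10, 20)
 * removes [10, 30).  The leftover pieces are [0, 10) and [30, 100); the
 * larger piece [30, 100) is kept, so n_pushedlo becomes 30 and
 * n_pushedhi stays 100.  Forgetting the smaller piece [0, 10) is safe:
 * at worst those pages are committed again later.
 */
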
/*
* Map errnos to NFS error numbers. For Version 3 also filter out error
* numbers not specified for the associated procedure.
diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c
index c71a662ccb2..5a189ba344d 100644
--- a/sys/nfs/nfs_syscalls.c
+++ b/sys/nfs/nfs_syscalls.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_syscalls.c,v 1.20 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_syscalls.c,v 1.21 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_syscalls.c,v 1.19 1996/02/18 11:53:52 fvdl Exp $ */
/*
@@ -913,10 +913,9 @@ int
nfssvc_iod(p)
struct proc *p;
{
- register struct buf *bp, *nbp;
- register int i, myiod;
- struct vnode *vp;
- int error = 0, s;
+ struct buf *bp;
+ int i, myiod;
+ int error = 0;
/*
* Assign my position or return error if too many already running
@@ -944,39 +943,7 @@ nfssvc_iod(p)
while ((bp = nfs_bufq.tqh_first) != NULL) {
/* Take one off the front of the list */
TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
- if (bp->b_flags & B_READ)
- (void) nfs_doio(bp, NULL);
- else do {
- /*
- * Look for a delayed write for the same vnode, so I can do
- * it now. We must grab it before calling nfs_doio() to
- * avoid any risk of the vnode getting vclean()'d while
- * we are doing the write rpc.
- */
- vp = bp->b_vp;
- s = splbio();
- for (nbp = vp->v_dirtyblkhd.lh_first; nbp;
- nbp = nbp->b_vnbufs.le_next) {
- if ((nbp->b_flags &
- (B_BUSY|B_DELWRI|B_NEEDCOMMIT|B_NOCACHE))!=B_DELWRI)
- continue;
- bremfree(nbp);
- nbp->b_flags |= (B_BUSY|B_ASYNC);
- break;
- }
- /*
- * For the delayed write, do the first part of nfs_bwrite()
- * up to, but not including nfs_strategy().
- */
- if (nbp) {
- nbp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
- buf_undirty(bp);
- nbp->b_vp->v_numoutput++;
- }
- splx(s);
-
- (void) nfs_doio(bp, NULL);
- } while ((bp = nbp) != NULL);
+ (void) nfs_doio(bp, NULL);
}
if (error) {
PRELE(p);
diff --git a/sys/nfs/nfs_var.h b/sys/nfs/nfs_var.h
index 861eaf3059e..71985e581a8 100644
--- a/sys/nfs/nfs_var.h
+++ b/sys/nfs/nfs_var.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_var.h,v 1.15 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_var.h,v 1.16 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_var.h,v 1.3 1996/02/18 11:53:54 fvdl Exp $ */
/*
@@ -119,7 +119,7 @@ int nfs_sillyrename __P((struct vnode *, struct vnode *,
struct componentname *));
int nfs_lookitup __P((struct vnode *, char *, int, struct ucred *,
struct proc *, struct nfsnode **));
-int nfs_commit __P((struct vnode *, u_quad_t, int, struct proc *));
+int nfs_commit __P((struct vnode *, u_quad_t, unsigned, struct proc *));
int nfs_bmap __P((void *));
int nfs_strategy __P((void *));
int nfs_mmap __P((void *));
@@ -134,7 +134,6 @@ int nfs_vfree __P((void *));
int nfs_truncate __P((void *));
int nfs_update __P((void *));
int nfs_bwrite __P((void *));
-int nfs_writebp __P((struct buf *, int));
int nfsspec_access __P((void *));
int nfsspec_read __P((void *));
int nfsspec_write __P((void *));
@@ -258,7 +257,16 @@ void nfsm_srvfattr __P((struct nfsrv_descript *, struct vattr *,
int nfsrv_fhtovp __P((fhandle_t *, int, struct vnode **, struct ucred *,
struct nfssvc_sock *, struct mbuf *, int *, int));
int netaddr_match __P((int, union nethostaddr *, struct mbuf *));
+
void nfs_clearcommit __P((struct mount *));
+void nfs_merge_commit_ranges __P((struct vnode *));
+int nfs_in_committed_range __P((struct vnode *, off_t, off_t));
+int nfs_in_tobecommitted_range __P((struct vnode *, off_t, off_t));
+void nfs_add_committed_range __P((struct vnode *, off_t, off_t));
+void nfs_del_committed_range __P((struct vnode *, off_t, off_t));
+void nfs_add_tobecommitted_range __P((struct vnode *, off_t, off_t));
+void nfs_del_tobecommitted_range __P((struct vnode *, off_t, off_t));
+
int nfsrv_errmap __P((struct nfsrv_descript *, int));
void nfsrvw_sort __P((gid_t *, int));
void nfsrv_setcred __P((struct ucred *, struct ucred *));
diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c
index 13420530fc3..91f84da52b6 100644
--- a/sys/nfs/nfs_vfsops.c
+++ b/sys/nfs/nfs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_vfsops.c,v 1.38 2001/11/14 23:37:33 mickey Exp $ */
+/* $OpenBSD: nfs_vfsops.c,v 1.39 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_vfsops.c,v 1.46.4.1 1996/05/25 22:40:35 fvdl Exp $ */
/*
@@ -748,6 +748,8 @@ mountnfs(argp, mp, nam, pth, hst)
* point.
*/
mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA;
+ mp->mnt_fs_bshift = DEV_BSHIFT;
+ mp->mnt_dev_bshift = -1;
return (0);
bad:
@@ -856,8 +858,9 @@ loop:
*/
if (vp->v_mount != mp)
goto loop;
- if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL ||
- waitfor == MNT_LAZY)
+ if (waitfor == MNT_LAZY || VOP_ISLOCKED(vp) ||
+ (LIST_EMPTY(&vp->v_dirtyblkhd) &&
+ vp->v_uvm.u_obj.uo_npages == 0))
continue;
if (vget(vp, LK_EXCLUSIVE, p))
goto loop;
diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c
index 0813b439cb2..4c176c1c1ec 100644
--- a/sys/nfs/nfs_vnops.c
+++ b/sys/nfs/nfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_vnops.c,v 1.39 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_vnops.c,v 1.40 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_vnops.c,v 1.62.4.1 1996/07/08 20:26:52 jtc Exp $ */
/*
@@ -126,7 +126,9 @@ struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
{ &vop_advlock_desc, nfs_advlock }, /* advlock */
{ &vop_reallocblks_desc, nfs_reallocblks }, /* reallocblks */
{ &vop_bwrite_desc, nfs_bwrite },
- { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
+ { &vop_getpages_desc, nfs_getpages }, /* getpages */
+ { &vop_putpages_desc, nfs_putpages }, /* putpages */
+ { NULL, NULL }
};
struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
@@ -151,7 +153,7 @@ struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
{ &vop_ioctl_desc, spec_ioctl }, /* ioctl */
{ &vop_select_desc, spec_select }, /* select */
{ &vop_revoke_desc, spec_revoke }, /* revoke */
- { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
{ &vop_remove_desc, spec_remove }, /* remove */
{ &vop_link_desc, spec_link }, /* link */
{ &vop_rename_desc, spec_rename }, /* rename */
@@ -373,11 +375,30 @@ nfs_open(v)
return (EACCES);
}
+ /*
+ * Initialize read and write creds here, for swapfiles
+ * and other paths that don't set the creds themselves.
+ */
+
+ if (ap->a_mode & FREAD) {
+ if (np->n_rcred) {
+ crfree(np->n_rcred);
+ }
+ np->n_rcred = ap->a_cred;
+ crhold(np->n_rcred);
+ }
+ if (ap->a_mode & FWRITE) {
+ if (np->n_wcred) {
+ crfree(np->n_wcred);
+ }
+ np->n_wcred = ap->a_cred;
+ crhold(np->n_wcred);
+ }
+
if (np->n_flag & NMODIFIED) {
if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
ap->a_p, 1)) == EINTR)
return (error);
- uvm_vnp_uncache(vp);
np->n_attrstamp = 0;
if (vp->v_type == VDIR)
np->n_direofoffset = 0;
@@ -395,7 +416,6 @@ nfs_open(v)
if ((error = nfs_vinvalbuf(vp, V_SAVE,
ap->a_cred, ap->a_p, 1)) == EINTR)
return (error);
- uvm_vnp_uncache(vp);
np->n_mtime = vattr.va_mtime.tv_sec;
}
}
@@ -2511,7 +2531,7 @@ int
nfs_commit(vp, offset, cnt, procp)
struct vnode *vp;
u_quad_t offset;
- int cnt;
+ unsigned cnt;
struct proc *procp;
{
caddr_t cp;
@@ -2626,9 +2646,7 @@ nfs_fsync(v)
}
/*
- * Flush all the blocks associated with a vnode.
- * Walk through the buffer pool and push any dirty pages
- * associated with the vnode.
+ * Flush all the data associated with a vnode.
*/
int
nfs_flush(vp, cred, waitfor, p, commit)
@@ -2638,154 +2656,19 @@ nfs_flush(vp, cred, waitfor, p, commit)
struct proc *p;
int commit;
{
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
struct nfsnode *np = VTONFS(vp);
- struct buf *bp;
- int i;
- struct buf *nbp;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
- int passone = 1;
- u_quad_t off = (u_quad_t)-1, endoff = 0, toff;
-#ifndef NFS_COMMITBVECSIZ
-#define NFS_COMMITBVECSIZ 20
-#endif
- struct buf *bvec[NFS_COMMITBVECSIZ];
+ int error;
+ int flushflags = PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO;
+ int rv;
- if (nmp->nm_flag & NFSMNT_INT)
- slpflag = PCATCH;
- if (!commit)
- passone = 0;
- /*
- * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
- * server, but nas not been committed to stable storage on the server
- * yet. On the first pass, the byte range is worked out and the commit
- * rpc is done. On the second pass, nfs_writebp() is called to do the
- * job.
- */
-again:
- bvecpos = 0;
- if (NFS_ISV3(vp) && commit) {
- s = splbio();
- for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
- nbp = bp->b_vnbufs.le_next;
- if (bvecpos >= NFS_COMMITBVECSIZ)
- break;
- if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
- != (B_DELWRI | B_NEEDCOMMIT))
- continue;
- bremfree(bp);
- bp->b_flags |= (B_BUSY | B_WRITEINPROG);
- /*
- * A list of these buffers is kept so that the
- * second loop knows which buffers have actually
- * been committed. This is necessary, since there
- * may be a race between the commit rpc and new
- * uncommitted writes on the file.
- */
- bvec[bvecpos++] = bp;
- toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
- bp->b_dirtyoff;
- if (toff < off)
- off = toff;
- toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
- if (toff > endoff)
- endoff = toff;
- }
- splx(s);
- }
- if (bvecpos > 0) {
- /*
- * Commit data on the server, as required.
- */
- retv = nfs_commit(vp, off, (int)(endoff - off), p);
- if (retv == NFSERR_STALEWRITEVERF)
- nfs_clearcommit(vp->v_mount);
- /*
- * Now, either mark the blocks I/O done or mark the
- * blocks dirty, depending on whether the commit
- * succeeded.
- */
- for (i = 0; i < bvecpos; i++) {
- bp = bvec[i];
- bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG);
- if (retv)
- brelse(bp);
- else {
- s = splbio();
- buf_undirty(bp);
- vp->v_numoutput++;
- bp->b_flags |= B_ASYNC;
- bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- splx(s);
- biodone(bp);
- }
- }
- }
+ error = 0;
- /*
- * Start/do any write(s) that are required.
- */
-loop:
- s = splbio();
- for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
- nbp = bp->b_vnbufs.le_next;
- if (bp->b_flags & B_BUSY) {
- if (waitfor != MNT_WAIT || passone)
- continue;
- bp->b_flags |= B_WANTED;
- error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
- "nfsfsync", slptimeo);
- splx(s);
- if (error) {
- if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
- return (EINTR);
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- }
- goto loop;
- }
- if ((bp->b_flags & B_DELWRI) == 0)
- panic("nfs_fsync: not dirty");
- if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT))
- continue;
- bremfree(bp);
- if (passone || !commit)
- bp->b_flags |= (B_BUSY|B_ASYNC);
- else
- bp->b_flags |= (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT);
- splx(s);
- VOP_BWRITE(bp);
- goto loop;
- }
- splx(s);
- if (passone) {
- passone = 0;
- goto again;
- }
- if (waitfor == MNT_WAIT) {
- loop2:
- s = splbio();
- error = vwaitforio(vp, slpflag, "nfs_fsync", slptimeo);
- splx(s);
- if (error) {
- if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
- return (EINTR);
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- goto loop2;
- }
-
- if (vp->v_dirtyblkhd.lh_first && commit) {
-#if 0
- vprint("nfs_fsync: dirty", vp);
-#endif
- goto loop;
- }
+ simple_lock(&uobj->vmobjlock);
+ rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags);
+ simple_unlock(&uobj->vmobjlock);
+ if (!rv) {
+ error = EIO;
}
if (np->n_flag & NWRITEERR) {
error = np->n_error;
@@ -2860,7 +2743,7 @@ nfs_print(v)
}
/*
- * Just call nfs_writebp() with the force argument set to 1.
+ * Just call bwrite().
*/
int
nfs_bwrite(v)
@@ -2870,76 +2753,7 @@ nfs_bwrite(v)
struct buf *a_bp;
} */ *ap = v;
- return (nfs_writebp(ap->a_bp, 1));
-}
-
-/*
- * This is a clone of vop_generic_bwrite(), except that B_WRITEINPROG isn't set unless
- * the force flag is one and it also handles the B_NEEDCOMMIT flag.
- */
-int
-nfs_writebp(bp, force)
- register struct buf *bp;
- int force;
-{
- register int oldflags = bp->b_flags, retv = 1;
- register struct proc *p = curproc; /* XXX */
- off_t off;
- int s;
-
- if(!(bp->b_flags & B_BUSY))
- panic("bwrite: buffer is not busy???");
-
-#ifdef fvdl_debug
- printf("nfs_writebp(%x): vp %x voff %d vend %d doff %d dend %d\n",
- bp, bp->b_vp, bp->b_validoff, bp->b_validend, bp->b_dirtyoff,
- bp->b_dirtyend);
-#endif
- bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
-
- s = splbio();
- buf_undirty(bp);
-
- if ((oldflags & B_ASYNC) && !(oldflags & B_DELWRI) && p)
- ++p->p_stats->p_ru.ru_oublock;
-
- bp->b_vp->v_numoutput++;
- splx(s);
-
- /*
- * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not
- * an actual write will have to be scheduled via. VOP_STRATEGY().
- * If B_WRITEINPROG is already set, then push it with a write anyhow.
- */
- if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) {
- off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
- bp->b_flags |= B_WRITEINPROG;
- retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff,
- bp->b_proc);
- bp->b_flags &= ~B_WRITEINPROG;
- if (!retv) {
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- bp->b_flags &= ~B_NEEDCOMMIT;
- biodone(bp);
- } else if (retv == NFSERR_STALEWRITEVERF)
- nfs_clearcommit(bp->b_vp->v_mount);
- }
- if (retv) {
- if (force)
- bp->b_flags |= B_WRITEINPROG;
- VOP_STRATEGY(bp);
- }
-
- if( (oldflags & B_ASYNC) == 0) {
- int rtval = biowait(bp);
- if (!(oldflags & B_DELWRI) && p) {
- ++p->p_stats->p_ru.ru_oublock;
- }
- brelse(bp);
- return (rtval);
- }
-
- return (0);
+ return (bwrite(ap->a_bp));
}
/*
diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h
index e1e0fd64327..42aaddfa637 100644
--- a/sys/nfs/nfsnode.h
+++ b/sys/nfs/nfsnode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfsnode.h,v 1.11 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfsnode.h,v 1.12 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfsnode.h,v 1.16 1996/02/18 11:54:04 fvdl Exp $ */
/*
@@ -119,8 +119,20 @@ struct nfsnode {
nfsfh_t n_fh; /* Small File Handle */
struct ucred *n_rcred;
struct ucred *n_wcred;
+ off_t n_pushedlo; /* 1st block in committed range */
+ off_t n_pushedhi; /* last block in committed range */
+ off_t n_pushlo; /* 1st block in to-be-committed range */
+ off_t n_pushhi; /* last block in to-be-committed range */
+ struct lock n_commitlock; /* Serialize commits XXX */
+ int n_commitflags;
};
+/*
+ * Values for n_commitflags
+ */
+#define NFS_COMMIT_PUSH_VALID 0x0001 /* push range valid */
+#define NFS_COMMIT_PUSHED_VALID 0x0002 /* pushed range valid */
+
#define n_atim n_un1.nf_atim
#define n_mtim n_un2.nf_mtim
#define n_sillyrename n_un3.nf_silly
@@ -199,6 +211,8 @@ int nfs_bwrite __P((void *));
int nfs_vget __P((struct mount *, ino_t, struct vnode **));
#define nfs_reallocblks \
((int (*) __P((void *)))eopnotsupp)
+int nfs_getpages __P((void *));
+int nfs_putpages __P((void *));
/* other stuff */
int nfs_removeit __P((struct sillyrename *));
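The four new offset fields plus n_commitflags let the NFS client coalesce
commit RPCs over block ranges instead of issuing them per buffer. A minimal
sketch of how a pending commit range would be grown under n_commitlock; the
helper name and merge policy are illustrative assumptions, not code from this
commit:

/*
 * Hypothetical helper, for illustration only: extend the pending
 * commit range of an nfsnode to cover [lo, hi); the caller is
 * assumed to hold n_commitlock.
 */
static void
nfs_merge_commit_range(struct nfsnode *np, off_t lo, off_t hi)
{
	if ((np->n_commitflags & NFS_COMMIT_PUSH_VALID) == 0) {
		np->n_pushlo = lo;
		np->n_pushhi = hi;
		np->n_commitflags |= NFS_COMMIT_PUSH_VALID;
	} else {
		if (lo < np->n_pushlo)
			np->n_pushlo = lo;
		if (hi > np->n_pushhi)
			np->n_pushhi = hi;
	}
}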
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index cabdcbbe084..054a07c24d5 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: buf.h,v 1.33 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: buf.h,v 1.34 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */
/*
@@ -68,6 +68,7 @@ extern struct bio_ops {
void (*io_deallocate) __P((struct buf *));
void (*io_movedeps) __P((struct buf *, struct buf *));
int (*io_countdeps) __P((struct buf *, int, int));
+ void (*io_pageiodone) __P((struct buf *));
} bioops;
/*
@@ -96,10 +97,7 @@ struct buf {
/* Function to call upon completion. */
void (*b_iodone) __P((struct buf *));
struct vnode *b_vp; /* Device vnode. */
- int b_dirtyoff; /* Offset in buffer of dirty region. */
- int b_dirtyend; /* Offset of end of dirty region. */
- int b_validoff; /* Offset in buffer of valid region. */
- int b_validend; /* Offset of end of valid region. */
+ void *b_private;
struct workhead b_dep; /* List of filesystem dependencies. */
};
@@ -120,7 +118,6 @@ struct buf {
* These flags are kept in b_flags.
*/
#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
-#define B_NEEDCOMMIT 0x00000002 /* Needs committing to stable storage */
#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
#define B_BAD 0x00000008 /* Bad block revectoring in progress. */
#define B_BUSY 0x00000010 /* I/O in progress. */
@@ -144,7 +141,6 @@ struct buf {
#define B_UAREA 0x00400000 /* Buffer describes Uarea I/O. */
#define B_WANTED 0x00800000 /* Process wants this buffer. */
#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */
-#define B_WRITEINPROG 0x01000000 /* Write in progress. */
#define B_XXX 0x02000000 /* Debugging flag. */
#define B_DEFERRED 0x04000000 /* Skipped over for cleaning */
#define B_SCANNED 0x08000000 /* Block already pushed during sync */
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index f398a301c69..50f59e4a532 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: mount.h,v 1.40 2001/11/21 21:13:34 csapuntz Exp $ */
+/* $OpenBSD: mount.h,v 1.41 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $ */
/*
@@ -336,6 +336,8 @@ struct mount {
struct lock mnt_lock; /* mount structure lock */
int mnt_flag; /* flags */
int mnt_maxsymlinklen; /* max size of short symlink */
+ int mnt_fs_bshift; /* offset shift for lblkno */
+ int mnt_dev_bshift; /* shift for device sectors */
struct statfs mnt_stat; /* cache of filesystem stats */
qaddr_t mnt_data; /* private data */
};
diff --git a/sys/sys/param.h b/sys/sys/param.h
index a950b196cc3..59fe3a01548 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: param.h,v 1.41 2001/09/11 13:11:18 deraadt Exp $ */
+/* $OpenBSD: param.h,v 1.42 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: param.h,v 1.23 1996/03/17 01:02:29 thorpej Exp $ */
/*-
@@ -227,3 +227,16 @@
#define RFCNAMEG (1<<10) /* UNIMPL zero plan9 `name space' */
#define RFCENVG (1<<11) /* UNIMPL zero plan9 `env space' */
#define RFCFDG (1<<12) /* zero fd table */
+
+#ifdef _KERNEL
+/*
+ * Defaults for Unified Buffer Cache parameters.
+ */
+
+#ifndef UBC_WINSIZE
+#define UBC_WINSIZE 8192
+#endif
+#ifndef UBC_NWINS
+#define UBC_NWINS 1024
+#endif
+#endif /* _KERNEL */
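For scale, a worked computation of what these defaults reserve; the per-call
window limit is why the new per-filesystem read/write paths later in this diff
loop over ubc_alloc()/ubc_release():

/*
 * Total kernel VA consumed by the UBC window pool, with the
 * defaults above:
 *
 *	UBC_NWINS * UBC_WINSIZE = 1024 * 8192 = 8388608 bytes (8 MB)
 *
 * ubc_alloc() hands out at most one window per call, so copies
 * larger than UBC_WINSIZE take multiple alloc/release iterations.
 */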
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 64a90990d0e..9eaf484201f 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vnode.h,v 1.41 2001/11/15 06:22:30 art Exp $ */
+/* $OpenBSD: vnode.h,v 1.42 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $ */
/*
@@ -90,8 +90,10 @@ struct vnode {
struct uvm_vnode v_uvm; /* uvm data */
int (**v_op) __P((void *)); /* vnode operations vector */
enum vtype v_type; /* vnode type */
- u_int v_flag; /* vnode flags (see below) */
- u_int v_usecount; /* reference count of users */
+#define v_flag v_uvm.u_flags
+#define v_usecount v_uvm.u_obj.uo_refs
+#define v_interlock v_uvm.u_obj.vmobjlock
+#define v_numoutput v_uvm.u_nio
/* reference count of writers */
u_int v_writecount;
/* Flags that can be read/written in interrupts */
@@ -103,7 +105,6 @@ struct vnode {
LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */
struct buflists v_cleanblkhd; /* clean blocklist head */
struct buflists v_dirtyblkhd; /* dirty blocklist head */
- u_int v_numoutput; /* num of writes in progress */
LIST_ENTRY(vnode) v_synclist; /* vnode with dirty buffers */
union {
struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */
@@ -112,8 +113,8 @@ struct vnode {
struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */
} v_un;
- struct simplelock v_interlock; /* lock on usecount and flag */
struct lock *v_vnlock; /* used for non-locking fs's */
+ struct lock v_glock; /* getpage lock */
enum vtagtype v_tag; /* type of underlying data */
void *v_data; /* private data for fs */
struct {
@@ -137,6 +138,9 @@ struct vnode {
#define VXWANT 0x0200 /* process is waiting for vnode */
#define VALIASED 0x0800 /* vnode has an alias */
#define VLOCKSWORK 0x4000 /* FS supports locking discipline */
+#define VDIRTY 0x8000 /* vnode possibly has dirty pages */
+
+#define VSIZENOTSET ((voff_t)-1)
/*
* (v_bioflag) Flags that may be manipulated by interrupt handlers
@@ -446,6 +450,12 @@ int vop_generic_unlock __P((void *));
int vop_generic_revoke __P((void *));
int vop_generic_kqfilter __P((void *));
+/* XXXUBC - doesn't really belong here. */
+int genfs_getpages __P((void *));
+int genfs_putpages __P((void *));
+int genfs_size __P((void *));
+
+
int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p));
int vn_writechk __P((struct vnode *vp));
diff --git a/sys/sys/vnode_if.h b/sys/sys/vnode_if.h
index 00cdadabe25..57aff6f4c97 100644
--- a/sys/sys/vnode_if.h
+++ b/sys/sys/vnode_if.h
@@ -3,7 +3,7 @@
* (Modifications made here may easily be lost!)
*
* Created from the file:
- * OpenBSD: vnode_if.src,v 1.11 2001/06/23 02:21:05 csapuntz Exp
+ * OpenBSD: vnode_if.src,v 1.13 2001/07/26 20:24:47 millert Exp
* by the script:
* OpenBSD: vnode_if.sh,v 1.8 2001/02/26 17:34:18 art Exp
*/
@@ -397,6 +397,52 @@ struct vop_whiteout_args {
extern struct vnodeop_desc vop_whiteout_desc;
int VOP_WHITEOUT __P((struct vnode *, struct componentname *, int));
+struct vop_ballocn_args {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ off_t a_offset;
+ off_t a_length;
+ struct ucred *a_cred;
+ int a_flags;
+};
+extern struct vnodeop_desc vop_ballocn_desc;
+int VOP_BALLOCN __P((struct vnode *, off_t, off_t, struct ucred *, int));
+
+struct vop_getpages_args {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ voff_t a_offset;
+ vm_page_t *a_m;
+ int *a_count;
+ int a_centeridx;
+ vm_prot_t a_access_type;
+ int a_advice;
+ int a_flags;
+};
+extern struct vnodeop_desc vop_getpages_desc;
+int VOP_GETPAGES __P((struct vnode *, voff_t, vm_page_t *, int *, int,
+ vm_prot_t, int, int));
+
+struct vop_putpages_args {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ vm_page_t *a_m;
+ int a_count;
+ int a_flags;
+ int *a_rtvals;
+};
+extern struct vnodeop_desc vop_putpages_desc;
+int VOP_PUTPAGES __P((struct vnode *, vm_page_t *, int, int, int *));
+
+struct vop_size_args {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ off_t a_size;
+ off_t *a_eobp;
+};
+extern struct vnodeop_desc vop_size_desc;
+int VOP_SIZE __P((struct vnode *, off_t, off_t *));
+
/* Special cases: */
#include <sys/buf.h>
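Taken together, the intended use of the new ops when extending a file is:
VOP_SIZE() to learn where the on-disk allocation must end for a given byte
size, then VOP_BALLOCN() to back the range with blocks. A sketch assuming vp,
off, len and cred are in scope (ufs_balloc_range() later in this diff is the
real caller, with page caching and locking around these calls):

	off_t eob;
	int error;

	error = VOP_SIZE(vp, off + len, &eob);	/* fragment- or block-rounded end */
	if (error == 0)
		error = VOP_BALLOCN(vp, off, len, cred, 0);	/* allocate backing blocks */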
diff --git a/sys/ufs/ext2fs/ext2fs_balloc.c b/sys/ufs/ext2fs/ext2fs_balloc.c
index 849a8864b2a..78fb0a8371c 100644
--- a/sys/ufs/ext2fs/ext2fs_balloc.c
+++ b/sys/ufs/ext2fs/ext2fs_balloc.c
@@ -1,5 +1,4 @@
-/* $OpenBSD: ext2fs_balloc.c,v 1.7 2001/11/06 19:53:21 miod Exp $ */
-/* $NetBSD: ext2fs_balloc.c,v 1.10 2001/07/04 21:16:01 chs Exp $ */
+/* $NetBSD: ext2fs_balloc.c,v 1.8 2000/12/10 06:38:31 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -44,8 +43,9 @@
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/vnode.h>
+#include <sys/mount.h>
-#include <uvm/uvm_extern.h>
+#include <uvm/uvm.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
@@ -73,8 +73,13 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
u_int deallocated;
ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
int unwindidx = -1;
+ UVMHIST_FUNC("ext2fs_buf_alloc"); UVMHIST_CALLED(ubchist);
- *bpp = NULL;
+ UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0);
+
+ if (bpp != NULL) {
+ *bpp = NULL;
+ }
if (bn < 0)
return (EFBIG);
fs = ip->i_e2fs;
@@ -86,20 +91,29 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
if (bn < NDADDR) {
nb = fs2h32(ip->i_e2fs_blocks[bn]);
if (nb != 0) {
- error = bread(vp, bn, fs->e2fs_bsize, NOCRED, &bp);
- if (error) {
- brelse(bp);
- return (error);
+
+ /*
+ * the block is already allocated, just read it.
+ */
+
+ if (bpp != NULL) {
+ error = bread(vp, bn, fs->e2fs_bsize, NOCRED,
+ &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ *bpp = bp;
}
- *bpp = bp;
return (0);
}
/*
* allocate a new direct block.
*/
+
error = ext2fs_alloc(ip, bn,
- ext2fs_blkpref(ip, bn, (int)bn, &ip->i_e2fs_blocks[0]),
+ ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]),
cred, &newb);
if (error)
return (error);
@@ -107,11 +121,13 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
ip->i_e2fs_last_blk = newb;
ip->i_e2fs_blocks[bn] = h2fs32(newb);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0);
- bp->b_blkno = fsbtodb(fs, newb);
- if (flags & B_CLRBUF)
- clrbuf(bp);
- *bpp = bp;
+ if (bpp != NULL) {
+ bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0);
+ bp->b_blkno = fsbtodb(fs, newb);
+ if (flags & B_CLRBUF)
+ clrbuf(bp);
+ *bpp = bp;
+ }
return (0);
}
/*
@@ -229,26 +245,30 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
} else {
bdwrite(bp);
}
- nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
- nbp->b_blkno = fsbtodb(fs, nb);
- if (flags & B_CLRBUF)
- clrbuf(nbp);
- *bpp = nbp;
+ if (bpp != NULL) {
+ nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
+ nbp->b_blkno = fsbtodb(fs, nb);
+ if (flags & B_CLRBUF)
+ clrbuf(nbp);
+ *bpp = nbp;
+ }
return (0);
}
brelse(bp);
- if (flags & B_CLRBUF) {
- error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp);
- if (error) {
- brelse(nbp);
- goto fail;
+ if (bpp != NULL) {
+ if (flags & B_CLRBUF) {
+ error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED,
+ &nbp);
+ if (error) {
+ brelse(nbp);
+ goto fail;
+ }
+ } else {
+ nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
+ nbp->b_blkno = fsbtodb(fs, nb);
}
- } else {
- nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
- nbp->b_blkno = fsbtodb(fs, nb);
+ *bpp = nbp;
}
-
- *bpp = nbp;
return (0);
fail:
/*
@@ -292,3 +312,153 @@ fail:
}
return error;
}
+
+int
+ext2fs_ballocn(v)
+ void *v;
+{
+ struct vop_ballocn_args /* {
+ struct vnode *a_vp;
+ off_t a_offset;
+ off_t a_length;
+ struct ucred *a_cred;
+ int a_flags;
+ } */ *ap = v;
+ off_t off, len;
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
+ struct m_ext2fs *fs = ip->i_e2fs;
+ int error, delta, bshift, bsize;
+ UVMHIST_FUNC("ext2fs_ballocn"); UVMHIST_CALLED(ubchist);
+
+ bshift = fs->e2fs_bshift;
+ bsize = 1 << bshift;
+
+ off = ap->a_offset;
+ len = ap->a_length;
+
+ delta = off & (bsize - 1);
+ off -= delta;
+ len += delta;
+
+ while (len > 0) {
+ bsize = min(bsize, len);
+ UVMHIST_LOG(ubchist, "off 0x%x len 0x%x bsize 0x%x",
+ off, len, bsize, 0);
+
+ error = ext2fs_buf_alloc(ip, lblkno(fs, off), bsize, ap->a_cred,
+ NULL, ap->a_flags);
+ if (error) {
+ UVMHIST_LOG(ubchist, "error %d", error, 0,0,0);
+ return error;
+ }
+
+ /*
+	 * increase file size now, VOP_BALLOCN() requires that
+ * EOF be up-to-date before each call.
+ */
+
+ if (ip->i_e2fs_size < off + bsize) {
+ UVMHIST_LOG(ubchist, "old 0x%x new 0x%x",
+ ip->i_e2fs_size, off + bsize,0,0);
+ ip->i_e2fs_size = off + bsize;
+ if (vp->v_uvm.u_size < ip->i_e2fs_size) {
+ uvm_vnp_setsize(vp, ip->i_e2fs_size);
+ }
+ }
+
+ off += bsize;
+ len -= bsize;
+ }
+ return 0;
+}
+
+/*
+ * allocate a range of blocks in a file.
+ * after this function returns, any page entirely contained within the range
+ * will map to invalid data and thus must be overwritten before it is made
+ * accessible to others.
+ */
+
+int
+ext2fs_balloc_range(vp, off, len, cred, flags)
+ struct vnode *vp;
+ off_t off, len;
+ struct ucred *cred;
+ int flags;
+{
+ off_t oldeof, eof, pagestart;
+ struct uvm_object *uobj;
+ int i, delta, error, npages;
+ int bshift = vp->v_mount->mnt_fs_bshift;
+ int bsize = 1 << bshift;
+ int ppb = max(bsize >> PAGE_SHIFT, 1);
+ struct vm_page *pgs[ppb];
+ UVMHIST_FUNC("ext2fs_balloc_range"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
+ vp, off, len, vp->v_uvm.u_size);
+
+ error = 0;
+ uobj = &vp->v_uvm.u_obj;
+ oldeof = vp->v_uvm.u_size;
+ eof = max(oldeof, off + len);
+ UVMHIST_LOG(ubchist, "new eof 0x%x", eof,0,0,0);
+ pgs[0] = NULL;
+
+ /*
+ * cache the new range of the file. this will create zeroed pages
+ * where the new block will be and keep them locked until the
+ * new block is allocated, so there will be no window where
+ * the old contents of the new block is visible to racing threads.
+ */
+
+ pagestart = trunc_page(off) & ~(bsize - 1);
+ npages = min(ppb, (round_page(eof) - pagestart) >> PAGE_SHIFT);
+	memset(pgs, 0, npages * sizeof(struct vm_page *));
+ simple_lock(&uobj->vmobjlock);
+ error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0,
+ VM_PROT_READ, 0, PGO_SYNCIO | PGO_PASTEOF);
+ if (error) {
+ UVMHIST_LOG(ubchist, "getpages %d", error,0,0,0);
+ goto errout;
+ }
+ for (i = 0; i < npages; i++) {
+ UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0);
+ KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
+ pgs[i]->flags &= ~PG_CLEAN;
+ uvm_pageactivate(pgs[i]);
+ }
+
+ /*
+ * adjust off to be block-aligned.
+ */
+
+ delta = off & (bsize - 1);
+ off -= delta;
+ len += delta;
+
+ /*
+ * now allocate the range.
+ */
+
+ lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL, curproc);
+ error = VOP_BALLOCN(vp, off, len, cred, flags);
+ UVMHIST_LOG(ubchist, "ballocn %d", error,0,0,0);
+ lockmgr(&vp->v_glock, LK_RELEASE, NULL, curproc);
+
+ /*
+ * unbusy any pages we are holding.
+ */
+
+errout:
+ simple_lock(&uobj->vmobjlock);
+ if (error) {
+ (void) (uobj->pgops->pgo_flush)(uobj, oldeof, pagestart + ppb,
+ PGO_FREE);
+ }
+ if (pgs[0] != NULL) {
+ uvm_page_unbusy(pgs, npages);
+ }
+ simple_unlock(&uobj->vmobjlock);
+ return (error);
+}
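A worked example of the alignment arithmetic shared by ext2fs_ballocn() and
ext2fs_balloc_range(), assuming bsize = 4096 (bshift = 12) and 4 KB pages:

/*
 *	off = 5000, len = 100
 *	delta = off & (bsize - 1) = 5000 & 4095 = 904
 *	off -> 5000 - 904 = 4096	(block-aligned start)
 *	len -> 100 + 904 = 1004		(still ends at byte 5099)
 *	pagestart = trunc_page(5000) & ~(bsize - 1) = 4096
 *
 * so the loop allocates the single 4 KB block at offset 4096, whose
 * page was zeroed and busied by the VOP_GETPAGES() call above.
 */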
diff --git a/sys/ufs/ext2fs/ext2fs_extern.h b/sys/ufs/ext2fs/ext2fs_extern.h
index b7a3f96df38..af23fb6ef2d 100644
--- a/sys/ufs/ext2fs/ext2fs_extern.h
+++ b/sys/ufs/ext2fs/ext2fs_extern.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: ext2fs_extern.h,v 1.10 2001/09/18 00:39:15 art Exp $ */
-/* $NetBSD: ext2fs_extern.h,v 1.1 1997/06/11 09:33:55 bouyer Exp $ */
+/* $OpenBSD: ext2fs_extern.h,v 1.11 2001/11/27 05:27:12 art Exp $ */
+/* $NetBSD: ext2fs_extern.h,v 1.9 2000/11/27 08:39:53 chs Exp $ */
/*-
* Copyright (c) 1997 Manuel Bouyer.
@@ -74,6 +74,9 @@ int ext2fs_inode_free(struct inode *pip, ino_t ino, int mode);
/* ext2fs_balloc.c */
int ext2fs_buf_alloc(struct inode *, daddr_t, int, struct ucred *,
struct buf **, int);
+int ext2fs_ballocn __P((void *));
+int ext2fs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *,
+ int));
/* ext2fs_bmap.c */
int ext2fs_bmap __P((void *));
diff --git a/sys/ufs/ext2fs/ext2fs_inode.c b/sys/ufs/ext2fs/ext2fs_inode.c
index 4af28d9bf0e..f77c99c47b5 100644
--- a/sys/ufs/ext2fs/ext2fs_inode.c
+++ b/sys/ufs/ext2fs/ext2fs_inode.c
@@ -1,5 +1,4 @@
-/* $OpenBSD: ext2fs_inode.c,v 1.17 2001/11/06 19:53:21 miod Exp $ */
-/* $NetBSD: ext2fs_inode.c,v 1.24 2001/06/19 12:59:18 wiz Exp $ */
+/* $NetBSD: ext2fs_inode.c,v 1.23 2001/02/18 20:17:04 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -59,8 +58,10 @@
#include <ufs/ext2fs/ext2fs.h>
#include <ufs/ext2fs/ext2fs_extern.h>
+extern int prtactive;
+
static int ext2fs_indirtrunc __P((struct inode *, ufs_daddr_t, ufs_daddr_t,
- ufs_daddr_t, int, long *));
+ ufs_daddr_t, int, long *));
/*
* Last reference to an inode. If necessary, write or delete it.
@@ -78,7 +79,6 @@ ext2fs_inactive(v)
struct proc *p = ap->a_p;
struct timespec ts;
int error = 0;
- extern int prtactive;
if (prtactive && vp->v_usecount != 0)
vprint("ext2fs_inactive: pushing active", vp);
@@ -171,14 +171,13 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
{
struct vnode *ovp = ITOV(oip);
ufs_daddr_t lastblock;
- ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
+ ufs_daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR];
ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
struct m_ext2fs *fs;
- struct buf *bp;
int offset, size, level;
long count, nblocks, vflags, blocksreleased = 0;
int i;
- int aflags, error, allerror;
+ int error, allerror;
off_t osize;
if (length < 0)
@@ -219,22 +218,8 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
if (length > fs->fs_maxfilesize)
return (EFBIG);
#endif
- offset = blkoff(fs, length - 1);
- lbn = lblkno(fs, length - 1);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = ext2fs_buf_alloc(oip, lbn, offset + 1, cred, &bp,
- aflags);
- if (error)
- return (error);
- oip->i_e2fs_size = length;
- uvm_vnp_setsize(ovp, length);
- uvm_vnp_uncache(ovp);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
+ ext2fs_balloc_range(ovp, length - 1, 1, cred,
+ flags & IO_SYNC ? B_SYNC : 0);
oip->i_flag |= IN_CHANGE | IN_UPDATE;
return (ext2fs_update(oip, NULL, NULL, 1));
}
@@ -246,28 +231,15 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
* of subsequent file growth.
*/
offset = blkoff(fs, length);
- if (offset == 0) {
- oip->i_e2fs_size = length;
- } else {
- lbn = lblkno(fs, length);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = ext2fs_buf_alloc(oip, lbn, offset, cred, &bp,
- aflags);
- if (error)
- return (error);
- oip->i_e2fs_size = length;
+ if (offset != 0) {
size = fs->e2fs_bsize;
- uvm_vnp_setsize(ovp, length);
- uvm_vnp_uncache(ovp);
- bzero((char *)bp->b_data + offset, (u_int)(size - offset));
- allocbuf(bp, size);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
+
+ /* XXXUBC we should handle more than just VREG */
+ uvm_vnp_zerorange(ovp, length, size - offset);
}
+ oip->i_e2fs_size = length;
+ uvm_vnp_setsize(ovp, length);
+
/*
* Calculate index into inode's block list of
* last direct and indirect blocks (if any)
diff --git a/sys/ufs/ext2fs/ext2fs_readwrite.c b/sys/ufs/ext2fs/ext2fs_readwrite.c
index 9ae4322756f..94424055733 100644
--- a/sys/ufs/ext2fs/ext2fs_readwrite.c
+++ b/sys/ufs/ext2fs/ext2fs_readwrite.c
@@ -79,6 +79,8 @@ ext2fs_read(v)
struct uio *uio;
struct m_ext2fs *fs;
struct buf *bp;
+ void *win;
+ vsize_t bytelen;
ufs_daddr_t lbn, nextlbn;
off_t bytesinfile;
long size, xfersize, blkoffset;
@@ -107,6 +109,27 @@ ext2fs_read(v)
if (uio->uio_resid == 0)
return (0);
+ if (vp->v_type == VREG) {
+ error = 0;
+ while (uio->uio_resid > 0) {
+
+ bytelen = MIN(ip->i_e2fs_size - uio->uio_offset,
+ uio->uio_resid);
+
+ if (bytelen == 0) {
+ break;
+ }
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
+ }
+ }
+ goto out;
+ }
+
for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
if ((bytesinfile = ip->i_e2fs_size - uio->uio_offset) <= 0)
break;
@@ -156,8 +179,11 @@ ext2fs_read(v)
if (bp != NULL)
brelse(bp);
+out:
if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
ip->i_flag |= IN_ACCESS;
+ if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
+ error = ext2fs_update(ip, NULL, NULL, 1);
}
return (error);
}
@@ -183,12 +209,17 @@ ext2fs_write(v)
struct proc *p;
ufs_daddr_t lbn;
off_t osize;
- int blkoffset, error, flags, ioflag, resid, size, xfersize;
+ int blkoffset, error, flags, ioflag, resid, xfersize;
+ vsize_t bytelen;
+ void *win;
+ off_t oldoff;
+ boolean_t rv;
ioflag = ap->a_ioflag;
uio = ap->a_uio;
vp = ap->a_vp;
ip = VTOI(vp);
+ error = 0;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
@@ -232,35 +263,65 @@ ext2fs_write(v)
resid = uio->uio_resid;
osize = ip->i_e2fs_size;
- flags = ioflag & IO_SYNC ? B_SYNC : 0;
+ if (vp->v_type == VREG) {
+ while (uio->uio_resid > 0) {
+ oldoff = uio->uio_offset;
+ blkoffset = blkoff(fs, uio->uio_offset);
+ bytelen = MIN(fs->e2fs_bsize - blkoffset,
+ uio->uio_resid);
+
+ /*
+ * XXXUBC if file is mapped and this is the last block,
+ * process one page at a time.
+ */
+
+ error = ext2fs_balloc_range(vp, uio->uio_offset,
+ bytelen, ap->a_cred, 0);
+ if (error) {
+ break;
+ }
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_WRITE);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
+ }
+
+ /*
+ * flush what we just wrote if necessary.
+ * XXXUBC simplistic async flushing.
+ */
+
+ if (oldoff >> 16 != uio->uio_offset >> 16) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, (oldoff >> 16) << 16,
+ (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ }
+ }
+ goto out;
+ }
+
+ flags = ioflag & IO_SYNC ? B_SYNC : 0;
for (error = 0; uio->uio_resid > 0;) {
lbn = lblkno(fs, uio->uio_offset);
blkoffset = blkoff(fs, uio->uio_offset);
- xfersize = fs->e2fs_bsize - blkoffset;
- if (uio->uio_resid < xfersize)
- xfersize = uio->uio_resid;
- if (fs->e2fs_bsize > xfersize)
+ xfersize = MIN(fs->e2fs_bsize - blkoffset, uio->uio_resid);
+ if (xfersize < fs->e2fs_bsize)
flags |= B_CLRBUF;
else
flags &= ~B_CLRBUF;
-
error = ext2fs_buf_alloc(ip,
- lbn, blkoffset + xfersize, ap->a_cred, &bp, flags);
+ lbn, blkoffset + xfersize, ap->a_cred, &bp, flags);
if (error)
break;
- if (uio->uio_offset + xfersize > ip->i_e2fs_size) {
+ if (ip->i_e2fs_size < uio->uio_offset + xfersize) {
ip->i_e2fs_size = uio->uio_offset + xfersize;
- uvm_vnp_setsize(vp, ip->i_e2fs_size);
}
- uvm_vnp_uncache(vp);
-
- size = fs->e2fs_bsize - bp->b_resid;
- if (size < xfersize)
- xfersize = size;
-
- error =
- uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
+ error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
if (ioflag & IO_SYNC)
(void)bwrite(bp);
else if (xfersize + blkoffset == fs->e2fs_bsize) {
@@ -272,13 +333,14 @@ ext2fs_write(v)
bdwrite(bp);
if (error || xfersize == 0)
break;
- ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
/*
* If we successfully wrote any data, and we are not the superuser
* we clear the setuid and setgid bits as a precaution against
* tampering.
*/
+out:
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
ip->i_e2fs_mode &= ~(ISUID | ISGID);
if (error) {
@@ -288,8 +350,7 @@ ext2fs_write(v)
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
}
- } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
+ } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
error = ext2fs_update(ip, NULL, NULL, 1);
- }
return (error);
}
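Both new VREG paths above share one UBC idiom; a reduced sketch, where
bytes_left stands in for ip->i_e2fs_size - uio->uio_offset and the write side
calls ext2fs_balloc_range() before mapping the window:

	while (uio->uio_resid > 0) {
		bytelen = MIN(bytes_left, uio->uio_resid);
		if (bytelen == 0)
			break;
		win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, &bytelen,
		    UBC_READ);			/* UBC_WRITE on the write side */
		error = uiomove(win, bytelen, uio);	/* fault + copy via window */
		ubc_release(win, 0);
		if (error)
			break;
	}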
diff --git a/sys/ufs/ext2fs/ext2fs_subr.c b/sys/ufs/ext2fs/ext2fs_subr.c
index 82165b8f242..3263f7e5391 100644
--- a/sys/ufs/ext2fs/ext2fs_subr.c
+++ b/sys/ufs/ext2fs/ext2fs_subr.c
@@ -1,5 +1,4 @@
-/* $OpenBSD: ext2fs_subr.c,v 1.6 2001/09/18 01:39:13 art Exp $ */
-/* $NetBSD: ext2fs_subr.c,v 1.1 1997/06/11 09:34:03 bouyer Exp $ */
+/* $NetBSD: ext2fs_subr.c,v 1.4 2000/03/30 12:41:11 augustss Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -96,7 +95,7 @@ ext2fs_checkoverlap(bp, ip)
if (ep == bp || (ep->b_flags & B_INVAL) ||
ep->b_vp == NULLVP)
continue;
- if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL))
+ if (VOP_BMAP(ep->b_vp, (ufs_daddr_t)0, &vp, (ufs_daddr_t)0, NULL))
continue;
if (vp != ip->i_devvp)
continue;
diff --git a/sys/ufs/ext2fs/ext2fs_vfsops.c b/sys/ufs/ext2fs/ext2fs_vfsops.c
index 6991cf9d650..e438268acbc 100644
--- a/sys/ufs/ext2fs/ext2fs_vfsops.c
+++ b/sys/ufs/ext2fs/ext2fs_vfsops.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: ext2fs_vfsops.c,v 1.16 2001/11/21 22:21:48 csapuntz Exp $ */
-/* $NetBSD: ext2fs_vfsops.c,v 1.1 1997/06/11 09:34:07 bouyer Exp $ */
+/* $OpenBSD: ext2fs_vfsops.c,v 1.17 2001/11/27 05:27:12 art Exp $ */
+/* $NetBSD: ext2fs_vfsops.c,v 1.40 2000/11/27 08:39:53 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -402,9 +402,11 @@ ext2fs_reload(mountp, cred, p)
* Step 1: invalidate all cached meta-data.
*/
devvp = VFSTOUFS(mountp)->um_devvp;
- if (vinvalbuf(devvp, 0, cred, p, 0, 0))
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ error = vinvalbuf(devvp, 0, cred, p, 0, 0);
+ VOP_UNLOCK(devvp, 0, p);
+ if (error)
panic("ext2fs_reload: dirty1");
-
/*
* Step 2: re-read superblock from disk.
*/
@@ -583,14 +585,18 @@ ext2fs_mountfs(devvp, mp, p)
mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */
+ mp->mnt_fs_bshift = m_fs->e2fs_bshift;
ump->um_mountp = mp;
ump->um_dev = dev;
ump->um_devvp = devvp;
ump->um_nindir = NINDIR(m_fs);
+ ump->um_lognindir = ffs(NINDIR(m_fs)) - 1;
ump->um_bptrtodb = m_fs->e2fs_fsbtodb;
ump->um_seqinc = 1; /* no frags */
devvp->v_specmountpoint = mp;
return (0);
+
out:
if (bp)
brelse(bp);
@@ -924,6 +930,7 @@ ext2fs_vget(mp, ino, vpp)
ip->i_flag |= IN_MODIFIED;
}
+ vp->v_uvm.u_size = ip->i_e2fs_size;
*vpp = vp;
return (0);
}
diff --git a/sys/ufs/ext2fs/ext2fs_vnops.c b/sys/ufs/ext2fs/ext2fs_vnops.c
index 0faba75ffd2..fffdd494d5a 100644
--- a/sys/ufs/ext2fs/ext2fs_vnops.c
+++ b/sys/ufs/ext2fs/ext2fs_vnops.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: ext2fs_vnops.c,v 1.17 2001/11/06 19:53:21 miod Exp $ */
-/* $NetBSD: ext2fs_vnops.c,v 1.1 1997/06/11 09:34:09 bouyer Exp $ */
+/* $OpenBSD: ext2fs_vnops.c,v 1.18 2001/11/27 05:27:12 art Exp $ */
+/* $NetBSD: ext2fs_vnops.c,v 1.30 2000/11/27 08:39:53 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -402,8 +402,6 @@ ext2fs_chmod(vp, mode, cred, p)
ip->i_e2fs_mode &= ~ALLPERMS;
ip->i_e2fs_mode |= (mode & ALLPERMS);
ip->i_flag |= IN_CHANGE;
- if ((vp->v_flag & VTEXT) && (ip->i_e2fs_mode & S_ISTXT) == 0)
- (void) uvm_vnp_uncache(vp);
return (0);
}
@@ -1469,7 +1467,11 @@ struct vnodeopv_entry_desc ext2fs_vnodeop_entries[] = {
{ &vop_pathconf_desc, ufs_pathconf }, /* pathconf */
{ &vop_advlock_desc, ext2fs_advlock }, /* advlock */
{ &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */
- { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL }
+ { &vop_ballocn_desc, ext2fs_ballocn },
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_putpages_desc, genfs_putpages },
+ { &vop_size_desc, genfs_size },
+ { NULL, NULL }
};
struct vnodeopv_desc ext2fs_vnodeop_opv_desc =
{ &ext2fs_vnodeop_p, ext2fs_vnodeop_entries };
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 8ddf99405fc..a53d87828c3 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_alloc.c,v 1.35 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ffs_alloc.c,v 1.36 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $ */
/*
@@ -169,14 +169,15 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop)
struct buf **bpp;
ufs_daddr_t *blknop;
{
- register struct fs *fs;
- struct buf *bp = NULL;
+ struct fs *fs;
+ struct buf *bp;
ufs_daddr_t quota_updated = 0;
int cg, request, error;
daddr_t bprev, bno;
if (bpp != NULL)
*bpp = NULL;
+
fs = ip->i_fs;
#ifdef DIAGNOSTIC
if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
@@ -282,7 +283,6 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop)
if (bno <= 0)
goto nospace;
- (void) uvm_vnp_uncache(ITOV(ip));
if (!DOINGSOFTDEP(ITOV(ip)))
ffs_blkfree(ip, bprev, (long)osize);
if (nsize < request)
@@ -362,7 +362,8 @@ ffs_reallocblks(v)
struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
int i, len, start_lvl, end_lvl, pref, ssize;
- if (doreallocblks == 0)
+ /* XXXUBC - don't reallocblks for now */
+ if (1 || doreallocblks == 0)
return (ENOSPC);
vp = ap->a_vp;
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 009adc91ff9..5f6ddc3d94e 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_balloc.c,v 1.18 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ffs_balloc.c,v 1.19 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $ */
/*
@@ -402,3 +402,61 @@ fail:
return (error);
}
+
+int
+ffs_ballocn(v)
+ void *v;
+{
+ struct vop_ballocn_args /* {
+ struct vnode *a_vp;
+ off_t a_offset;
+ off_t a_length;
+ struct ucred *a_cred;
+ int a_flags;
+ } */ *ap = v;
+
+ off_t off, len;
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
+ struct fs *fs = ip->i_fs;
+ int error, delta, bshift, bsize;
+
+ error = 0;
+ bshift = fs->fs_bshift;
+ bsize = 1 << bshift;
+
+ off = ap->a_offset;
+ len = ap->a_length;
+
+ delta = off & (bsize - 1);
+ off -= delta;
+ len += delta;
+
+ while (len > 0) {
+ bsize = min(bsize, len);
+
+ error = ffs_balloc(ip, off, bsize, ap->a_cred, ap->a_flags,
+ NULL);
+ if (error) {
+ goto out;
+ }
+
+ /*
+	 * increase file size now, VOP_BALLOCN() requires that
+ * EOF be up-to-date before each call.
+ */
+
+ if (ip->i_ffs_size < off + bsize) {
+ ip->i_ffs_size = off + bsize;
+ if (vp->v_uvm.u_size < ip->i_ffs_size) {
+ uvm_vnp_setsize(vp, ip->i_ffs_size);
+ }
+ }
+
+ off += bsize;
+ len -= bsize;
+ }
+
+out:
+ return error;
+}
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index eeeba209c69..2875a332a57 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_extern.h,v 1.14 2001/11/13 00:10:56 art Exp $ */
+/* $OpenBSD: ffs_extern.h,v 1.15 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */
/*-
@@ -87,6 +87,7 @@ void ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int));
/* ffs_balloc.c */
int ffs_balloc(struct inode *, off_t, int, struct ucred *, int, struct buf **);
+int ffs_ballocn(void *);
/* ffs_inode.c */
int ffs_init __P((struct vfsconf *));
@@ -128,7 +129,7 @@ int ffs_read __P((void *));
int ffs_write __P((void *));
int ffs_fsync __P((void *));
int ffs_reclaim __P((void *));
-
+int ffs_size __P((void *));
/*
* Soft dependency function prototypes.
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index c81c795b2ac..cddf6a368ca 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_inode.c,v 1.25 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ffs_inode.c,v 1.26 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */
/*
@@ -150,14 +150,14 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
{
struct vnode *ovp;
daddr_t lastblock;
- daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
+ daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR];
daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
struct fs *fs;
- struct buf *bp;
+ struct proc *p = curproc;
int offset, size, level;
long count, nblocks, vflags, blocksreleased = 0;
register int i;
- int aflags, error, allerror;
+ int error, allerror;
off_t osize;
if (length < 0)
@@ -188,10 +188,55 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
if ((error = getinoquota(oip)) != 0)
return (error);
- uvm_vnp_setsize(ovp, length);
+ fs = oip->i_fs;
+ if (length > fs->fs_maxfilesize)
+ return (EFBIG);
+ osize = oip->i_ffs_size;
oip->i_ci.ci_lasta = oip->i_ci.ci_clen
= oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0;
+ /*
+ * Lengthen the size of the file. We must ensure that the
+ * last byte of the file is allocated. Since the smallest
+ * value of osize is 0, length will be at least 1.
+ */
+
+ if (osize < length) {
+ ufs_balloc_range(ovp, length - 1, 1, cred,
+ flags & IO_SYNC ? B_SYNC : 0);
+ oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ return (UFS_UPDATE(oip, 1));
+ }
+
+ /*
+ * When truncating a regular file down to a non-block-aligned size,
+ * we must zero the part of last block which is past the new EOF.
+ * We must synchronously flush the zeroed pages to disk
+ * since the new pages will be invalidated as soon as we
+ * inform the VM system of the new, smaller size.
+	 * We must do this before acquiring the GLOCK, since fetching
+ * the pages will acquire the GLOCK internally.
+ * So there is a window where another thread could see a whole
+ * zeroed page past EOF, but that's life.
+ */
+
+ offset = blkoff(fs, length);
+ if (ovp->v_type == VREG && length < osize && offset != 0) {
+ struct uvm_object *uobj;
+ voff_t eoz;
+
+ size = blksize(fs, oip, lblkno(fs, length));
+ eoz = min(lblktosize(fs, lblkno(fs, length)) + size, osize);
+ uvm_vnp_zerorange(ovp, length, eoz - length);
+ uobj = &ovp->v_uvm.u_obj;
+ simple_lock(&uobj->vmobjlock);
+ uobj->pgops->pgo_flush(uobj, length, eoz,
+ PGO_CLEANIT|PGO_DEACTIVATE|PGO_SYNCIO);
+		simple_unlock(&uobj->vmobjlock);
+ }
+
+ lockmgr(&ovp->v_glock, LK_EXCLUSIVE, NULL, p);
+
if (DOINGSOFTDEP(ovp)) {
if (length > 0 || softdep_slowdown(ovp)) {
/*
@@ -204,80 +249,29 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
* so that it will have no data structures left.
*/
if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT,
- curproc)) != 0)
+ curproc)) != 0) {
+ lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p);
return (error);
+ }
} else {
+ uvm_vnp_setsize(ovp, length);
(void)ufs_quota_free_blocks(oip, oip->i_ffs_blocks,
NOCRED);
softdep_setup_freeblocks(oip, length);
(void) vinvalbuf(ovp, 0, cred, curproc, 0, 0);
+ lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p);
oip->i_flag |= IN_CHANGE | IN_UPDATE;
return (UFS_UPDATE(oip, 0));
}
}
- fs = oip->i_fs;
- osize = oip->i_ffs_size;
/*
- * Lengthen the size of the file. We must ensure that the
- * last byte of the file is allocated. Since the smallest
- * value of osize is 0, length will be at least 1.
+ * Reduce the size of the file.
*/
- if (osize < length) {
- if (length > fs->fs_maxfilesize)
- return (EFBIG);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = UFS_BUF_ALLOC(oip, length - 1, 1,
- cred, aflags, &bp);
- if (error)
- return (error);
- oip->i_ffs_size = length;
- uvm_vnp_setsize(ovp, length);
- (void) uvm_vnp_uncache(ovp);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
- oip->i_flag |= IN_CHANGE | IN_UPDATE;
- return (UFS_UPDATE(oip, MNT_WAIT));
- }
+ oip->i_ffs_size = length;
uvm_vnp_setsize(ovp, length);
/*
- * Shorten the size of the file. If the file is not being
- * truncated to a block boundary, the contents of the
- * partial block following the end of the file must be
- * zero'ed in case it ever becomes accessible again because
- * of subsequent file growth. Directories however are not
- * zero'ed as they should grow back initialized to empty.
- */
- offset = blkoff(fs, length);
- if (offset == 0) {
- oip->i_ffs_size = length;
- } else {
- lbn = lblkno(fs, length);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = UFS_BUF_ALLOC(oip, length - 1, 1,
- cred, aflags, &bp);
- if (error)
- return (error);
- oip->i_ffs_size = length;
- size = blksize(fs, oip, lbn);
- (void) uvm_vnp_uncache(ovp);
- if (ovp->v_type != VDIR)
- bzero((char *)bp->b_data + offset,
- (u_int)(size - offset));
- allocbuf(bp, size);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
- }
- /*
* Calculate index into inode's block list of
* last direct and indirect blocks (if any)
* which we want to keep. Lastblock is -1 when
@@ -402,6 +396,7 @@ done:
oip->i_ffs_blocks -= blocksreleased;
if (oip->i_ffs_blocks < 0) /* sanity */
oip->i_ffs_blocks = 0;
+ lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p);
oip->i_flag |= IN_CHANGE;
(void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED);
return (allerror);
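A worked example of the new shrink-time zeroing in ffs_truncate(), assuming
fs_bsize = 8192, old size 20000, new length 5000:

/*
 *	offset = blkoff(fs, 5000) = 5000	(not block-aligned)
 *	size = blksize(fs, oip, 0) = 8192	(i_ffs_size is still 20000)
 *	eoz = min(0 + 8192, 20000) = 8192
 *
 * so bytes [5000, 8192) are zeroed with uvm_vnp_zerorange() and
 * flushed synchronously before uvm_vnp_setsize() drops the pages
 * past the new EOF.
 */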
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 1d66094cc06..7a66eed4d8b 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_softdep.c,v 1.25 2001/11/13 14:19:24 art Exp $ */
+/* $OpenBSD: ffs_softdep.c,v 1.26 2001/11/27 05:27:12 art Exp $ */
/*
* Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
*
@@ -56,6 +56,7 @@
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
+#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/vnode.h>
@@ -69,6 +70,10 @@
#include <ufs/ffs/ffs_extern.h>
#include <ufs/ufs/ufs_extern.h>
+#include <uvm/uvm.h>
+struct pool sdpcpool;
+int softdep_lockedbufs;
+
#define STATIC
/*
@@ -109,6 +114,13 @@ extern char *memname[];
*/
/*
+ * Definitions for page cache info hashtable.
+ */
+#define PCBPHASHSIZE 1024
+LIST_HEAD(, buf) pcbphashhead[PCBPHASHSIZE];
+#define PCBPHASH(vp, lbn) ((((vaddr_t)(vp) >> 8) ^ (lbn)) & (PCBPHASHSIZE - 1))
+
+/*
* Internal function prototypes.
*/
STATIC void softdep_error __P((char *, int));
@@ -160,6 +172,13 @@ STATIC void pause_timer __P((void *));
STATIC int request_cleanup __P((int, int));
STATIC int process_worklist_item __P((struct mount *, int));
STATIC void add_to_worklist __P((struct worklist *));
+STATIC struct buf *softdep_setup_pagecache __P((struct inode *, ufs_lbn_t,
+ long));
+STATIC void softdep_collect_pagecache __P((struct inode *));
+STATIC void softdep_free_pagecache __P((struct inode *));
+STATIC struct vnode *softdep_lookupvp(struct fs *, ino_t);
+STATIC struct buf *softdep_lookup_pcbp __P((struct vnode *, ufs_lbn_t));
+void softdep_pageiodone __P((struct buf *));
/*
* Exported softdep operations.
@@ -176,6 +195,7 @@ struct bio_ops bioops = {
softdep_deallocate_dependencies, /* io_deallocate */
softdep_move_dependencies, /* io_movedeps */
softdep_count_dependencies, /* io_countdeps */
+	softdep_pageiodone,		/* io_pageiodone */
};
/*
@@ -1055,6 +1075,7 @@ top:
void
softdep_initialize()
{
+ int i;
LIST_INIT(&mkdirlisthd);
LIST_INIT(&softdep_workitem_pending);
@@ -1073,6 +1094,11 @@ softdep_initialize()
newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash);
sema_init(&newblk_in_progress, "newblk", PRIBIO, 0);
timeout_set(&proc_waiting_timeout, pause_timer, 0);
+ pool_init(&sdpcpool, sizeof(struct buf), 0, 0, 0, "sdpcpool",
+ 0, pool_page_alloc_nointr, pool_page_free_nointr, M_TEMP);
+ for (i = 0; i < PCBPHASHSIZE; i++) {
+ LIST_INIT(&pcbphashhead[i]);
+ }
}
/*
@@ -1325,11 +1351,16 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
LIST_REMOVE(newblk, nb_hash);
FREE(newblk, M_NEWBLK);
+ /*
+ * If we were not passed a bp to attach the dep to,
+ * then this must be for a regular file.
+ * Allocate a buffer to represent the page cache pages
+ * that are the real dependency. The pages themselves
+ * cannot refer to the dependency since we don't want to
+ * add a field to struct vm_page for this.
+ */
if (bp == NULL) {
- /*
- * XXXUBC - Yes, I know how to fix this, but not right now.
- */
- panic("softdep_setup_allocdirect: Bonk art in the head\n");
+ bp = softdep_setup_pagecache(ip, lbn, newsize);
}
WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
if (lbn >= NDADDR) {
@@ -1563,10 +1594,7 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
if (nbp == NULL) {
- /*
- * XXXUBC - Yes, I know how to fix this, but not right now.
- */
- panic("softdep_setup_allocindir_page: Bonk art in the head\n");
+ nbp = softdep_setup_pagecache(ip, lbn, ip->i_fs->fs_bsize);
}
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
FREE_LOCK(&lk);
@@ -1745,6 +1773,7 @@ softdep_setup_freeblocks(ip, length)
int i, delay, error;
fs = ip->i_fs;
+ vp = ITOV(ip);
if (length != 0)
panic("softdep_setup_freeblocks: non-zero length");
MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks),
@@ -1804,9 +1833,15 @@ softdep_setup_freeblocks(ip, length)
* with this inode are obsolete and can simply be de-allocated.
* We must first merge the two dependency lists to get rid of
* any duplicate freefrag structures, then purge the merged list.
+ * We must remove any pagecache markers from the pagecache
+ * hashtable first because any I/Os in flight will want to see
+	 * dependencies attached to their pagecache markers. We cannot
+	 * free the pagecache markers themselves until all of the
+	 * dependencies that reference them have been freed, so they
+	 * are freed later, in softdep_free_pagecache().
* If we still have a bitmap dependency, then the inode has never
* been written to disk, so we can free any fragments without delay.
*/
+ softdep_collect_pagecache(ip);
merge_inode_lists(inodedep);
while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
free_allocdirect(&inodedep->id_inoupdt, adp, delay);
@@ -1818,7 +1853,6 @@ softdep_setup_freeblocks(ip, length)
* Once they are all there, walk the list and get rid of
* any dependencies.
*/
- vp = ITOV(ip);
ACQUIRE_LOCK(&lk);
drain_output(vp, 1);
while (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) {
@@ -1830,6 +1864,7 @@ softdep_setup_freeblocks(ip, length)
brelse(bp);
ACQUIRE_LOCK(&lk);
}
+ softdep_free_pagecache(ip);
if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
(void) free_inodedep(inodedep);
FREE_LOCK(&lk);
@@ -2898,7 +2933,6 @@ handle_workitem_freefile(freefile)
struct freefile *freefile;
{
struct fs *fs;
- struct vnode vp;
struct inode tip;
struct inodedep *idp;
int error;
@@ -2914,8 +2948,7 @@ handle_workitem_freefile(freefile)
tip.i_devvp = freefile->fx_devvp;
tip.i_dev = freefile->fx_devvp->v_rdev;
tip.i_fs = fs;
- tip.i_vnode = &vp;
- vp.v_data = &tip;
+ tip.i_vnode = NULL;
if ((error = ffs_freefile(&tip, freefile->fx_oldinum,
freefile->fx_mode)) != 0) {
@@ -4313,6 +4346,7 @@ flush_inodedep_deps(fs, ino)
struct allocdirect *adp;
int error, waitfor;
struct buf *bp;
+ struct vnode *vp;
/*
* This work is done in two passes. The first pass grabs most
@@ -4332,6 +4366,27 @@ flush_inodedep_deps(fs, ino)
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
return (0);
+
+ /*
+ * When file data was in the buffer cache,
+ * softdep_sync_metadata() would start i/o on
+ * file data buffers itself. But now that
+ * we're using the page cache to hold file data,
+ * we need something else to trigger those flushes.
+	 * Let's just do it here.
+ */
+
+ vp = softdep_lookupvp(fs, ino);
+ if (vp) {
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+
+ simple_lock(&uobj->vmobjlock);
+ (uobj->pgops->pgo_flush)(uobj, 0, 0,
+ PGO_ALLPAGES|PGO_CLEANIT|
+ (waitfor == MNT_NOWAIT ? 0: PGO_SYNCIO));
+ simple_unlock(&uobj->vmobjlock);
+ }
+
TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
if (adp->ad_state & DEPCOMPLETE)
continue;
@@ -4944,3 +4999,196 @@ softdep_error(func, error)
/* XXX should do something better! */
printf("%s: got error %d while accessing filesystem\n", func, error);
}
+
+/*
+ * Allocate a buffer on which to attach a dependency.
+ */
+STATIC struct buf *
+softdep_setup_pagecache(ip, lbn, size)
+ struct inode *ip;
+ ufs_lbn_t lbn;
+ long size;
+{
+ struct vnode *vp = ITOV(ip);
+ struct buf *bp;
+ int s;
+
+ /*
+ * Enter pagecache dependency buf in hash.
+ */
+
+ bp = softdep_lookup_pcbp(vp, lbn);
+ if (bp == NULL) {
+ s = splbio();
+ bp = pool_get(&sdpcpool, PR_WAITOK);
+ splx(s);
+
+ bp->b_vp = vp;
+ bp->b_lblkno = lbn;
+ bp->b_bcount = bp->b_resid = size;
+ LIST_INIT(&bp->b_dep);
+ LIST_INSERT_HEAD(&pcbphashhead[PCBPHASH(vp, lbn)], bp, b_hash);
+ LIST_INSERT_HEAD(&ip->i_pcbufhd, bp, b_vnbufs);
+ } else {
+ KASSERT(size >= bp->b_bcount);
+ bp->b_resid += size - bp->b_bcount;
+ bp->b_bcount = size;
+ }
+ return bp;
+}
+
+/*
+ * softdep_collect_pagecache() and softdep_free_pagecache()
+ * are used to remove page cache dependency buffers when
+ * a file is being truncated to 0.
+ */
+
+STATIC void
+softdep_collect_pagecache(ip)
+ struct inode *ip;
+{
+ struct buf *bp;
+
+ LIST_FOREACH(bp, &ip->i_pcbufhd, b_vnbufs) {
+ LIST_REMOVE(bp, b_hash);
+ }
+}
+
+STATIC void
+softdep_free_pagecache(ip)
+ struct inode *ip;
+{
+ struct buf *bp, *nextbp;
+
+ for (bp = LIST_FIRST(&ip->i_pcbufhd); bp != NULL; bp = nextbp) {
+ nextbp = LIST_NEXT(bp, b_vnbufs);
+ LIST_REMOVE(bp, b_vnbufs);
+ KASSERT(LIST_FIRST(&bp->b_dep) == NULL);
+ pool_put(&sdpcpool, bp);
+ }
+}
+
+STATIC struct vnode *
+softdep_lookupvp(fs, ino)
+ struct fs *fs;
+ ino_t ino;
+{
+ struct mount *mp;
+ extern struct vfsops ffs_vfsops;
+
+ CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
+ if (mp->mnt_op == &ffs_vfsops &&
+ VFSTOUFS(mp)->um_fs == fs) {
+ break;
+ }
+ }
+ if (mp == NULL) {
+ return NULL;
+ }
+ return ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino);
+}
+
+STATIC struct buf *
+softdep_lookup_pcbp(vp, lbn)
+ struct vnode *vp;
+ ufs_lbn_t lbn;
+{
+ struct buf *bp;
+
+ LIST_FOREACH(bp, &pcbphashhead[PCBPHASH(vp, lbn)], b_hash) {
+ if (bp->b_vp == vp && bp->b_lblkno == lbn) {
+ break;
+ }
+ }
+ return bp;
+}
+
+/*
+ * Do softdep i/o completion processing for page cache writes.
+ */
+
+void
+softdep_pageiodone(bp)
+ struct buf *bp;
+{
+ int npages = bp->b_bufsize >> PAGE_SHIFT;
+ struct vnode *vp = bp->b_vp;
+ struct vm_page *pg;
+ struct buf *pcbp = NULL;
+ struct allocdirect *adp;
+ struct allocindir *aip;
+ struct worklist *wk;
+ ufs_lbn_t lbn;
+ voff_t off;
+ long iosize = bp->b_bcount;
+ int size, asize, bshift, bsize;
+ int i;
+
+ KASSERT(!(bp->b_flags & B_READ));
+ bshift = vp->v_mount->mnt_fs_bshift;
+ bsize = 1 << bshift;
+ asize = min(PAGE_SIZE, bsize);
+ ACQUIRE_LOCK(&lk);
+ for (i = 0; i < npages; i++) {
+ pg = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
+ if (pg == NULL) {
+ continue;
+ }
+
+ for (off = pg->offset;
+ off < pg->offset + PAGE_SIZE;
+ off += bsize) {
+ size = min(asize, iosize);
+ iosize -= size;
+ lbn = off >> bshift;
+ if (pcbp == NULL || pcbp->b_lblkno != lbn) {
+ pcbp = softdep_lookup_pcbp(vp, lbn);
+ }
+ if (pcbp == NULL) {
+ continue;
+ }
+ pcbp->b_resid -= size;
+ if (pcbp->b_resid < 0) {
+ panic("softdep_pageiodone: "
+ "resid < 0, vp %p lbn 0x%lx pcbp %p",
+ vp, lbn, pcbp);
+ }
+ if (pcbp->b_resid > 0) {
+ continue;
+ }
+
+ /*
+ * We've completed all the i/o for this block.
+ * mark the dep complete.
+ */
+
+ KASSERT(LIST_FIRST(&pcbp->b_dep) != NULL);
+ while ((wk = LIST_FIRST(&pcbp->b_dep))) {
+ WORKLIST_REMOVE(wk);
+ switch (wk->wk_type) {
+ case D_ALLOCDIRECT:
+ adp = WK_ALLOCDIRECT(wk);
+ adp->ad_state |= COMPLETE;
+ handle_allocdirect_partdone(adp);
+ break;
+
+ case D_ALLOCINDIR:
+ aip = WK_ALLOCINDIR(wk);
+ aip->ai_state |= COMPLETE;
+ handle_allocindir_partdone(aip);
+ break;
+
+ default:
+ panic("softdep_pageiodone: "
+ "bad type %d, pcbp %p wk %p",
+ wk->wk_type, pcbp, wk);
+ }
+ }
+ LIST_REMOVE(pcbp, b_hash);
+ LIST_REMOVE(pcbp, b_vnbufs);
+ pool_put(&sdpcpool, pcbp);
+ pcbp = NULL;
+ }
+ }
+ FREE_LOCK(&lk);
+}
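A worked example of the b_resid accounting in softdep_pageiodone(), assuming
PAGE_SIZE = 4096 and an 8192-byte filesystem block (bshift = 13) written by a
single two-page I/O:

/*
 *	asize = min(PAGE_SIZE, bsize) = 4096
 *	page 0: off = 0    -> lbn = 0, pcbp->b_resid: 8192 -> 4096
 *	page 1: off = 4096 -> lbn = 0, pcbp->b_resid: 4096 -> 0
 *
 * only when b_resid reaches 0, i.e. every page of the block has hit
 * the disk, are the D_ALLOCDIRECT/D_ALLOCINDIR dependencies marked
 * COMPLETE and the pagecache marker unhashed and freed.
 */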
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index b1dee123893..19c77726fa8 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_vfsops.c,v 1.45 2001/11/21 22:21:48 csapuntz Exp $ */
+/* $OpenBSD: ffs_vfsops.c,v 1.46 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $ */
/*
@@ -737,11 +737,14 @@ ffs_mountfs(devvp, mp, p)
else
mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
+ mp->mnt_fs_bshift = fs->fs_bshift;
+ mp->mnt_dev_bshift = DEV_BSHIFT;
mp->mnt_flag |= MNT_LOCAL;
ump->um_mountp = mp;
ump->um_dev = dev;
ump->um_devvp = devvp;
ump->um_nindir = fs->fs_nindir;
+ ump->um_lognindir = ffs(fs->fs_nindir) - 1;
ump->um_bptrtodb = fs->fs_fsbtodb;
ump->um_seqinc = fs->fs_frag;
for (i = 0; i < MAXQUOTAS; i++)
@@ -1119,6 +1122,7 @@ retry:
ip->i_fs = fs = ump->um_fs;
ip->i_dev = dev;
ip->i_number = ino;
+ LIST_INIT(&ip->i_pcbufhd);
ip->i_vtbl = &ffs_vtbl;
/*
@@ -1199,6 +1203,7 @@ retry:
ip->i_ffs_uid = ip->i_din.ffs_din.di_ouid; /* XXX */
ip->i_ffs_gid = ip->i_din.ffs_din.di_ogid; /* XXX */
} /* XXX */
+ uvm_vnp_setsize(vp, ip->i_ffs_size);
*vpp = vp;
return (0);
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 26e9bbaf9da..8190ef82eb3 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_vnops.c,v 1.20 2001/11/06 19:53:21 miod Exp $ */
+/* $OpenBSD: ffs_vnops.c,v 1.21 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */
/*
@@ -107,8 +107,13 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
{ &vop_advlock_desc, ufs_advlock }, /* advlock */
{ &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */
{ &vop_bwrite_desc, vop_generic_bwrite },
- { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL }
+ { &vop_ballocn_desc, ffs_ballocn },
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_putpages_desc, genfs_putpages },
+ { &vop_size_desc, ffs_size },
+ { NULL, NULL }
};
+
struct vnodeopv_desc ffs_vnodeop_opv_desc =
{ &ffs_vnodeop_p, ffs_vnodeop_entries };
@@ -229,6 +234,7 @@ ffs_fsync(v)
struct vnode *vp = ap->a_vp;
struct buf *bp, *nbp;
int s, error, passes, skipmeta;
+ struct uvm_object *uobj;
if (vp->v_type == VBLK &&
vp->v_specmountpoint != NULL &&
@@ -236,13 +242,22 @@ ffs_fsync(v)
softdep_fsync_mountdev(vp);
/*
- * Flush all dirty buffers associated with a vnode.
+ * Flush all dirty data associated with a vnode.
*/
passes = NIADDR + 1;
skipmeta = 0;
if (ap->a_waitfor == MNT_WAIT)
skipmeta = 1;
s = splbio();
+
+ if (vp->v_type == VREG) {
+ uobj = &vp->v_uvm.u_obj;
+ simple_lock(&uobj->vmobjlock);
+ (uobj->pgops->pgo_flush)(uobj, 0, 0, PGO_ALLPAGES|PGO_CLEANIT|
+ ((ap->a_waitfor == MNT_WAIT) ? PGO_SYNCIO : 0));
+ simple_unlock(&uobj->vmobjlock);
+ }
+
loop:
for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp;
bp = LIST_NEXT(bp, b_vnbufs))
@@ -281,8 +296,10 @@ loop:
*/
if (passes > 0 || ap->a_waitfor != MNT_WAIT)
(void) bawrite(bp);
- else if ((error = bwrite(bp)) != 0)
+ else if ((error = bwrite(bp)) != 0) {
+ printf("ffs_fsync: bwrite failed %d\n", error);
return (error);
+ }
s = splbio();
/*
* Since we may have slept during the I/O, we need
@@ -325,7 +342,11 @@ loop:
}
}
splx(s);
- return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
+
+ error = (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
+ if (error)
+ printf("ffs_fsync: UFS_UPDATE failed. %d\n", error);
+ return (error);
}
/*
@@ -349,3 +370,31 @@ ffs_reclaim(v)
vp->v_data = NULL;
return (0);
}
+
+/*
+ * Return the last logical file offset that should be written for this file
+ * if we're doing a write that ends at "size".
+ */
+int
+ffs_size(v)
+ void *v;
+{
+ struct vop_size_args /* {
+ struct vnode *a_vp;
+ off_t a_size;
+ off_t *a_eobp;
+ } */ *ap = v;
+ struct inode *ip = VTOI(ap->a_vp);
+ struct fs *fs = ip->i_fs;
+ ufs_lbn_t olbn, nlbn;
+
+ olbn = lblkno(fs, ip->i_ffs_size);
+ nlbn = lblkno(fs, ap->a_size);
+
+ if (nlbn < NDADDR && olbn <= nlbn) {
+ *ap->a_eobp = fragroundup(fs, ap->a_size);
+ } else {
+ *ap->a_eobp = blkroundup(fs, ap->a_size);
+ }
+ return 0;
+}
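Two examples of what ffs_size() returns, assuming fs_bsize = 8192,
fs_fsize = 1024 and NDADDR = 12:

/*
 *	i_ffs_size = 5000 (olbn = 0), a_size = 6000 (nlbn = 0):
 *		nlbn < NDADDR && olbn <= nlbn
 *		*a_eobp = fragroundup(fs, 6000) = 6144
 *		(the last direct block may stay a fragment)
 *
 *	a_size = 110000 (nlbn = 13, not < NDADDR):
 *		*a_eobp = blkroundup(fs, 110000) = 114688
 *		(indirect-mapped blocks are always full-sized)
 */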
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 5665b276a0f..98c73de5579 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: inode.h,v 1.16 2001/07/04 06:10:50 angelos Exp $ */
+/* $OpenBSD: inode.h,v 1.17 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $ */
/*
@@ -84,6 +84,7 @@ struct inode {
#define i_e2fs inode_u.e2fs
struct cluster_info i_ci;
+ LIST_HEAD(,buf) i_pcbufhd;
struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
u_quad_t i_modrev; /* Revision level for NFS lease. */
struct lockf *i_lockf;/* Head of byte-level lock list. */
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index add641e15ce..fdf5c1be055 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_bmap.c,v 1.10 2001/11/21 22:24:24 csapuntz Exp $ */
+/* $OpenBSD: ufs_bmap.c,v 1.11 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_bmap.c,v 1.3 1996/02/09 22:36:00 christos Exp $ */
/*
@@ -233,6 +233,7 @@ ufs_getlbns(vp, bn, ap, nump)
long metalbn, realbn;
struct ufsmount *ump;
int64_t blockcnt;
+ int lbc;
int i, numlevels, off;
ump = VFSTOUFS(vp->v_mount);
@@ -260,10 +261,14 @@ ufs_getlbns(vp, bn, ap, nump)
* at the given level of indirection, and NIADDR - i is the number
* of levels of indirection needed to locate the requested block.
*/
- for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+ bn -= NDADDR;
+ for (lbc = 0, i = NIADDR;; i--, bn -= blockcnt) {
if (i == 0)
return (EFBIG);
- blockcnt *= MNINDIR(ump);
+
+ lbc += ump->um_lognindir;
+ blockcnt = (int64_t)1 << lbc;
+
if (bn < blockcnt)
break;
}
@@ -289,8 +294,9 @@ ufs_getlbns(vp, bn, ap, nump)
if (metalbn == realbn)
break;
- blockcnt /= MNINDIR(ump);
- off = (bn / blockcnt) % MNINDIR(ump);
+ lbc -= ump->um_lognindir;
+ blockcnt = (int64_t)1 << lbc;
+ off = (bn >> lbc) & (MNINDIR(ump) - 1);
++numlevels;
ap->in_lbn = metalbn;
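The point of the new um_lognindir field is to turn the per-level divide and
modulo into shifts and masks, which is valid because NINDIR is always a power
of two. For example, with MNINDIR(ump) = 2048:

/*
 *	um_lognindir = ffs(2048) - 1 = 11
 *	level 1: lbc = 11, blockcnt = 1 << 11 = 2048
 *	level 2: lbc = 22, blockcnt = 1 << 22 = 4194304
 *	level 3: lbc = 33, blockcnt = 1 << 33
 *
 * and for each level,
 *
 *	(bn / blockcnt) % MNINDIR(ump) == (bn >> lbc) & (MNINDIR(ump) - 1)
 *
 * so the 64-bit divisions disappear from this path.
 */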
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index 50175a0ec86..fc39e16b45e 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_extern.h,v 1.12 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ufs_extern.h,v 1.13 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $ */
/*-
@@ -121,6 +121,7 @@ void ufs_ihashrem __P((struct inode *));
/* ufs_inode.c */
int ufs_init __P((struct vfsconf *));
int ufs_reclaim __P((struct vnode *, struct proc *));
+int ufs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *, int));
/* ufs_lookup.c */
void ufs_dirbad __P((struct inode *, doff_t, char *));
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index 8a3935632fb..3865342fde0 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_inode.c,v 1.10 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ufs_inode.c,v 1.11 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_inode.c,v 1.7 1996/05/11 18:27:52 mycroft Exp $ */
/*
@@ -151,3 +151,150 @@ ufs_reclaim(vp, p)
ufs_quota_delete(ip);
return (0);
}
+
+/*
+ * allocate a range of blocks in a file.
+ * after this function returns, any page entirely contained within the range
+ * will map to invalid data and thus must be overwritten before it is made
+ * accessible to others.
+ */
+
+int
+ufs_balloc_range(vp, off, len, cred, flags)
+ struct vnode *vp;
+ off_t off, len;
+ struct ucred *cred;
+ int flags;
+{
+ off_t oldeof, neweof, oldeob, neweob, oldpagestart, pagestart;
+ struct uvm_object *uobj;
+ int i, delta, error, npages1, npages2;
+ int bshift = vp->v_mount->mnt_fs_bshift;
+ int bsize = 1 << bshift;
+ int ppb = MAX(bsize >> PAGE_SHIFT, 1);
+ struct vm_page *pgs1[ppb], *pgs2[ppb];
+ UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
+ vp, off, len, vp->v_uvm.u_size);
+
+ oldeof = vp->v_uvm.u_size;
+ error = VOP_SIZE(vp, oldeof, &oldeob);
+ if (error) {
+ return error;
+ }
+
+ neweof = MAX(vp->v_uvm.u_size, off + len);
+ error = VOP_SIZE(vp, neweof, &neweob);
+ if (error) {
+ return error;
+ }
+
+ error = 0;
+ uobj = &vp->v_uvm.u_obj;
+ pgs1[0] = pgs2[0] = NULL;
+
+ /*
+ * if the last block in the file is not a full block (ie. it is a
+ * fragment), and this allocation is causing the fragment to change
+ * size (either to expand the fragment or promote it to a full block),
+ * cache the old last block (at its new size).
+ */
+
+ oldpagestart = trunc_page(oldeof) & ~(bsize - 1);
+ if ((oldeob & (bsize - 1)) != 0 && oldeob != neweob) {
+ npages1 = MIN(ppb, (round_page(neweob) - oldpagestart) >>
+ PAGE_SHIFT);
+ memset(pgs1, 0, npages1 * sizeof(struct vm_page *));
+ simple_lock(&uobj->vmobjlock);
+ error = VOP_GETPAGES(vp, oldpagestart, pgs1, &npages1,
+ 0, VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF);
+ if (error) {
+ goto out;
+ }
+ simple_lock(&uobj->vmobjlock);
+ uvm_lock_pageq();
+ for (i = 0; i < npages1; i++) {
+ UVMHIST_LOG(ubchist, "got pgs1[%d] %p", i, pgs1[i],0,0);
+ KASSERT((pgs1[i]->flags & PG_RELEASED) == 0);
+ pgs1[i]->flags &= ~PG_CLEAN;
+ uvm_pageactivate(pgs1[i]);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ }
+
+ /*
+ * cache the new range as well. this will create zeroed pages
+ * where the new block will be and keep them locked until the
+ * new block is allocated, so there will be no window where
+ * the old contents of the new block is visible to racing threads.
+ */
+
+ pagestart = trunc_page(off) & ~(bsize - 1);
+ if (pagestart != oldpagestart || pgs1[0] == NULL) {
+ npages2 = MIN(ppb, (round_page(neweob) - pagestart) >>
+ PAGE_SHIFT);
+ memset(pgs2, 0, npages2 * sizeof(struct vm_page *));
+ simple_lock(&uobj->vmobjlock);
+ error = VOP_GETPAGES(vp, pagestart, pgs2, &npages2, 0,
+ VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF);
+ if (error) {
+ goto out;
+ }
+ simple_lock(&uobj->vmobjlock);
+ uvm_lock_pageq();
+ for (i = 0; i < npages2; i++) {
+ UVMHIST_LOG(ubchist, "got pgs2[%d] %p", i, pgs2[i],0,0);
+ KASSERT((pgs2[i]->flags & PG_RELEASED) == 0);
+ pgs2[i]->flags &= ~PG_CLEAN;
+ uvm_pageactivate(pgs2[i]);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ }
+
+ /*
+ * adjust off to be block-aligned.
+ */
+
+ delta = off & (bsize - 1);
+ off -= delta;
+ len += delta;
+
+ /*
+ * now allocate the range.
+ */
+
+ lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL, curproc);
+ error = VOP_BALLOCN(vp, off, len, cred, flags);
+ lockmgr(&vp->v_glock, LK_RELEASE, NULL, curproc);
+
+ /*
+ * unbusy any pages we are holding.
+ * if we got an error, free any pages we created past the old eob.
+ */
+
+out:
+ simple_lock(&uobj->vmobjlock);
+ if (error) {
+ (void) (uobj->pgops->pgo_flush)(uobj, round_page(oldeob), 0,
+ PGO_FREE);
+ }
+ if (pgs1[0] != NULL) {
+ uvm_page_unbusy(pgs1, npages1);
+
+ /*
+ * The data in the frag might be moving to a new disk location.
+ * We need to flush pages to the new disk locations.
+ */
+
+ (uobj->pgops->pgo_flush)(uobj, oldeof & ~(bsize - 1),
+ MIN((oldeof + bsize) & ~(bsize - 1), neweof),
+ PGO_CLEANIT | ((flags & B_SYNC) ? PGO_SYNCIO : 0));
+ }
+ if (pgs2[0] != NULL) {
+ uvm_page_unbusy(pgs2, npages2);
+ }
+ simple_unlock(&uobj->vmobjlock);
+ return error;
+}
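
[editor's note: the alignment arithmetic in ufs_balloc_range() is easy to misread.
Below is a minimal userland sketch (not part of the patch) of how "pagestart" and
the delta adjustment are computed before VOP_BALLOCN() is called; the page shift,
block shift and request offsets are assumptions chosen for illustration.]

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12				/* assumed 4KB pages */
#define PAGE_MASK	((1ULL << PAGE_SHIFT) - 1)
#define trunc_page(x)	((x) & ~PAGE_MASK)

int
main(void)
{
	int bshift = 14;				/* assumed 16KB fs block */
	uint64_t bsize = 1ULL << bshift;
	uint64_t off = 0x12345, len = 0x1000;		/* hypothetical request */

	/* block-aligned start of the page run, as in "pagestart" above */
	uint64_t pagestart = trunc_page(off) & ~(bsize - 1);

	/* widen [off, off+len) left to a block boundary before VOP_BALLOCN() */
	uint64_t delta = off & (bsize - 1);
	off -= delta;
	len += delta;

	printf("pagestart 0x%llx off 0x%llx len 0x%llx\n",
	    (unsigned long long)pagestart, (unsigned long long)off,
	    (unsigned long long)len);
	return 0;
}

[with a 16KB block, a request at 0x12345 is widened left to 0x10000 and the
length grows by the same delta, so the allocation covers whole blocks.]
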
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
index bbf1391dfe5..e0777e4b55f 100644
--- a/sys/ufs/ufs/ufs_readwrite.c
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_readwrite.c,v 1.19 2001/06/27 04:58:49 art Exp $ */
+/* $OpenBSD: ufs_readwrite.c,v 1.20 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_readwrite.c,v 1.9 1996/05/11 18:27:57 mycroft Exp $ */
/*-
@@ -76,21 +76,22 @@ READ(v)
int a_ioflag;
struct ucred *a_cred;
} */ *ap = v;
- register struct vnode *vp;
- register struct inode *ip;
- register struct uio *uio;
- register FS *fs;
+ struct vnode *vp;
+ struct inode *ip;
+ struct uio *uio;
+ FS *fs;
+ void *win;
+ vsize_t bytelen;
struct buf *bp;
daddr_t lbn, nextlbn;
off_t bytesinfile;
long size, xfersize, blkoffset;
int error;
- u_short mode;
vp = ap->a_vp;
ip = VTOI(vp);
- mode = ip->i_ffs_mode;
uio = ap->a_uio;
+ error = 0;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_READ)
@@ -110,6 +111,24 @@ READ(v)
if (uio->uio_resid == 0)
return (0);
+ if (uio->uio_offset >= ip->i_ffs_size)
+ goto out;
+
+ if (vp->v_type == VREG) {
+ while (uio->uio_resid > 0) {
+ bytelen = min(ip->i_ffs_size - uio->uio_offset,
+ uio->uio_resid);
+ if (bytelen == 0)
+ break;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error)
+ break;
+ }
+ goto out;
+ }
for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
if ((bytesinfile = ip->i_ffs_size - uio->uio_offset) <= 0)
@@ -131,9 +150,6 @@ READ(v)
#else
if (lblktosize(fs, nextlbn) >= ip->i_ffs_size)
error = bread(vp, lbn, size, NOCRED, &bp);
- else if (doclusterread)
- error = cluster_read(vp, &ip->i_ci,
- ip->i_ffs_size, lbn, size, NOCRED, &bp);
else if (lbn - 1 == ip->i_ci.ci_lastr) {
int nextsize = BLKSIZE(fs, ip, nextlbn);
error = breadn(vp, lbn,
@@ -158,7 +174,7 @@ READ(v)
break;
xfersize = size;
}
- error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize,
+ error = uiomove((char *)bp->b_data + blkoffset, xfersize,
uio);
if (error)
break;
@@ -166,6 +182,7 @@ READ(v)
}
if (bp != NULL)
brelse(bp);
+out:
ip->i_flag |= IN_ACCESS;
return (error);
}
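
[editor's note: the new VREG path in READ() copies through fixed-size UBC mapping
windows; ubc_alloc() clamps the requested length so one uiomove() never crosses a
window boundary. Below is a userland analogy of that window-by-window loop (the
window size, file contents and helper name are assumptions, not kernel API).]

#include <stdio.h>
#include <string.h>

#define UBC_WINSIZE	8192		/* assumed window size */

static char file[3 * UBC_WINSIZE];	/* stands in for the vnode's pages */

/*
 * copy "resid" bytes starting at file offset "off" into dst,
 * one mapping window at a time (memcpy stands in for uiomove()).
 */
static void
read_through_windows(char *dst, size_t off, size_t resid)
{
	while (resid > 0) {
		/* ubc_alloc() clamps the length to the window remainder */
		size_t slot = off & (UBC_WINSIZE - 1);
		size_t bytelen = UBC_WINSIZE - slot;
		if (bytelen > resid)
			bytelen = resid;
		memcpy(dst, file + off, bytelen);
		dst += bytelen;
		off += bytelen;
		resid -= bytelen;
	}
}

int
main(void)
{
	char buf[300];

	memset(file, 'x', sizeof(file));
	/* a read that crosses a window boundary takes two iterations */
	read_through_windows(buf, UBC_WINSIZE - 100, sizeof(buf));
	printf("copied %zu bytes, first byte '%c'\n", sizeof(buf), buf[0]);
	return 0;
}
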
@@ -183,15 +200,19 @@ WRITE(v)
int a_ioflag;
struct ucred *a_cred;
} */ *ap = v;
- register struct vnode *vp;
- register struct uio *uio;
- register struct inode *ip;
- register FS *fs;
+ struct vnode *vp;
+ struct uio *uio;
+ struct inode *ip;
+ FS *fs;
struct buf *bp;
struct proc *p;
daddr_t lbn;
off_t osize;
int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
+ void *win;
+ vsize_t bytelen;
+ off_t oldoff;
+ boolean_t rv;
extended = 0;
ioflag = ap->a_ioflag;
@@ -239,9 +260,77 @@ WRITE(v)
resid = uio->uio_resid;
osize = ip->i_ffs_size;
- flags = ioflag & IO_SYNC ? B_SYNC : 0;
+ error = 0;
+
+ if (vp->v_type != VREG)
+ goto bcache;
+
+ while (uio->uio_resid > 0) {
+ oldoff = uio->uio_offset;
+ blkoffset = blkoff(fs, uio->uio_offset);
+ bytelen = min(fs->fs_bsize - blkoffset, uio->uio_resid);
+
+ /*
+ * XXXUBC if file is mapped and this is the last block,
+ * process one page at a time.
+ */
+
+ error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
+ ap->a_cred, ioflag & IO_SYNC ? B_SYNC : 0);
+ if (error) {
+ return error;
+ }
- for (error = 0; uio->uio_resid > 0;) {
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, &bytelen,
+ UBC_WRITE);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+
+ /*
+ * flush what we just wrote if necessary.
+ * XXXUBC simplistic async flushing.
+ */
+
+ if (ioflag & IO_SYNC) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+#if 1
+ /*
+ * XXX
+ * flush whole blocks in case there are deps.
+ * otherwise we can dirty and flush part of
+ * a block multiple times and the softdep code
+ * will get confused. fixing this the right way
+ * is complicated so we'll work around it for now.
+ */
+
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj,
+ oldoff & ~(fs->fs_bsize - 1),
+ (oldoff + bytelen + fs->fs_bsize - 1) &
+ ~(fs->fs_bsize - 1),
+ PGO_CLEANIT|PGO_SYNCIO);
+#else
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, oldoff, oldoff + bytelen,
+ PGO_CLEANIT|PGO_SYNCIO);
+#endif
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ } else if (oldoff >> 16 != uio->uio_offset >> 16) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, (oldoff >> 16) << 16,
+ (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ }
+ if (error) {
+ break;
+ }
+ }
+ goto out;
+
+bcache:
+ flags = ioflag & IO_SYNC ? B_SYNC : 0;
+ while (uio->uio_resid > 0) {
lbn = lblkno(fs, uio->uio_offset);
blkoffset = blkoff(fs, uio->uio_offset);
xfersize = fs->fs_bsize - blkoffset;
@@ -260,14 +349,12 @@ WRITE(v)
uvm_vnp_setsize(vp, ip->i_ffs_size);
extended = 1;
}
- (void)uvm_vnp_uncache(vp);
size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
if (size < xfersize)
xfersize = size;
- error =
- uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
+ error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
if (error != 0)
bzero((char *)bp->b_data + blkoffset, xfersize);
@@ -287,13 +374,14 @@ WRITE(v)
#endif
if (error || xfersize == 0)
break;
- ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
/*
* If we successfully wrote any data, and we are not the superuser
* we clear the setuid and setgid bits as a precaution against
* tampering.
*/
+out:
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
ip->i_ffs_mode &= ~(ISUID | ISGID);
if (resid > uio->uio_resid)
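
[editor's note: the non-IO_SYNC path in WRITE() above pushes dirty pages out
lazily: whenever a write crosses a 64KB boundary (the ">> 16" comparison), the
completed 64KB chunks are flushed with PGO_CLEANIT. A standalone sketch of that
heuristic, with assumed offsets:]

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t oldoff = 0xfff0;	/* hypothetical offset before uiomove */
	uint64_t newoff = 0x10010;	/* offset after: crossed a 64KB line */

	if ((oldoff >> 16) != (newoff >> 16)) {
		/* flush the whole 64KB chunks that are now complete */
		uint64_t start = (oldoff >> 16) << 16;
		uint64_t end = (newoff >> 16) << 16;
		printf("flush [0x%llx, 0x%llx)\n",
		    (unsigned long long)start, (unsigned long long)end);
	}
	return 0;
}
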
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 4caf0ef78c7..e926ee7aff6 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_vnops.c,v 1.39 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ufs_vnops.c,v 1.40 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $ */
/*
@@ -469,8 +469,6 @@ ufs_chmod(vp, mode, cred, p)
ip->i_ffs_mode &= ~ALLPERMS;
ip->i_ffs_mode |= (mode & ALLPERMS);
ip->i_flag |= IN_CHANGE;
- if ((vp->v_flag & VTEXT) && (ip->i_ffs_mode & S_ISTXT) == 0)
- (void) uvm_vnp_uncache(vp);
return (0);
}
diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h
index e9dc71f9855..981eb21474b 100644
--- a/sys/ufs/ufs/ufsmount.h
+++ b/sys/ufs/ufs/ufsmount.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufsmount.h,v 1.5 1999/06/01 01:48:52 millert Exp $ */
+/* $OpenBSD: ufsmount.h,v 1.6 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufsmount.h,v 1.4 1994/12/21 20:00:23 mycroft Exp $ */
/*
@@ -64,6 +64,7 @@ struct ufsmount {
struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */
struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */
u_long um_nindir; /* indirect ptrs per block */
+ u_long um_lognindir; /* log2 of um_nindir */
u_long um_bptrtodb; /* indir ptr to disk block */
u_long um_seqinc; /* inc between seq blocks */
time_t um_btime[MAXQUOTAS]; /* block quota time limit */
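
[editor's note: the new um_lognindir field caches log2 of um_nindir. Assuming its
consumers rely on um_nindir being a power of two, this lets division and remainder
by the indirect-pointer count be done as a shift and a mask; the consumers are not
shown in this diff, so the sketch below is illustrative only.]

#include <stdio.h>

int
main(void)
{
	unsigned long nindir = 2048;	/* assumed indirect ptrs per block */
	unsigned long lognindir = 11;	/* log2(2048) */
	unsigned long lbn = 1234567;	/* hypothetical logical block */

	/* divide/mod by a power of two as shift/mask */
	unsigned long q = lbn >> lognindir;	/* lbn / nindir */
	unsigned long r = lbn & (nindir - 1);	/* lbn % nindir */

	printf("q %lu r %lu (check: %lu %lu)\n",
	    q, r, lbn / nindir, lbn % nindir);
	return 0;
}
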
diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c
index 347867e47b8..8478141a72c 100644
--- a/sys/uvm/uvm_anon.c
+++ b/sys/uvm/uvm_anon.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_anon.c,v 1.15 2001/11/11 01:16:56 art Exp $ */
+/* $OpenBSD: uvm_anon.c,v 1.16 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_anon.c,v 1.15 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -518,9 +518,6 @@ anon_pagein(anon)
*/
pmap_clear_reference(pg);
-#ifndef UBC
- pmap_page_protect(pg, VM_PROT_NONE);
-#endif
uvm_lock_pageq();
uvm_pagedeactivate(pg);
uvm_unlock_pageq();
diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c
index 85ce0a495f6..0ebf53c3502 100644
--- a/sys/uvm/uvm_aobj.c
+++ b/sys/uvm/uvm_aobj.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_aobj.c,v 1.20 2001/11/11 01:16:56 art Exp $ */
+/* $OpenBSD: uvm_aobj.c,v 1.21 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -878,15 +878,8 @@ uao_flush(uobj, start, stop, flags)
pp->wire_count != 0)
continue;
-#ifdef UBC
/* ...and deactivate the page. */
pmap_clear_reference(pp);
-#else
- /* zap all mappings for the page. */
- pmap_page_protect(pp, VM_PROT_NONE);
-
- /* ...and deactivate the page. */
-#endif
uvm_pagedeactivate(pp);
continue;
@@ -1523,9 +1516,6 @@ uao_pagein_page(aobj, pageidx)
* deactivate the page (to put it on a page queue).
*/
pmap_clear_reference(pg);
-#ifndef UBC
- pmap_page_protect(pg, VM_PROT_NONE);
-#endif
uvm_lock_pageq();
uvm_pagedeactivate(pg);
uvm_unlock_pageq();
diff --git a/sys/uvm/uvm_bio.c b/sys/uvm/uvm_bio.c
new file mode 100644
index 00000000000..fccf51b8ece
--- /dev/null
+++ b/sys/uvm/uvm_bio.c
@@ -0,0 +1,547 @@
+/* $NetBSD: uvm_bio.c,v 1.7 2001/02/02 01:55:52 enami Exp $ */
+
+/*
+ * Copyright (c) 1998 Chuck Silvers.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * uvm_bio.c: buffered i/o vnode mapping cache
+ */
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_page.h>
+
+/*
+ * global data structures
+ */
+
+/*
+ * local functions
+ */
+
+static int ubc_fault __P((struct uvm_faultinfo *, vaddr_t,
+ vm_page_t *, int, int, vm_fault_t, vm_prot_t,
+ int));
+static struct ubc_map *ubc_find_mapping __P((struct uvm_object *, voff_t));
+
+/*
+ * local data structures
+ */
+
+#define UBC_HASH(uobj, offset) (((((u_long)(uobj)) >> 8) + \
+ (((u_long)(offset)) >> PAGE_SHIFT)) & \
+ ubc_object.hashmask)
+
+#define UBC_QUEUE(offset) (&ubc_object.inactive[((offset) / ubc_winsize) & \
+ (UBC_NQUEUES - 1)])
+
+struct ubc_map
+{
+ struct uvm_object * uobj; /* mapped object */
+ voff_t offset; /* offset into uobj */
+ int refcount; /* refcount on mapping */
+ voff_t writeoff; /* overwrite offset */
+ vsize_t writelen; /* overwrite len */
+
+ LIST_ENTRY(ubc_map) hash; /* hash table */
+ TAILQ_ENTRY(ubc_map) inactive; /* inactive queue */
+};
+
+static struct ubc_object
+{
+ struct uvm_object uobj; /* glue for uvm_map() */
+ char *kva; /* where ubc_object is mapped */
+ struct ubc_map *umap; /* array of ubc_map's */
+
+ LIST_HEAD(, ubc_map) *hash; /* hashtable for cached ubc_map's */
+ u_long hashmask; /* mask for hashtable */
+
+ TAILQ_HEAD(ubc_inactive_head, ubc_map) *inactive;
+ /* inactive queues for ubc_map's */
+
+} ubc_object;
+
+struct uvm_pagerops ubc_pager =
+{
+ NULL, /* init */
+ NULL, /* reference */
+ NULL, /* detach */
+ ubc_fault, /* fault */
+ /* ... rest are NULL */
+};
+
+int ubc_nwins = UBC_NWINS;
+int ubc_winsize = UBC_WINSIZE;
+#ifdef PMAP_PREFER
+int ubc_nqueues;
+boolean_t ubc_release_unmap = FALSE;
+#define UBC_NQUEUES ubc_nqueues
+#define UBC_RELEASE_UNMAP ubc_release_unmap
+#else
+#define UBC_NQUEUES 1
+#define UBC_RELEASE_UNMAP FALSE
+#endif
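
[editor's note: the UBC_HASH and UBC_QUEUE macros above pick a hash bucket from
the object pointer plus page index, and an inactive queue from the window number.
A standalone illustration of that index arithmetic; the mask, window size, page
shift and sample values are assumptions.]

#include <stdio.h>

#define PAGE_SHIFT	12	/* assumed */
#define WINSIZE		8192	/* assumed ubc_winsize */
#define NQUEUES		4	/* assumed UBC_NQUEUES */
#define HASHMASK	127	/* assumed ubc_object.hashmask */

int
main(void)
{
	unsigned long uobj = 0xc0ffee00UL;	/* fake object address */
	unsigned long long offset = 0x24000;	/* window-aligned offset */

	/* UBC_HASH: mix object pointer and page index, then mask */
	unsigned long hash = ((uobj >> 8) +
	    (unsigned long)(offset >> PAGE_SHIFT)) & HASHMASK;

	/* UBC_QUEUE: the window number selects an inactive queue */
	unsigned long queue = (unsigned long)((offset / WINSIZE) &
	    (NQUEUES - 1));

	printf("hash bucket %lu, inactive queue %lu\n", hash, queue);
	return 0;
}
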
+
+/*
+ * ubc_init
+ *
+ * init pager private data structures.
+ */
+
+void
+ubc_init(void)
+{
+ struct ubc_map *umap;
+ vaddr_t va;
+ int i;
+
+ /*
+ * init ubc_object.
+ * alloc and init ubc_map's.
+ * init inactive queues.
+ * alloc and init hashtable.
+ * map in ubc_object.
+ */
+
+ simple_lock_init(&ubc_object.uobj.vmobjlock);
+ ubc_object.uobj.pgops = &ubc_pager;
+ TAILQ_INIT(&ubc_object.uobj.memq);
+ ubc_object.uobj.uo_npages = 0;
+ ubc_object.uobj.uo_refs = UVM_OBJ_KERN;
+
+ ubc_object.umap = malloc(ubc_nwins * sizeof(struct ubc_map),
+ M_TEMP, M_NOWAIT);
+ if (ubc_object.umap == NULL)
+ panic("ubc_init: failed to allocate ubc_map");
+ bzero(ubc_object.umap, ubc_nwins * sizeof(struct ubc_map));
+
+ va = (vaddr_t)1L;
+#ifdef PMAP_PREFER
+ PMAP_PREFER(0, &va);
+ if (va < ubc_winsize) {
+ va = ubc_winsize;
+ }
+ ubc_nqueues = va / ubc_winsize;
+ if (ubc_nqueues != 1) {
+ ubc_release_unmap = TRUE;
+ }
+#endif
+ ubc_object.inactive = malloc(UBC_NQUEUES *
+ sizeof(struct ubc_inactive_head),
+ M_TEMP, M_NOWAIT);
+ if (ubc_object.inactive == NULL)
+ panic("ubc_init: failed to allocate inactive queue heads");
+ for (i = 0; i < UBC_NQUEUES; i++) {
+ TAILQ_INIT(&ubc_object.inactive[i]);
+ }
+ for (i = 0; i < ubc_nwins; i++) {
+ umap = &ubc_object.umap[i];
+ TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)],
+ umap, inactive);
+ }
+
+ ubc_object.hash = hashinit(ubc_nwins, M_TEMP, M_NOWAIT,
+ &ubc_object.hashmask);
+ for (i = 0; i <= ubc_object.hashmask; i++) {
+ LIST_INIT(&ubc_object.hash[i]);
+ }
+
+ if (uvm_map(kernel_map, (vaddr_t *)&ubc_object.kva,
+ ubc_nwins * ubc_winsize, &ubc_object.uobj, 0, (vsize_t)va,
+ UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
+ UVM_ADV_RANDOM, UVM_FLAG_NOMERGE))
+ != KERN_SUCCESS) {
+ panic("ubc_init: failed to map ubc_object\n");
+ }
+ UVMHIST_INIT(ubchist, 300);
+}
+
+
+/*
+ * ubc_fault: fault routine for ubc mapping
+ */
+static int
+ubc_fault(ufi, ign1, ign2, ign3, ign4, fault_type, access_type, flags)
+ struct uvm_faultinfo *ufi;
+ vaddr_t ign1;
+ vm_page_t *ign2;
+ int ign3, ign4;
+ vm_fault_t fault_type;
+ vm_prot_t access_type;
+ int flags;
+{
+ struct uvm_object *uobj;
+ struct vnode *vp;
+ struct ubc_map *umap;
+ vaddr_t va, eva, ubc_offset, slot_offset;
+ int i, error, rv, npages;
+ struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT], *pg;
+ UVMHIST_FUNC("ubc_fault"); UVMHIST_CALLED(ubchist);
+
+ /*
+ * no need to try with PGO_LOCKED...
+ * we don't need to have the map locked since we know that
+ * no one will mess with it until our reference is released.
+ */
+ if (flags & PGO_LOCKED) {
+#if 0
+ return VM_PAGER_UNLOCK;
+#else
+ uvmfault_unlockall(ufi, NULL, &ubc_object.uobj, NULL);
+ flags &= ~PGO_LOCKED;
+#endif
+ }
+
+ va = ufi->orig_rvaddr;
+ ubc_offset = va - (vaddr_t)ubc_object.kva;
+
+ UVMHIST_LOG(ubchist, "va 0x%lx ubc_offset 0x%lx at %d",
+ va, ubc_offset, access_type,0);
+
+ umap = &ubc_object.umap[ubc_offset / ubc_winsize];
+ KASSERT(umap->refcount != 0);
+ slot_offset = trunc_page(ubc_offset & (ubc_winsize - 1));
+
+ /* no umap locking needed since we have a ref on the umap */
+ uobj = umap->uobj;
+ vp = (struct vnode *)uobj;
+ KASSERT(uobj != NULL);
+
+ npages = (ubc_winsize - slot_offset) >> PAGE_SHIFT;
+
+ /*
+ * XXXUBC
+ * if npages is more than 1 we have to be sure that
+ * we set PGO_OVERWRITE correctly.
+ */
+ if (access_type == VM_PROT_WRITE) {
+ npages = 1;
+ }
+
+again:
+ memset(pgs, 0, sizeof (pgs));
+ simple_lock(&uobj->vmobjlock);
+
+ UVMHIST_LOG(ubchist, "slot_offset 0x%x writeoff 0x%x writelen 0x%x "
+ "u_size 0x%x", slot_offset, umap->writeoff, umap->writelen,
+ vp->v_uvm.u_size);
+
+ if (access_type & VM_PROT_WRITE &&
+ slot_offset >= umap->writeoff &&
+ (slot_offset + PAGE_SIZE <= umap->writeoff + umap->writelen ||
+ slot_offset + PAGE_SIZE >= vp->v_uvm.u_size - umap->offset)) {
+ UVMHIST_LOG(ubchist, "setting PGO_OVERWRITE", 0,0,0,0);
+ flags |= PGO_OVERWRITE;
+ }
+ else { UVMHIST_LOG(ubchist, "NOT setting PGO_OVERWRITE", 0,0,0,0); }
+ /* XXX be sure to zero any part of the page past EOF */
+
+ /*
+ * XXX
+ * ideally we'd like to pre-fault all of the pages we're overwriting.
+ * so for PGO_OVERWRITE, we should call VOP_GETPAGES() with all of the
+	 * pages in [writeoff, writeoff+writelen] instead of just the one.
+ */
+
+ UVMHIST_LOG(ubchist, "getpages vp %p offset 0x%x npages %d",
+ uobj, umap->offset + slot_offset, npages, 0);
+
+ error = VOP_GETPAGES(vp, umap->offset + slot_offset, pgs, &npages, 0,
+ access_type, 0, flags);
+ UVMHIST_LOG(ubchist, "getpages error %d npages %d", error, npages,0,0);
+
+ if (error == EAGAIN) {
+ tsleep(&lbolt, PVM, "ubc_fault", 0);
+ goto again;
+ }
+ if (error) {
+ return VM_PAGER_ERROR;
+ }
+ if (npages == 0) {
+ return VM_PAGER_OK;
+ }
+
+ va = ufi->orig_rvaddr;
+ eva = ufi->orig_rvaddr + (npages << PAGE_SHIFT);
+
+ UVMHIST_LOG(ubchist, "va 0x%lx eva 0x%lx", va, eva, 0,0);
+ simple_lock(&uobj->vmobjlock);
+ for (i = 0; va < eva; i++, va += PAGE_SIZE) {
+ UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
+ pg = pgs[i];
+
+ if (pg == NULL || pg == PGO_DONTCARE) {
+ continue;
+ }
+ if (pg->flags & PG_WANTED) {
+ wakeup(pg);
+ }
+ KASSERT((pg->flags & PG_FAKE) == 0);
+ if (pg->flags & PG_RELEASED) {
+ rv = uobj->pgops->pgo_releasepg(pg, NULL);
+ KASSERT(rv);
+ continue;
+ }
+ KASSERT(access_type == VM_PROT_READ ||
+ (pg->flags & PG_RDONLY) == 0);
+
+ uvm_lock_pageq();
+ uvm_pageactivate(pg);
+ uvm_unlock_pageq();
+
+ pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg),
+ VM_PROT_READ | VM_PROT_WRITE, access_type);
+
+ pg->flags &= ~(PG_BUSY);
+ UVM_PAGE_OWN(pg, NULL);
+ }
+ simple_unlock(&uobj->vmobjlock);
+ return VM_PAGER_OK;
+}
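
[editor's note: the PGO_OVERWRITE test in ubc_fault() above lets a faulting write
page skip the read from disk only when the page starts at or after the write
offset and either lies fully inside the write range or extends to (or past) end
of file. A standalone sketch of that predicate; all values are assumptions.]

#include <stdio.h>

#define PAGE_SIZE	4096	/* assumed */

int
main(void)
{
	unsigned long slot_offset = 0x1000;	/* faulting page in window */
	unsigned long writeoff = 0x1000;	/* umap->writeoff, assumed */
	unsigned long writelen = 0x2000;	/* umap->writelen, assumed */
	unsigned long long u_size = 0x100000;	/* vnode size, assumed */
	unsigned long long umap_off = 0;	/* umap->offset, assumed */

	/* page fully inside the write range, or running to/past EOF */
	int overwrite = (slot_offset >= writeoff &&
	    (slot_offset + PAGE_SIZE <= writeoff + writelen ||
	     slot_offset + PAGE_SIZE >= u_size - umap_off));

	printf("PGO_OVERWRITE %s\n", overwrite ? "set" : "not set");
	return 0;
}
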
+
+/*
+ * local functions
+ */
+
+static struct ubc_map *
+ubc_find_mapping(uobj, offset)
+ struct uvm_object *uobj;
+ voff_t offset;
+{
+ struct ubc_map *umap;
+
+ LIST_FOREACH(umap, &ubc_object.hash[UBC_HASH(uobj, offset)], hash) {
+ if (umap->uobj == uobj && umap->offset == offset) {
+ return umap;
+ }
+ }
+ return NULL;
+}
+
+
+/*
+ * ubc interface functions
+ */
+
+/*
+ * ubc_alloc: allocate a buffer mapping
+ */
+void *
+ubc_alloc(uobj, offset, lenp, flags)
+ struct uvm_object *uobj;
+ voff_t offset;
+ vsize_t *lenp;
+ int flags;
+{
+ int s;
+ vaddr_t slot_offset, va;
+ struct ubc_map *umap;
+ voff_t umap_offset;
+ UVMHIST_FUNC("ubc_alloc"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "uobj %p offset 0x%lx len 0x%lx filesize 0x%x",
+ uobj, offset, *lenp, ((struct uvm_vnode *)uobj)->u_size);
+
+ umap_offset = (offset & ~((voff_t)ubc_winsize - 1));
+ slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1));
+ *lenp = min(*lenp, ubc_winsize - slot_offset);
+
+ /*
+ * the vnode is always locked here, so we don't need to add a ref.
+ */
+
+ s = splbio();
+
+again:
+ simple_lock(&ubc_object.uobj.vmobjlock);
+ umap = ubc_find_mapping(uobj, umap_offset);
+ if (umap == NULL) {
+ umap = TAILQ_FIRST(UBC_QUEUE(offset));
+ if (umap == NULL) {
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+ tsleep(&lbolt, PVM, "ubc_alloc", 0);
+ goto again;
+ }
+
+ /*
+ * remove from old hash (if any),
+ * add to new hash.
+ */
+
+ if (umap->uobj != NULL) {
+ LIST_REMOVE(umap, hash);
+ }
+
+ umap->uobj = uobj;
+ umap->offset = umap_offset;
+
+ LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(uobj, umap_offset)],
+ umap, hash);
+
+ va = (vaddr_t)(ubc_object.kva +
+ (umap - ubc_object.umap) * ubc_winsize);
+ pmap_remove(pmap_kernel(), va, va + ubc_winsize);
+ }
+
+ if (umap->refcount == 0) {
+ TAILQ_REMOVE(UBC_QUEUE(offset), umap, inactive);
+ }
+
+#ifdef DIAGNOSTIC
+ if ((flags & UBC_WRITE) &&
+ (umap->writeoff || umap->writelen)) {
+ panic("ubc_fault: concurrent writes vp %p", uobj);
+ }
+#endif
+ if (flags & UBC_WRITE) {
+ umap->writeoff = slot_offset;
+ umap->writelen = *lenp;
+ }
+
+ umap->refcount++;
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+ splx(s);
+ UVMHIST_LOG(ubchist, "umap %p refs %d va %p",
+ umap, umap->refcount,
+ ubc_object.kva + (umap - ubc_object.umap) * ubc_winsize,0);
+
+ return ubc_object.kva +
+ (umap - ubc_object.umap) * ubc_winsize + slot_offset;
+}
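
[editor's note: ubc_alloc() hands out kva + (window index) * ubc_winsize +
slot_offset, and ubc_release() recovers the ubc_map from the address by integer
division. A sketch of that round trip; the base address, window size and index
are assumptions.]

#include <stdio.h>
#include <stdint.h>

#define WINSIZE	8192			/* assumed ubc_winsize */

int
main(void)
{
	uintptr_t kva = 0x10000000;	/* fake ubc_object.kva */
	int idx = 5;			/* umap - ubc_object.umap */
	uintptr_t slot = 0x123;		/* slot_offset within the window */

	/* address handed out by ubc_alloc() */
	uintptr_t va = kva + (uintptr_t)idx * WINSIZE + slot;

	/* ubc_release() recovers the window index by division */
	int back = (int)((va - kva) / WINSIZE);

	printf("va 0x%lx -> window %d\n", (unsigned long)va, back);
	return 0;
}
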
+
+
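+/*
+ * ubc_release: release a mapping window obtained from ubc_alloc().
+ */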
+void
+ubc_release(va, wlen)
+ void *va;
+ vsize_t wlen;
+{
+ struct ubc_map *umap;
+ struct uvm_object *uobj;
+ int s;
+ UVMHIST_FUNC("ubc_release"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "va %p", va,0,0,0);
+
+ s = splbio();
+ simple_lock(&ubc_object.uobj.vmobjlock);
+
+ umap = &ubc_object.umap[((char *)va - ubc_object.kva) / ubc_winsize];
+ uobj = umap->uobj;
+ KASSERT(uobj != NULL);
+
+ umap->writeoff = 0;
+ umap->writelen = 0;
+ umap->refcount--;
+ if (umap->refcount == 0) {
+ if (UBC_RELEASE_UNMAP &&
+ (((struct vnode *)uobj)->v_flag & VTEXT)) {
+ vaddr_t va;
+
+ /*
+ * if this file is the executable image of
+ * some process, that process will likely have
+ * the file mapped at an alignment other than
+ * what PMAP_PREFER() would like. we'd like
+ * to have process text be able to use the
+ * cache even if someone is also reading the
+ * file, so invalidate mappings of such files
+ * as soon as possible.
+ */
+
+ va = (vaddr_t)(ubc_object.kva +
+ (umap - ubc_object.umap) * ubc_winsize);
+ pmap_remove(pmap_kernel(), va, va + ubc_winsize);
+ LIST_REMOVE(umap, hash);
+ umap->uobj = NULL;
+ TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap,
+ inactive);
+ } else {
+ TAILQ_INSERT_TAIL(UBC_QUEUE(umap->offset), umap,
+ inactive);
+ }
+ }
+ UVMHIST_LOG(ubchist, "umap %p refs %d", umap, umap->refcount,0,0);
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+ splx(s);
+}
+
+
+/*
+ * ubc_flush: remove a range of mappings from the ubc mapping cache.
+ */
+
+void
+ubc_flush(uobj, start, end)
+ struct uvm_object *uobj;
+ voff_t start, end;
+{
+ struct ubc_map *umap;
+ vaddr_t va;
+ int s;
+ UVMHIST_FUNC("ubc_flush"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "uobj %p start 0x%lx end 0x%lx",
+ uobj, start, end,0);
+
+ s = splbio();
+ simple_lock(&ubc_object.uobj.vmobjlock);
+ for (umap = ubc_object.umap;
+ umap < &ubc_object.umap[ubc_nwins];
+ umap++) {
+
+ if (umap->uobj != uobj ||
+ umap->offset < start ||
+ (umap->offset >= end && end != 0) ||
+ umap->refcount > 0) {
+ continue;
+ }
+
+ /*
+ * remove from hash,
+ * move to head of inactive queue.
+ */
+
+ va = (vaddr_t)(ubc_object.kva +
+ (umap - ubc_object.umap) * ubc_winsize);
+ pmap_remove(pmap_kernel(), va, va + ubc_winsize);
+
+ LIST_REMOVE(umap, hash);
+ umap->uobj = NULL;
+ TAILQ_REMOVE(UBC_QUEUE(umap->offset), umap, inactive);
+ TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, inactive);
+ }
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+ splx(s);
+}
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
index 5575021ad6f..bb6b841f0ca 100644
--- a/sys/uvm/uvm_extern.h
+++ b/sys/uvm/uvm_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_extern.h,v 1.33 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: uvm_extern.h,v 1.34 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -223,6 +223,21 @@ typedef int vm_prot_t;
#define UVM_PGA_ZERO 0x0002 /* returned page must be zero'd */
/*
+ * the following defines are for ubc_alloc's flags
+ */
+#define UBC_READ 0
+#define UBC_WRITE 1
+
+/*
+ * flags for uvn_findpages().
+ */
+#define UFP_ALL 0x0
+#define UFP_NOWAIT 0x1
+#define UFP_NOALLOC 0x2
+#define UFP_NOCACHE 0x4
+#define UFP_NORDONLY 0x8
+
+/*
* lockflags that control the locking behavior of various functions.
*/
#define UVM_LK_ENTER 0x00000001 /* map locked on entry */
@@ -464,9 +479,16 @@ void uao_detach_locked __P((struct uvm_object *));
void uao_reference __P((struct uvm_object *));
void uao_reference_locked __P((struct uvm_object *));
+/* uvm_bio.c */
+void ubc_init __P((void));
+void * ubc_alloc __P((struct uvm_object *, voff_t, vsize_t *,
+ int));
+void ubc_release __P((void *, vsize_t));
+void ubc_flush __P((struct uvm_object *, voff_t, voff_t));
+
/* uvm_fault.c */
-int uvm_fault __P((vm_map_t, vaddr_t,
- vm_fault_t, vm_prot_t));
+int uvm_fault __P((vm_map_t, vaddr_t, vm_fault_t,
+ vm_prot_t));
/* handle a page fault */
/* uvm_glue.c */
@@ -593,10 +615,11 @@ int uvm_deallocate __P((vm_map_t, vaddr_t, vsize_t));
/* uvm_vnode.c */
void uvm_vnp_setsize __P((struct vnode *, voff_t));
void uvm_vnp_sync __P((struct mount *));
-void uvm_vnp_terminate __P((struct vnode *));
- /* terminate a uvm/uvn object */
-boolean_t uvm_vnp_uncache __P((struct vnode *));
struct uvm_object *uvn_attach __P((void *, vm_prot_t));
+void uvn_findpages __P((struct uvm_object *, voff_t,
+ int *, struct vm_page **, int));
+void uvm_vnp_zerorange __P((struct vnode *, off_t, size_t));
+void uvm_vnp_asyncget __P((struct vnode *, off_t, size_t));
/* kern_malloc.c */
void kmeminit_nkmempages __P((void));
diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c
index 662e2509321..0e4103fe49b 100644
--- a/sys/uvm/uvm_fault.c
+++ b/sys/uvm/uvm_fault.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_fault.c,v 1.24 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: uvm_fault.c,v 1.25 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_fault.c,v 1.56 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -204,11 +204,7 @@ uvmfault_anonflush(anons, n)
if (pg && (pg->flags & PG_BUSY) == 0 && pg->loan_count == 0) {
uvm_lock_pageq();
if (pg->wire_count == 0) {
-#ifdef UBC
pmap_clear_reference(pg);
-#else
- pmap_page_protect(pg, VM_PROT_NONE);
-#endif
uvm_pagedeactivate(pg);
}
uvm_unlock_pageq();
diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h
index bbc2afb9f19..2c95aff1607 100644
--- a/sys/uvm/uvm_map.h
+++ b/sys/uvm/uvm_map.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_map.h,v 1.15 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: uvm_map.h,v 1.16 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -427,7 +427,7 @@ vm_map_lock(map)
simple_lock(&map->flags_lock);
while (map->flags & VM_MAP_BUSY) {
map->flags |= VM_MAP_WANTLOCK;
- ltsleep(&map->flags, PVM, (char *)vmmapbsy, 0, &map->flags_lock);
+ ltsleep(&map->flags, PVM, vmmapbsy, 0, &map->flags_lock);
}
error = lockmgr(&map->lock, LK_EXCLUSIVE|LK_SLEEPFAIL|LK_INTERLOCK,
diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c
index 6bd7260b6a0..3c4c4bdf961 100644
--- a/sys/uvm/uvm_mmap.c
+++ b/sys/uvm/uvm_mmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_mmap.c,v 1.27 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: uvm_mmap.c,v 1.28 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -1126,40 +1126,8 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit)
uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
maxprot : (maxprot & ~VM_PROT_WRITE));
-#ifndef UBC
- /*
- * XXXCDC: hack from old code
- * don't allow vnodes which have been mapped
- * shared-writeable to persist [forces them to be
- * flushed out when last reference goes].
- * XXXCDC: interesting side effect: avoids a bug.
- * note that in WRITE [ufs_readwrite.c] that we
- * allocate buffer, uncache, and then do the write.
- * the problem with this is that if the uncache causes
- * VM data to be flushed to the same area of the file
- * we are writing to... in that case we've got the
- * buffer locked and our process goes to sleep forever.
- *
- * XXXCDC: checking maxprot protects us from the
- * "persistbug" program but this is not a long term
- * solution.
- *
- * XXXCDC: we don't bother calling uncache with the vp
- * VOP_LOCKed since we know that we are already
- * holding a valid reference to the uvn (from the
- * uvn_attach above), and thus it is impossible for
- * the uncache to kill the uvn and trigger I/O.
- */
- if (flags & MAP_SHARED) {
- if ((prot & VM_PROT_WRITE) ||
- (maxprot & VM_PROT_WRITE)) {
- uvm_vnp_uncache(vp);
- }
- }
-#else
/* XXX for now, attach doesn't gain a ref */
VREF(vp);
-#endif
} else {
uobj = udv_attach((void *) &vp->v_rdev,
(flags & MAP_SHARED) ? maxprot :
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index 4ea890c8c3b..f7ebbd77f80 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.c,v 1.31 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: uvm_page.c,v 1.32 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_page.c,v 1.51 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -906,17 +906,11 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
* the pagedaemon.
*/
-#ifdef UBC
if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
(uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
uvmexp.inactive < uvmexp.inactarg)) {
wakeup(&uvm.pagedaemon);
}
-#else
- if (uvmexp.free < uvmexp.freemin || (uvmexp.free < uvmexp.freetarg &&
- uvmexp.inactive < uvmexp.inactarg))
- wakeup(&uvm.pagedaemon);
-#endif
/*
* fail if any of these conditions is true:
diff --git a/sys/uvm/uvm_page_i.h b/sys/uvm/uvm_page_i.h
index e0547d8414b..3ea680714c6 100644
--- a/sys/uvm/uvm_page_i.h
+++ b/sys/uvm/uvm_page_i.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page_i.h,v 1.10 2001/11/12 01:26:10 art Exp $ */
+/* $OpenBSD: uvm_page_i.h,v 1.11 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_page_i.h,v 1.16 2001/01/28 23:30:45 thorpej Exp $ */
/*
@@ -219,9 +219,6 @@ uvm_pagedeactivate(pg)
TAILQ_INSERT_TAIL(&uvm.page_inactive_obj, pg, pageq);
pg->pqflags |= PQ_INACTIVE;
uvmexp.inactive++;
-#ifndef UBC
- pmap_clear_reference(pg);
-#endif
/*
* update the "clean" bit. this isn't 100%
* accurate, and doesn't have to be. we'll
diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c
index 69400e5f010..2fded9caf08 100644
--- a/sys/uvm/uvm_pager.c
+++ b/sys/uvm/uvm_pager.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pager.c,v 1.22 2001/11/12 01:26:10 art Exp $ */
+/* $OpenBSD: uvm_pager.c,v 1.23 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_pager.c,v 1.41 2001/02/18 19:26:50 chs Exp $ */
/*
@@ -58,17 +58,13 @@ struct pool *uvm_aiobuf_pool;
extern struct uvm_pagerops uvm_deviceops;
extern struct uvm_pagerops uvm_vnodeops;
-#ifdef UBC
extern struct uvm_pagerops ubc_pager;
-#endif
struct uvm_pagerops *uvmpagerops[] = {
&aobj_pager,
&uvm_deviceops,
&uvm_vnodeops,
-#ifdef UBC
&ubc_pager,
-#endif
};
/*
@@ -153,7 +149,7 @@ ReStart:
kva = 0; /* let system choose VA */
if (uvm_map(pager_map, &kva, size, NULL,
- UVM_UNKNOWN_OFFSET, 0, UVM_FLAG_NOMERGE) != KERN_SUCCESS) {
+ UVM_UNKNOWN_OFFSET, 0, UVM_FLAG_NOMERGE) != KERN_SUCCESS) {
if (curproc == uvm.pagedaemon_proc) {
simple_lock(&pager_map_wanted_lock);
if (emerginuse) {
@@ -733,7 +729,6 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags)
}
}
-#ifdef UBC
/*
* interrupt-context iodone handler for nested i/o bufs.
*
@@ -757,7 +752,6 @@ uvm_aio_biodone1(bp)
biodone(mbp);
}
}
-#endif
/*
* interrupt-context iodone handler for single-buf i/os
@@ -798,12 +792,10 @@ uvm_aio_aiodone(bp)
error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0;
write = (bp->b_flags & B_READ) == 0;
-#ifdef UBC
/* XXXUBC B_NOCACHE is for swap pager, should be done differently */
if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
(*bioops.io_pageiodone)(bp);
}
-#endif
uobj = NULL;
for (i = 0; i < npages; i++) {
diff --git a/sys/uvm/uvm_param.h b/sys/uvm/uvm_param.h
index d7cdccc28a4..78b3f1bc5ba 100644
--- a/sys/uvm/uvm_param.h
+++ b/sys/uvm/uvm_param.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_param.h,v 1.2 2001/11/12 01:26:10 art Exp $ */
+/* $OpenBSD: uvm_param.h,v 1.3 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_param.h,v 1.5 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -114,7 +114,7 @@ typedef int boolean_t;
#define VM_ANONMIN 7
#define VM_VTEXTMIN 8
#define VM_VNODEMIN 9
-#define VM_MAXID 9 /* number of valid vm ids */
+#define VM_MAXID 10 /* number of valid vm ids */
#define CTL_VM_NAMES { \
{ 0, 0 }, \
@@ -166,10 +166,8 @@ struct _ps_strings {
#define trunc_page(x) ((x) & ~PAGE_MASK)
extern psize_t mem_size; /* size of physical memory (bytes) */
-#ifdef UBC
extern int ubc_nwins; /* number of UBC mapping windows */
extern int ubc_winsize; /* size of a UBC mapping window */
-#endif
#else
/* out-of-kernel versions of round_page and trunc_page */
diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c
index 4697d8a23f6..c4298200688 100644
--- a/sys/uvm/uvm_swap.c
+++ b/sys/uvm/uvm_swap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_swap.c,v 1.41 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: uvm_swap.c,v 1.42 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_swap.c,v 1.46 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -1393,32 +1393,6 @@ sw_reg_strategy(sdp, bp, bn)
nbp->vb_buf.b_vnbufs.le_next = NOLIST;
LIST_INIT(&nbp->vb_buf.b_dep);
- /*
- * set b_dirtyoff/end and b_validoff/end. this is
- * required by the NFS client code (otherwise it will
- * just discard our I/O request).
- */
- if (bp->b_dirtyend == 0) {
- nbp->vb_buf.b_dirtyoff = 0;
- nbp->vb_buf.b_dirtyend = sz;
- } else {
- nbp->vb_buf.b_dirtyoff =
- max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
- nbp->vb_buf.b_dirtyend =
- min(sz,
- max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
- }
- if (bp->b_validend == 0) {
- nbp->vb_buf.b_validoff = 0;
- nbp->vb_buf.b_validend = sz;
- } else {
- nbp->vb_buf.b_validoff =
- max(0, bp->b_validoff - (bp->b_bcount-resid));
- nbp->vb_buf.b_validend =
- min(sz,
- max(0, bp->b_validend - (bp->b_bcount-resid)));
- }
-
nbp->vb_xfer = vnx; /* patch it back in to vnx */
/*
@@ -1990,8 +1964,6 @@ uvm_swap_io(pps, startslot, npages, flags)
* and we bump v_numoutput (counter of number of active outputs).
*/
if (write) {
- bp->b_dirtyoff = 0;
- bp->b_dirtyend = npages << PAGE_SHIFT;
#ifdef UVM_SWAP_ENCRYPT
/* mark the pages in the drum for decryption */
if (swap_encrypt_initalized)
diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c
index e921e4fb846..667cbc5b458 100644
--- a/sys/uvm/uvm_vnode.c
+++ b/sys/uvm/uvm_vnode.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_vnode.c,v 1.24 2001/11/10 18:42:32 art Exp $ */
-/* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */
+/* $OpenBSD: uvm_vnode.c,v 1.25 2001/11/27 05:27:12 art Exp $ */
+/* $NetBSD: uvm_vnode.c,v 1.47 2001/03/09 01:02:13 chs Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -52,6 +52,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
@@ -59,6 +60,8 @@
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/conf.h>
+#include <sys/pool.h>
+#include <sys/mount.h>
#include <miscfs/specfs/specdev.h>
@@ -66,55 +69,38 @@
#include <uvm/uvm_vnode.h>
/*
- * private global data structure
- *
- * we keep a list of writeable active vnode-backed VM objects for sync op.
- * we keep a simpleq of vnodes that are currently being sync'd.
- */
-
-LIST_HEAD(uvn_list_struct, uvm_vnode);
-static struct uvn_list_struct uvn_wlist; /* writeable uvns */
-static simple_lock_data_t uvn_wl_lock; /* locks uvn_wlist */
-
-SIMPLEQ_HEAD(uvn_sq_struct, uvm_vnode);
-static struct uvn_sq_struct uvn_sync_q; /* sync'ing uvns */
-lock_data_t uvn_sync_lock; /* locks sync operation */
-
-/*
* functions
*/
-static void uvn_cluster __P((struct uvm_object *, voff_t,
- voff_t *, voff_t *));
-static void uvn_detach __P((struct uvm_object *));
-static boolean_t uvn_flush __P((struct uvm_object *, voff_t,
- voff_t, int));
-static int uvn_get __P((struct uvm_object *, voff_t,
- vm_page_t *, int *, int,
- vm_prot_t, int, int));
-static void uvn_init __P((void));
-static int uvn_io __P((struct uvm_vnode *, vm_page_t *,
- int, int, int));
-static int uvn_put __P((struct uvm_object *, vm_page_t *,
- int, boolean_t));
-static void uvn_reference __P((struct uvm_object *));
-static boolean_t uvn_releasepg __P((struct vm_page *,
- struct vm_page **));
+static void uvn_cluster __P((struct uvm_object *, voff_t, voff_t *,
+ voff_t *));
+static void uvn_detach __P((struct uvm_object *));
+static int uvn_findpage __P((struct uvm_object *, voff_t,
+ struct vm_page **, int));
+boolean_t uvn_flush __P((struct uvm_object *, voff_t, voff_t,
+ int));
+static int uvn_get __P((struct uvm_object *, voff_t, vm_page_t *,
+ int *, int, vm_prot_t, int, int));
+static int uvn_put __P((struct uvm_object *, vm_page_t *, int,
+ boolean_t));
+static void uvn_reference __P((struct uvm_object *));
+static boolean_t uvn_releasepg __P((struct vm_page *,
+ struct vm_page **));
/*
* master pager structure
*/
struct uvm_pagerops uvm_vnodeops = {
- uvn_init,
+ NULL,
uvn_reference,
uvn_detach,
- NULL, /* no specialized fault routine required */
+ NULL,
uvn_flush,
uvn_get,
uvn_put,
uvn_cluster,
- uvm_mk_pcluster, /* use generic version of this: see uvm_pager.c */
+ uvm_mk_pcluster,
uvn_releasepg,
};
@@ -123,22 +109,6 @@ struct uvm_pagerops uvm_vnodeops = {
*/
/*
- * uvn_init
- *
- * init pager private data structures.
- */
-
-static void
-uvn_init()
-{
-
- LIST_INIT(&uvn_wlist);
- simple_lock_init(&uvn_wl_lock);
- /* note: uvn_sync_q init'd in uvm_vnp_sync() */
- lockinit(&uvn_sync_lock, PVM, "uvnsync", 0, 0);
-}
-
-/*
* uvn_attach
*
* attach a vnode structure to a VM object. if the vnode is already
@@ -161,23 +131,20 @@ uvn_attach(arg, accessprot)
struct vnode *vp = arg;
struct uvm_vnode *uvn = &vp->v_uvm;
struct vattr vattr;
- int oldflags, result;
+ int result;
struct partinfo pi;
- u_quad_t used_vnode_size;
+ voff_t used_vnode_size;
UVMHIST_FUNC("uvn_attach"); UVMHIST_CALLED(maphist);
UVMHIST_LOG(maphist, "(vn=0x%x)", arg,0,0,0);
-
- used_vnode_size = (u_quad_t)0; /* XXX gcc -Wuninitialized */
+ used_vnode_size = (voff_t)0;
/*
* first get a lock on the uvn.
*/
simple_lock(&uvn->u_obj.vmobjlock);
- while (uvn->u_flags & UVM_VNODE_BLOCKED) {
- printf("uvn_attach: blocked at 0x%p flags 0x%x\n",
- uvn, uvn->u_flags);
- uvn->u_flags |= UVM_VNODE_WANTED;
+ while (uvn->u_flags & VXLOCK) {
+ uvn->u_flags |= VXWANT;
UVMHIST_LOG(maphist, " SLEEPING on blocked vn",0,0,0,0);
UVM_UNLOCK_AND_WAIT(uvn, &uvn->u_obj.vmobjlock, FALSE,
"uvn_attach", 0);
@@ -189,56 +156,26 @@ uvn_attach(arg, accessprot)
* if we're mapping a BLK device, make sure it is a disk.
*/
if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) {
- simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */
+ simple_unlock(&uvn->u_obj.vmobjlock);
UVMHIST_LOG(maphist,"<- done (VBLK not D_DISK!)", 0,0,0,0);
return(NULL);
}
- /*
- * now we have lock and uvn must not be in a blocked state.
- * first check to see if it is already active, in which case
- * we can bump the reference count, check to see if we need to
- * add it to the writeable list, and then return.
- */
- if (uvn->u_flags & UVM_VNODE_VALID) { /* already active? */
-
- /* regain VREF if we were persisting */
- if (uvn->u_obj.uo_refs == 0) {
- VREF(vp);
- UVMHIST_LOG(maphist," VREF (reclaim persisting vnode)",
- 0,0,0,0);
- }
- uvn->u_obj.uo_refs++; /* bump uvn ref! */
-
- /* check for new writeable uvn */
- if ((accessprot & VM_PROT_WRITE) != 0 &&
- (uvn->u_flags & UVM_VNODE_WRITEABLE) == 0) {
- simple_lock(&uvn_wl_lock);
- LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
- simple_unlock(&uvn_wl_lock);
- /* we are now on wlist! */
- uvn->u_flags |= UVM_VNODE_WRITEABLE;
- }
-
- /* unlock and return */
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs,
- 0, 0, 0);
- return (&uvn->u_obj);
- }
+#ifdef DIAGNOSTIC
+ if (vp->v_type != VREG) {
+ panic("uvn_attach: vp %p not VREG", vp);
+ }
+#endif
/*
- * need to call VOP_GETATTR() to get the attributes, but that could
- * block (due to I/O), so we want to unlock the object before calling.
- * however, we want to keep anyone else from playing with the object
- * while it is unlocked. to do this we set UVM_VNODE_ALOCK which
- * prevents anyone from attaching to the vnode until we are done with
- * it.
+ * set up our idea of the size
+ * if this hasn't been done already.
*/
- uvn->u_flags = UVM_VNODE_ALOCK;
+ if (uvn->u_size == VSIZENOTSET) {
+
+ uvn->u_flags |= VXLOCK;
simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */
/* XXX: curproc? */
-
if (vp->v_type == VBLK) {
/*
* We could implement this as a specfs getattr call, but:
@@ -252,8 +189,8 @@ uvn_attach(arg, accessprot)
DIOCGPART, (caddr_t)&pi, FREAD, curproc);
if (result == 0) {
/* XXX should remember blocksize */
- used_vnode_size = (u_quad_t)pi.disklab->d_secsize *
- (u_quad_t)pi.part->p_size;
+ used_vnode_size = (voff_t)pi.disklab->d_secsize *
+ (voff_t)pi.part->p_size;
}
} else {
result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc);
@@ -262,58 +199,26 @@ uvn_attach(arg, accessprot)
}
/* relock object */
- simple_lock(&uvn->u_obj.vmobjlock);
+ simple_lock(&uvn->u_obj.vmobjlock);
+
+ if (uvn->u_flags & VXWANT)
+ wakeup(uvn);
+ uvn->u_flags &= ~(VXLOCK|VXWANT);
if (result != 0) {
- if (uvn->u_flags & UVM_VNODE_WANTED)
- wakeup(uvn);
- uvn->u_flags = 0;
simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */
UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0);
return(NULL);
}
-
- /*
- * make sure that the newsize fits within a vaddr_t
- * XXX: need to revise addressing data types
- */
-#ifdef DEBUG
- if (vp->v_type == VBLK)
- printf("used_vnode_size = %llu\n", (long long)used_vnode_size);
-#endif
-
- /*
- * now set up the uvn.
- */
- uvn->u_obj.pgops = &uvm_vnodeops;
- TAILQ_INIT(&uvn->u_obj.memq);
- uvn->u_obj.uo_npages = 0;
- uvn->u_obj.uo_refs = 1; /* just us... */
- oldflags = uvn->u_flags;
- uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST;
- uvn->u_nio = 0;
uvn->u_size = used_vnode_size;
- /* if write access, we need to add it to the wlist */
- if (accessprot & VM_PROT_WRITE) {
- simple_lock(&uvn_wl_lock);
- LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
- simple_unlock(&uvn_wl_lock);
- uvn->u_flags |= UVM_VNODE_WRITEABLE; /* we are on wlist! */
}
- /*
- * add a reference to the vnode. this reference will stay as long
- * as there is a valid mapping of the vnode. dropped when the
- * reference count goes to zero [and we either free or persist].
- */
- VREF(vp);
+ /* unlock and return */
simple_unlock(&uvn->u_obj.vmobjlock);
- if (oldflags & UVM_VNODE_WANTED)
- wakeup(uvn);
-
- UVMHIST_LOG(maphist,"<- done/VREF, ret 0x%x", &uvn->u_obj,0,0,0);
- return(&uvn->u_obj);
+ UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs,
+ 0, 0, 0);
+ return (&uvn->u_obj);
}
@@ -333,23 +238,7 @@ static void
uvn_reference(uobj)
struct uvm_object *uobj;
{
-#ifdef DEBUG
- struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
-#endif
- UVMHIST_FUNC("uvn_reference"); UVMHIST_CALLED(maphist);
-
- simple_lock(&uobj->vmobjlock);
-#ifdef DEBUG
- if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
- printf("uvn_reference: ref=%d, flags=0x%x\n", uvn->u_flags,
- uobj->uo_refs);
- panic("uvn_reference: invalid state");
- }
-#endif
- uobj->uo_refs++;
- UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
- uobj, uobj->uo_refs,0,0);
- simple_unlock(&uobj->vmobjlock);
+ VREF((struct vnode *)uobj);
}
/*
@@ -365,291 +254,7 @@ static void
uvn_detach(uobj)
struct uvm_object *uobj;
{
- struct uvm_vnode *uvn;
- struct vnode *vp;
- int oldflags;
- UVMHIST_FUNC("uvn_detach"); UVMHIST_CALLED(maphist);
-
- simple_lock(&uobj->vmobjlock);
-
- UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0);
- uobj->uo_refs--; /* drop ref! */
- if (uobj->uo_refs) { /* still more refs */
- simple_unlock(&uobj->vmobjlock);
- UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
- return;
- }
-
- /*
- * get other pointers ...
- */
-
- uvn = (struct uvm_vnode *) uobj;
- vp = (struct vnode *) uobj;
-
- /*
- * clear VTEXT flag now that there are no mappings left (VTEXT is used
- * to keep an active text file from being overwritten).
- */
- vp->v_flag &= ~VTEXT;
-
- /*
- * we just dropped the last reference to the uvn. see if we can
- * let it "stick around".
- */
-
- if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
- /* won't block */
- uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES);
- simple_unlock(&uobj->vmobjlock);
- vrele(vp); /* drop vnode reference */
- UVMHIST_LOG(maphist,"<- done/vrele! (persist)", 0,0,0,0);
- return;
- }
-
- /*
- * its a goner!
- */
-
- UVMHIST_LOG(maphist," its a goner (flushing)!", 0,0,0,0);
-
- uvn->u_flags |= UVM_VNODE_DYING;
-
- /*
- * even though we may unlock in flush, no one can gain a reference
- * to us until we clear the "dying" flag [because it blocks
- * attaches]. we will not do that until after we've disposed of all
- * the pages with uvn_flush(). note that before the flush the only
- * pages that could be marked PG_BUSY are ones that are in async
- * pageout by the daemon. (there can't be any pending "get"'s
- * because there are no references to the object).
- */
-
- (void) uvn_flush(uobj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES);
-
- UVMHIST_LOG(maphist," its a goner (done flush)!", 0,0,0,0);
-
- /*
- * given the structure of this pager, the above flush request will
- * create the following state: all the pages that were in the object
- * have either been free'd or they are marked PG_BUSY|PG_RELEASED.
- * the PG_BUSY bit was set either by us or the daemon for async I/O.
- * in either case, if we have pages left we can't kill the object
- * yet because i/o is pending. in this case we set the "relkill"
- * flag which will cause pgo_releasepg to kill the object once all
- * the I/O's are done [pgo_releasepg will be called from the aiodone
- * routine or from the page daemon].
- */
-
- if (uobj->uo_npages) { /* I/O pending. iodone will free */
-#ifdef DEBUG
- /*
- * XXXCDC: very unlikely to happen until we have async i/o
- * so print a little info message in case it does.
- */
- printf("uvn_detach: vn %p has pages left after flush - "
- "relkill mode\n", uobj);
-#endif
- uvn->u_flags |= UVM_VNODE_RELKILL;
- simple_unlock(&uobj->vmobjlock);
- UVMHIST_LOG(maphist,"<- done! (releasepg will kill obj)", 0, 0,
- 0, 0);
- return;
- }
-
- /*
- * kill object now. note that we can't be on the sync q because
- * all references are gone.
- */
- if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
- simple_lock(&uvn_wl_lock); /* protect uvn_wlist */
- LIST_REMOVE(uvn, u_wlist);
- simple_unlock(&uvn_wl_lock);
- }
-#ifdef DIAGNOSTIC
- if (uobj->memq.tqh_first != NULL)
- panic("uvn_deref: vnode VM object still has pages afer "
- "syncio/free flush");
-#endif
- oldflags = uvn->u_flags;
- uvn->u_flags = 0;
- simple_unlock(&uobj->vmobjlock);
-
- /* wake up any sleepers */
- if (oldflags & UVM_VNODE_WANTED)
- wakeup(uvn);
-
- /*
- * drop our reference to the vnode.
- */
- vrele(vp);
- UVMHIST_LOG(maphist,"<- done (vrele) final", 0,0,0,0);
-
- return;
-}
-
-/*
- * uvm_vnp_terminate: external hook to clear out a vnode's VM
- *
- * called in two cases:
- * [1] when a persisting vnode vm object (i.e. one with a zero reference
- * count) needs to be freed so that a vnode can be reused. this
- * happens under "getnewvnode" in vfs_subr.c. if the vnode from
- * the free list is still attached (i.e. not VBAD) then vgone is
- * called. as part of the vgone trace this should get called to
- * free the vm object. this is the common case.
- * [2] when a filesystem is being unmounted by force (MNT_FORCE,
- * "umount -f") the vgone() function is called on active vnodes
- * on the mounted file systems to kill their data (the vnodes become
- * "dead" ones [see src/sys/miscfs/deadfs/...]). that results in a
- * call here (even if the uvn is still in use -- i.e. has a non-zero
- * reference count). this case happens at "umount -f" and during a
- * "reboot/halt" operation.
- *
- * => the caller must XLOCK and VOP_LOCK the vnode before calling us
- * [protects us from getting a vnode that is already in the DYING
- * state...]
- * => unlike uvn_detach, this function must not return until all the
- * uvn's pages are disposed of.
- * => in case [2] the uvn is still alive after this call, but all I/O
- * ops will fail (due to the backing vnode now being "dead"). this
- * will prob. kill any process using the uvn due to pgo_get failing.
- */
-
-void
-uvm_vnp_terminate(vp)
- struct vnode *vp;
-{
- struct uvm_vnode *uvn = &vp->v_uvm;
- int oldflags;
- UVMHIST_FUNC("uvm_vnp_terminate"); UVMHIST_CALLED(maphist);
-
- /*
- * lock object and check if it is valid
- */
- simple_lock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist, " vp=0x%x, ref=%d, flag=0x%x", vp,
- uvn->u_obj.uo_refs, uvn->u_flags, 0);
- if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist, "<- done (not active)", 0, 0, 0, 0);
- return;
- }
-
- /*
- * must be a valid uvn that is not already dying (because XLOCK
- * protects us from that). the uvn can't in the ALOCK state
- * because it is valid, and uvn's that are in the ALOCK state haven't
- * been marked valid yet.
- */
-
-#ifdef DEBUG
- /*
- * debug check: are we yanking the vnode out from under our uvn?
- */
- if (uvn->u_obj.uo_refs) {
- printf("uvm_vnp_terminate(%p): terminating active vnode "
- "(refs=%d)\n", uvn, uvn->u_obj.uo_refs);
- }
-#endif
-
- /*
- * it is possible that the uvn was detached and is in the relkill
- * state [i.e. waiting for async i/o to finish so that releasepg can
- * kill object]. we take over the vnode now and cancel the relkill.
- * we want to know when the i/o is done so we can recycle right
- * away. note that a uvn can only be in the RELKILL state if it
- * has a zero reference count.
- */
-
- if (uvn->u_flags & UVM_VNODE_RELKILL)
- uvn->u_flags &= ~UVM_VNODE_RELKILL; /* cancel RELKILL */
-
- /*
- * block the uvn by setting the dying flag, and then flush the
- * pages. (note that flush may unlock object while doing I/O, but
- * it will re-lock it before it returns control here).
- *
- * also, note that we tell I/O that we are already VOP_LOCK'd so
- * that uvn_io doesn't attempt to VOP_LOCK again.
- *
- * XXXCDC: setting VNISLOCKED on an active uvn which is being terminated
- * due to a forceful unmount might not be a good idea. maybe we
- * need a way to pass in this info to uvn_flush through a
- * pager-defined PGO_ constant [currently there are none].
- */
- uvn->u_flags |= UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED;
-
- (void) uvn_flush(&uvn->u_obj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES);
-
- /*
- * as we just did a flush we expect all the pages to be gone or in
- * the process of going. sleep to wait for the rest to go [via iosync].
- */
-
- while (uvn->u_obj.uo_npages) {
-#ifdef DEBUG
- struct vm_page *pp;
- for (pp = uvn->u_obj.memq.tqh_first ; pp != NULL ;
- pp = pp->listq.tqe_next) {
- if ((pp->flags & PG_BUSY) == 0)
- panic("uvm_vnp_terminate: detected unbusy pg");
- }
- if (uvn->u_nio == 0)
- panic("uvm_vnp_terminate: no I/O to wait for?");
- printf("uvm_vnp_terminate: waiting for I/O to fin.\n");
- /*
- * XXXCDC: this is unlikely to happen without async i/o so we
- * put a printf in just to keep an eye on it.
- */
-#endif
- uvn->u_flags |= UVM_VNODE_IOSYNC;
- UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock, FALSE,
- "uvn_term",0);
- simple_lock(&uvn->u_obj.vmobjlock);
- }
-
- /*
- * done. now we free the uvn if its reference count is zero
- * (true if we are zapping a persisting uvn). however, if we are
- * terminating a uvn with active mappings we let it live ... future
- * calls down to the vnode layer will fail.
- */
-
- oldflags = uvn->u_flags;
- if (uvn->u_obj.uo_refs) {
-
- /*
- * uvn must live on it is dead-vnode state until all references
- * are gone. restore flags. clear CANPERSIST state.
- */
-
- uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED|
- UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST);
-
- } else {
-
- /*
- * free the uvn now. note that the VREF reference is already
- * gone [it is dropped when we enter the persist state].
- */
- if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
- panic("uvm_vnp_terminate: io sync wanted bit set");
-
- if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
- simple_lock(&uvn_wl_lock);
- LIST_REMOVE(uvn, u_wlist);
- simple_unlock(&uvn_wl_lock);
- }
- uvn->u_flags = 0; /* uvn is history, clear all bits */
- }
-
- if (oldflags & UVM_VNODE_WANTED)
- wakeup(uvn); /* object lock still held */
-
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
-
+ vrele((struct vnode *)uobj);
}
/*
@@ -662,7 +267,7 @@ uvm_vnp_terminate(vp)
* => returns TRUE if page's object is still alive, FALSE if we
* killed the page's object. if we return TRUE, then we
* return with the object locked.
- * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return
+ * => if (nextpgp != NULL) => we return the next page on the queue, and return
* with the page queues locked [for pagedaemon]
* => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
 * => we kill the uvn if it is not referenced and we are supposed to
@@ -674,11 +279,7 @@ uvn_releasepg(pg, nextpgp)
struct vm_page *pg;
struct vm_page **nextpgp; /* OUT */
{
- struct uvm_vnode *uvn = (struct uvm_vnode *) pg->uobject;
-#ifdef DIAGNOSTIC
- if ((pg->flags & PG_RELEASED) == 0)
- panic("uvn_releasepg: page not released!");
-#endif
+ KASSERT(pg->flags & PG_RELEASED);
/*
* dispose of the page [caller handles PG_WANTED]
@@ -686,64 +287,25 @@ uvn_releasepg(pg, nextpgp)
pmap_page_protect(pg, VM_PROT_NONE);
uvm_lock_pageq();
if (nextpgp)
- *nextpgp = pg->pageq.tqe_next; /* next page for daemon */
+ *nextpgp = TAILQ_NEXT(pg, pageq);
uvm_pagefree(pg);
if (!nextpgp)
uvm_unlock_pageq();
- /*
- * now see if we need to kill the object
- */
- if (uvn->u_flags & UVM_VNODE_RELKILL) {
- if (uvn->u_obj.uo_refs)
- panic("uvn_releasepg: kill flag set on referenced "
- "object!");
- if (uvn->u_obj.uo_npages == 0) {
- if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
- simple_lock(&uvn_wl_lock);
- LIST_REMOVE(uvn, u_wlist);
- simple_unlock(&uvn_wl_lock);
- }
-#ifdef DIAGNOSTIC
- if (uvn->u_obj.memq.tqh_first)
- panic("uvn_releasepg: pages in object with npages == 0");
-#endif
- if (uvn->u_flags & UVM_VNODE_WANTED)
- /* still holding object lock */
- wakeup(uvn);
-
- uvn->u_flags = 0; /* DEAD! */
- simple_unlock(&uvn->u_obj.vmobjlock);
- return (FALSE);
- }
- }
return (TRUE);
}
/*
- * NOTE: currently we have to use VOP_READ/VOP_WRITE because they go
- * through the buffer cache and allow I/O in any size. These VOPs use
- * synchronous i/o. [vs. VOP_STRATEGY which can be async, but doesn't
- * go through the buffer cache or allow I/O sizes larger than a
- * block]. we will eventually want to change this.
- *
* issues to consider:
- * uvm provides the uvm_aiodesc structure for async i/o management.
* there are two tailq's in the uvm. structure... one for pending async
* i/o and one for "done" async i/o. to do an async i/o one puts
- * an aiodesc on the "pending" list (protected by splbio()), starts the
+ * a buf on the "pending" list (protected by splbio()), starts the
* i/o and returns VM_PAGER_PEND. when the i/o is done, we expect
* some sort of "i/o done" function to be called (at splbio(), interrupt
- * time). this function should remove the aiodesc from the pending list
+ * time). this function should remove the buf from the pending list
* and place it on the "done" list and wakeup the daemon. the daemon
* will run at normal spl() and will remove all items from the "done"
- * list and call the "aiodone" hook for each done request (see uvm_pager.c).
- * [in the old vm code, this was done by calling the "put" routine with
- * null arguments which made the code harder to read and understand because
- * you had one function ("put") doing two things.]
- *
- * so the current pager needs:
- * int uvn_aiodone(struct uvm_aiodesc *)
+ * list and call the iodone hook for each done request (see uvm_pager.c).
*
* => return KERN_SUCCESS (aio finished, free it). otherwise requeue for
* later collection.
@@ -764,15 +326,17 @@ uvn_releasepg(pg, nextpgp)
/*
* uvn_flush: flush pages out of a uvm object.
*
+ * => "stop == 0" means flush all pages at or after "start".
* => object should be locked by caller. we may _unlock_ the object
- * if (and only if) we need to clean a page (PGO_CLEANIT).
+ * if (and only if) we need to clean a page (PGO_CLEANIT), or
+ * if PGO_SYNCIO is set and there are pages busy.
* we return with the object locked.
- * => if PGO_CLEANIT is set, we may block (due to I/O). thus, a caller
- * might want to unlock higher level resources (e.g. vm_map)
- * before calling flush.
- * => if PGO_CLEANIT is not set, then we will neither unlock the object
- * or block.
- * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
+ * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
+ * thus, a caller might want to unlock higher level resources
+ * (e.g. vm_map) before calling flush.
+ * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither
+ * unlock the object nor block.
+ * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
* for flushing.
* => NOTE: we rely on the fact that the object's memq is a TAILQ and
* that new pages are inserted on the tail end of the list. thus,
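
[editor's note: putting the contract above together, a typical call might look like the
sketch below — sync every dirty page of the object and wait for the i/o before
returning. This is an assumed caller, not code from this commit:]

	simple_lock(&uobj->vmobjlock);
	(void) uvn_flush(uobj, 0, 0, PGO_CLEANIT | PGO_ALLPAGES | PGO_SYNCIO);
	/* uvn_flush may have slept, but the object is locked again here */
	simple_unlock(&uobj->vmobjlock);
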
@@ -814,39 +378,62 @@ uvn_releasepg(pg, nextpgp)
#define UVN_HASH_PENALTY 4 /* XXX: a guess */
-static boolean_t
+boolean_t
uvn_flush(uobj, start, stop, flags)
struct uvm_object *uobj;
voff_t start, stop;
int flags;
{
- struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
+ struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
+ struct vnode *vp = (struct vnode *)uobj;
struct vm_page *pp, *ppnext, *ptmp;
- struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
+ struct vm_page *pps[256], **ppsp;
+ int s;
int npages, result, lcv;
- boolean_t retval, need_iosync, by_list, needs_clean, all;
+ boolean_t retval, need_iosync, by_list, needs_clean, all, wasclean;
voff_t curoff;
u_short pp_version;
UVMHIST_FUNC("uvn_flush"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist, "uobj %p start 0x%x stop 0x%x flags 0x%x",
+ uobj, start, stop, flags);
+ KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
+
+ if (uobj->uo_npages == 0) {
+ s = splbio();
+ if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
+ (vp->v_bioflag & VBIOONSYNCLIST)) {
+ vp->v_bioflag &= ~VBIOONSYNCLIST;
+ LIST_REMOVE(vp, v_synclist);
+ }
+ splx(s);
+ return TRUE;
+ }
+
+#ifdef DIAGNOSTIC
+ if (uvn->u_size == VSIZENOTSET) {
+ printf("uvn_flush: size not set vp %p\n", uvn);
+ vprint("uvn_flush VSIZENOTSET", vp);
+ flags |= PGO_ALLPAGES;
+ }
+#endif
- curoff = 0; /* XXX: shut up gcc */
/*
* get init vals and determine how we are going to traverse object
*/
+ if (stop == 0) {
+ stop = trunc_page(LLONG_MAX);
+ }
+ curoff = 0;
need_iosync = FALSE;
- retval = TRUE; /* return value */
+ retval = TRUE;
+ wasclean = TRUE;
if (flags & PGO_ALLPAGES) {
all = TRUE;
- by_list = TRUE; /* always go by the list */
+ by_list = TRUE;
} else {
start = trunc_page(start);
stop = round_page(stop);
-#ifdef DEBUG
- if (stop > round_page(uvn->u_size))
- printf("uvn_flush: strange, got an out of range "
- "flush (fixed)\n");
-#endif
all = FALSE;
by_list = (uobj->uo_npages <=
((stop - start) >> PAGE_SHIFT) * UVN_HASH_PENALTY);
@@ -870,8 +457,7 @@ uvn_flush(uobj, start, stop, flags)
if ((flags & PGO_CLEANIT) != 0 &&
uobj->pgops->pgo_mk_pcluster != NULL) {
if (by_list) {
- for (pp = uobj->memq.tqh_first ; pp != NULL ;
- pp = pp->listq.tqe_next) {
+ TAILQ_FOREACH(pp, &uobj->memq, listq) {
if (!all &&
(pp->offset < start || pp->offset >= stop))
continue;
@@ -895,45 +481,32 @@ uvn_flush(uobj, start, stop, flags)
*/
if (by_list) {
- pp = uobj->memq.tqh_first;
+ pp = TAILQ_FIRST(&uobj->memq);
} else {
curoff = start;
pp = uvm_pagelookup(uobj, curoff);
}
- ppnext = NULL; /* XXX: shut up gcc */
- ppsp = NULL; /* XXX: shut up gcc */
- uvm_lock_pageq(); /* page queues locked */
+ ppnext = NULL;
+ ppsp = NULL;
+ uvm_lock_pageq();
/* locked: both page queues and uobj */
for ( ; (by_list && pp != NULL) ||
- (!by_list && curoff < stop) ; pp = ppnext) {
-
+ (!by_list && curoff < stop) ; pp = ppnext) {
if (by_list) {
-
- /*
- * range check
- */
-
if (!all &&
(pp->offset < start || pp->offset >= stop)) {
- ppnext = pp->listq.tqe_next;
+ ppnext = TAILQ_NEXT(pp, listq);
continue;
}
-
} else {
-
- /*
- * null check
- */
-
curoff += PAGE_SIZE;
if (pp == NULL) {
if (curoff < stop)
ppnext = uvm_pagelookup(uobj, curoff);
continue;
}
-
}
/*
@@ -949,24 +522,23 @@ uvn_flush(uobj, start, stop, flags)
if ((flags & PGO_CLEANIT) == 0 || (pp->flags & PG_BUSY) != 0) {
needs_clean = FALSE;
- if ((pp->flags & PG_BUSY) != 0 &&
- (flags & (PGO_CLEANIT|PGO_SYNCIO)) ==
- (PGO_CLEANIT|PGO_SYNCIO))
+ if (flags & PGO_SYNCIO)
need_iosync = TRUE;
} else {
+
/*
* freeing: nuke all mappings so we can sync
* PG_CLEAN bit with no race
*/
if ((pp->flags & PG_CLEAN) != 0 &&
(flags & PGO_FREE) != 0 &&
- (pp->pqflags & PQ_ACTIVE) != 0)
+ /* XXX ACTIVE|INACTIVE test unnecessary? */
+ (pp->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) != 0)
pmap_page_protect(pp, VM_PROT_NONE);
if ((pp->flags & PG_CLEAN) != 0 &&
pmap_is_modified(pp))
pp->flags &= ~(PG_CLEAN);
- pp->flags |= PG_CLEANCHK; /* update "hint" */
-
+ pp->flags |= PG_CLEANCHK;
needs_clean = ((pp->flags & PG_CLEAN) == 0);
}
@@ -974,29 +546,26 @@ uvn_flush(uobj, start, stop, flags)
* if we don't need a clean... load ppnext and dispose of pp
*/
if (!needs_clean) {
- /* load ppnext */
if (by_list)
- ppnext = pp->listq.tqe_next;
+ ppnext = TAILQ_NEXT(pp, listq);
else {
if (curoff < stop)
ppnext = uvm_pagelookup(uobj, curoff);
}
- /* now dispose of pp */
if (flags & PGO_DEACTIVATE) {
if ((pp->pqflags & PQ_INACTIVE) == 0 &&
+ (pp->flags & PG_BUSY) == 0 &&
pp->wire_count == 0) {
- pmap_page_protect(pp, VM_PROT_NONE);
+ pmap_clear_reference(pp);
uvm_pagedeactivate(pp);
}
} else if (flags & PGO_FREE) {
if (pp->flags & PG_BUSY) {
- /* release busy pages */
pp->flags |= PG_RELEASED;
} else {
pmap_page_protect(pp, VM_PROT_NONE);
- /* removed page from object */
uvm_pagefree(pp);
}
}
@@ -1013,6 +582,7 @@ uvn_flush(uobj, start, stop, flags)
* note: locked: uobj and page queues.
*/
+ wasclean = FALSE;
pp->flags |= PG_BUSY; /* we 'own' page now */
UVM_PAGE_OWN(pp, "uvn_flush");
pmap_page_protect(pp, VM_PROT_READ);
@@ -1023,7 +593,7 @@ ReTry:
/* locked: page queues, uobj */
result = uvm_pager_put(uobj, pp, &ppsp, &npages,
- flags | PGO_DOACTCLUST, start, stop);
+ flags | PGO_DOACTCLUST, start, stop);
/* unlocked: page queues, uobj */
/*
@@ -1046,7 +616,8 @@ ReTry:
*/
if (result == VM_PAGER_AGAIN) {
- /*
+
+ /*
* it is unlikely, but page could have been released
* while we had the object lock dropped. we ignore
* this now and retry the I/O. we will detect and
@@ -1073,27 +644,22 @@ ReTry:
* we can move on to the next page.
*/
- if (result == VM_PAGER_PEND) {
+ if (result == VM_PAGER_PEND &&
+ (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
- if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
- /*
- * no per-page ops: refresh ppnext and continue
- */
- if (by_list) {
- if (pp->version == pp_version)
- ppnext = pp->listq.tqe_next;
- else
- /* reset */
- ppnext = uobj->memq.tqh_first;
- } else {
- if (curoff < stop)
- ppnext = uvm_pagelookup(uobj,
- curoff);
- }
- continue;
+ /*
+ * no per-page ops: refresh ppnext and continue
+ */
+ if (by_list) {
+ if (pp->version == pp_version)
+ ppnext = TAILQ_NEXT(pp, listq);
+ else
+ ppnext = TAILQ_FIRST(&uobj->memq);
+ } else {
+ if (curoff < stop)
+ ppnext = uvm_pagelookup(uobj, curoff);
}
-
- /* need to do anything here? */
+ continue;
}
/*
@@ -1120,18 +686,19 @@ ReTry:
/* set up next page for outer loop */
if (by_list) {
if (pp->version == pp_version)
- ppnext = pp->listq.tqe_next;
+ ppnext = TAILQ_NEXT(pp, listq);
else
- /* reset */
- ppnext = uobj->memq.tqh_first;
+ ppnext = TAILQ_FIRST(
+ &uobj->memq);
} else {
if (curoff < stop)
- ppnext = uvm_pagelookup(uobj, curoff);
+ ppnext = uvm_pagelookup(uobj,
+ curoff);
}
}
/*
- * verify the page didn't get moved while obj was
+ * verify the page wasn't moved while obj was
* unlocked
*/
if (result == VM_PAGER_PEND && ptmp->uobject != uobj)
@@ -1145,26 +712,32 @@ ReTry:
*/
if (result != VM_PAGER_PEND) {
- if (ptmp->flags & PG_WANTED)
+ if (ptmp->flags & PG_WANTED) {
/* still holding object lock */
wakeup(ptmp);
-
+ }
ptmp->flags &= ~(PG_WANTED|PG_BUSY);
UVM_PAGE_OWN(ptmp, NULL);
if (ptmp->flags & PG_RELEASED) {
-
- /* pgo_releasepg wants this */
uvm_unlock_pageq();
- if (!uvn_releasepg(ptmp, NULL))
+ if (!uvn_releasepg(ptmp, NULL)) {
+ UVMHIST_LOG(maphist,
+ "released %p",
+ ptmp, 0,0,0);
return (TRUE);
-
- uvm_lock_pageq(); /* relock */
- continue; /* next page */
-
+ }
+ uvm_lock_pageq();
+ continue;
} else {
- ptmp->flags |= (PG_CLEAN|PG_CLEANCHK);
- if ((flags & PGO_FREE) == 0)
- pmap_clear_modify(ptmp);
+ if ((flags & PGO_WEAK) == 0 &&
+ !(result == VM_PAGER_ERROR &&
+ curproc == uvm.pagedaemon_proc)) {
+ ptmp->flags |=
+ (PG_CLEAN|PG_CLEANCHK);
+ if ((flags & PGO_FREE) == 0) {
+ pmap_clear_modify(ptmp);
+ }
+ }
}
}
@@ -1174,11 +747,11 @@ ReTry:
if (flags & PGO_DEACTIVATE) {
if ((pp->pqflags & PQ_INACTIVE) == 0 &&
+ (pp->flags & PG_BUSY) == 0 &&
pp->wire_count == 0) {
- pmap_page_protect(ptmp, VM_PROT_NONE);
+ pmap_clear_reference(ptmp);
uvm_pagedeactivate(ptmp);
}
-
} else if (flags & PGO_FREE) {
if (result == VM_PAGER_PEND) {
if ((ptmp->flags & PG_BUSY) != 0)
@@ -1187,10 +760,10 @@ ReTry:
} else {
if (result != VM_PAGER_OK) {
printf("uvn_flush: obj=%p, "
- "offset=0x%llx. error "
- "during pageout.\n",
+ "offset=0x%llx. error %d\n",
pp->uobject,
- (long long)pp->offset);
+ (long long)pp->offset,
+ result);
printf("uvn_flush: WARNING: "
"changes to page may be "
"lost!\n");
@@ -1200,31 +773,38 @@ ReTry:
uvm_pagefree(ptmp);
}
}
-
} /* end of "lcv" for loop */
-
} /* end of "pp" for loop */
- /*
- * done with pagequeues: unlock
- */
uvm_unlock_pageq();
-
- /*
- * now wait for all I/O if required.
- */
+ s = splbio();
+ if ((flags & PGO_CLEANIT) && all && wasclean &&
+ LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
+ (vp->v_bioflag & VBIOONSYNCLIST)) {
+ vp->v_bioflag &= ~VBIOONSYNCLIST;
+ LIST_REMOVE(vp, v_synclist);
+ }
+ splx(s);
if (need_iosync) {
-
UVMHIST_LOG(maphist," <<DOING IOSYNC>>",0,0,0,0);
- while (uvn->u_nio != 0) {
- uvn->u_flags |= UVM_VNODE_IOSYNC;
- UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock,
- FALSE, "uvn_flush",0);
+
+ /*
+ * XXX this doesn't use the new two-flag scheme,
+ * but to use that, all i/o initiators will have to change.
+ */
+
+ s = splbio();
+ while (vp->v_numoutput != 0) {
+ UVMHIST_LOG(ubchist, "waiting for vp %p num %d",
+ vp, vp->v_numoutput,0,0);
+
+ vp->v_bioflag |= VBIOWAIT;
+ UVM_UNLOCK_AND_WAIT(&vp->v_numoutput,
+ &uvn->u_obj.vmobjlock,
+ FALSE, "uvn_flush",0);
simple_lock(&uvn->u_obj.vmobjlock);
}
- if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
- wakeup(&uvn->u_flags);
- uvn->u_flags &= ~(UVM_VNODE_IOSYNC|UVM_VNODE_IOSYNCWANTED);
+ splx(s);
}
/* return, with object locked! */
@@ -1248,31 +828,18 @@ uvn_cluster(uobj, offset, loffset, hoffset)
voff_t offset;
voff_t *loffset, *hoffset; /* OUT */
{
- struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
- *loffset = offset;
-
- if (*loffset >= uvn->u_size)
- panic("uvn_cluster: offset out of range");
+ struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
- /*
- * XXX: old pager claims we could use VOP_BMAP to get maxcontig value.
- */
- *hoffset = *loffset + MAXBSIZE;
- if (*hoffset > round_page(uvn->u_size)) /* past end? */
- *hoffset = round_page(uvn->u_size);
-
- return;
+ *loffset = offset;
+ *hoffset = MIN(offset + MAXBSIZE, round_page(uvn->u_size));
}
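
[editor's note: a worked instance of the new clamp, with assumed values
MAXBSIZE = 64KB, PAGE_SIZE = 4KB, u_size = 0x9000:]

	uvn_cluster(uobj, 0x2000, &lo, &hi);
	/* lo == 0x2000						*/
	/* hi == MIN(0x2000 + 0x10000, round_page(0x9000))	*/
	/*    == 0x9000: never past the rounded end of file	*/
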
/*
* uvn_put: flush page data to backing store.
*
- * => prefer map unlocked (not required)
* => object must be locked! we will _unlock_ it before starting I/O.
* => flags: PGO_SYNCIO -- use sync. I/O
* => note: caller must set PG_CLEAN and pmap_clear_modify (if needed)
- * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
- * [thus we never do async i/o! see iodone comment]
*/
static int
@@ -1281,13 +848,11 @@ uvn_put(uobj, pps, npages, flags)
struct vm_page **pps;
int npages, flags;
{
- int retval;
-
- /* note: object locked */
- retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE);
- /* note: object unlocked */
+ struct vnode *vp = (struct vnode *)uobj;
+ int error;
- return(retval);
+ error = VOP_PUTPAGES(vp, pps, npages, flags, NULL);
+ return uvm_errno2vmerror(error);
}
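
[editor's note: uvn_put() now returns a unix errno from VOP_PUTPAGES() translated into
the pager's VM_PAGER_* namespace. The real table lives in uvm_errno2vmerror() in
uvm_pager.c; the cases below are an illustrative assumption of its shape, not the
verbatim code:]

	int
	errno2vmerror_sketch(int error)	/* hypothetical stand-in */
	{
		switch (error) {
		case 0:
			return (VM_PAGER_OK);
		case EAGAIN:
			return (VM_PAGER_AGAIN);
		case EINVAL:
			return (VM_PAGER_BAD);
		default:
			return (VM_PAGER_ERROR);
		}
	}
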
@@ -1308,551 +873,121 @@ uvn_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
voff_t offset;
struct vm_page **pps; /* IN/OUT */
int *npagesp; /* IN (OUT if PGO_LOCKED) */
- int centeridx, advice, flags;
+ int centeridx;
vm_prot_t access_type;
+ int advice, flags;
{
- voff_t current_offset;
- struct vm_page *ptmp;
- int lcv, result, gotpages;
- boolean_t done;
- UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(maphist);
- UVMHIST_LOG(maphist, "flags=%d", flags,0,0,0);
-
- /*
- * step 1: handled the case where fault data structures are locked.
- */
-
- if (flags & PGO_LOCKED) {
-
- /*
- * gotpages is the current number of pages we've gotten (which
- * we pass back up to caller via *npagesp.
- */
-
- gotpages = 0;
-
- /*
- * step 1a: get pages that are already resident. only do this
- * if the data structures are locked (i.e. the first time
- * through).
- */
-
- done = TRUE; /* be optimistic */
-
- for (lcv = 0, current_offset = offset ; lcv < *npagesp ;
- lcv++, current_offset += PAGE_SIZE) {
-
- /* do we care about this page? if not, skip it */
- if (pps[lcv] == PGO_DONTCARE)
- continue;
-
- /* lookup page */
- ptmp = uvm_pagelookup(uobj, current_offset);
-
- /* to be useful must get a non-busy, non-released pg */
- if (ptmp == NULL ||
- (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
- if (lcv == centeridx || (flags & PGO_ALLPAGES)
- != 0)
- done = FALSE; /* need to do a wait or I/O! */
- continue;
- }
-
- /*
- * useful page: busy/lock it and plug it in our
- * result array
- */
- ptmp->flags |= PG_BUSY; /* loan up to caller */
- UVM_PAGE_OWN(ptmp, "uvn_get1");
- pps[lcv] = ptmp;
- gotpages++;
-
- } /* "for" lcv loop */
-
- /*
- * XXX: given the "advice", should we consider async read-ahead?
- * XXX: fault current does deactive of pages behind us. is
- * this good (other callers might now).
- */
- /*
- * XXX: read-ahead currently handled by buffer cache (bread)
- * level.
- * XXX: no async i/o available.
- * XXX: so we don't do anything now.
- */
-
- /*
- * step 1c: now we've either done everything needed or we to
- * unlock and do some waiting or I/O.
- */
-
- *npagesp = gotpages; /* let caller know */
- if (done)
- return(VM_PAGER_OK); /* bingo! */
- else
- /* EEK! Need to unlock and I/O */
- return(VM_PAGER_UNLOCK);
- }
-
- /*
- * step 2: get non-resident or busy pages.
- * object is locked. data structures are unlocked.
- *
- * XXX: because we can't do async I/O at this level we get things
- * page at a time (otherwise we'd chunk). the VOP_READ() will do
- * async-read-ahead for us at a lower level.
- */
-
- for (lcv = 0, current_offset = offset ;
- lcv < *npagesp ; lcv++, current_offset += PAGE_SIZE) {
-
- /* skip over pages we've already gotten or don't want */
- /* skip over pages we don't _have_ to get */
- if (pps[lcv] != NULL || (lcv != centeridx &&
- (flags & PGO_ALLPAGES) == 0))
- continue;
-
- /*
- * we have yet to locate the current page (pps[lcv]). we first
- * look for a page that is already at the current offset. if
- * we fine a page, we check to see if it is busy or released.
- * if that is the case, then we sleep on the page until it is
- * no longer busy or released and repeat the lookup. if the
- * page we found is neither busy nor released, then we busy it
- * (so we own it) and plug it into pps[lcv]. this breaks the
- * following while loop and indicates we are ready to move on
- * to the next page in the "lcv" loop above.
- *
- * if we exit the while loop with pps[lcv] still set to NULL,
- * then it means that we allocated a new busy/fake/clean page
- * ptmp in the object and we need to do I/O to fill in the data.
- */
-
- while (pps[lcv] == NULL) { /* top of "pps" while loop */
-
- /* look for a current page */
- ptmp = uvm_pagelookup(uobj, current_offset);
-
- /* nope? allocate one now (if we can) */
- if (ptmp == NULL) {
-
- ptmp = uvm_pagealloc(uobj, current_offset,
- NULL, 0);
-
- /* out of RAM? */
- if (ptmp == NULL) {
- simple_unlock(&uobj->vmobjlock);
- uvm_wait("uvn_getpage");
- simple_lock(&uobj->vmobjlock);
-
- /* goto top of pps while loop */
- continue;
- }
-
- /*
- * got new page ready for I/O. break pps
- * while loop. pps[lcv] is still NULL.
- */
- break;
- }
-
- /* page is there, see if we need to wait on it */
- if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
- ptmp->flags |= PG_WANTED;
- UVM_UNLOCK_AND_WAIT(ptmp,
- &uobj->vmobjlock, FALSE, "uvn_get",0);
- simple_lock(&uobj->vmobjlock);
- continue; /* goto top of pps while loop */
- }
-
- /*
- * if we get here then the page has become resident
- * and unbusy between steps 1 and 2. we busy it
- * now (so we own it) and set pps[lcv] (so that we
- * exit the while loop).
- */
- ptmp->flags |= PG_BUSY;
- UVM_PAGE_OWN(ptmp, "uvn_get2");
- pps[lcv] = ptmp;
- }
-
- /*
- * if we own the a valid page at the correct offset, pps[lcv]
- * will point to it. nothing more to do except go to the
- * next page.
- */
-
- if (pps[lcv])
- continue; /* next lcv */
-
- /*
- * we have a "fake/busy/clean" page that we just allocated. do
- * I/O to fill it with valid data. note that object must be
- * locked going into uvn_io, but will be unlocked afterwards.
- */
-
- result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1,
- PGO_SYNCIO, UIO_READ);
-
- /*
- * I/O done. object is unlocked (by uvn_io). because we used
- * syncio the result can not be PEND or AGAIN. we must relock
- * and check for errors.
- */
-
- /* lock object. check for errors. */
- simple_lock(&uobj->vmobjlock);
- if (result != VM_PAGER_OK) {
- if (ptmp->flags & PG_WANTED)
- /* object lock still held */
- wakeup(ptmp);
-
- ptmp->flags &= ~(PG_WANTED|PG_BUSY);
- UVM_PAGE_OWN(ptmp, NULL);
- uvm_lock_pageq();
- uvm_pagefree(ptmp);
- uvm_unlock_pageq();
- simple_unlock(&uobj->vmobjlock);
- return(result);
- }
-
- /*
- * we got the page! clear the fake flag (indicates valid
- * data now in page) and plug into our result array. note
- * that page is still busy.
- *
- * it is the callers job to:
- * => check if the page is released
- * => unbusy the page
- * => activate the page
- */
-
- ptmp->flags &= ~PG_FAKE; /* data is valid ... */
- pmap_clear_modify(ptmp); /* ... and clean */
- pps[lcv] = ptmp;
-
- } /* lcv loop */
-
- /*
- * finally, unlock object and return.
- */
-
- simple_unlock(&uobj->vmobjlock);
- return (VM_PAGER_OK);
+ struct vnode *vp = (struct vnode *)uobj;
+ int error;
+ UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x", vp, (int)offset, 0,0);
+ error = VOP_GETPAGES(vp, offset, pps, npagesp, centeridx,
+ access_type, advice, flags);
+ return uvm_errno2vmerror(error);
}
+
/*
- * uvn_io: do I/O to a vnode
- *
- * => prefer map unlocked (not required)
- * => object must be locked! we will _unlock_ it before starting I/O.
- * => flags: PGO_SYNCIO -- use sync. I/O
- * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
- * [thus we never do async i/o! see iodone comment]
+ * uvn_findpages:
+ * return the pages for the uobj and offsets requested, allocating if needed.
+ * => uobj must be locked.
+ * => returned page will be BUSY.
*/
-static int
-uvn_io(uvn, pps, npages, flags, rw)
- struct uvm_vnode *uvn;
- vm_page_t *pps;
- int npages, flags, rw;
+void
+uvn_findpages(uobj, offset, npagesp, pps, flags)
+ struct uvm_object *uobj;
+ voff_t offset;
+ int *npagesp;
+ struct vm_page **pps;
+ int flags;
{
- struct vnode *vn;
- struct uio uio;
- struct iovec iov;
- vaddr_t kva;
- off_t file_offset;
- int waitf, result, mapinflags;
- size_t got, wanted;
- UVMHIST_FUNC("uvn_io"); UVMHIST_CALLED(maphist);
-
- UVMHIST_LOG(maphist, "rw=%d", rw,0,0,0);
-
- /*
- * init values
- */
-
- waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
- vn = (struct vnode *) uvn;
- file_offset = pps[0]->offset;
-
- /*
- * check for sync'ing I/O.
- */
-
- while (uvn->u_flags & UVM_VNODE_IOSYNC) {
- if (waitf == M_NOWAIT) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist,"<- try again (iosync)",0,0,0,0);
- return(VM_PAGER_AGAIN);
- }
- uvn->u_flags |= UVM_VNODE_IOSYNCWANTED;
- UVM_UNLOCK_AND_WAIT(&uvn->u_flags, &uvn->u_obj.vmobjlock,
- FALSE, "uvn_iosync",0);
- simple_lock(&uvn->u_obj.vmobjlock);
- }
-
- /*
- * check size
- */
-
- if (file_offset >= uvn->u_size) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist,"<- BAD (size check)",0,0,0,0);
- return(VM_PAGER_BAD);
- }
-
- /*
- * first try and map the pages in (without waiting)
- */
-
- mapinflags = (rw == UIO_READ) ?
- UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
-
- kva = uvm_pagermapin(pps, npages, mapinflags);
- if (kva == 0 && waitf == M_NOWAIT) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist,"<- mapin failed (try again)",0,0,0,0);
- return(VM_PAGER_AGAIN);
- }
-
- /*
- * ok, now bump u_nio up. at this point we are done with uvn
- * and can unlock it. if we still don't have a kva, try again
- * (this time with sleep ok).
- */
-
- uvn->u_nio++; /* we have an I/O in progress! */
- simple_unlock(&uvn->u_obj.vmobjlock);
- /* NOTE: object now unlocked */
- if (kva == 0)
- kva = uvm_pagermapin(pps, npages,
- mapinflags | UVMPAGER_MAPIN_WAITOK);
-
- /*
- * ok, mapped in. our pages are PG_BUSY so they are not going to
- * get touched (so we can look at "offset" without having to lock
- * the object). set up for I/O.
- */
-
- /*
- * fill out uio/iov
- */
-
- iov.iov_base = (caddr_t) kva;
- wanted = npages << PAGE_SHIFT;
- if (file_offset + wanted > uvn->u_size)
- wanted = uvn->u_size - file_offset; /* XXX: needed? */
- iov.iov_len = wanted;
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_offset = file_offset;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_rw = rw;
- uio.uio_resid = wanted;
- uio.uio_procp = curproc;
-
- /*
- * do the I/O! (XXX: curproc?)
- */
-
- UVMHIST_LOG(maphist, "calling VOP",0,0,0,0);
-
- /*
- * This process may already have this vnode locked, if we faulted in
- * copyin() or copyout() on a region backed by this vnode
- * while doing I/O to the vnode. If this is the case, don't
- * panic.. instead, return the error to the user.
- *
- * XXX this is a stopgap to prevent a panic.
- * Ideally, this kind of operation *should* work.
- */
- result = 0;
- if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
- result = vn_lock(vn, LK_EXCLUSIVE | LK_RETRY | LK_RECURSEFAIL, curproc);
-
- if (result == 0) {
- /* NOTE: vnode now locked! */
-
- if (rw == UIO_READ)
- result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
- else
- result = VOP_WRITE(vn, &uio, 0, curproc->p_ucred);
+ int i, rv, npages;
- if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
- VOP_UNLOCK(vn, 0, curproc);
+ rv = 0;
+ npages = *npagesp;
+ for (i = 0; i < npages; i++, offset += PAGE_SIZE) {
+ rv += uvn_findpage(uobj, offset, &pps[i], flags);
}
-
- /* NOTE: vnode now unlocked (unless vnislocked) */
-
- UVMHIST_LOG(maphist, "done calling VOP",0,0,0,0);
-
- /*
- * result == unix style errno (0 == OK!)
- *
- * zero out rest of buffer (if needed)
- */
-
- if (result == 0) {
- got = wanted - uio.uio_resid;
-
- if (wanted && got == 0) {
- result = EIO; /* XXX: error? */
- } else if (got < PAGE_SIZE * npages && rw == UIO_READ) {
- memset((void *) (kva + got), 0,
- (npages << PAGE_SHIFT) - got);
- }
- }
-
- /*
- * now remove pager mapping
- */
- uvm_pagermapout(kva, npages);
-
- /*
- * now clean up the object (i.e. drop I/O count)
- */
-
- simple_lock(&uvn->u_obj.vmobjlock);
- /* NOTE: object now locked! */
-
- uvn->u_nio--; /* I/O DONE! */
- if ((uvn->u_flags & UVM_VNODE_IOSYNC) != 0 && uvn->u_nio == 0) {
- wakeup(&uvn->u_nio);
- }
- simple_unlock(&uvn->u_obj.vmobjlock);
- /* NOTE: object now unlocked! */
-
- /*
- * done!
- */
-
- UVMHIST_LOG(maphist, "<- done (result %d)", result,0,0,0);
- if (result == 0)
- return(VM_PAGER_OK);
- else
- return(VM_PAGER_ERROR);
+ *npagesp = rv;
}
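
[editor's note: a usage sketch for the new interface, in an assumed caller context:
look up to four resident pages starting at "off" without allocating or sleeping.
Slots the caller does not care about would be primed non-NULL; here all four are
wanted, and on return npages counts how many were found, each one PG_BUSY:]

	struct vm_page *pgs[4];
	int npages = 4;

	memset(pgs, 0, sizeof(pgs));	/* NULL == "please find this one" */
	simple_lock(&uobj->vmobjlock);
	uvn_findpages(uobj, off, &npages, pgs, UFP_NOALLOC | UFP_NOWAIT);
	simple_unlock(&uobj->vmobjlock);
	/* npages now counts the non-NULL entries in pgs[] */
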
-/*
- * uvm_vnp_uncache: disable "persisting" in a vnode... when last reference
- * is gone we will kill the object (flushing dirty pages back to the vnode
- * if needed).
- *
- * => returns TRUE if there was no uvm_object attached or if there was
- * one and we killed it [i.e. if there is no active uvn]
- * => called with the vnode VOP_LOCK'd [we will unlock it for I/O, if
- * needed]
- *
- * => XXX: given that we now kill uvn's when a vnode is recycled (without
- * having to hold a reference on the vnode) and given a working
- * uvm_vnp_sync(), how does that effect the need for this function?
- * [XXXCDC: seems like it can die?]
- *
- * => XXX: this function should DIE once we merge the VM and buffer
- * cache.
- *
- * research shows that this is called in the following places:
- * ext2fs_truncate, ffs_truncate, detrunc[msdosfs]: called when vnode
- * changes sizes
- * ext2fs_write, WRITE [ufs_readwrite], msdosfs_write: called when we
- * are written to
- * ex2fs_chmod, ufs_chmod: called if VTEXT vnode and the sticky bit
- * is off
- * ffs_realloccg: when we can't extend the current block and have
- * to allocate a new one we call this [XXX: why?]
- * nfsrv_rename, rename_files: called when the target filename is there
- * and we want to remove it
- * nfsrv_remove, sys_unlink: called on file we are removing
- * nfsrv_access: if VTEXT and we want WRITE access and we don't uncache
- * then return "text busy"
- * nfs_open: seems to uncache any file opened with nfs
- * vn_writechk: if VTEXT vnode and can't uncache return "text busy"
- */
-
-boolean_t
-uvm_vnp_uncache(vp)
- struct vnode *vp;
+static int
+uvn_findpage(uobj, offset, pgp, flags)
+ struct uvm_object *uobj;
+ voff_t offset;
+ struct vm_page **pgp;
+ int flags;
{
- struct uvm_vnode *uvn = &vp->v_uvm;
-
- /*
- * lock uvn part of the vnode and check to see if we need to do anything
- */
+ struct vm_page *pg;
+ UVMHIST_FUNC("uvn_findpage"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p off 0x%lx", uobj, offset,0,0);
- simple_lock(&uvn->u_obj.vmobjlock);
- if ((uvn->u_flags & UVM_VNODE_VALID) == 0 ||
- (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- return(TRUE);
+ if (*pgp != NULL) {
+ UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0);
+ return 0;
}
+ for (;;) {
+ /* look for an existing page */
+ pg = uvm_pagelookup(uobj, offset);
+
+ /* nope? allocate one now */
+ if (pg == NULL) {
+ if (flags & UFP_NOALLOC) {
+ UVMHIST_LOG(ubchist, "noalloc", 0,0,0,0);
+ return 0;
+ }
+ pg = uvm_pagealloc(uobj, offset, NULL, 0);
+ if (pg == NULL) {
+ if (flags & UFP_NOWAIT) {
+ UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
+ return 0;
+ }
+ simple_unlock(&uobj->vmobjlock);
+ uvm_wait("uvn_fp1");
+ simple_lock(&uobj->vmobjlock);
+ continue;
+ }
+ if (UVM_OBJ_IS_VTEXT(uobj)) {
+ uvmexp.vtextpages++;
+ } else {
+ uvmexp.vnodepages++;
+ }
+ UVMHIST_LOG(ubchist, "alloced",0,0,0,0);
+ break;
+ } else if (flags & UFP_NOCACHE) {
+ UVMHIST_LOG(ubchist, "nocache",0,0,0,0);
+ return 0;
+ }
- /*
- * we have a valid, non-blocked uvn. clear persist flag.
- * if uvn is currently active we can return now.
- */
-
- uvn->u_flags &= ~UVM_VNODE_CANPERSIST;
- if (uvn->u_obj.uo_refs) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- return(FALSE);
- }
-
- /*
- * uvn is currently persisting! we have to gain a reference to
- * it so that we can call uvn_detach to kill the uvn.
- */
-
- VREF(vp); /* seems ok, even with VOP_LOCK */
- uvn->u_obj.uo_refs++; /* value is now 1 */
- simple_unlock(&uvn->u_obj.vmobjlock);
-
-
-#ifdef DEBUG
- /*
- * carry over sanity check from old vnode pager: the vnode should
- * be VOP_LOCK'd, and we confirm it here.
- */
- if (!VOP_ISLOCKED(vp)) {
- boolean_t is_ok_anyway = FALSE;
-#if defined(NFSCLIENT)
- extern int (**nfsv2_vnodeop_p) __P((void *));
- extern int (**spec_nfsv2nodeop_p) __P((void *));
- extern int (**fifo_nfsv2nodeop_p) __P((void *));
-
- /* vnode is NOT VOP_LOCKed: some vnode types _never_ lock */
- if (vp->v_op == nfsv2_vnodeop_p ||
- vp->v_op == spec_nfsv2nodeop_p) {
- is_ok_anyway = TRUE;
+ /* page is there, see if we need to wait on it */
+ if ((pg->flags & (PG_BUSY|PG_RELEASED)) != 0) {
+ if (flags & UFP_NOWAIT) {
+ UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
+ return 0;
+ }
+ pg->flags |= PG_WANTED;
+ UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
+ "uvn_fp2", 0);
+ simple_lock(&uobj->vmobjlock);
+ continue;
}
- if (vp->v_op == fifo_nfsv2nodeop_p) {
- is_ok_anyway = TRUE;
+
+ /* skip PG_RDONLY pages if requested */
+ if ((flags & UFP_NORDONLY) && (pg->flags & PG_RDONLY)) {
+ UVMHIST_LOG(ubchist, "nordonly",0,0,0,0);
+ return 0;
}
-#endif /* defined(NFSSERVER) || defined(NFSCLIENT) */
- if (!is_ok_anyway)
- panic("uvm_vnp_uncache: vnode not locked!");
- }
-#endif /* DEBUG */
- /*
- * now drop our reference to the vnode. if we have the sole
- * reference to the vnode then this will cause it to die [as we
- * just cleared the persist flag]. we have to unlock the vnode
- * while we are doing this as it may trigger I/O.
- *
- * XXX: it might be possible for uvn to get reclaimed while we are
- * unlocked causing us to return TRUE when we should not. we ignore
- * this as a false-positive return value doesn't hurt us.
- */
- VOP_UNLOCK(vp, 0, curproc);
- uvn_detach(&uvn->u_obj);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
-
- /*
- * and return...
- */
-
- return(TRUE);
+ /* mark the page BUSY and we're done. */
+ pg->flags |= PG_BUSY;
+ UVM_PAGE_OWN(pg, "uvn_findpage");
+ UVMHIST_LOG(ubchist, "found",0,0,0,0);
+ break;
+ }
+ *pgp = pg;
+ return 1;
}
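
[editor's note: the loop above is the standard UVM busy-page rendezvous. A condensed
sketch of just that idiom (the real function also allocates and applies the UFP_*
flags; "pgwait" is a hypothetical wmesg):]

	for (;;) {
		pg = uvm_pagelookup(uobj, off);
		if (pg == NULL || (pg->flags & (PG_BUSY|PG_RELEASED)) == 0)
			break;			/* absent or usable */
		pg->flags |= PG_WANTED;		/* ask the owner for a wakeup */
		UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, FALSE, "pgwait", 0);
		simple_lock(&uobj->vmobjlock);	/* relock and look again */
	}
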
/*
@@ -1879,150 +1014,49 @@ uvm_vnp_setsize(vp, newsize)
voff_t newsize;
{
struct uvm_vnode *uvn = &vp->v_uvm;
+ voff_t pgend = round_page(newsize);
+ UVMHIST_FUNC("uvm_vnp_setsize"); UVMHIST_CALLED(ubchist);
- /*
- * lock uvn and check for valid object, and if valid: do it!
- */
simple_lock(&uvn->u_obj.vmobjlock);
- if (uvn->u_flags & UVM_VNODE_VALID) {
-
- /*
- * now check if the size has changed: if we shrink we had better
- * toss some pages...
- */
- if (uvn->u_size > newsize) {
- (void)uvn_flush(&uvn->u_obj, newsize,
- uvn->u_size, PGO_FREE);
- }
- uvn->u_size = newsize;
- }
- simple_unlock(&uvn->u_obj.vmobjlock);
+ UVMHIST_LOG(ubchist, "old 0x%x new 0x%x", uvn->u_size, newsize, 0,0);
/*
- * done
+ * now check if the size has changed: if we shrink we had better
+ * toss some pages...
*/
- return;
+
+ if (uvn->u_size > pgend && uvn->u_size != VSIZENOTSET) {
+ (void) uvn_flush(&uvn->u_obj, pgend, 0, PGO_FREE);
+ }
+ uvn->u_size = newsize;
+ simple_unlock(&uvn->u_obj.vmobjlock);
}
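
[editor's note: a worked shrink case, assuming PAGE_SIZE = 4KB and a 0x5800-byte file:]

	uvm_vnp_setsize(vp, 0x1200);
	/* pgend  = round_page(0x1200) = 0x2000			*/
	/* flush  : uvn_flush(uobj, 0x2000, 0, PGO_FREE)	*/
	/*          ("stop == 0" meaning to-the-end, see above)	*/
	/* u_size : 0x5800 -> 0x1200				*/

	/* the partially valid page at [0x1000, 0x2000) survives;	*/
	/* zeroing its stale tail is the caller's job, e.g. via	*/
	/* uvm_vnp_zerorange() below				*/
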
/*
- * uvm_vnp_sync: flush all dirty VM pages back to their backing vnodes.
- *
- * => called from sys_sync with no VM structures locked
- * => only one process can do a sync at a time (because the uvn
- * structure only has one queue for sync'ing). we ensure this
- * by holding the uvn_sync_lock while the sync is in progress.
- * other processes attempting a sync will sleep on this lock
- * until we are done.
+ * uvm_vnp_zerorange: set a range of bytes in a file to zero.
*/
void
-uvm_vnp_sync(mp)
- struct mount *mp;
-{
- struct uvm_vnode *uvn;
+uvm_vnp_zerorange(vp, off, len)
struct vnode *vp;
- boolean_t got_lock;
-
- /*
- * step 1: ensure we are only ones using the uvn_sync_q by locking
- * our lock...
- */
- lockmgr(&uvn_sync_lock, LK_EXCLUSIVE, NULL, curproc);
-
- /*
- * step 2: build up a simpleq of uvns of interest based on the
- * write list. we gain a reference to uvns of interest. must
- * be careful about locking uvn's since we will be holding uvn_wl_lock
- * in the body of the loop.
- */
- SIMPLEQ_INIT(&uvn_sync_q);
- simple_lock(&uvn_wl_lock);
- for (uvn = uvn_wlist.lh_first ; uvn != NULL ;
- uvn = uvn->u_wlist.le_next) {
-
- vp = (struct vnode *) uvn;
- if (mp && vp->v_mount != mp)
- continue;
-
- /* attempt to gain reference */
- while ((got_lock = simple_lock_try(&uvn->u_obj.vmobjlock)) ==
- FALSE &&
- (uvn->u_flags & UVM_VNODE_BLOCKED) == 0)
- /* spin */ ;
-
- /*
- * we will exit the loop if either if the following are true:
- * - we got the lock [always true if NCPU == 1]
- * - we failed to get the lock but noticed the vnode was
- * "blocked" -- in this case the vnode must be a dying
- * vnode, and since dying vnodes are in the process of
- * being flushed out, we can safely skip this one
- *
- * we want to skip over the vnode if we did not get the lock,
- * or if the vnode is already dying (due to the above logic).
- *
- * note that uvn must already be valid because we found it on
- * the wlist (this also means it can't be ALOCK'd).
- */
- if (!got_lock || (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
- if (got_lock)
- simple_unlock(&uvn->u_obj.vmobjlock);
- continue; /* skip it */
- }
-
- /*
- * gain reference. watch out for persisting uvns (need to
- * regain vnode REF).
- */
- if (uvn->u_obj.uo_refs == 0)
- VREF(vp);
- uvn->u_obj.uo_refs++;
- simple_unlock(&uvn->u_obj.vmobjlock);
-
- /*
- * got it!
- */
- SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq);
- }
- simple_unlock(&uvn_wl_lock);
+ off_t off;
+ size_t len;
+{
+ void *win;
- /*
- * step 3: we now have a list of uvn's that may need cleaning.
- * we are holding the uvn_sync_lock, but have dropped the uvn_wl_lock
- * (so we can now safely lock uvn's again).
- */
+ /*
+ * XXXUBC invent kzero() and use it
+ */
- for (uvn = uvn_sync_q.sqh_first ; uvn ; uvn = uvn->u_syncq.sqe_next) {
- simple_lock(&uvn->u_obj.vmobjlock);
-#ifdef DEBUG
- if (uvn->u_flags & UVM_VNODE_DYING) {
- printf("uvm_vnp_sync: dying vnode on sync list\n");
- }
-#endif
- uvn_flush(&uvn->u_obj, 0, 0,
- PGO_CLEANIT|PGO_ALLPAGES|PGO_DOACTCLUST);
+ while (len) {
+ vsize_t bytelen = len;
- /*
- * if we have the only reference and we just cleaned the uvn,
- * then we can pull it out of the UVM_VNODE_WRITEABLE state
- * thus allowing us to avoid thinking about flushing it again
- * on later sync ops.
- */
- if (uvn->u_obj.uo_refs == 1 &&
- (uvn->u_flags & UVM_VNODE_WRITEABLE)) {
- LIST_REMOVE(uvn, u_wlist);
- uvn->u_flags &= ~UVM_VNODE_WRITEABLE;
- }
-
- simple_unlock(&uvn->u_obj.vmobjlock);
+ win = ubc_alloc(&vp->v_uvm.u_obj, off, &bytelen, UBC_WRITE);
+ memset(win, 0, bytelen);
+ ubc_release(win, 0);
- /* now drop our reference to the uvn */
- uvn_detach(&uvn->u_obj);
- }
-
- /*
- * done! release sync lock
- */
- lockmgr(&uvn_sync_lock, LK_RELEASE, (void *)0, curproc);
+ off += bytelen;
+ len -= bytelen;
+ }
}
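
[editor's note: a usage sketch for a hypothetical caller, e.g. a truncate path: after
shrinking a file to "length", zero from the new EOF to the end of that block so stale
bytes cannot reappear. "bsize" is an assumed filesystem block size, not a name from
this commit:]

	off_t boff = length % bsize;

	if (boff != 0)
		uvm_vnp_zerorange(vp, length, bsize - boff);
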
diff --git a/sys/uvm/uvm_vnode.h b/sys/uvm/uvm_vnode.h
index 29efe4d2ac4..ce853189207 100644
--- a/sys/uvm/uvm_vnode.h
+++ b/sys/uvm/uvm_vnode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_vnode.h,v 1.8 2001/08/06 14:03:05 art Exp $ */
+/* $OpenBSD: uvm_vnode.h,v 1.9 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_vnode.h,v 1.9 2000/03/26 20:54:48 kleink Exp $ */
/*
@@ -55,56 +55,6 @@ struct uvm_vnode {
int u_flags; /* flags */
int u_nio; /* number of running I/O requests */
voff_t u_size; /* size of object */
-
- /* the following entry is locked by uvn_wl_lock */
- LIST_ENTRY(uvm_vnode) u_wlist; /* list of writeable vnode objects */
-
- /* the following entry is locked by uvn_sync_lock */
- SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* vnode objects due for a "sync" */
};
-/*
- * u_flags values
- */
-#define UVM_VNODE_VALID 0x001 /* we are attached to the vnode */
-#define UVM_VNODE_CANPERSIST 0x002 /* we can persist after ref == 0 */
-#define UVM_VNODE_ALOCK 0x004 /* uvn_attach is locked out */
-#define UVM_VNODE_DYING 0x008 /* final detach/terminate in
- progress */
-#define UVM_VNODE_RELKILL 0x010 /* uvn should be killed by releasepg
- when final i/o is done */
-#define UVM_VNODE_WANTED 0x020 /* someone is waiting for alock,
- dying, or relkill to clear */
-#define UVM_VNODE_VNISLOCKED 0x040 /* underlying vnode struct is locked
- (valid when DYING is true) */
-#define UVM_VNODE_IOSYNC 0x080 /* I/O sync in progress ... setter
- sleeps on &uvn->u_nio */
-#define UVM_VNODE_IOSYNCWANTED 0x100 /* a process is waiting for the
- i/o sync to clear so it can do
- i/o */
-#define UVM_VNODE_WRITEABLE 0x200 /* uvn has pages that are writeable */
-
-/*
- * UVM_VNODE_BLOCKED: any condition that should new processes from
- * touching the vnode [set WANTED and sleep to wait for it to clear]
- */
-#define UVM_VNODE_BLOCKED (UVM_VNODE_ALOCK|UVM_VNODE_DYING|UVM_VNODE_RELKILL)
-
-#ifdef _KERNEL
-
-/*
- * prototypes
- */
-
-#if 0
-/*
- * moved uvn_attach to uvm_extern.h because uvm_vnode.h is needed to
- * include sys/vnode.h, and files that include sys/vnode.h don't know
- * what a vm_prot_t is.
- */
-struct uvm_object *uvn_attach __P((void *, vm_prot_t));
-#endif
-
-#endif /* _KERNEL */
-
#endif /* _UVM_UVM_VNODE_H_ */